@js-ak/excel-toolbox 1.5.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -62
- package/build/cjs/lib/merge-sheets-to-base-file-process-sync.js +105 -0
- package/build/cjs/lib/merge-sheets-to-base-file-process.js +3 -3
- package/build/cjs/lib/merge-sheets-to-base-file-sync.js +2 -2
- package/build/cjs/lib/merge-sheets-to-base-file.js +1 -1
- package/build/cjs/lib/template/template-fs.js +143 -63
- package/build/cjs/lib/template/template-memory.js +281 -59
- package/build/cjs/lib/template/utils/index.js +25 -0
- package/build/cjs/lib/template/utils/prepare-row-to-cells.js +5 -1
- package/build/cjs/lib/template/utils/regexp.js +32 -0
- package/build/cjs/lib/template/utils/update-dimension.js +15 -0
- package/build/cjs/lib/template/utils/validate-worksheet-xml.js +74 -74
- package/build/cjs/lib/template/utils/write-rows-to-stream.js +57 -17
- package/build/cjs/lib/xml/extract-rows-from-sheet-sync.js +67 -0
- package/build/cjs/lib/xml/extract-rows-from-sheet.js +4 -2
- package/build/cjs/lib/xml/extract-xml-from-sheet-sync.js +43 -0
- package/build/cjs/lib/xml/extract-xml-from-sheet.js +15 -15
- package/build/cjs/lib/xml/index.js +2 -1
- package/build/esm/lib/merge-sheets-to-base-file-process-sync.js +69 -0
- package/build/esm/lib/merge-sheets-to-base-file-process.js +3 -3
- package/build/esm/lib/merge-sheets-to-base-file-sync.js +2 -2
- package/build/esm/lib/merge-sheets-to-base-file.js +1 -1
- package/build/esm/lib/template/template-fs.js +140 -63
- package/build/esm/lib/template/template-memory.js +281 -59
- package/build/esm/lib/template/utils/index.js +2 -0
- package/build/esm/lib/template/utils/prepare-row-to-cells.js +5 -1
- package/build/esm/lib/template/utils/regexp.js +28 -0
- package/build/esm/lib/template/utils/update-dimension.js +15 -0
- package/build/esm/lib/template/utils/validate-worksheet-xml.js +74 -74
- package/build/esm/lib/template/utils/write-rows-to-stream.js +57 -17
- package/build/esm/lib/xml/extract-rows-from-sheet-sync.js +64 -0
- package/build/esm/lib/xml/extract-rows-from-sheet.js +4 -2
- package/build/esm/lib/xml/extract-xml-from-sheet-sync.js +40 -0
- package/build/esm/lib/xml/extract-xml-from-sheet.js +12 -15
- package/build/esm/lib/xml/index.js +2 -1
- package/build/types/lib/merge-sheets-to-base-file-process-sync.d.ts +27 -0
- package/build/types/lib/merge-sheets-to-base-file-process.d.ts +1 -1
- package/build/types/lib/template/template-fs.d.ts +2 -0
- package/build/types/lib/template/template-memory.d.ts +61 -0
- package/build/types/lib/template/utils/index.d.ts +2 -0
- package/build/types/lib/template/utils/prepare-row-to-cells.d.ts +5 -1
- package/build/types/lib/template/utils/regexp.d.ts +24 -0
- package/build/types/lib/template/utils/update-dimension.d.ts +15 -0
- package/build/types/lib/template/utils/write-rows-to-stream.d.ts +22 -9
- package/build/types/lib/xml/extract-rows-from-sheet-sync.d.ts +28 -0
- package/build/types/lib/xml/extract-rows-from-sheet.d.ts +2 -2
- package/build/types/lib/xml/extract-xml-from-sheet-sync.d.ts +14 -0
- package/build/types/lib/xml/extract-xml-from-sheet.d.ts +2 -2
- package/build/types/lib/xml/index.d.ts +2 -1
- package/package.json +1 -5
- package/build/cjs/lib/xml/extract-xml-from-system-content.js +0 -53
- package/build/esm/lib/xml/extract-xml-from-system-content.js +0 -49
- package/build/types/lib/xml/extract-xml-from-system-content.d.ts +0 -15
@@ -33,9 +33,7 @@ function validateWorksheetXml(xml) {
|
|
33
33
|
const requiredElements = [
|
34
34
|
{ name: "sheetViews", tag: "<sheetViews>" },
|
35
35
|
{ name: "sheetFormatPr", tag: "<sheetFormatPr" },
|
36
|
-
{ name: "cols", tag: "<cols>" },
|
37
36
|
{ name: "sheetData", tag: "<sheetData>" },
|
38
|
-
{ name: "mergeCells", tag: "<mergeCells" },
|
39
37
|
];
|
40
38
|
for (const { name, tag } of requiredElements) {
|
41
39
|
if (!xml.includes(tag)) {
|
@@ -100,59 +98,82 @@ function validateWorksheetXml(xml) {
|
|
100
98
|
}
|
101
99
|
}
|
102
100
|
// 4. Check mergeCells
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
const
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
101
|
+
if (xml.includes("<mergeCells")) {
|
102
|
+
const mergeCellsStart = xml.indexOf("<mergeCells");
|
103
|
+
const mergeCellsEnd = xml.indexOf("</mergeCells>");
|
104
|
+
if (mergeCellsStart === -1 || mergeCellsEnd === -1) {
|
105
|
+
return createError("Invalid mergeCells structure");
|
106
|
+
}
|
107
|
+
const mergeCellsContent = xml.substring(mergeCellsStart, mergeCellsEnd);
|
108
|
+
const countMatch = mergeCellsContent.match(/count="(\d+)"/);
|
109
|
+
if (!countMatch) {
|
110
|
+
return createError("Count attribute not specified for mergeCells");
|
111
|
+
}
|
112
|
+
const mergeCellTags = mergeCellsContent.match(/<mergeCell\s+ref="([A-Z]+\d+:[A-Z]+\d+)"\s*\/>/g);
|
113
|
+
if (!mergeCellTags) {
|
114
|
+
return createError("No merged cells found");
|
115
|
+
}
|
116
|
+
// Check if the number of mergeCells matches the count attribute
|
117
|
+
if (mergeCellTags.length !== parseInt(countMatch[1])) {
|
118
|
+
return createError("Mismatch in the number of merged cells", `Expected: ${countMatch[1]}, found: ${mergeCellTags.length}`);
|
119
|
+
}
|
120
|
+
// Check for duplicates of mergeCell
|
121
|
+
const mergeRefs = new Set();
|
122
|
+
const duplicates = new Set();
|
123
|
+
for (const mergeTag of mergeCellTags) {
|
124
|
+
const refMatch = mergeTag.match(/ref="([A-Z]+\d+:[A-Z]+\d+)"/);
|
125
|
+
if (!refMatch) {
|
126
|
+
return createError("Invalid merge cell format", `Tag: ${mergeTag}`);
|
127
|
+
}
|
128
|
+
const ref = refMatch[1];
|
129
|
+
if (mergeRefs.has(ref)) {
|
130
|
+
duplicates.add(ref);
|
131
|
+
}
|
132
|
+
else {
|
133
|
+
mergeRefs.add(ref);
|
134
|
+
}
|
132
135
|
}
|
133
|
-
|
134
|
-
|
136
|
+
if (duplicates.size > 0) {
|
137
|
+
return createError("Duplicates of merged cells found", `Duplicates: ${Array.from(duplicates).join(", ")}`);
|
138
|
+
}
|
139
|
+
// Check for overlapping merge ranges
|
140
|
+
const mergedRanges = Array.from(mergeRefs).map(ref => {
|
141
|
+
const [start, end] = ref.split(":");
|
142
|
+
return {
|
143
|
+
endCol: end.match(/[A-Z]+/)?.[0] || "",
|
144
|
+
endRow: parseInt(end.match(/\d+/)?.[0] || "0"),
|
145
|
+
startCol: start.match(/[A-Z]+/)?.[0] || "",
|
146
|
+
startRow: parseInt(start.match(/\d+/)?.[0] || "0"),
|
147
|
+
};
|
148
|
+
});
|
149
|
+
for (let i = 0; i < mergedRanges.length; i++) {
|
150
|
+
for (let j = i + 1; j < mergedRanges.length; j++) {
|
151
|
+
const a = mergedRanges[i];
|
152
|
+
const b = mergedRanges[j];
|
153
|
+
if (rangesIntersect(a, b)) {
|
154
|
+
return createError("Found intersecting merged cells", `Intersecting: ${getRangeString(a)} and ${getRangeString(b)}`);
|
155
|
+
}
|
156
|
+
}
|
135
157
|
}
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
const
|
153
|
-
|
154
|
-
|
155
|
-
return createError("Found intersecting merged cells", `Intersecting: ${getRangeString(a)} and ${getRangeString(b)}`);
|
158
|
+
// 6. Additional check: all mergeCell tags refer to existing cells
|
159
|
+
for (const mergeTag of mergeCellTags) {
|
160
|
+
const refMatch = mergeTag.match(/ref="([A-Z]+\d+:[A-Z]+\d+)"/);
|
161
|
+
if (!refMatch) {
|
162
|
+
return createError("Invalid merge cell format", `Tag: ${mergeTag}`);
|
163
|
+
}
|
164
|
+
const [cell1, cell2] = refMatch[1].split(":");
|
165
|
+
const cell1Col = cell1.match(/[A-Z]+/)?.[0];
|
166
|
+
const cell1Row = parseInt(cell1.match(/\d+/)?.[0] || "0");
|
167
|
+
const cell2Col = cell2.match(/[A-Z]+/)?.[0];
|
168
|
+
const cell2Row = parseInt(cell2.match(/\d+/)?.[0] || "0");
|
169
|
+
if (!cell1Col || !cell2Col || isNaN(cell1Row) || isNaN(cell2Row)) {
|
170
|
+
return createError("Invalid merged cell coordinates", `Merged cells: ${refMatch[1]}`);
|
171
|
+
}
|
172
|
+
// Check if the merged cells exist
|
173
|
+
const cell1Exists = allCells.some(c => c.row === cell1Row && c.col === cell1Col);
|
174
|
+
const cell2Exists = allCells.some(c => c.row === cell2Row && c.col === cell2Col);
|
175
|
+
if (!cell1Exists || !cell2Exists) {
|
176
|
+
return createError("Merged cell reference points to non-existent cells", `Merged cells: ${refMatch[1]}, missing: ${!cell1Exists ? `${cell1Col}${cell1Row}` : `${cell2Col}${cell2Row}`}`);
|
156
177
|
}
|
157
178
|
}
|
158
179
|
}
|
@@ -181,27 +202,6 @@ function validateWorksheetXml(xml) {
|
|
181
202
|
return createError("Cell is outside the specified area (by column)", `Cell: ${cell.col}${cell.row}, dimension: ${dimensionMatch[1]}`);
|
182
203
|
}
|
183
204
|
}
|
184
|
-
// 6. Additional check: all mergeCell tags refer to existing cells
|
185
|
-
for (const mergeTag of mergeCellTags) {
|
186
|
-
const refMatch = mergeTag.match(/ref="([A-Z]+\d+:[A-Z]+\d+)"/);
|
187
|
-
if (!refMatch) {
|
188
|
-
return createError("Invalid merge cell format", `Tag: ${mergeTag}`);
|
189
|
-
}
|
190
|
-
const [cell1, cell2] = refMatch[1].split(":");
|
191
|
-
const cell1Col = cell1.match(/[A-Z]+/)?.[0];
|
192
|
-
const cell1Row = parseInt(cell1.match(/\d+/)?.[0] || "0");
|
193
|
-
const cell2Col = cell2.match(/[A-Z]+/)?.[0];
|
194
|
-
const cell2Row = parseInt(cell2.match(/\d+/)?.[0] || "0");
|
195
|
-
if (!cell1Col || !cell2Col || isNaN(cell1Row) || isNaN(cell2Row)) {
|
196
|
-
return createError("Invalid merged cell coordinates", `Merged cells: ${refMatch[1]}`);
|
197
|
-
}
|
198
|
-
// Check if the merged cells exist
|
199
|
-
const cell1Exists = allCells.some(c => c.row === cell1Row && c.col === cell1Col);
|
200
|
-
const cell2Exists = allCells.some(c => c.row === cell2Row && c.col === cell2Col);
|
201
|
-
if (!cell1Exists || !cell2Exists) {
|
202
|
-
return createError("Merged cell reference points to non-existent cells", `Merged cells: ${refMatch[1]}, missing: ${!cell1Exists ? `${cell1Col}${cell1Row}` : `${cell2Col}${cell2Row}`}`);
|
203
|
-
}
|
204
|
-
}
|
205
205
|
return { isValid: true };
|
206
206
|
}
|
207
207
|
// A function to check if two ranges intersect
|
@@ -13,34 +13,74 @@ const prepare_row_to_cells_js_1 = require("./prepare-row-to-cells.js");
|
|
13
13
|
* for the first row written to the file. Subsequent rows are written
|
14
14
|
* with incrementing row numbers.
|
15
15
|
*
|
16
|
-
* @param output - A file write stream to write the Excel XML to.
|
17
|
-
* @param rows - An async iterable of rows, where each row is an array
|
18
|
-
*
|
19
|
-
* @param startRowNumber - The starting row number to use for the first
|
20
|
-
*
|
21
|
-
*
|
22
|
-
*
|
23
|
-
*
|
24
|
-
*
|
16
|
+
* @param {WritableLike} output - A file write stream to write the Excel XML to.
|
17
|
+
* @param {AsyncIterable<unknown[] | unknown[][]>} rows - An async iterable of rows, where each row is an array
|
18
|
+
* of values or an array of arrays of values.
|
19
|
+
* @param {number} startRowNumber - The starting row number to use for the first
|
20
|
+
* row written to the file.
|
21
|
+
* @returns {Promise<{
|
22
|
+
* dimension: {
|
23
|
+
* maxColumn: string;
|
24
|
+
* maxRow: number;
|
25
|
+
* minColumn: string;
|
26
|
+
* minRow: number;
|
27
|
+
* };
|
28
|
+
* rowNumber: number;
|
29
|
+
* }>} An object containing:
|
30
|
+
* - dimension: The boundaries of the written data (min/max columns and rows)
|
31
|
+
* - rowNumber: The last row number written to the file
|
25
32
|
*/
|
26
33
|
async function writeRowsToStream(output, rows, startRowNumber) {
    // Writes every non-empty row from `rows` to `output` as a <row> element and
    // tracks the bounding box of what was written. Resolves to
    // { dimension, rowNumber }, where rowNumber is the number the next row
    // would receive.
    let rowNumber = startRowNumber;
    const dimension = {
        maxColumn: "A",
        maxRow: startRowNumber,
        minColumn: "A",
        minRow: startRowNumber,
    };
    // Orders spreadsheet column names: shorter names come first ("Z" < "AA"),
    // names of equal length are compared lexicographically ("A" < "B").
    const compareColumns = (a, b) => {
        if (a === b) {
            return 0;
        }
        if (a.length !== b.length) {
            return a.length < b.length ? -1 : 1;
        }
        return a < b ? -1 : 1;
    };
    // Returns the column letters of a cell reference, or "" when there are none.
    const columnOf = (cellRef) => cellRef?.match(/[A-Z]+/)?.[0] ?? "";
    const processRow = (row, currentRowNumber) => {
        const cells = (0, prepare_row_to_cells_js_1.prepareRowToCells)(row, currentRowNumber);
        if (cells.length === 0) {
            return;
        }
        output.write(`<row r="${currentRowNumber}">${cells.map(cell => cell.cellXml).join("")}</row>`);
        // Update the boundaries. A reference without column letters is ignored:
        // the empty string would compare as smaller than every real column and
        // overwrite minColumn with "".
        const firstColumn = columnOf(cells[0]?.cellRef);
        if (firstColumn && compareColumns(firstColumn, dimension.minColumn) < 0) {
            dimension.minColumn = firstColumn;
        }
        const lastColumn = columnOf(cells[cells.length - 1]?.cellRef);
        if (lastColumn && compareColumns(lastColumn, dimension.maxColumn) > 0) {
            dimension.maxColumn = lastColumn;
        }
        dimension.maxRow = currentRowNumber;
    };
    for await (const row of rows) {
        // Empty entries are skipped and do not consume a row number.
        if (!row.length) {
            continue;
        }
        if (Array.isArray(row[0])) {
            // A batch: every non-empty sub-array is written as its own row.
            for (const subRow of row) {
                if (!subRow.length) {
                    continue;
                }
                processRow(subRow, rowNumber);
                rowNumber++;
            }
        }
        else {
            processRow(row, rowNumber);
            rowNumber++;
        }
    }
    return { dimension, rowNumber };
}
|
@@ -0,0 +1,67 @@
|
|
1
|
+
"use strict";
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
3
|
+
exports.extractRowsFromSheetSync = extractRowsFromSheetSync;
|
4
|
+
const extract_xml_from_sheet_sync_js_1 = require("./extract-xml-from-sheet-sync.js");
|
5
|
+
/**
 * Parses a worksheet (either as Buffer or string) to extract row data,
 * the last row number, and merge cell information from Excel XML format.
 *
 * A string is used as the worksheet XML directly; any other input is
 * converted to XML with extractXmlFromSheetSync first.
 *
 * @param {Buffer|string} sheet - The worksheet content to parse, either as:
 *   - Buffer (converted with extractXmlFromSheetSync)
 *   - string (raw XML content)
 * @returns {{
 *   lastRowNumber: number,
 *   mergeCells: {ref: string}[],
 *   rows: string[],
 *   xml: string
 * }} An object containing:
 *   - lastRowNumber: Highest `r` value found on a <row> opening tag, or 0 when no row has one
 *   - mergeCells: Array of merged cell ranges (e.g., [{ref: "A1:B2"}])
 *   - rows: Array of raw XML strings, one per <row> element inside <sheetData>
 *   - xml: The full worksheet XML the values above were extracted from
 * @throws {Error} If the sheetData section is not found in the XML
 */
function extractRowsFromSheetSync(sheet) {
    // Convert non-string input to an XML string if needed
    const xml = typeof sheet === "string"
        ? sheet
        : (0, extract_xml_from_sheet_sync_js_1.extractXmlFromSheetSync)(sheet);
    // Extract the sheetData section containing all rows
    const sheetDataMatch = xml.match(/<sheetData[^>]*>([\s\S]*?)<\/sheetData>/);
    if (!sheetDataMatch) {
        throw new Error("sheetData not found in worksheet XML");
    }
    const sheetDataContent = sheetDataMatch[1] || "";
    // Collect every <row> element, both self-closing and with a body
    const rows = Array.from(sheetDataContent.matchAll(/<row\b[^>]*\/>|<row\b[^>]*>[\s\S]*?<\/row>/g), match => match[0]);
    // Highest row number present in the sheet. The `r` attribute is read from
    // the <row> opening tag only, so text inside the row body that happens to
    // look like r="123" cannot influence the result.
    let lastRowNumber = 0;
    for (const rowXml of rows) {
        const rowNumMatch = rowXml.match(/^<row\b[^>]*?\sr="(\d+)"/);
        if (rowNumMatch) {
            lastRowNumber = Math.max(lastRowNumber, parseInt(rowNumMatch[1], 10));
        }
    }
    // Extract all merged cell ranges from the worksheet
    const mergeCells = [];
    const mergeCellsMatch = xml.match(/<mergeCells[^>]*>([\s\S]*?)<\/mergeCells>/);
    if (mergeCellsMatch) {
        // Find all mergeCell entries with ref attributes
        const mergeCellTags = mergeCellsMatch[1]?.match(/<mergeCell[^>]+ref="([^"]+)"[^>]*>/g) || [];
        for (const tag of mergeCellTags) {
            const refMatch = tag.match(/ref="([^"]+)"/);
            if (refMatch?.[1]) {
                mergeCells.push({ ref: refMatch[1] }); // e.g. "A1:B2"
            }
        }
    }
    return {
        lastRowNumber,
        mergeCells,
        rows,
        xml,
    };
}
|
@@ -22,9 +22,11 @@ const extract_xml_from_sheet_js_1 = require("./extract-xml-from-sheet.js");
|
|
22
22
|
* - mergeCells: Array of merged cell ranges (e.g., [{ref: "A1:B2"}])
|
23
23
|
* @throws {Error} If the sheetData section is not found in the XML
|
24
24
|
*/
|
25
|
-
function extractRowsFromSheet(sheet) {
|
25
|
+
async function extractRowsFromSheet(sheet) {
|
26
26
|
// Convert Buffer input to XML string if needed
|
27
|
-
const xml = typeof sheet === "string"
|
27
|
+
const xml = typeof sheet === "string"
|
28
|
+
? sheet
|
29
|
+
: await (0, extract_xml_from_sheet_js_1.extractXmlFromSheet)(sheet);
|
28
30
|
// Extract the sheetData section containing all rows
|
29
31
|
const sheetDataMatch = xml.match(/<sheetData[^>]*>([\s\S]*?)<\/sheetData>/);
|
30
32
|
if (!sheetDataMatch) {
|
@@ -0,0 +1,43 @@
|
|
1
|
+
"use strict";
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
3
|
+
exports.extractXmlFromSheetSync = extractXmlFromSheetSync;
|
4
|
+
const node_zlib_1 = require("node:zlib");
|
5
|
+
/**
 * Extracts XML content from an Excel worksheet file (e.g., xl/worksheets/sheet1.xml).
 * Handles both compressed (raw deflate) and uncompressed (plain XML) input.
 *
 * The first 1024 bytes are inspected: if they start with an XML declaration
 * (`<?xml`) followed by an element start tag, the buffer is decoded as UTF-8
 * text; otherwise it is inflated as raw deflate data.
 *
 * @param {Buffer} buffer - The file content to process, which may be:
 *   - Raw XML text
 *   - Deflate-compressed XML data (without zlib headers)
 * @returns {string} - The extracted XML as a UTF-8 string with control
 *   characters (other than tab, newline, carriage return) removed
 * @throws {Error} - If the buffer is empty, or if it is not recognised as XML
 *   and cannot be inflated (the underlying error is attached as `cause`)
 */
function extractXmlFromSheetSync(buffer) {
    if (!buffer || buffer.length === 0) {
        throw new Error("Empty buffer provided");
    }
    // Control characters except tab (\x09), newline (\x0A) and carriage return (\x0D)
    const controlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F]/g;
    // Sniff the beginning of the buffer: an XML declaration followed by a tag
    const head = buffer.subarray(0, 1024).toString("utf8").replace(controlChars, "").trim();
    const isXml = /^<\?xml[\s\S]+<\w+[\s>]/.test(head);
    let xml;
    if (isXml) {
        // Case 1: Already uncompressed XML - convert directly to string
        xml = buffer.toString("utf8");
    }
    else {
        // Case 2: Attempt to decompress as raw deflate data
        try {
            xml = (0, node_zlib_1.inflateRawSync)(buffer).toString("utf8");
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            // Keep the original error reachable for callers that need the details
            throw new Error("Failed to decompress sheet XML: " + reason, { cause: err });
        }
    }
    // Sanitize the XML by removing control characters
    return xml.replace(controlChars, "");
}
|
@@ -1,7 +1,12 @@
|
|
1
1
|
"use strict";
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
4
|
+
};
|
2
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
3
6
|
exports.extractXmlFromSheet = extractXmlFromSheet;
|
4
|
-
const
|
7
|
+
const node_util_1 = __importDefault(require("node:util"));
|
8
|
+
const node_zlib_1 = __importDefault(require("node:zlib"));
|
9
|
+
const inflateRaw = node_util_1.default.promisify(node_zlib_1.default.inflateRaw);
|
5
10
|
/**
|
6
11
|
* Extracts and parses XML content from an Excel worksheet file (e.g., xl/worksheets/sheet1.xml).
|
7
12
|
* Handles both compressed (raw deflate) and uncompressed (plain XML) formats.
|
@@ -12,35 +17,30 @@ const pako_1 = require("pako");
|
|
12
17
|
* @param {Buffer} buffer - The file content to process, which may be:
|
13
18
|
* - Raw XML text
|
14
19
|
* - Deflate-compressed XML data (without zlib headers)
|
15
|
-
* @returns {string} - The extracted XML content as a UTF-8 string
|
20
|
+
* @returns {Promise<string>} - The extracted XML content as a UTF-8 string
|
16
21
|
* @throws {Error} - If the buffer is empty or cannot be processed
|
17
22
|
*/
|
18
|
-
function extractXmlFromSheet(buffer) {
|
23
|
+
async function extractXmlFromSheet(buffer) {
|
19
24
|
if (!buffer || buffer.length === 0) {
|
20
25
|
throw new Error("Empty buffer provided");
|
21
26
|
}
|
22
27
|
let xml;
|
23
28
|
// Check if the buffer starts with an XML declaration (<?xml)
|
24
|
-
const
|
25
|
-
|
29
|
+
const head = buffer.subarray(0, 1024).toString("utf8").replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "").trim();
|
30
|
+
const isXml = /^<\?xml[\s\S]+<\w+[\s>]/.test(head);
|
31
|
+
if (isXml) {
|
26
32
|
// Case 1: Already uncompressed XML - convert directly to string
|
27
33
|
xml = buffer.toString("utf8");
|
28
34
|
}
|
29
35
|
else {
|
30
36
|
// Case 2: Attempt to decompress as raw deflate data
|
31
|
-
|
32
|
-
|
33
|
-
if (inflated && inflated.includes("<sheetData")) {
|
34
|
-
xml = inflated;
|
37
|
+
try {
|
38
|
+
xml = (await inflateRaw(buffer)).toString("utf8");
|
35
39
|
}
|
36
|
-
|
37
|
-
throw new Error("
|
40
|
+
catch (err) {
|
41
|
+
throw new Error("Failed to decompress sheet XML: " + (err instanceof Error ? err.message : String(err)));
|
38
42
|
}
|
39
43
|
}
|
40
|
-
// Fallback: If no XML obtained yet, try direct UTF-8 conversion
|
41
|
-
if (!xml) {
|
42
|
-
xml = buffer.toString("utf8");
|
43
|
-
}
|
44
44
|
// Sanitize XML by removing control characters (except tab, newline, carriage return)
|
45
45
|
// This handles potential corruption from binary data or encoding issues
|
46
46
|
xml = xml.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "");
|
@@ -15,7 +15,8 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
15
15
|
};
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
17
17
|
__exportStar(require("./build-merged-sheet.js"), exports);
|
18
|
+
__exportStar(require("./extract-rows-from-sheet-sync.js"), exports);
|
18
19
|
__exportStar(require("./extract-rows-from-sheet.js"), exports);
|
20
|
+
__exportStar(require("./extract-xml-from-sheet-sync.js"), exports);
|
19
21
|
__exportStar(require("./extract-xml-from-sheet.js"), exports);
|
20
|
-
__exportStar(require("./extract-xml-from-system-content.js"), exports);
|
21
22
|
__exportStar(require("./shift-row-indices.js"), exports);
|
@@ -0,0 +1,69 @@
|
|
1
|
+
import * as Utils from "./utils/index.js";
|
2
|
+
import * as Xml from "./xml/index.js";
|
3
|
+
/**
 * Merges rows from other Excel files into a base Excel file.
 *
 * This function is the synchronous, process-friendly counterpart of
 * mergeSheetsToBaseFile. It takes a single object with the following properties:
 * - additions: An array of objects with two properties (defaults to []):
 *   - files: A dictionary of file paths to their corresponding content
 *   - sheetIndexes: The indexes of the sheets to extract rows from
 *     (used to build `xl/worksheets/sheet<index>.xml`)
 * - baseFiles: A dictionary of file paths to their corresponding content
 * - baseSheetIndex: The index of the sheet in the base file to add rows to
 * - gap: The number of empty rows to insert between each added section (defaults to 0)
 * - sheetNamesToRemove: The names of sheets to remove from the output (defaults to [])
 * - sheetsToRemove: The indices of sheets to remove from the output (defaults to [])
 *
 * The function does not return anything: `baseFiles` is updated in place.
 * The base sheet entry is replaced with the merged sheet, removed sheets are
 * deleted, and the workbook, rels and content-types entries are rewritten
 * when present.
 *
 * @throws {Error} If the base sheet, or a sheet requested from an addition, is missing
 */
export function mergeSheetsToBaseFileProcessSync(data) {
    // The defaults keep omitted optional fields from breaking the loops below
    // (iterating undefined) or turning the row offset into NaN.
    const { additions = [], baseFiles, baseSheetIndex, gap = 0, sheetNamesToRemove = [], sheetsToRemove = [], } = data;
    const basePath = `xl/worksheets/sheet${baseSheetIndex}.xml`;
    if (!baseFiles[basePath]) {
        throw new Error(`Base file does not contain ${basePath}`);
    }
    const { lastRowNumber, mergeCells: baseMergeCells, rows: baseRows, xml, } = Xml.extractRowsFromSheetSync(baseFiles[basePath]);
    const allRows = [...baseRows];
    const allMergeCells = [...baseMergeCells];
    // Number of rows every subsequently appended section is shifted down by
    let currentRowOffset = lastRowNumber + gap;
    for (const { files, sheetIndexes } of additions) {
        for (const sheetIndex of sheetIndexes) {
            const sheetPath = `xl/worksheets/sheet${sheetIndex}.xml`;
            if (!files[sheetPath]) {
                throw new Error(`File does not contain ${sheetPath}`);
            }
            const { mergeCells, rows } = Xml.extractRowsFromSheetSync(files[sheetPath]);
            const shiftedRows = Xml.shiftRowIndices(rows, currentRowOffset);
            const shiftedMergeCells = mergeCells.map(cell => {
                const [start, end] = cell.ref.split(":");
                if (!start || !end) {
                    // Not a "start:end" range - keep the entry untouched
                    return cell;
                }
                const shiftedStart = Utils.shiftCellRef(start, currentRowOffset);
                const shiftedEnd = Utils.shiftCellRef(end, currentRowOffset);
                return { ...cell, ref: `${shiftedStart}:${shiftedEnd}` };
            });
            allRows.push(...shiftedRows);
            allMergeCells.push(...shiftedMergeCells);
            currentRowOffset += Utils.getMaxRowNumber(rows) + gap;
        }
    }
    baseFiles[basePath] = Xml.buildMergedSheet(xml, allRows, allMergeCells);
    for (const sheetIndex of sheetsToRemove) {
        const sheetPath = `xl/worksheets/sheet${sheetIndex}.xml`;
        delete baseFiles[sheetPath];
        // Drop the references to the removed sheet from the package metadata
        if (baseFiles["xl/workbook.xml"]) {
            baseFiles["xl/workbook.xml"] = Buffer.from(Utils.removeSheetFromWorkbook(baseFiles["xl/workbook.xml"].toString(), sheetIndex));
        }
        if (baseFiles["xl/_rels/workbook.xml.rels"]) {
            baseFiles["xl/_rels/workbook.xml.rels"] = Buffer.from(Utils.removeSheetFromRels(baseFiles["xl/_rels/workbook.xml.rels"].toString(), sheetIndex));
        }
        if (baseFiles["[Content_Types].xml"]) {
            baseFiles["[Content_Types].xml"] = Buffer.from(Utils.removeSheetFromContentTypes(baseFiles["[Content_Types].xml"].toString(), sheetIndex));
        }
    }
    for (const sheetName of sheetNamesToRemove) {
        Utils.removeSheetByName(baseFiles, sheetName);
    }
}
|
@@ -16,13 +16,13 @@ import * as Xml from "./xml/index.js";
|
|
16
16
|
*
|
17
17
|
* The function returns a dictionary of file paths to their corresponding XML content.
|
18
18
|
*/
|
19
|
-
export function mergeSheetsToBaseFileProcess(data) {
|
19
|
+
export async function mergeSheetsToBaseFileProcess(data) {
|
20
20
|
const { additions, baseFiles, baseSheetIndex, gap, sheetNamesToRemove, sheetsToRemove, } = data;
|
21
21
|
const basePath = `xl/worksheets/sheet${baseSheetIndex}.xml`;
|
22
22
|
if (!baseFiles[basePath]) {
|
23
23
|
throw new Error(`Base file does not contain ${basePath}`);
|
24
24
|
}
|
25
|
-
const { lastRowNumber, mergeCells: baseMergeCells, rows: baseRows, xml, } = Xml.extractRowsFromSheet(baseFiles[basePath]);
|
25
|
+
const { lastRowNumber, mergeCells: baseMergeCells, rows: baseRows, xml, } = await Xml.extractRowsFromSheet(baseFiles[basePath]);
|
26
26
|
const allRows = [...baseRows];
|
27
27
|
const allMergeCells = [...baseMergeCells];
|
28
28
|
let currentRowOffset = lastRowNumber + gap;
|
@@ -32,7 +32,7 @@ export function mergeSheetsToBaseFileProcess(data) {
|
|
32
32
|
if (!files[sheetPath]) {
|
33
33
|
throw new Error(`File does not contain ${sheetPath}`);
|
34
34
|
}
|
35
|
-
const { mergeCells, rows } = Xml.extractRowsFromSheet(files[sheetPath]);
|
35
|
+
const { mergeCells, rows } = await Xml.extractRowsFromSheet(files[sheetPath]);
|
36
36
|
const shiftedRows = Xml.shiftRowIndices(rows, currentRowOffset);
|
37
37
|
const shiftedMergeCells = mergeCells.map(cell => {
|
38
38
|
const [start, end] = cell.ref.split(":");
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import * as Utils from "./utils/index.js";
|
2
2
|
import * as Zip from "./zip/index.js";
|
3
|
-
import {
|
3
|
+
import { mergeSheetsToBaseFileProcessSync } from "./merge-sheets-to-base-file-process-sync.js";
|
4
4
|
/**
|
5
5
|
* Merge rows from other Excel files into a base Excel file.
|
6
6
|
* The output is a new Excel file with the merged content.
|
@@ -29,7 +29,7 @@ export function mergeSheetsToBaseFileSync(data) {
|
|
29
29
|
sheetIndexes,
|
30
30
|
});
|
31
31
|
}
|
32
|
-
|
32
|
+
mergeSheetsToBaseFileProcessSync({
|
33
33
|
additions: additionsUpdated,
|
34
34
|
baseFiles,
|
35
35
|
baseSheetIndex,
|