@js-ak/excel-toolbox 1.6.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/cjs/lib/merge-sheets-to-base-file-process-sync.js +105 -0
- package/build/cjs/lib/merge-sheets-to-base-file-process.js +3 -3
- package/build/cjs/lib/merge-sheets-to-base-file-sync.js +2 -2
- package/build/cjs/lib/merge-sheets-to-base-file.js +1 -1
- package/build/cjs/lib/template/template-fs.js +97 -18
- package/build/cjs/lib/template/template-memory.js +110 -43
- package/build/cjs/lib/template/utils/compare-columns.js +16 -0
- package/build/cjs/lib/template/utils/index.js +1 -0
- package/build/cjs/lib/xml/extract-rows-from-sheet-sync.js +67 -0
- package/build/cjs/lib/xml/extract-rows-from-sheet.js +4 -2
- package/build/cjs/lib/xml/extract-xml-from-sheet-sync.js +43 -0
- package/build/cjs/lib/xml/extract-xml-from-sheet.js +15 -15
- package/build/cjs/lib/xml/index.js +2 -1
- package/build/esm/lib/merge-sheets-to-base-file-process-sync.js +69 -0
- package/build/esm/lib/merge-sheets-to-base-file-process.js +3 -3
- package/build/esm/lib/merge-sheets-to-base-file-sync.js +2 -2
- package/build/esm/lib/merge-sheets-to-base-file.js +1 -1
- package/build/esm/lib/template/template-fs.js +97 -18
- package/build/esm/lib/template/template-memory.js +110 -43
- package/build/esm/lib/template/utils/compare-columns.js +13 -0
- package/build/esm/lib/template/utils/index.js +1 -0
- package/build/esm/lib/xml/extract-rows-from-sheet-sync.js +64 -0
- package/build/esm/lib/xml/extract-rows-from-sheet.js +4 -2
- package/build/esm/lib/xml/extract-xml-from-sheet-sync.js +40 -0
- package/build/esm/lib/xml/extract-xml-from-sheet.js +12 -15
- package/build/esm/lib/xml/index.js +2 -1
- package/build/types/lib/merge-sheets-to-base-file-process-sync.d.ts +27 -0
- package/build/types/lib/merge-sheets-to-base-file-process.d.ts +1 -1
- package/build/types/lib/template/template-fs.d.ts +14 -0
- package/build/types/lib/template/template-memory.d.ts +4 -2
- package/build/types/lib/template/utils/compare-columns.d.ts +8 -0
- package/build/types/lib/template/utils/index.d.ts +1 -0
- package/build/types/lib/xml/extract-rows-from-sheet-sync.d.ts +28 -0
- package/build/types/lib/xml/extract-rows-from-sheet.d.ts +2 -2
- package/build/types/lib/xml/extract-xml-from-sheet-sync.d.ts +14 -0
- package/build/types/lib/xml/extract-xml-from-sheet.d.ts +2 -2
- package/build/types/lib/xml/index.d.ts +2 -1
- package/package.json +1 -5
- package/build/cjs/lib/xml/extract-xml-from-system-content.js +0 -53
- package/build/esm/lib/xml/extract-xml-from-system-content.js +0 -49
- package/build/types/lib/xml/extract-xml-from-system-content.d.ts +0 -15
@@ -0,0 +1,64 @@
|
|
1
|
+
import { extractXmlFromSheetSync } from "./extract-xml-from-sheet-sync.js";
|
2
|
+
/**
|
3
|
+
* Parses a worksheet (either as Buffer or string) to extract row data,
|
4
|
+
* last row number, and merge cell information from Excel XML format.
|
5
|
+
*
|
6
|
+
* This function is particularly useful for processing Excel files in
|
7
|
+
* Open XML Spreadsheet format (.xlsx).
|
8
|
+
*
|
9
|
+
* @param {Buffer|string} sheet - The worksheet content to parse, either as:
|
10
|
+
* - Buffer (binary Excel sheet)
|
11
|
+
* - string (raw XML content)
|
12
|
+
* @returns {{
|
13
|
+
* rows: string[],
|
14
|
+
* lastRowNumber: number,
|
15
|
+
* mergeCells: {ref: string}[]
|
16
|
+
* }} An object containing:
|
17
|
+
* - rows: Array of raw XML strings for each <row> element
|
18
|
+
* - lastRowNumber: Highest row number found in the sheet (1-based)
|
19
|
+
* - mergeCells: Array of merged cell ranges (e.g., [{ref: "A1:B2"}])
|
20
|
+
* @throws {Error} If the sheetData section is not found in the XML
|
21
|
+
*/
|
22
|
+
export function extractRowsFromSheetSync(sheet) {
|
23
|
+
// Convert Buffer input to XML string if needed
|
24
|
+
const xml = typeof sheet === "string"
|
25
|
+
? sheet
|
26
|
+
: extractXmlFromSheetSync(sheet);
|
27
|
+
// Extract the sheetData section containing all rows
|
28
|
+
const sheetDataMatch = xml.match(/<sheetData[^>]*>([\s\S]*?)<\/sheetData>/);
|
29
|
+
if (!sheetDataMatch) {
|
30
|
+
throw new Error("sheetData not found in worksheet XML");
|
31
|
+
}
|
32
|
+
const sheetDataContent = sheetDataMatch[1] || "";
|
33
|
+
// Extract all <row> elements using regex
|
34
|
+
const rowMatches = [...sheetDataContent.matchAll(/<row\b[^>]*\/>|<row\b[^>]*>[\s\S]*?<\/row>/g)];
|
35
|
+
const rows = rowMatches.map(match => match[0]);
|
36
|
+
// Calculate the highest row number present in the sheet
|
37
|
+
const lastRowNumber = rowMatches
|
38
|
+
.map(match => {
|
39
|
+
// Extract row number from r="..." attribute (1-based)
|
40
|
+
const rowNumMatch = match[0].match(/r="(\d+)"/);
|
41
|
+
return rowNumMatch?.[1] ? parseInt(rowNumMatch[1], 10) : null;
|
42
|
+
})
|
43
|
+
.filter((row) => row !== null) // Type guard to filter out nulls
|
44
|
+
.reduce((max, current) => Math.max(max, current), 0); // Find maximum row number
|
45
|
+
// Extract all merged cell ranges from the worksheet
|
46
|
+
const mergeCells = [];
|
47
|
+
const mergeCellsMatch = xml.match(/<mergeCells[^>]*>([\s\S]*?)<\/mergeCells>/);
|
48
|
+
if (mergeCellsMatch) {
|
49
|
+
// Find all mergeCell entries with ref attributes
|
50
|
+
const mergeCellMatches = mergeCellsMatch[1]?.match(/<mergeCell[^>]+ref="([^"]+)"[^>]*>/g) || [];
|
51
|
+
mergeCellMatches.forEach(match => {
|
52
|
+
const refMatch = match.match(/ref="([^"]+)"/);
|
53
|
+
if (refMatch?.[1]) {
|
54
|
+
mergeCells.push({ ref: refMatch[1] }); // Store the cell range (e.g., "A1:B2")
|
55
|
+
}
|
56
|
+
});
|
57
|
+
}
|
58
|
+
return {
|
59
|
+
lastRowNumber,
|
60
|
+
mergeCells,
|
61
|
+
rows,
|
62
|
+
xml,
|
63
|
+
};
|
64
|
+
}
|
@@ -19,9 +19,11 @@ import { extractXmlFromSheet } from "./extract-xml-from-sheet.js";
|
|
19
19
|
* - mergeCells: Array of merged cell ranges (e.g., [{ref: "A1:B2"}])
|
20
20
|
* @throws {Error} If the sheetData section is not found in the XML
|
21
21
|
*/
|
22
|
-
export function extractRowsFromSheet(sheet) {
|
22
|
+
export async function extractRowsFromSheet(sheet) {
|
23
23
|
// Convert Buffer input to XML string if needed
|
24
|
-
const xml = typeof sheet === "string"
|
24
|
+
const xml = typeof sheet === "string"
|
25
|
+
? sheet
|
26
|
+
: await extractXmlFromSheet(sheet);
|
25
27
|
// Extract the sheetData section containing all rows
|
26
28
|
const sheetDataMatch = xml.match(/<sheetData[^>]*>([\s\S]*?)<\/sheetData>/);
|
27
29
|
if (!sheetDataMatch) {
|
@@ -0,0 +1,40 @@
|
|
1
|
+
import { inflateRawSync } from "node:zlib";
|
2
|
+
/**
|
3
|
+
* Extracts and parses XML content from an Excel worksheet file (e.g., xl/worksheets/sheet1.xml).
|
4
|
+
* Handles both compressed (raw deflate) and uncompressed (plain XML) formats.
|
5
|
+
*
|
6
|
+
* This function is designed to work with Excel Open XML (.xlsx) worksheet files,
|
7
|
+
* which may be stored in either compressed or uncompressed format within the ZIP container.
|
8
|
+
*
|
9
|
+
* @param {Buffer} buffer - The file content to process, which may be:
|
10
|
+
* - Raw XML text
|
11
|
+
* - Deflate-compressed XML data (without zlib headers)
|
12
|
+
* @returns {string} - The extracted XML content as a UTF-8 string
|
13
|
+
* @throws {Error} - If the buffer is empty or cannot be processed
|
14
|
+
*/
|
15
|
+
export function extractXmlFromSheetSync(buffer) {
|
16
|
+
if (!buffer || buffer.length === 0) {
|
17
|
+
throw new Error("Empty buffer provided");
|
18
|
+
}
|
19
|
+
let xml;
|
20
|
+
// Check if the buffer starts with an XML declaration (<?xml)
|
21
|
+
const head = buffer.subarray(0, 1024).toString("utf8").replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "").trim();
|
22
|
+
const isXml = /^<\?xml[\s\S]+<\w+[\s>]/.test(head);
|
23
|
+
if (isXml) {
|
24
|
+
// Case 1: Already uncompressed XML - convert directly to string
|
25
|
+
xml = buffer.toString("utf8");
|
26
|
+
}
|
27
|
+
else {
|
28
|
+
// Case 2: Attempt to decompress as raw deflate data
|
29
|
+
try {
|
30
|
+
xml = inflateRawSync(buffer).toString("utf8");
|
31
|
+
}
|
32
|
+
catch (err) {
|
33
|
+
throw new Error("Failed to decompress sheet XML: " + (err instanceof Error ? err.message : String(err)));
|
34
|
+
}
|
35
|
+
}
|
36
|
+
// Sanitize XML by removing control characters (except tab, newline, carriage return)
|
37
|
+
// This handles potential corruption from binary data or encoding issues
|
38
|
+
xml = xml.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "");
|
39
|
+
return xml;
|
40
|
+
}
|
@@ -1,4 +1,6 @@
|
|
1
|
-
import { inflateRaw } from "pako";
|
1
|
+
import util from "node:util";
|
2
|
+
import zlib from "node:zlib";
|
3
|
+
const inflateRaw = util.promisify(zlib.inflateRaw);
|
2
4
|
/**
|
3
5
|
* Extracts and parses XML content from an Excel worksheet file (e.g., xl/worksheets/sheet1.xml).
|
4
6
|
* Handles both compressed (raw deflate) and uncompressed (plain XML) formats.
|
@@ -9,35 +11,30 @@ import { inflateRaw } from "pako";
|
|
9
11
|
* @param {Buffer} buffer - The file content to process, which may be:
|
10
12
|
* - Raw XML text
|
11
13
|
* - Deflate-compressed XML data (without zlib headers)
|
12
|
-
* @returns {string} - The extracted XML content as a UTF-8 string
|
14
|
+
* @returns {Promise<string>} - The extracted XML content as a UTF-8 string
|
13
15
|
* @throws {Error} - If the buffer is empty or cannot be processed
|
14
16
|
*/
|
15
|
-
export function extractXmlFromSheet(buffer) {
|
17
|
+
export async function extractXmlFromSheet(buffer) {
|
16
18
|
if (!buffer || buffer.length === 0) {
|
17
19
|
throw new Error("Empty buffer provided");
|
18
20
|
}
|
19
21
|
let xml;
|
20
22
|
// Check if the buffer starts with an XML declaration (<?xml)
|
21
|
-
const
|
22
|
-
|
23
|
+
const head = buffer.subarray(0, 1024).toString("utf8").replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "").trim();
|
24
|
+
const isXml = /^<\?xml[\s\S]+<\w+[\s>]/.test(head);
|
25
|
+
if (isXml) {
|
23
26
|
// Case 1: Already uncompressed XML - convert directly to string
|
24
27
|
xml = buffer.toString("utf8");
|
25
28
|
}
|
26
29
|
else {
|
27
30
|
// Case 2: Attempt to decompress as raw deflate data
|
28
|
-
|
29
|
-
|
30
|
-
if (inflated && inflated.includes("<sheetData")) {
|
31
|
-
xml = inflated;
|
31
|
+
try {
|
32
|
+
xml = (await inflateRaw(buffer)).toString("utf8");
|
32
33
|
}
|
33
|
-
|
34
|
-
throw new Error("
|
34
|
+
catch (err) {
|
35
|
+
throw new Error("Failed to decompress sheet XML: " + (err instanceof Error ? err.message : String(err)));
|
35
36
|
}
|
36
37
|
}
|
37
|
-
// Fallback: If no XML obtained yet, try direct UTF-8 conversion
|
38
|
-
if (!xml) {
|
39
|
-
xml = buffer.toString("utf8");
|
40
|
-
}
|
41
38
|
// Sanitize XML by removing control characters (except tab, newline, carriage return)
|
42
39
|
// This handles potential corruption from binary data or encoding issues
|
43
40
|
xml = xml.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "");
|
@@ -1,5 +1,6 @@
|
|
1
1
|
export * from "./build-merged-sheet.js";
|
2
|
+
export * from "./extract-rows-from-sheet-sync.js";
|
2
3
|
export * from "./extract-rows-from-sheet.js";
|
4
|
+
export * from "./extract-xml-from-sheet-sync.js";
|
3
5
|
export * from "./extract-xml-from-sheet.js";
|
4
|
-
export * from "./extract-xml-from-system-content.js";
|
5
6
|
export * from "./shift-row-indices.js";
|
@@ -0,0 +1,27 @@
|
|
1
|
+
/**
|
2
|
+
* Merges rows from other Excel files into a base Excel file.
|
3
|
+
*
|
4
|
+
* This function is a process-friendly version of mergeSheetsToBaseFile.
|
5
|
+
* It takes a single object with the following properties:
|
6
|
+
* - additions: An array of objects with two properties:
|
7
|
+
* - files: A dictionary of file paths to their corresponding XML content
|
8
|
+
* - sheetIndexes: The 1-based indexes of the sheet to extract rows from
|
9
|
+
* - baseFiles: A dictionary of file paths to their corresponding XML content
|
10
|
+
* - baseSheetIndex: The 1-based index of the sheet in the base file to add rows to
|
11
|
+
* - gap: The number of empty rows to insert between each added section
|
12
|
+
* - sheetNamesToRemove: The names of sheets to remove from the output file
|
13
|
+
* - sheetsToRemove: The 1-based indices of sheets to remove from the output file
|
14
|
+
*
|
15
|
+
* The function returns a dictionary of file paths to their corresponding XML content.
|
16
|
+
*/
|
17
|
+
export declare function mergeSheetsToBaseFileProcessSync(data: {
|
18
|
+
additions: {
|
19
|
+
files: Record<string, Buffer>;
|
20
|
+
sheetIndexes: number[];
|
21
|
+
}[];
|
22
|
+
baseFiles: Record<string, Buffer>;
|
23
|
+
baseSheetIndex: number;
|
24
|
+
gap: number;
|
25
|
+
sheetNamesToRemove: string[];
|
26
|
+
sheetsToRemove: number[];
|
27
|
+
}): void;
|
@@ -29,6 +29,7 @@ export declare class TemplateFs {
|
|
29
29
|
* @experimental This API is experimental and might change in future versions.
|
30
30
|
*/
|
31
31
|
constructor(fileKeys: Set<string>, destination: string);
|
32
|
+
/** Public methods */
|
32
33
|
/**
|
33
34
|
* Copies a sheet from the template to a new name.
|
34
35
|
*
|
@@ -91,6 +92,18 @@ export declare class TemplateFs {
|
|
91
92
|
startRowNumber?: number;
|
92
93
|
rows: AsyncIterable<unknown[]>;
|
93
94
|
}): Promise<void>;
|
95
|
+
/**
|
96
|
+
* Removes sheets from the workbook.
|
97
|
+
*
|
98
|
+
* @param {Object} data
|
99
|
+
* @param {number[]} [data.sheetIndexes] - The 1-based indexes of the sheets to remove.
|
100
|
+
* @param {string[]} [data.sheetNames] - The names of the sheets to remove.
|
101
|
+
* @returns {void}
|
102
|
+
*/
|
103
|
+
removeSheets(data: {
|
104
|
+
sheetNames?: string[];
|
105
|
+
sheetIndexes?: number[];
|
106
|
+
}): Promise<void>;
|
94
107
|
/**
|
95
108
|
* Saves the modified Excel template to a buffer.
|
96
109
|
*
|
@@ -128,6 +141,7 @@ export declare class TemplateFs {
|
|
128
141
|
* @experimental This API is experimental and might change in future versions.
|
129
142
|
*/
|
130
143
|
validate(): Promise<void>;
|
144
|
+
/** Static methods */
|
131
145
|
/**
|
132
146
|
* Creates a Template instance from an Excel file source.
|
133
147
|
* Removes any existing files in the destination directory.
|
@@ -19,6 +19,7 @@ export declare class TemplateMemory {
|
|
19
19
|
* @experimental This API is experimental and might change in future versions.
|
20
20
|
*/
|
21
21
|
constructor(files: Record<string, Buffer>);
|
22
|
+
/** Public methods */
|
22
23
|
/**
|
23
24
|
* Copies a sheet from the template to a new name.
|
24
25
|
*
|
@@ -118,7 +119,7 @@ export declare class TemplateMemory {
|
|
118
119
|
baseSheetIndex?: number;
|
119
120
|
baseSheetName?: string;
|
120
121
|
gap?: number;
|
121
|
-
}): void;
|
122
|
+
}): Promise<void>;
|
122
123
|
/**
|
123
124
|
* Removes sheets from the workbook.
|
124
125
|
*
|
@@ -130,7 +131,8 @@ export declare class TemplateMemory {
|
|
130
131
|
removeSheets(data: {
|
131
132
|
sheetNames?: string[];
|
132
133
|
sheetIndexes?: number[];
|
133
|
-
}): void;
|
134
|
+
}): Promise<void>;
|
135
|
+
/** Static methods */
|
134
136
|
/**
|
135
137
|
* Creates a Template instance from an Excel file source.
|
136
138
|
*
|
@@ -0,0 +1,8 @@
|
|
1
|
+
/**
|
2
|
+
* Compares two column strings and returns a number indicating their relative order.
|
3
|
+
*
|
4
|
+
* @param a - The first column string to compare.
|
5
|
+
* @param b - The second column string to compare.
|
6
|
+
* @returns 0 if the columns are equal, -1 if the first column is less than the second, or 1 if the first column is greater than the second.
|
7
|
+
*/
|
8
|
+
export declare function compareColumns(a: string, b: string): number;
|
@@ -4,6 +4,7 @@ export * from "./check-row.js";
|
|
4
4
|
export * from "./check-rows.js";
|
5
5
|
export * from "./check-start-row.js";
|
6
6
|
export * from "./column-index-to-letter.js";
|
7
|
+
export * from "./compare-columns.js";
|
7
8
|
export * from "./escape-xml.js";
|
8
9
|
export * from "./extract-xml-declaration.js";
|
9
10
|
export * from "./get-by-path.js";
|
@@ -0,0 +1,28 @@
|
|
1
|
+
/**
|
2
|
+
* Parses a worksheet (either as Buffer or string) to extract row data,
|
3
|
+
* last row number, and merge cell information from Excel XML format.
|
4
|
+
*
|
5
|
+
* This function is particularly useful for processing Excel files in
|
6
|
+
* Open XML Spreadsheet format (.xlsx).
|
7
|
+
*
|
8
|
+
* @param {Buffer|string} sheet - The worksheet content to parse, either as:
|
9
|
+
* - Buffer (binary Excel sheet)
|
10
|
+
* - string (raw XML content)
|
11
|
+
* @returns {{
|
12
|
+
* rows: string[],
|
13
|
+
* lastRowNumber: number,
|
14
|
+
* mergeCells: {ref: string}[]
|
15
|
+
* }} An object containing:
|
16
|
+
* - rows: Array of raw XML strings for each <row> element
|
17
|
+
* - lastRowNumber: Highest row number found in the sheet (1-based)
|
18
|
+
* - mergeCells: Array of merged cell ranges (e.g., [{ref: "A1:B2"}])
|
19
|
+
* @throws {Error} If the sheetData section is not found in the XML
|
20
|
+
*/
|
21
|
+
export declare function extractRowsFromSheetSync(sheet: Buffer | string): {
|
22
|
+
rows: string[];
|
23
|
+
lastRowNumber: number;
|
24
|
+
mergeCells: {
|
25
|
+
ref: string;
|
26
|
+
}[];
|
27
|
+
xml: string;
|
28
|
+
};
|
@@ -18,11 +18,11 @@
|
|
18
18
|
* - mergeCells: Array of merged cell ranges (e.g., [{ref: "A1:B2"}])
|
19
19
|
* @throws {Error} If the sheetData section is not found in the XML
|
20
20
|
*/
|
21
|
-
export declare function extractRowsFromSheet(sheet: Buffer | string): {
|
21
|
+
export declare function extractRowsFromSheet(sheet: Buffer | string): Promise<{
|
22
22
|
rows: string[];
|
23
23
|
lastRowNumber: number;
|
24
24
|
mergeCells: {
|
25
25
|
ref: string;
|
26
26
|
}[];
|
27
27
|
xml: string;
|
28
|
-
};
|
28
|
+
}>;
|
@@ -0,0 +1,14 @@
|
|
1
|
+
/**
|
2
|
+
* Extracts and parses XML content from an Excel worksheet file (e.g., xl/worksheets/sheet1.xml).
|
3
|
+
* Handles both compressed (raw deflate) and uncompressed (plain XML) formats.
|
4
|
+
*
|
5
|
+
* This function is designed to work with Excel Open XML (.xlsx) worksheet files,
|
6
|
+
* which may be stored in either compressed or uncompressed format within the ZIP container.
|
7
|
+
*
|
8
|
+
* @param {Buffer} buffer - The file content to process, which may be:
|
9
|
+
* - Raw XML text
|
10
|
+
* - Deflate-compressed XML data (without zlib headers)
|
11
|
+
* @returns {string} - The extracted XML content as a UTF-8 string
|
12
|
+
* @throws {Error} - If the buffer is empty or cannot be processed
|
13
|
+
*/
|
14
|
+
export declare function extractXmlFromSheetSync(buffer: Buffer): string;
|
@@ -8,7 +8,7 @@
|
|
8
8
|
* @param {Buffer} buffer - The file content to process, which may be:
|
9
9
|
* - Raw XML text
|
10
10
|
* - Deflate-compressed XML data (without zlib headers)
|
11
|
-
* @returns {string} - The extracted XML content as a UTF-8 string
|
11
|
+
* @returns {Promise<string>} - The extracted XML content as a UTF-8 string
|
12
12
|
* @throws {Error} - If the buffer is empty or cannot be processed
|
13
13
|
*/
|
14
|
-
export declare function extractXmlFromSheet(buffer: Buffer): string;
|
14
|
+
export declare function extractXmlFromSheet(buffer: Buffer): Promise<string>;
|
@@ -1,5 +1,6 @@
|
|
1
1
|
export * from "./build-merged-sheet.js";
|
2
|
+
export * from "./extract-rows-from-sheet-sync.js";
|
2
3
|
export * from "./extract-rows-from-sheet.js";
|
4
|
+
export * from "./extract-xml-from-sheet-sync.js";
|
3
5
|
export * from "./extract-xml-from-sheet.js";
|
4
|
-
export * from "./extract-xml-from-system-content.js";
|
5
6
|
export * from "./shift-row-indices.js";
|
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "@js-ak/excel-toolbox",
|
3
|
-
"version": "1.6.0",
|
3
|
+
"version": "1.8.0",
|
4
4
|
"description": "excel-toolbox",
|
5
5
|
"publishConfig": {
|
6
6
|
"access": "public",
|
@@ -70,7 +70,6 @@
|
|
70
70
|
"@semantic-release/release-notes-generator": "14.0.0",
|
71
71
|
"@stylistic/eslint-plugin-ts": "4.2.0",
|
72
72
|
"@types/node": "22.14.0",
|
73
|
-
"@types/pako": "2.0.3",
|
74
73
|
"@vitest/coverage-v8": "3.1.2",
|
75
74
|
"eslint": "9.24.0",
|
76
75
|
"eslint-plugin-sort-destructure-keys": "2.0.0",
|
@@ -80,8 +79,5 @@
|
|
80
79
|
"typescript": "5.8.3",
|
81
80
|
"typescript-eslint": "8.29.0",
|
82
81
|
"vitest": "3.1.2"
|
83
|
-
},
|
84
|
-
"dependencies": {
|
85
|
-
"pako": "2.1.0"
|
86
82
|
}
|
87
83
|
}
|
@@ -1,53 +0,0 @@
|
|
1
|
-
"use strict";
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
3
|
-
exports.extractXmlFromSystemContent = void 0;
|
4
|
-
const pako_1 = require("pako");
|
5
|
-
/**
|
6
|
-
* Extracts and decompresses XML content from Excel system files (e.g., workbook.xml, [Content_Types].xml).
|
7
|
-
* Handles both compressed (raw DEFLATE) and uncompressed (plain XML) formats with comprehensive error handling.
|
8
|
-
*
|
9
|
-
* @param {Buffer} buffer - The file content to process, which may be:
|
10
|
-
* - Raw XML text
|
11
|
-
* - DEFLATE-compressed XML data (without zlib headers)
|
12
|
-
* @param {string} name - The filename being processed (for error reporting)
|
13
|
-
* @returns {string} - The extracted XML content as a sanitized UTF-8 string
|
14
|
-
* @throws {Error} - With descriptive messages for various failure scenarios:
|
15
|
-
* - Empty buffer
|
16
|
-
* - Decompression failures
|
17
|
-
* - Invalid XML content
|
18
|
-
*/
|
19
|
-
const extractXmlFromSystemContent = (buffer, name) => {
|
20
|
-
// Validate input buffer
|
21
|
-
if (!buffer || buffer.length === 0) {
|
22
|
-
throw new Error(`Empty data buffer provided for file ${name}`);
|
23
|
-
}
|
24
|
-
let xml;
|
25
|
-
// Check for XML declaration in first 5 bytes (<?xml)
|
26
|
-
const startsWithXml = buffer.subarray(0, 5).toString("utf8").trim().startsWith("<?xml");
|
27
|
-
if (startsWithXml) {
|
28
|
-
// Case 1: Already uncompressed XML - convert directly to string
|
29
|
-
xml = buffer.toString("utf8");
|
30
|
-
}
|
31
|
-
else {
|
32
|
-
// Case 2: Attempt DEFLATE decompression
|
33
|
-
try {
|
34
|
-
const inflated = (0, pako_1.inflateRaw)(buffer, { to: "string" });
|
35
|
-
// Validate decompressed content contains XML declaration
|
36
|
-
if (inflated && inflated.includes("<?xml")) {
|
37
|
-
xml = inflated;
|
38
|
-
}
|
39
|
-
else {
|
40
|
-
throw new Error(`Decompressed data doesn't contain valid XML in ${name}`);
|
41
|
-
}
|
42
|
-
}
|
43
|
-
catch (error) {
|
44
|
-
const message = error instanceof Error ? error.message : "Unknown error";
|
45
|
-
throw new Error(`Failed to decompress ${name}: ${message}`);
|
46
|
-
}
|
47
|
-
}
|
48
|
-
// Sanitize XML by removing illegal control characters (per XML 1.0 spec)
|
49
|
-
// Preserves tabs (0x09), newlines (0x0A), and carriage returns (0x0D)
|
50
|
-
xml = xml.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "");
|
51
|
-
return xml;
|
52
|
-
};
|
53
|
-
exports.extractXmlFromSystemContent = extractXmlFromSystemContent;
|
@@ -1,49 +0,0 @@
|
|
1
|
-
import { inflateRaw } from "pako";
|
2
|
-
/**
|
3
|
-
* Extracts and decompresses XML content from Excel system files (e.g., workbook.xml, [Content_Types].xml).
|
4
|
-
* Handles both compressed (raw DEFLATE) and uncompressed (plain XML) formats with comprehensive error handling.
|
5
|
-
*
|
6
|
-
* @param {Buffer} buffer - The file content to process, which may be:
|
7
|
-
* - Raw XML text
|
8
|
-
* - DEFLATE-compressed XML data (without zlib headers)
|
9
|
-
* @param {string} name - The filename being processed (for error reporting)
|
10
|
-
* @returns {string} - The extracted XML content as a sanitized UTF-8 string
|
11
|
-
* @throws {Error} - With descriptive messages for various failure scenarios:
|
12
|
-
* - Empty buffer
|
13
|
-
* - Decompression failures
|
14
|
-
* - Invalid XML content
|
15
|
-
*/
|
16
|
-
export const extractXmlFromSystemContent = (buffer, name) => {
|
17
|
-
// Validate input buffer
|
18
|
-
if (!buffer || buffer.length === 0) {
|
19
|
-
throw new Error(`Empty data buffer provided for file ${name}`);
|
20
|
-
}
|
21
|
-
let xml;
|
22
|
-
// Check for XML declaration in first 5 bytes (<?xml)
|
23
|
-
const startsWithXml = buffer.subarray(0, 5).toString("utf8").trim().startsWith("<?xml");
|
24
|
-
if (startsWithXml) {
|
25
|
-
// Case 1: Already uncompressed XML - convert directly to string
|
26
|
-
xml = buffer.toString("utf8");
|
27
|
-
}
|
28
|
-
else {
|
29
|
-
// Case 2: Attempt DEFLATE decompression
|
30
|
-
try {
|
31
|
-
const inflated = inflateRaw(buffer, { to: "string" });
|
32
|
-
// Validate decompressed content contains XML declaration
|
33
|
-
if (inflated && inflated.includes("<?xml")) {
|
34
|
-
xml = inflated;
|
35
|
-
}
|
36
|
-
else {
|
37
|
-
throw new Error(`Decompressed data doesn't contain valid XML in ${name}`);
|
38
|
-
}
|
39
|
-
}
|
40
|
-
catch (error) {
|
41
|
-
const message = error instanceof Error ? error.message : "Unknown error";
|
42
|
-
throw new Error(`Failed to decompress ${name}: ${message}`);
|
43
|
-
}
|
44
|
-
}
|
45
|
-
// Sanitize XML by removing illegal control characters (per XML 1.0 spec)
|
46
|
-
// Preserves tabs (0x09), newlines (0x0A), and carriage returns (0x0D)
|
47
|
-
xml = xml.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "");
|
48
|
-
return xml;
|
49
|
-
};
|
@@ -1,15 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Extracts and decompresses XML content from Excel system files (e.g., workbook.xml, [Content_Types].xml).
|
3
|
-
* Handles both compressed (raw DEFLATE) and uncompressed (plain XML) formats with comprehensive error handling.
|
4
|
-
*
|
5
|
-
* @param {Buffer} buffer - The file content to process, which may be:
|
6
|
-
* - Raw XML text
|
7
|
-
* - DEFLATE-compressed XML data (without zlib headers)
|
8
|
-
* @param {string} name - The filename being processed (for error reporting)
|
9
|
-
* @returns {string} - The extracted XML content as a sanitized UTF-8 string
|
10
|
-
* @throws {Error} - With descriptive messages for various failure scenarios:
|
11
|
-
* - Empty buffer
|
12
|
-
* - Decompression failures
|
13
|
-
* - Invalid XML content
|
14
|
-
*/
|
15
|
-
export declare const extractXmlFromSystemContent: (buffer: Buffer, name: string) => string;
|