@cj-tech-master/excelts 1.4.3 → 1.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/README_zh.md +3 -3
- package/dist/browser/excelts.iife.js +12841 -7484
- package/dist/browser/excelts.iife.js.map +1 -1
- package/dist/browser/excelts.iife.min.js +86 -23
- package/dist/cjs/doc/column.js +1 -1
- package/dist/cjs/doc/row.js +9 -4
- package/dist/cjs/doc/worksheet.js +9 -4
- package/dist/cjs/stream/xlsx/workbook-writer.js +3 -2
- package/dist/cjs/utils/unzip/extract.js +166 -0
- package/dist/cjs/utils/unzip/index.js +7 -1
- package/dist/cjs/utils/xml-stream.js +25 -3
- package/dist/cjs/utils/zip/compress.js +261 -0
- package/dist/cjs/utils/zip/crc32.js +154 -0
- package/dist/cjs/utils/zip/index.js +70 -0
- package/dist/cjs/utils/zip/zip-builder.js +378 -0
- package/dist/cjs/utils/zip-stream.js +30 -34
- package/dist/cjs/xlsx/xform/book/defined-name-xform.js +36 -2
- package/dist/cjs/xlsx/xform/list-xform.js +6 -0
- package/dist/cjs/xlsx/xform/sheet/cell-xform.js +6 -1
- package/dist/cjs/xlsx/xform/sheet/row-xform.js +24 -2
- package/dist/cjs/xlsx/xform/table/filter-column-xform.js +4 -0
- package/dist/esm/doc/column.js +1 -1
- package/dist/esm/doc/row.js +9 -4
- package/dist/esm/doc/worksheet.js +9 -4
- package/dist/esm/stream/xlsx/workbook-writer.js +3 -2
- package/dist/esm/utils/unzip/extract.js +160 -0
- package/dist/esm/utils/unzip/index.js +2 -0
- package/dist/esm/utils/xml-stream.js +25 -3
- package/dist/esm/utils/zip/compress.js +220 -0
- package/dist/esm/utils/zip/crc32.js +116 -0
- package/dist/esm/utils/zip/index.js +55 -0
- package/dist/esm/utils/zip/zip-builder.js +372 -0
- package/dist/esm/utils/zip-stream.js +30 -34
- package/dist/esm/xlsx/xform/book/defined-name-xform.js +36 -2
- package/dist/esm/xlsx/xform/list-xform.js +6 -0
- package/dist/esm/xlsx/xform/sheet/cell-xform.js +6 -1
- package/dist/esm/xlsx/xform/sheet/row-xform.js +24 -2
- package/dist/esm/xlsx/xform/table/filter-column-xform.js +4 -0
- package/dist/types/doc/cell.d.ts +10 -6
- package/dist/types/doc/column.d.ts +8 -4
- package/dist/types/doc/row.d.ts +9 -8
- package/dist/types/doc/worksheet.d.ts +2 -2
- package/dist/types/utils/unzip/extract.d.ts +92 -0
- package/dist/types/utils/unzip/index.d.ts +1 -0
- package/dist/types/utils/xml-stream.d.ts +2 -0
- package/dist/types/utils/zip/compress.d.ts +83 -0
- package/dist/types/utils/zip/crc32.d.ts +55 -0
- package/dist/types/utils/zip/index.d.ts +52 -0
- package/dist/types/utils/zip/zip-builder.d.ts +110 -0
- package/dist/types/utils/zip-stream.d.ts +6 -12
- package/dist/types/xlsx/xform/list-xform.d.ts +1 -0
- package/dist/types/xlsx/xform/sheet/row-xform.d.ts +2 -0
- package/package.json +8 -8
|
@@ -5,37 +5,25 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.ZipWriter = void 0;
|
|
7
7
|
const events_1 = __importDefault(require("events"));
|
|
8
|
-
const
|
|
8
|
+
const index_js_1 = require("./zip/index");
|
|
9
9
|
const stream_buf_js_1 = require("./stream-buf");
|
|
10
10
|
// =============================================================================
|
|
11
11
|
// The ZipWriter class
|
|
12
12
|
// Packs streamed data into an output zip stream
|
|
13
|
+
// Uses native zlib (Node.js) or CompressionStream (browser) for best performance
|
|
13
14
|
class ZipWriter extends events_1.default.EventEmitter {
|
|
14
15
|
constructor(options) {
|
|
15
16
|
super();
|
|
16
|
-
this.options = Object.assign({
|
|
17
|
-
type: "nodebuffer",
|
|
18
|
-
compression: "DEFLATE"
|
|
19
|
-
}, options);
|
|
20
|
-
// Default compression level is 6 (good balance of speed and size)
|
|
21
|
-
// 0 = no compression, 9 = best compression
|
|
22
|
-
const level = this.options.compressionOptions?.level ?? 6;
|
|
23
|
-
this.compressionLevel = Math.max(0, Math.min(9, level));
|
|
24
|
-
this.files = {};
|
|
25
|
-
this.stream = new stream_buf_js_1.StreamBuf();
|
|
26
17
|
this.finalized = false;
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
});
|
|
18
|
+
this.pendingWrites = [];
|
|
19
|
+
// Determine compression level:
|
|
20
|
+
// - STORE mode = 0 (no compression)
|
|
21
|
+
// - DEFLATE mode = user level or default 1 (fast compression)
|
|
22
|
+
const level = options?.compression === "STORE"
|
|
23
|
+
? 0
|
|
24
|
+
: Math.max(0, Math.min(9, options?.compressionOptions?.level ?? 1));
|
|
25
|
+
this.stream = new stream_buf_js_1.StreamBuf();
|
|
26
|
+
this.zipBuilder = new index_js_1.ZipBuilder({ level });
|
|
39
27
|
}
|
|
40
28
|
append(data, options) {
|
|
41
29
|
let buffer;
|
|
@@ -49,7 +37,7 @@ class ZipWriter extends events_1.default.EventEmitter {
|
|
|
49
37
|
buffer = Buffer.from(data, "utf8");
|
|
50
38
|
}
|
|
51
39
|
else if (Buffer.isBuffer(data)) {
|
|
52
|
-
// Buffer extends Uint8Array,
|
|
40
|
+
// Buffer extends Uint8Array, can use it directly - no copy needed
|
|
53
41
|
buffer = data;
|
|
54
42
|
}
|
|
55
43
|
else if (ArrayBuffer.isView(data)) {
|
|
@@ -64,14 +52,16 @@ class ZipWriter extends events_1.default.EventEmitter {
|
|
|
64
52
|
// Assume it's already a Uint8Array or compatible type
|
|
65
53
|
buffer = data;
|
|
66
54
|
}
|
|
67
|
-
// Add file to zip using
|
|
68
|
-
//
|
|
69
|
-
const
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
55
|
+
// Add file to zip using native compression
|
|
56
|
+
// addFile returns chunks that we write to stream immediately
|
|
57
|
+
const writePromise = this.zipBuilder
|
|
58
|
+
.addFile({ name: options.name, data: buffer })
|
|
59
|
+
.then(chunks => {
|
|
60
|
+
for (const chunk of chunks) {
|
|
61
|
+
this.stream.write(Buffer.from(chunk));
|
|
62
|
+
}
|
|
63
|
+
});
|
|
64
|
+
this.pendingWrites.push(writePromise);
|
|
75
65
|
}
|
|
76
66
|
push(chunk) {
|
|
77
67
|
return this.stream.push(chunk);
|
|
@@ -81,8 +71,14 @@ class ZipWriter extends events_1.default.EventEmitter {
|
|
|
81
71
|
return;
|
|
82
72
|
}
|
|
83
73
|
this.finalized = true;
|
|
84
|
-
//
|
|
85
|
-
this.
|
|
74
|
+
// Wait for all pending writes to complete
|
|
75
|
+
await Promise.all(this.pendingWrites);
|
|
76
|
+
// Finalize the zip and write central directory
|
|
77
|
+
const finalChunks = this.zipBuilder.finalize();
|
|
78
|
+
for (const chunk of finalChunks) {
|
|
79
|
+
this.stream.write(Buffer.from(chunk));
|
|
80
|
+
}
|
|
81
|
+
this.stream.end();
|
|
86
82
|
this.emit("finish");
|
|
87
83
|
}
|
|
88
84
|
// ==========================================================================
|
|
@@ -46,16 +46,50 @@ class DefinedNamesXform extends base_xform_js_1.BaseXform {
|
|
|
46
46
|
}
|
|
47
47
|
}
|
|
48
48
|
exports.DefinedNamesXform = DefinedNamesXform;
|
|
49
|
+
// Regex to validate cell range format:
|
|
50
|
+
// - Cell: $A$1 or A1
|
|
51
|
+
// - Range: $A$1:$B$10 or A1:B10
|
|
52
|
+
// - Row range: $1:$2 (for print titles)
|
|
53
|
+
// - Column range: $A:$B (for print titles)
|
|
54
|
+
const cellRangeRegexp = /^[$]?[A-Za-z]{1,3}[$]?\d+(:[$]?[A-Za-z]{1,3}[$]?\d+)?$/;
|
|
55
|
+
const rowRangeRegexp = /^[$]?\d+:[$]?\d+$/;
|
|
56
|
+
const colRangeRegexp = /^[$]?[A-Za-z]{1,3}:[$]?[A-Za-z]{1,3}$/;
|
|
49
57
|
function isValidRange(range) {
|
|
58
|
+
// Skip array constants wrapped in {} - these are not valid cell ranges
|
|
59
|
+
// e.g., {"'Sheet1'!$A$1:$B$10"} or {#N/A,#N/A,FALSE,"text"}
|
|
60
|
+
if (range.startsWith("{") || range.endsWith("}")) {
|
|
61
|
+
return false;
|
|
62
|
+
}
|
|
63
|
+
// Extract the cell reference part (after the sheet name if present)
|
|
64
|
+
const cellRef = range.split("!").pop() || "";
|
|
65
|
+
// Must match one of the valid patterns
|
|
66
|
+
if (!cellRangeRegexp.test(cellRef) &&
|
|
67
|
+
!rowRangeRegexp.test(cellRef) &&
|
|
68
|
+
!colRangeRegexp.test(cellRef)) {
|
|
69
|
+
return false;
|
|
70
|
+
}
|
|
50
71
|
try {
|
|
51
|
-
col_cache_js_1.colCache.decodeEx(range);
|
|
52
|
-
|
|
72
|
+
const decoded = col_cache_js_1.colCache.decodeEx(range);
|
|
73
|
+
// For cell ranges: row/col or top/bottom/left/right should be valid numbers
|
|
74
|
+
// For row ranges ($1:$2): top/bottom are numbers, left/right are null
|
|
75
|
+
// For column ranges ($A:$B): left/right are numbers, top/bottom are null
|
|
76
|
+
if (("row" in decoded && typeof decoded.row === "number") ||
|
|
77
|
+
("top" in decoded && typeof decoded.top === "number") ||
|
|
78
|
+
("left" in decoded && typeof decoded.left === "number")) {
|
|
79
|
+
return true;
|
|
80
|
+
}
|
|
81
|
+
return false;
|
|
53
82
|
}
|
|
54
83
|
catch {
|
|
55
84
|
return false;
|
|
56
85
|
}
|
|
57
86
|
}
|
|
58
87
|
function extractRanges(parsedText) {
|
|
88
|
+
// Skip if the entire text is wrapped in {} (array constant)
|
|
89
|
+
const trimmed = parsedText.trim();
|
|
90
|
+
if (trimmed.startsWith("{") && trimmed.endsWith("}")) {
|
|
91
|
+
return [];
|
|
92
|
+
}
|
|
59
93
|
const ranges = [];
|
|
60
94
|
let quotesOpened = false;
|
|
61
95
|
let last = "";
|
|
@@ -410,7 +410,12 @@ class CellXform extends base_xform_js_1.BaseXform {
|
|
|
410
410
|
}
|
|
411
411
|
break;
|
|
412
412
|
case enums_js_1.Enums.ValueType.Formula:
|
|
413
|
-
|
|
413
|
+
// Only convert formula result to date if the result is a number
|
|
414
|
+
// String results (t="str") should not be converted even if the cell has a date format
|
|
415
|
+
if (model.result !== undefined &&
|
|
416
|
+
typeof model.result === "number" &&
|
|
417
|
+
style &&
|
|
418
|
+
(0, utils_js_1.isDateFmt)(style.numFmt)) {
|
|
414
419
|
model.result = (0, utils_js_1.excelToDate)(model.result, options.date1904);
|
|
415
420
|
}
|
|
416
421
|
if (model.shareType === "shared") {
|
|
@@ -4,6 +4,7 @@ exports.RowXform = void 0;
|
|
|
4
4
|
const base_xform_js_1 = require("../base-xform");
|
|
5
5
|
const cell_xform_js_1 = require("./cell-xform");
|
|
6
6
|
const utils_js_1 = require("../../../utils/utils");
|
|
7
|
+
const col_cache_js_1 = require("../../../utils/col-cache");
|
|
7
8
|
class RowXform extends base_xform_js_1.BaseXform {
|
|
8
9
|
constructor(options) {
|
|
9
10
|
super();
|
|
@@ -15,6 +16,11 @@ class RowXform extends base_xform_js_1.BaseXform {
|
|
|
15
16
|
get tag() {
|
|
16
17
|
return "row";
|
|
17
18
|
}
|
|
19
|
+
reset() {
|
|
20
|
+
super.reset();
|
|
21
|
+
this.numRowsSeen = 0;
|
|
22
|
+
this.lastCellCol = 0;
|
|
23
|
+
}
|
|
18
24
|
prepare(model, options) {
|
|
19
25
|
const styleId = options.styles.addStyleModel(model.style);
|
|
20
26
|
if (styleId) {
|
|
@@ -65,11 +71,15 @@ class RowXform extends base_xform_js_1.BaseXform {
|
|
|
65
71
|
}
|
|
66
72
|
if (node.name === "row") {
|
|
67
73
|
this.numRowsSeen += 1;
|
|
74
|
+
// Reset lastCellCol for each new row
|
|
75
|
+
this.lastCellCol = 0;
|
|
68
76
|
const spans = node.attributes.spans
|
|
69
77
|
? node.attributes.spans.split(":").map((span) => parseInt(span, 10))
|
|
70
78
|
: [undefined, undefined];
|
|
79
|
+
// If r attribute is missing, use numRowsSeen as the row number
|
|
80
|
+
const rowNumber = node.attributes.r ? parseInt(node.attributes.r, 10) : this.numRowsSeen;
|
|
71
81
|
const model = (this.model = {
|
|
72
|
-
number:
|
|
82
|
+
number: rowNumber,
|
|
73
83
|
min: spans[0],
|
|
74
84
|
max: spans[1],
|
|
75
85
|
cells: []
|
|
@@ -109,7 +119,19 @@ class RowXform extends base_xform_js_1.BaseXform {
|
|
|
109
119
|
parseClose(name) {
|
|
110
120
|
if (this.parser) {
|
|
111
121
|
if (!this.parser.parseClose(name)) {
|
|
112
|
-
this.
|
|
122
|
+
const cellModel = this.parser.model;
|
|
123
|
+
// If cell has address, extract column number from it
|
|
124
|
+
// Otherwise, calculate address based on position
|
|
125
|
+
if (cellModel.address) {
|
|
126
|
+
const decoded = col_cache_js_1.colCache.decodeAddress(cellModel.address);
|
|
127
|
+
this.lastCellCol = decoded.col;
|
|
128
|
+
}
|
|
129
|
+
else {
|
|
130
|
+
// No r attribute, calculate address from position
|
|
131
|
+
this.lastCellCol += 1;
|
|
132
|
+
cellModel.address = col_cache_js_1.colCache.encodeAddress(this.model.number, this.lastCellCol);
|
|
133
|
+
}
|
|
134
|
+
this.model.cells.push(cellModel);
|
|
113
135
|
if (this.maxItems && this.model.cells.length > this.maxItems) {
|
|
114
136
|
throw new Error(`Max column count (${this.maxItems}) exceeded`);
|
|
115
137
|
}
|
|
@@ -57,6 +57,10 @@ class FilterColumnXform extends base_xform_js_1.BaseXform {
|
|
|
57
57
|
filterButton: attributes.hiddenButton === "0"
|
|
58
58
|
};
|
|
59
59
|
return true;
|
|
60
|
+
case "dynamicFilter":
|
|
61
|
+
// Ignore dynamicFilter nodes - we don't need to preserve them for reading
|
|
62
|
+
// See: https://github.com/exceljs/exceljs/issues/2972
|
|
63
|
+
return true;
|
|
60
64
|
default:
|
|
61
65
|
this.parser = this.map[node.name];
|
|
62
66
|
if (this.parser) {
|
package/dist/esm/doc/column.js
CHANGED
package/dist/esm/doc/row.js
CHANGED
|
@@ -112,10 +112,15 @@ class Row {
|
|
|
112
112
|
cDst._comment = undefined;
|
|
113
113
|
}
|
|
114
114
|
}
|
|
115
|
-
eachCell(
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
115
|
+
eachCell(optionsOrIteratee, maybeIteratee) {
|
|
116
|
+
let options = null;
|
|
117
|
+
let iteratee;
|
|
118
|
+
if (typeof optionsOrIteratee === "function") {
|
|
119
|
+
iteratee = optionsOrIteratee;
|
|
120
|
+
}
|
|
121
|
+
else {
|
|
122
|
+
options = optionsOrIteratee;
|
|
123
|
+
iteratee = maybeIteratee;
|
|
119
124
|
}
|
|
120
125
|
if (options && options.includeEmpty) {
|
|
121
126
|
const n = this._cells.length;
|
|
@@ -470,10 +470,15 @@ class Worksheet {
|
|
|
470
470
|
// account for defined names
|
|
471
471
|
this.workbook.definedNames.spliceRows(this.name, start, count, nInserts);
|
|
472
472
|
}
|
|
473
|
-
eachRow(
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
473
|
+
eachRow(optionsOrIteratee, maybeIteratee) {
|
|
474
|
+
let options;
|
|
475
|
+
let iteratee;
|
|
476
|
+
if (typeof optionsOrIteratee === "function") {
|
|
477
|
+
iteratee = optionsOrIteratee;
|
|
478
|
+
}
|
|
479
|
+
else {
|
|
480
|
+
options = optionsOrIteratee;
|
|
481
|
+
iteratee = maybeIteratee;
|
|
477
482
|
}
|
|
478
483
|
if (options && options.includeEmpty) {
|
|
479
484
|
const n = this._rows.length;
|
|
@@ -31,8 +31,9 @@ class WorkbookWriter {
|
|
|
31
31
|
this.views = [];
|
|
32
32
|
this.zipOptions = options.zip;
|
|
33
33
|
// Extract compression level from zip options (supports both zlib.level and compressionOptions.level)
|
|
34
|
-
// Default compression level is
|
|
35
|
-
|
|
34
|
+
// Default compression level is 1 (fast compression with good ratio)
|
|
35
|
+
// Level 1 is ~2x faster than level 6 with only ~7% larger files
|
|
36
|
+
const level = options.zip?.zlib?.level ?? options.zip?.compressionOptions?.level ?? 1;
|
|
36
37
|
this.compressionLevel = Math.max(0, Math.min(9, level));
|
|
37
38
|
this.media = [];
|
|
38
39
|
this.commentRefs = [];
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Simple ZIP extraction utilities
|
|
3
|
+
* Provides easy-to-use Promise-based API for extracting ZIP files
|
|
4
|
+
*/
|
|
5
|
+
import { Readable } from "stream";
|
|
6
|
+
import { createParse } from "./parse.js";
|
|
7
|
+
/**
|
|
8
|
+
* Extract all files from a ZIP buffer
|
|
9
|
+
*
|
|
10
|
+
* @param zipData - ZIP file data as Buffer or Uint8Array
|
|
11
|
+
* @returns Map of file paths to their content
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```ts
|
|
15
|
+
* import { extractAll } from "./utils/unzip/extract.js";
|
|
16
|
+
*
|
|
17
|
+
* const zipData = fs.readFileSync("archive.zip");
|
|
18
|
+
* const files = await extractAll(zipData);
|
|
19
|
+
*
|
|
20
|
+
* for (const [path, file] of files) {
|
|
21
|
+
* console.log(`${path}: ${file.data.length} bytes`);
|
|
22
|
+
* }
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
export async function extractAll(zipData) {
|
|
26
|
+
const files = new Map();
|
|
27
|
+
const buffer = Buffer.isBuffer(zipData) ? zipData : Buffer.from(zipData);
|
|
28
|
+
const parse = createParse({ forceStream: true });
|
|
29
|
+
const stream = Readable.from([buffer]);
|
|
30
|
+
stream.pipe(parse);
|
|
31
|
+
for await (const entry of parse) {
|
|
32
|
+
const zipEntry = entry;
|
|
33
|
+
const isDirectory = zipEntry.type === "Directory";
|
|
34
|
+
if (isDirectory) {
|
|
35
|
+
files.set(zipEntry.path, {
|
|
36
|
+
path: zipEntry.path,
|
|
37
|
+
data: Buffer.alloc(0),
|
|
38
|
+
isDirectory: true,
|
|
39
|
+
size: 0
|
|
40
|
+
});
|
|
41
|
+
zipEntry.autodrain();
|
|
42
|
+
}
|
|
43
|
+
else {
|
|
44
|
+
const data = await zipEntry.buffer();
|
|
45
|
+
files.set(zipEntry.path, {
|
|
46
|
+
path: zipEntry.path,
|
|
47
|
+
data,
|
|
48
|
+
isDirectory: false,
|
|
49
|
+
size: data.length
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
return files;
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* Extract a single file from a ZIP buffer
|
|
57
|
+
*
|
|
58
|
+
* @param zipData - ZIP file data as Buffer or Uint8Array
|
|
59
|
+
* @param filePath - Path of the file to extract
|
|
60
|
+
* @returns File content as Buffer, or null if not found
|
|
61
|
+
*
|
|
62
|
+
* @example
|
|
63
|
+
* ```ts
|
|
64
|
+
* import { extractFile } from "./utils/unzip/extract.js";
|
|
65
|
+
*
|
|
66
|
+
* const zipData = fs.readFileSync("archive.zip");
|
|
67
|
+
* const content = await extractFile(zipData, "readme.txt");
|
|
68
|
+
* if (content) {
|
|
69
|
+
* console.log(content.toString("utf-8"));
|
|
70
|
+
* }
|
|
71
|
+
* ```
|
|
72
|
+
*/
|
|
73
|
+
export async function extractFile(zipData, filePath) {
|
|
74
|
+
const buffer = Buffer.isBuffer(zipData) ? zipData : Buffer.from(zipData);
|
|
75
|
+
const parse = createParse({ forceStream: true });
|
|
76
|
+
const stream = Readable.from([buffer]);
|
|
77
|
+
stream.pipe(parse);
|
|
78
|
+
for await (const entry of parse) {
|
|
79
|
+
const zipEntry = entry;
|
|
80
|
+
if (zipEntry.path === filePath) {
|
|
81
|
+
if (zipEntry.type === "Directory") {
|
|
82
|
+
return Buffer.alloc(0);
|
|
83
|
+
}
|
|
84
|
+
return zipEntry.buffer();
|
|
85
|
+
}
|
|
86
|
+
zipEntry.autodrain();
|
|
87
|
+
}
|
|
88
|
+
return null;
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* List all file paths in a ZIP buffer (without extracting content)
|
|
92
|
+
*
|
|
93
|
+
* @param zipData - ZIP file data as Buffer or Uint8Array
|
|
94
|
+
* @returns Array of file paths
|
|
95
|
+
*
|
|
96
|
+
* @example
|
|
97
|
+
* ```ts
|
|
98
|
+
* import { listFiles } from "./utils/unzip/extract.js";
|
|
99
|
+
*
|
|
100
|
+
* const zipData = fs.readFileSync("archive.zip");
|
|
101
|
+
* const paths = await listFiles(zipData);
|
|
102
|
+
* console.log(paths); // ["file1.txt", "folder/file2.txt", ...]
|
|
103
|
+
* ```
|
|
104
|
+
*/
|
|
105
|
+
export async function listFiles(zipData) {
|
|
106
|
+
const paths = [];
|
|
107
|
+
const buffer = Buffer.isBuffer(zipData) ? zipData : Buffer.from(zipData);
|
|
108
|
+
const parse = createParse({ forceStream: true });
|
|
109
|
+
const stream = Readable.from([buffer]);
|
|
110
|
+
stream.pipe(parse);
|
|
111
|
+
for await (const entry of parse) {
|
|
112
|
+
const zipEntry = entry;
|
|
113
|
+
paths.push(zipEntry.path);
|
|
114
|
+
zipEntry.autodrain();
|
|
115
|
+
}
|
|
116
|
+
return paths;
|
|
117
|
+
}
|
|
118
|
+
/**
|
|
119
|
+
* Iterate over ZIP entries with a callback (memory efficient for large ZIPs)
|
|
120
|
+
*
|
|
121
|
+
* @param zipData - ZIP file data as Buffer or Uint8Array
|
|
122
|
+
* @param callback - Async callback for each entry, return false to stop iteration
|
|
123
|
+
*
|
|
124
|
+
* @example
|
|
125
|
+
* ```ts
|
|
126
|
+
* import { forEachEntry } from "./utils/unzip/extract.js";
|
|
127
|
+
*
|
|
128
|
+
* await forEachEntry(zipData, async (path, getData) => {
|
|
129
|
+
* if (path.endsWith(".xml")) {
|
|
130
|
+
* const content = await getData();
|
|
131
|
+
* console.log(content.toString("utf-8"));
|
|
132
|
+
* }
|
|
133
|
+
* return true; // continue iteration
|
|
134
|
+
* });
|
|
135
|
+
* ```
|
|
136
|
+
*/
|
|
137
|
+
export async function forEachEntry(zipData, callback) {
|
|
138
|
+
const buffer = Buffer.isBuffer(zipData) ? zipData : Buffer.from(zipData);
|
|
139
|
+
const parse = createParse({ forceStream: true });
|
|
140
|
+
const stream = Readable.from([buffer]);
|
|
141
|
+
stream.pipe(parse);
|
|
142
|
+
for await (const entry of parse) {
|
|
143
|
+
const zipEntry = entry;
|
|
144
|
+
let dataPromise = null;
|
|
145
|
+
const getData = () => {
|
|
146
|
+
if (!dataPromise) {
|
|
147
|
+
dataPromise = zipEntry.buffer();
|
|
148
|
+
}
|
|
149
|
+
return dataPromise;
|
|
150
|
+
};
|
|
151
|
+
const shouldContinue = await callback(zipEntry.path, getData, zipEntry);
|
|
152
|
+
// If callback didn't read data, drain it
|
|
153
|
+
if (!dataPromise) {
|
|
154
|
+
zipEntry.autodrain();
|
|
155
|
+
}
|
|
156
|
+
if (shouldContinue === false) {
|
|
157
|
+
break;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
@@ -10,3 +10,5 @@ export { bufferStream } from "./buffer-stream.js";
|
|
|
10
10
|
export { parse as parseBuffer } from "./parse-buffer.js";
|
|
11
11
|
export { parseDateTime } from "./parse-datetime.js";
|
|
12
12
|
export { parseExtraField } from "./parse-extra-field.js";
|
|
13
|
+
// Simple extraction API
|
|
14
|
+
export { extractAll, extractFile, listFiles, forEachEntry } from "./extract.js";
|
|
@@ -4,6 +4,8 @@ const OPEN_ANGLE = "<";
|
|
|
4
4
|
const CLOSE_ANGLE = ">";
|
|
5
5
|
const OPEN_ANGLE_SLASH = "</";
|
|
6
6
|
const CLOSE_SLASH_ANGLE = "/>";
|
|
7
|
+
// Chunk size for periodic consolidation (reduces final join overhead)
|
|
8
|
+
const CHUNK_SIZE = 10000;
|
|
7
9
|
function pushAttribute(xml, name, value) {
|
|
8
10
|
xml.push(` ${name}="${xmlEncode(value.toString())}"`);
|
|
9
11
|
}
|
|
@@ -21,15 +23,23 @@ function pushAttributes(xml, attributes) {
|
|
|
21
23
|
class XmlStream {
|
|
22
24
|
constructor() {
|
|
23
25
|
this._xml = [];
|
|
26
|
+
this._chunks = [];
|
|
24
27
|
this._stack = [];
|
|
25
28
|
this._rollbacks = [];
|
|
26
29
|
}
|
|
30
|
+
_consolidate() {
|
|
31
|
+
// Periodically join small strings into larger chunks to reduce final join overhead
|
|
32
|
+
if (this._xml.length >= CHUNK_SIZE) {
|
|
33
|
+
this._chunks.push(this._xml.join(""));
|
|
34
|
+
this._xml = [];
|
|
35
|
+
}
|
|
36
|
+
}
|
|
27
37
|
get tos() {
|
|
28
38
|
return this._stack.length ? this._stack[this._stack.length - 1] : undefined;
|
|
29
39
|
}
|
|
30
40
|
get cursor() {
|
|
31
41
|
// handy way to track whether anything has been added
|
|
32
|
-
return this._xml.length;
|
|
42
|
+
return this._chunks.length * CHUNK_SIZE + this._xml.length;
|
|
33
43
|
}
|
|
34
44
|
openXml(docAttributes) {
|
|
35
45
|
const xml = this._xml;
|
|
@@ -96,6 +106,7 @@ class XmlStream {
|
|
|
96
106
|
}
|
|
97
107
|
this.open = false;
|
|
98
108
|
this.leaf = false;
|
|
109
|
+
this._consolidate();
|
|
99
110
|
}
|
|
100
111
|
leafNode(name, attributes, text) {
|
|
101
112
|
this.openNode(name, attributes);
|
|
@@ -115,7 +126,8 @@ class XmlStream {
|
|
|
115
126
|
xml: this._xml.length,
|
|
116
127
|
stack: this._stack.length,
|
|
117
128
|
leaf: this.leaf,
|
|
118
|
-
open: this.open
|
|
129
|
+
open: this.open,
|
|
130
|
+
chunksLength: this._chunks.length
|
|
119
131
|
});
|
|
120
132
|
return this.cursor;
|
|
121
133
|
}
|
|
@@ -130,12 +142,22 @@ class XmlStream {
|
|
|
130
142
|
if (this._stack.length > r.stack) {
|
|
131
143
|
this._stack.splice(r.stack, this._stack.length - r.stack);
|
|
132
144
|
}
|
|
145
|
+
if (this._chunks.length > r.chunksLength) {
|
|
146
|
+
this._chunks.splice(r.chunksLength, this._chunks.length - r.chunksLength);
|
|
147
|
+
}
|
|
133
148
|
this.leaf = r.leaf;
|
|
134
149
|
this.open = r.open;
|
|
135
150
|
}
|
|
136
151
|
get xml() {
|
|
137
152
|
this.closeAll();
|
|
138
|
-
|
|
153
|
+
// Join chunks first, then remaining xml array
|
|
154
|
+
if (this._chunks.length === 0) {
|
|
155
|
+
return this._xml.join("");
|
|
156
|
+
}
|
|
157
|
+
if (this._xml.length > 0) {
|
|
158
|
+
this._chunks.push(this._xml.join(""));
|
|
159
|
+
}
|
|
160
|
+
return this._chunks.join("");
|
|
139
161
|
}
|
|
140
162
|
}
|
|
141
163
|
XmlStream.StdDocAttributes = {
|