@cj-tech-master/excelts 1.4.2 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/README_zh.md +3 -3
- package/dist/browser/excelts.iife.js +8135 -2722
- package/dist/browser/excelts.iife.js.map +1 -1
- package/dist/browser/excelts.iife.min.js +86 -23
- package/dist/cjs/stream/xlsx/workbook-writer.js +3 -2
- package/dist/cjs/utils/cell-format.js +13 -9
- package/dist/cjs/utils/sheet-utils.js +125 -15
- package/dist/cjs/utils/unzip/extract.js +166 -0
- package/dist/cjs/utils/unzip/index.js +7 -1
- package/dist/cjs/utils/xml-stream.js +25 -3
- package/dist/cjs/utils/zip/compress.js +261 -0
- package/dist/cjs/utils/zip/crc32.js +154 -0
- package/dist/cjs/utils/zip/index.js +70 -0
- package/dist/cjs/utils/zip/zip-builder.js +378 -0
- package/dist/cjs/utils/zip-stream.js +30 -34
- package/dist/cjs/xlsx/xform/book/defined-name-xform.js +36 -2
- package/dist/cjs/xlsx/xform/list-xform.js +6 -0
- package/dist/cjs/xlsx/xform/sheet/cell-xform.js +6 -1
- package/dist/cjs/xlsx/xform/sheet/row-xform.js +24 -2
- package/dist/cjs/xlsx/xform/table/filter-column-xform.js +4 -0
- package/dist/esm/stream/xlsx/workbook-writer.js +3 -2
- package/dist/esm/utils/cell-format.js +13 -9
- package/dist/esm/utils/sheet-utils.js +125 -15
- package/dist/esm/utils/unzip/extract.js +160 -0
- package/dist/esm/utils/unzip/index.js +2 -0
- package/dist/esm/utils/xml-stream.js +25 -3
- package/dist/esm/utils/zip/compress.js +220 -0
- package/dist/esm/utils/zip/crc32.js +116 -0
- package/dist/esm/utils/zip/index.js +55 -0
- package/dist/esm/utils/zip/zip-builder.js +372 -0
- package/dist/esm/utils/zip-stream.js +30 -34
- package/dist/esm/xlsx/xform/book/defined-name-xform.js +36 -2
- package/dist/esm/xlsx/xform/list-xform.js +6 -0
- package/dist/esm/xlsx/xform/sheet/cell-xform.js +6 -1
- package/dist/esm/xlsx/xform/sheet/row-xform.js +24 -2
- package/dist/esm/xlsx/xform/table/filter-column-xform.js +4 -0
- package/dist/types/utils/sheet-utils.d.ts +8 -2
- package/dist/types/utils/unzip/extract.d.ts +92 -0
- package/dist/types/utils/unzip/index.d.ts +1 -0
- package/dist/types/utils/xml-stream.d.ts +2 -0
- package/dist/types/utils/zip/compress.d.ts +83 -0
- package/dist/types/utils/zip/crc32.d.ts +55 -0
- package/dist/types/utils/zip/index.d.ts +52 -0
- package/dist/types/utils/zip/zip-builder.d.ts +110 -0
- package/dist/types/utils/zip-stream.d.ts +6 -12
- package/dist/types/xlsx/xform/list-xform.d.ts +1 -0
- package/dist/types/xlsx/xform/sheet/row-xform.d.ts +2 -0
- package/package.json +1 -1
|
@@ -37,8 +37,9 @@ class WorkbookWriter {
|
|
|
37
37
|
this.views = [];
|
|
38
38
|
this.zipOptions = options.zip;
|
|
39
39
|
// Extract compression level from zip options (supports both zlib.level and compressionOptions.level)
|
|
40
|
-
// Default compression level is
|
|
41
|
-
|
|
40
|
+
// Default compression level is 1 (fast compression with good ratio)
|
|
41
|
+
// Level 1 is ~2x faster than level 6 with only ~7% larger files
|
|
42
|
+
const level = options.zip?.zlib?.level ?? options.zip?.compressionOptions?.level ?? 1;
|
|
42
43
|
this.compressionLevel = Math.max(0, Math.min(9, level));
|
|
43
44
|
this.media = [];
|
|
44
45
|
this.commentRefs = [];
|
|
@@ -221,20 +221,24 @@ const DAYS_LONG = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Frid
|
|
|
221
221
|
* @param fmt Format string
|
|
222
222
|
*/
|
|
223
223
|
function formatDate(serial, fmt) {
|
|
224
|
+
// Extract time components directly from serial number (timezone-agnostic)
|
|
225
|
+
const totalSeconds = Math.round(serial * 86400);
|
|
226
|
+
const timeOfDay = totalSeconds % 86400;
|
|
227
|
+
const hours = Math.floor(timeOfDay / 3600);
|
|
228
|
+
const minutes = Math.floor((timeOfDay % 3600) / 60);
|
|
229
|
+
const seconds = timeOfDay % 60;
|
|
230
|
+
// For date components, use excelToDate but only for date parts
|
|
224
231
|
const date = (0, utils_js_1.excelToDate)(serial, false);
|
|
225
|
-
const year = date.
|
|
226
|
-
const month = date.
|
|
227
|
-
const day = date.
|
|
228
|
-
const
|
|
229
|
-
const minutes = date.getMinutes();
|
|
230
|
-
const seconds = date.getSeconds();
|
|
231
|
-
const dayOfWeek = date.getDay();
|
|
232
|
+
const year = date.getUTCFullYear();
|
|
233
|
+
const month = date.getUTCMonth(); // 0-indexed
|
|
234
|
+
const day = date.getUTCDate();
|
|
235
|
+
const dayOfWeek = date.getUTCDay();
|
|
232
236
|
// Calculate fractional seconds from serial
|
|
233
|
-
const
|
|
234
|
-
const fractionalSeconds = totalSeconds - Math.floor(totalSeconds);
|
|
237
|
+
const fractionalSeconds = serial * 86400 - Math.floor(serial * 86400);
|
|
235
238
|
// Check for AM/PM
|
|
236
239
|
const hasAmPm = /AM\/PM|A\/P/i.test(fmt);
|
|
237
240
|
const isPm = hours >= 12;
|
|
241
|
+
// Standard 12-hour format: 0 and 12 both display as 12
|
|
238
242
|
const hours12 = hours % 12 || 12;
|
|
239
243
|
// Remove color codes like [Red], [Green], etc. but keep elapsed time brackets
|
|
240
244
|
let result = fmt.replace(/\[(Red|Green|Blue|Yellow|Magenta|Cyan|White|Black|Color\d+)\]/gi, "");
|
|
@@ -24,30 +24,140 @@ exports.sheetAddAoa = sheetAddAoa;
|
|
|
24
24
|
exports.sheetToAoa = sheetToAoa;
|
|
25
25
|
const workbook_js_1 = require("../doc/workbook");
|
|
26
26
|
const col_cache_js_1 = require("./col-cache");
|
|
27
|
-
const utils_js_1 = require("./utils");
|
|
28
27
|
const cell_format_js_1 = require("./cell-format");
|
|
28
|
+
/**
|
|
29
|
+
* Convert a Date object back to Excel serial number without timezone issues.
|
|
30
|
+
* This reverses the excelToDate conversion exactly.
|
|
31
|
+
* excelToDate uses: new Date(Math.round((v - 25569) * 24 * 3600 * 1000))
|
|
32
|
+
* So we reverse it: (date.getTime() / (24 * 3600 * 1000)) + 25569
|
|
33
|
+
*/
|
|
34
|
+
function dateToExcelSerial(d) {
|
|
35
|
+
return d.getTime() / (24 * 3600 * 1000) + 25569;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Check if format is a pure time format (no date components like y, m for month, d)
|
|
39
|
+
* Time formats only contain: h, m (minutes in time context), s, AM/PM
|
|
40
|
+
* Excludes elapsed time formats like [h]:mm:ss which should keep full serial number
|
|
41
|
+
*/
|
|
42
|
+
function isTimeOnlyFormat(fmt) {
|
|
43
|
+
// Remove quoted strings first
|
|
44
|
+
const cleaned = fmt.replace(/"[^"]*"/g, "");
|
|
45
|
+
// Elapsed time formats [h], [m], [s] should NOT be treated as time-only
|
|
46
|
+
// They need the full serial number to calculate total hours/minutes/seconds
|
|
47
|
+
if (/\[[hms]\]/i.test(cleaned)) {
|
|
48
|
+
return false;
|
|
49
|
+
}
|
|
50
|
+
// Remove color codes and conditions (but we already checked for [h], [m], [s])
|
|
51
|
+
const withoutBrackets = cleaned.replace(/\[[^\]]*\]/g, "");
|
|
52
|
+
// Check if it has time components (h, s, or AM/PM)
|
|
53
|
+
const hasTimeComponents = /[hs]/i.test(withoutBrackets) || /AM\/PM|A\/P/i.test(withoutBrackets);
|
|
54
|
+
// Check if it has date components (y, d, or m not adjacent to h/s which would make it minutes)
|
|
55
|
+
// In Excel: "m" after "h" or before "s" is minutes, otherwise it's month
|
|
56
|
+
const hasDateComponents = /[yd]/i.test(withoutBrackets);
|
|
57
|
+
// If it has time but no date components, it's a time-only format
|
|
58
|
+
// Also check for standalone 'm' that's not minutes (not near h or s)
|
|
59
|
+
if (hasDateComponents) {
|
|
60
|
+
return false;
|
|
61
|
+
}
|
|
62
|
+
// Check for month 'm' - if 'm' exists but not in h:m or m:s context, it's a date format
|
|
63
|
+
if (/m/i.test(withoutBrackets) && !hasTimeComponents) {
|
|
64
|
+
return false;
|
|
65
|
+
}
|
|
66
|
+
return hasTimeComponents;
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Check if format is a date format (contains y, d, or month-m)
|
|
70
|
+
* Used to determine if dateFormat override should be applied
|
|
71
|
+
*/
|
|
72
|
+
function isDateFormat(fmt) {
|
|
73
|
+
// Remove quoted strings first
|
|
74
|
+
const cleaned = fmt.replace(/"[^"]*"/g, "");
|
|
75
|
+
// Elapsed time formats [h], [m], [s] are NOT date formats
|
|
76
|
+
if (/\[[hms]\]/i.test(cleaned)) {
|
|
77
|
+
return false;
|
|
78
|
+
}
|
|
79
|
+
// Remove color codes and conditions
|
|
80
|
+
const withoutBrackets = cleaned.replace(/\[[^\]]*\]/g, "");
|
|
81
|
+
// Check for year or day components
|
|
82
|
+
if (/[yd]/i.test(withoutBrackets)) {
|
|
83
|
+
return true;
|
|
84
|
+
}
|
|
85
|
+
// Check for month 'm' - only if it's NOT in time context (not near h or s)
|
|
86
|
+
// In Excel: "m" after "h" or before "s" is minutes, otherwise it's month
|
|
87
|
+
if (/m/i.test(withoutBrackets)) {
|
|
88
|
+
const hasTimeComponents = /[hs]/i.test(withoutBrackets) || /AM\/PM|A\/P/i.test(withoutBrackets);
|
|
89
|
+
// If no time components, 'm' is month
|
|
90
|
+
if (!hasTimeComponents) {
|
|
91
|
+
return true;
|
|
92
|
+
}
|
|
93
|
+
// If has time components, need to check if 'm' is month or minutes
|
|
94
|
+
// Simplified: if format has both date-like and time-like patterns, consider it a date format
|
|
95
|
+
// e.g., "m/d/yy h:mm" - has 'm' as month and 'mm' as minutes
|
|
96
|
+
}
|
|
97
|
+
return false;
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Format a value (Date, number, boolean, string) according to the given format
|
|
101
|
+
* Handles timezone-independent conversion for Date objects
|
|
102
|
+
* @param value - The value to format
|
|
103
|
+
* @param fmt - The format string to use
|
|
104
|
+
* @param dateFormat - Optional override format for date values (not applied to time or elapsed time formats)
|
|
105
|
+
*/
|
|
106
|
+
function formatValue(value, fmt, dateFormat) {
|
|
107
|
+
// Date object - convert back to Excel serial number
|
|
108
|
+
if (value instanceof Date) {
|
|
109
|
+
let serial = dateToExcelSerial(value);
|
|
110
|
+
// For time-only formats, use only the fractional part (time portion)
|
|
111
|
+
if (isTimeOnlyFormat(fmt)) {
|
|
112
|
+
serial = serial % 1;
|
|
113
|
+
if (serial < 0) {
|
|
114
|
+
serial += 1;
|
|
115
|
+
}
|
|
116
|
+
return (0, cell_format_js_1.format)(fmt, serial);
|
|
117
|
+
}
|
|
118
|
+
// Only apply dateFormat override to actual date formats
|
|
119
|
+
// (not elapsed time formats like [h]:mm:ss)
|
|
120
|
+
const actualFmt = dateFormat && isDateFormat(fmt) ? dateFormat : fmt;
|
|
121
|
+
return (0, cell_format_js_1.format)(actualFmt, serial);
|
|
122
|
+
}
|
|
123
|
+
// Number/Boolean/String - let cellFormat handle it
|
|
124
|
+
return (0, cell_format_js_1.format)(fmt, value);
|
|
125
|
+
}
|
|
29
126
|
/**
|
|
30
127
|
* Get formatted display text for a cell value
|
|
31
128
|
* Returns the value formatted according to the cell's numFmt
|
|
32
|
-
* This matches Excel's display exactly
|
|
129
|
+
* This matches Excel's display exactly (timezone-independent)
|
|
130
|
+
* @param cell - The cell to get display text for
|
|
131
|
+
* @param dateFormat - Optional override format for date values
|
|
33
132
|
*/
|
|
34
|
-
function getCellDisplayText(cell) {
|
|
133
|
+
function getCellDisplayText(cell, dateFormat) {
|
|
35
134
|
const value = cell.value;
|
|
36
135
|
const fmt = cell.numFmt || "General";
|
|
37
136
|
// Null/undefined
|
|
38
137
|
if (value == null) {
|
|
39
138
|
return "";
|
|
40
139
|
}
|
|
41
|
-
// Date
|
|
42
|
-
if (value instanceof Date
|
|
43
|
-
|
|
44
|
-
|
|
140
|
+
// Date/Number/Boolean/String - format directly
|
|
141
|
+
if (value instanceof Date ||
|
|
142
|
+
typeof value === "number" ||
|
|
143
|
+
typeof value === "boolean" ||
|
|
144
|
+
typeof value === "string") {
|
|
145
|
+
return formatValue(value, fmt, dateFormat);
|
|
45
146
|
}
|
|
46
|
-
//
|
|
47
|
-
if (typeof value === "
|
|
48
|
-
|
|
147
|
+
// Formula type - use the result value
|
|
148
|
+
if (typeof value === "object" && "formula" in value) {
|
|
149
|
+
const result = value.result;
|
|
150
|
+
if (result == null) {
|
|
151
|
+
return "";
|
|
152
|
+
}
|
|
153
|
+
if (result instanceof Date ||
|
|
154
|
+
typeof result === "number" ||
|
|
155
|
+
typeof result === "boolean" ||
|
|
156
|
+
typeof result === "string") {
|
|
157
|
+
return formatValue(result, fmt, dateFormat);
|
|
158
|
+
}
|
|
49
159
|
}
|
|
50
|
-
// Fallback to cell.text for other types (rich text, hyperlink, error,
|
|
160
|
+
// Fallback to cell.text for other types (rich text, hyperlink, error, etc.)
|
|
51
161
|
return cell.text;
|
|
52
162
|
}
|
|
53
163
|
// =============================================================================
|
|
@@ -283,7 +393,7 @@ function sheetToJson(worksheet, opts) {
|
|
|
283
393
|
let isEmpty = true;
|
|
284
394
|
for (let col = startCol; col <= endCol; col++) {
|
|
285
395
|
const cell = worksheet.getCell(row, col);
|
|
286
|
-
const val = o.raw === false ? getCellDisplayText(cell).trim() : cell.value;
|
|
396
|
+
const val = o.raw === false ? getCellDisplayText(cell, o.dateFormat).trim() : cell.value;
|
|
287
397
|
if (val != null && val !== "") {
|
|
288
398
|
rowData[col - startCol] = val;
|
|
289
399
|
isEmpty = false;
|
|
@@ -311,7 +421,7 @@ function sheetToJson(worksheet, opts) {
|
|
|
311
421
|
let isEmpty = true;
|
|
312
422
|
for (let col = startCol; col <= endCol; col++) {
|
|
313
423
|
const cell = worksheet.getCell(row, col);
|
|
314
|
-
const val = o.raw === false ? getCellDisplayText(cell).trim() : cell.value;
|
|
424
|
+
const val = o.raw === false ? getCellDisplayText(cell, o.dateFormat).trim() : cell.value;
|
|
315
425
|
const key = encodeCol(col - 1); // 0-indexed for encodeCol
|
|
316
426
|
if (val != null && val !== "") {
|
|
317
427
|
rowData[key] = val;
|
|
@@ -338,7 +448,7 @@ function sheetToJson(worksheet, opts) {
|
|
|
338
448
|
const colIdx = col - startCol;
|
|
339
449
|
const key = headerOpt[colIdx] ?? `__EMPTY_${colIdx}`;
|
|
340
450
|
const cell = worksheet.getCell(row, col);
|
|
341
|
-
const val = o.raw === false ? getCellDisplayText(cell).trim() : cell.value;
|
|
451
|
+
const val = o.raw === false ? getCellDisplayText(cell, o.dateFormat).trim() : cell.value;
|
|
342
452
|
if (val != null && val !== "") {
|
|
343
453
|
rowData[key] = val;
|
|
344
454
|
isEmpty = false;
|
|
@@ -380,7 +490,7 @@ function sheetToJson(worksheet, opts) {
|
|
|
380
490
|
let isEmpty = true;
|
|
381
491
|
for (let col = startCol; col <= endCol; col++) {
|
|
382
492
|
const cell = worksheet.getCell(row, col);
|
|
383
|
-
const val = o.raw === false ? getCellDisplayText(cell).trim() : cell.value;
|
|
493
|
+
const val = o.raw === false ? getCellDisplayText(cell, o.dateFormat).trim() : cell.value;
|
|
384
494
|
const key = headers[col - startCol];
|
|
385
495
|
if (val != null && val !== "") {
|
|
386
496
|
rowData[key] = val;
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Simple ZIP extraction utilities
|
|
4
|
+
* Provides easy-to-use Promise-based API for extracting ZIP files
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.extractAll = extractAll;
|
|
8
|
+
exports.extractFile = extractFile;
|
|
9
|
+
exports.listFiles = listFiles;
|
|
10
|
+
exports.forEachEntry = forEachEntry;
|
|
11
|
+
const stream_1 = require("stream");
|
|
12
|
+
const parse_js_1 = require("./parse");
|
|
13
|
+
/**
|
|
14
|
+
* Extract all files from a ZIP buffer
|
|
15
|
+
*
|
|
16
|
+
* @param zipData - ZIP file data as Buffer or Uint8Array
|
|
17
|
+
* @returns Map of file paths to their content
|
|
18
|
+
*
|
|
19
|
+
* @example
|
|
20
|
+
* ```ts
|
|
21
|
+
* import { extractAll } from "./utils/unzip/extract.js";
|
|
22
|
+
*
|
|
23
|
+
* const zipData = fs.readFileSync("archive.zip");
|
|
24
|
+
* const files = await extractAll(zipData);
|
|
25
|
+
*
|
|
26
|
+
* for (const [path, file] of files) {
|
|
27
|
+
* console.log(`${path}: ${file.data.length} bytes`);
|
|
28
|
+
* }
|
|
29
|
+
* ```
|
|
30
|
+
*/
|
|
31
|
+
async function extractAll(zipData) {
|
|
32
|
+
const files = new Map();
|
|
33
|
+
const buffer = Buffer.isBuffer(zipData) ? zipData : Buffer.from(zipData);
|
|
34
|
+
const parse = (0, parse_js_1.createParse)({ forceStream: true });
|
|
35
|
+
const stream = stream_1.Readable.from([buffer]);
|
|
36
|
+
stream.pipe(parse);
|
|
37
|
+
for await (const entry of parse) {
|
|
38
|
+
const zipEntry = entry;
|
|
39
|
+
const isDirectory = zipEntry.type === "Directory";
|
|
40
|
+
if (isDirectory) {
|
|
41
|
+
files.set(zipEntry.path, {
|
|
42
|
+
path: zipEntry.path,
|
|
43
|
+
data: Buffer.alloc(0),
|
|
44
|
+
isDirectory: true,
|
|
45
|
+
size: 0
|
|
46
|
+
});
|
|
47
|
+
zipEntry.autodrain();
|
|
48
|
+
}
|
|
49
|
+
else {
|
|
50
|
+
const data = await zipEntry.buffer();
|
|
51
|
+
files.set(zipEntry.path, {
|
|
52
|
+
path: zipEntry.path,
|
|
53
|
+
data,
|
|
54
|
+
isDirectory: false,
|
|
55
|
+
size: data.length
|
|
56
|
+
});
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
return files;
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Extract a single file from a ZIP buffer
|
|
63
|
+
*
|
|
64
|
+
* @param zipData - ZIP file data as Buffer or Uint8Array
|
|
65
|
+
* @param filePath - Path of the file to extract
|
|
66
|
+
* @returns File content as Buffer, or null if not found
|
|
67
|
+
*
|
|
68
|
+
* @example
|
|
69
|
+
* ```ts
|
|
70
|
+
* import { extractFile } from "./utils/unzip/extract.js";
|
|
71
|
+
*
|
|
72
|
+
* const zipData = fs.readFileSync("archive.zip");
|
|
73
|
+
* const content = await extractFile(zipData, "readme.txt");
|
|
74
|
+
* if (content) {
|
|
75
|
+
* console.log(content.toString("utf-8"));
|
|
76
|
+
* }
|
|
77
|
+
* ```
|
|
78
|
+
*/
|
|
79
|
+
async function extractFile(zipData, filePath) {
|
|
80
|
+
const buffer = Buffer.isBuffer(zipData) ? zipData : Buffer.from(zipData);
|
|
81
|
+
const parse = (0, parse_js_1.createParse)({ forceStream: true });
|
|
82
|
+
const stream = stream_1.Readable.from([buffer]);
|
|
83
|
+
stream.pipe(parse);
|
|
84
|
+
for await (const entry of parse) {
|
|
85
|
+
const zipEntry = entry;
|
|
86
|
+
if (zipEntry.path === filePath) {
|
|
87
|
+
if (zipEntry.type === "Directory") {
|
|
88
|
+
return Buffer.alloc(0);
|
|
89
|
+
}
|
|
90
|
+
return zipEntry.buffer();
|
|
91
|
+
}
|
|
92
|
+
zipEntry.autodrain();
|
|
93
|
+
}
|
|
94
|
+
return null;
|
|
95
|
+
}
|
|
96
|
+
/**
|
|
97
|
+
* List all file paths in a ZIP buffer (without extracting content)
|
|
98
|
+
*
|
|
99
|
+
* @param zipData - ZIP file data as Buffer or Uint8Array
|
|
100
|
+
* @returns Array of file paths
|
|
101
|
+
*
|
|
102
|
+
* @example
|
|
103
|
+
* ```ts
|
|
104
|
+
* import { listFiles } from "./utils/unzip/extract.js";
|
|
105
|
+
*
|
|
106
|
+
* const zipData = fs.readFileSync("archive.zip");
|
|
107
|
+
* const paths = await listFiles(zipData);
|
|
108
|
+
* console.log(paths); // ["file1.txt", "folder/file2.txt", ...]
|
|
109
|
+
* ```
|
|
110
|
+
*/
|
|
111
|
+
async function listFiles(zipData) {
|
|
112
|
+
const paths = [];
|
|
113
|
+
const buffer = Buffer.isBuffer(zipData) ? zipData : Buffer.from(zipData);
|
|
114
|
+
const parse = (0, parse_js_1.createParse)({ forceStream: true });
|
|
115
|
+
const stream = stream_1.Readable.from([buffer]);
|
|
116
|
+
stream.pipe(parse);
|
|
117
|
+
for await (const entry of parse) {
|
|
118
|
+
const zipEntry = entry;
|
|
119
|
+
paths.push(zipEntry.path);
|
|
120
|
+
zipEntry.autodrain();
|
|
121
|
+
}
|
|
122
|
+
return paths;
|
|
123
|
+
}
|
|
124
|
+
/**
|
|
125
|
+
* Iterate over ZIP entries with a callback (memory efficient for large ZIPs)
|
|
126
|
+
*
|
|
127
|
+
* @param zipData - ZIP file data as Buffer or Uint8Array
|
|
128
|
+
* @param callback - Async callback for each entry, return false to stop iteration
|
|
129
|
+
*
|
|
130
|
+
* @example
|
|
131
|
+
* ```ts
|
|
132
|
+
* import { forEachEntry } from "./utils/unzip/extract.js";
|
|
133
|
+
*
|
|
134
|
+
* await forEachEntry(zipData, async (path, getData) => {
|
|
135
|
+
* if (path.endsWith(".xml")) {
|
|
136
|
+
* const content = await getData();
|
|
137
|
+
* console.log(content.toString("utf-8"));
|
|
138
|
+
* }
|
|
139
|
+
* return true; // continue iteration
|
|
140
|
+
* });
|
|
141
|
+
* ```
|
|
142
|
+
*/
|
|
143
|
+
async function forEachEntry(zipData, callback) {
|
|
144
|
+
const buffer = Buffer.isBuffer(zipData) ? zipData : Buffer.from(zipData);
|
|
145
|
+
const parse = (0, parse_js_1.createParse)({ forceStream: true });
|
|
146
|
+
const stream = stream_1.Readable.from([buffer]);
|
|
147
|
+
stream.pipe(parse);
|
|
148
|
+
for await (const entry of parse) {
|
|
149
|
+
const zipEntry = entry;
|
|
150
|
+
let dataPromise = null;
|
|
151
|
+
const getData = () => {
|
|
152
|
+
if (!dataPromise) {
|
|
153
|
+
dataPromise = zipEntry.buffer();
|
|
154
|
+
}
|
|
155
|
+
return dataPromise;
|
|
156
|
+
};
|
|
157
|
+
const shouldContinue = await callback(zipEntry.path, getData, zipEntry);
|
|
158
|
+
// If callback didn't read data, drain it
|
|
159
|
+
if (!dataPromise) {
|
|
160
|
+
zipEntry.autodrain();
|
|
161
|
+
}
|
|
162
|
+
if (shouldContinue === false) {
|
|
163
|
+
break;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* License: MIT
|
|
6
6
|
*/
|
|
7
7
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
-
exports.parseExtraField = exports.parseDateTime = exports.parseBuffer = exports.bufferStream = exports.NoopStream = exports.PullStream = exports.createParse = exports.Parse = void 0;
|
|
8
|
+
exports.forEachEntry = exports.listFiles = exports.extractFile = exports.extractAll = exports.parseExtraField = exports.parseDateTime = exports.parseBuffer = exports.bufferStream = exports.NoopStream = exports.PullStream = exports.createParse = exports.Parse = void 0;
|
|
9
9
|
var parse_js_1 = require("./parse");
|
|
10
10
|
Object.defineProperty(exports, "Parse", { enumerable: true, get: function () { return parse_js_1.Parse; } });
|
|
11
11
|
Object.defineProperty(exports, "createParse", { enumerable: true, get: function () { return parse_js_1.createParse; } });
|
|
@@ -21,3 +21,9 @@ var parse_datetime_js_1 = require("./parse-datetime");
|
|
|
21
21
|
Object.defineProperty(exports, "parseDateTime", { enumerable: true, get: function () { return parse_datetime_js_1.parseDateTime; } });
|
|
22
22
|
var parse_extra_field_js_1 = require("./parse-extra-field");
|
|
23
23
|
Object.defineProperty(exports, "parseExtraField", { enumerable: true, get: function () { return parse_extra_field_js_1.parseExtraField; } });
|
|
24
|
+
// Simple extraction API
|
|
25
|
+
var extract_js_1 = require("./extract");
|
|
26
|
+
Object.defineProperty(exports, "extractAll", { enumerable: true, get: function () { return extract_js_1.extractAll; } });
|
|
27
|
+
Object.defineProperty(exports, "extractFile", { enumerable: true, get: function () { return extract_js_1.extractFile; } });
|
|
28
|
+
Object.defineProperty(exports, "listFiles", { enumerable: true, get: function () { return extract_js_1.listFiles; } });
|
|
29
|
+
Object.defineProperty(exports, "forEachEntry", { enumerable: true, get: function () { return extract_js_1.forEachEntry; } });
|
|
@@ -7,6 +7,8 @@ const OPEN_ANGLE = "<";
|
|
|
7
7
|
const CLOSE_ANGLE = ">";
|
|
8
8
|
const OPEN_ANGLE_SLASH = "</";
|
|
9
9
|
const CLOSE_SLASH_ANGLE = "/>";
|
|
10
|
+
// Chunk size for periodic consolidation (reduces final join overhead)
|
|
11
|
+
const CHUNK_SIZE = 10000;
|
|
10
12
|
function pushAttribute(xml, name, value) {
|
|
11
13
|
xml.push(` ${name}="${(0, utils_js_1.xmlEncode)(value.toString())}"`);
|
|
12
14
|
}
|
|
@@ -24,15 +26,23 @@ function pushAttributes(xml, attributes) {
|
|
|
24
26
|
class XmlStream {
|
|
25
27
|
constructor() {
|
|
26
28
|
this._xml = [];
|
|
29
|
+
this._chunks = [];
|
|
27
30
|
this._stack = [];
|
|
28
31
|
this._rollbacks = [];
|
|
29
32
|
}
|
|
33
|
+
_consolidate() {
|
|
34
|
+
// Periodically join small strings into larger chunks to reduce final join overhead
|
|
35
|
+
if (this._xml.length >= CHUNK_SIZE) {
|
|
36
|
+
this._chunks.push(this._xml.join(""));
|
|
37
|
+
this._xml = [];
|
|
38
|
+
}
|
|
39
|
+
}
|
|
30
40
|
get tos() {
|
|
31
41
|
return this._stack.length ? this._stack[this._stack.length - 1] : undefined;
|
|
32
42
|
}
|
|
33
43
|
get cursor() {
|
|
34
44
|
// handy way to track whether anything has been added
|
|
35
|
-
return this._xml.length;
|
|
45
|
+
return this._chunks.length * CHUNK_SIZE + this._xml.length;
|
|
36
46
|
}
|
|
37
47
|
openXml(docAttributes) {
|
|
38
48
|
const xml = this._xml;
|
|
@@ -99,6 +109,7 @@ class XmlStream {
|
|
|
99
109
|
}
|
|
100
110
|
this.open = false;
|
|
101
111
|
this.leaf = false;
|
|
112
|
+
this._consolidate();
|
|
102
113
|
}
|
|
103
114
|
leafNode(name, attributes, text) {
|
|
104
115
|
this.openNode(name, attributes);
|
|
@@ -118,7 +129,8 @@ class XmlStream {
|
|
|
118
129
|
xml: this._xml.length,
|
|
119
130
|
stack: this._stack.length,
|
|
120
131
|
leaf: this.leaf,
|
|
121
|
-
open: this.open
|
|
132
|
+
open: this.open,
|
|
133
|
+
chunksLength: this._chunks.length
|
|
122
134
|
});
|
|
123
135
|
return this.cursor;
|
|
124
136
|
}
|
|
@@ -133,12 +145,22 @@ class XmlStream {
|
|
|
133
145
|
if (this._stack.length > r.stack) {
|
|
134
146
|
this._stack.splice(r.stack, this._stack.length - r.stack);
|
|
135
147
|
}
|
|
148
|
+
if (this._chunks.length > r.chunksLength) {
|
|
149
|
+
this._chunks.splice(r.chunksLength, this._chunks.length - r.chunksLength);
|
|
150
|
+
}
|
|
136
151
|
this.leaf = r.leaf;
|
|
137
152
|
this.open = r.open;
|
|
138
153
|
}
|
|
139
154
|
get xml() {
|
|
140
155
|
this.closeAll();
|
|
141
|
-
|
|
156
|
+
// Join chunks first, then remaining xml array
|
|
157
|
+
if (this._chunks.length === 0) {
|
|
158
|
+
return this._xml.join("");
|
|
159
|
+
}
|
|
160
|
+
if (this._xml.length > 0) {
|
|
161
|
+
this._chunks.push(this._xml.join(""));
|
|
162
|
+
}
|
|
163
|
+
return this._chunks.join("");
|
|
142
164
|
}
|
|
143
165
|
}
|
|
144
166
|
exports.XmlStream = XmlStream;
|