xlsx-for-ai 1.5.1 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +12 -0
- package/lib/engine.js +1 -1
- package/lib/redactWorkbook.js +67 -5
- package/package.json +2 -2
package/index.js
CHANGED
|
@@ -381,6 +381,18 @@ function detectRegion(ws) {
|
|
|
381
381
|
const colCount = ws.columnCount;
|
|
382
382
|
if (rowCount === 0 || colCount === 0) return null;
|
|
383
383
|
|
|
384
|
+
// ExcelJS reports rowCount/columnCount as the highest USED row/column,
|
|
385
|
+
// not actual storage. A workbook with one cell at XFD1048576 reports
|
|
386
|
+
// 1048576 × 16384 = ~17B coordinates. Refuse the scan past 5M cells —
|
|
387
|
+
// pathological/malicious inputs would otherwise hang the CLI.
|
|
388
|
+
if (rowCount * colCount > 5_000_000) {
|
|
389
|
+
console.warn(
|
|
390
|
+
`detectRegion: workbook reports ${rowCount}×${colCount} cell dimensions, ` +
|
|
391
|
+
`exceeds 5M-cell scan cap; skipping region detection`
|
|
392
|
+
);
|
|
393
|
+
return null;
|
|
394
|
+
}
|
|
395
|
+
|
|
384
396
|
for (let r = 1; r <= rowCount; r++) {
|
|
385
397
|
const row = ws.getRow(r);
|
|
386
398
|
for (let c = 1; c <= colCount; c++) {
|
package/lib/engine.js
CHANGED
|
@@ -23,7 +23,7 @@ class ExcelJSEngine {
|
|
|
23
23
|
/** Engine identifier — useful for diagnostics. */
|
|
24
24
|
get name() { return 'exceljs'; }
|
|
25
25
|
get version() {
|
|
26
|
-
try { return require('exceljs/package.json').version; } catch (_) { return 'unknown'; }
|
|
26
|
+
try { return require('@protobi/exceljs/package.json').version; } catch (_) { return 'unknown'; }
|
|
27
27
|
}
|
|
28
28
|
|
|
29
29
|
/**
|
package/lib/redactWorkbook.js
CHANGED
|
@@ -117,17 +117,36 @@ function redactSharedStringsXml(xml) {
|
|
|
117
117
|
}
|
|
118
118
|
|
|
119
119
|
// Comments: <comment><text><r>...<t>USER TEXT</t></r></text></comment>
|
|
120
|
-
// Replace every <t> payload with "x".
|
|
120
|
+
// Replace every <t> payload with "x". Also strips <author>NAME</author>
|
|
121
|
+
// display names in <authors>; the numeric authorId on each <comment>
|
|
122
|
+
// references the (now redacted) author entry.
|
|
121
123
|
function redactCommentsXml(xml) {
|
|
122
|
-
|
|
124
|
+
let out = xml.replace(/(<t\b[^>]*>)([\s\S]*?)(<\/t>)/g, (m, open, payload, close) => {
|
|
125
|
+
return open + (payload === '' ? '' : 'x') + close;
|
|
126
|
+
});
|
|
127
|
+
out = out.replace(/(<author\b[^>]*>)([\s\S]*?)(<\/author>)/g, (m, open, payload, close) => {
|
|
123
128
|
return open + (payload === '' ? '' : 'x') + close;
|
|
124
129
|
});
|
|
130
|
+
return out;
|
|
125
131
|
}
|
|
126
132
|
|
|
127
133
|
// Threaded comments: <threadedComment ... text="USER TEXT" .../>
|
|
128
|
-
// Excel encodes the body as an attribute — must redact in place.
|
|
134
|
+
// Excel encodes the body as an attribute — must redact in place. Both
|
|
135
|
+
// double-quoted and single-quoted attribute values are valid XML and we
|
|
136
|
+
// must scrub both forms.
|
|
129
137
|
function redactThreadedCommentsXml(xml) {
|
|
130
|
-
return xml.replace(/\btext="[^"]*"/g, 'text="x"');
|
|
138
|
+
return xml.replace(/\btext=("[^"]*"|'[^']*')/g, 'text="x"');
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
// xl/persons/person.xml — author registry for threaded comments.
|
|
142
|
+
// <person displayName="Alice" id="..." userId="alice@co.com" providerId="AzureAD"/>
|
|
143
|
+
// Strip the three identifying attributes; leave id (a UUID) so threaded comment
|
|
144
|
+
// authorId references still resolve.
|
|
145
|
+
function redactPersonsXml(xml) {
|
|
146
|
+
return xml
|
|
147
|
+
.replace(/\bdisplayName="[^"]*"/g, 'displayName="x"')
|
|
148
|
+
.replace(/\buserId="[^"]*"/g, 'userId="x"')
|
|
149
|
+
.replace(/\bproviderId="[^"]*"/g, 'providerId="x"');
|
|
131
150
|
}
|
|
132
151
|
|
|
133
152
|
// docProps/core.xml — strip author, title, subject, description, keywords,
|
|
@@ -196,9 +215,40 @@ function redactAppXml(xml) {
|
|
|
196
215
|
function redactCustomPropsXml(xml) {
|
|
197
216
|
// Custom property values live inside <vt:*> typed-value elements.
|
|
198
217
|
// Replace their inner text with empty string (preserves type nodes).
|
|
199
|
-
|
|
218
|
+
//
|
|
219
|
+
// The character class includes digits so OOXML numeric type names
|
|
220
|
+
// (vt:r4, vt:r8, vt:i1/i2/i4/i8, vt:ui1/ui2/ui4/ui8, vt:filetime) match.
|
|
221
|
+
// The \2 backreference forces the open and close tag names to match,
|
|
222
|
+
// so a payload that contains nested elements (e.g.
|
|
223
|
+
// <vt:variant><vt:lpwstr>X</vt:lpwstr></vt:variant>) doesn't produce
|
|
224
|
+
// mangled output. The inner text class [^<] keeps the match strictly
|
|
225
|
+
// text-only; the outer wrapper is left structurally intact and any
|
|
226
|
+
// nested vt:* elements get scrubbed by the same regex on overlapping
|
|
227
|
+
// passes.
|
|
228
|
+
return xml.replace(/(<(vt:[a-zA-Z0-9]+)\b[^>]*>)[^<]*(<\/\2>)/g, '$1$3');
|
|
200
229
|
}
|
|
201
230
|
|
|
231
|
+
// 1×1 transparent PNG — minimum valid PNG bytes. Used as a safe placeholder
|
|
232
|
+
// when stripping xl/media/ binary blobs so the ZIP remains structurally valid
|
|
233
|
+
// and drawing relationships don't point to missing entries.
|
|
234
|
+
// (67 bytes: PNG sig + IHDR + IDAT with one transparent pixel + IEND)
|
|
235
|
+
const TRANSPARENT_1X1_PNG = Buffer.from(
|
|
236
|
+
'89504e470d0a1a0a' + // PNG signature
|
|
237
|
+
'0000000d49484452' + // IHDR length + type
|
|
238
|
+
'00000001' + // width = 1
|
|
239
|
+
'00000001' + // height = 1
|
|
240
|
+
'08060000' + // 8-bit RGBA
|
|
241
|
+
'001f15c4' + // interlace = 0, then first 3 bytes of IHDR CRC
|
|
242
|
+
'89' + // last byte of IHDR CRC (1f15c489)
|
|
243
|
+
'0000000a49444154' + // IDAT length + type
|
|
244
|
+
'789c6260' + // zlib header + deflate block
|
|
245
|
+
'0000000200' + // deflate end
|
|
246
|
+
'01e221bc33' + // last IDAT data byte + IDAT CRC (e221bc33)
|
|
247
|
+
'0000000049454e44' + // IEND length + type
|
|
248
|
+
'ae426082', // IEND CRC
|
|
249
|
+
'hex',
|
|
250
|
+
);
|
|
251
|
+
|
|
202
252
|
async function exportRedactedWorkbook(inputPath, outputPath) {
|
|
203
253
|
if (!fs.existsSync(inputPath)) {
|
|
204
254
|
throw new Error(`File not found: ${inputPath}`);
|
|
@@ -238,6 +288,14 @@ async function exportRedactedWorkbook(inputPath, outputPath) {
|
|
|
238
288
|
} else if (/^docProps\/custom\.xml$/i.test(name)) {
|
|
239
289
|
const xml = await file.async('string');
|
|
240
290
|
zip.file(name, redactCustomPropsXml(xml));
|
|
291
|
+
} else if (/^xl\/persons\/person\.xml$/i.test(name)) {
|
|
292
|
+
const xml = await file.async('string');
|
|
293
|
+
zip.file(name, redactPersonsXml(xml));
|
|
294
|
+
} else if (/^xl\/media\//i.test(name)) {
|
|
295
|
+
// Embedded images / media — replace with a 1×1 transparent PNG so
|
|
296
|
+
// drawing relationships remain valid and the ZIP is structurally intact,
|
|
297
|
+
// but no user-supplied binary data survives in the output.
|
|
298
|
+
zip.file(name, TRANSPARENT_1X1_PNG);
|
|
241
299
|
}
|
|
242
300
|
// All other parts pass through untouched.
|
|
243
301
|
}
|
|
@@ -258,7 +316,11 @@ module.exports = {
|
|
|
258
316
|
// exported for unit testing
|
|
259
317
|
_redactSheetXml: redactSheetXml,
|
|
260
318
|
_redactSharedStringsXml: redactSharedStringsXml,
|
|
319
|
+
_redactCommentsXml: redactCommentsXml,
|
|
320
|
+
_redactThreadedCommentsXml: redactThreadedCommentsXml,
|
|
261
321
|
_redactCoreXml: redactCoreXml,
|
|
262
322
|
_redactAppXml: redactAppXml,
|
|
263
323
|
_redactCustomPropsXml: redactCustomPropsXml,
|
|
324
|
+
_redactPersonsXml: redactPersonsXml,
|
|
325
|
+
_TRANSPARENT_1X1_PNG: TRANSPARENT_1X1_PNG,
|
|
264
326
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "xlsx-for-ai",
|
|
3
|
-
"version": "1.5.1",
|
|
3
|
+
"version": "1.5.2",
|
|
4
4
|
"description": "CLI that converts .xlsx files into rich text or JSON dumps that AI coding agents (Claude, Cursor, Copilot, ChatGPT, etc.) can read — preserving values, formulas, formatting, colors, column widths, frozen panes, named ranges, tables, and more.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"LICENSE"
|
|
18
18
|
],
|
|
19
19
|
"scripts": {
|
|
20
|
-
"test": "node --test test/round-trip.test.js test/output-matrix.test.js test/unit/*.test.js tests/telemetry-sanitize.test.js tests/telemetry-config.test.js tests/telemetry-consent-version.test.js tests/telemetry-flags.test.js"
|
|
20
|
+
"test": "node --test test/round-trip.test.js test/output-matrix.test.js test/unit/*.test.js tests/telemetry-sanitize.test.js tests/telemetry-config.test.js tests/telemetry-consent-version.test.js tests/telemetry-flags.test.js tests/redactWorkbook-leak-check.test.js"
|
|
21
21
|
},
|
|
22
22
|
"keywords": [
|
|
23
23
|
"xlsx",
|