xlsx-for-ai 1.5.1 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -381,6 +381,18 @@ function detectRegion(ws) {
381
381
  const colCount = ws.columnCount;
382
382
  if (rowCount === 0 || colCount === 0) return null;
383
383
 
384
+ // ExcelJS reports rowCount/columnCount as the highest USED row/column,
385
+ // not actual storage. A workbook with one cell at XFD1048576 reports
386
+ // 1048576 × 16384 = ~17B coordinates. Refuse the scan past 5M cells —
387
+ // pathological/malicious inputs would otherwise hang the CLI.
388
+ if (rowCount * colCount > 5_000_000) {
389
+ console.warn(
390
+ `detectRegion: workbook reports ${rowCount}×${colCount} cell dimensions, ` +
391
+ `exceeds 5M-cell scan cap; skipping region detection`
392
+ );
393
+ return null;
394
+ }
395
+
384
396
  for (let r = 1; r <= rowCount; r++) {
385
397
  const row = ws.getRow(r);
386
398
  for (let c = 1; c <= colCount; c++) {
package/lib/engine.js CHANGED
@@ -23,7 +23,7 @@ class ExcelJSEngine {
23
23
  /** Engine identifier — useful for diagnostics. */
24
24
  get name() { return 'exceljs'; }
25
25
  get version() {
26
- try { return require('exceljs/package.json').version; } catch (_) { return 'unknown'; }
26
+ try { return require('@protobi/exceljs/package.json').version; } catch (_) { return 'unknown'; }
27
27
  }
28
28
 
29
29
  /**
@@ -117,17 +117,36 @@ function redactSharedStringsXml(xml) {
117
117
  }
118
118
 
119
119
  // Comments: <comment><text><r>...<t>USER TEXT</t></r></text></comment>
120
- // Replace every <t> payload with "x".
120
+ // Replace every <t> payload with "x". Also strips <author>NAME</author>
121
+ // display names in <authors>; the numeric authorId on each <comment>
122
+ // references the (now redacted) author entry.
121
123
  function redactCommentsXml(xml) {
122
- return xml.replace(/(<t\b[^>]*>)([\s\S]*?)(<\/t>)/g, (m, open, payload, close) => {
124
+ let out = xml.replace(/(<t\b[^>]*>)([\s\S]*?)(<\/t>)/g, (m, open, payload, close) => {
125
+ return open + (payload === '' ? '' : 'x') + close;
126
+ });
127
+ out = out.replace(/(<author\b[^>]*>)([\s\S]*?)(<\/author>)/g, (m, open, payload, close) => {
123
128
  return open + (payload === '' ? '' : 'x') + close;
124
129
  });
130
+ return out;
125
131
  }
126
132
 
127
133
  // Threaded comments: <threadedComment ... text="USER TEXT" .../>
128
- // Excel encodes the body as an attribute — must redact in place.
134
+ // Excel encodes the body as an attribute — must redact in place. Both
135
+ // double-quoted and single-quoted attribute values are valid XML and we
136
+ // must scrub both forms.
129
137
  function redactThreadedCommentsXml(xml) {
130
- return xml.replace(/\btext="[^"]*"/g, 'text="x"');
138
+ return xml.replace(/\btext=("[^"]*"|'[^']*')/g, 'text="x"');
139
+ }
140
+
141
+ // xl/persons/person.xml — author registry for threaded comments.
142
+ // <person displayName="Alice" id="..." userId="alice@co.com" providerId="AzureAD"/>
143
+ // Overwrite the values of the three identifying attributes with "x"; leave id (a
144
+ // UUID) so threaded comment authorId references still resolve. NOTE(review): only double-quoted values are matched, so a single-quoted displayName/userId/providerId would pass through unredacted.
145
+ function redactPersonsXml(xml) {
146
+ return xml
147
+ .replace(/\bdisplayName="[^"]*"/g, 'displayName="x"')
148
+ .replace(/\buserId="[^"]*"/g, 'userId="x"')
149
+ .replace(/\bproviderId="[^"]*"/g, 'providerId="x"');
131
150
  }
132
151
 
133
152
  // docProps/core.xml — strip author, title, subject, description, keywords,
@@ -196,9 +215,40 @@ function redactAppXml(xml) {
196
215
  function redactCustomPropsXml(xml) {
197
216
  // Custom property values live inside <vt:*> typed-value elements.
198
217
  // Replace their inner text with empty string (preserves type nodes).
199
- return xml.replace(/(<vt:[a-zA-Z]+\b[^>]*>)[^<]*(.*?)(<\/vt:[a-zA-Z]+>)/g, '$1$3');
218
+ //
219
+ // The character class includes digits so OOXML numeric type names
220
+ // (vt:r4, vt:r8, vt:i1/i2/i4/i8, vt:ui1/ui2/ui4/ui8, vt:filetime) match.
221
+ // The \2 backreference forces the open and close tag names to match,
222
+ // so a payload that contains nested elements (e.g.
223
+ // <vt:variant><vt:lpwstr>X</vt:lpwstr></vt:variant>) doesn't produce
224
+ // mangled output. The inner text class [^<] keeps the match strictly
225
+ // text-only; the outer wrapper is left structurally intact and any
226
+ // nested vt:* elements are matched on their own as the same global
227
+ // (/g) scan continues past the wrapper's opening tag — a single pass.
228
+ return xml.replace(/(<(vt:[a-zA-Z0-9]+)\b[^>]*>)[^<]*(<\/\2>)/g, '$1$3');
200
229
  }
201
230
 
231
+ // 1×1 transparent PNG — minimum valid PNG bytes. Used as a safe placeholder
232
+ // when stripping xl/media/ binary blobs so the ZIP remains structurally valid
233
+ // and drawing relationships don't point to missing entries.
234
+ // (67 bytes: PNG sig + IHDR + IDAT + IEND.) NOTE(review): the 10-byte IDAT payload looks like a deflate stream of only two zero bytes with BFINAL unset, while an RGBA 1×1 scanline needs five bytes — confirm this image actually decodes.
235
+ const TRANSPARENT_1X1_PNG = Buffer.from(
236
+ '89504e470d0a1a0a' + // PNG signature
237
+ '0000000d49484452' + // IHDR length + type
238
+ '00000001' + // width = 1
239
+ '00000001' + // height = 1
240
+ '08060000' + // bit depth 8, color type 6 (RGBA), compression 0, filter 0
241
+ '001f15c4' + // interlace 0, then the first three IHDR CRC bytes
242
+ '89' + // last IHDR CRC byte (CRC is 1f15c489; this is not padding)
243
+ '0000000a49444154' + // IDAT length + type
244
+ '789c6260' + // zlib header (78 9c) + first two deflate bytes
245
+ '0000000200' + // IDAT payload bytes 5–9 of 10
246
+ '01e221bc33' + // final IDAT payload byte (01), then the 4-byte IDAT CRC
247
+ '0000000049454e44' + // IEND length + type
248
+ 'ae426082', // IEND CRC
249
+ 'hex',
250
+ );
251
+
202
252
  async function exportRedactedWorkbook(inputPath, outputPath) {
203
253
  if (!fs.existsSync(inputPath)) {
204
254
  throw new Error(`File not found: ${inputPath}`);
@@ -238,6 +288,14 @@ async function exportRedactedWorkbook(inputPath, outputPath) {
238
288
  } else if (/^docProps\/custom\.xml$/i.test(name)) {
239
289
  const xml = await file.async('string');
240
290
  zip.file(name, redactCustomPropsXml(xml));
291
+ } else if (/^xl\/persons\/person\.xml$/i.test(name)) {
292
+ const xml = await file.async('string');
293
+ zip.file(name, redactPersonsXml(xml));
294
+ } else if (/^xl\/media\//i.test(name)) {
295
+ // Embedded images / media — replace with a 1×1 transparent PNG so
296
+ // drawing relationships remain valid and the ZIP is structurally intact,
297
+ // but no user-supplied binary data survives in the output.
298
+ zip.file(name, TRANSPARENT_1X1_PNG);
241
299
  }
242
300
  // All other parts pass through untouched.
243
301
  }
@@ -258,7 +316,11 @@ module.exports = {
258
316
  // exported for unit testing
259
317
  _redactSheetXml: redactSheetXml,
260
318
  _redactSharedStringsXml: redactSharedStringsXml,
319
+ _redactCommentsXml: redactCommentsXml,
320
+ _redactThreadedCommentsXml: redactThreadedCommentsXml,
261
321
  _redactCoreXml: redactCoreXml,
262
322
  _redactAppXml: redactAppXml,
263
323
  _redactCustomPropsXml: redactCustomPropsXml,
324
+ _redactPersonsXml: redactPersonsXml,
325
+ _TRANSPARENT_1X1_PNG: TRANSPARENT_1X1_PNG,
264
326
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xlsx-for-ai",
3
- "version": "1.5.1",
3
+ "version": "1.5.2",
4
4
  "description": "CLI that converts .xlsx files into rich text or JSON dumps that AI coding agents (Claude, Cursor, Copilot, ChatGPT, etc.) can read — preserving values, formulas, formatting, colors, column widths, frozen panes, named ranges, tables, and more.",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -17,7 +17,7 @@
17
17
  "LICENSE"
18
18
  ],
19
19
  "scripts": {
20
- "test": "node --test test/round-trip.test.js test/output-matrix.test.js test/unit/*.test.js tests/telemetry-sanitize.test.js tests/telemetry-config.test.js tests/telemetry-consent-version.test.js tests/telemetry-flags.test.js"
20
+ "test": "node --test test/round-trip.test.js test/output-matrix.test.js test/unit/*.test.js tests/telemetry-sanitize.test.js tests/telemetry-config.test.js tests/telemetry-consent-version.test.js tests/telemetry-flags.test.js tests/redactWorkbook-leak-check.test.js"
21
21
  },
22
22
  "keywords": [
23
23
  "xlsx",