@cj-tech-master/excelts 9.5.5 → 9.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dist/browser/modules/excel/worksheet.d.ts +11 -0
  2. package/dist/browser/modules/excel/worksheet.js +13 -0
  3. package/dist/browser/modules/formula/integration/apply-writeback-plan.js +17 -3
  4. package/dist/browser/modules/formula/integration/workbook-adapter.js +20 -1
  5. package/dist/browser/modules/formula/integration/workbook-snapshot.d.ts +12 -0
  6. package/dist/browser/modules/formula/materialize/build-writeback-plan.js +47 -0
  7. package/dist/browser/modules/formula/materialize/types.d.ts +19 -3
  8. package/dist/browser/modules/formula/materialize/types.js +13 -3
  9. package/dist/browser/modules/pdf/builder/document-builder.js +2 -2
  10. package/dist/browser/modules/pdf/font/system-fonts.d.ts +24 -4
  11. package/dist/browser/modules/pdf/font/system-fonts.js +76 -32
  12. package/dist/browser/modules/pdf/render/pdf-exporter.js +6 -3
  13. package/dist/browser/modules/word/advanced/field-engine.js +151 -23
  14. package/dist/browser/modules/word/advanced/math-convert.js +2 -1
  15. package/dist/browser/modules/word/advanced/style-map.js +44 -6
  16. package/dist/browser/modules/word/convert/html/html-import.js +434 -71
  17. package/dist/browser/modules/word/convert/markdown/markdown-renderer.js +11 -3
  18. package/dist/browser/modules/word/layout/layout-full.js +4 -1
  19. package/dist/browser/modules/word/security/digital-signatures.js +160 -33
  20. package/dist/browser/modules/word/security/encryption.js +109 -9
  21. package/dist/cjs/modules/excel/worksheet.js +13 -0
  22. package/dist/cjs/modules/formula/integration/apply-writeback-plan.js +17 -3
  23. package/dist/cjs/modules/formula/integration/workbook-adapter.js +20 -1
  24. package/dist/cjs/modules/formula/materialize/build-writeback-plan.js +47 -0
  25. package/dist/cjs/modules/formula/materialize/types.js +13 -3
  26. package/dist/cjs/modules/pdf/builder/document-builder.js +1 -1
  27. package/dist/cjs/modules/pdf/font/system-fonts.js +77 -32
  28. package/dist/cjs/modules/pdf/render/pdf-exporter.js +5 -2
  29. package/dist/cjs/modules/word/advanced/field-engine.js +151 -23
  30. package/dist/cjs/modules/word/advanced/math-convert.js +2 -1
  31. package/dist/cjs/modules/word/advanced/style-map.js +44 -6
  32. package/dist/cjs/modules/word/convert/html/html-import.js +434 -71
  33. package/dist/cjs/modules/word/convert/markdown/markdown-renderer.js +11 -3
  34. package/dist/cjs/modules/word/layout/layout-full.js +4 -1
  35. package/dist/cjs/modules/word/security/digital-signatures.js +160 -33
  36. package/dist/cjs/modules/word/security/encryption.js +109 -9
  37. package/dist/esm/modules/excel/worksheet.js +13 -0
  38. package/dist/esm/modules/formula/integration/apply-writeback-plan.js +17 -3
  39. package/dist/esm/modules/formula/integration/workbook-adapter.js +20 -1
  40. package/dist/esm/modules/formula/materialize/build-writeback-plan.js +47 -0
  41. package/dist/esm/modules/formula/materialize/types.js +13 -3
  42. package/dist/esm/modules/pdf/builder/document-builder.js +2 -2
  43. package/dist/esm/modules/pdf/font/system-fonts.js +76 -32
  44. package/dist/esm/modules/pdf/render/pdf-exporter.js +6 -3
  45. package/dist/esm/modules/word/advanced/field-engine.js +151 -23
  46. package/dist/esm/modules/word/advanced/math-convert.js +2 -1
  47. package/dist/esm/modules/word/advanced/style-map.js +44 -6
  48. package/dist/esm/modules/word/convert/html/html-import.js +434 -71
  49. package/dist/esm/modules/word/convert/markdown/markdown-renderer.js +11 -3
  50. package/dist/esm/modules/word/layout/layout-full.js +4 -1
  51. package/dist/esm/modules/word/security/digital-signatures.js +160 -33
  52. package/dist/esm/modules/word/security/encryption.js +109 -9
  53. package/dist/iife/excelts.iife.js +40 -26
  54. package/dist/iife/excelts.iife.js.map +1 -1
  55. package/dist/iife/excelts.iife.min.js +3 -3
  56. package/dist/types/modules/excel/worksheet.d.ts +11 -0
  57. package/dist/types/modules/formula/integration/workbook-snapshot.d.ts +12 -0
  58. package/dist/types/modules/formula/materialize/types.d.ts +19 -3
  59. package/dist/types/modules/pdf/font/system-fonts.d.ts +24 -4
  60. package/package.json +1 -1
@@ -52,52 +52,179 @@ function parseSignatureXml(xmlStr, fileName) {
52
52
  fileName,
53
53
  cryptographicStatus: "not-verified"
54
54
  };
55
- // Extract Office-specific metadata from <SignatureInfoV1>
56
- const sigTextMatch = /<SignatureText[^>]*>([^<]*)<\/SignatureText>/.exec(xmlStr);
57
- if (sigTextMatch) {
58
- info.signer = (0, encode_1.xmlDecode)(sigTextMatch[1]);
55
+ // Each `<TagName ...>...</TagName>` lookup used to be a regex of the
56
+ // form `/<Tag[^>]*>([^<]*)<\/Tag>/.exec(xmlStr)`. Although `[^>]*` and
57
+ // `[^<]*` are linear in isolation, running ten such regexes against
58
+ // attacker-controlled signature XML triggers CodeQL's
59
+ // `js/polynomial-redos` rule. `extractTextElement` performs the same
60
+ // job in a single linear scan and cannot exhibit super-linear runtime.
61
+ const signer = extractTextElement(xmlStr, "SignatureText");
62
+ if (signer !== undefined) {
63
+ info.signer = (0, encode_1.xmlDecode)(signer);
59
64
  }
60
- const sigCommentsMatch = /<SignatureComments[^>]*>([^<]*)<\/SignatureComments>/.exec(xmlStr);
61
- if (sigCommentsMatch) {
62
- info.signatureComments = (0, encode_1.xmlDecode)(sigCommentsMatch[1]);
65
+ const sigComments = extractTextElement(xmlStr, "SignatureComments");
66
+ if (sigComments !== undefined) {
67
+ info.signatureComments = (0, encode_1.xmlDecode)(sigComments);
63
68
  }
64
- const purposeMatch = /<SignaturePurpose[^>]*>([^<]*)<\/SignaturePurpose>/.exec(xmlStr);
65
- if (purposeMatch) {
66
- info.purpose = (0, encode_1.xmlDecode)(purposeMatch[1]);
69
+ const purpose = extractTextElement(xmlStr, "SignaturePurpose");
70
+ if (purpose !== undefined) {
71
+ info.purpose = (0, encode_1.xmlDecode)(purpose);
67
72
  }
68
- const dateMatch = /<SignatureDate[^>]*>([^<]*)<\/SignatureDate>/.exec(xmlStr);
69
- if (dateMatch) {
70
- info.signDate = (0, encode_1.xmlDecode)(dateMatch[1]);
73
+ const signDate = extractTextElement(xmlStr, "SignatureDate");
74
+ if (signDate !== undefined) {
75
+ info.signDate = (0, encode_1.xmlDecode)(signDate);
71
76
  }
72
- const providerMatch = /<SignatureProviderUrl[^>]*>([^<]*)<\/SignatureProviderUrl>/.exec(xmlStr);
73
- if (providerMatch) {
74
- info.providerUrl = (0, encode_1.xmlDecode)(providerMatch[1]);
77
+ const providerUrl = extractTextElement(xmlStr, "SignatureProviderUrl");
78
+ if (providerUrl !== undefined) {
79
+ info.providerUrl = (0, encode_1.xmlDecode)(providerUrl);
75
80
  }
76
- // Commitment type
77
- const commitMatch = /<CommitmentType[^>]*>\s*<CommitmentTypeIndication[^>]*>\s*<CommitmentTypeId>([^<]*)<\/CommitmentTypeId>/.exec(xmlStr);
78
- if (commitMatch) {
79
- info.commitmentType = (0, encode_1.xmlDecode)(commitMatch[1]);
81
+ // Commitment type — nested element. Read the full `<CommitmentType>`
82
+ // body (which contains nested elements, hence `allowAngleBrackets`)
83
+ // then look for `<CommitmentTypeId>` inside.
84
+ const commitmentBlock = extractTextElement(xmlStr, "CommitmentType", {
85
+ allowAngleBrackets: true
86
+ });
87
+ if (commitmentBlock !== undefined) {
88
+ const commitmentId = extractTextElement(commitmentBlock, "CommitmentTypeId");
89
+ if (commitmentId !== undefined) {
90
+ info.commitmentType = (0, encode_1.xmlDecode)(commitmentId);
91
+ }
80
92
  }
81
- // Extract signature value (base64)
82
- const sigValMatch = /<SignatureValue[^>]*>([^]*?)<\/SignatureValue>/.exec(xmlStr);
83
- if (sigValMatch) {
84
- info.signatureValue = sigValMatch[1].trim();
93
+ // Signature value (base64 — may legitimately span internal newlines; only leading/trailing whitespace is trimmed).
94
+ const signatureValue = extractTextElement(xmlStr, "SignatureValue", { allowAngleBrackets: true });
95
+ if (signatureValue !== undefined) {
96
+ info.signatureValue = signatureValue.trim();
85
97
  }
86
98
  // Certificate details from <X509Data>
87
- const certSubjectMatch = /<X509SubjectName[^>]*>([^<]*)<\/X509SubjectName>/.exec(xmlStr);
88
- if (certSubjectMatch) {
89
- info.certificateSubject = (0, encode_1.xmlDecode)(certSubjectMatch[1]);
99
+ const certSubject = extractTextElement(xmlStr, "X509SubjectName");
100
+ if (certSubject !== undefined) {
101
+ info.certificateSubject = (0, encode_1.xmlDecode)(certSubject);
90
102
  }
91
- const certIssuerMatch = /<X509IssuerName[^>]*>([^<]*)<\/X509IssuerName>/.exec(xmlStr);
92
- if (certIssuerMatch) {
93
- info.certificateIssuer = (0, encode_1.xmlDecode)(certIssuerMatch[1]);
103
+ const certIssuer = extractTextElement(xmlStr, "X509IssuerName");
104
+ if (certIssuer !== undefined) {
105
+ info.certificateIssuer = (0, encode_1.xmlDecode)(certIssuer);
94
106
  }
95
- const certSerialMatch = /<X509SerialNumber[^>]*>([^<]*)<\/X509SerialNumber>/.exec(xmlStr);
96
- if (certSerialMatch) {
97
- info.certificateSerialNumber = (0, encode_1.xmlDecode)(certSerialMatch[1]);
107
+ const certSerial = extractTextElement(xmlStr, "X509SerialNumber");
108
+ if (certSerial !== undefined) {
109
+ info.certificateSerialNumber = (0, encode_1.xmlDecode)(certSerial);
98
110
  }
99
111
  return info;
100
112
  }
113
/**
 * Find the first occurrence of `<tagName ...>...</tagName>` in `xml` and
 * return the inner text (verbatim — the caller is responsible for entity
 * decoding via `xmlDecode`).
 *
 * Implemented as a forward index scan rather than a regex match because the
 * input is attacker-controlled signature XML. Every helper used here only
 * inspects the tag name it is positioned on (the name scan stops at the next
 * `<`), and the scan position never moves backwards, so the total work is
 * linear in the input length even for hostile input such as a long run of
 * `<` characters.
 *
 * @param {string} xml - The XML text to search.
 * @param {string} tagName - Local element name (no namespace prefix). A
 *   namespace prefix actually present in the document (`<ns:Tag>`) is skipped.
 * @param {{allowAngleBrackets?: boolean}} [options]
 *   `allowAngleBrackets`: when true, the body is everything up to the first
 *   `</tagName>` close tag (prefix allowed), so nested markup and embedded
 *   newlines are returned verbatim. Nesting depth is not tracked — the first
 *   matching close tag ends the body. When false or omitted, the body ends at
 *   the next `<`, whatever tag that `<` belongs to.
 * @returns {string|undefined} The inner text, `""` for a self-closing
 *   element, or `undefined` when the element is missing or unterminated.
 */
function extractTextElement(xml, tagName, options = {}) {
  const n = xml.length;
  let from = 0;
  while (from < n) {
    const lt = xml.indexOf("<", from);
    if (lt < 0) {
      return undefined;
    }
    // Delimit the (possibly prefixed) name that follows `<`, then step over
    // an optional `prefix:`. Both scans are confined to the name itself.
    const nameEnd = findTagNameEnd(xml, lt + 1);
    const nameStart = skipNamespacePrefix(xml, lt + 1, nameEnd);
    const afterName = nameStart + tagName.length;
    if (!xml.startsWith(tagName, nameStart) || !isTagNameBoundary(xml.charCodeAt(afterName))) {
      // There is no `<` between `lt` and `nameEnd`, so resuming at `nameEnd`
      // skips nothing and guarantees forward progress.
      from = nameEnd;
      continue;
    }
    // Found `<…tagName`. Find the closing `>` of the open tag.
    const openEnd = xml.indexOf(">", afterName);
    if (openEnd < 0) {
      return undefined;
    }
    // Self-closing? Then the element has no text content.
    if (xml.charCodeAt(openEnd - 1) === 0x2f /* '/' */) {
      return "";
    }
    const bodyStart = openEnd + 1;
    if (options.allowAngleBrackets) {
      const closeIdx = findCloseTag(xml, bodyStart, tagName);
      return closeIdx < 0 ? undefined : xml.slice(bodyStart, closeIdx);
    }
    // Default behaviour: text content has no `<`. Stop at the next `<`.
    const bodyEnd = xml.indexOf("<", bodyStart);
    return bodyEnd < 0 ? undefined : xml.slice(bodyStart, bodyEnd);
  }
  return undefined;
}
/**
 * Return the index just past the tag name that starts at `start`: the first
 * whitespace, `/`, `>` or `<` at or after `start`, or `xml.length` if none.
 *
 * Stopping at `<` keeps the scan confined to a single tag, which is what
 * makes the callers linear on input consisting of many unterminated tags.
 */
function findTagNameEnd(xml, start) {
  const n = xml.length;
  let i = start;
  while (i < n) {
    const c = xml.charCodeAt(i);
    if (isTagNameBoundary(c) || c === 0x3c /* '<' */) {
      return i;
    }
    i++;
  }
  return n;
}
/**
 * Given a tag name occupying `[nameStart, nameEnd)`, return the index of its
 * local part: the position after the first `:` inside that span, or
 * `nameStart` when the name carries no namespace prefix.
 */
function skipNamespacePrefix(xml, nameStart, nameEnd) {
  for (let i = nameStart; i < nameEnd; i++) {
    if (xml.charCodeAt(i) === 0x3a /* ':' */) {
      return i + 1;
    }
  }
  return nameStart;
}
/**
 * True when char code `c` may legally follow a tag name: whitespace, `/` or
 * `>`. `NaN` (reading past the end of the string) is not a boundary.
 */
function isTagNameBoundary(c) {
  return (c === 0x20 || // space
    c === 0x09 || // tab
    c === 0x0a || // LF
    c === 0x0d || // CR
    c === 0x2f || // '/'
    c === 0x3e // '>'
  );
}
/**
 * Find the first `</tagName>` (optionally `</prefix:tagName>`, optionally
 * with whitespace before `>`) at or after `from`.
 *
 * @returns {number} Index of the `<` that opens the close tag, or -1 when no
 *   terminated matching close tag exists.
 */
function findCloseTag(xml, from, tagName) {
  const n = xml.length;
  let i = from;
  while (i < n) {
    const lt = xml.indexOf("</", i);
    if (lt < 0) {
      return -1;
    }
    const nameEnd = findTagNameEnd(xml, lt + 2);
    const nameStart = skipNamespacePrefix(xml, lt + 2, nameEnd);
    if (xml.startsWith(tagName, nameStart) &&
      isTagNameBoundary(xml.charCodeAt(nameStart + tagName.length))) {
      // A close tag that is never terminated by `>` does not count.
      return xml.indexOf(">", nameEnd) < 0 ? -1 : lt;
    }
    // No `<` lies between `lt + 2` and `nameEnd`; resume there.
    i = nameEnd;
  }
  return -1;
}
101
228
  /**
102
229
  * Extract all digital signatures from opaque parts of a document.
103
230
  *
@@ -334,13 +334,17 @@ function bytesEqual(a, b) {
334
334
  * @returns Parsed agile encryption info.
335
335
  */
336
336
  function parseEncryptionInfoXml(xmlStr) {
337
- // Simple regex-based extraction for the key <keyEncryptors><keyEncryptor>... element
338
- const keyDataMatch = /<keyData\s([\s\S]*?)\/>/.exec(xmlStr);
339
- const pwdEncryptorMatch = /<p:encryptedKey\s([\s\S]*?)\/>/.exec(xmlStr);
340
- if (!keyDataMatch || !pwdEncryptorMatch) {
337
+ // Locate the `<keyData ... />` and `<p:encryptedKey ... />` elements
338
+ // with a linear scan. Using regular expressions with lazy `[\s\S]*?`
339
+ // quantifiers triggers CodeQL's polynomial-regex warning because the
340
+ // input is attacker-controlled (a hostile EncryptionInfo XML stream
341
+ // with very long unterminated tags caused catastrophic backtracking).
342
+ const keyDataAttrs = extractSelfClosingTagAttrs(xmlStr, "keyData");
343
+ const pwdAttrs = extractSelfClosingTagAttrs(xmlStr, "p:encryptedKey");
344
+ if (!keyDataAttrs || !pwdAttrs) {
341
345
  throw new errors_1.DocxDecryptionError("Invalid EncryptionInfo XML - missing keyData or encryptedKey");
342
346
  }
343
- const pwdData = parseAttrs(pwdEncryptorMatch[1]);
347
+ const pwdData = pwdAttrs;
344
348
  // Required cryptographic fields — empty/missing values cannot decrypt and
345
349
  // produce confusing CryptoOperation errors downstream. Fail fast with a
346
350
  // clear message instead.
@@ -403,12 +407,108 @@ function parseEncryptionInfoXml(xmlStr) {
403
407
  blockSize
404
408
  };
405
409
  }
410
/**
 * Find the first self-closing `<tagName ... />` element in `xml` and return
 * its attributes as parsed by `parseAttrs`, or `null` if no such element
 * exists.
 *
 * Uses a forward scan instead of a regex with `[\s\S]*?` so adversarial
 * EncryptionInfo XML cannot trigger backtracking. The end of the tag is the
 * first `>` that is outside a quoted attribute value, because `>` is legal
 * inside attribute values; a tag with an unterminated quote is treated as
 * unterminated and yields `null`.
 *
 * @param {string} xml - XML text to search.
 * @param {string} tagName - Element name exactly as written in the document,
 *   including any namespace prefix (e.g. `p:encryptedKey`).
 * @returns {Object<string, string>|null} Attribute map, or `null`.
 */
function extractSelfClosingTagAttrs(xml, tagName) {
  const needle = `<${tagName}`;
  let from = 0;
  while (from <= xml.length) {
    const start = xml.indexOf(needle, from);
    if (start < 0) {
      return null;
    }
    const after = start + needle.length;
    const ch = xml.charCodeAt(after);
    // Require a whitespace, '/' or '>' after the tag name so `<keyDataExtra`
    // does not match `<keyData`.
    if (ch !== 0x20 && ch !== 0x09 && ch !== 0x0a && ch !== 0x0d && ch !== 0x2f && ch !== 0x3e) {
      from = after;
      continue;
    }
    // Locate the '>' that terminates this tag, ignoring any '>' that sits
    // inside a single- or double-quoted attribute value.
    let close = -1;
    let quote = 0;
    for (let i = after; i < xml.length; i++) {
      const c = xml.charCodeAt(i);
      if (quote !== 0) {
        if (c === quote) {
          quote = 0;
        }
      }
      else if (c === 0x22 /* '"' */ || c === 0x27 /* "'" */) {
        quote = c;
      }
      else if (c === 0x3e /* '>' */) {
        close = i;
        break;
      }
    }
    if (close < 0) {
      return null;
    }
    // The element must be self-closing: the char before '>' is '/'.
    if (xml.charCodeAt(close - 1) !== 0x2f) {
      from = close + 1;
      continue;
    }
    return parseAttrs(xml.slice(after, close - 1));
  }
  return null;
}
448
/**
 * Parse XML-style attributes (`name="value"` or `name='value'`) from a tag
 * fragment into a plain object. Implemented as a single forward scan rather
 * than a global regex so attacker-controlled input cannot trigger
 * polynomial-time backtracking (CodeQL js/polynomial-redos).
 *
 * Rules, as implemented:
 * - Names are runs of `[A-Za-z0-9_]`. A token that is not immediately
 *   followed by `=` and a quote (for example a prefixed name such as
 *   `xmlns:p`) is skipped up to the next whitespace.
 * - Values run to the matching closing quote and are returned verbatim (no
 *   entity decoding). Both quote styles are accepted because XML allows
 *   either.
 * - An unterminated value keeps the rest of the string and ends parsing.
 * - A repeated name keeps the last value.
 *
 * @param {string} str - Attribute portion of a tag.
 * @returns {Object<string, string>} Map of attribute name to raw value.
 */
function parseAttrs(str) {
  const isSpace = (c) => c === 0x20 || c === 0x09 || c === 0x0a || c === 0x0d;
  const isWord = (c) => (c >= 0x30 && c <= 0x39) || // 0-9
    (c >= 0x41 && c <= 0x5a) || // A-Z
    (c >= 0x61 && c <= 0x7a) || // a-z
    c === 0x5f; // _
  const attrs = {};
  const n = str.length;
  let i = 0;
  while (i < n) {
    // Skip whitespace.
    while (i < n && isSpace(str.charCodeAt(i))) {
      i++;
    }
    if (i >= n) {
      break;
    }
    // Read attribute name (\w+ equivalent).
    const nameStart = i;
    while (i < n && isWord(str.charCodeAt(i))) {
      i++;
    }
    if (i === nameStart) {
      // Not at an attribute — advance one char so we make progress.
      i++;
      continue;
    }
    const name = str.slice(nameStart, i);
    // Expect `=` followed directly by an opening quote. Anything else means
    // we resync to the next whitespace and try again.
    const quote = str.charCodeAt(i + 1);
    const hasQuotedValue = i + 1 < n &&
      str.charCodeAt(i) === 0x3d /* '=' */ &&
      (quote === 0x22 /* '"' */ || quote === 0x27 /* "'" */);
    if (!hasQuotedValue) {
      while (i < n && !isSpace(str.charCodeAt(i))) {
        i++;
      }
      continue;
    }
    i += 2; // past `=` and the opening quote
    const valEnd = str.indexOf(quote === 0x22 ? '"' : "'", i);
    if (valEnd < 0) {
      // Unterminated value — store what we have and stop.
      attrs[name] = str.slice(i);
      break;
    }
    attrs[name] = str.slice(i, valEnd);
    i = valEnd + 1;
  }
  return attrs;
}
@@ -907,6 +907,19 @@ class Worksheet {
907
907
  // return true if this._merges has a merge object
908
908
  return Object.values(this._merges).some(Boolean);
909
909
  }
910
+ /**
911
+ * Read-only enumeration of every merged region on this sheet
912
+ * (1-based, inclusive). Consumed by the formula engine's snapshot
913
+ * builder to detect `#SPILL!` conflicts. See issue #162 follow-up.
914
+ */
915
+ get mergedRegions() {
916
+ return Object.values(this._merges).map(merge => ({
917
+ top: merge.top,
918
+ left: merge.left,
919
+ bottom: merge.bottom,
920
+ right: merge.right
921
+ }));
922
+ }
910
923
  /**
911
924
  * Scan the range and if any cell is part of a merge, un-merge the group.
912
925
  * Note this function can affect multiple merges and merge-blocks are
@@ -136,8 +136,14 @@ function applySpillWrite(workbook, op) {
136
136
  }
137
137
  }
138
138
  else {
139
- // Ghost cell: set value (not result)
139
+ // Ghost cell: set value (not result). Defence in depth — the
140
+ // plan builder rejects spills onto merged regions, so a Merge
141
+ // type here is unreachable; guard anyway because writing
142
+ // through `MergeValue`'s setter would clobber the master.
140
143
  const targetCell = ws.getCell(targetRow, targetCol);
144
+ if (targetCell.type === CellValueTypeLike.Merge) {
145
+ continue;
146
+ }
141
147
  targetCell.value = snapshotValueToRaw(val);
142
148
  }
143
149
  }
@@ -160,9 +166,17 @@ function applyCleanupWrite(workbook, op) {
160
166
  }
161
167
  for (const { row, col } of op.cells) {
162
168
  const cell = ws.findCell(row, col);
163
- if (cell) {
164
- cell.value = null;
169
+ if (!cell) {
170
+ continue;
171
+ }
172
+ // Defence in depth: writing `null` to a merge slave would forward
173
+ // through `MergeValue`'s setter and wipe the master's value. The
174
+ // plan builder already skips merged regions in `collectStaleGhosts`,
175
+ // so this guard is belt-and-suspenders.
176
+ if (cell.type === CellValueTypeLike.Merge) {
177
+ continue;
165
178
  }
179
+ cell.value = null;
166
180
  }
167
181
  }
168
182
  // ============================================================================
@@ -95,13 +95,24 @@ function buildWorksheetSnapshot(ws, date1904) {
95
95
  ? { top: dims.top, left: dims.left, bottom: dims.bottom, right: dims.right }
96
96
  : null;
97
97
  const tables = buildTables(ws);
98
+ // Defensive copy — snapshot must not alias host-owned arrays.
99
+ const hostMergedRegions = ws.mergedRegions;
100
+ const mergedRegions = hostMergedRegions
101
+ ? hostMergedRegions.map(r => ({
102
+ top: r.top,
103
+ left: r.left,
104
+ bottom: r.bottom,
105
+ right: r.right
106
+ }))
107
+ : [];
98
108
  return {
99
109
  id: ws.id,
100
110
  name: ws.name,
101
111
  dimensions,
102
112
  cells,
103
113
  hiddenRows,
104
- tables
114
+ tables,
115
+ mergedRegions
105
116
  };
106
117
  }
107
118
  // ============================================================================
@@ -113,6 +124,14 @@ function buildCellSnapshot(cell, row, col, date1904) {
113
124
  if (cellType === CellValueTypeLike.Null) {
114
125
  return null;
115
126
  }
127
+ // Skip merge slaves — Excel treats them as blank for formula
128
+ // purposes, but the host's `MergeValue` proxy would forward
129
+ // `cell.value` from the master, so letting them into `cells` would
130
+ // double-count master values in range aggregates. See issue #162
131
+ // and the `Merge` case in `CellValueTypeLike`.
132
+ if (cellType === CellValueTypeLike.Merge) {
133
+ return null;
134
+ }
116
135
  // ── Formula cells ──
117
136
  if (cellType === CellValueTypeLike.Formula) {
118
137
  return buildFormulaCellSnapshot(cell, row, col, date1904);
@@ -247,6 +247,14 @@ activeSpillTargets) {
247
247
  if (inst.row + arr.height - 1 > 1048576 || inst.col + arr.width - 1 > 16384) {
248
248
  return "error";
249
249
  }
250
+ // Reject if the source cell itself sits inside a merged region.
251
+ // Excel reports #SPILL! whenever a dynamic-array formula is placed
252
+ // in a merged cell, even when the ghosts land outside the merge.
253
+ // The ghost loop below skips `(r=0, c=0)` and the master's value is
254
+ // already in `cells`, so it would not catch this case.
255
+ if (isInMergedRegion(ws, inst.row, inst.col)) {
256
+ return "error";
257
+ }
250
258
  // Check spill availability: verify all target ghost cells are unoccupied
251
259
  for (let r = 0; r < arr.height; r++) {
252
260
  for (let c = 0; c < arr.width; c++) {
@@ -261,6 +269,16 @@ activeSpillTargets) {
261
269
  if (activeSpillTargets.has(targetKey)) {
262
270
  return "error";
263
271
  }
272
+ // Refuse to spill onto any cell that belongs to a merged region.
273
+ // The cell itself may be a merge slave — which the snapshot
274
+ // builder filters out of `ws.cells`, so the value/formula checks
275
+ // below would treat it as empty — but writing there would mutate
276
+ // the master via `MergeValue`'s setter in `@excel/cell` and
277
+ // silently corrupt the merge. Excel reports `#SPILL!` whenever a
278
+ // dynamic-array result tries to land in a merge.
279
+ if (isInMergedRegion(ws, targetRow, targetCol)) {
280
+ return "error";
281
+ }
264
282
  // Check if the cell is a ghost from ANY previous spill.
265
283
  // If the user hasn't modified it, it's safe to overwrite — the
266
284
  // originating formula will clean it up (or we'll overwrite it).
@@ -349,6 +367,19 @@ function collectStaleGhosts(region, previousGhosts, snapshot) {
349
367
  }
350
368
  const targetRow = region.sourceRow + r;
351
369
  const targetCol = region.sourceCol + c;
370
+ // If the user (or a previous edit) has placed this former ghost
371
+ // inside a merged region, skip it. The cell is now either a merge
372
+ // master (carrying the user's intentional value) or a merge slave
373
+ // (whose `cell.value = null` writeback would forward through
374
+ // `MergeValue`'s setter and clobber the master). Either way,
375
+ // cleanup must not touch it. The snapshot builder filters merge
376
+ // slaves out of `ws.cells` (see issue #162), so the
377
+ // `isGhostUnmodified` check below would otherwise miss this case
378
+ // — `cell` would be `undefined`, which currently means
379
+ // "unmodified, safe to wipe".
380
+ if (isInMergedRegion(ws, targetRow, targetCol)) {
381
+ continue;
382
+ }
352
383
  const targetKey = spillCellKeyFromId(region.worksheetId, targetRow, targetCol);
353
384
  const cell = ws.cells.get(snapshotCellKey(targetRow, targetCol));
354
385
  if (isGhostUnmodified(cell, targetKey, previousGhosts)) {
@@ -380,6 +411,22 @@ function emitPreviousSpillCleanup(previousRegion, previousGhosts, snapshot, oper
380
411
  cells: cleanupCells
381
412
  });
382
413
  }
414
/**
 * Test whether `(row, col)` falls inside any merged region of `ws`.
 *
 * Linear scan — merge counts per sheet are small in practice. The
 * snapshot builder filters merge slaves out of `ws.cells`, so callers
 * use this helper to recover the "is this cell part of a merge?"
 * signal that the cell map alone no longer carries.
 *
 * A snapshot that carries no `mergedRegions` list at all is treated as
 * having no merges rather than raising a TypeError.
 *
 * @param ws - Worksheet snapshot; `ws.mergedRegions` is an iterable of
 *   `{ top, left, bottom, right }` bounds (inclusive).
 * @param {number} row - Row to test.
 * @param {number} col - Column to test.
 * @returns {boolean} True when the cell lies within at least one region.
 */
function isInMergedRegion(ws, row, col) {
  const regions = ws.mergedRegions;
  if (!regions) {
    return false;
  }
  for (const { top, left, bottom, right } of regions) {
    const rowInside = row >= top && row <= bottom;
    const colInside = col >= left && col <= right;
    if (rowInside && colInside) {
      return true;
    }
  }
  return false;
}
383
430
  function isGhostUnmodified(cell, ghostKey, previousGhosts) {
384
431
  if (!cell) {
385
432
  return true;
@@ -13,17 +13,27 @@
13
13
  // ValueType — numeric mirror of `@excel/enums` ValueType
14
14
  // ============================================================================
15
15
  /**
16
- * Numeric cell-type tag exposed by host cells. The engine only compares
17
- * against `Null` and `Formula`; any other value is treated as a scalar
18
- * literal.
16
+ * Numeric cell-type tag exposed by host cells. The engine compares
17
+ * against `Null`, `Merge`, and `Formula`; any other value is treated as
18
+ * a scalar literal.
19
+ *
20
+ * `Merge` identifies a non-master cell inside a merged region. The
21
+ * host's in-memory model may proxy `cell.value` from slaves to the
22
+ * master (see `MergeValue` in `@excel/cell`), so the snapshot builder
23
+ * must filter merge slaves out — otherwise range aggregates count the
24
+ * master's value once per slave. See issue #162.
19
25
  *
20
26
  * Kept as inline numeric literals (not an enum) so this file stays free
21
27
  * of runtime dependencies. The `const` object and `type` alias share a
22
28
  * name via TypeScript's declaration merging — the value form
23
29
  * (`CellValueTypeLike.Null`, `CellValueTypeLike.Formula`) is used at
24
30
  * comparison sites, the type form annotates `CellLike.type`.
31
+ *
32
+ * The numeric values must stay in sync with `ValueType` in
33
+ * `@excel/enums`, which is what `@excel/cell` writes into `cell.type`.
25
34
  */
26
35
  export const CellValueTypeLike = {
27
36
  Null: 0,
37
+ Merge: 1,
28
38
  Formula: 6
29
39
  };
@@ -24,7 +24,7 @@ import { PdfContentStream } from "../core/pdf-stream.js";
24
24
  import { PdfWriter } from "../core/pdf-writer.js";
25
25
  import { writePdfAMetadata, writePdfAOutputIntent } from "../core/pdfa.js";
26
26
  import { FontManager } from "../font/font-manager.js";
27
- import { discoverSystemFontCandidates } from "../font/system-fonts.js";
27
+ import { iterateSystemFontCandidates } from "../font/system-fonts.js";
28
28
  import { parseTtf } from "../font/ttf-parser.js";
29
29
  import { wrapTextLines, emitTextWithMatrix, alphaGsName } from "../render/page-renderer.js";
30
30
  import { writeImageXObject } from "./image-utils.js";
@@ -839,7 +839,7 @@ export class PdfDocumentBuilder {
839
839
  if (nonWinAnsi.size > 0) {
840
840
  // Try auto-discovery unless the caller opted out.
841
841
  if (!this._disableFontAutoDiscovery) {
842
- for (const candidate of discoverSystemFontCandidates()) {
842
+ for (const candidate of iterateSystemFontCandidates()) {
843
843
  try {
844
844
  const testTtf = parseTtf(candidate);
845
845
  const allCovered = [...nonWinAnsi].every(cp => testTtf.cmap.has(cp));