@cj-tech-master/excelts 9.5.5 → 9.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dist/browser/modules/excel/worksheet.d.ts +11 -0
  2. package/dist/browser/modules/excel/worksheet.js +13 -0
  3. package/dist/browser/modules/formula/integration/apply-writeback-plan.js +17 -3
  4. package/dist/browser/modules/formula/integration/workbook-adapter.js +20 -1
  5. package/dist/browser/modules/formula/integration/workbook-snapshot.d.ts +12 -0
  6. package/dist/browser/modules/formula/materialize/build-writeback-plan.js +47 -0
  7. package/dist/browser/modules/formula/materialize/types.d.ts +19 -3
  8. package/dist/browser/modules/formula/materialize/types.js +13 -3
  9. package/dist/browser/modules/pdf/builder/document-builder.js +2 -2
  10. package/dist/browser/modules/pdf/font/system-fonts.d.ts +24 -4
  11. package/dist/browser/modules/pdf/font/system-fonts.js +76 -32
  12. package/dist/browser/modules/pdf/render/pdf-exporter.js +6 -3
  13. package/dist/browser/modules/word/advanced/field-engine.js +151 -23
  14. package/dist/browser/modules/word/advanced/math-convert.js +2 -1
  15. package/dist/browser/modules/word/advanced/style-map.js +44 -6
  16. package/dist/browser/modules/word/convert/html/html-import.js +434 -71
  17. package/dist/browser/modules/word/convert/markdown/markdown-renderer.js +11 -3
  18. package/dist/browser/modules/word/layout/layout-full.js +4 -1
  19. package/dist/browser/modules/word/security/digital-signatures.js +160 -33
  20. package/dist/browser/modules/word/security/encryption.js +109 -9
  21. package/dist/cjs/modules/excel/worksheet.js +13 -0
  22. package/dist/cjs/modules/formula/integration/apply-writeback-plan.js +17 -3
  23. package/dist/cjs/modules/formula/integration/workbook-adapter.js +20 -1
  24. package/dist/cjs/modules/formula/materialize/build-writeback-plan.js +47 -0
  25. package/dist/cjs/modules/formula/materialize/types.js +13 -3
  26. package/dist/cjs/modules/pdf/builder/document-builder.js +1 -1
  27. package/dist/cjs/modules/pdf/font/system-fonts.js +77 -32
  28. package/dist/cjs/modules/pdf/render/pdf-exporter.js +5 -2
  29. package/dist/cjs/modules/word/advanced/field-engine.js +151 -23
  30. package/dist/cjs/modules/word/advanced/math-convert.js +2 -1
  31. package/dist/cjs/modules/word/advanced/style-map.js +44 -6
  32. package/dist/cjs/modules/word/convert/html/html-import.js +434 -71
  33. package/dist/cjs/modules/word/convert/markdown/markdown-renderer.js +11 -3
  34. package/dist/cjs/modules/word/layout/layout-full.js +4 -1
  35. package/dist/cjs/modules/word/security/digital-signatures.js +160 -33
  36. package/dist/cjs/modules/word/security/encryption.js +109 -9
  37. package/dist/esm/modules/excel/worksheet.js +13 -0
  38. package/dist/esm/modules/formula/integration/apply-writeback-plan.js +17 -3
  39. package/dist/esm/modules/formula/integration/workbook-adapter.js +20 -1
  40. package/dist/esm/modules/formula/materialize/build-writeback-plan.js +47 -0
  41. package/dist/esm/modules/formula/materialize/types.js +13 -3
  42. package/dist/esm/modules/pdf/builder/document-builder.js +2 -2
  43. package/dist/esm/modules/pdf/font/system-fonts.js +76 -32
  44. package/dist/esm/modules/pdf/render/pdf-exporter.js +6 -3
  45. package/dist/esm/modules/word/advanced/field-engine.js +151 -23
  46. package/dist/esm/modules/word/advanced/math-convert.js +2 -1
  47. package/dist/esm/modules/word/advanced/style-map.js +44 -6
  48. package/dist/esm/modules/word/convert/html/html-import.js +434 -71
  49. package/dist/esm/modules/word/convert/markdown/markdown-renderer.js +11 -3
  50. package/dist/esm/modules/word/layout/layout-full.js +4 -1
  51. package/dist/esm/modules/word/security/digital-signatures.js +160 -33
  52. package/dist/esm/modules/word/security/encryption.js +109 -9
  53. package/dist/iife/excelts.iife.js +40 -26
  54. package/dist/iife/excelts.iife.js.map +1 -1
  55. package/dist/iife/excelts.iife.min.js +3 -3
  56. package/dist/types/modules/excel/worksheet.d.ts +11 -0
  57. package/dist/types/modules/formula/integration/workbook-snapshot.d.ts +12 -0
  58. package/dist/types/modules/formula/materialize/types.d.ts +19 -3
  59. package/dist/types/modules/pdf/font/system-fonts.d.ts +24 -4
  60. package/package.json +1 -1
@@ -67,73 +67,342 @@ function tokenize(html) {
67
67
  // instructions before tokenising — none of them should appear as text
68
68
  // in the document body. The previous regex treated `<!doctype html>`
69
69
  // as a text node containing `"!doctype html>"`.
70
- const stripped = html
71
- .replace(/<!--[\s\S]*?-->/g, "")
72
- .replace(/<!doctype[^>]*>/gi, "")
73
- .replace(/<!\[CDATA\[[\s\S]*?\]\]>/g, "")
74
- .replace(/<\?[\s\S]*?\?>/g, "");
75
- // Match a tag, OR a run of text. Text is anything-up-to-the-next-tag,
76
- // with the addition that a `<` not followed by a tag-like character is
77
- // treated as literal text (so "1 < 2" / "a<b" / "<<" survive instead
78
- // of being silently swallowed).
79
- const re = /<\/?([a-zA-Z][a-zA-Z0-9]*)((?:\s+[^>]*?)?)\/?\s*>|((?:[^<]|<(?![/a-zA-Z]))+)/g;
80
- const tagRe = /^<(\/?)([a-zA-Z][a-zA-Z0-9]*)((?:\s+[^>]*?)?)(\/?)\s*>$/;
81
- let m;
82
- while ((m = re.exec(stripped)) !== null) {
83
- const fullMatch = m[0];
84
- if (m[3] !== undefined) {
85
- // Text node
86
- const text = decodeHtmlEntities(m[3]);
70
+ //
71
+ // We use a single linear scan rather than chained `.replace()` calls so
72
+ // we are immune to two CodeQL findings:
73
+ // - Incomplete multi-character sanitization: chained replaces let
74
+ // payloads such as `<!--<!--x-->-->` leak through (each pass only
75
+ // removes one layer, leaving `-->` behind).
76
+ // - Polynomial regular expression on uncontrolled data: lazy
77
+ // quantifiers like `<!--[\s\S]*?-->` exhibit catastrophic
78
+ // backtracking on adversarial input.
79
+ const stripped = stripSgmlNoise(html);
80
+ // The tokenizer is implemented as a linear index scan rather than a
81
+ // global regex (`/<\/?…(?:\s+[^>]*?)?\/?\s*>|((?:[^<]|…)+)/g`). The
82
+ // previous regex form combined an optional lazy attribute span with
83
+ // an optional `\/?` and optional trailing whitespace, which CodeQL
84
+ // flagged as polynomial-redos: an adversarial payload such as
85
+ // `<a` followed by many spaces but no closing `>` triggered
86
+ // catastrophic backtracking.
87
+ //
88
+ // The scan below is strictly O(n):
89
+ // - At every position we either advance one character or jump
90
+ // forward to the next `<` / `>` via a single `indexOf`.
91
+ // - Attribute parsing is delegated to `parseHtmlAttrs`, which is
92
+ // itself a linear scanner.
93
+ const n = stripped.length;
94
+ let i = 0;
95
+ while (i < n) {
96
+ // Scan a text run: everything up to the next position that begins
97
+ // a tag (`<` followed by a letter, or `</` followed by a letter).
98
+ // Bare `<` characters and unfinished tag-like fragments are kept
99
+ // inside the text run so that input such as `1 < 2`, `a<b<c`,
100
+ // `<<<<` or `<unfinished` (with no closing `>` anywhere) is not
101
+ // shattered into a stream of single-character runs.
102
+ if (stripped.charCodeAt(i) !== 0x3c /* '<' */ || !isTagStart(stripped, i)) {
103
+ const textEnd = scanTextEnd(stripped, i);
104
+ const raw = stripped.slice(i, textEnd);
105
+ const text = decodeHtmlEntities(raw);
87
106
  if (text) {
88
107
  tokens.push({ type: "text", value: text });
89
108
  }
109
+ i = textEnd;
110
+ if (i >= n) {
111
+ break;
112
+ }
113
+ // Fall through: position `i` is now at a real tag start.
114
+ }
115
+ // We are at '<' that introduces a tag (guaranteed by the
116
+ // `isTagStart` check above).
117
+ const next = stripped.charCodeAt(i + 1);
118
+ const isClose = next === 0x2f; /* '/' */
119
+ const nameStart = isClose ? i + 2 : i + 1;
120
+ // Defensive: the loop guard above should already ensure this, but
121
+ // keep the check so a future refactor cannot silently turn a bare
122
+ // `<` into an attempted tag parse.
123
+ if (!isAsciiAlpha(stripped.charCodeAt(nameStart))) {
124
+ tokens.push({ type: "text", value: "<" });
125
+ i++;
126
+ continue;
90
127
  }
91
- else {
92
- const tagMatch = tagRe.exec(fullMatch);
93
- if (tagMatch) {
94
- const isClose = tagMatch[1] === "/";
95
- const tag = tagMatch[2].toLowerCase();
96
- const attrStr = tagMatch[3];
97
- const selfClose = tagMatch[4] === "/" || VOID_ELEMENTS.has(tag);
98
- const attrs = parseHtmlAttrs(attrStr);
99
- if (isClose) {
100
- tokens.push({ type: "close", tag, attrs: {} });
101
- }
102
- else if (selfClose) {
103
- tokens.push({ type: "selfclose", tag, attrs });
104
- }
105
- else {
106
- tokens.push({ type: "open", tag, attrs });
107
- // Raw-text elements: their body must not be parsed as markup. Skip
108
- // forward to the matching close tag and either capture the body as
109
- // a single text token (for <style>, which is post-processed by
110
- // extractStyleRules) or discard it entirely (for <script>, etc.).
111
- // Without this, embedded scripts would leak into the document body.
112
- if (RAW_TEXT_ELEMENTS.has(tag)) {
113
- const closeRe = new RegExp(`</${tag}\\s*>`, "i");
114
- closeRe.lastIndex = re.lastIndex;
115
- const startBody = re.lastIndex;
116
- const closeMatch = closeRe.exec(stripped);
117
- if (closeMatch) {
118
- const body = stripped.slice(startBody, closeMatch.index);
119
- if (RAW_TEXT_PRESERVE_BODY.has(tag)) {
120
- tokens.push({ type: "text", value: body });
121
- }
122
- tokens.push({ type: "close", tag, attrs: {} });
123
- re.lastIndex = closeMatch.index + closeMatch[0].length;
124
- }
125
- else {
126
- // No closing tag discard the rest of the input for this
127
- // raw-text element to avoid emitting markup as text.
128
- re.lastIndex = stripped.length;
129
- }
130
- }
128
+ // Read the tag name: [A-Za-z][A-Za-z0-9]*.
129
+ let p = nameStart + 1;
130
+ while (p < n) {
131
+ const c = stripped.charCodeAt(p);
132
+ if (!isAsciiAlpha(c) && !isAsciiDigit(c)) {
133
+ break;
134
+ }
135
+ p++;
136
+ }
137
+ const tagName = stripped.slice(nameStart, p).toLowerCase();
138
+ // Find the closing '>' of the tag. We have to be careful not to
139
+ // mistake a '>' inside a quoted attribute value for the tag end.
140
+ const tagEnd = findTagEnd(stripped, p);
141
+ if (tagEnd < 0) {
142
+ // No closing '>' — the rest of the input is malformed; treat the
143
+ // remainder as text. (Original regex would simply not match and
144
+ // leave the same characters as text via the alternation.)
145
+ const text = decodeHtmlEntities(stripped.slice(i));
146
+ if (text) {
147
+ tokens.push({ type: "text", value: text });
148
+ }
149
+ // `break` exits the loop directly; no need to assign `i = n`
150
+ // first (CodeQL js/useless-assignment-to-local).
151
+ break;
152
+ }
153
+ // Inside [p, tagEnd) lie attributes (and possibly a trailing '/').
154
+ let inner = stripped.slice(p, tagEnd);
155
+ // Detect self-close: trailing '/'. Strip it so it is not parsed as
156
+ // an attribute name.
157
+ let selfClose = false;
158
+ // Trim trailing whitespace, then a single '/'.
159
+ let innerEnd = inner.length;
160
+ while (innerEnd > 0 && isHtmlSpace(inner.charCodeAt(innerEnd - 1))) {
161
+ innerEnd--;
162
+ }
163
+ if (innerEnd > 0 && inner.charCodeAt(innerEnd - 1) === 0x2f) {
164
+ selfClose = true;
165
+ innerEnd--;
166
+ }
167
+ inner = inner.slice(0, innerEnd);
168
+ if (isClose) {
169
+ tokens.push({ type: "close", tag: tagName, attrs: {} });
170
+ i = tagEnd + 1;
171
+ continue;
172
+ }
173
+ const attrs = parseHtmlAttrs(inner);
174
+ const isVoidElement = VOID_ELEMENTS.has(tagName);
175
+ if (selfClose || isVoidElement) {
176
+ tokens.push({ type: "selfclose", tag: tagName, attrs });
177
+ i = tagEnd + 1;
178
+ continue;
179
+ }
180
+ tokens.push({ type: "open", tag: tagName, attrs });
181
+ i = tagEnd + 1;
182
+ // Raw-text elements: their body must not be parsed as markup.
183
+ if (RAW_TEXT_ELEMENTS.has(tagName)) {
184
+ const closeIdx = findRawTextClose(stripped, i, tagName);
185
+ if (closeIdx === null) {
186
+ // No closing tag — discard the rest of the input for this
187
+ // raw-text element to avoid emitting markup as text.
188
+ i = n;
189
+ }
190
+ else {
191
+ const body = stripped.slice(i, closeIdx.bodyEnd);
192
+ if (RAW_TEXT_PRESERVE_BODY.has(tagName)) {
193
+ tokens.push({ type: "text", value: body });
131
194
  }
195
+ tokens.push({ type: "close", tag: tagName, attrs: {} });
196
+ i = closeIdx.next;
132
197
  }
133
198
  }
134
199
  }
135
200
  return tokens;
136
201
  }
/**
 * Report whether a UTF-16 code unit is an ASCII letter.
 *
 * @param {number} c - Code unit, e.g. the result of `charCodeAt`.
 * @returns {boolean} True for `A`-`Z` (0x41-0x5a) or `a`-`z` (0x61-0x7a);
 *   false for anything else, including `NaN`.
 */
function isAsciiAlpha(c) {
    const isUpper = c >= 0x41 && c <= 0x5a;
    const isLower = c >= 0x61 && c <= 0x7a;
    return isUpper || isLower;
}
/**
 * Report whether a UTF-16 code unit is an ASCII decimal digit.
 *
 * @param {number} c - Code unit, e.g. the result of `charCodeAt`.
 * @returns {boolean} True for `0`-`9` (0x30-0x39); false otherwise,
 *   including `NaN`.
 */
function isAsciiDigit(c) {
    if (c < 0x30) {
        return false;
    }
    return c <= 0x39;
}
/**
 * Report whether a UTF-16 code unit is one of the five whitespace
 * characters this scanner recognises: space, tab, line feed, form feed
 * or carriage return.
 *
 * @param {number} c - Code unit, e.g. the result of `charCodeAt`.
 * @returns {boolean} True for 0x20, 0x09, 0x0a, 0x0c or 0x0d.
 */
function isHtmlSpace(c) {
    switch (c) {
        case 0x20: // space
        case 0x09: // tab
        case 0x0a: // line feed
        case 0x0c: // form feed
        case 0x0d: // carriage return
            return true;
        default:
            return false;
    }
}
/**
 * Locate the end of the text run that begins at `from`.
 *
 * The run stops at the first '<' for which `isTagStart` reports a tag
 * (an open or close tag introducer). Any other '<' — for example the
 * one in `1 < 2` — stays inside the run.
 *
 * @param {string} s - The markup being scanned.
 * @param {number} from - Index at which the text run starts.
 * @returns {number} Index of the tag-introducing '<', or `s.length`
 *   when no tag start follows.
 */
function scanTextEnd(s, from) {
    for (let lt = s.indexOf("<", from); lt !== -1; lt = s.indexOf("<", lt + 1)) {
        if (isTagStart(s, lt)) {
            return lt;
        }
        // Literal '<' — look for the next candidate.
    }
    return s.length;
}
/**
 * Decide whether the character at `pos` introduces a tag.
 *
 * A tag start is '<' immediately followed by an ASCII letter (open
 * tag), or '</' immediately followed by an ASCII letter (close tag).
 * Everything else is treated by callers as a literal '<'.
 *
 * @param {string} s - The markup being scanned.
 * @param {number} pos - Index to test.
 * @returns {boolean} True when a tag begins at `pos`.
 */
function isTagStart(s, pos) {
    if (s.charCodeAt(pos) !== 0x3c /* '<' */) {
        return false;
    }
    const following = s.charCodeAt(pos + 1);
    // For a close tag the name's first letter sits one position later.
    const letterAt = following === 0x2f /* '/' */ ? pos + 2 : pos + 1;
    return isAsciiAlpha(s.charCodeAt(letterAt));
}
/**
 * Find the index of the '>' that closes the tag whose name ends just
 * before `from`.
 *
 * A '>' inside a quoted attribute value does not end the tag, so
 * `<a href="x>y">` resolves to the final '>'. Quotes are honoured only
 * where they open an attribute value — the first non-whitespace
 * character after `=` — which is also the only place `parseHtmlAttrs`
 * treats them as quotes. A quote anywhere else (inside an unquoted
 * value such as `width=5"`, or inside a name such as `don't`) is an
 * ordinary character. Treating it as a quote opener would let one stray
 * quote swallow later markup or turn the rest of the document into
 * literal text.
 *
 * @param {string} s - The markup being scanned.
 * @param {number} from - Index just past the tag name.
 * @returns {number} Index of the closing '>', or -1 when the input ends
 *   first (including inside an unterminated quoted value).
 */
function findTagEnd(s, from) {
    const n = s.length;
    // True after '=' until the value's first character is seen.
    let awaitingValue = false;
    // True while inside an unquoted value (ends at whitespace or '>').
    let inUnquotedValue = false;
    let i = from;
    while (i < n) {
        const c = s.charCodeAt(i);
        // Outside a quoted value '>' always ends the tag.
        if (c === 0x3e /* '>' */) {
            return i;
        }
        if (inUnquotedValue) {
            if (isHtmlSpace(c)) {
                inUnquotedValue = false;
            }
            i++;
            continue;
        }
        if (awaitingValue) {
            if (isHtmlSpace(c)) {
                i++;
                continue;
            }
            awaitingValue = false;
            if (c === 0x22 /* '"' */ || c === 0x27 /* "'" */) {
                const close = s.indexOf(c === 0x22 ? '"' : "'", i + 1);
                if (close < 0) {
                    return -1;
                }
                i = close + 1;
                continue;
            }
            inUnquotedValue = true;
            i++;
            continue;
        }
        if (c === 0x3d /* '=' */) {
            awaitingValue = true;
        }
        i++;
    }
    return -1;
}
/**
 * Locate the close tag of a raw-text element (e.g. `</script>`),
 * searching from `from`.
 *
 * The tag name is matched with ASCII-only case folding of the input,
 * so `tagName` is compared as given. Whitespace between the name and
 * '>' is allowed. Uses a plain index scan instead of a dynamically
 * built `RegExp`.
 *
 * @param {string} s - The markup being scanned.
 * @param {number} from - Index where the element body starts.
 * @param {string} tagName - Element name to match.
 * @returns {{bodyEnd: number, next: number} | null} `bodyEnd` is the
 *   index of the `</` that starts the close tag and `next` the index
 *   just past its '>'; `null` when no matching close tag exists.
 */
function findRawTextClose(s, from, tagName) {
    const total = s.length;
    const nameLen = tagName.length;
    let cursor = from;
    while (cursor < total) {
        const open = s.indexOf("</", cursor);
        if (open === -1) {
            return null;
        }
        const nameAt = open + 2;
        // Whatever happens below, the next search resumes after this `</`.
        cursor = nameAt;
        let matched = 0;
        while (matched < nameLen) {
            let code = s.charCodeAt(nameAt + matched);
            if (code >= 0x41 && code <= 0x5a) {
                code += 0x20; // fold ASCII upper-case to lower-case
            }
            if (code !== tagName.charCodeAt(matched)) {
                break;
            }
            matched += 1;
        }
        if (matched < nameLen) {
            continue;
        }
        let gt = nameAt + nameLen;
        while (gt < total && isHtmlSpace(s.charCodeAt(gt))) {
            gt += 1;
        }
        if (gt < total && s.charCodeAt(gt) === 0x3e /* '>' */) {
            return { bodyEnd: open, next: gt + 1 };
        }
    }
    return null;
}
/**
 * Remove HTML comments, doctype declarations, CDATA sections and
 * processing instructions from `input` in one left-to-right scan.
 *
 * Recognised constructs and their terminators:
 *   - `<!-- ... -->`
 *   - `<![CDATA[ ... ]]>`
 *   - `<!doctype ...>` (keyword matched case-insensitively)
 *   - `<? ... ?>`
 *
 * Each construct ends at the FIRST occurrence of its terminator, so the
 * constructs do not nest: for `<!--<!--x-->-->` the trailing `-->` is
 * left in the output as literal text. When the terminator is missing
 * the opener is not treated as a construct at all; its '<' is emitted
 * literally and scanning resumes at the next character, so a stray
 * `<!--` never swallows the rest of the document.
 *
 * The scan is linear in the input length. Text between '<' characters
 * is copied in slices, and once a search for a terminator has failed it
 * is not repeated: every later search for that terminator would start
 * further right and fail too. Without that memo, input made of
 * repeated unterminated openers (e.g. `"<!--".repeat(k)`) would rescan
 * to the end of the input for each opener, which is quadratic.
 *
 * @param {string} input - Raw HTML.
 * @returns {string} `input` with the constructs above removed.
 */
function stripSgmlNoise(input) {
    const n = input.length;
    // Terminators already known to be absent from the rest of the input.
    const exhausted = new Set();
    const findTerminator = (marker, from) => {
        if (exhausted.has(marker)) {
            return -1;
        }
        const at = input.indexOf(marker, from);
        if (at < 0) {
            exhausted.add(marker);
        }
        return at;
    };
    let out = "";
    let i = 0;
    while (i < n) {
        const lt = input.indexOf("<", i);
        if (lt < 0) {
            out += input.slice(i);
            break;
        }
        // Copy the text run preceding this '<' in one piece.
        out += input.slice(i, lt);
        i = lt;
        // Index just past the construct, or -1 when '<' is literal.
        let resume = -1;
        if (input.startsWith("<!--", i)) {
            const end = findTerminator("-->", i + 4);
            resume = end < 0 ? -1 : end + 3;
        }
        else if (input.startsWith("<![CDATA[", i)) {
            const end = findTerminator("]]>", i + 9);
            resume = end < 0 ? -1 : end + 3;
        }
        else if (input.charCodeAt(i + 1) === 0x21 /* '!' */ &&
            input.slice(i + 2, i + 9).toLowerCase() === "doctype") {
            const end = findTerminator(">", i + 9);
            resume = end < 0 ? -1 : end + 1;
        }
        else if (input.charCodeAt(i + 1) === 0x3f /* '?' */) {
            const end = findTerminator("?>", i + 2);
            resume = end < 0 ? -1 : end + 2;
        }
        if (resume < 0) {
            // Not a (terminated) construct — keep the '<' as text.
            out += "<";
            i++;
        }
        else {
            i = resume;
        }
    }
    return out;
}
137
406
  /**
138
407
  * HTML elements whose body is not parsed as markup. Their content is either
139
408
  * preserved (style) for downstream processing or discarded entirely.
@@ -204,26 +473,110 @@ function extractStyleRules(tokens) {
204
473
  }
205
474
  return result;
206
475
  }
/**
 * Parse the attribute portion of a start tag, e.g.
 * `class="x" id='y' disabled href=foo`, into a name → value map.
 *
 * Written as an index scan rather than a global regular expression.
 * Rules:
 *   - Names start with an ASCII letter or `_` and continue with ASCII
 *     letters, digits, `_` or `-`; they are stored lower-cased.
 *   - Values may be double-quoted, single-quoted or unquoted (a run of
 *     non-whitespace characters).
 *   - An attribute without `=`, or with `=` at the very end of the
 *     input, gets the empty string.
 *   - An unterminated quoted value takes the rest of the input and
 *     parsing stops.
 *   - Characters that cannot start a name are skipped one at a time.
 *   - A repeated name overwrites the earlier value.
 *
 * @param {string} str - Text between the tag name and the closing '>'.
 * @returns {Object<string, string>} Map of attribute name to value.
 */
function parseHtmlAttrs(str) {
    const attrs = {};
    const len = str.length;
    const skipSpaces = (pos) => {
        let at = pos;
        while (at < len && isHtmlSpace(str.charCodeAt(at))) {
            at += 1;
        }
        return at;
    };
    const isNameStart = (code) => isAsciiAlpha(code) || code === 0x5f /* '_' */;
    const isNamePart = (code) => isNameStart(code) || isAsciiDigit(code) || code === 0x2d /* '-' */;
    // Consume one attribute (or one stray character) beginning at the
    // non-space index `start`; returns the index to resume from.
    const consumeOne = (start) => {
        if (!isNameStart(str.charCodeAt(start))) {
            return start + 1;
        }
        let nameEnd = start + 1;
        while (nameEnd < len && isNamePart(str.charCodeAt(nameEnd))) {
            nameEnd += 1;
        }
        const key = str.slice(start, nameEnd).toLowerCase();
        const eqAt = skipSpaces(nameEnd);
        if (eqAt >= len || str.charCodeAt(eqAt) !== 0x3d /* '=' */) {
            // Boolean attribute.
            attrs[key] = "";
            return nameEnd;
        }
        const valueAt = skipSpaces(eqAt + 1);
        if (valueAt >= len) {
            attrs[key] = "";
            return valueAt;
        }
        const opener = str.charCodeAt(valueAt);
        if (opener === 0x22 /* '"' */ || opener === 0x27 /* "'" */) {
            const closeAt = str.indexOf(str[valueAt], valueAt + 1);
            if (closeAt < 0) {
                // Unterminated quote: keep the remainder and finish.
                attrs[key] = str.slice(valueAt + 1);
                return len;
            }
            attrs[key] = str.slice(valueAt + 1, closeAt);
            return closeAt + 1;
        }
        let valueEnd = valueAt;
        while (valueEnd < len && !isHtmlSpace(str.charCodeAt(valueEnd))) {
            valueEnd += 1;
        }
        attrs[key] = str.slice(valueAt, valueEnd);
        return valueEnd;
    };
    let pos = skipSpaces(0);
    while (pos < len) {
        pos = skipSpaces(consumeOne(pos));
    }
    return attrs;
}
/**
 * Decode numeric and named character references in `text`.
 *
 * Everything is decoded in a single `replace` pass so each source
 * position is decoded at most once: `&amp;lt;` becomes `&lt;`, not `<`.
 *
 *   - `&#NNN;`             decimal reference, via `safeFromCodePoint`
 *   - `&#xHHH;` / `&#XHHH;` hexadecimal reference, via `safeFromCodePoint`
 *   - `&name;`             looked up in `HTML_ENTITIES`
 *
 * Named lookups only consider the table's OWN properties.
 * `HTML_ENTITIES` is a plain object literal, so an unguarded
 * `HTML_ENTITIES[name]` would also resolve members inherited from
 * `Object.prototype`; `&constructor;` or `&toString;` would then be
 * replaced by the source text of a function. Unknown names are left
 * untouched.
 *
 * @param {string} text - Text that may contain character references.
 * @returns {string} The decoded text.
 */
function decodeHtmlEntities(text) {
    return text.replace(/&(?:#(\d+)|#[xX]([a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));/g, (match, dec, hex, name) => {
        if (dec !== undefined) {
            return safeFromCodePoint(parseInt(dec, 10));
        }
        if (hex !== undefined) {
            return safeFromCodePoint(parseInt(hex, 16));
        }
        if (name !== undefined && Object.prototype.hasOwnProperty.call(HTML_ENTITIES, name)) {
            return HTML_ENTITIES[name] ?? match;
        }
        return match;
    });
}
228
581
  /**
229
582
  * Convert a numeric character reference to a string. Uses fromCodePoint so
@@ -243,6 +596,16 @@ function safeFromCodePoint(cp) {
243
596
  }
244
597
  /** Common HTML named entities mapped to their Unicode characters. */
245
598
  const HTML_ENTITIES = {
599
+ // Core XML/HTML entities — these used to be handled as standalone
600
+ // chained `.replace()` calls in `decodeHtmlEntities`. They must live
601
+ // in this table so the single-pass decoder can resolve them without
602
+ // re-running over already-decoded output (CodeQL "double unescaping").
603
+ amp: "&",
604
+ lt: "<",
605
+ gt: ">",
606
+ quot: '"',
607
+ apos: "'",
608
+ nbsp: "\u00A0",
246
609
  // Punctuation & Typography
247
610
  mdash: "\u2014",
248
611
  ndash: "\u2013",
@@ -188,8 +188,13 @@ function renderTable(state, table) {
188
188
  cellParts.push(renderInlineChildren(state, block.children).trim());
189
189
  }
190
190
  }
191
- // Escape pipe characters to prevent table structure corruption
192
- rowTexts.push(cellParts.join(" ").replace(/\|/g, "\\|"));
191
+ // Escape pipe characters to prevent table structure corruption.
192
+ // Backslashes must be escaped *first*: replacing `|` first leaves
193
+ // a literal `\|` in the source untouched, but a subsequent
194
+ // `\` → `\\` pass would then double-escape it into `\\|`,
195
+ // breaking GFM tables. CodeQL flags the single-pass form as
196
+ // "Incomplete string escaping or encoding".
197
+ rowTexts.push(cellParts.join(" ").replace(/\\/g, "\\\\").replace(/\|/g, "\\|"));
193
198
  }
194
199
  grid.push(rowTexts);
195
200
  }
@@ -470,7 +475,10 @@ function isMonospaceFont(font) {
470
475
  if (typeof font === "string") {
471
476
  return isMonospaceFontName(font);
472
477
  }
473
- if (typeof font === "object" && font !== null) {
478
+ // `!font` above already discarded `null`; `font !== null` here was
479
+ // therefore always true and CodeQL flagged it as a comparison
480
+ // between inconvertible types.
481
+ if (typeof font === "object") {
474
482
  const f = font;
475
483
  return (isMonospaceFontName(f.ascii) ||
476
484
  isMonospaceFontName(f.hAnsi));
@@ -1254,7 +1254,10 @@ function resolveColorHex(color) {
1254
1254
  if (typeof color === "string") {
1255
1255
  return color;
1256
1256
  }
1257
- if (typeof color === "object" && color !== null && "value" in color) {
1257
+ // The `!color` check above already discarded `null`; an additional
1258
+ // `color !== null` test was always true and CodeQL flagged it as a
1259
+ // comparison between inconvertible types.
1260
+ if (typeof color === "object" && "value" in color) {
1258
1261
  return color.value;
1259
1262
  }
1260
1263
  return undefined;