@cj-tech-master/excelts 9.5.5 → 9.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dist/browser/modules/excel/worksheet.d.ts +11 -0
  2. package/dist/browser/modules/excel/worksheet.js +13 -0
  3. package/dist/browser/modules/formula/integration/apply-writeback-plan.js +17 -3
  4. package/dist/browser/modules/formula/integration/workbook-adapter.js +20 -1
  5. package/dist/browser/modules/formula/integration/workbook-snapshot.d.ts +12 -0
  6. package/dist/browser/modules/formula/materialize/build-writeback-plan.js +47 -0
  7. package/dist/browser/modules/formula/materialize/types.d.ts +19 -3
  8. package/dist/browser/modules/formula/materialize/types.js +13 -3
  9. package/dist/browser/modules/pdf/builder/document-builder.js +2 -2
  10. package/dist/browser/modules/pdf/font/system-fonts.d.ts +24 -4
  11. package/dist/browser/modules/pdf/font/system-fonts.js +76 -32
  12. package/dist/browser/modules/pdf/render/pdf-exporter.js +6 -3
  13. package/dist/browser/modules/word/advanced/field-engine.js +151 -23
  14. package/dist/browser/modules/word/advanced/math-convert.js +2 -1
  15. package/dist/browser/modules/word/advanced/style-map.js +44 -6
  16. package/dist/browser/modules/word/convert/html/html-import.js +434 -71
  17. package/dist/browser/modules/word/convert/markdown/markdown-renderer.js +11 -3
  18. package/dist/browser/modules/word/layout/layout-full.js +4 -1
  19. package/dist/browser/modules/word/security/digital-signatures.js +160 -33
  20. package/dist/browser/modules/word/security/encryption.js +109 -9
  21. package/dist/cjs/modules/excel/worksheet.js +13 -0
  22. package/dist/cjs/modules/formula/integration/apply-writeback-plan.js +17 -3
  23. package/dist/cjs/modules/formula/integration/workbook-adapter.js +20 -1
  24. package/dist/cjs/modules/formula/materialize/build-writeback-plan.js +47 -0
  25. package/dist/cjs/modules/formula/materialize/types.js +13 -3
  26. package/dist/cjs/modules/pdf/builder/document-builder.js +1 -1
  27. package/dist/cjs/modules/pdf/font/system-fonts.js +77 -32
  28. package/dist/cjs/modules/pdf/render/pdf-exporter.js +5 -2
  29. package/dist/cjs/modules/word/advanced/field-engine.js +151 -23
  30. package/dist/cjs/modules/word/advanced/math-convert.js +2 -1
  31. package/dist/cjs/modules/word/advanced/style-map.js +44 -6
  32. package/dist/cjs/modules/word/convert/html/html-import.js +434 -71
  33. package/dist/cjs/modules/word/convert/markdown/markdown-renderer.js +11 -3
  34. package/dist/cjs/modules/word/layout/layout-full.js +4 -1
  35. package/dist/cjs/modules/word/security/digital-signatures.js +160 -33
  36. package/dist/cjs/modules/word/security/encryption.js +109 -9
  37. package/dist/esm/modules/excel/worksheet.js +13 -0
  38. package/dist/esm/modules/formula/integration/apply-writeback-plan.js +17 -3
  39. package/dist/esm/modules/formula/integration/workbook-adapter.js +20 -1
  40. package/dist/esm/modules/formula/materialize/build-writeback-plan.js +47 -0
  41. package/dist/esm/modules/formula/materialize/types.js +13 -3
  42. package/dist/esm/modules/pdf/builder/document-builder.js +2 -2
  43. package/dist/esm/modules/pdf/font/system-fonts.js +76 -32
  44. package/dist/esm/modules/pdf/render/pdf-exporter.js +6 -3
  45. package/dist/esm/modules/word/advanced/field-engine.js +151 -23
  46. package/dist/esm/modules/word/advanced/math-convert.js +2 -1
  47. package/dist/esm/modules/word/advanced/style-map.js +44 -6
  48. package/dist/esm/modules/word/convert/html/html-import.js +434 -71
  49. package/dist/esm/modules/word/convert/markdown/markdown-renderer.js +11 -3
  50. package/dist/esm/modules/word/layout/layout-full.js +4 -1
  51. package/dist/esm/modules/word/security/digital-signatures.js +160 -33
  52. package/dist/esm/modules/word/security/encryption.js +109 -9
  53. package/dist/iife/excelts.iife.js +40 -26
  54. package/dist/iife/excelts.iife.js.map +1 -1
  55. package/dist/iife/excelts.iife.min.js +3 -3
  56. package/dist/types/modules/excel/worksheet.d.ts +11 -0
  57. package/dist/types/modules/formula/integration/workbook-snapshot.d.ts +12 -0
  58. package/dist/types/modules/formula/materialize/types.d.ts +19 -3
  59. package/dist/types/modules/pdf/font/system-fonts.d.ts +24 -4
  60. package/package.json +1 -1
@@ -70,73 +70,342 @@ function tokenize(html) {
70
70
  // instructions before tokenising — none of them should appear as text
71
71
  // in the document body. The previous regex treated `<!doctype html>`
72
72
  // as a text node containing `"!doctype html>"`.
73
- const stripped = html
74
- .replace(/<!--[\s\S]*?-->/g, "")
75
- .replace(/<!doctype[^>]*>/gi, "")
76
- .replace(/<!\[CDATA\[[\s\S]*?\]\]>/g, "")
77
- .replace(/<\?[\s\S]*?\?>/g, "");
78
- // Match a tag, OR a run of text. Text is anything-up-to-the-next-tag,
79
- // with the addition that a `<` not followed by a tag-like character is
80
- // treated as literal text (so "1 < 2" / "a<b" / "<<" survive instead
81
- // of being silently swallowed).
82
- const re = /<\/?([a-zA-Z][a-zA-Z0-9]*)((?:\s+[^>]*?)?)\/?\s*>|((?:[^<]|<(?![/a-zA-Z]))+)/g;
83
- const tagRe = /^<(\/?)([a-zA-Z][a-zA-Z0-9]*)((?:\s+[^>]*?)?)(\/?)\s*>$/;
84
- let m;
85
- while ((m = re.exec(stripped)) !== null) {
86
- const fullMatch = m[0];
87
- if (m[3] !== undefined) {
88
- // Text node
89
- const text = decodeHtmlEntities(m[3]);
73
+ //
74
+ // We use a single linear scan rather than chained `.replace()` calls so
75
+ // we are immune to two CodeQL findings:
76
+ // - Incomplete multi-character sanitization: chained replaces let
77
+ // payloads such as `<!--<!--x-->-->` leak through (each pass only
78
+ // removes one layer, leaving `-->` behind).
79
+ // - Polynomial regular expression on uncontrolled data: lazy
80
+ // quantifiers like `<!--[\s\S]*?-->` exhibit catastrophic
81
+ // backtracking on adversarial input.
82
+ const stripped = stripSgmlNoise(html);
83
+ // The tokenizer is implemented as a linear index scan rather than a
84
+ // global regex (`/<\/?…(?:\s+[^>]*?)?\/?\s*>|((?:[^<]|…)+)/g`). The
85
+ // previous regex form combined an optional lazy attribute span with
86
+ // an optional `\/?` and optional trailing whitespace, which CodeQL
87
+ // flagged as polynomial-redos: an adversarial payload such as
88
+ // `<a` followed by many spaces but no closing `>` triggered
89
+ // catastrophic backtracking.
90
+ //
91
+ // The scan below is strictly O(n):
92
+ // - At every position we either advance one character or jump
93
+ // forward to the next `<` / `>` via a single `indexOf`.
94
+ // - Attribute parsing is delegated to `parseHtmlAttrs`, which is
95
+ // itself a linear scanner.
96
+ const n = stripped.length;
97
+ let i = 0;
98
+ while (i < n) {
99
+ // Scan a text run: everything up to the next position that begins
100
+ // a tag (`<` followed by a letter, or `</` followed by a letter).
101
+ // Bare `<` characters and unfinished tag-like fragments are kept
102
+ // inside the text run so that input such as `1 < 2`, `a<b<c`,
103
+ // `<<<<` or `<unfinished` (with no closing `>` anywhere) is not
104
+ // shattered into a stream of single-character runs.
105
+ if (stripped.charCodeAt(i) !== 0x3c /* '<' */ || !isTagStart(stripped, i)) {
106
+ const textEnd = scanTextEnd(stripped, i);
107
+ const raw = stripped.slice(i, textEnd);
108
+ const text = decodeHtmlEntities(raw);
90
109
  if (text) {
91
110
  tokens.push({ type: "text", value: text });
92
111
  }
112
+ i = textEnd;
113
+ if (i >= n) {
114
+ break;
115
+ }
116
+ // Fall through: position `i` is now at a real tag start.
117
+ }
118
+ // We are at '<' that introduces a tag (guaranteed by the
119
+ // `isTagStart` check above).
120
+ const next = stripped.charCodeAt(i + 1);
121
+ const isClose = next === 0x2f; /* '/' */
122
+ const nameStart = isClose ? i + 2 : i + 1;
123
+ // Defensive: the loop guard above should already ensure this, but
124
+ // keep the check so a future refactor cannot silently turn a bare
125
+ // `<` into an attempted tag parse.
126
+ if (!isAsciiAlpha(stripped.charCodeAt(nameStart))) {
127
+ tokens.push({ type: "text", value: "<" });
128
+ i++;
129
+ continue;
93
130
  }
94
- else {
95
- const tagMatch = tagRe.exec(fullMatch);
96
- if (tagMatch) {
97
- const isClose = tagMatch[1] === "/";
98
- const tag = tagMatch[2].toLowerCase();
99
- const attrStr = tagMatch[3];
100
- const selfClose = tagMatch[4] === "/" || VOID_ELEMENTS.has(tag);
101
- const attrs = parseHtmlAttrs(attrStr);
102
- if (isClose) {
103
- tokens.push({ type: "close", tag, attrs: {} });
104
- }
105
- else if (selfClose) {
106
- tokens.push({ type: "selfclose", tag, attrs });
107
- }
108
- else {
109
- tokens.push({ type: "open", tag, attrs });
110
- // Raw-text elements: their body must not be parsed as markup. Skip
111
- // forward to the matching close tag and either capture the body as
112
- // a single text token (for <style>, which is post-processed by
113
- // extractStyleRules) or discard it entirely (for <script>, etc.).
114
- // Without this, embedded scripts would leak into the document body.
115
- if (RAW_TEXT_ELEMENTS.has(tag)) {
116
- const closeRe = new RegExp(`</${tag}\\s*>`, "i");
117
- closeRe.lastIndex = re.lastIndex;
118
- const startBody = re.lastIndex;
119
- const closeMatch = closeRe.exec(stripped);
120
- if (closeMatch) {
121
- const body = stripped.slice(startBody, closeMatch.index);
122
- if (RAW_TEXT_PRESERVE_BODY.has(tag)) {
123
- tokens.push({ type: "text", value: body });
124
- }
125
- tokens.push({ type: "close", tag, attrs: {} });
126
- re.lastIndex = closeMatch.index + closeMatch[0].length;
127
- }
128
- else {
129
- // No closing tag discard the rest of the input for this
130
- // raw-text element to avoid emitting markup as text.
131
- re.lastIndex = stripped.length;
132
- }
133
- }
131
+ // Read the tag name: [A-Za-z][A-Za-z0-9]*.
132
+ let p = nameStart + 1;
133
+ while (p < n) {
134
+ const c = stripped.charCodeAt(p);
135
+ if (!isAsciiAlpha(c) && !isAsciiDigit(c)) {
136
+ break;
137
+ }
138
+ p++;
139
+ }
140
+ const tagName = stripped.slice(nameStart, p).toLowerCase();
141
+ // Find the closing '>' of the tag. We have to be careful not to
142
+ // mistake a '>' inside a quoted attribute value for the tag end.
143
+ const tagEnd = findTagEnd(stripped, p);
144
+ if (tagEnd < 0) {
145
+ // No closing '>' — the rest of the input is malformed; treat the
146
+ // remainder as text. (Original regex would simply not match and
147
+ // leave the same characters as text via the alternation.)
148
+ const text = decodeHtmlEntities(stripped.slice(i));
149
+ if (text) {
150
+ tokens.push({ type: "text", value: text });
151
+ }
152
+ // `break` exits the loop directly; no need to assign `i = n`
153
+ // first (CodeQL js/useless-assignment-to-local).
154
+ break;
155
+ }
156
+ // Inside [p, tagEnd) lie attributes (and possibly a trailing '/').
157
+ let inner = stripped.slice(p, tagEnd);
158
+ // Detect self-close: trailing '/'. Strip it so it is not parsed as
159
+ // an attribute name.
160
+ let selfClose = false;
161
+ // Trim trailing whitespace, then a single '/'.
162
+ let innerEnd = inner.length;
163
+ while (innerEnd > 0 && isHtmlSpace(inner.charCodeAt(innerEnd - 1))) {
164
+ innerEnd--;
165
+ }
166
+ if (innerEnd > 0 && inner.charCodeAt(innerEnd - 1) === 0x2f) {
167
+ selfClose = true;
168
+ innerEnd--;
169
+ }
170
+ inner = inner.slice(0, innerEnd);
171
+ if (isClose) {
172
+ tokens.push({ type: "close", tag: tagName, attrs: {} });
173
+ i = tagEnd + 1;
174
+ continue;
175
+ }
176
+ const attrs = parseHtmlAttrs(inner);
177
+ const isVoidElement = VOID_ELEMENTS.has(tagName);
178
+ if (selfClose || isVoidElement) {
179
+ tokens.push({ type: "selfclose", tag: tagName, attrs });
180
+ i = tagEnd + 1;
181
+ continue;
182
+ }
183
+ tokens.push({ type: "open", tag: tagName, attrs });
184
+ i = tagEnd + 1;
185
+ // Raw-text elements: their body must not be parsed as markup.
186
+ if (RAW_TEXT_ELEMENTS.has(tagName)) {
187
+ const closeIdx = findRawTextClose(stripped, i, tagName);
188
+ if (closeIdx === null) {
189
+ // No closing tag — discard the rest of the input for this
190
+ // raw-text element to avoid emitting markup as text.
191
+ i = n;
192
+ }
193
+ else {
194
+ const body = stripped.slice(i, closeIdx.bodyEnd);
195
+ if (RAW_TEXT_PRESERVE_BODY.has(tagName)) {
196
+ tokens.push({ type: "text", value: body });
134
197
  }
198
+ tokens.push({ type: "close", tag: tagName, attrs: {} });
199
+ i = closeIdx.next;
135
200
  }
136
201
  }
137
202
  }
138
203
  return tokens;
139
204
  }
/**
 * Report whether a UTF-16 code unit is an ASCII letter (`A`-`Z` or `a`-`z`).
 *
 * @param {number} c - Code unit, typically from `String.prototype.charCodeAt`.
 *   `NaN` (an out-of-range `charCodeAt`) yields `false`.
 * @returns {boolean} `true` only for ASCII letters.
 */
function isAsciiAlpha(c) {
    const isUpper = 0x41 <= c && c <= 0x5a;
    const isLower = 0x61 <= c && c <= 0x7a;
    return isUpper || isLower;
}
/**
 * Report whether a UTF-16 code unit is an ASCII decimal digit (`0`-`9`).
 *
 * @param {number} c - Code unit, typically from `String.prototype.charCodeAt`.
 *   `NaN` yields `false`.
 * @returns {boolean} `true` only for `0x30`..`0x39`.
 */
function isAsciiDigit(c) {
    return 0x30 <= c && c <= 0x39;
}
/**
 * Report whether a UTF-16 code unit is one of the five whitespace characters
 * this tokenizer recognises inside markup: space, tab, line feed, form feed
 * and carriage return. Vertical tab (`0x0b`) is deliberately not included.
 *
 * @param {number} c - Code unit, typically from `String.prototype.charCodeAt`.
 * @returns {boolean} `true` for `0x20`, `0x09`, `0x0a`, `0x0c` or `0x0d`.
 */
function isHtmlSpace(c) {
    switch (c) {
        case 0x20: // space
        case 0x09: // tab
        case 0x0a: // line feed
        case 0x0d: // carriage return
        case 0x0c: // form feed
            return true;
        default:
            return false;
    }
}
/**
 * Locate the end of the text run that begins at `from`.
 *
 * The run stops at the first `<` that `isTagStart` accepts as the start of
 * an open or close tag. Any other `<` (as in `1 < 2` or `<<<<`) stays inside
 * the text run.
 *
 * @param {string} s - Input being tokenised.
 * @param {number} from - Index at which the text run begins.
 * @returns {number} Index of the next tag-introducing `<`, or `s.length`
 *   when no such position exists.
 */
function scanTextEnd(s, from) {
    const end = s.length;
    let cursor = from;
    while (cursor < end) {
        const lt = s.indexOf("<", cursor);
        if (lt < 0) {
            // No further '<' at all: the run extends to the end of input.
            break;
        }
        if (isTagStart(s, lt)) {
            return lt;
        }
        // Literal '<' — step over it and look for the next candidate.
        cursor = lt + 1;
    }
    return end;
}
/**
 * Decide whether the character at `pos` opens a tag.
 *
 * A tag start is `<` immediately followed by an ASCII letter (open tag) or
 * by `/` and then an ASCII letter (close tag). Everything else — a lone `<`,
 * `< a`, `</ a`, `<1` — is treated as literal text by the caller.
 *
 * @param {string} s - Input being tokenised.
 * @param {number} pos - Index to test.
 * @returns {boolean} `true` when a tag begins at `pos`.
 */
function isTagStart(s, pos) {
    if (s.charCodeAt(pos) !== 0x3c /* '<' */) {
        return false;
    }
    const follower = s.charCodeAt(pos + 1);
    // For a close tag the name starts one character later, after the '/'.
    const nameCode = follower === 0x2f /* '/' */ ? s.charCodeAt(pos + 2) : follower;
    return isAsciiAlpha(nameCode);
}
/**
 * Find the index of the '>' that closes the tag whose name ends just before
 * `from`.
 *
 * A '>' inside a quoted attribute value does not end the tag, so
 * `<a href="x>y">` closes at the second '>'. Quotes are only treated as
 * delimiters where an attribute value can begin, i.e. directly after `=`
 * (optionally followed by whitespace). A quote anywhere else — inside an
 * attribute name or inside an unquoted value such as `title=it's` — is an
 * ordinary character. This is the same rule `parseHtmlAttrs` applies, and
 * it prevents a stray apostrophe in a malformed tag from swallowing
 * everything up to the next apostrophe in the document.
 *
 * @param {string} s - Input being tokenised.
 * @param {number} from - Index just past the tag name.
 * @returns {number} Index of the closing '>', or -1 when the tag is not
 *   closed before the end of input (including the case of a quoted value
 *   whose closing quote is missing).
 */
function findTagEnd(s, from) {
    const IN_NAME = 0; // in or between attribute names
    const BEFORE_VALUE = 1; // seen '=', value has not started yet
    const IN_UNQUOTED = 2; // inside an unquoted attribute value
    const n = s.length;
    let state = IN_NAME;
    let i = from;
    while (i < n) {
        const c = s.charCodeAt(i);
        if (c === 0x3e /* '>' */) {
            return i;
        }
        const isSpace = c === 0x20 || c === 0x09 || c === 0x0a || c === 0x0c || c === 0x0d;
        if (state === BEFORE_VALUE) {
            if (c === 0x22 /* '"' */ || c === 0x27 /* "'" */) {
                // Quoted value: jump straight to the matching quote so any
                // '>' in between is skipped.
                const close = s.indexOf(c === 0x22 ? '"' : "'", i + 1);
                if (close < 0) {
                    return -1;
                }
                i = close + 1;
                state = IN_NAME;
                continue;
            }
            if (!isSpace) {
                state = IN_UNQUOTED;
            }
        }
        else if (state === IN_UNQUOTED) {
            // An unquoted value runs until whitespace (or the '>' above).
            if (isSpace) {
                state = IN_NAME;
            }
        }
        else if (c === 0x3d /* '=' */) {
            state = BEFORE_VALUE;
        }
        i++;
    }
    return -1;
}
/**
 * Locate the close tag of a raw-text element (for example `</script>`),
 * searching forward from `from`.
 *
 * A candidate is `</` followed by `tagName` (ASCII case-insensitive), then
 * optional whitespace, then `>`. Candidates that fail any part are skipped
 * and the search resumes just after their `</`.
 *
 * @param {string} s - Input being tokenised.
 * @param {number} from - Index at which the element body starts.
 * @param {string} tagName - Element name; compared as-is against the
 *   ASCII-lower-cased input, so it is expected to be lower-case already.
 * @returns {{bodyEnd: number, next: number} | null} `bodyEnd` is the index
 *   of the `<` of the close tag and `next` the index just past its `>`;
 *   `null` when no close tag is found.
 */
function findRawTextClose(s, from, tagName) {
    const total = s.length;
    const nameLength = tagName.length;
    // Compare the input at `start` with `tagName`, folding only ASCII
    // upper-case letters so no locale-dependent case mapping is involved.
    const nameMatchesAt = (start) => {
        for (let k = 0; k < nameLength; k++) {
            const code = s.charCodeAt(start + k);
            const folded = code >= 0x41 && code <= 0x5a ? code + 0x20 : code;
            if (folded !== tagName.charCodeAt(k)) {
                return false;
            }
        }
        return true;
    };
    let cursor = from;
    while (cursor < total) {
        const open = s.indexOf("</", cursor);
        if (open < 0) {
            return null;
        }
        const nameAt = open + 2;
        // Whatever happens below, the next search starts after this "</".
        cursor = nameAt;
        if (!nameMatchesAt(nameAt)) {
            continue;
        }
        let gt = nameAt + nameLength;
        while (gt < total && isHtmlSpace(s.charCodeAt(gt))) {
            gt++;
        }
        if (gt < total && s.charCodeAt(gt) === 0x3e /* '>' */) {
            return { bodyEnd: open, next: gt + 1 };
        }
    }
    return null;
}
/**
 * Remove HTML comments, CDATA sections, doctype declarations and
 * processing instructions from `input` in one left-to-right scan.
 *
 * Recognised constructs and their terminators:
 *   - `<!-- … -->`
 *   - `<![CDATA[ … ]]>`
 *   - `<!doctype … >` (keyword matched case-insensitively)
 *   - `<? … ?>`
 *
 * Each construct ends at the FIRST occurrence of its terminator, so
 * constructs do not nest: for `<!--<!--x-->-->` the first `-->` closes the
 * comment and the trailing `-->` is kept as text. When a terminator is
 * missing, nothing is removed for that opener — its `<` is emitted
 * literally and scanning continues with the next character.
 *
 * Only `indexOf`/`startsWith` are used (no regular expressions), so there
 * is no regex backtracking on adversarial input.
 *
 * @param {string} input - Raw HTML source.
 * @returns {string} `input` with every terminated construct removed.
 */
function stripSgmlNoise(input) {
    const total = input.length;
    // Index at which scanning resumes after the construct opening at `lt`,
    // or -1 when `lt` opens no construct or its terminator is missing.
    const resumeAfterNoise = (lt) => {
        let closer;
        let searchFrom;
        if (input.startsWith("<!--", lt)) {
            closer = "-->";
            searchFrom = lt + 4;
        }
        else if (input.startsWith("<![CDATA[", lt)) {
            closer = "]]>";
            searchFrom = lt + 9;
        }
        else if (input.charCodeAt(lt + 1) === 0x21 /* '!' */ &&
            input.slice(lt + 2, lt + 9).toLowerCase() === "doctype") {
            closer = ">";
            searchFrom = lt + 9;
        }
        else if (input.charCodeAt(lt + 1) === 0x3f /* '?' */) {
            closer = "?>";
            searchFrom = lt + 2;
        }
        else {
            return -1;
        }
        const end = input.indexOf(closer, searchFrom);
        return end < 0 ? -1 : end + closer.length;
    };
    const kept = [];
    let pos = 0;
    while (pos < total) {
        const lt = input.indexOf("<", pos);
        if (lt < 0) {
            // No more '<': the remainder is plain text.
            kept.push(input.slice(pos));
            break;
        }
        kept.push(input.slice(pos, lt));
        const resume = resumeAfterNoise(lt);
        if (resume < 0) {
            // Ordinary '<' (or an unterminated construct): keep it.
            kept.push("<");
            pos = lt + 1;
        }
        else {
            pos = resume;
        }
    }
    return kept.join("");
}
140
409
  /**
141
410
  * HTML elements whose body is not parsed as markup. Their content is either
142
411
  * preserved (style) for downstream processing or discarded entirely.
@@ -207,26 +476,110 @@ function extractStyleRules(tokens) {
207
476
  }
208
477
  return result;
209
478
  }
/**
 * Parse the attribute portion of a start tag, e.g.
 * `class="x" id='y' disabled href=foo`, into a plain object.
 *
 * Rules, applied in one left-to-right scan without regular expressions:
 *   - A name starts with an ASCII letter or `_` and continues with ASCII
 *     letters, digits, `_` or `-`. Names are stored lower-cased.
 *   - Characters that cannot start a name are skipped one at a time.
 *   - `name` with no `=` (or with `=` and nothing after it) maps to `""`.
 *   - After `=` (whitespace allowed on both sides) the value is either
 *     double-quoted, single-quoted, or an unquoted run of non-whitespace.
 *   - A quoted value with no closing quote takes the rest of the string
 *     and ends parsing.
 *   - A repeated name overwrites the earlier value.
 *
 * @param {string} str - Text between the tag name and the closing `>`.
 * @returns {Object<string, string>} Attribute values keyed by lower-cased name.
 */
function parseHtmlAttrs(str) {
    const attrs = {};
    const len = str.length;
    // Index of the first non-whitespace character at or after `at`.
    const skipSpace = (at) => {
        let k = at;
        while (k < len && isHtmlSpace(str.charCodeAt(k))) {
            k++;
        }
        return k;
    };
    const isNameStart = (code) => isAsciiAlpha(code) || code === 0x5f /* '_' */;
    const isNameChar = (code) => isNameStart(code) || isAsciiDigit(code) || code === 0x2d /* '-' */;
    let pos = 0;
    for (;;) {
        pos = skipSpace(pos);
        if (pos >= len) {
            break;
        }
        if (!isNameStart(str.charCodeAt(pos))) {
            // Not a valid attribute-name start — drop one char and resync.
            pos++;
            continue;
        }
        let nameEnd = pos + 1;
        while (nameEnd < len && isNameChar(str.charCodeAt(nameEnd))) {
            nameEnd++;
        }
        const key = str.slice(pos, nameEnd).toLowerCase();
        const eqAt = skipSpace(nameEnd);
        if (eqAt >= len || str.charCodeAt(eqAt) !== 0x3d /* '=' */) {
            // Boolean attribute; resume right after the name.
            attrs[key] = "";
            pos = nameEnd;
            continue;
        }
        const valueAt = skipSpace(eqAt + 1);
        if (valueAt >= len) {
            // `name=` at the very end: empty value.
            attrs[key] = "";
            pos = valueAt;
            continue;
        }
        const lead = str[valueAt];
        if (lead === '"' || lead === "'") {
            const closing = str.indexOf(lead, valueAt + 1);
            if (closing < 0) {
                // Unterminated quoted value — keep what is left and stop.
                attrs[key] = str.slice(valueAt + 1);
                break;
            }
            attrs[key] = str.slice(valueAt + 1, closing);
            pos = closing + 1;
            continue;
        }
        // Unquoted value: everything up to the next whitespace.
        let valueEnd = valueAt;
        while (valueEnd < len && !isHtmlSpace(str.charCodeAt(valueEnd))) {
            valueEnd++;
        }
        attrs[key] = str.slice(valueAt, valueEnd);
        pos = valueEnd;
    }
    return attrs;
}
/**
 * Decode HTML character references in `text`.
 *
 * All three reference forms are handled by one alternation in a single
 * pass, so each source position is decoded at most once: `&amp;lt;`
 * becomes `&lt;`, not `<`.
 *   - `&#NNN;`       decimal, converted via `safeFromCodePoint`
 *   - `&#xHHH;`      hexadecimal (`x` or `X`), via `safeFromCodePoint`
 *   - `&name;`       looked up in `HTML_ENTITIES`
 *
 * Named references that are not own keys of `HTML_ENTITIES` are left
 * untouched. The own-property check matters: a plain lookup would also
 * find members inherited from `Object.prototype`, turning input such as
 * `&constructor;` or `&toString;` into the stringified function.
 *
 * @param {string} text - Text that may contain character references.
 * @returns {string} Text with recognised references replaced.
 */
function decodeHtmlEntities(text) {
    return text.replace(/&(?:#(\d+)|#[xX]([a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));/g, (match, dec, hex, name) => {
        if (dec !== undefined) {
            return safeFromCodePoint(parseInt(dec, 10));
        }
        if (hex !== undefined) {
            return safeFromCodePoint(parseInt(hex, 16));
        }
        // Exactly one alternative matched, so `name` is set from here on.
        const replacement = Object.prototype.hasOwnProperty.call(HTML_ENTITIES, name)
            ? HTML_ENTITIES[name]
            : undefined;
        return replacement ?? match;
    });
}
231
584
  /**
232
585
  * Convert a numeric character reference to a string. Uses fromCodePoint so
@@ -246,6 +599,16 @@ function safeFromCodePoint(cp) {
246
599
  }
247
600
  /** Common HTML named entities mapped to their Unicode characters. */
248
601
  const HTML_ENTITIES = {
602
+ // Core XML/HTML entities — these used to be handled as standalone
603
+ // chained `.replace()` calls in `decodeHtmlEntities`. They must live
604
+ // in this table so the single-pass decoder can resolve them without
605
+ // re-running over already-decoded output (CodeQL "double unescaping").
606
+ amp: "&",
607
+ lt: "<",
608
+ gt: ">",
609
+ quot: '"',
610
+ apos: "'",
611
+ nbsp: "\u00A0",
249
612
  // Punctuation & Typography
250
613
  mdash: "\u2014",
251
614
  ndash: "\u2013",
@@ -191,8 +191,13 @@ function renderTable(state, table) {
191
191
  cellParts.push(renderInlineChildren(state, block.children).trim());
192
192
  }
193
193
  }
194
- // Escape pipe characters to prevent table structure corruption
195
- rowTexts.push(cellParts.join(" ").replace(/\|/g, "\\|"));
194
+ // Escape pipe characters to prevent table structure corruption.
195
+ // Backslashes must be escaped *first*: replacing `|` first leaves
196
+ // a literal `\|` in the source untouched, but a subsequent
197
+ // `\` → `\\` pass would then double-escape it into `\\|`,
198
+ // breaking GFM tables. CodeQL flags the single-pass form as
199
+ // "Incomplete string escaping or encoding".
200
+ rowTexts.push(cellParts.join(" ").replace(/\\/g, "\\\\").replace(/\|/g, "\\|"));
196
201
  }
197
202
  grid.push(rowTexts);
198
203
  }
@@ -473,7 +478,10 @@ function isMonospaceFont(font) {
473
478
  if (typeof font === "string") {
474
479
  return isMonospaceFontName(font);
475
480
  }
476
- if (typeof font === "object" && font !== null) {
481
+ // `!font` above already discarded `null`; `font !== null` here was
482
+ // therefore always true and CodeQL flagged it as a comparison
483
+ // between inconvertible types.
484
+ if (typeof font === "object") {
477
485
  const f = font;
478
486
  return (isMonospaceFontName(f.ascii) ||
479
487
  isMonospaceFontName(f.hAnsi));
@@ -1257,7 +1257,10 @@ function resolveColorHex(color) {
1257
1257
  if (typeof color === "string") {
1258
1258
  return color;
1259
1259
  }
1260
- if (typeof color === "object" && color !== null && "value" in color) {
1260
+ // The `!color` check above already discarded `null`; an additional
1261
+ // `color !== null` test was always true and CodeQL flagged it as a
1262
+ // comparison between inconvertible types.
1263
+ if (typeof color === "object" && "value" in color) {
1261
1264
  return color.value;
1262
1265
  }
1263
1266
  return undefined;