@cj-tech-master/excelts 7.5.0 → 7.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/modules/excel/stream/hyperlink-reader.js +1 -1
- package/dist/browser/modules/excel/stream/workbook-reader.browser.js +2 -2
- package/dist/browser/modules/excel/stream/worksheet-reader.js +1 -1
- package/dist/browser/modules/excel/xlsx/xform/base-xform.js +1 -1
- package/dist/browser/modules/xml/dom.js +2 -1
- package/dist/browser/modules/xml/index.d.ts +1 -1
- package/dist/browser/modules/xml/sax.d.ts +41 -0
- package/dist/browser/modules/xml/sax.js +265 -76
- package/dist/browser/modules/xml/to-object.js +2 -1
- package/dist/browser/modules/xml/types.d.ts +24 -0
- package/dist/cjs/modules/excel/stream/hyperlink-reader.js +1 -1
- package/dist/cjs/modules/excel/stream/workbook-reader.browser.js +2 -2
- package/dist/cjs/modules/excel/stream/worksheet-reader.js +1 -1
- package/dist/cjs/modules/excel/xlsx/xform/base-xform.js +1 -1
- package/dist/cjs/modules/xml/dom.js +2 -1
- package/dist/cjs/modules/xml/sax.js +265 -76
- package/dist/cjs/modules/xml/to-object.js +2 -1
- package/dist/esm/modules/excel/stream/hyperlink-reader.js +1 -1
- package/dist/esm/modules/excel/stream/workbook-reader.browser.js +2 -2
- package/dist/esm/modules/excel/stream/worksheet-reader.js +1 -1
- package/dist/esm/modules/excel/xlsx/xform/base-xform.js +1 -1
- package/dist/esm/modules/xml/dom.js +2 -1
- package/dist/esm/modules/xml/sax.js +265 -76
- package/dist/esm/modules/xml/to-object.js +2 -1
- package/dist/iife/excelts.iife.js +196 -54
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +44 -44
- package/dist/types/modules/xml/index.d.ts +1 -1
- package/dist/types/modules/xml/sax.d.ts +41 -0
- package/dist/types/modules/xml/types.d.ts +24 -0
- package/package.json +1 -1
|
@@ -96,7 +96,8 @@ function parseXml(xml, options) {
|
|
|
96
96
|
fragment: options?.fragment ?? false,
|
|
97
97
|
xmlns: options?.xmlns ?? false,
|
|
98
98
|
maxDepth: options?.maxDepth,
|
|
99
|
-
maxEntityExpansions: options?.maxEntityExpansions
|
|
99
|
+
maxEntityExpansions: options?.maxEntityExpansions,
|
|
100
|
+
invalidCharHandling: options?.invalidCharHandling
|
|
100
101
|
});
|
|
101
102
|
// Stack of elements being built. The bottom is a synthetic root
|
|
102
103
|
// that collects top-level nodes.
|
|
@@ -40,6 +40,8 @@ const GREATER = 0x3e; // >
|
|
|
40
40
|
const QUESTION = 0x3f; // ?
|
|
41
41
|
const OPEN_BRACKET = 0x5b; // [
|
|
42
42
|
const CLOSE_BRACKET = 0x5d; // ]
|
|
43
|
+
const REPLACEMENT_CHAR = 0xfffd; // U+FFFD REPLACEMENT CHARACTER
|
|
44
|
+
const REPLACEMENT_STR = "\uFFFD"; // Pre-allocated string form of U+FFFD
|
|
43
45
|
// =============================================================================
|
|
44
46
|
// Pre-computed Lookup Tables
|
|
45
47
|
// =============================================================================
|
|
@@ -238,6 +240,7 @@ class SaxParser {
|
|
|
238
240
|
this.maxDepth = options?.maxDepth !== undefined ? options.maxDepth : 256;
|
|
239
241
|
this.maxEntityExpansions =
|
|
240
242
|
options?.maxEntityExpansions !== undefined ? options.maxEntityExpansions : 10000;
|
|
243
|
+
this.invalidCharHandling = options?.invalidCharHandling ?? "error";
|
|
241
244
|
this._init();
|
|
242
245
|
}
|
|
243
246
|
get closed() {
|
|
@@ -355,87 +358,215 @@ class SaxParser {
|
|
|
355
358
|
return this.write(null);
|
|
356
359
|
}
|
|
357
360
|
// ===========================================================================
|
|
358
|
-
// Character
|
|
361
|
+
// Invalid Character Handling
|
|
359
362
|
// ===========================================================================
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
363
|
+
/**
|
|
364
|
+
* Handle an invalid XML character according to the configured strategy.
|
|
365
|
+
*
|
|
366
|
+
* Called by `getCode()` whenever it reads a character that is not valid in
|
|
367
|
+
* XML 1.0 (a lone surrogate, U+FFFE/U+FFFF, or an ASCII control character).
|
|
368
|
+
*
|
|
369
|
+
* - `"error"`: call `fail()` and return the original code.
|
|
370
|
+
* - `"skip"`: return `-2` as a sentinel; `getCode()` then moves on to the next character.
|
|
371
|
+
* - `"replace"`: return `REPLACEMENT_CHAR`.
|
|
372
|
+
*
|
|
373
|
+
* Note: `getCode()` loops on the skip sentinel instead of recursing into itself.
|
|
374
|
+
*
|
|
375
|
+
* @param code - The invalid character code point.
|
|
376
|
+
* @param kind - Optional description (e.g. "lone surrogate") for error messages.
|
|
377
|
+
* @returns The code point to use.
|
|
378
|
+
*/
|
|
379
|
+
handleInvalidChar(code, kind) {
|
|
380
|
+
switch (this.invalidCharHandling) {
|
|
381
|
+
case "replace":
|
|
382
|
+
return REPLACEMENT_CHAR;
|
|
383
|
+
case "skip":
|
|
384
|
+
// Caller is responsible for the actual skip logic.
|
|
385
|
+
// We return -2 as a sentinel to tell getCode()'s loop to continue.
|
|
386
|
+
return -2;
|
|
387
|
+
default: {
|
|
388
|
+
// "error" — existing strict behavior
|
|
389
|
+
const label = kind
|
|
390
|
+
? `invalid XML character: ${kind} 0x${code.toString(16)}`
|
|
391
|
+
: `invalid XML character: 0x${code.toString(16)}`;
|
|
392
|
+
this.fail(label);
|
|
393
|
+
return code;
|
|
373
394
|
}
|
|
374
|
-
return code;
|
|
375
395
|
}
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
396
|
+
}
|
|
397
|
+
/**
|
|
398
|
+
* Handle an invalid character inside the `handleTextInRoot()` fast loop.
|
|
399
|
+
*
|
|
400
|
+
* Unlike `handleInvalidChar()` (which returns a code point for `getCode()`),
|
|
401
|
+
* this method manages the text accumulation state (`this.text`, `start`) that
|
|
402
|
+
* the fast text loop relies on.
|
|
403
|
+
*
|
|
404
|
+
* - `"error"`: call `fail()`, leave text accumulation unchanged (char stays in output).
|
|
405
|
+
* - `"skip"`: flush text up to the invalid char, skip it, return new `start`.
|
|
406
|
+
* - `"replace"`: flush text up to the invalid char, append U+FFFD, return new `start`.
|
|
407
|
+
*
|
|
408
|
+
* @returns The updated `start` index for the text accumulation loop.
|
|
409
|
+
*/
|
|
410
|
+
handleInvalidCharInText(code, handler, start, kind) {
|
|
411
|
+
switch (this.invalidCharHandling) {
|
|
412
|
+
case "skip":
|
|
413
|
+
// Flush text accumulated before this invalid char, then skip it
|
|
414
|
+
if (handler && start < this.prevI) {
|
|
415
|
+
this.text += this.chunk.slice(start, this.prevI);
|
|
416
|
+
}
|
|
417
|
+
return this.i;
|
|
418
|
+
case "replace":
|
|
419
|
+
// Flush text accumulated before this invalid char, append replacement
|
|
420
|
+
if (handler) {
|
|
421
|
+
if (start < this.prevI) {
|
|
422
|
+
this.text += this.chunk.slice(start, this.prevI);
|
|
423
|
+
}
|
|
424
|
+
this.text += REPLACEMENT_STR;
|
|
425
|
+
}
|
|
426
|
+
return this.i;
|
|
427
|
+
default: {
|
|
428
|
+
// "error" — existing strict behavior, char stays in output
|
|
429
|
+
const label = kind
|
|
430
|
+
? `invalid XML character: ${kind} 0x${code.toString(16)}`
|
|
431
|
+
: `invalid XML character: 0x${code.toString(16)}`;
|
|
432
|
+
this.fail(label);
|
|
433
|
+
return start;
|
|
380
434
|
}
|
|
381
|
-
return code;
|
|
382
435
|
}
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
436
|
+
}
|
|
437
|
+
/**
|
|
438
|
+
* Handle an invalid character inside `sAttribValueQuoted()`.
|
|
439
|
+
*
|
|
440
|
+
* Same pattern as `handleInvalidCharInText()` but for attribute value
|
|
441
|
+
* accumulation (always uses `this.text`, no conditional handler check).
|
|
442
|
+
*
|
|
443
|
+
* @returns The updated `start` index.
|
|
444
|
+
*/
|
|
445
|
+
handleInvalidCharInAttr(code, start, kind) {
|
|
446
|
+
switch (this.invalidCharHandling) {
|
|
447
|
+
case "skip":
|
|
448
|
+
if (start < this.prevI) {
|
|
449
|
+
this.text += this.chunk.slice(start, this.prevI);
|
|
450
|
+
}
|
|
451
|
+
return this.i;
|
|
452
|
+
case "replace":
|
|
453
|
+
if (start < this.prevI) {
|
|
454
|
+
this.text += this.chunk.slice(start, this.prevI);
|
|
455
|
+
}
|
|
456
|
+
this.text += REPLACEMENT_STR;
|
|
457
|
+
return this.i;
|
|
458
|
+
default: {
|
|
459
|
+
const label = kind
|
|
460
|
+
? `invalid XML character: ${kind} 0x${code.toString(16)}`
|
|
461
|
+
: `invalid XML character: 0x${code.toString(16)}`;
|
|
462
|
+
this.fail(label);
|
|
463
|
+
return start;
|
|
392
464
|
}
|
|
393
|
-
return NL;
|
|
394
465
|
}
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
466
|
+
}
|
|
467
|
+
// ===========================================================================
|
|
468
|
+
// Character Reading
|
|
469
|
+
// ===========================================================================
|
|
470
|
+
getCode() {
|
|
471
|
+
// Loop to handle skip mode: when an invalid char returns -2, we retry
|
|
472
|
+
// with the next character instead of recursing (avoids stack overflow
|
|
473
|
+
// on long runs of consecutive invalid characters).
|
|
474
|
+
for (;;) {
|
|
475
|
+
const { chunk } = this;
|
|
476
|
+
const i = this.i;
|
|
477
|
+
this.prevI = i;
|
|
478
|
+
this.i = i + 1;
|
|
479
|
+
if (i >= chunk.length) {
|
|
480
|
+
return -1;
|
|
481
|
+
}
|
|
482
|
+
const code = chunk.charCodeAt(i);
|
|
483
|
+
// Ultra-fast path: printable ASCII (0x20-0x7E) — the vast majority of XML content.
|
|
484
|
+
// No validation needed; these are always valid XML 1.0 characters.
|
|
485
|
+
if (code >= 0x20 && code <= 0x7e) {
|
|
486
|
+
if (this.trackPosition) {
|
|
487
|
+
this.column++;
|
|
488
|
+
}
|
|
489
|
+
return code;
|
|
401
490
|
}
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
// Handle surrogates
|
|
405
|
-
if (code >= 0xd800 && code <= 0xdbff) {
|
|
406
|
-
const next = chunk.charCodeAt(i + 1);
|
|
407
|
-
if (next >= 0xdc00 && next <= 0xdfff) {
|
|
408
|
-
this.i = i + 2;
|
|
491
|
+
// Secondary fast path: TAB (0x09) — common in attribute values
|
|
492
|
+
if (code === TAB) {
|
|
409
493
|
if (this.trackPosition) {
|
|
410
494
|
this.column++;
|
|
411
495
|
}
|
|
412
|
-
return
|
|
496
|
+
return code;
|
|
413
497
|
}
|
|
414
|
-
//
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
498
|
+
// Handle CR (normalize CR and CR+LF to LF per XML 1.0 §2.11)
|
|
499
|
+
if (code === CR) {
|
|
500
|
+
if (chunk.charCodeAt(i + 1) === NL) {
|
|
501
|
+
this.i = i + 2;
|
|
502
|
+
}
|
|
503
|
+
if (this.trackPosition) {
|
|
504
|
+
this.line++;
|
|
505
|
+
this.column = 0;
|
|
506
|
+
this.positionAtNewLine = this.position;
|
|
507
|
+
}
|
|
508
|
+
return NL;
|
|
509
|
+
}
|
|
510
|
+
// Handle LF
|
|
511
|
+
if (code === NL) {
|
|
512
|
+
if (this.trackPosition) {
|
|
513
|
+
this.line++;
|
|
514
|
+
this.column = 0;
|
|
515
|
+
this.positionAtNewLine = this.position;
|
|
516
|
+
}
|
|
517
|
+
return NL;
|
|
518
|
+
}
|
|
519
|
+
// Handle surrogates
|
|
520
|
+
if (code >= 0xd800 && code <= 0xdbff) {
|
|
521
|
+
const next = chunk.charCodeAt(i + 1);
|
|
522
|
+
if (next >= 0xdc00 && next <= 0xdfff) {
|
|
523
|
+
this.i = i + 2;
|
|
524
|
+
if (this.trackPosition) {
|
|
525
|
+
this.column++;
|
|
526
|
+
}
|
|
527
|
+
return 0x10000 + ((code - 0xd800) * 0x400 + (next - 0xdc00));
|
|
528
|
+
}
|
|
529
|
+
// Lone high surrogate — invalid XML character
|
|
530
|
+
const result = this.handleInvalidChar(code, "lone surrogate");
|
|
531
|
+
if (result !== -2) {
|
|
532
|
+
return result;
|
|
533
|
+
}
|
|
534
|
+
continue; // skip: loop to next char
|
|
535
|
+
}
|
|
536
|
+
// Lone low surrogate — invalid XML character
|
|
537
|
+
if (code >= 0xdc00 && code <= 0xdfff) {
|
|
538
|
+
const result = this.handleInvalidChar(code, "lone surrogate");
|
|
539
|
+
if (result !== -2) {
|
|
540
|
+
return result;
|
|
541
|
+
}
|
|
542
|
+
continue;
|
|
543
|
+
}
|
|
544
|
+
// Remaining non-ASCII code units outside the surrogate range (0x80-0xD7FF, 0xE000-0xFFFF) — valid XML except 0xFFFE/0xFFFF
|
|
545
|
+
if (code >= 0x80) {
|
|
546
|
+
if (this.trackPosition) {
|
|
547
|
+
this.column++;
|
|
548
|
+
}
|
|
549
|
+
// Reject 0xFFFE and 0xFFFF
|
|
550
|
+
if (code === 0xfffe || code === 0xffff) {
|
|
551
|
+
const result = this.handleInvalidChar(code);
|
|
552
|
+
if (result !== -2) {
|
|
553
|
+
return result;
|
|
554
|
+
}
|
|
555
|
+
continue;
|
|
556
|
+
}
|
|
557
|
+
return code;
|
|
558
|
+
}
|
|
559
|
+
// Remaining: ASCII control characters (0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F, 0x7F)
|
|
560
|
+
// All invalid in XML 1.0
|
|
423
561
|
if (this.trackPosition) {
|
|
424
562
|
this.column++;
|
|
425
563
|
}
|
|
426
|
-
|
|
427
|
-
if (
|
|
428
|
-
|
|
564
|
+
const result = this.handleInvalidChar(code);
|
|
565
|
+
if (result !== -2) {
|
|
566
|
+
return result;
|
|
429
567
|
}
|
|
430
|
-
|
|
431
|
-
}
|
|
432
|
-
// Remaining: ASCII control characters (0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F, 0x7F)
|
|
433
|
-
// All invalid in XML 1.0
|
|
434
|
-
if (this.trackPosition) {
|
|
435
|
-
this.column++;
|
|
568
|
+
// skip: continue to next char
|
|
436
569
|
}
|
|
437
|
-
this.fail("invalid XML character: 0x" + code.toString(16));
|
|
438
|
-
return code;
|
|
439
570
|
}
|
|
440
571
|
unget() {
|
|
441
572
|
this.i = this.prevI;
|
|
@@ -642,16 +773,16 @@ class SaxParser {
|
|
|
642
773
|
}
|
|
643
774
|
else {
|
|
644
775
|
this.i++;
|
|
645
|
-
this.
|
|
776
|
+
start = this.handleInvalidCharInText(code, handler, start, "lone surrogate");
|
|
646
777
|
}
|
|
647
778
|
}
|
|
648
779
|
else if (code >= 0xdc00 && code <= 0xdfff) {
|
|
649
780
|
this.i++;
|
|
650
|
-
this.
|
|
781
|
+
start = this.handleInvalidCharInText(code, handler, start, "lone surrogate");
|
|
651
782
|
}
|
|
652
783
|
else if (code === 0xfffe || code === 0xffff) {
|
|
653
784
|
this.i++;
|
|
654
|
-
this.
|
|
785
|
+
start = this.handleInvalidCharInText(code, handler, start);
|
|
655
786
|
}
|
|
656
787
|
else {
|
|
657
788
|
this.i++;
|
|
@@ -667,7 +798,7 @@ class SaxParser {
|
|
|
667
798
|
if (this.trackPosition) {
|
|
668
799
|
this.column++;
|
|
669
800
|
}
|
|
670
|
-
this.
|
|
801
|
+
start = this.handleInvalidCharInText(code, handler, start);
|
|
671
802
|
}
|
|
672
803
|
// End of chunk
|
|
673
804
|
if (handler && start < this.i) {
|
|
@@ -679,14 +810,42 @@ class SaxParser {
|
|
|
679
810
|
let { i: start } = this;
|
|
680
811
|
const handler = this._handlers.text;
|
|
681
812
|
let nonSpace = false;
|
|
813
|
+
const isSkip = this.invalidCharHandling === "skip";
|
|
814
|
+
const isReplace = this.invalidCharHandling === "replace";
|
|
682
815
|
while (true) {
|
|
816
|
+
const iBeforeGet = this.i;
|
|
683
817
|
const c = this.getCode();
|
|
684
818
|
if (c === -1) {
|
|
685
|
-
if (handler && start <
|
|
686
|
-
this.text += chunk.slice(start,
|
|
819
|
+
if (handler && start < iBeforeGet) {
|
|
820
|
+
this.text += chunk.slice(start, iBeforeGet);
|
|
687
821
|
}
|
|
688
822
|
break;
|
|
689
823
|
}
|
|
824
|
+
// In skip mode, getCode() may have internally looped past invalid chars.
|
|
825
|
+
// Flush valid text before the gap and advance start past it.
|
|
826
|
+
if (isSkip && this.prevI > iBeforeGet) {
|
|
827
|
+
if (handler && start < iBeforeGet) {
|
|
828
|
+
this.text += chunk.slice(start, iBeforeGet);
|
|
829
|
+
}
|
|
830
|
+
start = this.prevI;
|
|
831
|
+
}
|
|
832
|
+
// In replace mode, getCode() returns REPLACEMENT_CHAR for invalid chars
|
|
833
|
+
// but the original byte is still in the chunk. Detect this by checking
|
|
834
|
+
// whether getCode() returned REPLACEMENT_CHAR while the raw chunk byte
|
|
835
|
+
// at prevI is NOT U+FFFD (i.e., it was substituted by handleInvalidChar).
|
|
836
|
+
if (isReplace &&
|
|
837
|
+
c === REPLACEMENT_CHAR &&
|
|
838
|
+
chunk.charCodeAt(this.prevI) !== REPLACEMENT_CHAR) {
|
|
839
|
+
if (handler) {
|
|
840
|
+
if (start < this.prevI) {
|
|
841
|
+
this.text += chunk.slice(start, this.prevI);
|
|
842
|
+
}
|
|
843
|
+
this.text += REPLACEMENT_STR;
|
|
844
|
+
}
|
|
845
|
+
start = this.i;
|
|
846
|
+
nonSpace = true;
|
|
847
|
+
continue;
|
|
848
|
+
}
|
|
690
849
|
if (c === LESS) {
|
|
691
850
|
if (handler) {
|
|
692
851
|
const slice = chunk.slice(start, this.prevI);
|
|
@@ -1063,13 +1222,43 @@ class SaxParser {
|
|
|
1063
1222
|
start = this.i;
|
|
1064
1223
|
continue;
|
|
1065
1224
|
}
|
|
1066
|
-
//
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1225
|
+
// Non-ASCII (>= 0x80) — mostly valid, handle inline like handleTextInRoot
|
|
1226
|
+
if (code >= 0x80) {
|
|
1227
|
+
this.prevI = this.i;
|
|
1228
|
+
if (code >= 0xd800 && code <= 0xdbff) {
|
|
1229
|
+
const next = chunk.charCodeAt(this.i + 1);
|
|
1230
|
+
if (next >= 0xdc00 && next <= 0xdfff) {
|
|
1231
|
+
this.i += 2; // valid surrogate pair
|
|
1232
|
+
}
|
|
1233
|
+
else {
|
|
1234
|
+
this.i++;
|
|
1235
|
+
start = this.handleInvalidCharInAttr(code, start, "lone surrogate");
|
|
1236
|
+
}
|
|
1237
|
+
}
|
|
1238
|
+
else if (code >= 0xdc00 && code <= 0xdfff) {
|
|
1239
|
+
this.i++;
|
|
1240
|
+
start = this.handleInvalidCharInAttr(code, start, "lone surrogate");
|
|
1241
|
+
}
|
|
1242
|
+
else if (code === 0xfffe || code === 0xffff) {
|
|
1243
|
+
this.i++;
|
|
1244
|
+
start = this.handleInvalidCharInAttr(code, start);
|
|
1245
|
+
}
|
|
1246
|
+
else {
|
|
1247
|
+
this.i++; // valid non-ASCII BMP char
|
|
1248
|
+
}
|
|
1249
|
+
if (this.trackPosition) {
|
|
1250
|
+
this.column++;
|
|
1251
|
+
}
|
|
1252
|
+
continue;
|
|
1253
|
+
}
|
|
1254
|
+
// Remaining: ASCII control characters (0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F, 0x7F)
|
|
1255
|
+
// All invalid in XML 1.0
|
|
1256
|
+
this.prevI = this.i;
|
|
1257
|
+
this.i++;
|
|
1258
|
+
if (this.trackPosition) {
|
|
1259
|
+
this.column++;
|
|
1071
1260
|
}
|
|
1072
|
-
|
|
1261
|
+
start = this.handleInvalidCharInAttr(code, start);
|
|
1073
1262
|
}
|
|
1074
1263
|
// End of chunk
|
|
1075
1264
|
this.text += chunk.slice(start, this.i);
|
|
@@ -45,7 +45,8 @@ function parseXmlToObject(xml, options) {
|
|
|
45
45
|
position: false,
|
|
46
46
|
fragment: options?.fragment ?? false,
|
|
47
47
|
maxDepth: options?.maxDepth,
|
|
48
|
-
maxEntityExpansions: options?.maxEntityExpansions
|
|
48
|
+
maxEntityExpansions: options?.maxEntityExpansions,
|
|
49
|
+
invalidCharHandling: options?.invalidCharHandling
|
|
49
50
|
});
|
|
50
51
|
// Stack: bottom is a synthetic root frame that collects the document root.
|
|
51
52
|
const syntheticObj = Object.create(null);
|
|
@@ -43,7 +43,7 @@ class HyperlinkReader extends EventEmitter {
|
|
|
43
43
|
return;
|
|
44
44
|
}
|
|
45
45
|
try {
|
|
46
|
-
const parser = new SaxParser({ position: false });
|
|
46
|
+
const parser = new SaxParser({ position: false, invalidCharHandling: "skip" });
|
|
47
47
|
const decoder = new TextDecoder("utf-8", { fatal: true });
|
|
48
48
|
parser.on("opentag", (node) => {
|
|
49
49
|
if (node.name !== "Relationship") {
|
|
@@ -211,7 +211,7 @@ export class WorkbookReaderBase extends EventEmitter {
|
|
|
211
211
|
// For "cache" mode, use direct SAX callbacks (no event objects, no async generator overhead)
|
|
212
212
|
if (this.options.sharedStrings === "cache") {
|
|
213
213
|
const sharedStrings = this.sharedStrings;
|
|
214
|
-
const parser = new SaxParser({ position: false });
|
|
214
|
+
const parser = new SaxParser({ position: false, invalidCharHandling: "skip" });
|
|
215
215
|
parser.on("opentag", (node) => {
|
|
216
216
|
switch (node.name) {
|
|
217
217
|
case "b":
|
|
@@ -311,7 +311,7 @@ export class WorkbookReaderBase extends EventEmitter {
|
|
|
311
311
|
return;
|
|
312
312
|
}
|
|
313
313
|
// "emit" mode — must yield, so use direct SAX with per-chunk yield
|
|
314
|
-
const emitParser = new SaxParser();
|
|
314
|
+
const emitParser = new SaxParser({ invalidCharHandling: "skip" });
|
|
315
315
|
const emitDecoder = new TextDecoder("utf-8", { fatal: true });
|
|
316
316
|
let pendingEmits = [];
|
|
317
317
|
emitParser.on("opentag", (node) => {
|
|
@@ -143,7 +143,7 @@ class WorksheetReader extends EventEmitter {
|
|
|
143
143
|
// Direct SAX callback mode — zero intermediate event objects.
|
|
144
144
|
// We collect worksheet events per-chunk and yield them.
|
|
145
145
|
let worksheetEvents = null;
|
|
146
|
-
const parser = new SaxParser({ position: false });
|
|
146
|
+
const parser = new SaxParser({ position: false, invalidCharHandling: "skip" });
|
|
147
147
|
parser.on("opentag", (node) => {
|
|
148
148
|
if (emitSheet) {
|
|
149
149
|
switch (node.name) {
|
|
@@ -159,7 +159,7 @@ class BaseXform {
|
|
|
159
159
|
* Use this instead of parse(parseSax(stream)) for hot paths.
|
|
160
160
|
*/
|
|
161
161
|
async parseStreamDirect(stream) {
|
|
162
|
-
const parser = new SaxParser();
|
|
162
|
+
const parser = new SaxParser({ invalidCharHandling: "skip" });
|
|
163
163
|
const decoder = new TextDecoder("utf-8", { fatal: true });
|
|
164
164
|
let done = false;
|
|
165
165
|
let finalModel;
|
|
@@ -87,7 +87,8 @@ function parseXml(xml, options) {
|
|
|
87
87
|
fragment: options?.fragment ?? false,
|
|
88
88
|
xmlns: options?.xmlns ?? false,
|
|
89
89
|
maxDepth: options?.maxDepth,
|
|
90
|
-
maxEntityExpansions: options?.maxEntityExpansions
|
|
90
|
+
maxEntityExpansions: options?.maxEntityExpansions,
|
|
91
|
+
invalidCharHandling: options?.invalidCharHandling
|
|
91
92
|
});
|
|
92
93
|
// Stack of elements being built. The bottom is a synthetic root
|
|
93
94
|
// that collects top-level nodes.
|