@ai-sdk-tool/rxml 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +391 -46
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +17 -1
- package/dist/index.d.ts +17 -1
- package/dist/index.js +385 -45
- package/dist/index.js.map +1 -1
- package/package.json +4 -14
- package/index.d.ts +0 -12
package/dist/index.d.cts
CHANGED
|
@@ -184,6 +184,10 @@ declare class XMLTokenizer {
|
|
|
184
184
|
setPosition(pos: number): void;
|
|
185
185
|
}
|
|
186
186
|
|
|
187
|
+
declare function unwrapJsonSchema(schema: unknown): unknown;
|
|
188
|
+
declare function getSchemaType(schema: unknown): string | undefined;
|
|
189
|
+
declare function coerceBySchema(value: unknown, schema?: unknown): unknown;
|
|
190
|
+
|
|
187
191
|
/**
|
|
188
192
|
* Schema-aware type coercion for robust-xml
|
|
189
193
|
* Integrates with the existing coercion system but adds XML-specific handling
|
|
@@ -238,6 +242,13 @@ declare function countTagOccurrences(xmlContent: string, tagName: string, exclud
|
|
|
238
242
|
start: number;
|
|
239
243
|
end: number;
|
|
240
244
|
}>, shouldSkipFirst?: boolean): number;
|
|
245
|
+
/**
|
|
246
|
+
* Find all top-level ranges for a tag (for handling duplicates)
|
|
247
|
+
*/
|
|
248
|
+
declare function findAllTopLevelRanges(xmlContent: string, tagName: string): Array<{
|
|
249
|
+
start: number;
|
|
250
|
+
end: number;
|
|
251
|
+
}>;
|
|
241
252
|
|
|
242
253
|
/**
|
|
243
254
|
* XML stringification based on TXML's stringify approach
|
|
@@ -261,6 +272,11 @@ declare function stringifyNode(node: RXMLNode, depth?: number, format?: boolean,
|
|
|
261
272
|
*/
|
|
262
273
|
declare function toContentString(nodes: (RXMLNode | string)[]): string;
|
|
263
274
|
|
|
275
|
+
/**
|
|
276
|
+
* Unescape XML entities
|
|
277
|
+
*/
|
|
278
|
+
declare function unescapeXml(text: string): string;
|
|
279
|
+
|
|
264
280
|
/**
|
|
265
281
|
* Error classes for robust-xml parser
|
|
266
282
|
*/
|
|
@@ -292,4 +308,4 @@ interface Options {
|
|
|
292
308
|
onError?: (message: string, context?: Record<string, unknown>) => void;
|
|
293
309
|
}
|
|
294
310
|
|
|
295
|
-
export { type Options, type ParseOptions, RXMLCoercionError, RXMLDuplicateStringTagError, type RXMLNode, RXMLParseError, RXMLStreamError, RXMLStringifyError, type StringifyOptions, XMLTokenizer, XMLTransformStream, coerceDomBySchema, countTagOccurrences, createXMLStream, domToObject, extractRawInner, filter, findElementByIdStream, findElementsByClassStream, findFirstTopLevelRange, getPropertySchema, getStringTypedProperties, parse, parseFromStream, parseNode, parseWithoutSchema, processArrayContent, processIndexedTuple, processXMLStream, simplify, stringify, stringifyNode, stringifyNodes, toContentString };
|
|
311
|
+
export { type Options, type ParseOptions, RXMLCoercionError, RXMLDuplicateStringTagError, type RXMLNode, RXMLParseError, RXMLStreamError, RXMLStringifyError, type StringifyOptions, XMLTokenizer, XMLTransformStream, coerceBySchema, coerceDomBySchema, countTagOccurrences, createXMLStream, domToObject, extractRawInner, filter, findAllTopLevelRanges, findElementByIdStream, findElementsByClassStream, findFirstTopLevelRange, getPropertySchema, getSchemaType, getStringTypedProperties, parse, parseFromStream, parseNode, parseWithoutSchema, processArrayContent, processIndexedTuple, processXMLStream, simplify, stringify, stringifyNode, stringifyNodes, toContentString, unescapeXml, unwrapJsonSchema };
|
package/dist/index.d.ts
CHANGED
|
@@ -184,6 +184,10 @@ declare class XMLTokenizer {
|
|
|
184
184
|
setPosition(pos: number): void;
|
|
185
185
|
}
|
|
186
186
|
|
|
187
|
+
declare function unwrapJsonSchema(schema: unknown): unknown;
|
|
188
|
+
declare function getSchemaType(schema: unknown): string | undefined;
|
|
189
|
+
declare function coerceBySchema(value: unknown, schema?: unknown): unknown;
|
|
190
|
+
|
|
187
191
|
/**
|
|
188
192
|
* Schema-aware type coercion for robust-xml
|
|
189
193
|
* Integrates with the existing coercion system but adds XML-specific handling
|
|
@@ -238,6 +242,13 @@ declare function countTagOccurrences(xmlContent: string, tagName: string, exclud
|
|
|
238
242
|
start: number;
|
|
239
243
|
end: number;
|
|
240
244
|
}>, shouldSkipFirst?: boolean): number;
|
|
245
|
+
/**
|
|
246
|
+
* Find all top-level ranges for a tag (for handling duplicates)
|
|
247
|
+
*/
|
|
248
|
+
declare function findAllTopLevelRanges(xmlContent: string, tagName: string): Array<{
|
|
249
|
+
start: number;
|
|
250
|
+
end: number;
|
|
251
|
+
}>;
|
|
241
252
|
|
|
242
253
|
/**
|
|
243
254
|
* XML stringification based on TXML's stringify approach
|
|
@@ -261,6 +272,11 @@ declare function stringifyNode(node: RXMLNode, depth?: number, format?: boolean,
|
|
|
261
272
|
*/
|
|
262
273
|
declare function toContentString(nodes: (RXMLNode | string)[]): string;
|
|
263
274
|
|
|
275
|
+
/**
|
|
276
|
+
* Unescape XML entities
|
|
277
|
+
*/
|
|
278
|
+
declare function unescapeXml(text: string): string;
|
|
279
|
+
|
|
264
280
|
/**
|
|
265
281
|
* Error classes for robust-xml parser
|
|
266
282
|
*/
|
|
@@ -292,4 +308,4 @@ interface Options {
|
|
|
292
308
|
onError?: (message: string, context?: Record<string, unknown>) => void;
|
|
293
309
|
}
|
|
294
310
|
|
|
295
|
-
export { type Options, type ParseOptions, RXMLCoercionError, RXMLDuplicateStringTagError, type RXMLNode, RXMLParseError, RXMLStreamError, RXMLStringifyError, type StringifyOptions, XMLTokenizer, XMLTransformStream, coerceDomBySchema, countTagOccurrences, createXMLStream, domToObject, extractRawInner, filter, findElementByIdStream, findElementsByClassStream, findFirstTopLevelRange, getPropertySchema, getStringTypedProperties, parse, parseFromStream, parseNode, parseWithoutSchema, processArrayContent, processIndexedTuple, processXMLStream, simplify, stringify, stringifyNode, stringifyNodes, toContentString };
|
|
311
|
+
export { type Options, type ParseOptions, RXMLCoercionError, RXMLDuplicateStringTagError, type RXMLNode, RXMLParseError, RXMLStreamError, RXMLStringifyError, type StringifyOptions, XMLTokenizer, XMLTransformStream, coerceBySchema, coerceDomBySchema, countTagOccurrences, createXMLStream, domToObject, extractRawInner, filter, findAllTopLevelRanges, findElementByIdStream, findElementsByClassStream, findFirstTopLevelRange, getPropertySchema, getSchemaType, getStringTypedProperties, parse, parseFromStream, parseNode, parseWithoutSchema, processArrayContent, processIndexedTuple, processXMLStream, simplify, stringify, stringifyNode, stringifyNodes, toContentString, unescapeXml, unwrapJsonSchema };
|
package/dist/index.js
CHANGED
|
@@ -340,22 +340,33 @@ function coerceDomBySchema(domObject, schema) {
|
|
|
340
340
|
}
|
|
341
341
|
}
|
|
342
342
|
/**
 * Collect the names of every property declared with type "string" anywhere in
 * a JSON-schema-like definition.
 *
 * The schema is walked recursively: object schemas contribute their
 * string-typed property names and are descended into for nested object/array
 * properties; array schemas are descended into through `items` and each entry
 * of `prefixItems`.
 *
 * @param schema Schema (possibly wrapped; it is passed through
 *   `unwrapJsonSchema` before inspection).
 * @returns A Set of property names whose schema type is "string".
 */
function getStringTypedProperties(schema) {
  const stringKeys = /* @__PURE__ */ new Set();

  function walk(node) {
    const resolved = unwrapJsonSchema(node);
    // Anything that is not a non-null object carries no properties to inspect.
    if (resolved === null || typeof resolved !== "object") return;

    const kind = getSchemaType(resolved);

    if (kind === "array") {
      const elementSchema = resolved.items;
      if (elementSchema) walk(elementSchema);
      const tupleSchemas = resolved.prefixItems;
      if (Array.isArray(tupleSchemas)) {
        tupleSchemas.forEach((entry) => walk(entry));
      }
      return;
    }

    if (kind !== "object") return;

    const properties = resolved.properties;
    if (!properties || typeof properties !== "object") return;

    for (const [name, child] of Object.entries(properties)) {
      switch (getSchemaType(child)) {
        case "string":
          stringKeys.add(name);
          break;
        case "object":
        case "array":
          // Containers may hold further string-typed properties.
          walk(child);
          break;
        default:
          break;
      }
    }
  }

  walk(schema);
  return stringKeys;
}
|
|
360
371
|
function processArrayContent(value, schema, textNodeName) {
|
|
361
372
|
if (!Array.isArray(value)) return value;
|
|
@@ -499,6 +510,9 @@ function escapeXmlMinimalAttr(value, wrapper = '"') {
|
|
|
499
510
|
}
|
|
500
511
|
return escaped;
|
|
501
512
|
}
|
|
513
|
+
/**
 * Unescape the five predefined XML entities in a string.
 *
 * `&amp;` is decoded last so that an escaped entity such as `&amp;lt;`
 * becomes the literal text `&lt;` instead of being decoded twice into `<`.
 *
 * @param text Text that may contain `&lt;`, `&gt;`, `&quot;`, `&apos;` or `&amp;`.
 * @returns The text with those entities replaced by the characters they denote.
 */
function unescapeXml(text) {
  return text
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&amp;/g, "&");
}
|
|
502
516
|
|
|
503
517
|
// src/schema/extraction.ts
|
|
504
518
|
function extractRawInner(xmlContent, tagName) {
|
|
@@ -516,6 +530,11 @@ function extractRawInner(xmlContent, tagName) {
|
|
|
516
530
|
if (i >= len) return void 0;
|
|
517
531
|
const ch = xmlContent[i];
|
|
518
532
|
if (ch === "!") {
|
|
533
|
+
if (xmlContent.startsWith("!DOCTYPE", i + 1)) {
|
|
534
|
+
const gt2 = xmlContent.indexOf(">", i + 1);
|
|
535
|
+
i = gt2 === -1 ? len : gt2 + 1;
|
|
536
|
+
continue;
|
|
537
|
+
}
|
|
519
538
|
if (xmlContent.startsWith("!--", i + 1)) {
|
|
520
539
|
const close = xmlContent.indexOf("-->", i + 4);
|
|
521
540
|
i = close === -1 ? len : close + 3;
|
|
@@ -580,6 +599,11 @@ function extractRawInner(xmlContent, tagName) {
|
|
|
580
599
|
if (nx >= len) break;
|
|
581
600
|
const h = xmlContent[nx];
|
|
582
601
|
if (h === "!") {
|
|
602
|
+
if (xmlContent.startsWith("!DOCTYPE", nx + 1)) {
|
|
603
|
+
const gt22 = xmlContent.indexOf(">", nx + 1);
|
|
604
|
+
pos = gt22 === -1 ? len : gt22 + 1;
|
|
605
|
+
continue;
|
|
606
|
+
}
|
|
583
607
|
if (xmlContent.startsWith("!--", nx + 1)) {
|
|
584
608
|
const close = xmlContent.indexOf("-->", nx + 4);
|
|
585
609
|
pos = close === -1 ? len : close + 3;
|
|
@@ -660,6 +684,155 @@ function extractRawInner(xmlContent, tagName) {
|
|
|
660
684
|
}
|
|
661
685
|
return void 0;
|
|
662
686
|
}
|
|
687
|
+
/**
 * Locate the inner-content range of every `tagName` element in `xmlContent`.
 *
 * For each match, `start` is the index just after the opening tag's `>` and
 * `end` is the index of the `<` of the matching closing tag. A self-closing
 * match yields an empty range (`start === end`). Same-named elements nested
 * inside a match are balanced but not reported separately, because scanning
 * resumes after the closing tag. Scanning stops at the first matching element
 * that is never closed.
 *
 * Comments, CDATA sections, processing instructions and other `<!...>`
 * declarations are skipped, so markup-like text inside them is ignored.
 *
 * @param xmlContent XML text to scan.
 * @param tagName Exact element name to look for.
 * @returns Array of `{ start, end }` inner ranges in document order.
 */
function findAllInnerRanges(xmlContent, tagName) {
  const len = xmlContent.length;
  const ranges = [];

  // `p` is the index of the "!" or "?" that directly follows a "<".
  // Returns the index at which scanning should resume.
  const skipSpecial = (p) => {
    if (xmlContent[p] === "?") {
      const piClose = xmlContent.indexOf("?>", p + 1);
      return piClose === -1 ? len : piClose + 2;
    }
    // The previous code tested these prefixes at p + 1, one character past
    // the "!", so the comment/CDATA branches could never match.
    if (xmlContent.startsWith("!--", p)) {
      const commentClose = xmlContent.indexOf("-->", p + 3);
      return commentClose === -1 ? len : commentClose + 3;
    }
    if (xmlContent.startsWith("![CDATA[", p)) {
      const cdataClose = xmlContent.indexOf("]]>", p + 8);
      return cdataClose === -1 ? len : cdataClose + 3;
    }
    const gt = xmlContent.indexOf(">", p + 1);
    return gt === -1 ? len : gt + 1;
  };

  // Returns the index just past the XML name starting at `p` (or `p` itself
  // when no valid name starts there).
  const nameEnd = (p) => {
    let e = p;
    if (e < len && isNameStartChar(xmlContent[e])) {
      e++;
      while (e < len && isNameChar(xmlContent[e])) e++;
    }
    return e;
  };

  // Scans the remainder of an opening tag from `p`, stepping over quoted
  // attribute values. `end` is the index of the closing ">" (or the scan
  // limit when the tag is unterminated).
  const scanTag = (p) => {
    let k = p;
    while (k < len) {
      const c = xmlContent[k];
      if (c === '"' || c === "'") {
        k = skipQuoted(xmlContent, k);
        continue;
      }
      if (c === ">") return { end: k, selfClosing: false };
      if (c === "/" && xmlContent[k + 1] === ">") {
        return { end: k + 1, selfClosing: true };
      }
      k++;
    }
    return { end: k, selfClosing: false };
  };

  // Starting inside an open `tagName` element, finds its matching close tag.
  // Returns `{ innerEnd, next }` or undefined when the element is unclosed.
  const findClose = (from) => {
    let pos = from;
    let sameDepth = 1;
    while (pos < len) {
      const nextLt = xmlContent.indexOf("<", pos);
      if (nextLt === -1) break;
      const nx = nextLt + 1;
      if (nx >= len) break;
      const h = xmlContent[nx];
      if (h === "!" || h === "?") {
        pos = skipSpecial(nx);
        continue;
      }
      if (h === "/") {
        const e = nameEnd(nx + 1);
        const gt = xmlContent.indexOf(">", e);
        const next = gt === -1 ? len : gt + 1;
        if (xmlContent.slice(nx + 1, e) === tagName) {
          sameDepth--;
          if (sameDepth === 0) return { innerEnd: nextLt, next };
        }
        pos = next;
        continue;
      }
      const e = nameEnd(nx);
      const nested = scanTag(e);
      // A nested self-closing element of the same name needs no close tag.
      if (xmlContent.slice(nx, e) === tagName && !nested.selfClosing) {
        sameDepth++;
      }
      pos = nested.end + 1;
    }
    return undefined;
  };

  let i = 0;
  while (i < len) {
    const lt = xmlContent.indexOf("<", i);
    if (lt === -1) break;
    const at = lt + 1;
    if (at >= len) break;
    const ch = xmlContent[at];

    if (ch === "!" || ch === "?") {
      i = skipSpecial(at);
      continue;
    }
    if (ch === "/") {
      // Closing tag seen outside any tracked element: step over it.
      const gt = xmlContent.indexOf(">", at + 1);
      i = gt === -1 ? len : gt + 1;
      continue;
    }

    const afterName = nameEnd(at);
    const open = scanTag(afterName);
    const contentStart = open.end + 1;

    if (xmlContent.slice(at, afterName) !== tagName) {
      i = contentStart;
      continue;
    }
    if (open.selfClosing) {
      ranges.push({ start: contentStart, end: contentStart });
      i = contentStart;
      continue;
    }

    const close = findClose(contentStart);
    // Unclosed target element: nothing further can be ranged reliably.
    if (close === undefined) break;
    ranges.push({ start: contentStart, end: close.innerEnd });
    i = close.next;
  }
  return ranges;
}
|
|
663
836
|
function findFirstTopLevelRange(xmlContent, tagName) {
|
|
664
837
|
const len = xmlContent.length;
|
|
665
838
|
const target = tagName;
|
|
@@ -672,6 +845,11 @@ function findFirstTopLevelRange(xmlContent, tagName) {
|
|
|
672
845
|
if (i >= len) return void 0;
|
|
673
846
|
const ch = xmlContent[i];
|
|
674
847
|
if (ch === "!") {
|
|
848
|
+
if (xmlContent.startsWith("!DOCTYPE", i + 1)) {
|
|
849
|
+
const gt2 = xmlContent.indexOf(">", i + 1);
|
|
850
|
+
i = gt2 === -1 ? len : gt2 + 1;
|
|
851
|
+
continue;
|
|
852
|
+
}
|
|
675
853
|
if (xmlContent.startsWith("!--", i + 1)) {
|
|
676
854
|
const close = xmlContent.indexOf("-->", i + 4);
|
|
677
855
|
i = close === -1 ? len : close + 3;
|
|
@@ -730,6 +908,11 @@ function findFirstTopLevelRange(xmlContent, tagName) {
|
|
|
730
908
|
if (nx >= len) break;
|
|
731
909
|
const h = xmlContent[nx];
|
|
732
910
|
if (h === "!") {
|
|
911
|
+
if (xmlContent.startsWith("!DOCTYPE", nx + 1)) {
|
|
912
|
+
const gt22 = xmlContent.indexOf(">", nx + 1);
|
|
913
|
+
pos = gt22 === -1 ? len : gt22 + 1;
|
|
914
|
+
continue;
|
|
915
|
+
}
|
|
733
916
|
if (xmlContent.startsWith("!--", nx + 1)) {
|
|
734
917
|
const close = xmlContent.indexOf("-->", nx + 4);
|
|
735
918
|
pos = close === -1 ? len : close + 3;
|
|
@@ -877,6 +1060,101 @@ function countTagOccurrences(xmlContent, tagName, excludeRanges, shouldSkipFirst
|
|
|
877
1060
|
}
|
|
878
1061
|
return count;
|
|
879
1062
|
}
|
|
1063
|
+
/**
 * Find the full-element range of every `tagName` element that is not nested
 * inside another `tagName` element (used for handling duplicate tags).
 *
 * For each match, `start` is the index of the opening `<` and `end` is the
 * index just past the `>` of the matching closing tag (or of `/>` for a
 * self-closing element). Nesting inside differently named elements is not
 * tracked. An element with no matching closing tag produces no range, and
 * scanning continues inside it.
 *
 * Fixes relative to the previous implementation:
 *  - scanning resumes after a matched element, so later siblings are found
 *    (previously the element's own close tag made the depth counter negative
 *    and every following occurrence was skipped);
 *  - self-closing elements are detected;
 *  - comments, CDATA sections and declarations are skipped correctly;
 *  - an unterminated final close tag no longer yields `end: -1`.
 *
 * NOTE(review): assumes `parseName(text, pos)` returns `{ name, newPos }` with
 * `newPos` just past the name, as its use in this file suggests — confirm.
 *
 * @param xmlContent XML text to scan.
 * @param tagName Exact element name to look for.
 * @returns Array of `{ start, end }` element ranges in document order.
 */
function findAllTopLevelRanges(xmlContent, tagName) {
  const ranges = [];
  const len = xmlContent.length;

  // `p` is the index of the "!" or "?" that directly follows a "<".
  // Returns the index at which scanning should resume.
  const skipSpecial = (p) => {
    if (xmlContent[p] === "?") {
      const piClose = xmlContent.indexOf("?>", p + 1);
      return piClose === -1 ? len : piClose + 2;
    }
    if (xmlContent.startsWith("!--", p)) {
      const commentClose = xmlContent.indexOf("-->", p + 3);
      return commentClose === -1 ? len : commentClose + 3;
    }
    if (xmlContent.startsWith("![CDATA[", p)) {
      const cdataClose = xmlContent.indexOf("]]>", p + 8);
      return cdataClose === -1 ? len : cdataClose + 3;
    }
    // DOCTYPE and any other declaration: skip to the next ">".
    const gt = xmlContent.indexOf(">", p + 1);
    return gt === -1 ? len : gt + 1;
  };

  // Scans the remainder of an opening tag from `p`, stepping over quoted
  // attribute values. `end` is the index of the closing ">" (or the scan
  // limit when the tag is unterminated).
  const scanTag = (p) => {
    let k = p;
    while (k < len && xmlContent[k] !== ">") {
      const c = xmlContent[k];
      if (c === '"' || c === "'") {
        k = skipQuoted(xmlContent, k);
        continue;
      }
      if (c === "/" && xmlContent[k + 1] === ">") {
        return { end: k + 1, selfClosing: true };
      }
      k++;
    }
    return { end: k, selfClosing: false };
  };

  // Starting inside an open `tagName` element, returns the index just past
  // its matching close tag, or -1 when the element is never closed.
  const findElementEnd = (from) => {
    let pos = from;
    let closeDepth = 1;
    while (pos < len) {
      const nextLt = xmlContent.indexOf("<", pos);
      if (nextLt === -1) return -1;
      const nx = nextLt + 1;
      if (nx >= len) return -1;
      const h = xmlContent[nx];
      if (h === "!" || h === "?") {
        pos = skipSpecial(nx);
        continue;
      }
      if (h === "/") {
        const { name: closeName, newPos: afterClose } = parseName(xmlContent, nx + 1);
        const gt = xmlContent.indexOf(">", afterClose);
        if (gt === -1) return -1;
        if (closeName === tagName) {
          closeDepth--;
          if (closeDepth === 0) return gt + 1;
        }
        pos = gt + 1;
        continue;
      }
      const { name: openName, newPos: afterOpen } = parseName(xmlContent, nx);
      const nested = scanTag(afterOpen);
      // A nested self-closing element of the same name needs no close tag.
      if (openName === tagName && !nested.selfClosing) closeDepth++;
      pos = nested.end + 1;
    }
    return -1;
  };

  let i = 0;
  while (i < len) {
    const lt = xmlContent.indexOf("<", i);
    if (lt === -1) break;
    const at = lt + 1;
    if (at >= len) break;
    const ch = xmlContent[at];

    if (ch === "!" || ch === "?") {
      i = skipSpecial(at);
      continue;
    }
    if (ch === "/") {
      // Stray close tag (matched elements are skipped as a whole): ignore it.
      const gt = xmlContent.indexOf(">", at + 1);
      if (gt === -1) break;
      i = gt + 1;
      continue;
    }

    const { name, newPos } = parseName(xmlContent, at);
    const open = scanTag(newPos);
    const afterOpenTag = open.end + 1;

    if (name !== tagName) {
      i = afterOpenTag;
      continue;
    }
    if (open.selfClosing) {
      ranges.push({ start: lt, end: afterOpenTag });
      i = afterOpenTag;
      continue;
    }

    const elementEnd = findElementEnd(afterOpenTag);
    if (elementEnd === -1) {
      // Unclosed element: record nothing and keep scanning inside it.
      i = afterOpenTag;
      continue;
    }
    ranges.push({ start: lt, end: elementEnd });
    // Resume after the whole element so its nested content and close tag
    // are not rescanned.
    i = elementEnd;
  }
  return ranges;
}
|
|
880
1158
|
|
|
881
1159
|
// src/core/tokenizer.ts
|
|
882
1160
|
var XMLTokenizer = class {
|
|
@@ -1128,6 +1406,30 @@ var XMLTokenizer = class {
|
|
|
1128
1406
|
};
|
|
1129
1407
|
|
|
1130
1408
|
// src/core/parser.ts
|
|
1409
|
+
/**
 * Recursively unescape XML entities in every string found in `input`,
 * following `schema` to decide how to descend.
 *
 * - A null/undefined `input` or `schema` returns `input` untouched.
 * - Any string input is passed through `unescapeXml`.
 * - For an "array" schema with an array input, each item is decoded against
 *   the unwrapped schema's `items` (an empty schema when absent).
 * - For an "object" schema with an object input, a new plain object is built
 *   whose values are decoded against the matching property schema.
 * - Everything else is returned as-is.
 *
 * @param input Value to decode.
 * @param schema JSON-schema-like description of `input`.
 * @returns The decoded value; arrays and objects are copied, not mutated.
 */
function deepDecodeStringsBySchema(input, schema) {
  if (input === null || input === void 0) return input;
  if (schema === null || schema === void 0) return input;

  const schemaType = getSchemaType(schema);

  // Strings are decoded whatever the declared schema type is.
  if (typeof input === "string") {
    return unescapeXml(input);
  }

  if (schemaType === "array" && Array.isArray(input)) {
    const resolved = unwrapJsonSchema(schema);
    const declaredItems = resolved == null ? void 0 : resolved.items;
    const elementSchema = declaredItems == null ? {} : declaredItems;
    return input.map((element) => deepDecodeStringsBySchema(element, elementSchema));
  }

  if (schemaType === "object" && typeof input === "object") {
    const decoded = {};
    Object.keys(input).forEach((prop) => {
      const propSchema = getPropertySchema(schema, prop);
      decoded[prop] = deepDecodeStringsBySchema(input[prop], propSchema);
    });
    return decoded;
  }

  return input;
}
|
|
1131
1433
|
function parse(xmlInner, schema, options = {}) {
|
|
1132
1434
|
var _a, _b, _c;
|
|
1133
1435
|
const textNodeName = (_a = options.textNodeName) != null ? _a : "#text";
|
|
@@ -1193,11 +1495,25 @@ function parse(xmlInner, schema, options = {}) {
|
|
|
1193
1495
|
}
|
|
1194
1496
|
}
|
|
1195
1497
|
}
|
|
1196
|
-
const
|
|
1498
|
+
const getTopLevelStringProps = (s) => {
|
|
1499
|
+
const set = /* @__PURE__ */ new Set();
|
|
1500
|
+
const unwrapped = unwrapJsonSchema(s);
|
|
1501
|
+
if (unwrapped && typeof unwrapped === "object") {
|
|
1502
|
+
const props = unwrapped.properties;
|
|
1503
|
+
if (props && typeof props === "object") {
|
|
1504
|
+
for (const [k, v] of Object.entries(props)) {
|
|
1505
|
+
if (getSchemaType(v) === "string") set.add(k);
|
|
1506
|
+
}
|
|
1507
|
+
}
|
|
1508
|
+
}
|
|
1509
|
+
return set;
|
|
1510
|
+
};
|
|
1511
|
+
const topLevelStringProps = getTopLevelStringProps(schema);
|
|
1512
|
+
const deepStringTypedProps = getStringTypedProperties(schema);
|
|
1197
1513
|
const duplicateKeys = /* @__PURE__ */ new Set();
|
|
1198
|
-
for (const key of
|
|
1514
|
+
for (const key of topLevelStringProps) {
|
|
1199
1515
|
const excludeRanges = [];
|
|
1200
|
-
for (const other of
|
|
1516
|
+
for (const other of topLevelStringProps) {
|
|
1201
1517
|
if (other === key) continue;
|
|
1202
1518
|
const range = findFirstTopLevelRange(actualXmlInner, other);
|
|
1203
1519
|
if (range) excludeRanges.push(range);
|
|
@@ -1227,37 +1543,30 @@ function parse(xmlInner, schema, options = {}) {
|
|
|
1227
1543
|
const originalContentMap = /* @__PURE__ */ new Map();
|
|
1228
1544
|
try {
|
|
1229
1545
|
const ranges = [];
|
|
1230
|
-
for (const key of
|
|
1231
|
-
const
|
|
1232
|
-
|
|
1546
|
+
for (const key of deepStringTypedProps) {
|
|
1547
|
+
const innerRanges = findAllInnerRanges(actualXmlInner, key);
|
|
1548
|
+
for (const r of innerRanges) {
|
|
1549
|
+
if (r.end > r.start) ranges.push({ ...r, key });
|
|
1550
|
+
}
|
|
1233
1551
|
}
|
|
1234
1552
|
if (ranges.length > 0) {
|
|
1235
1553
|
const sorted = [...ranges].sort((a, b) => a.start - b.start);
|
|
1236
|
-
|
|
1554
|
+
let rebuilt = "";
|
|
1555
|
+
let cursor = 0;
|
|
1237
1556
|
for (const r of sorted) {
|
|
1238
|
-
|
|
1239
|
-
if (last && r.start >= last.start && r.end <= last.end) {
|
|
1557
|
+
if (r.start < cursor) {
|
|
1240
1558
|
continue;
|
|
1241
1559
|
}
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
for (const r of filtered) {
|
|
1249
|
-
if (cursor < r.start)
|
|
1250
|
-
rebuilt += actualXmlInner.slice(cursor, r.start);
|
|
1251
|
-
const placeholder = `__RXML_PLACEHOLDER_${r.key}__`;
|
|
1252
|
-
const originalContent = actualXmlInner.slice(r.start, r.end);
|
|
1253
|
-
originalContentMap.set(placeholder, originalContent);
|
|
1254
|
-
rebuilt += placeholder;
|
|
1255
|
-
cursor = r.end;
|
|
1256
|
-
}
|
|
1257
|
-
if (cursor < actualXmlInner.length)
|
|
1258
|
-
rebuilt += actualXmlInner.slice(cursor);
|
|
1259
|
-
xmlInnerForParsing = rebuilt;
|
|
1560
|
+
if (cursor < r.start) rebuilt += actualXmlInner.slice(cursor, r.start);
|
|
1561
|
+
const placeholder = `__RXML_PLACEHOLDER_${r.key}_${r.start}_${r.end}__`;
|
|
1562
|
+
const originalContent = actualXmlInner.slice(r.start, r.end);
|
|
1563
|
+
originalContentMap.set(placeholder, originalContent);
|
|
1564
|
+
rebuilt += placeholder;
|
|
1565
|
+
cursor = r.end;
|
|
1260
1566
|
}
|
|
1567
|
+
if (cursor < actualXmlInner.length)
|
|
1568
|
+
rebuilt += actualXmlInner.slice(cursor);
|
|
1569
|
+
xmlInnerForParsing = rebuilt;
|
|
1261
1570
|
}
|
|
1262
1571
|
} catch (error) {
|
|
1263
1572
|
if (options.onError) {
|
|
@@ -1281,9 +1590,35 @@ function parse(xmlInner, schema, options = {}) {
|
|
|
1281
1590
|
throw new RXMLParseError("Failed to parse XML", cause);
|
|
1282
1591
|
}
|
|
1283
1592
|
const parsedArgs = domToObject(parsedNodes, schema, textNodeName);
|
|
1593
|
+
const restorePlaceholdersDeep = (val) => {
|
|
1594
|
+
if (val == null) return val;
|
|
1595
|
+
if (typeof val === "string") {
|
|
1596
|
+
if (val.startsWith("__RXML_PLACEHOLDER_")) {
|
|
1597
|
+
const orig = originalContentMap.get(val);
|
|
1598
|
+
return orig !== void 0 ? orig : val;
|
|
1599
|
+
}
|
|
1600
|
+
return val;
|
|
1601
|
+
}
|
|
1602
|
+
if (Array.isArray(val)) return val.map(restorePlaceholdersDeep);
|
|
1603
|
+
if (typeof val === "object") {
|
|
1604
|
+
const obj = val;
|
|
1605
|
+
const out = {};
|
|
1606
|
+
for (const [k, v] of Object.entries(obj)) {
|
|
1607
|
+
const restored = restorePlaceholdersDeep(v);
|
|
1608
|
+
if (k === textNodeName && typeof restored === "string") {
|
|
1609
|
+
out[k] = restored.trim();
|
|
1610
|
+
} else {
|
|
1611
|
+
out[k] = restored;
|
|
1612
|
+
}
|
|
1613
|
+
}
|
|
1614
|
+
return out;
|
|
1615
|
+
}
|
|
1616
|
+
return val;
|
|
1617
|
+
};
|
|
1618
|
+
const parsedArgsRestored = restorePlaceholdersDeep(parsedArgs);
|
|
1284
1619
|
const args = {};
|
|
1285
|
-
for (const k of Object.keys(
|
|
1286
|
-
const v =
|
|
1620
|
+
for (const k of Object.keys(parsedArgsRestored || {})) {
|
|
1621
|
+
const v = parsedArgsRestored[k];
|
|
1287
1622
|
let val = v;
|
|
1288
1623
|
const propSchema = getPropertySchema(schema, k);
|
|
1289
1624
|
const propType = getSchemaType(propSchema);
|
|
@@ -1393,7 +1728,7 @@ function parse(xmlInner, schema, options = {}) {
|
|
|
1393
1728
|
}
|
|
1394
1729
|
args[k] = typeof val === "string" ? val.trim() : val;
|
|
1395
1730
|
}
|
|
1396
|
-
for (const key of
|
|
1731
|
+
for (const key of topLevelStringProps) {
|
|
1397
1732
|
if (!Object.prototype.hasOwnProperty.call(args, key)) {
|
|
1398
1733
|
const raw = extractRawInner(actualXmlInner, key);
|
|
1399
1734
|
if (typeof raw === "string") {
|
|
@@ -1416,7 +1751,8 @@ function parse(xmlInner, schema, options = {}) {
|
|
|
1416
1751
|
}
|
|
1417
1752
|
try {
|
|
1418
1753
|
const coerced = coerceDomBySchema(dataToCoerce, schema);
|
|
1419
|
-
|
|
1754
|
+
const decoded = deepDecodeStringsBySchema(coerced, schema);
|
|
1755
|
+
return decoded;
|
|
1420
1756
|
} catch (error) {
|
|
1421
1757
|
throw new RXMLCoercionError("Failed to coerce by schema", error);
|
|
1422
1758
|
}
|
|
@@ -1650,7 +1986,6 @@ var XMLTransformStream = class extends Transform {
|
|
|
1650
1986
|
continue;
|
|
1651
1987
|
}
|
|
1652
1988
|
}
|
|
1653
|
-
const closingTag = `</${tagName}>`;
|
|
1654
1989
|
let depth = 1;
|
|
1655
1990
|
let searchStart = openTagEnd + 1;
|
|
1656
1991
|
let elementEnd = -1;
|
|
@@ -2041,16 +2376,19 @@ export {
|
|
|
2041
2376
|
RXMLStringifyError,
|
|
2042
2377
|
XMLTokenizer,
|
|
2043
2378
|
XMLTransformStream,
|
|
2379
|
+
coerceBySchema,
|
|
2044
2380
|
coerceDomBySchema,
|
|
2045
2381
|
countTagOccurrences,
|
|
2046
2382
|
createXMLStream,
|
|
2047
2383
|
domToObject,
|
|
2048
2384
|
extractRawInner,
|
|
2049
2385
|
filter,
|
|
2386
|
+
findAllTopLevelRanges,
|
|
2050
2387
|
findElementByIdStream,
|
|
2051
2388
|
findElementsByClassStream,
|
|
2052
2389
|
findFirstTopLevelRange,
|
|
2053
2390
|
getPropertySchema,
|
|
2391
|
+
getSchemaType,
|
|
2054
2392
|
getStringTypedProperties,
|
|
2055
2393
|
parse,
|
|
2056
2394
|
parseFromStream,
|
|
@@ -2063,6 +2401,8 @@ export {
|
|
|
2063
2401
|
stringify,
|
|
2064
2402
|
stringifyNode,
|
|
2065
2403
|
stringifyNodes,
|
|
2066
|
-
toContentString
|
|
2404
|
+
toContentString,
|
|
2405
|
+
unescapeXml,
|
|
2406
|
+
unwrapJsonSchema
|
|
2067
2407
|
};
|
|
2068
2408
|
//# sourceMappingURL=index.js.map
|