@eksml/xml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +588 -0
- package/dist/converters/fromLossless.d.mts +14 -0
- package/dist/converters/fromLossless.d.mts.map +1 -0
- package/dist/converters/fromLossless.mjs +35 -0
- package/dist/converters/fromLossless.mjs.map +1 -0
- package/dist/converters/fromLossy.d.mts +18 -0
- package/dist/converters/fromLossy.d.mts.map +1 -0
- package/dist/converters/fromLossy.mjs +91 -0
- package/dist/converters/fromLossy.mjs.map +1 -0
- package/dist/converters/lossless.d.mts +39 -0
- package/dist/converters/lossless.d.mts.map +1 -0
- package/dist/converters/lossless.mjs +74 -0
- package/dist/converters/lossless.mjs.map +1 -0
- package/dist/converters/lossy.d.mts +42 -0
- package/dist/converters/lossy.d.mts.map +1 -0
- package/dist/converters/lossy.mjs +158 -0
- package/dist/converters/lossy.mjs.map +1 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs +30 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs.map +1 -0
- package/dist/parser-BfdEfWDg.d.mts +95 -0
- package/dist/parser-BfdEfWDg.d.mts.map +1 -0
- package/dist/parser-CYq309aR.mjs +479 -0
- package/dist/parser-CYq309aR.mjs.map +1 -0
- package/dist/parser.d.mts +2 -0
- package/dist/parser.mjs +2 -0
- package/dist/sax.d.mts +64 -0
- package/dist/sax.d.mts.map +1 -0
- package/dist/sax.mjs +70 -0
- package/dist/sax.mjs.map +1 -0
- package/dist/saxEngine-BDnD7ruG.mjs +750 -0
- package/dist/saxEngine-BDnD7ruG.mjs.map +1 -0
- package/dist/utilities/index.d.mts +88 -0
- package/dist/utilities/index.d.mts.map +1 -0
- package/dist/utilities/index.mjs +87 -0
- package/dist/utilities/index.mjs.map +1 -0
- package/dist/writer.d.mts +58 -0
- package/dist/writer.d.mts.map +1 -0
- package/dist/writer.mjs +357 -0
- package/dist/writer.mjs.map +1 -0
- package/dist/xmlParseStream.d.mts +138 -0
- package/dist/xmlParseStream.d.mts.map +1 -0
- package/dist/xmlParseStream.mjs +313 -0
- package/dist/xmlParseStream.mjs.map +1 -0
- package/package.json +100 -0
- package/src/converters/fromLossless.ts +80 -0
- package/src/converters/fromLossy.ts +180 -0
- package/src/converters/lossless.ts +116 -0
- package/src/converters/lossy.ts +274 -0
- package/src/parser.ts +728 -0
- package/src/sax.ts +157 -0
- package/src/saxEngine.ts +1157 -0
- package/src/utilities/escapeRegExp.ts +19 -0
- package/src/utilities/filter.ts +63 -0
- package/src/utilities/getElementById.ts +21 -0
- package/src/utilities/getElementsByClassName.ts +22 -0
- package/src/utilities/htmlConstants.ts +26 -0
- package/src/utilities/index.ts +7 -0
- package/src/utilities/isElementNode.ts +19 -0
- package/src/utilities/isTextNode.ts +19 -0
- package/src/utilities/toContentString.ts +23 -0
- package/src/writer.ts +650 -0
- package/src/xmlParseStream.ts +597 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
// `RegExp.escape` is not part of the ES2023 lib typings yet, so it is declared
// here as an optional static to allow the feature detection below.
declare global {
  interface RegExpConstructor {
    escape?: (s: string) => string;
  }
}

/**
 * Escapes regex metacharacters in a string so the result can be embedded
 * verbatim inside a `new RegExp(...)` pattern.
 *
 * The implementation is chosen once, when the module is evaluated: the native
 * `RegExp.escape` is used when present (Node ≥ 24, Chrome ≥ 136,
 * Firefox ≥ 134, Safari ≥ 18.2); otherwise a manual replacement that
 * backslash-escapes `. * + ? ^ $ { } ( ) | [ ] \` is used.
 * @internal
 */
export const escapeRegExp: (s: string) => string = (() => {
  const nativeEscape = RegExp.escape;
  if (typeof nativeEscape === 'function') {
    return nativeEscape;
  }
  return (s: string): string => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
})();
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import type { TNode } from '#src/parser.ts';
|
|
2
|
+
import { parse } from '#src/parser.ts';
|
|
3
|
+
|
|
4
|
+
/**
 * Collects every element node for which `predicate` returns true, similar to
 * `Array.prototype.filter` but applied to the whole tree (nested children are
 * visited as well).
 *
 * @param input - XML string (parsed first) or an array of already parsed nodes
 * @param predicate - Called for each element node with the node, its index
 *   within its parent's children, its depth, and the path of its ancestors
 * @param depth - Depth assigned to the top-level nodes (internal use)
 * @param path - Path assigned to the top-level nodes (internal use)
 * @returns Array of matching element nodes
 */
export function filter(
  input: string | (TNode | string)[],
  predicate: (
    node: TNode,
    index: number,
    depth: number,
    path: string,
  ) => boolean,
  depth: number = 0,
  path: string = '',
): TNode[] {
  // Strings are parsed into a node list; node arrays are used as given.
  const nodes = typeof input === 'string' ? parse(input) : input;
  const matches: TNode[] = [];
  filterInto(matches, nodes, predicate, depth, path);
  return matches;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Walks `children` depth-first (pre-order) and appends every element node
 * accepted by `predicate` to `out`. Text nodes (strings) are skipped.
 *
 * The traversal uses an explicit stack instead of recursion so that very
 * deeply nested trees cannot overflow the call stack.
 *
 * @param out - Array that receives the matching nodes (mutated in place)
 * @param children - Nodes to inspect at the starting level
 * @param predicate - Called with the node, its index within its parent's
 *   children, its depth, and the path of its ancestors
 * @param depth - Depth reported for the nodes in `children`
 * @param path - Path reported for the nodes in `children`; each level below
 *   appends `<index>.<tagName>` of the parent, joined with `.`
 */
function filterInto(
  out: TNode[],
  children: (TNode | string)[],
  predicate: (
    node: TNode,
    index: number,
    depth: number,
    path: string,
  ) => boolean,
  depth: number,
  path: string,
): void {
  type Frame = {
    nodes: (TNode | string)[];
    next: number;
    depth: number;
    path: string;
  };

  const stack: Frame[] = [{ nodes: children, next: 0, depth, path }];

  while (stack.length > 0) {
    const frame = stack[stack.length - 1]!;

    // Finished with this level: resume the parent level.
    if (frame.next >= frame.nodes.length) {
      stack.pop();
      continue;
    }

    const index = frame.next++;
    const child = frame.nodes[index]!;
    if (typeof child !== 'object') {
      continue;
    }

    if (predicate(child, index, frame.depth, frame.path)) {
      out.push(child);
    }

    // Descend into the element's children before visiting its next sibling,
    // which keeps the same pre-order as a recursive walk.
    if (child.children) {
      stack.push({
        nodes: child.children,
        next: 0,
        depth: frame.depth + 1,
        path:
          (frame.path ? frame.path + '.' : '') + index + '.' + child.tagName,
      });
    }
  }
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { TNode } from '#src/parser.ts';
|
|
2
|
+
import { parse } from '#src/parser.ts';
|
|
3
|
+
import { filter } from '#src/utilities/filter.ts';
|
|
4
|
+
|
|
5
|
+
/**
 * Looks up an element by its `id` attribute.
 *
 * For an already parsed node array the tree is searched with `filter` and the
 * first node whose `attributes.id` strictly equals `id` is returned. For a
 * string the lookup is delegated to `parse` via its `attrValue` option and the
 * first entry of the result is returned.
 *
 * @param input - XML string or parsed DOM to search
 * @param id - ID value to look for
 * @returns The first matching node, or undefined when there is none
 */
export function getElementById(
  input: string | (TNode | string)[],
  id: string,
): TNode | undefined {
  if (typeof input !== 'string') {
    const candidates = filter(input, (node) => node.attributes?.id === id);
    return candidates[0];
  }

  const parsed = parse(input, { attrValue: id });
  return parsed[0] as TNode | undefined;
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { TNode } from '#src/parser.ts';
|
|
2
|
+
import { parse } from '#src/parser.ts';
|
|
3
|
+
import { escapeRegExp } from '#src/utilities/escapeRegExp.ts';
|
|
4
|
+
import { filter } from '#src/utilities/filter.ts';
|
|
5
|
+
|
|
6
|
+
/**
 * Find elements by class name.
 *
 * `className` is interpreted as a whitespace-separated list of class tokens.
 * An element matches when its `class` attribute contains every token as a
 * whole, whitespace-delimited word, in any order. A `className` that is empty
 * or consists only of whitespace matches nothing.
 *
 * @param input - XML string or parsed DOM to search
 * @param className - Class name (or space-separated class names) to find
 * @returns Found nodes
 */
export function getElementsByClassName(
  input: string | (TNode | string)[],
  className: string,
): TNode[] {
  const dom = typeof input === 'string' ? parse(input) : input;

  const tokens = className.split(/\s+/).filter((token) => token !== '');
  if (tokens.length === 0) {
    // Without this guard the pattern would degenerate to `(?:^|\s)(?:\s|$)`
    // and match elements with an empty or whitespace-padded class attribute.
    return [];
  }

  // One whole-word matcher per requested class token.
  const matchers = tokens.map(
    (token) => new RegExp('(?:^|\\s)' + escapeRegExp(token) + '(?:\\s|$)'),
  );

  return filter(dom, (node) => {
    const cls = node.attributes?.class;
    return cls != null && matchers.every((re) => re.test(cls));
  });
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * Tag names of the standard HTML void elements: elements that are
 * self-closing and never have children.
 * Serves as the default for `selfClosingTags` when `html: true`.
 */
export const HTML_VOID_ELEMENTS = [
  'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
  'input', 'link', 'meta', 'param', 'source', 'track', 'wbr',
];
|
|
21
|
+
|
|
22
|
+
/**
 * Tag names of HTML elements whose content is raw text rather than markup
 * to be parsed.
 * Serves as the default for `rawContentTags` when `html: true`.
 */
export const HTML_RAW_CONTENT_TAGS = [
  'script',
  'style',
];
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export * from '#src/utilities/htmlConstants.ts';
|
|
2
|
+
export * from '#src/utilities/filter.ts';
|
|
3
|
+
export * from '#src/utilities/toContentString.ts';
|
|
4
|
+
export * from '#src/utilities/getElementById.ts';
|
|
5
|
+
export * from '#src/utilities/getElementsByClassName.ts';
|
|
6
|
+
export * from '#src/utilities/isTextNode.ts';
|
|
7
|
+
export * from '#src/utilities/isElementNode.ts';
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { TNode } from '#src/parser.ts';
|
|
2
|
+
|
|
3
|
+
/**
 * Type guard that tells element nodes (TNode objects) apart from text nodes.
 * Handy for narrowing entries of a mixed `(TNode | string)[]` array.
 *
 * A value counts as an element node when it is a non-null object that has a
 * `tagName` property.
 *
 * @param node - The node to check
 * @returns True if the node is a TNode (element node)
 * @example
 * const parsed = parse('<div>Hello <span>World</span></div>');
 * parsed[0].children.forEach(child => {
 *   if (isElementNode(child)) {
 *     console.log('Element:', child.tagName);
 *   }
 * });
 */
export function isElementNode(node: TNode | string): node is TNode {
  if (typeof node !== 'object' || node === null) {
    return false;
  }
  return 'tagName' in node;
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { TNode } from '#src/parser.ts';
|
|
2
|
+
|
|
3
|
+
/**
 * Type guard that tells text nodes (plain strings) apart from element nodes.
 * Handy for narrowing entries of a mixed `(TNode | string)[]` array.
 *
 * @param node - The node to check
 * @returns True if the node is a string (text node)
 * @example
 * const parsed = parse('<div>Hello <span>World</span></div>');
 * parsed[0].children.forEach(child => {
 *   if (isTextNode(child)) {
 *     console.log('Text:', child);
 *   }
 * });
 */
export function isTextNode(node: TNode | string): node is string {
  const kind = typeof node;
  return kind === 'string';
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { TNode } from '#src/parser.ts';
|
|
2
|
+
|
|
3
|
+
/**
 * Extracts the text content of a node or node list, which is useful for
 * mixed content such as
 * "this text has some <b>big</b> text and a <a href=''>link</a>".
 *
 * Spacing rules, as implemented:
 * - a string is returned with a single leading space;
 * - an element node yields the content string of its `children`;
 * - an array joins the content of its entries, each preceded by a space, and
 *   trims the result. Neighbouring text pieces can therefore end up separated
 *   by two spaces.
 *
 * @param domContent - The node(s) to extract text from
 * @returns Concatenated text content
 */
export function toContentString(
  domContent: TNode | (TNode | string)[] | string,
): string {
  if (Array.isArray(domContent)) {
    let text = '';
    for (const entry of domContent) {
      text += ' ' + toContentString(entry);
    }
    return text.trim();
  }

  if (typeof domContent === 'object' && domContent !== null) {
    return toContentString(domContent.children);
  }

  return ' ' + domContent;
}
|