@nodable/node-tree-builder 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,208 @@
1
+ # Node Tree Output Builder
2
+
3
+ Produces a sequential node tree where each element has three fixed properties — `tagname`, `child`, and `attributes` — plus an optional `text` property for leaf nodes.
4
+
5
+ ## Node structure
6
+
7
+ ```
8
+ {
9
+ tagname: string, // element name
10
+ child: array, // ordered child nodes (always present, empty for leaf nodes)
11
+ attributes: object, // always present; populated when skip.attributes is false
12
+ text?: any // only present on leaf nodes (no child elements)
13
+ }
14
+ ```
15
+
16
+ ### Leaf node (text only, no child elements)
17
+
18
+ ```js
19
+ { tagname: "span", child: [], attributes: {}, text: "Hello" }
20
+ ```
21
+
22
+ ### Empty tag (no text, no children)
23
+
24
+ ```js
25
+ { tagname: "br", child: [], attributes: {} }
26
+ ```
27
+
28
+ ### Tag with child elements
29
+
30
+ ```js
31
+ { tagname: "div", child: [ /* child nodes */ ], attributes: {} }
32
+ ```
33
+
34
+ ### Mixed content (text interleaved with child elements)
35
+
36
+ Inline text runs appear as `{ [nameFor.text]: value }` entries inside the `child` array. The examples below use `nameFor: { text: ":text" }`; the default key is `"#text"`. The parent node has no `text` property in this case.
37
+
38
+ Input:
39
+ ```xml
40
+ <p>Hello <b>world</b>!</p>
41
+ ```
42
+
43
+ Output:
44
+ ```js
45
+ {
46
+ tagname: "p",
47
+ child: [
48
+ { ":text": "Hello " },
49
+ { tagname: "b", child: [], attributes: {}, text: "world" },
50
+ { ":text": "!" }
51
+ ],
52
+ attributes: {}
53
+ }
54
+ ```
55
+
56
+ #### textInChild
57
+
58
+ However, if `textInChild` is set to `true`, text is always added to the `child` array as an inline text entry, even for leaf nodes.
59
+
60
+
61
+ Input:
62
+ ```xml
63
+ <p>Hello <b>world</b>!</p>
64
+ ```
65
+
66
+ Output:
67
+ ```js
68
+ {
69
+ tagname: "p",
70
+ child: [
71
+ { ":text": "Hello " },
72
+ { tagname: "b", child: [
73
+ { ":text": "world" }
74
+ ], attributes: {}},
75
+ { ":text": "!" }
76
+ ],
77
+ attributes: {}
78
+ }
79
+ ```
80
+
81
+
82
+ ## Basic example
83
+
84
+ Input:
85
+ ```xml
86
+ <root>
87
+ <child>hello</child>
88
+ <child>world</child>
89
+ </root>
90
+ ```
91
+
92
+ Output:
93
+ ```js
94
+ {
95
+ tagname: "root",
96
+ child: [
97
+ { tagname: "child", child: [], attributes: {}, text: "hello" },
98
+ { tagname: "child", child: [], attributes: {}, text: "world" }
99
+ ],
100
+ attributes: {}
101
+ }
102
+ ```
103
+
104
+ The fixed structure lets you traverse the tree without defensive property checks.
105
+
106
+ ## Install
107
+
108
+ ```bash
109
+ npm install @nodable/node-tree-builder
110
+ ```
111
+
112
+ ## Usage
113
+
114
+ ```js
115
+ import XMLParser from "@nodable/flexible-xml-parser";
116
+ import { NodeTreeBuilderFactory } from "@nodable/node-tree-builder";
117
+
118
+ const parser = new XMLParser({
119
+ OutputBuilder: new NodeTreeBuilderFactory(builderOptions),
120
+ ...parserOptions,
121
+ });
122
+
123
+ const result = parser.parse(xmlString);
124
+ ```
125
+
126
+ ## Options
127
+
128
+ ### `attributes.groupBy` (default: `"attributes"`)
129
+
130
+ The property name under which all attributes are collected. The property is **always** present on every node, even when empty.
131
+
132
+ ```js
133
+ new NodeTreeBuilderFactory({
134
+ attributes: { groupBy: "attributes" } // default
135
+ })
136
+ ```
137
+
138
+ To use a custom key:
139
+
140
+ ```js
141
+ new NodeTreeBuilderFactory({
142
+ attributes: { groupBy: ":@" }
143
+ })
144
+ ```
145
+
146
+ ### `nameFor.text` (default: `"#text"`)
147
+
148
+ The key used for inline text entries inside `child` when a node has mixed content.
149
+
150
+ ```js
151
+ new NodeTreeBuilderFactory({
152
+ nameFor: { text: ":text" }
153
+ })
154
+ ```
155
+
156
+ ### `nameFor.comment`
157
+
158
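+ When `skip.comment` is `false` and `nameFor.comment` is set, each comment is emitted as a node with this name. When `nameFor.comment` is not set, comments are omitted from the output.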
159
+
160
+
161
+ ### `nameFor.cdata` (default: `""`)
162
+
163
+ Input:
164
+ ```xml
165
+ <root><code><![CDATA[data]]></code></root>
166
+ ```
167
+
168
+ ```js
169
+ const builderConfig = { nameFor: { cdata: "##cdata" } }
170
+ const parserConfig = { skip: { cdata: false } }
171
+
172
+ const parser = new XMLParser({
173
+ OutputBuilder: new NodeTreeBuilderFactory(builderConfig),
174
+ ...parserConfig,
175
+ });
176
+
177
+ const result = parser.parse(xmlString);
178
+ ```
179
+
180
+ Output:
181
+ ```js
182
+ {
183
+ "tagname": "root",
184
+ "child": [
185
+ {
186
+ "tagname": "code",
187
+ "child": [
188
+ {
189
+ "tagname": "##cdata",
190
+ "child": [],
191
+ "attributes": {},
192
+ "text": "data"
193
+ }
194
+ ],
195
+ "attributes": {}
196
+ }
197
+ ],
198
+ "attributes": {}
199
+ }
200
+ ```
201
+
202
+ ### `skip.attributes` (default: `true`)
203
+
204
+ When `true` (default), all attributes are ignored and every node's `attributes` property is `{}`. Set to `false` to populate attributes.
205
+
206
+ ### Value parsers
207
+
208
+ By default the parser chain `["entity", "boolean", "number"]` is applied to text content, converting strings like `"42"` to `42` and `"true"` to `true`. Override with `tags.valueParsers`.
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "@nodable/node-tree-builder",
3
+ "version": "1.0.0",
4
+ "description": "Node tree JS Object Sequential builder for flexible-xml-parser",
5
+ "main": "src/index.js",
6
+ "types": "./src/index.d.ts",
7
+ "type": "module",
8
+ "scripts": {
9
+ "test": "echo \"Error: no test specified\" && exit 1"
10
+ },
11
+ "keywords": [
12
+ "flexible-xml-parser",
13
+ "nodable",
14
+ "xml"
15
+ ],
16
+ "author": "Amit Gupta (https://solothought.com)",
17
+ "license": "MIT",
18
+ "files": [
19
+ "src"
20
+ ],
21
+ "publishConfig": {
22
+ "access": "public"
23
+ },
24
+ "dependencies": {
25
+ "@nodable/base-output-builder": "^1.0.2",
26
+ "path-expression-matcher": "^1.4.0"
27
+ },
28
+ "funding": [
29
+ {
30
+ "type": "github",
31
+ "url": "https://github.com/sponsors/nodable"
32
+ }
33
+ ],
34
+ "repository": {
35
+ "type": "git",
36
+ "url": "git+https://github.com/nodable/flexible-output-builder.git"
37
+ }
38
+ }
@@ -0,0 +1,177 @@
1
+ //OrderedOutputBuilder
2
+
3
+ import { buildOptions } from './ParserOptionsBuilder.js';
4
+ import { BaseOutputBuilder, BaseOutputBuilderFactory, commonValueParsers, ElementType } from '@nodable/base-output-builder';
5
+
6
+ const rootName = '!js_arr';
7
+
8
/**
 * Factory handed to the XML parser as its output builder.
 *
 * The constructor normalises the user-supplied builder options once; every
 * call to `getInstance` then creates a fresh `NodeTreeBuilder` that shares
 * those options.
 */
export default class NodeTreeBuilderFactory extends BaseOutputBuilderFactory {
  /**
   * @param {object} [options] - builder options; passed through `buildOptions`
   *   so that defaults are filled in.
   */
  constructor(options) {
    super();
    this.options = buildOptions(options);
    // NOTE(review): `commonValParsers` is not assigned in this class (the
    // original assignment and `registerValueParser` were commented out);
    // presumably BaseOutputBuilderFactory provides both — confirm.
  }

  /**
   * Creates a builder for a single parse run.
   *
   * @param {object} parserOptions - options supplied by the parser
   * @param {*} readonlyMatcher - forwarded unchanged to the builder
   * @returns {NodeTreeBuilder}
   */
  getInstance(parserOptions, readonlyMatcher) {
    // Hand each builder its own shallow copy of the parser registry.
    const parsersForInstance = { ...this.commonValParsers };
    const builder = new NodeTreeBuilder(
      parserOptions,
      this.options,
      parsersForInstance,
      readonlyMatcher,
    );
    return builder;
  }
}
24
+
25
/**
 * Assembles parser events into a tree of `Node` objects.
 *
 * Open elements are kept on `tagsStack`; a node is appended to its parent's
 * `child` array only when it is closed. Text is stored either on the node's
 * `text` property or, for mixed content / `textInChild`, as an inline
 * `{ [nameFor.text]: value }` entry inside `child`.
 */
export class NodeTreeBuilder extends BaseOutputBuilder {

  /**
   * @param {object} parserOptions - options supplied by the parser
   * @param {object} builderOptions - options prepared by the factory
   * @param {object} registeredValParsers - value parsers keyed by name
   * @param {*} readonlyMatcher - forwarded to `BaseOutputBuilder`
   */
  constructor(parserOptions, builderOptions, registeredValParsers, readonlyMatcher) {
    super(readonlyMatcher);
    this.tagsStack = [];
    this.parserOptions = parserOptions;

    // Builder options win over parser options. The four nested groups are
    // merged one level deep so a partial group does not drop the other side's keys.
    this.options = {
      ...parserOptions,
      ...builderOptions,
      skip: { ...parserOptions.skip, ...builderOptions.skip },
      nameFor: { ...parserOptions.nameFor, ...builderOptions.nameFor },
      tags: { ...parserOptions.tags, ...builderOptions.tags },
      attributes: { ...parserOptions.attributes, ...builderOptions.attributes },
    };

    this.registeredValParsers = registeredValParsers;

    // Synthetic root: collects the top-level nodes and is never returned itself.
    this.root = new Node(rootName, this.options);
    this.currentNode = this.root;
    // Attributes waiting for the next element/instruction.
    // NOTE(review): nothing in this class fills this object; presumably the
    // base class's attribute handling does — confirm.
    this.attributes = {};
    this._pendingStopNode = false;
  }

  /**
   * Opens a new element and makes it the current node.
   *
   * @param {{ name: string }} tag - the element being opened
   */
  addElement(tag) {
    // If the current node has text set (text arrived before any child element),
    // retroactively migrate it into the child array as an inline text entry
    // now that we know this is mixed content.
    if (this.currentNode.text !== undefined) {
      this.currentNode.child.unshift({
        [this.options.nameFor.text]: this.currentNode.text
      });
      delete this.currentNode.text;
    }

    this.tagsStack.push(this.currentNode);
    const node = new Node(tag.name, this.options);
    // Attach a copy of any pending attributes onto the new node
    if (this.attributes && !isEmpty(this.attributes)) {
      node[this.options.attributes.groupBy] = { ...this.attributes };
    }
    this.attributes = {};
    this.currentNode = node;
  }

  /**
   * Called when a stop node is fully collected, before `addValue`.
   *
   * @param {TagDetail} tagDetail - name, line, col, index of the stop node
   * @param {string} rawContent - raw unparsed content between the tags
   */
  onStopNode(tagDetail, rawContent) {
    this._pendingStopNode = true;
    if (typeof this.options.onStopNode === 'function') {
      this.options.onStopNode(tagDetail, rawContent, this.matcher);
    }
  }

  /**
   * Closes the current element and appends it to its parent, unless the
   * `onClose` callback returns a truthy value, in which case the node is dropped.
   */
  closeElement() {
    const node = this.currentNode;
    this.currentNode = this.tagsStack.pop();

    this._pendingStopNode = false;

    // Only invoke the hook when it is callable (same guard as onStopNode), so
    // that `onClose: null` or another non-function value does not throw here.
    if (typeof this.options.onClose === 'function') {
      const resultTag = this.options.onClose(node, this.matcher);
      if (resultTag) return;
    }

    this.currentNode.child.push(node);
  }

  /**
   * Appends an already-built node to the current node's children.
   *
   * @param {Node} node
   */
  _addChild(node) {
    this.currentNode.child.push(node);
  }

  /**
   * Runs the tag value-parser chain over `text` and stores the result on the
   * current node.
   *
   * @param {string} text - raw text content
   */
  addValue(text) {
    const tagName = this.currentNode?.elementname;
    // Check whether there are already element children (mixed content scenario).
    // Inline text entries carry no `elementname`, so they do not count.
    const hasElementChildren = this.currentNode?.child?.some(c => c.elementname !== undefined);

    const context = {
      elementName: tagName,
      elementValue: text,
      elementType: ElementType.ELEMENT,
      matcher: this.matcher,
      isLeafNode: !hasElementChildren,
    };

    // `parseValue` is not defined in this class; presumably inherited from
    // BaseOutputBuilder — confirm.
    const parsedValue = this.parseValue(text, this.options.tags.valueParsers, context);

    if (hasElementChildren || this.options.textInChild) {
      // Mixed content (or textInChild): store as an inline text child
      this.currentNode.child.push({
        [this.options.nameFor.text]: parsedValue
      });
    } else {
      // Pure text (leaf node or text before any child elements):
      // set directly on the node as `text` property.
      // NOTE(review): a second text run on the same leaf overwrites the first.
      this.currentNode.text = parsedValue;
    }
  }

  /**
   * Adds a processing instruction as a child node, carrying a copy of any
   * pending attributes.
   *
   * @param {string} name - instruction name
   */
  addInstruction(name) {
    const node = new Node(name, this.options);
    if (this.attributes && !isEmpty(this.attributes)) {
      // Copy, as addElement does, so the node never aliases builder state.
      node[this.options.attributes.groupBy] = { ...this.attributes };
    }
    this._addChild(node);
    this.attributes = {};
  }

  /**
   * Adds a comment node. Nothing is added when `skip.comment` is set or when
   * no `nameFor.comment` name is configured.
   *
   * @param {string} text - comment content (stored unparsed)
   */
  addComment(text) {
    if (this.options.skip.comment) return;
    if (this.options.nameFor.comment) {
      const node = new Node(this.options.nameFor.comment, this.options);
      node.text = text;
      this._addChild(node);
    }
  }

  /**
   * Adds CDATA content. With `nameFor.cdata` set it becomes its own node with
   * the raw text; otherwise it is passed to `addValue` like ordinary text.
   *
   * @param {string} text - CDATA content
   */
  addLiteral(text) {
    if (this.options.skip.cdata) return;
    if (this.options.nameFor.cdata) {
      const node = new Node(this.options.nameFor.cdata, this.options);
      node.text = text;
      this._addChild(node);
    } else {
      this.addValue(text || "");
    }
  }

  /**
   * @returns {Node|Array} the single top-level node when there is exactly one,
   *   otherwise the array of top-level entries (possibly empty).
   */
  getOutput() {
    const children = this.root.child;
    if (children.length === 1) return children[0];
    return children;
  }
}
165
+
166
+ class Node {
167
+ constructor(elementname, options) {
168
+ this.elementname = elementname;
169
+ this.child = [];
170
+ const groupBy = options?.attributes?.groupBy ?? 'attributes';
171
+ this[groupBy] = {};
172
+ }
173
+ }
174
+
175
/**
 * Tells whether `obj` has no own enumerable string-keyed properties.
 *
 * @param {object} obj
 * @returns {boolean}
 */
function isEmpty(obj) {
  const ownKeys = Object.keys(obj);
  return ownKeys.length < 1;
}
@@ -0,0 +1,78 @@
1
+ const defaultOptions = {
2
+ nameFor: {
3
+ text: "#text",
4
+ //comment: undefined, (left unset: the builder only emits comment nodes when a name is configured)
5
+ //cdata: undefined, (left unset: the builder then passes CDATA content to addValue like ordinary text)
6
+ },
7
+ skip: {
8
+ // declaration: false,
9
+ // pi: false,
10
+ // attributes: true,
11
+ // cdata: false,
12
+ // comment: false,
13
+ // nsPrefix: false,
14
+ // tags: false,
15
+ },
16
+ tags: {
17
+ valueParsers: [], // replaced by defaultTagParsers in buildOptions when the caller supplies none
18
+ // stopNodes: [],
19
+ },
20
+ attributes: {
21
+ prefix: "@_",
22
+ suffix: "",
23
+ groupBy: "attributes",
24
+ valueParsers: [], // replaced by defaultAttrParsers in buildOptions when the caller supplies none
25
+ },
26
+ textInChild: false,
27
+ };
28
+
29
+ // Default value-parser chains: the "entity" parser runs first (presumably expanding entity references; its implementation is not in this file), then type coercion.
30
+ const defaultTagParsers = ["entity", "boolean", "number"];
31
+ const defaultAttrParsers = ["entity", "number", "boolean"];
32
+
33
/**
 * Produces the effective builder options: a deep copy of `defaultOptions`
 * with the default value-parser chains installed where the caller gave none,
 * overlaid with the caller's own options.
 *
 * @param {object} [options] - user-supplied builder options
 * @returns {object} a new options object; `defaultOptions` is never mutated
 */
export function buildOptions(options) {
  const merged = deepClone(defaultOptions);

  const callerSetTagChain = Boolean(options) && options.tags?.valueParsers !== undefined;
  const callerSetAttrChain = Boolean(options) && options.attributes?.valueParsers !== undefined;

  // Install the default chains first; the caller's options are copied on top afterwards.
  if (!callerSetTagChain) {
    merged.tags.valueParsers = [...defaultTagParsers];
  }
  if (!callerSetAttrChain) {
    merged.attributes.valueParsers = [...defaultAttrParsers];
  }

  if (!options) {
    return merged;
  }

  copyProperties(merged, options);
  return merged;
}
49
+
50
+ function deepClone(obj) {
51
+ if (obj === null || typeof obj !== 'object') return obj;
52
+ if (Array.isArray(obj)) return obj.map(deepClone);
53
+ const clone = {};
54
+ for (const key of Object.keys(obj)) {
55
+ clone[key] = deepClone(obj[key]);
56
+ }
57
+ return clone;
58
+ }
59
+
60
+ function copyProperties(target, source) {
61
+ for (const key of Object.keys(source)) {
62
+ // Guard against prototype pollution via option keys
63
+ if (key === '__proto__' || key === 'constructor' || key === 'prototype') continue;
64
+
65
+ if (typeof source[key] === 'function') {
66
+ target[key] = source[key];
67
+ } else if (Array.isArray(source[key])) {
68
+ target[key] = source[key];
69
+ } else if (typeof source[key] === 'object' && source[key] !== null) {
70
+ if (typeof target[key] !== 'object' || target[key] === null) {
71
+ target[key] = {};
72
+ }
73
+ copyProperties(target[key], source[key]);
74
+ } else {
75
+ target[key] = source[key];
76
+ }
77
+ }
78
+ }
package/src/index.d.ts ADDED
@@ -0,0 +1,160 @@
1
+ export interface SkipOptions {
2
+ /** Skip XML declaration `<?xml ... ?>` from output. Default: false */
3
+ declaration?: boolean;
4
+ /** Skip processing instructions (other than declaration) from output. Default: false */
5
+ pi?: boolean;
6
+ /**
7
+ * Skip all attributes from output. When true (default), the `attributes`
8
+ * property on every node is an empty object `{}`.
9
+ * Set to false to populate attributes.
10
+ * Default: true
11
+ */
12
+ attributes?: boolean;
13
+ /** Exclude CDATA sections entirely from output. When false, CDATA becomes its own node if `nameFor.cdata` is set, otherwise it is handled as text. Default: false */
14
+ cdata?: boolean;
15
+ /** Exclude comments entirely from output. Comments are also omitted while `nameFor.comment` is not set. Default: false */
16
+ comment?: boolean;
17
+ /**
18
+ * Strip namespace prefixes from tag and attribute names.
19
+ * E.g. `ns:tag` → `tag`, `xmlns:*` attributes are dropped.
20
+ * Default: false
21
+ */
22
+ nsPrefix?: boolean;
23
+ /** (future) Tag-level filtering — not yet implemented. Default: false */
24
+ tags?: boolean;
25
+ }
26
+
27
+ export interface NameForOptions {
28
+ /**
29
+ * Property name for inline text nodes in mixed content
30
+ * (i.e. text that appears alongside child elements in the same parent).
31
+ * These appear as `{ [text]: value }` entries in the `child` array.
32
+ * Default: '#text'
33
+ */
34
+ text?: string;
35
+ /**
36
+ * Node name for CDATA sections; when set, each section is emitted as its own child node.
37
+ * When empty or unset (default), CDATA content is handled like ordinary text of the enclosing node.
38
+ */
39
+ cdata?: string;
40
+ /**
41
+ * Node name for XML comments; when set, each comment is emitted as its own child node.
42
+ * Empty string (default) omits comments from output.
43
+ * Set e.g. '#comment' to capture them.
44
+ */
45
+ comment?: string;
46
+ }
47
+
48
+ export interface AttributeOptions {
49
+ /** Allow boolean (valueless) attributes — treated as `true`. Default: false */
50
+ booleanType?: boolean;
51
+ /**
52
+ * Property name under which all attributes are grouped on each node.
53
+ * The property named by `groupBy` is always present (empty `{}` when no attributes
54
+ * or when `skip.attributes` is true).
55
+ * Default: 'attributes'
56
+ */
57
+ groupBy?: string;
58
+ /** Prefix prepended to attribute names in output. Default: '@_' */
59
+ prefix?: string;
60
+ /** Suffix appended to attribute names in output. Default: '' */
61
+ suffix?: string;
62
+ /**
63
+ * Value parser chain for attribute values.
64
+ * Built-in names: 'entity', 'number', 'boolean', 'trim', 'currency'.
65
+ * Default: ['entity', 'number', 'boolean'] (used when `attributes.valueParsers` is not supplied)
66
+ */
67
+ valueParsers?: Array<string | ValueParser>;
68
+ }
69
+
70
+ export interface TagOptions {
71
+ /**
72
+ * Value parser chain for tag text content.
73
+ * Built-in names: 'entity', 'boolean', 'number', 'trim', 'currency'.
74
+ * Default: ['entity', 'boolean', 'number'] (used when `tags.valueParsers` is not supplied)
75
+ * Add 'trim' to strip leading/trailing whitespace (not done by default).
76
+ */
77
+ valueParsers?: Array<string | ValueParser>;
78
+ }
79
+
80
/** Options accepted by the `NodeTreeBuilderFactory` constructor. */
export interface FactoryOptions {
  /** Fine-grained control over which node types appear in output */
  skip?: SkipOptions;

  /** Property names used for special nodes in output */
  nameFor?: NameForOptions;

  /** Attribute parsing and representation options */
  attributes?: AttributeOptions;

  /** Tag parsing options (value parser chain for text content) */
  tags?: TagOptions;

  /**
   * When true, text is always stored as an inline `{ [nameFor.text]: value }`
   * entry in `child` (even on leaf nodes) instead of on the node's `text` property.
   * Default: false
   */
  textInChild?: boolean;

  /**
   * Called each time an element is closed, with the finished node and the
   * builder's matcher. Returning a truthy value drops the node: it is not
   * appended to its parent.
   */
  onClose?: (node: XmlNode, matcher: any) => unknown;

  /**
   * Called when a stop node has been fully collected, with the tag details,
   * the raw content between the tags, and the builder's matcher.
   */
  onStopNode?: (
    tagDetail: { name: string; line: number; col: number; index: number },
    rawContent: string,
    matcher: any,
  ) => void;
}
93
+
94
+ /**
95
+ * A parsed XML node as produced by NodeTreeBuilder.
96
+ *
97
+ * - `tagname` — element name (NOTE(review): the runtime `Node` class in NodeTreeBuilder.js assigns `elementname`, not `tagname` — confirm which name is intended)
98
+ * - `child` — ordered array of child nodes; empty for leaf nodes
99
+ * - `attributes` — always present; populated when `skip.attributes` is false
100
+ * - `text` — only present on leaf nodes (no child elements); holds the
101
+ * parsed text value (may be string, number, or boolean
102
+ * depending on the active value-parser chain)
103
+ *
104
+ * In mixed content (text interleaved with child elements), inline text runs
105
+ * appear in `child` as `{ [nameFor.text]: value }` objects (default
106
+ * key is `"#text"`). The parent node has no `text` property in that case.
107
+ */
108
+ export interface XmlNode {
109
+ tagname: string;
110
+ child: Array<XmlNode | Record<string, any>>;
111
+ attributes: Record<string, any>;
112
+ text?: string | number | boolean;
113
+ [key: string]: any;
114
+ }
115
+
116
+ export interface NodeTreeBuilderInstance {
117
+ addElement(tag: { name: string }, matcher: any): void;
118
+ closeElement(matcher: any): void;
119
+ addValue(text: string, matcher: any): void;
120
+ addAttribute(name: string, value: any): void;
121
+ addComment(text: string): void;
122
+ addLiteral(text: string): void; // CDATA content; governed by `skip.cdata` and `nameFor.cdata`
123
+ addDeclaration(): void;
124
+ addInstruction(name: string): void;
125
+ /**
126
+ * Called by the XML parser after the DOCTYPE block is read.
127
+ * Implementations forward entities to any registered value parser
128
+ * that implements addInputEntities().
129
+ */
130
+ addInputEntities(entities: object): void;
131
+ getOutput(): XmlNode | XmlNode[]; // the single top-level node when there is exactly one, otherwise the array of top-level entries
132
+ registeredValParsers: Record<string, ValueParser>;
133
+ /**
134
+ * Optional hook called by the parser when a stop node is fully collected.
135
+ * Delegates to the `options.onStopNode` callback when it is a function, passing the builder's own matcher as the third argument.
136
+ */
137
+ onStopNode?(
138
+ tagDetail: { name: string; line: number; col: number; index: number },
139
+ rawContent: string,
140
+ matcher: any,
141
+ ): void;
142
+ }
143
+
144
+ /**
145
+ * A value parser transforms a value in the parsing chain.
146
+ * Receives the current value and an optional context object.
147
+ */
148
+ export interface ValueParser {
149
+ /**
150
+ * @param val Current value (string initially; may already be typed if earlier parsers ran)
151
+ * @param context { tagName, isAttribute, attrName? } — NOTE(review): NodeTreeBuilder.addValue builds a context of { elementName, elementValue, elementType, matcher, isLeafNode }; confirm which shape parsers actually receive
152
+ */
153
+ parse(val: any, context?: { tagName: string; isAttribute: boolean; attrName?: string }): any;
154
+ }
155
+
156
/**
 * Factory passed to the XML parser as its `OutputBuilder`; creates one
 * `NodeTreeBuilder` per parse run.
 */
export class NodeTreeBuilderFactory {
  /**
   * @param options Builder options; missing values fall back to the defaults.
   */
  constructor(options?: Partial<FactoryOptions>);

  /**
   * Creates a builder instance.
   *
   * @param factoryOptions Options supplied by the parser; the factory's own
   *   options take precedence where both define a value.
   * @param readonlyMatcher Matcher handed over by the parser and forwarded to
   *   the builder. Optional in this declaration so existing one-argument
   *   callers keep compiling.
   */
  getInstance(factoryOptions: FactoryOptions, readonlyMatcher?: any): NodeTreeBuilderInstance;

  /**
   * Registers a value parser under `name`.
   * NOTE(review): NodeTreeBuilder.js does not define this method itself (it is
   * commented out there); presumably it is inherited from
   * BaseOutputBuilderFactory — confirm.
   */
  registerValueParser(name: string, parser: ValueParser): void;
}
package/src/index.js ADDED
@@ -0,0 +1 @@
1
+ export { default as NodeTreeBuilderFactory, NodeTreeBuilder } from './NodeTreeBuilder.js';