xml-stream-editor 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,6 +1,18 @@
1
1
  CHANGELOG
2
2
  ===
3
3
 
4
+ 0.2.1
5
+ ---
6
+
7
+ Validate selector strings passed to `createXMLEditor` (for now, very basic.
8
+ Just making sure there is only one space between element names in each
9
+ selector).
10
+
11
+ Fix issue where in some cases a selector would match against the
12
+ suffixes/endings of elements, and not always the full element name
13
+ (e.g., the selector `"steak"` would sometimes match elements like
14
+ `<mistake>`).
15
+
4
16
  0.2.0
5
17
  ---
6
18
 
package/README.md CHANGED
@@ -78,7 +78,6 @@ interface Options {
78
78
 
79
79
  // Options defined by the "saxes" library, and passed to the "saxes" parser
80
80
  //
81
- // eslint-disable-next-line max-len
82
81
  // https://github.com/lddubeau/saxes/blob/4968bd09b5fd0270a989c69913614b0e640dae1b/src/saxes.ts#L557
83
82
  // https://www.npmjs.com/package/saxes
84
83
  saxes?: SaxesOptions
@@ -116,46 +115,43 @@ import { createReadStream } from 'node:fs'
116
115
  import { pipeline } from 'node:stream/promises'
117
116
  import { createXMLEditor, newElement } from 'xml-stream-editor'
118
117
 
119
- (async () => {
120
- // The keys of this object are selector strings, and the
121
- // values are functions that get called with matching elements.
122
- const rules = {
123
- "main character": (elm) => {
124
- switch (elm.text) {
125
- case "Marge Simpson":
126
- elm.attributes["hair"] = "blue"
127
- break
128
- case "Homer Simpson":
129
- elm.text += " (Sr.)"
130
- break
131
- case "Lisa Simpson":
132
- elm.text = ""
133
-
134
- // Create an <instrument> element and make it
135
- // a child element.
136
- const instrumentElm = newElement("instrument")
137
- instrumentElm.text = "saxophone"
138
- elm.children.push(instrumentElm)
139
-
140
- // Also create a new <name> element, and also make it
141
- // a child element.
142
- const nameElm = newElement("name")
143
- nameElm.text = "Lisa Simpson"
144
- elm.children.push(nameElm)
145
- break
146
- case "Bart Simpson":
147
- // Remove the node by not returning an element.
148
- return
149
- }
150
- return elm
118
+ // The keys of this object are selector strings, and the
119
+ // values are functions that get called with matching elements.
120
+ const rules = {
121
+ "main character": (elm) => {
122
+ switch (elm.text) {
123
+ case "Marge Simpson":
124
+ elm.attributes["hair"] = "blue"
125
+ break
126
+ case "Homer Simpson":
127
+ elm.text += " (Sr.)"
128
+ break
129
+ case "Lisa Simpson":
130
+ elm.text = ""
131
+
132
+ // Create an <instrument> element and make it a child element.
133
+ const instrumentElm = newElement("instrument")
134
+ instrumentElm.text = "saxophone"
135
+ elm.children.push(instrumentElm)
136
+
137
+ // Also create a new <name> element, and also make it a child
138
+ // element.
139
+ const nameElm = newElement("name")
140
+ nameElm.text = "Lisa Simpson"
141
+ elm.children.push(nameElm)
142
+ break
143
+ case "Bart Simpson":
144
+ // Remove the node by not returning an element.
145
+ return
151
146
  }
147
+ return elm
152
148
  }
153
- await pipeline(
154
- createReadStream("simpsons.xml"), // above example
155
- createXMLEditor(rules),
156
- process.stdout
157
- )
158
- })()
149
+ }
150
+ await pipeline(
151
+ createReadStream("simpsons.xml"), // above example
152
+ createXMLEditor(rules),
153
+ process.stdout
154
+ )
159
155
  ```
160
156
 
161
157
  And you'll find this printed to `STDOUT` (reformatted and annotated):
@@ -203,29 +199,31 @@ import { createReadStream } from 'node:fs'
203
199
  import { pipeline } from 'node:stream/promises'
204
200
  import { createXMLEditor, newElement } from 'xml-stream-editor'
205
201
 
206
- (async () => {
207
- const rules = {
208
- // This rule will match first, since the "main" element will be
209
- // identified first during parsing.
210
- "main character": (elm) => {
211
- // editing goes here
212
- return elm
213
- },
214
- // And as a result, this rule will never be applied during editing
215
- // (since anytime "character" would match a <character> element,
216
- // that <character> element will have already been matched by the
217
- // above "main character" selector.
218
- "character": (elm) => {
219
- // this function would never be called in this document.
220
- return elm
221
- },
222
- }
223
- await pipeline(
224
- createReadStream("simpsons.xml"), // above example
225
- createXMLEditor(rules),
226
- process.stdout
227
- )
228
- })()
202
+ const rules = {
203
+ // This rule will match first, since the "main" element will be
204
+ // identified first during parsing.
205
+ "main character": (elm) => {
206
+ // editing goes here
207
+ return elm
208
+ },
209
+ // And as a result, this rule will never match the "Disco Stu"
210
+ // or "Julius Hibbert" elements, since anytime the "character" selector
211
+ // would match a <character> element, that <character> element will
212
+ // have already been matched by the above "main character" selector.
213
+ //
214
+ // However, this selector would match (and so this function would
215
+ // be called with) the two <character> elements that are children
216
+ // of the <side> element.
217
+ "character": (elm) => {
218
+ // this function would only be called for the <character> elements under <side>.
219
+ return elm
220
+ },
221
+ }
222
+ await pipeline(
223
+ createReadStream("simpsons.xml"), // above example
224
+ createXMLEditor(rules),
225
+ process.stdout
226
+ )
229
227
  ```
230
228
 
231
229
  ## Motivation
@@ -0,0 +1,69 @@
1
+ import xnv from 'xml-name-validator';
2
+ const isValidName = xnv.qname;
3
+ export class Element {
4
+ attributes;
5
+ children = [];
6
+ name;
7
+ text;
8
+ constructor(name, attributes) {
9
+ this.name = name;
10
+ this.attributes = attributes
11
+ ? JSON.parse(JSON.stringify(attributes))
12
+ : Object.create(null);
13
+ }
14
+ validate() {
15
+ if (typeof this.name !== 'string') {
16
+ return [false, new Error('No name provided for element')];
17
+ }
18
+ if (!isValidName(this.name)) {
19
+ return [false, new Error(`"${this.name}" is not a valid element name`)];
20
+ }
21
+ if (typeof this.attributes !== 'object' || this.attributes === null) {
22
+ return [false, new Error('"attributes" property is not an object')];
23
+ }
24
+ for (const attrName of Object.keys(this.attributes)) {
25
+ if (!isValidName(attrName)) {
26
+ return [false, new Error(`"${attrName}" is not a valid attribute name`)];
27
+ }
28
+ }
29
+ for (const child of this.children) {
30
+ const [isChildValid, childError] = child.validate();
31
+ if (!isChildValid) {
32
+ return [false, childError];
33
+ }
34
+ }
35
+ return [true, undefined];
36
+ }
37
+ }
38
+ export class ParsedElement extends Element {
39
+ children = [];
40
+ static fromSaxesNode(node) {
41
+ // Here we check if each attribute name is simple (and so just a
42
+ // string), or in the namespace representation the "saxes" library
43
+ // uses (in which case attrValue will be a SaxesAttributeNS
44
+ // object, that we have to unpack a bit)
45
+ const attributes = Object.create(null);
46
+ if (node.attributes) {
47
+ for (const [attrName, attrValue] of Object.entries(node.attributes)) {
48
+ if (typeof attrValue === 'string') {
49
+ attributes[attrName] = attrValue;
50
+ continue;
51
+ }
52
+ attributes[attrValue.name] = attrValue.value;
53
+ }
54
+ }
55
+ return new ParsedElement(node.name, attributes);
56
+ }
57
+ clone() {
58
+ const cloneElm = new ParsedElement(this.name, this.attributes);
59
+ cloneElm.text = this.text;
60
+ cloneElm.children = [];
61
+ for (const aChildElm of this.children) {
62
+ cloneElm.children.push(aChildElm.clone());
63
+ }
64
+ return cloneElm;
65
+ }
66
+ }
67
+ export const newElement = (name, attributes) => {
68
+ return new Element(name, attributes);
69
+ };
package/dist/index.js CHANGED
@@ -1,2 +1,2 @@
1
- import { createXMLEditor, newElement, } from './xml-stream-editor.js';
2
- export { createXMLEditor, newElement, };
1
+ export { Element, newElement } from './element.js';
2
+ export { createXMLEditor, } from './xml-stream-editor.js';
package/dist/markup.js CHANGED
@@ -1,6 +1,4 @@
1
1
  import xmlescape from 'xml-escape';
2
- import xnv from 'xml-name-validator';
3
- export const isValidName = xnv.qname;
4
2
  export const toAttrValue = (value) => {
5
3
  return xmlescape(value);
6
4
  };
@@ -0,0 +1,60 @@
1
+ // Represents the user provided selector strings, for defining which
2
+ // XML elements in the XML document they want to edit.
3
+ //
4
+ // We modify the (simplified) XML paths used to i. allow users to define
5
+ // which XML elements they want to edit, and ii. track the position of
6
+ // each parsed XML element in the incoming XML document.
7
+ //
8
+ // This allows us to quickly check whether a user-provided "selector"
9
+ // string matches the current XML parse stack with a simple .endsWith()
10
+ // call (specifically pathToJustParsedXMLElement.endsWith(userProvidedSelector)).
11
+ import xnv from 'xml-name-validator';
12
+ // Single character string that cannot appear in XML element names.
13
+ const pathSeparator = '@';
14
+ const process = (elementPath) => {
15
+ const collapsedWhiteSpace = elementPath.trim().replace(/ +/g, ' ');
16
+ return collapsedWhiteSpace.split(' ').map(x => pathSeparator + x).join('');
17
+ };
18
+ const validate = (selector) => {
19
+ for (const elmName of selector.split(' ')) {
20
+ if (xnv.name(elmName) === true) {
21
+ continue;
22
+ }
23
+ const msg = `Selector "${selector}" contains invalid name "${elmName}"`;
24
+ return [false, new Error(msg)];
25
+ }
26
+ return [true, undefined];
27
+ };
28
+ // Simple class used for tracking the path to an element in an XML document,
29
+ // when parsing the XML document.
30
+ //
31
+ // Mostly this is just wrapping how we track the position of each element
32
+ // in the XML document as we're parsing it, and annotating that path
33
+ // in a way that makes it easy to check if a SelectorRule matches the
34
+ // leaf-element in that path.
35
+ export class ElementPath {
36
+ path;
37
+ pathForMatching;
38
+ constructor(path) {
39
+ this.path = path;
40
+ this.pathForMatching = process(path);
41
+ }
42
+ append(elmName) {
43
+ return new ElementPath(this.path + ' ' + elmName);
44
+ }
45
+ matches(selector) {
46
+ return this.pathForMatching.endsWith(selector.text);
47
+ }
48
+ }
49
+ export class SelectorRule {
50
+ text;
51
+ pathForMatching;
52
+ constructor(selector) {
53
+ const [isValid, err] = validate(selector);
54
+ if (!isValid) {
55
+ throw err;
56
+ }
57
+ this.text = process(selector);
58
+ this.pathForMatching = process(this.text);
59
+ }
60
+ }
@@ -1,63 +1,9 @@
1
1
  import { strict as assert } from 'node:assert';
2
2
  import { Transform } from 'node:stream';
3
3
  import { SaxesParser } from 'saxes';
4
- import { isValidName, toAttrValue, toBodyText, toCloseTag, toOpenTag } from './markup.js';
5
- export const newElement = (name) => {
6
- return {
7
- name: name,
8
- text: undefined,
9
- attributes: Object.create(null),
10
- children: [],
11
- };
12
- };
13
- const throwOnInvalidElement = (elm) => {
14
- if (typeof elm.name !== 'string') {
15
- throw new Error('No name provided for element');
16
- }
17
- if (!isValidName(elm.name)) {
18
- throw new Error(`"${elm.name}" is not a valid XML element name`);
19
- }
20
- if (typeof elm.attributes !== 'object' || elm.attributes === null) {
21
- throw new Error('"attributes" property on element is not an object');
22
- }
23
- for (const attrName of Object.keys(elm.attributes)) {
24
- if (!isValidName(attrName)) {
25
- throw new Error(`"${attrName}" is not a valid XML attribute name`);
26
- }
27
- }
28
- elm.children.forEach(throwOnInvalidElement);
29
- };
30
- const cloneElement = (elm) => {
31
- const newElm = newElement(elm.name);
32
- newElm.text = elm.text;
33
- newElm.attributes = JSON.parse(JSON.stringify(elm.attributes));
34
- newElm.children = elm.children.map(cloneElement);
35
- return newElm;
36
- };
37
- const elementForNode = (node) => {
38
- // Here we check if each attribute name is simple (and so just a
39
- // string), or in the namespace representation the "saxes" library
40
- // uses (in which case attrValue will be a SaxesAttributeNS
41
- // object, that we have to unpack a bit)
42
- const attributes = Object.create(null);
43
- if (node.attributes) {
44
- for (const [attrName, attrValue] of Object.entries(node.attributes)) {
45
- if (typeof attrValue === 'string') {
46
- attributes[attrName] = attrValue;
47
- continue;
48
- }
49
- attributes[attrValue.name] = attrValue.value;
50
- }
51
- }
52
- return elementForNameAndAttrs(node.name, attributes);
53
- };
54
- const elementForNameAndAttrs = (name, attrs) => {
55
- const newElm = newElement(name);
56
- if (attrs) {
57
- newElm.attributes = attrs;
58
- }
59
- return newElm;
60
- };
4
+ import { ParsedElement } from './element.js';
5
+ import { toAttrValue, toBodyText, toCloseTag, toOpenTag } from './markup.js';
6
+ import { ElementPath, SelectorRule } from './selector.js';
61
7
  class XMLStreamEditorTransformer extends Transform {
62
8
  // Default options, used if the caller doesn't provide any options (or
63
9
  // merged into the provided options if the user only sets some options).
@@ -71,9 +17,9 @@ class XMLStreamEditorTransformer extends Transform {
71
17
  // Used to track how deep in the XML tree the parser is, so that we can
72
18
  // check newly parsed elements against the passed editor rules.
73
19
  #parseStack = [];
74
- // This is a map of (VERY) simple xpaths (i.e., only XML element names;
75
- // no attributes, no name spaces, etc).
76
- #editingRules;
20
+ // This is a map of objects that represent simple xpaths (i.e., only XML
21
+ // element names; no attributes, no name spaces, etc).
22
+ #rules;
77
23
  // Handle to the 'saxes' xml parser object.
78
24
  #xmlParser;
79
25
  // If set, tracks the current element in the parser stack that matches
@@ -85,9 +31,13 @@ class XMLStreamEditorTransformer extends Transform {
85
31
  #error;
86
32
  #pushParsedElementToStack(element) {
87
33
  const topOfStackElm = this.#parseStack.at(-1);
34
+ // We prefix every element name in the parse stack with '@' (a character
35
+ // that isn't valid in an XML element name) so that we can easily
36
+ // check if a selector matches the parse stack by just checking if
37
+ // the selector matches the right end of the stack path.
88
38
  const pathToElement = topOfStackElm
89
- ? topOfStackElm.path + ' ' + element.name
90
- : element.name;
39
+ ? topOfStackElm.path.append(element.name)
40
+ : new ElementPath(element.name);
91
41
  this.#parseStack.push({
92
42
  element: element,
93
43
  path: pathToElement,
@@ -101,14 +51,13 @@ class XMLStreamEditorTransformer extends Transform {
101
51
  // This method is only called after pushing an element to the stack,
102
52
  // so this is guaranteed to be true
103
53
  assert(topOfStack);
104
- const topOfStackPath = topOfStack.path;
105
- for (const [selector, editorFunc] of Object.entries(this.#editingRules)) {
106
- if (topOfStackPath.endsWith(selector)) {
54
+ for (const [selectorRule, editorFunc] of this.#rules.entries()) {
55
+ if (topOfStack.path.matches(selectorRule)) {
107
56
  // The depth of the root of this subtree in the stack
108
57
  const depth = this.#parseStack.length - 1;
109
58
  assert(depth >= 0);
110
59
  const elmToEdit = this.#parseStack[depth].element;
111
- return { selector: selector, func: editorFunc, element: elmToEdit };
60
+ return { selector: selectorRule, func: editorFunc, element: elmToEdit };
112
61
  }
113
62
  }
114
63
  return null;
@@ -128,12 +77,16 @@ class XMLStreamEditorTransformer extends Transform {
128
77
  }
129
78
  #callUserFuncOnCompletedElementAndWriteToStream() {
130
79
  assert(this.#elmToEditInfo);
131
- const clonedElm = cloneElement(this.#elmToEditInfo.element);
80
+ const clonedElm = this.#elmToEditInfo.element.clone();
81
+ const editElmFunc = this.#elmToEditInfo.func;
132
82
  try {
133
- const editedElm = this.#elmToEditInfo.func(clonedElm);
83
+ const editedElm = editElmFunc(clonedElm);
134
84
  if (editedElm) {
135
85
  if (this.#options.validate === true) {
136
- throwOnInvalidElement(editedElm);
86
+ const [isValid, error] = editedElm.validate();
87
+ if (!isValid) {
88
+ throw error;
89
+ }
137
90
  }
138
91
  this.#writeElementToStream(editedElm);
139
92
  }
@@ -155,7 +108,7 @@ class XMLStreamEditorTransformer extends Transform {
155
108
  // and append ourselves to the stack.
156
109
  // 3. We are NOT the root of a subtree to be edited, in which case
157
110
  // we just add ourselves to the stack.
158
- const newElement = elementForNode(node);
111
+ const newElement = ParsedElement.fromSaxesNode(node);
159
112
  this.#pushParsedElementToStack(newElement);
160
113
  // Check for case one
161
114
  if (this.#isInSubtreeToBeEdited()) {
@@ -246,7 +199,13 @@ class XMLStreamEditorTransformer extends Transform {
246
199
  saxes: options?.saxes ?? defaultOptions.saxes,
247
200
  };
248
201
  this.#options = mergedOptions;
249
- this.#editingRules = editingRules;
202
+ this.#rules = new Map();
203
+ for (const [selector, editFunc] of Object.entries(editingRules)) {
204
+ // This will throw if one of the user-provided selectors
205
+ // is invalid.
206
+ const parsedSelector = new SelectorRule(selector);
207
+ this.#rules.set(parsedSelector, editFunc);
208
+ }
250
209
  this.#xmlParser = new SaxesParser(this.#options.saxes);
251
210
  this.#configureParserCallbacks();
252
211
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xml-stream-editor",
3
- "version": "0.2.0",
3
+ "version": "0.2.1",
4
4
  "description": "A streaming xml editor.",
5
5
  "main": "dist/index.js",
6
6
  "files": [
package/src/types.d.ts CHANGED
@@ -2,11 +2,12 @@ import { Transform } from 'node:stream'
2
2
 
3
3
  import { SaxesOptions } from 'saxes'
4
4
 
5
- export declare interface Element {
6
- name: string
7
- text?: string
5
+ export declare class Element {
6
+ constructor (name: string, attributes?: Record<string, string>)
8
7
  attributes: Record<string, string>
9
8
  children: Element[]
9
+ name: string
10
+ text?: string
10
11
  }
11
12
 
12
13
  export declare interface Options {
@@ -34,6 +35,8 @@ export declare interface Options {
34
35
  export type Selector = string
35
36
  export type EditorFunc = (elm: Element) => Element | undefined
36
37
  export type EditingRules = Record<Selector, EditorFunc>
38
+ // Just a wrapper for `new Element(name)`, mostly a remnant of a previous
39
+ // implementation approach.
37
40
  export declare const newElement: (name: string) => Element
38
41
  export declare const createXMLEditor: (
39
42
  editingRules: EditingRules, options?: Options) => Transform