@rgrove/parse-xml 2.0.4 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +84 -337
  3. package/dist/browser.js +774 -0
  4. package/dist/browser.js.map +7 -0
  5. package/dist/global.min.js +10 -0
  6. package/dist/global.min.js.map +7 -0
  7. package/dist/index.d.ts +24 -0
  8. package/dist/index.d.ts.map +1 -0
  9. package/dist/index.js +50 -0
  10. package/dist/index.js.map +1 -0
  11. package/dist/lib/Parser.d.ts +218 -0
  12. package/dist/lib/Parser.d.ts.map +1 -0
  13. package/dist/lib/Parser.js +638 -0
  14. package/dist/lib/Parser.js.map +1 -0
  15. package/dist/lib/StringScanner.d.ts +97 -0
  16. package/dist/lib/StringScanner.d.ts.map +1 -0
  17. package/dist/lib/StringScanner.js +210 -0
  18. package/dist/lib/StringScanner.js.map +1 -0
  19. package/dist/lib/XmlCdata.d.ts +8 -0
  20. package/dist/lib/XmlCdata.d.ts.map +1 -0
  21. package/dist/lib/XmlCdata.js +15 -0
  22. package/dist/lib/XmlCdata.js.map +1 -0
  23. package/dist/lib/XmlComment.d.ts +16 -0
  24. package/dist/lib/XmlComment.d.ts.map +1 -0
  25. package/dist/lib/XmlComment.js +23 -0
  26. package/dist/lib/XmlComment.js.map +1 -0
  27. package/dist/lib/XmlDocument.d.ts +29 -0
  28. package/dist/lib/XmlDocument.d.ts.map +1 -0
  29. package/dist/lib/XmlDocument.js +47 -0
  30. package/dist/lib/XmlDocument.js.map +1 -0
  31. package/dist/lib/XmlElement.d.ts +40 -0
  32. package/dist/lib/XmlElement.d.ts.map +1 -0
  33. package/dist/lib/XmlElement.js +51 -0
  34. package/dist/lib/XmlElement.js.map +1 -0
  35. package/dist/lib/XmlNode.d.ts +74 -0
  36. package/dist/lib/XmlNode.d.ts.map +1 -0
  37. package/dist/lib/XmlNode.js +96 -0
  38. package/dist/lib/XmlNode.js.map +1 -0
  39. package/dist/lib/XmlProcessingInstruction.d.ts +22 -0
  40. package/dist/lib/XmlProcessingInstruction.d.ts.map +1 -0
  41. package/dist/lib/XmlProcessingInstruction.js +25 -0
  42. package/dist/lib/XmlProcessingInstruction.js.map +1 -0
  43. package/dist/lib/XmlText.d.ts +16 -0
  44. package/dist/lib/XmlText.d.ts.map +1 -0
  45. package/dist/lib/XmlText.js +23 -0
  46. package/dist/lib/XmlText.js.map +1 -0
  47. package/dist/lib/syntax.d.ts +69 -0
  48. package/dist/lib/syntax.d.ts.map +1 -0
  49. package/dist/lib/syntax.js +133 -0
  50. package/dist/lib/syntax.js.map +1 -0
  51. package/dist/lib/types.d.ts +5 -0
  52. package/dist/lib/types.d.ts.map +1 -0
  53. package/dist/lib/types.js +3 -0
  54. package/dist/lib/types.js.map +1 -0
  55. package/package.json +36 -22
  56. package/src/index.ts +30 -0
  57. package/src/lib/Parser.ts +819 -0
  58. package/src/lib/StringScanner.ts +254 -0
  59. package/src/lib/XmlCdata.ts +11 -0
  60. package/src/lib/XmlComment.ts +26 -0
  61. package/src/lib/XmlDocument.ts +57 -0
  62. package/src/lib/XmlElement.ts +81 -0
  63. package/src/lib/XmlNode.ts +107 -0
  64. package/src/lib/XmlProcessingInstruction.ts +35 -0
  65. package/src/lib/XmlText.ts +26 -0
  66. package/src/lib/syntax.ts +136 -0
  67. package/src/lib/types.ts +2 -0
  68. package/CHANGELOG.md +0 -89
  69. package/dist/commonjs/index.js +0 -434
  70. package/dist/commonjs/lib/syntax.js +0 -262
  71. package/dist/umd/parse-xml.min.js +0 -1
  72. package/src/index.js +0 -451
  73. package/src/lib/syntax.js +0 -263
@@ -0,0 +1,136 @@
1
/**
 * Sticky pattern that consumes a run of one or more characters permitted
 * inside a double-quoted `AttValue`, i.e. anything other than `"`, `&`, or `<`.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
 */
export const attValueCharDoubleQuote: RegExp = /[^"&<]+/y;

/**
 * Sticky pattern that consumes a run of one or more characters permitted
 * inside a single-quoted `AttValue`, i.e. anything other than `'`, `&`, or `<`.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
 */
export const attValueCharSingleQuote: RegExp = /[^'&<]+/y;

/**
 * Global pattern matching each tab or line feed, the whitespace characters
 * that attribute-value normalization turns into a plain space.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#AVNormalize
 */
export const attValueNormalizedWhitespace: RegExp = /[\t\n]/g;

/**
 * Pattern matching the first delimiter that terminates a run of XML
 * `CharData`: `<`, `&`, or the CDATA close sequence `]]>`.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
 */
export const endCharData: RegExp = /<|&|]]>/;
32
+
33
/**
 * Frozen, prototype-less lookup table from each predefined XML entity name to
 * the character it stands for.
 *
 * The table has no prototype, so looking up a name such as `toString` yields
 * `undefined` rather than an inherited member.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-predefined-ent
 */
export const predefinedEntities: Readonly<{[name: string]: string;}> = (() => {
  const table: {[name: string]: string;} = Object.create(null);

  table['amp'] = '&';
  table['apos'] = "'";
  table['gt'] = '>';
  table['lt'] = '<';
  table['quot'] = '"';

  return Object.freeze(table);
})();
45
+
46
/**
 * Tests whether the first code point of _char_ is an XML `NameChar`.
 *
 * @param char String whose first code point is examined.
 * @returns `true` for a `NameChar`, `false` otherwise.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameChar
 */
export function isNameChar(char: string): boolean {
  const cp = getCodePoint(char);

  // ASCII letters are also NameStartChars; checking them up front alongside
  // the digits is a small performance win for the most common input.
  if (
    (cp >= 0x61 && cp <= 0x7A) // a-z
    || (cp >= 0x41 && cp <= 0x5A) // A-Z
    || (cp >= 0x30 && cp <= 0x39) // 0-9
  ) {
    return true;
  }

  switch (cp) {
    case 0x2D: // -
    case 0x2E: // .
    case 0xB7: // middle dot
      return true;
  }

  if ((cp >= 0x300 && cp <= 0x36F) || (cp >= 0x203F && cp <= 0x2040)) {
    return true;
  }

  // Every remaining NameChar is also a NameStartChar.
  return isNameStartChar(char, cp);
}
66
+
67
/**
 * Tests whether a code point is an XML `NameStartChar`.
 *
 * @param char String whose first code point is examined when _cp_ is omitted.
 * @param cp Code point to test; defaults to the first code point of _char_.
 * @returns `true` for a `NameStartChar`, `false` otherwise.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameStartChar
 */
export function isNameStartChar(char: string, cp = getCodePoint(char)): boolean {
  const isAsciiLetter = (cp >= 0x61 && cp <= 0x7A) // a-z
    || (cp >= 0x41 && cp <= 0x5A); // A-Z

  if (isAsciiLetter || cp === 0x3A /* : */ || cp === 0x5F /* _ */) {
    return true;
  }

  const inLatinOrGreekBlocks = (cp >= 0xC0 && cp <= 0xD6)
    || (cp >= 0xD8 && cp <= 0xF6)
    || (cp >= 0xF8 && cp <= 0x2FF)
    || (cp >= 0x370 && cp <= 0x37D)
    || (cp >= 0x37F && cp <= 0x1FFF);

  if (inLatinOrGreekBlocks) {
    return true;
  }

  const inRemainingBmpRanges = (cp >= 0x200C && cp <= 0x200D)
    || (cp >= 0x2070 && cp <= 0x218F)
    || (cp >= 0x2C00 && cp <= 0x2FEF)
    || (cp >= 0x3001 && cp <= 0xD7FF)
    || (cp >= 0xF900 && cp <= 0xFDCF)
    || (cp >= 0xFDF0 && cp <= 0xFFFD);

  if (inRemainingBmpRanges) {
    return true;
  }

  // Supplementary planes.
  return cp >= 0x10000 && cp <= 0xEFFFF;
}
90
+
91
/**
 * Tests whether _char_ may appear between the `&` and `;` of a reference:
 * either the `#` that introduces a character reference, or any `NameChar`.
 *
 * @param char Character to test.
 * @returns `true` if _char_ is a valid reference character, `false` otherwise.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-references
 */
export function isReferenceChar(char: string): boolean {
  if (char === '#') {
    return true;
  }

  return isNameChar(char);
}
100
+
101
/**
 * Tests whether the first code point of _char_ is XML whitespace: space, tab,
 * line feed, or carriage return.
 *
 * @param char String whose first code point is examined.
 * @returns `true` for XML whitespace, `false` otherwise.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
 */
export function isWhitespace(char: string): boolean {
  switch (getCodePoint(char)) {
    case 0x20: // space
    case 0x9: // tab
    case 0xA: // line feed
    case 0xD: // carriage return
      return true;

    default:
      return false;
  }
}
114
+
115
/**
 * Tests whether _cp_ is a code point allowed by the XML `Char` production.
 *
 * @param cp Code point to test.
 * @returns `true` if _cp_ is a valid XML `Char` code point, `false` otherwise.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Char
 */
export function isXmlCodePoint(cp: number): boolean {
  if (cp >= 0x20) {
    // The gaps between these ranges are the surrogate block, 0xFFFE-0xFFFF,
    // and everything above 0x10FFFF.
    return cp <= 0xD7FF
      || (cp >= 0xE000 && cp <= 0xFFFD)
      || (cp >= 0x10000 && cp <= 0x10FFFF);
  }

  // Below 0x20 only tab, line feed, and carriage return are permitted.
  return cp === 0x9 || cp === 0xA || cp === 0xD;
}
129
+
130
/**
 * Returns the Unicode code point at the start of _char_, or `-1` if _char_ is
 * empty.
 *
 * @param char String whose first code point is read.
 * @returns The code point value (which may be `0` for U+0000), or `-1` when
 *   _char_ has no characters.
 */
function getCodePoint(char: string): number {
  const cp = char.codePointAt(0);

  // Compare against `undefined` explicitly: a truthiness test would also turn
  // the legitimate code point 0 (U+0000) into -1.
  return cp === undefined ? -1 : cp;
}
@@ -0,0 +1,2 @@
1
/** A JSON object: string keys mapped to optional JSON values. */
export type JsonObject = {[key in string]?: JsonValue};

/** Any value that can appear in JSON. */
export type JsonValue =
  | string
  | number
  | boolean
  | JsonObject
  | JsonValue[]
  | null;
package/CHANGELOG.md DELETED
@@ -1,89 +0,0 @@
1
- # parse-xml changelog
2
-
3
- All notable changes to parse-xml are documented in this file. The format is
4
- based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project
5
- adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
-
7
- ## 2.0.4 (2020-05-01)
8
-
9
- ### Fixed
10
-
11
- - Extremely long attribute values no longer cause the parser to throw a
12
- "Maximum call stack size exceeded" `RangeError`. [#13] (@rossj)
13
-
14
- [#13]:https://github.com/rgrove/parse-xml/pull/13
15
-
16
- ## 2.0.3 (2020-04-20)
17
-
18
- ### Fixed
19
-
20
- - Attribute values with many consecutive character references (such as `&lt;`)
21
- no longer cause the parser to hang. [#10] (@rossj)
22
-
23
- [#10]:https://github.com/rgrove/parse-xml/pull/10
24
-
25
- ## 2.0.2 (2020-01-10)
26
-
27
- ### Fixed
28
-
29
- - Whitespace in attribute values is now normalized correctly. [#7]
30
-
31
- Previously, attribute values were normalized according to the rules for
32
- non-CDATA attributes, but this was incorrect and based on a misreading of
33
- the spec.
34
-
35
- Attribute values are now correctly parsed as CDATA, meaning that whitespace
36
- is not collapsed or trimmed and whitespace character entities are resolved
37
- to their respective characters rather than being normalized to spaces (which
38
- was incorrect even by the non-CDATA rules!).
39
-
40
- [#7]:https://github.com/rgrove/parse-xml/pull/7
41
-
42
- ## 2.0.1 (2019-04-09)
43
-
44
- ### Fixed
45
-
46
- - A carriage return (`\r`) character that isn't followed by a line feed (`\n`)
47
- character is now [correctly normalized][xml-line-ends] to a line feed before
48
- parsing.
49
-
50
- [xml-line-ends]:https://www.w3.org/TR/2008/REC-xml-20081126/#sec-line-ends
51
-
52
- ## 2.0.0 (2019-01-20)
53
-
54
- ### Added
55
-
56
- - There's a new minified UMD bundle at `dist/umd/parse-xml.min.js` in the npm
57
- package. This may be useful if you want to load parse-xml directly in a
58
- browser using a service like
59
- [unpkg](https://unpkg.com/@rgrove/parse-xml/dist/umd/parse-xml.min.js) or
60
- [jsDelivr](https://cdn.jsdelivr.net/npm/@rgrove/parse-xml/dist/umd/parse-xml.min.js).
61
-
62
- ### Changed
63
-
64
- - parse-xml no longer depends on CoreJS polyfills or the Babel runtime, which
65
- reduces the browser bundle size significantly. If you need to support older
66
- browsers, you should provide your own polyfills for `Object.assign()`,
67
- `Object.freeze()`, and `String.fromCodePoint()`.
68
-
69
- - The browser-friendly CommonJS build has moved from `dist/` to
70
- `dist/commonjs/` in the npm package.
71
-
72
- ## 1.1.1 (2017-09-20)
73
-
74
- ### Fixed
75
-
76
- - Attribute values are no longer truncated at the first `=` character.
77
-
78
- ## 1.1.0 (2017-09-10)
79
-
80
- ### Added
81
-
82
- - New parsing option `resolveUndefinedEntity`. [#2]
83
- ([@retorquere](https://github.com/retorquere))
84
-
85
- [#2]:https://github.com/rgrove/parse-xml/pull/2
86
-
87
- ## 1.0.0 (2017-06-04)
88
-
89
- - Initial release.
@@ -1,434 +0,0 @@
1
'use strict';

// Values stored in the `type` property of the nodes this parser produces.
var NODE_TYPE_CDATA = 'cdata';
var NODE_TYPE_COMMENT = 'comment';
var NODE_TYPE_DOCUMENT = 'document';
var NODE_TYPE_ELEMENT = 'element';
var NODE_TYPE_TEXT = 'text';

// Shared immutable placeholders: `emptyArray` is what `scan()` returns when
// nothing matched, and `emptyObject` is the default for the `options` argument.
var emptyArray = Object.freeze([]);
var emptyObject = Object.freeze(Object.create(null));

// Predefined XML entities, keyed by the complete reference text (including
// the leading `&` and trailing `;`).
var namedEntities = Object.freeze({
  '&amp;': '&',
  '&apos;': "'",
  '&gt;': '>',
  '&lt;': '<',
  '&quot;': '"'
});

// Syntax module; assigned on the first call to `parseXml()`.
var Syntax;
18
-
19
- module.exports = function parseXml(xml, options) {
20
- if (options === void 0) {
21
- options = emptyObject;
22
- }
23
-
24
- if (Syntax === void 0) {
25
- // Lazy require to defer regex parsing until first use.
26
- Syntax = require('./lib/syntax');
27
- }
28
-
29
- if (xml[0] === "\uFEFF") {
30
- // Strip byte order mark.
31
- xml = xml.slice(1);
32
- }
33
-
34
- xml = xml.replace(/\r\n?/g, '\n'); // Normalize CRLF and CR to LF.
35
-
36
- var doc = {
37
- type: NODE_TYPE_DOCUMENT,
38
- children: [],
39
- parent: null,
40
- toJSON: nodeToJson
41
- };
42
- var state = {
43
- length: xml.length,
44
- options: options,
45
- parent: doc,
46
- pos: 0,
47
- prevPos: 0,
48
- xml: xml
49
- };
50
- state.replaceReference = replaceReference.bind(state);
51
- consumeProlog(state);
52
-
53
- if (!consumeElement(state)) {
54
- error(state, 'Root element is missing or invalid');
55
- }
56
-
57
- while (consumeMisc(state)) {} // eslint-disable-line no-empty
58
-
59
-
60
- if (!isEof(state)) {
61
- error(state, "Extra content at the end of the document");
62
- }
63
-
64
- return doc;
65
- }; // -- Private Functions --------------------------------------------------------
66
-
67
-
68
/**
 * Appends _node_ to the children of the current parent node, linking it back
 * to that parent and giving it a `toJSON` method.
 *
 * @param {object} state Parser state; `state.parent` receives the node.
 * @param {object} node Node to attach. Mutated in place.
 */
function addNode(state, node) {
  var parentNode = state.parent;

  node.parent = parentNode;
  node.toJSON = nodeToJson;
  parentNode.children.push(node);
}
73
-
74
/**
 * Adds _text_ to the current parent node, merging it into the preceding
 * sibling when that sibling is already a text node.
 *
 * @param {object} state Parser state; `state.parent` receives the text.
 * @param {string} text Text to add.
 */
function addText(state, text) {
  var siblings = state.parent.children;
  var lastSibling = siblings[siblings.length - 1];

  if (lastSibling === void 0 || lastSibling.type !== NODE_TYPE_TEXT) {
    addNode(state, {
      type: NODE_TYPE_TEXT,
      text: text
    });
    return;
  }

  // Appending to the existing text node avoids creating another node.
  lastSibling.text += text;
} // Each `consume*` function takes the current state as an argument and returns
89
- // `true` if `state.pos` was advanced (meaning some XML was consumed) or `false`
90
- // if nothing was consumed.
91
-
92
-
93
/**
 * Consumes a CDATA section at the current position, if there is one.
 *
 * With `options.preserveCdata` set, the section becomes its own `cdata` node;
 * otherwise its content is added as ordinary text.
 *
 * @param {object} state Parser state.
 * @returns {boolean} `true` if a CDATA section was consumed.
 */
function consumeCDSect(state) {
  var result = scan(state, Syntax.Anchored.CDSect);

  if (result[0] === void 0) {
    return false;
  }

  var text = result[1];

  if (!state.options.preserveCdata) {
    addText(state, text);
    return true;
  }

  addNode(state, {
    type: NODE_TYPE_CDATA,
    text: text
  });

  return true;
}
113
-
114
/**
 * Consumes character data at the current position and adds it as text.
 *
 * @param {object} state Parser state.
 * @returns {boolean} `true` if character data was consumed.
 * @throws {Error} Via `error()` when the data contains `]]>` or a character
 *   that doesn't match the `Char` pattern.
 */
function consumeCharData(state) {
  var text = scan(state, Syntax.Anchored.CharData)[0];

  if (text === void 0) {
    return false;
  }

  var closeDelimiterIndex = text.indexOf(']]>');

  if (closeDelimiterIndex !== -1) {
    // Point the error at the offending delimiter.
    state.pos = state.prevPos + closeDelimiterIndex;
    error(state, 'Element content may not contain the CDATA section close delimiter `]]>`');
  }

  // Note: XML 1.0 5th ed. says `CharData` is "any string of characters which
  // does not contain the start-delimiter of any markup and does not include the
  // CDATA-section-close delimiter", but the conformance test suite and
  // well-established parsers like libxml seem to restrict `CharData` to
  // characters that match the `Char` symbol, so that's what I've done here.
  if (!Syntax.CharOnly.test(text)) {
    // Locate the first position that isn't followed by a valid `Char`.
    var invalidCharIndex = text.search(new RegExp("(?!" + Syntax.Char.source + ")"));

    state.pos = state.prevPos + invalidCharIndex;
    error(state, 'Element content contains an invalid character');
  }

  addText(state, text);
  return true;
}
142
-
143
/**
 * Consumes a comment at the current position, if there is one. The comment
 * becomes a node only when `options.preserveComments` is set.
 *
 * @param {object} state Parser state.
 * @returns {boolean} `true` if a comment was consumed.
 */
function consumeComment(state) {
  var content = scan(state, Syntax.Anchored.Comment)[1];

  if (content === void 0) {
    return false;
  }

  if (!state.options.preserveComments) {
    return true;
  }

  addNode(state, {
    type: NODE_TYPE_COMMENT,
    content: content.trim()
  });

  return true;
}
160
-
161
/**
 * Consumes a document type declaration at the current position, if there is
 * one. Nothing is added to the node tree.
 *
 * @param {object} state Parser state.
 * @returns {boolean} `true` if a declaration was consumed.
 */
function consumeDoctypeDecl(state) {
  var matches = scan(state, Syntax.Anchored.doctypedecl);

  return matches.length > 0;
}
164
-
165
/**
 * Consumes an element (empty-element tag, or start tag + content + end tag)
 * at the current position and adds it to the current parent node.
 *
 * @param {object} state Parser state.
 * @returns {boolean} `true` if an element was consumed.
 * @throws {Error} Via `error()` when the matching end tag is missing, or when
 *   attribute or content parsing fails.
 */
function consumeElement(state) {
  // Try the empty-element form first, then fall back to a start tag.
  var tagMatch = scan(state, Syntax.Anchored.EmptyElemTag);
  var isEmpty = tagMatch[0] !== void 0;

  if (!isEmpty) {
    tagMatch = scan(state, Syntax.Anchored.STag);

    if (tagMatch[0] === void 0) {
      return false;
    }
  }

  var name = tagMatch[1];
  var parent = state.parent;
  var parsedAttrs = parseAttrs(state, tagMatch[2]);

  var node = {
    type: NODE_TYPE_ELEMENT,
    name: name,
    attributes: parsedAttrs,
    children: []
  };

  // `xml:space="preserve"` turns the flag on; otherwise it is inherited from
  // the parent unless the element explicitly resets it with "default".
  var xmlSpace = parsedAttrs['xml:space'];

  if (xmlSpace === 'preserve' || xmlSpace !== 'default' && parent.preserveWhitespace) {
    node.preserveWhitespace = true;
  }

  if (!isEmpty) {
    // Children attach to `state.parent`, so point it at this element while
    // its content is consumed.
    state.parent = node;

    do {
      consumeCharData(state);
    } while (
      consumeElement(state)
      || consumeReference(state)
      || consumeCDSect(state)
      || consumePI(state)
      || consumeComment(state)
    );

    var endName = scan(state, Syntax.Anchored.ETag)[1];

    if (endName !== name) {
      state.pos = state.prevPos;
      error(state, "Missing end tag for element " + name);
    }

    state.parent = parent;
  }

  addNode(state, node);
  return true;
}
221
-
222
/**
 * Consumes one comment, processing instruction, or run of whitespace at the
 * current position, trying them in that order.
 *
 * @param {object} state Parser state.
 * @returns {boolean} `true` if any of the three was consumed.
 */
function consumeMisc(state) {
  if (consumeComment(state)) {
    return true;
  }

  if (consumePI(state)) {
    return true;
  }

  return consumeWhitespace(state);
}
225
-
226
/**
 * Consumes a processing instruction at the current position, if there is one.
 * Nothing is added to the node tree.
 *
 * @param {object} state Parser state.
 * @returns {boolean} `true` if a processing instruction was consumed.
 * @throws {Error} Via `error()` when the target is `xml` in any letter case,
 *   because an XML declaration is only valid at the start of the document.
 */
function consumePI(state) {
  var result = scan(state, Syntax.Anchored.PI);

  if (result[0] === void 0) {
    return false;
  }

  var target = result[1];

  if (target.toLowerCase() === 'xml') {
    state.pos = state.prevPos;
    error(state, 'XML declaration is only allowed at the start of the document');
  }

  return true;
}
242
-
243
/**
 * Consumes the document prolog: an optional XML declaration, any misc items,
 * and an optional doctype declaration followed by more misc items.
 *
 * @param {object} state Parser state.
 * @returns {boolean} `true` if the position advanced.
 */
function consumeProlog(state) {
  var startPos = state.pos;

  // The declaration is optional, so the result of this scan is not checked.
  scan(state, Syntax.Anchored.XMLDecl);

  while (consumeMisc(state)) {
    // Keep consuming until nothing more matches.
  }

  if (consumeDoctypeDecl(state)) {
    while (consumeMisc(state)) {
      // Keep consuming until nothing more matches.
    }
  }

  return state.pos > startPos;
}
257
-
258
/**
 * Consumes a reference at the current position and adds its replacement text
 * to the current parent node.
 *
 * @param {object} state Parser state.
 * @returns {boolean} `true` if a reference was consumed.
 */
function consumeReference(state) {
  var ref = scan(state, Syntax.Anchored.Reference)[0];

  if (ref === void 0) {
    return false;
  }

  var replacement = state.replaceReference(ref);

  addText(state, replacement);
  return true;
}
269
-
270
/**
 * Consumes whatever `Syntax.Anchored.S` matches at the current position
 * (presumably a run of XML whitespace; the pattern is defined elsewhere).
 *
 * @param {object} state Parser state.
 * @returns {boolean} `true` if anything was consumed.
 */
function consumeWhitespace(state) {
  var matches = scan(state, Syntax.Anchored.S);

  return matches.length > 0;
}
273
-
274
- function error(state, message) {
275
- var pos = state.pos,
276
- xml = state.xml;
277
- var column = 1;
278
- var excerpt = '';
279
- var line = 1; // Find the line and column where the error occurred.
280
-
281
- for (var i = 0; i < pos; ++i) {
282
- var _char = xml[i];
283
-
284
- if (_char === '\n') {
285
- column = 1;
286
- excerpt = '';
287
- line += 1;
288
- } else {
289
- column += 1;
290
- excerpt += _char;
291
- }
292
- }
293
-
294
- var eol = xml.indexOf('\n', pos);
295
- excerpt += eol === -1 ? xml.slice(pos) : xml.slice(pos, eol);
296
- var excerptStart = 0; // Keep the excerpt below 50 chars, but always keep the error position in
297
- // view.
298
-
299
- if (excerpt.length > 50) {
300
- if (column < 40) {
301
- excerpt = excerpt.slice(0, 50);
302
- } else {
303
- excerptStart = column - 20;
304
- excerpt = excerpt.slice(excerptStart, column + 30);
305
- }
306
- }
307
-
308
- var err = new Error(message + " (line " + line + ", column " + column + ")\n" + (" " + excerpt + "\n") + ' '.repeat(column - excerptStart + 1) + '^\n');
309
- err.column = column;
310
- err.excerpt = excerpt;
311
- err.line = line;
312
- err.pos = pos;
313
- throw err;
314
- }
315
-
316
/**
 * Reports whether the parser has consumed the entire input.
 *
 * @param {object} state Parser state; `state.pos` and `state.length` are read.
 * @returns {boolean} `true` if no unread characters remain.
 */
function isEof(state) {
  // `pos` is the index of the next unread character, so input is exhausted
  // only once it reaches `length`. Comparing against `length - 1` would treat
  // a final unread character as already consumed and let one character of
  // trailing junk slip past the end-of-document check.
  return state.pos >= state.length;
}
319
-
320
/**
 * `toJSON` implementation shared by all nodes. Returns a shallow,
 * prototype-less copy of the node (`this`) without its `parent` property,
 * leaving the node itself untouched.
 *
 * @returns {object} Copy of the node minus `parent`.
 */
function nodeToJson() {
  var copy = Object.create(null);

  Object.assign(copy, this); // eslint-disable-line no-invalid-this
  delete copy.parent;

  return copy;
}
326
-
327
/**
 * Normalizes a raw attribute value: every space, tab, carriage return, and
 * line feed becomes a single space, and then each reference is replaced by
 * the text it stands for.
 *
 * @param {object} state Parser state; supplies `replaceReference`.
 * @param {string} value Attribute value without its surrounding quotes.
 * @returns {string} Normalized value.
 */
function normalizeAttrValue(state, value) {
  // Whitespace is normalized before references are expanded, so whitespace
  // produced by a reference is left as is.
  var spaced = value.replace(/[\x20\t\r\n]/g, ' ');

  return spaced.replace(Syntax.Global.Reference, state.replaceReference);
}
330
-
331
/**
 * Parses the attribute portion of a tag into a prototype-less object mapping
 * attribute names to normalized values.
 *
 * @param {object} state Parser state.
 * @param {string} [attrs] Raw attribute text from a tag, if any.
 * @returns {object} Parsed attributes; empty when _attrs_ is falsy.
 * @throws {Error} Via `error()` when an attribute is defined twice or when
 *   `xml:space` has a value other than "default" or "preserve".
 */
function parseAttrs(state, attrs) {
  var result = Object.create(null);

  if (!attrs) {
    return result;
  }

  // Sorting means properties are added in sorted order rather than in
  // document order.
  var pairs = attrs.match(Syntax.Global.Attribute).sort();

  for (var i = 0; i < pairs.length; ++i) {
    var pair = pairs[i];

    // Split at the first `Eq` match; anything after it belongs to the value.
    var eq = pair.match(Syntax.Eq);
    var name = pair.slice(0, eq.index);
    var quoted = pair.slice(eq.index + eq[0].length);

    if (name in result) {
      state.pos = state.prevPos;
      error(state, "Attribute `" + name + "` redefined");
    }

    // Drop the surrounding quote characters before normalizing.
    var value = normalizeAttrValue(state, quoted.slice(1, -1));

    if (name === 'xml:space' && value !== 'default' && value !== 'preserve') {
      state.pos = state.prevPos;
      error(state, "Value of the `xml:space` attribute must be \"default\" or \"preserve\"");
    }

    result[name] = value;
  }

  return result;
}
365
-
366
/**
 * Returns the replacement text for a character or entity reference. Must be
 * called with the parser state as `this` (`parseXml()` binds it).
 *
 * Character references (`&#…;`, `&#x…;`) resolve to the referenced character.
 * Named references resolve, in order, through the predefined entities,
 * `options.resolveUndefinedEntity` (when it returns a non-null value), and
 * `options.ignoreUndefinedEntities` (which returns the reference unchanged).
 *
 * @param {string} ref Complete reference text, including `&` and `;`.
 * @returns {string} Replacement text.
 * @throws {Error} Via `error()` when the reference is unterminated, refers to
 *   an invalid or out-of-range character, or names an entity that cannot be
 *   resolved.
 */
function replaceReference(ref) {
  var state = this; // eslint-disable-line no-invalid-this

  if (ref[ref.length - 1] !== ';') {
    error(state, "Invalid reference: `" + ref + "`");
  }

  if (ref[1] === '#') {
    // This is a character entity.
    var codePoint = ref[2] === 'x'
      ? parseInt(ref.slice(3, -1), 16)
      : parseInt(ref.slice(2, -1), 10);

    // `String.fromCodePoint()` throws a bare `RangeError` for values outside
    // 0..0x10FFFF, so reject those here to report them as a normal parse error
    // with position information.
    if (isNaN(codePoint) || codePoint < 0 || codePoint > 0x10FFFF) {
      state.pos = state.prevPos;
      error(state, "Invalid character entity `" + ref + "`");
    }

    var referencedChar = String.fromCodePoint(codePoint);

    if (!Syntax.Char.test(referencedChar)) {
      state.pos = state.prevPos;
      error(state, "Invalid character entity `" + ref + "`");
    }

    return referencedChar;
  }

  // This is a named entity.
  var value = namedEntities[ref];

  if (value !== void 0) {
    return value;
  }

  if (state.options.resolveUndefinedEntity) {
    var resolvedValue = state.options.resolveUndefinedEntity(ref);

    if (resolvedValue !== null && resolvedValue !== void 0) {
      return resolvedValue;
    }
  }

  if (state.options.ignoreUndefinedEntities) {
    return ref;
  }

  state.pos = state.prevPos;
  error(state, "Named entity isn't defined: `" + ref + "`");
}
420
-
421
/**
 * Matches _regex_ against the unparsed remainder of the input and, on
 * success, advances the parser past the match.
 *
 * @param {object} state Parser state. On a match, `prevPos` is set to the old
 *   position and `pos` moves forward by the length of the match.
 * @param {RegExp} regex Pattern to apply to the remaining input.
 * @returns {Array} The match array, or the shared empty array if nothing
 *   matched (in which case the state is left untouched).
 */
function scan(state, regex) {
  var start = state.pos;
  var remaining = start > 0 ? state.xml.slice(start) : state.xml;
  var matches = remaining.match(regex);

  if (matches === null) {
    return emptyArray;
  }

  state.prevPos = start;
  state.pos = start + matches[0].length;

  return matches;
}