@xmldom/xmldom 0.8.2 → 0.9.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +59 -8
- package/index.d.ts +42 -40
- package/lib/conventions.js +183 -9
- package/lib/dom-parser.js +237 -52
- package/lib/dom.js +276 -89
- package/lib/entities.js +2 -0
- package/lib/index.js +2 -0
- package/lib/sax.js +49 -28
- package/package.json +6 -6
package/lib/dom-parser.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
1
3
|
var conventions = require("./conventions");
|
|
2
4
|
var dom = require('./dom')
|
|
3
5
|
var entities = require('./entities');
|
|
@@ -5,13 +7,14 @@ var sax = require('./sax');
|
|
|
5
7
|
|
|
6
8
|
var DOMImplementation = dom.DOMImplementation;
|
|
7
9
|
|
|
10
|
+
var MIME_TYPE = conventions.MIME_TYPE;
|
|
8
11
|
var NAMESPACE = conventions.NAMESPACE;
|
|
9
12
|
|
|
10
13
|
var ParseError = sax.ParseError;
|
|
11
14
|
var XMLReader = sax.XMLReader;
|
|
12
15
|
|
|
13
16
|
/**
|
|
14
|
-
* Normalizes line ending according to https://www.w3.org/TR/xml11/#sec-line-ends
|
|
17
|
+
* Normalizes line ending according to <https://www.w3.org/TR/xml11/#sec-line-ends>:
|
|
15
18
|
*
|
|
16
19
|
* > XML parsed entities are often stored in computer files which,
|
|
17
20
|
* > for editing convenience, are organized into lines.
|
|
@@ -45,12 +48,26 @@ function normalizeLineEndings(input) {
|
|
|
45
48
|
|
|
46
49
|
/**
|
|
47
50
|
* @typedef DOMParserOptions
|
|
48
|
-
* @property {
|
|
51
|
+
* @property {typeof conventions.assign} [assign=Object.assign || conventions.assign]
|
|
52
|
+
* The method to use instead of `Object.assign` (or if not available `conventions.assign`),
|
|
53
|
+
* which is used to copy values from the options before they are used for parsing.
|
|
54
|
+
* @property {typeof DOMHandler} [domHandler]
|
|
55
|
+
* For internal testing: The class for creating an instance for handling events from the SAX parser.
|
|
56
|
+
* Warning: By configuring a faulty implementation, the specified behavior can completely be broken.
|
|
49
57
|
* @property {Function} [errorHandler]
|
|
50
|
-
* @property {
|
|
51
|
-
*
|
|
52
|
-
*
|
|
53
|
-
*
|
|
58
|
+
* @property {boolean} [locator=true]
|
|
59
|
+
* Configures if the nodes created during parsing
|
|
60
|
+
* will have a `lineNumber` and a `columnNumber` attribute
|
|
61
|
+
* describing their location in the XML string.
|
|
62
|
+
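* Default is true only when no `options` object is passed at all; when `options` is passed without `locator`, it is treated as false.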
|
|
63
|
+
* @property {(string) => string} [normalizeLineEndings]
|
|
64
|
+
* used to replace line endings before parsing, defaults to `normalizeLineEndings`
|
|
65
|
+
* @property {object} [xmlns]
|
|
66
|
+
* The XML namespaces that should be assumed when parsing.
|
|
67
|
+
* The default namespace can be provided by the key that is the empty string.
|
|
68
|
+
* When the `mimeType` for HTML, XHTML or SVG is passed to `parseFromString`,
|
|
69
|
+
* the default namespace that will be used,
|
|
70
|
+
* will be overridden according to the specification.
|
|
54
71
|
*
|
|
55
72
|
* @see normalizeLineEndings
|
|
56
73
|
*/
|
|
@@ -60,7 +77,7 @@ function normalizeLineEndings(input) {
|
|
|
60
77
|
* from a string into a DOM `Document`.
|
|
61
78
|
*
|
|
62
79
|
* _xmldom is different from the spec in that it allows an `options` parameter,
|
|
63
|
-
* to
|
|
80
|
+
* to control the behavior._
|
|
64
81
|
*
|
|
65
82
|
* @param {DOMParserOptions} [options]
|
|
66
83
|
* @constructor
|
|
@@ -69,39 +86,125 @@ function normalizeLineEndings(input) {
|
|
|
69
86
|
* @see https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization
|
|
70
87
|
*/
|
|
71
88
|
function DOMParser(options){
|
|
72
|
-
|
|
89
|
+
|
|
90
|
+
options = options || {locator:true};
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* The method to use instead of `Object.assign` (or if not available `conventions.assign`),
|
|
94
|
+
* which is used to copy values from the options before they are used for parsing.
|
|
95
|
+
*
|
|
96
|
+
* @type {function (target: object, source: object | null | undefined): object}
|
|
97
|
+
* @readonly
|
|
98
|
+
* @private
|
|
99
|
+
* @see conventions.assign
|
|
100
|
+
*/
|
|
101
|
+
this.assign = options.assign || Object.assign || conventions.assign
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* For internal testing: The class for creating an instance for handling events from the SAX parser.
|
|
105
|
+
* __**Warning: By configuring a faulty implementation, the specified behavior can completely be broken.**__
|
|
106
|
+
*
|
|
107
|
+
* @type {typeof DOMHandler}
|
|
108
|
+
* @readonly
|
|
109
|
+
* @private
|
|
110
|
+
*/
|
|
111
|
+
this.domHandler = options.domHandler || DOMHandler
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* A function that can be invoked as the errorHandler instead of the default ones.
|
|
115
|
+
* @type {Function | undefined}
|
|
116
|
+
* @readonly
|
|
117
|
+
*/
|
|
118
|
+
this.errorHandler = options.errorHandler;
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* used to replace line endings before parsing, defaults to `normalizeLineEndings`
|
|
122
|
+
*
|
|
123
|
+
* @type {(string) => string}
|
|
124
|
+
* @readonly
|
|
125
|
+
*/
|
|
126
|
+
this.normalizeLineEndings = options.normalizeLineEndings || normalizeLineEndings
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* Configures if the nodes created during parsing
|
|
130
|
+
* will have a `lineNumber` and a `columnNumber` attribute
|
|
131
|
+
* describing their location in the XML string.
|
|
132
|
+
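* Default is true only when no `options` object is passed at all; when `options` is passed without `locator`, it is treated as false.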
|
|
133
|
+
* @type {boolean}
|
|
134
|
+
* @readonly
|
|
135
|
+
*/
|
|
136
|
+
this.locator = !!options.locator
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* The default namespace can be provided by the key that is the empty string.
|
|
140
|
+
* When the `mimeType` for HTML, XHTML or SVG is passed to `parseFromString`,
|
|
141
|
+
* the default namespace that will be used,
|
|
142
|
+
* will be overridden according to the specification.
|
|
143
|
+
* @type {Readonly<object>}
|
|
144
|
+
* @readonly
|
|
145
|
+
*/
|
|
146
|
+
this.xmlns = options.xmlns || {}
|
|
73
147
|
}
|
|
74
148
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
149
|
+
/**
|
|
150
|
+
* Parses `source` using the options in the way configured by the `DOMParserOptions` of `this` `DOMParser`.
|
|
151
|
+
* If `mimeType` is `text/html` an HTML `Document` is created, otherwise an XML `Document` is created.
|
|
152
|
+
*
|
|
153
|
+
* __It behaves very differently from the description in the living standard__:
|
|
154
|
+
* - Only allows the first argument to be a non-empty string (calls the `error` handler otherwise).
|
|
155
|
+
* - The second parameter is optional (defaults to `application/xml`) and can be any string,
|
|
156
|
+
* no `TypeError` will be thrown for values not listed in the spec.
|
|
157
|
+
* - Uses the `options` passed to the `DOMParser` constructor to modify the behavior/implementation.
|
|
158
|
+
* - Instead of creating a Document containing the error message,
|
|
159
|
+
* it triggers `errorHandler`(s) when unexpected input is found, which means it can return `undefined`.
|
|
160
|
+
* All error handlers can throw an `Error`, by default only the `fatalError` handler throws (a `ParseError`).
|
|
161
|
+
* - All errors thrown during the parsing that are not a `ParseError` are caught and reported using the `error` handler.
|
|
162
|
+
* - If no `ParseError` is thrown, this method returns the `DOMHandler.doc`,
|
|
163
|
+
* which most likely is the `Document` that has been created during parsing, or `undefined`.
|
|
164
|
+
* __**Warning: By configuring a faulty DOMHandler implementation,
|
|
165
|
+
* the specified behavior can completely be broken.**__
|
|
166
|
+
*
|
|
167
|
+
* @param {string} source Only string input is possible!
|
|
168
|
+
* @param {string} [mimeType='application/xml']
|
|
169
|
+
* the mimeType or contentType of the document to be created
|
|
170
|
+
* determines the `type` of document created (XML or HTML)
|
|
171
|
+
* @returns {Document | undefined}
|
|
172
|
+
* @throws ParseError for specific errors depending on the configured `errorHandler`s and/or `domHandler`
|
|
173
|
+
*
|
|
174
|
+
* @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser/parseFromString
|
|
175
|
+
* @see https://html.spec.whatwg.org/#dom-domparser-parsefromstring-dev
|
|
176
|
+
*/
|
|
177
|
+
DOMParser.prototype.parseFromString = function (source, mimeType) {
|
|
178
|
+
var defaultNSMap = this.assign({}, this.xmlns)
|
|
179
|
+
var entityMap = entities.XML_ENTITIES
|
|
180
|
+
var defaultNamespace = defaultNSMap[''] || null
|
|
181
|
+
if (MIME_TYPE.hasDefaultHTMLNamespace(mimeType)) {
|
|
182
|
+
entityMap = entities.HTML_ENTITIES
|
|
183
|
+
defaultNamespace = NAMESPACE.HTML
|
|
184
|
+
} else if (mimeType === MIME_TYPE.XML_SVG_IMAGE) {
|
|
185
|
+
defaultNamespace = NAMESPACE.SVG
|
|
86
186
|
}
|
|
187
|
+
defaultNSMap[''] = defaultNamespace
|
|
188
|
+
defaultNSMap.xml = defaultNSMap.xml || NAMESPACE.XML
|
|
87
189
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
190
|
+
var domBuilder = new this.domHandler({
|
|
191
|
+
mimeType: mimeType,
|
|
192
|
+
defaultNamespace: defaultNamespace,
|
|
193
|
+
})
|
|
194
|
+
var locator = this.locator ? {} : undefined;
|
|
195
|
+
if (this.locator) {
|
|
196
|
+
domBuilder.setDocumentLocator(locator)
|
|
92
197
|
}
|
|
93
|
-
|
|
94
|
-
var
|
|
198
|
+
|
|
199
|
+
var sax = new XMLReader()
|
|
200
|
+
sax.errorHandler = buildErrorHandler(this.errorHandler, domBuilder, locator)
|
|
201
|
+
sax.domBuilder = domBuilder
|
|
95
202
|
if (source && typeof source === 'string') {
|
|
96
|
-
sax.parse(
|
|
97
|
-
normalize(source),
|
|
98
|
-
defaultNSMap,
|
|
99
|
-
entityMap
|
|
100
|
-
)
|
|
203
|
+
sax.parse(this.normalizeLineEndings(source), defaultNSMap, entityMap)
|
|
101
204
|
} else {
|
|
102
205
|
sax.errorHandler.error('invalid doc source')
|
|
103
206
|
}
|
|
104
|
-
return domBuilder.doc
|
|
207
|
+
return domBuilder.doc
|
|
105
208
|
}
|
|
106
209
|
function buildErrorHandler(errorImpl,domBuilder,locator){
|
|
107
210
|
if(!errorImpl){
|
|
@@ -128,33 +231,108 @@ function buildErrorHandler(errorImpl,domBuilder,locator){
|
|
|
128
231
|
return errorHandler;
|
|
129
232
|
}
|
|
130
233
|
|
|
131
|
-
//console.log('#\n\n\n\n\n\n\n####')
|
|
132
234
|
/**
|
|
133
|
-
*
|
|
134
|
-
*
|
|
135
|
-
*
|
|
235
|
+
* @typedef DOMHandlerOptions
|
|
236
|
+
* @property {string} [mimeType=MIME_TYPE.XML_APPLICATION]
|
|
237
|
+
* @property {string|null} [defaultNamespace=null]
|
|
238
|
+
*/
|
|
239
|
+
/**
|
|
240
|
+
* The class that is used to handle events from the SAX parser to create the related DOM elements.
|
|
241
|
+
*
|
|
242
|
+
* Some methods are only implemented as an empty function,
|
|
243
|
+
* since they are (at least currently) not relevant for xmldom.
|
|
136
244
|
*
|
|
137
|
-
*
|
|
138
|
-
*
|
|
139
|
-
* @
|
|
245
|
+
* @constructor
|
|
246
|
+
* @param {DOMHandlerOptions} [options]
|
|
247
|
+
* @see http://www.saxproject.org/apidoc/org/xml/sax/ext/DefaultHandler2.html
|
|
140
248
|
*/
|
|
141
|
-
function DOMHandler() {
|
|
142
|
-
|
|
249
|
+
function DOMHandler(options) {
|
|
250
|
+
var opt = options || {}
|
|
251
|
+
/**
|
|
252
|
+
* The mime type is used to determine if the DOM handler will create an XML or HTML document.
|
|
253
|
+
* Only if it is set to `text/html` it will create an HTML document.
|
|
254
|
+
* It defaults to MIME_TYPE.XML_APPLICATION.
|
|
255
|
+
*
|
|
256
|
+
* @type {string}
|
|
257
|
+
* @readonly
|
|
258
|
+
* @see MIME_TYPE
|
|
259
|
+
*/
|
|
260
|
+
this.mimeType = opt.mimeType || MIME_TYPE.XML_APPLICATION
|
|
261
|
+
|
|
262
|
+
/**
|
|
263
|
+
* The namespace to use to create an XML document.
|
|
264
|
+
* For the following reasons this is required:
|
|
265
|
+
* - The SAX API for `startDocument` doesn't offer any way to pass a namespace,
|
|
266
|
+
* since at that point there is no way for the parser to know what the default namespace from the document will be.
|
|
267
|
+
* - When creating using `DOMImplementation.createDocument` it is required to pass a namespace,
|
|
268
|
+
* to determine the correct `Document.contentType`, which should match `this.mimeType`.
|
|
269
|
+
* - When parsing an XML document with the `application/xhtml+xml` mimeType,
|
|
270
|
+
* the HTML namespace needs to be the default namespace.
|
|
271
|
+
*
|
|
272
|
+
* @type {string|null}
|
|
273
|
+
* @readonly
|
|
274
|
+
* @private
|
|
275
|
+
*/
|
|
276
|
+
this.defaultNamespace = opt.defaultNamespace || null
|
|
277
|
+
|
|
278
|
+
/**
|
|
279
|
+
* @private
|
|
280
|
+
* @type {boolean}
|
|
281
|
+
*/
|
|
282
|
+
this.cdata = false
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
/**
|
|
286
|
+
* The last `Element` that was created by `startElement`.
|
|
287
|
+
* `endElement` sets it to the `currentElement.parentNode`.
|
|
288
|
+
*
|
|
289
|
+
* Note: The sax parser currently sets it to white space text nodes between tags.
|
|
290
|
+
*
|
|
291
|
+
* @type {Element | Node | undefined}
|
|
292
|
+
* @private
|
|
293
|
+
*/
|
|
294
|
+
this.currentElement = undefined
|
|
295
|
+
|
|
296
|
+
/**
|
|
297
|
+
* The Document that is created as part of `startDocument`,
|
|
298
|
+
* and returned by `DOMParser.parseFromString`.
|
|
299
|
+
*
|
|
300
|
+
* @type {Document | undefined}
|
|
301
|
+
* @readonly
|
|
302
|
+
*/
|
|
303
|
+
this.doc = undefined
|
|
304
|
+
|
|
305
|
+
/**
|
|
306
|
+
* The locator is stored as part of setDocumentLocator.
|
|
307
|
+
* It is controlled and mutated by the SAX parser
|
|
308
|
+
* to store the current parsing position.
|
|
309
|
+
* It is used by DOMHandler to set `columnNumber` and `lineNumber`
|
|
310
|
+
* on the DOM nodes.
|
|
311
|
+
*
|
|
312
|
+
* @type {Readonly<Locator> | undefined}
|
|
313
|
+
* @readonly (the sax parser currently sometimes sets it)
|
|
314
|
+
* @private
|
|
315
|
+
*/
|
|
316
|
+
this.locator = undefined
|
|
143
317
|
}
|
|
144
318
|
function position(locator,node){
|
|
145
319
|
node.lineNumber = locator.lineNumber;
|
|
146
320
|
node.columnNumber = locator.columnNumber;
|
|
147
321
|
}
|
|
148
|
-
/**
|
|
149
|
-
* @see org.xml.sax.ContentHandler#startDocument
|
|
150
|
-
* @link http://www.saxproject.org/apidoc/org/xml/sax/ContentHandler.html
|
|
151
|
-
*/
|
|
152
322
|
DOMHandler.prototype = {
|
|
323
|
+
/**
|
|
324
|
+
* Either creates an XML or an HTML document and stores it under `this.doc`.
|
|
325
|
+
* If it is an XML document, `this.defaultNamespace` is used to create it,
|
|
326
|
+
* and it will not contain any `childNodes`.
|
|
327
|
+
* If it is an HTML document, it will be created without any `childNodes`.
|
|
328
|
+
*
|
|
329
|
+
* @see http://www.saxproject.org/apidoc/org/xml/sax/ContentHandler.html
|
|
330
|
+
*/
|
|
153
331
|
startDocument : function() {
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
332
|
+
var impl = new DOMImplementation()
|
|
333
|
+
this.doc = MIME_TYPE.isHTML(this.mimeType)
|
|
334
|
+
? impl.createHTMLDocument(false)
|
|
335
|
+
: impl.createDocument(this.defaultNamespace, '')
|
|
158
336
|
},
|
|
159
337
|
startElement:function(namespaceURI, localName, qName, attrs) {
|
|
160
338
|
var doc = this.doc;
|
|
@@ -213,10 +391,17 @@ DOMHandler.prototype = {
|
|
|
213
391
|
endDocument:function() {
|
|
214
392
|
this.doc.normalize();
|
|
215
393
|
},
|
|
394
|
+
/**
|
|
395
|
+
* Stores the locator to be able to set the `columnNumber` and `lineNumber`
|
|
396
|
+
* on the created DOM nodes.
|
|
397
|
+
*
|
|
398
|
+
* @param {Locator} locator
|
|
399
|
+
*/
|
|
216
400
|
setDocumentLocator:function (locator) {
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
401
|
+
if (locator) {
|
|
402
|
+
locator.lineNumber = 0
|
|
403
|
+
}
|
|
404
|
+
this.locator = locator
|
|
220
405
|
},
|
|
221
406
|
//LexicalHandler
|
|
222
407
|
comment:function(chars, start, length) {
|
|
@@ -259,7 +444,7 @@ DOMHandler.prototype = {
|
|
|
259
444
|
}
|
|
260
445
|
function _locator(l){
|
|
261
446
|
if(l){
|
|
262
|
-
return '\n
|
|
447
|
+
return '\n@#[line:'+l.lineNumber+',col:'+l.columnNumber+']'
|
|
263
448
|
}
|
|
264
449
|
}
|
|
265
450
|
function _toString(chars,start,length){
|