@xmldom/xmldom 0.8.2 → 0.9.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/dom-parser.js CHANGED
@@ -1,3 +1,5 @@
1
+ 'use strict'
2
+
1
3
  var conventions = require("./conventions");
2
4
  var dom = require('./dom')
3
5
  var entities = require('./entities');
@@ -5,13 +7,14 @@ var sax = require('./sax');
5
7
 
6
8
  var DOMImplementation = dom.DOMImplementation;
7
9
 
10
+ var MIME_TYPE = conventions.MIME_TYPE;
8
11
  var NAMESPACE = conventions.NAMESPACE;
9
12
 
10
13
  var ParseError = sax.ParseError;
11
14
  var XMLReader = sax.XMLReader;
12
15
 
13
16
  /**
14
- * Normalizes line ending according to https://www.w3.org/TR/xml11/#sec-line-ends:
17
+ * Normalizes line ending according to <https://www.w3.org/TR/xml11/#sec-line-ends>:
15
18
  *
16
19
  * > XML parsed entities are often stored in computer files which,
17
20
  * > for editing convenience, are organized into lines.
@@ -45,12 +48,26 @@ function normalizeLineEndings(input) {
45
48
 
46
49
  /**
47
50
  * @typedef DOMParserOptions
48
- * @property {DOMHandler} [domBuilder]
51
+ * @property {typeof conventions.assign} [assign=Object.assign || conventions.assign]
52
+ * The method to use instead of `Object.assign` (or if not available `conventions.assign`),
53
+ * which is used to copy values from the options before they are used for parsing.
54
+ * @property {typeof DOMHandler} [domHandler]
55
+ * For internal testing: The class for creating an instance for handling events from the SAX parser.
56
+ * Warning: By configuring a faulty implementation, the specified behavior can completely be broken.
49
57
  * @property {Function} [errorHandler]
50
- * @property {(string) => string} [normalizeLineEndings] used to replace line endings before parsing
51
- * defaults to `normalizeLineEndings`
52
- * @property {Locator} [locator]
53
- * @property {Record<string, string>} [xmlns]
58
+ * @property {boolean} [locator=true]
59
+ * Configures if the nodes created during parsing
60
+ * will have a `lineNumber` and a `columnNumber` attribute
61
+ * describing their location in the XML string.
62
+ * Default is true.
63
+ * @property {(string) => string} [normalizeLineEndings]
64
+ * used to replace line endings before parsing, defaults to `normalizeLineEndings`
65
+ * @property {object} [xmlns]
66
+ * The XML namespaces that should be assumed when parsing.
67
+ * The default namespace can be provided by the key that is the empty string.
68
+ * When the `mimeType` for HTML, XHTML or SVG is passed to `parseFromString`,
69
+ * the default namespace that will be used,
70
+ * will be overridden according to the specification.
54
71
  *
55
72
  * @see normalizeLineEndings
56
73
  */
@@ -60,7 +77,7 @@ function normalizeLineEndings(input) {
60
77
  * from a string into a DOM `Document`.
61
78
  *
62
79
  * _xmldom is different from the spec in that it allows an `options` parameter,
63
- * to override the default behavior._
80
+ * to control the behavior._
64
81
  *
65
82
  * @param {DOMParserOptions} [options]
66
83
  * @constructor
@@ -69,39 +86,125 @@ function normalizeLineEndings(input) {
69
86
  * @see https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization
70
87
  */
71
88
  function DOMParser(options){
72
- this.options = options ||{locator:{}};
89
+
90
+ options = options || {locator:true};
91
+
92
+ /**
93
+ * The method to use instead of `Object.assign` (or if not available `conventions.assign`),
94
+ * which is used to copy values from the options before they are used for parsing.
95
+ *
96
+ * @type {function (target: object, source: object | null | undefined): object}
97
+ * @readonly
98
+ * @private
99
+ * @see conventions.assign
100
+ */
101
+ this.assign = options.assign || Object.assign || conventions.assign
102
+
103
+ /**
104
+ * For internal testing: The class for creating an instance for handling events from the SAX parser.
105
+ * __**Warning: By configuring a faulty implementation, the specified behavior can completely be broken.**__
106
+ *
107
+ * @type {typeof DOMHandler}
108
+ * @readonly
109
+ * @private
110
+ */
111
+ this.domHandler = options.domHandler || DOMHandler
112
+
113
+ /**
114
+ * A function that can be invoked as the errorHandler instead of the default ones.
115
+ * @type {Function | undefined}
116
+ * @readonly
117
+ */
118
+ this.errorHandler = options.errorHandler;
119
+
120
+ /**
121
+ * used to replace line endings before parsing, defaults to `normalizeLineEndings`
122
+ *
123
+ * @type {(string) => string}
124
+ * @readonly
125
+ */
126
+ this.normalizeLineEndings = options.normalizeLineEndings || normalizeLineEndings
127
+
128
+ /**
129
+ * Configures if the nodes created during parsing
130
+ * will have a `lineNumber` and a `columnNumber` attribute
131
+ * describing their location in the XML string.
132
+ * Default is true.
133
+ * @type {boolean}
134
+ * @readonly
135
+ */
136
+ this.locator = !!options.locator
137
+
138
+ /**
139
+ * The default namespace can be provided by the key that is the empty string.
140
+ * When the `mimeType` for HTML, XHTML or SVG is passed to `parseFromString`,
141
+ * the default namespace that will be used,
142
+ * will be overridden according to the specification.
143
+ * @type {Readonly<object>}
144
+ * @readonly
145
+ */
146
+ this.xmlns = options.xmlns || {}
73
147
  }
74
148
 
75
- DOMParser.prototype.parseFromString = function(source,mimeType){
76
- var options = this.options;
77
- var sax = new XMLReader();
78
- var domBuilder = options.domBuilder || new DOMHandler();//contentHandler and LexicalHandler
79
- var errorHandler = options.errorHandler;
80
- var locator = options.locator;
81
- var defaultNSMap = options.xmlns||{};
82
- var isHTML = /\/x?html?$/.test(mimeType);//mimeType.toLowerCase().indexOf('html') > -1;
83
- var entityMap = isHTML ? entities.HTML_ENTITIES : entities.XML_ENTITIES;
84
- if(locator){
85
- domBuilder.setDocumentLocator(locator)
149
+ /**
150
+ * Parses `source` using the options in the way configured by the `DOMParserOptions` of `this` `DOMParser`.
151
+ * If `mimeType` is `text/html` an HTML `Document` is created, otherwise an XML `Document` is created.
152
+ *
153
+ * __It behaves very differently from the description in the living standard__:
154
+ * - Only allows the first argument to be a string (calls `error` handler otherwise.)
155
+ * - The second parameter is optional (defaults to `application/xml`) and can be any string,
156
+ * no `TypeError` will be thrown for values not listed in the spec.
157
+ * - Uses the `options` passed to the `DOMParser` constructor to modify the behavior/implementation.
158
+ * - Instead of creating a Document containing the error message,
159
+ * it triggers `errorHandler`(s) when unexpected input is found, which means it can return `undefined`.
160
+ * All error handlers can throw an `Error`, by default only the `fatalError` handler throws (a `ParseError`).
161
+ * - All errors thrown during the parsing that are not a `ParseError` are caught and reported using the `error` handler.
162
+ * - If no `ParseError` is thrown, this method returns the `DOMHandler.doc`,
163
+ * which most likely is the `Document` that has been created during parsing, or `undefined`.
164
+ * __**Warning: By configuring a faulty DOMHandler implementation,
165
+ * the specified behavior can completely be broken.**__
166
+ *
167
+ * @param {string} source Only string input is possible!
168
+ * @param {string} [mimeType='application/xml']
169
+ * the mimeType or contentType of the document to be created
170
+ * determines the `type` of document created (XML or HTML)
171
+ * @returns {Document | undefined}
172
+ * @throws ParseError for specific errors depending on the configured `errorHandler`s and/or `domHandler`
173
+ *
174
+ * @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser/parseFromString
175
+ * @see https://html.spec.whatwg.org/#dom-domparser-parsefromstring-dev
176
+ */
177
+ DOMParser.prototype.parseFromString = function (source, mimeType) {
178
+ var defaultNSMap = this.assign({}, this.xmlns)
179
+ var entityMap = entities.XML_ENTITIES
180
+ var defaultNamespace = defaultNSMap[''] || null
181
+ if (MIME_TYPE.hasDefaultHTMLNamespace(mimeType)) {
182
+ entityMap = entities.HTML_ENTITIES
183
+ defaultNamespace = NAMESPACE.HTML
184
+ } else if (mimeType === MIME_TYPE.XML_SVG_IMAGE) {
185
+ defaultNamespace = NAMESPACE.SVG
86
186
  }
187
+ defaultNSMap[''] = defaultNamespace
188
+ defaultNSMap.xml = defaultNSMap.xml || NAMESPACE.XML
87
189
 
88
- sax.errorHandler = buildErrorHandler(errorHandler,domBuilder,locator);
89
- sax.domBuilder = options.domBuilder || domBuilder;
90
- if(isHTML){
91
- defaultNSMap[''] = NAMESPACE.HTML;
190
+ var domBuilder = new this.domHandler({
191
+ mimeType: mimeType,
192
+ defaultNamespace: defaultNamespace,
193
+ })
194
+ var locator = this.locator ? {} : undefined;
195
+ if (this.locator) {
196
+ domBuilder.setDocumentLocator(locator)
92
197
  }
93
- defaultNSMap.xml = defaultNSMap.xml || NAMESPACE.XML;
94
- var normalize = options.normalizeLineEndings || normalizeLineEndings;
198
+
199
+ var sax = new XMLReader()
200
+ sax.errorHandler = buildErrorHandler(this.errorHandler, domBuilder, locator)
201
+ sax.domBuilder = domBuilder
95
202
  if (source && typeof source === 'string') {
96
- sax.parse(
97
- normalize(source),
98
- defaultNSMap,
99
- entityMap
100
- )
203
+ sax.parse(this.normalizeLineEndings(source), defaultNSMap, entityMap)
101
204
  } else {
102
205
  sax.errorHandler.error('invalid doc source')
103
206
  }
104
- return domBuilder.doc;
207
+ return domBuilder.doc
105
208
  }
106
209
  function buildErrorHandler(errorImpl,domBuilder,locator){
107
210
  if(!errorImpl){
@@ -128,33 +231,108 @@ function buildErrorHandler(errorImpl,domBuilder,locator){
128
231
  return errorHandler;
129
232
  }
130
233
 
131
- //console.log('#\n\n\n\n\n\n\n####')
132
234
  /**
133
- * +ContentHandler+ErrorHandler
134
- * +LexicalHandler+EntityResolver2
135
- * -DeclHandler-DTDHandler
235
+ * @typedef DOMHandlerOptions
236
+ * @property {string} [mimeType=MIME_TYPE.XML_APPLICATION]
237
+ * @property {string|null} [defaultNamespace=null]
238
+ */
239
+ /**
240
+ * The class that is used to handle events from the SAX parser to create the related DOM elements.
241
+ *
242
+ * Some methods are only implemented as an empty function,
243
+ * since they are (at least currently) not relevant for xmldom.
136
244
  *
137
- * DefaultHandler:EntityResolver, DTDHandler, ContentHandler, ErrorHandler
138
- * DefaultHandler2:DefaultHandler,LexicalHandler, DeclHandler, EntityResolver2
139
- * @link http://www.saxproject.org/apidoc/org/xml/sax/helpers/DefaultHandler.html
245
+ * @constructor
246
+ * @param {DOMHandlerOptions} [options]
247
+ * @see http://www.saxproject.org/apidoc/org/xml/sax/ext/DefaultHandler2.html
140
248
  */
141
- function DOMHandler() {
142
- this.cdata = false;
249
+ function DOMHandler(options) {
250
+ var opt = options || {}
251
+ /**
252
+ * The mime type is used to determine if the DOM handler will create an XML or HTML document.
253
+ * Only if it is set to `text/html` it will create an HTML document.
254
+ * It defaults to MIME_TYPE.XML_APPLICATION.
255
+ *
256
+ * @type {string}
257
+ * @readonly
258
+ * @see MIME_TYPE
259
+ */
260
+ this.mimeType = opt.mimeType || MIME_TYPE.XML_APPLICATION
261
+
262
+ /**
263
+ * The namespace to use to create an XML document.
264
+ * For the following reasons this is required:
265
+ * - The SAX API for `startDocument` doesn't offer any way to pass a namespace,
266
+ * since at that point there is no way for the parser to know what the default namespace from the document will be.
267
+ * - When creating using `DOMImplementation.createDocument` it is required to pass a namespace,
268
+ * to determine the correct `Document.contentType`, which should match `this.mimeType`.
269
+ * - When parsing an XML document with the `application/xhtml+xml` mimeType,
270
+ * the HTML namespace needs to be the default namespace.
271
+ *
272
+ * @type {string|null}
273
+ * @readonly
274
+ * @private
275
+ */
276
+ this.defaultNamespace = opt.defaultNamespace || null
277
+
278
+ /**
279
+ * @private
280
+ * @type {boolean}
281
+ */
282
+ this.cdata = false
283
+
284
+
285
+ /**
286
+ * The last `Element` that was created by `startElement`.
287
+ * `endElement` sets it to the `currentElement.parentNode`.
288
+ *
289
+ * Note: The sax parser currently sets it to white space text nodes between tags.
290
+ *
291
+ * @type {Element | Node | undefined}
292
+ * @private
293
+ */
294
+ this.currentElement = undefined
295
+
296
+ /**
297
+ * The Document that is created as part of `startDocument`,
298
+ * and returned by `DOMParser.parseFromString`.
299
+ *
300
+ * @type {Document | undefined}
301
+ * @readonly
302
+ */
303
+ this.doc = undefined
304
+
305
+ /**
306
+ * The locator is stored as part of setDocumentLocator.
307
+ * It is controlled and mutated by the SAX parser
308
+ * to store the current parsing position.
309
+ * It is used by DOMHandler to set `columnNumber` and `lineNumber`
310
+ * on the DOM nodes.
311
+ *
312
+ * @type {Readonly<Locator> | undefined}
313
+ * @readonly (the sax parser currently sometimes sets it)
314
+ * @private
315
+ */
316
+ this.locator = undefined
143
317
  }
144
318
  function position(locator,node){
145
319
  node.lineNumber = locator.lineNumber;
146
320
  node.columnNumber = locator.columnNumber;
147
321
  }
148
- /**
149
- * @see org.xml.sax.ContentHandler#startDocument
150
- * @link http://www.saxproject.org/apidoc/org/xml/sax/ContentHandler.html
151
- */
152
322
  DOMHandler.prototype = {
323
+ /**
324
+ * Either creates an XML or an HTML document and stores it under `this.doc`.
325
+ * If it is an XML document, `this.defaultNamespace` is used to create it,
326
+ * and it will not contain any `childNodes`.
327
+ * If it is an HTML document, it will be created without any `childNodes`.
328
+ *
329
+ * @see http://www.saxproject.org/apidoc/org/xml/sax/ContentHandler.html
330
+ */
153
331
  startDocument : function() {
154
- this.doc = new DOMImplementation().createDocument(null, null, null);
155
- if (this.locator) {
156
- this.doc.documentURI = this.locator.systemId;
157
- }
332
+ var impl = new DOMImplementation()
333
+ this.doc = MIME_TYPE.isHTML(this.mimeType)
334
+ ? impl.createHTMLDocument(false)
335
+ : impl.createDocument(this.defaultNamespace, '')
158
336
  },
159
337
  startElement:function(namespaceURI, localName, qName, attrs) {
160
338
  var doc = this.doc;
@@ -213,10 +391,17 @@ DOMHandler.prototype = {
213
391
  endDocument:function() {
214
392
  this.doc.normalize();
215
393
  },
394
+ /**
395
+ * Stores the locator to be able to set the `columnNumber` and `lineNumber`
396
+ * on the created DOM nodes.
397
+ *
398
+ * @param {Locator} locator
399
+ */
216
400
  setDocumentLocator:function (locator) {
217
- if(this.locator = locator){// && !('lineNumber' in locator)){
218
- locator.lineNumber = 0;
219
- }
401
+ if (locator) {
402
+ locator.lineNumber = 0
403
+ }
404
+ this.locator = locator
220
405
  },
221
406
  //LexicalHandler
222
407
  comment:function(chars, start, length) {
@@ -259,7 +444,7 @@ DOMHandler.prototype = {
259
444
  }
260
445
  function _locator(l){
261
446
  if(l){
262
- return '\n@'+(l.systemId ||'')+'#[line:'+l.lineNumber+',col:'+l.columnNumber+']'
447
+ return '\n@#[line:'+l.lineNumber+',col:'+l.columnNumber+']'
263
448
  }
264
449
  }
265
450
  function _toString(chars,start,length){