@m2c2kit/build-helpers 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +8 -7
- package/dist/index.js +698 -363
- package/package.json +6 -6
package/dist/index.js
CHANGED
|
@@ -51,6 +51,7 @@ var xmlDecodeTree = new Uint16Array(
|
|
|
51
51
|
var _a;
|
|
52
52
|
const decodeMap = new Map([
|
|
53
53
|
[0, 65533],
|
|
54
|
+
// C1 Unicode control character reference replacements
|
|
54
55
|
[128, 8364],
|
|
55
56
|
[130, 8218],
|
|
56
57
|
[131, 402],
|
|
@@ -79,6 +80,9 @@ const decodeMap = new Map([
|
|
|
79
80
|
[158, 382],
|
|
80
81
|
[159, 376],
|
|
81
82
|
]);
|
|
83
|
+
/**
|
|
84
|
+
* Polyfill for `String.fromCodePoint`. It is used to create a string from a Unicode code point.
|
|
85
|
+
*/
|
|
82
86
|
const fromCodePoint =
|
|
83
87
|
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, node/no-unsupported-features/es-builtins
|
|
84
88
|
(_a = String.fromCodePoint) !== null && _a !== void 0 ? _a : function (codePoint) {
|
|
@@ -91,6 +95,11 @@ const fromCodePoint =
|
|
|
91
95
|
output += String.fromCharCode(codePoint);
|
|
92
96
|
return output;
|
|
93
97
|
};
|
|
98
|
+
/**
|
|
99
|
+
* Replace the given code point with a replacement character if it is a
|
|
100
|
+
* surrogate or is outside the valid range. Otherwise return the code
|
|
101
|
+
* point unchanged.
|
|
102
|
+
*/
|
|
94
103
|
function replaceCodePoint(codePoint) {
|
|
95
104
|
var _a;
|
|
96
105
|
if ((codePoint >= 0xd800 && codePoint <= 0xdfff) || codePoint > 0x10ffff) {
|
|
@@ -103,20 +112,419 @@ var CharCodes$1;
|
|
|
103
112
|
(function (CharCodes) {
|
|
104
113
|
CharCodes[CharCodes["NUM"] = 35] = "NUM";
|
|
105
114
|
CharCodes[CharCodes["SEMI"] = 59] = "SEMI";
|
|
115
|
+
CharCodes[CharCodes["EQUALS"] = 61] = "EQUALS";
|
|
106
116
|
CharCodes[CharCodes["ZERO"] = 48] = "ZERO";
|
|
107
117
|
CharCodes[CharCodes["NINE"] = 57] = "NINE";
|
|
108
118
|
CharCodes[CharCodes["LOWER_A"] = 97] = "LOWER_A";
|
|
109
119
|
CharCodes[CharCodes["LOWER_F"] = 102] = "LOWER_F";
|
|
110
120
|
CharCodes[CharCodes["LOWER_X"] = 120] = "LOWER_X";
|
|
111
|
-
|
|
112
|
-
CharCodes[CharCodes["
|
|
121
|
+
CharCodes[CharCodes["LOWER_Z"] = 122] = "LOWER_Z";
|
|
122
|
+
CharCodes[CharCodes["UPPER_A"] = 65] = "UPPER_A";
|
|
123
|
+
CharCodes[CharCodes["UPPER_F"] = 70] = "UPPER_F";
|
|
124
|
+
CharCodes[CharCodes["UPPER_Z"] = 90] = "UPPER_Z";
|
|
113
125
|
})(CharCodes$1 || (CharCodes$1 = {}));
|
|
126
|
+
/** Bit that needs to be set to convert an upper case ASCII character to lower case */
|
|
127
|
+
const TO_LOWER_BIT = 0b100000;
|
|
114
128
|
var BinTrieFlags;
|
|
115
129
|
(function (BinTrieFlags) {
|
|
116
130
|
BinTrieFlags[BinTrieFlags["VALUE_LENGTH"] = 49152] = "VALUE_LENGTH";
|
|
117
131
|
BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 16256] = "BRANCH_LENGTH";
|
|
118
132
|
BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE";
|
|
119
133
|
})(BinTrieFlags || (BinTrieFlags = {}));
|
|
134
|
+
function isNumber(code) {
|
|
135
|
+
return code >= CharCodes$1.ZERO && code <= CharCodes$1.NINE;
|
|
136
|
+
}
|
|
137
|
+
function isHexadecimalCharacter(code) {
|
|
138
|
+
return ((code >= CharCodes$1.UPPER_A && code <= CharCodes$1.UPPER_F) ||
|
|
139
|
+
(code >= CharCodes$1.LOWER_A && code <= CharCodes$1.LOWER_F));
|
|
140
|
+
}
|
|
141
|
+
function isAsciiAlphaNumeric(code) {
|
|
142
|
+
return ((code >= CharCodes$1.UPPER_A && code <= CharCodes$1.UPPER_Z) ||
|
|
143
|
+
(code >= CharCodes$1.LOWER_A && code <= CharCodes$1.LOWER_Z) ||
|
|
144
|
+
isNumber(code));
|
|
145
|
+
}
|
|
146
|
+
/**
|
|
147
|
+
* Checks if the given character is a valid end character for an entity in an attribute.
|
|
148
|
+
*
|
|
149
|
+
* Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
|
|
150
|
+
* See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
|
|
151
|
+
*/
|
|
152
|
+
function isEntityInAttributeInvalidEnd(code) {
|
|
153
|
+
return code === CharCodes$1.EQUALS || isAsciiAlphaNumeric(code);
|
|
154
|
+
}
|
|
155
|
+
var EntityDecoderState;
|
|
156
|
+
(function (EntityDecoderState) {
|
|
157
|
+
EntityDecoderState[EntityDecoderState["EntityStart"] = 0] = "EntityStart";
|
|
158
|
+
EntityDecoderState[EntityDecoderState["NumericStart"] = 1] = "NumericStart";
|
|
159
|
+
EntityDecoderState[EntityDecoderState["NumericDecimal"] = 2] = "NumericDecimal";
|
|
160
|
+
EntityDecoderState[EntityDecoderState["NumericHex"] = 3] = "NumericHex";
|
|
161
|
+
EntityDecoderState[EntityDecoderState["NamedEntity"] = 4] = "NamedEntity";
|
|
162
|
+
})(EntityDecoderState || (EntityDecoderState = {}));
|
|
163
|
+
var DecodingMode;
|
|
164
|
+
(function (DecodingMode) {
|
|
165
|
+
/** Entities in text nodes that can end with any character. */
|
|
166
|
+
DecodingMode[DecodingMode["Legacy"] = 0] = "Legacy";
|
|
167
|
+
/** Only allow entities terminated with a semicolon. */
|
|
168
|
+
DecodingMode[DecodingMode["Strict"] = 1] = "Strict";
|
|
169
|
+
/** Entities in attributes have limitations on ending characters. */
|
|
170
|
+
DecodingMode[DecodingMode["Attribute"] = 2] = "Attribute";
|
|
171
|
+
})(DecodingMode || (DecodingMode = {}));
|
|
172
|
+
/**
|
|
173
|
+
* Token decoder with support of writing partial entities.
|
|
174
|
+
*/
|
|
175
|
+
class EntityDecoder {
|
|
176
|
+
constructor(
|
|
177
|
+
/** The tree used to decode entities. */
|
|
178
|
+
decodeTree,
|
|
179
|
+
/**
|
|
180
|
+
* The function that is called when a codepoint is decoded.
|
|
181
|
+
*
|
|
182
|
+
* For multi-byte named entities, this will be called multiple times,
|
|
183
|
+
* with the second codepoint, and the same `consumed` value.
|
|
184
|
+
*
|
|
185
|
+
* @param codepoint The decoded codepoint.
|
|
186
|
+
* @param consumed The number of characters consumed by the decoder.
|
|
187
|
+
*/
|
|
188
|
+
emitCodePoint,
|
|
189
|
+
/** An object that is used to produce errors. */
|
|
190
|
+
errors) {
|
|
191
|
+
this.decodeTree = decodeTree;
|
|
192
|
+
this.emitCodePoint = emitCodePoint;
|
|
193
|
+
this.errors = errors;
|
|
194
|
+
/** The current state of the decoder. */
|
|
195
|
+
this.state = EntityDecoderState.EntityStart;
|
|
196
|
+
/** Characters that were consumed while parsing an entity. */
|
|
197
|
+
this.consumed = 1;
|
|
198
|
+
/**
|
|
199
|
+
* The result of the entity.
|
|
200
|
+
*
|
|
201
|
+
* Either the result index of a named entity, or the codepoint of a
|
|
202
|
+
* numeric entity.
|
|
203
|
+
*/
|
|
204
|
+
this.result = 0;
|
|
205
|
+
/** The current index in the decode tree. */
|
|
206
|
+
this.treeIndex = 0;
|
|
207
|
+
/** The number of characters that were consumed in excess. */
|
|
208
|
+
this.excess = 1;
|
|
209
|
+
/** The mode in which the decoder is operating. */
|
|
210
|
+
this.decodeMode = DecodingMode.Strict;
|
|
211
|
+
}
|
|
212
|
+
/** Resets the instance to make it reusable. */
|
|
213
|
+
startEntity(decodeMode) {
|
|
214
|
+
this.decodeMode = decodeMode;
|
|
215
|
+
this.state = EntityDecoderState.EntityStart;
|
|
216
|
+
this.result = 0;
|
|
217
|
+
this.treeIndex = 0;
|
|
218
|
+
this.excess = 1;
|
|
219
|
+
this.consumed = 1;
|
|
220
|
+
}
|
|
221
|
+
/**
|
|
222
|
+
* Write an entity to the decoder. This can be called multiple times with partial entities.
|
|
223
|
+
* If the entity is incomplete, the decoder will return -1.
|
|
224
|
+
*
|
|
225
|
+
* Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
|
|
226
|
+
* entity is incomplete, and resume when the next string is written.
|
|
227
|
+
*
|
|
228
|
+
* @param str The string containing the entity (or a continuation of the entity).
|
|
229
|
+
* @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
|
|
230
|
+
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
|
231
|
+
*/
|
|
232
|
+
write(str, offset) {
|
|
233
|
+
switch (this.state) {
|
|
234
|
+
case EntityDecoderState.EntityStart: {
|
|
235
|
+
if (str.charCodeAt(offset) === CharCodes$1.NUM) {
|
|
236
|
+
this.state = EntityDecoderState.NumericStart;
|
|
237
|
+
this.consumed += 1;
|
|
238
|
+
return this.stateNumericStart(str, offset + 1);
|
|
239
|
+
}
|
|
240
|
+
this.state = EntityDecoderState.NamedEntity;
|
|
241
|
+
return this.stateNamedEntity(str, offset);
|
|
242
|
+
}
|
|
243
|
+
case EntityDecoderState.NumericStart: {
|
|
244
|
+
return this.stateNumericStart(str, offset);
|
|
245
|
+
}
|
|
246
|
+
case EntityDecoderState.NumericDecimal: {
|
|
247
|
+
return this.stateNumericDecimal(str, offset);
|
|
248
|
+
}
|
|
249
|
+
case EntityDecoderState.NumericHex: {
|
|
250
|
+
return this.stateNumericHex(str, offset);
|
|
251
|
+
}
|
|
252
|
+
case EntityDecoderState.NamedEntity: {
|
|
253
|
+
return this.stateNamedEntity(str, offset);
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
/**
|
|
258
|
+
* Switches between the numeric decimal and hexadecimal states.
|
|
259
|
+
*
|
|
260
|
+
* Equivalent to the `Numeric character reference state` in the HTML spec.
|
|
261
|
+
*
|
|
262
|
+
* @param str The string containing the entity (or a continuation of the entity).
|
|
263
|
+
* @param offset The current offset.
|
|
264
|
+
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
|
265
|
+
*/
|
|
266
|
+
stateNumericStart(str, offset) {
|
|
267
|
+
if (offset >= str.length) {
|
|
268
|
+
return -1;
|
|
269
|
+
}
|
|
270
|
+
if ((str.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes$1.LOWER_X) {
|
|
271
|
+
this.state = EntityDecoderState.NumericHex;
|
|
272
|
+
this.consumed += 1;
|
|
273
|
+
return this.stateNumericHex(str, offset + 1);
|
|
274
|
+
}
|
|
275
|
+
this.state = EntityDecoderState.NumericDecimal;
|
|
276
|
+
return this.stateNumericDecimal(str, offset);
|
|
277
|
+
}
|
|
278
|
+
addToNumericResult(str, start, end, base) {
|
|
279
|
+
if (start !== end) {
|
|
280
|
+
const digitCount = end - start;
|
|
281
|
+
this.result =
|
|
282
|
+
this.result * Math.pow(base, digitCount) +
|
|
283
|
+
parseInt(str.substr(start, digitCount), base);
|
|
284
|
+
this.consumed += digitCount;
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
/**
|
|
288
|
+
* Parses a hexadecimal numeric entity.
|
|
289
|
+
*
|
|
290
|
+
* Equivalent to the `Hexadecimal character reference state` in the HTML spec.
|
|
291
|
+
*
|
|
292
|
+
* @param str The string containing the entity (or a continuation of the entity).
|
|
293
|
+
* @param offset The current offset.
|
|
294
|
+
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
|
295
|
+
*/
|
|
296
|
+
stateNumericHex(str, offset) {
|
|
297
|
+
const startIdx = offset;
|
|
298
|
+
while (offset < str.length) {
|
|
299
|
+
const char = str.charCodeAt(offset);
|
|
300
|
+
if (isNumber(char) || isHexadecimalCharacter(char)) {
|
|
301
|
+
offset += 1;
|
|
302
|
+
}
|
|
303
|
+
else {
|
|
304
|
+
this.addToNumericResult(str, startIdx, offset, 16);
|
|
305
|
+
return this.emitNumericEntity(char, 3);
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
this.addToNumericResult(str, startIdx, offset, 16);
|
|
309
|
+
return -1;
|
|
310
|
+
}
|
|
311
|
+
/**
|
|
312
|
+
* Parses a decimal numeric entity.
|
|
313
|
+
*
|
|
314
|
+
* Equivalent to the `Decimal character reference state` in the HTML spec.
|
|
315
|
+
*
|
|
316
|
+
* @param str The string containing the entity (or a continuation of the entity).
|
|
317
|
+
* @param offset The current offset.
|
|
318
|
+
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
|
319
|
+
*/
|
|
320
|
+
stateNumericDecimal(str, offset) {
|
|
321
|
+
const startIdx = offset;
|
|
322
|
+
while (offset < str.length) {
|
|
323
|
+
const char = str.charCodeAt(offset);
|
|
324
|
+
if (isNumber(char)) {
|
|
325
|
+
offset += 1;
|
|
326
|
+
}
|
|
327
|
+
else {
|
|
328
|
+
this.addToNumericResult(str, startIdx, offset, 10);
|
|
329
|
+
return this.emitNumericEntity(char, 2);
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
this.addToNumericResult(str, startIdx, offset, 10);
|
|
333
|
+
return -1;
|
|
334
|
+
}
|
|
335
|
+
/**
|
|
336
|
+
* Validate and emit a numeric entity.
|
|
337
|
+
*
|
|
338
|
+
* Implements the logic from the `Hexadecimal character reference start
|
|
339
|
+
* state` and `Numeric character reference end state` in the HTML spec.
|
|
340
|
+
*
|
|
341
|
+
* @param lastCp The last code point of the entity. Used to see if the
|
|
342
|
+
* entity was terminated with a semicolon.
|
|
343
|
+
* @param expectedLength The minimum number of characters that should be
|
|
344
|
+
* consumed. Used to validate that at least one digit
|
|
345
|
+
* was consumed.
|
|
346
|
+
* @returns The number of characters that were consumed.
|
|
347
|
+
*/
|
|
348
|
+
emitNumericEntity(lastCp, expectedLength) {
|
|
349
|
+
var _a;
|
|
350
|
+
// Ensure we consumed at least one digit.
|
|
351
|
+
if (this.consumed <= expectedLength) {
|
|
352
|
+
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
|
|
353
|
+
return 0;
|
|
354
|
+
}
|
|
355
|
+
// Figure out if this is a legit end of the entity
|
|
356
|
+
if (lastCp === CharCodes$1.SEMI) {
|
|
357
|
+
this.consumed += 1;
|
|
358
|
+
}
|
|
359
|
+
else if (this.decodeMode === DecodingMode.Strict) {
|
|
360
|
+
return 0;
|
|
361
|
+
}
|
|
362
|
+
this.emitCodePoint(replaceCodePoint(this.result), this.consumed);
|
|
363
|
+
if (this.errors) {
|
|
364
|
+
if (lastCp !== CharCodes$1.SEMI) {
|
|
365
|
+
this.errors.missingSemicolonAfterCharacterReference();
|
|
366
|
+
}
|
|
367
|
+
this.errors.validateNumericCharacterReference(this.result);
|
|
368
|
+
}
|
|
369
|
+
return this.consumed;
|
|
370
|
+
}
|
|
371
|
+
/**
|
|
372
|
+
* Parses a named entity.
|
|
373
|
+
*
|
|
374
|
+
* Equivalent to the `Named character reference state` in the HTML spec.
|
|
375
|
+
*
|
|
376
|
+
* @param str The string containing the entity (or a continuation of the entity).
|
|
377
|
+
* @param offset The current offset.
|
|
378
|
+
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
|
379
|
+
*/
|
|
380
|
+
stateNamedEntity(str, offset) {
|
|
381
|
+
const { decodeTree } = this;
|
|
382
|
+
let current = decodeTree[this.treeIndex];
|
|
383
|
+
// The mask is the number of bytes of the value, including the current byte.
|
|
384
|
+
let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
|
|
385
|
+
for (; offset < str.length; offset++, this.excess++) {
|
|
386
|
+
const char = str.charCodeAt(offset);
|
|
387
|
+
this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char);
|
|
388
|
+
if (this.treeIndex < 0) {
|
|
389
|
+
return this.result === 0 ||
|
|
390
|
+
// If we are parsing an attribute
|
|
391
|
+
(this.decodeMode === DecodingMode.Attribute &&
|
|
392
|
+
// We shouldn't have consumed any characters after the entity,
|
|
393
|
+
(valueLength === 0 ||
|
|
394
|
+
// And there should be no invalid characters.
|
|
395
|
+
isEntityInAttributeInvalidEnd(char)))
|
|
396
|
+
? 0
|
|
397
|
+
: this.emitNotTerminatedNamedEntity();
|
|
398
|
+
}
|
|
399
|
+
current = decodeTree[this.treeIndex];
|
|
400
|
+
valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
|
|
401
|
+
// If the branch is a value, store it and continue
|
|
402
|
+
if (valueLength !== 0) {
|
|
403
|
+
// If the entity is terminated by a semicolon, we are done.
|
|
404
|
+
if (char === CharCodes$1.SEMI) {
|
|
405
|
+
return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
|
|
406
|
+
}
|
|
407
|
+
// If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
|
|
408
|
+
if (this.decodeMode !== DecodingMode.Strict) {
|
|
409
|
+
this.result = this.treeIndex;
|
|
410
|
+
this.consumed += this.excess;
|
|
411
|
+
this.excess = 0;
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
return -1;
|
|
416
|
+
}
|
|
417
|
+
/**
|
|
418
|
+
* Emit a named entity that was not terminated with a semicolon.
|
|
419
|
+
*
|
|
420
|
+
* @returns The number of characters consumed.
|
|
421
|
+
*/
|
|
422
|
+
emitNotTerminatedNamedEntity() {
|
|
423
|
+
var _a;
|
|
424
|
+
const { result, decodeTree } = this;
|
|
425
|
+
const valueLength = (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14;
|
|
426
|
+
this.emitNamedEntityData(result, valueLength, this.consumed);
|
|
427
|
+
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.missingSemicolonAfterCharacterReference();
|
|
428
|
+
return this.consumed;
|
|
429
|
+
}
|
|
430
|
+
/**
|
|
431
|
+
* Emit a named entity.
|
|
432
|
+
*
|
|
433
|
+
* @param result The index of the entity in the decode tree.
|
|
434
|
+
* @param valueLength The number of bytes in the entity.
|
|
435
|
+
* @param consumed The number of characters consumed.
|
|
436
|
+
*
|
|
437
|
+
* @returns The number of characters consumed.
|
|
438
|
+
*/
|
|
439
|
+
emitNamedEntityData(result, valueLength, consumed) {
|
|
440
|
+
const { decodeTree } = this;
|
|
441
|
+
this.emitCodePoint(valueLength === 1
|
|
442
|
+
? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
|
|
443
|
+
: decodeTree[result + 1], consumed);
|
|
444
|
+
if (valueLength === 3) {
|
|
445
|
+
// For multi-byte values, we need to emit the second byte.
|
|
446
|
+
this.emitCodePoint(decodeTree[result + 2], consumed);
|
|
447
|
+
}
|
|
448
|
+
return consumed;
|
|
449
|
+
}
|
|
450
|
+
/**
|
|
451
|
+
* Signal to the parser that the end of the input was reached.
|
|
452
|
+
*
|
|
453
|
+
* Remaining data will be emitted and relevant errors will be produced.
|
|
454
|
+
*
|
|
455
|
+
* @returns The number of characters consumed.
|
|
456
|
+
*/
|
|
457
|
+
end() {
|
|
458
|
+
var _a;
|
|
459
|
+
switch (this.state) {
|
|
460
|
+
case EntityDecoderState.NamedEntity: {
|
|
461
|
+
// Emit a named entity if we have one.
|
|
462
|
+
return this.result !== 0 &&
|
|
463
|
+
(this.decodeMode !== DecodingMode.Attribute ||
|
|
464
|
+
this.result === this.treeIndex)
|
|
465
|
+
? this.emitNotTerminatedNamedEntity()
|
|
466
|
+
: 0;
|
|
467
|
+
}
|
|
468
|
+
// Otherwise, emit a numeric entity if we have one.
|
|
469
|
+
case EntityDecoderState.NumericDecimal: {
|
|
470
|
+
return this.emitNumericEntity(0, 2);
|
|
471
|
+
}
|
|
472
|
+
case EntityDecoderState.NumericHex: {
|
|
473
|
+
return this.emitNumericEntity(0, 3);
|
|
474
|
+
}
|
|
475
|
+
case EntityDecoderState.NumericStart: {
|
|
476
|
+
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
|
|
477
|
+
return 0;
|
|
478
|
+
}
|
|
479
|
+
case EntityDecoderState.EntityStart: {
|
|
480
|
+
// Return 0 if we have no entity.
|
|
481
|
+
return 0;
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
/**
|
|
487
|
+
* Creates a function that decodes entities in a string.
|
|
488
|
+
*
|
|
489
|
+
* @param decodeTree The decode tree.
|
|
490
|
+
* @returns A function that decodes entities in a string.
|
|
491
|
+
*/
|
|
492
|
+
function getDecoder(decodeTree) {
|
|
493
|
+
let ret = "";
|
|
494
|
+
const decoder = new EntityDecoder(decodeTree, (str) => (ret += fromCodePoint(str)));
|
|
495
|
+
return function decodeWithTrie(str, decodeMode) {
|
|
496
|
+
let lastIndex = 0;
|
|
497
|
+
let offset = 0;
|
|
498
|
+
while ((offset = str.indexOf("&", offset)) >= 0) {
|
|
499
|
+
ret += str.slice(lastIndex, offset);
|
|
500
|
+
decoder.startEntity(decodeMode);
|
|
501
|
+
const len = decoder.write(str,
|
|
502
|
+
// Skip the "&"
|
|
503
|
+
offset + 1);
|
|
504
|
+
if (len < 0) {
|
|
505
|
+
lastIndex = offset + decoder.end();
|
|
506
|
+
break;
|
|
507
|
+
}
|
|
508
|
+
lastIndex = offset + len;
|
|
509
|
+
// If `len` is 0, skip the current `&` and continue.
|
|
510
|
+
offset = len === 0 ? lastIndex + 1 : lastIndex;
|
|
511
|
+
}
|
|
512
|
+
const result = ret + str.slice(lastIndex);
|
|
513
|
+
// Make sure we don't keep a reference to the final string.
|
|
514
|
+
ret = "";
|
|
515
|
+
return result;
|
|
516
|
+
};
|
|
517
|
+
}
|
|
518
|
+
/**
|
|
519
|
+
* Determines the branch of the current node that is taken given the current
|
|
520
|
+
* character. This function is used to traverse the trie.
|
|
521
|
+
*
|
|
522
|
+
* @param decodeTree The trie.
|
|
523
|
+
* @param current The current node.
|
|
524
|
+
* @param nodeIdx The index right after the current node and its value.
|
|
525
|
+
* @param char The current character.
|
|
526
|
+
* @returns The index of the next node, or -1 if no branch is taken.
|
|
527
|
+
*/
|
|
120
528
|
function determineBranch(decodeTree, current, nodeIdx, char) {
|
|
121
529
|
const branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
|
|
122
530
|
const jumpOffset = current & BinTrieFlags.JUMP_TABLE;
|
|
@@ -150,6 +558,8 @@ function determineBranch(decodeTree, current, nodeIdx, char) {
|
|
|
150
558
|
}
|
|
151
559
|
return -1;
|
|
152
560
|
}
|
|
561
|
+
getDecoder(htmlDecodeTree);
|
|
562
|
+
getDecoder(xmlDecodeTree);
|
|
153
563
|
|
|
154
564
|
var CharCodes;
|
|
155
565
|
(function (CharCodes) {
|
|
@@ -213,11 +623,7 @@ var State$1;
|
|
|
213
623
|
State[State["BeforeSpecialS"] = 22] = "BeforeSpecialS";
|
|
214
624
|
State[State["SpecialStartSequence"] = 23] = "SpecialStartSequence";
|
|
215
625
|
State[State["InSpecialTag"] = 24] = "InSpecialTag";
|
|
216
|
-
State[State["
|
|
217
|
-
State[State["BeforeNumericEntity"] = 26] = "BeforeNumericEntity";
|
|
218
|
-
State[State["InNamedEntity"] = 27] = "InNamedEntity";
|
|
219
|
-
State[State["InNumericEntity"] = 28] = "InNumericEntity";
|
|
220
|
-
State[State["InHexEntity"] = 29] = "InHexEntity";
|
|
626
|
+
State[State["InEntity"] = 25] = "InEntity";
|
|
221
627
|
})(State$1 || (State$1 = {}));
|
|
222
628
|
function isWhitespace$1(c) {
|
|
223
629
|
return (c === CharCodes.Space ||
|
|
@@ -229,17 +635,10 @@ function isWhitespace$1(c) {
|
|
|
229
635
|
function isEndOfTagSection(c) {
|
|
230
636
|
return c === CharCodes.Slash || c === CharCodes.Gt || isWhitespace$1(c);
|
|
231
637
|
}
|
|
232
|
-
function isNumber(c) {
|
|
233
|
-
return c >= CharCodes.Zero && c <= CharCodes.Nine;
|
|
234
|
-
}
|
|
235
638
|
function isASCIIAlpha(c) {
|
|
236
639
|
return ((c >= CharCodes.LowerA && c <= CharCodes.LowerZ) ||
|
|
237
640
|
(c >= CharCodes.UpperA && c <= CharCodes.UpperZ));
|
|
238
641
|
}
|
|
239
|
-
function isHexDigit$1(c) {
|
|
240
|
-
return ((c >= CharCodes.UpperA && c <= CharCodes.UpperF) ||
|
|
241
|
-
(c >= CharCodes.LowerA && c <= CharCodes.LowerF));
|
|
242
|
-
}
|
|
243
642
|
var QuoteType;
|
|
244
643
|
(function (QuoteType) {
|
|
245
644
|
QuoteType[QuoteType["NoValue"] = 0] = "NoValue";
|
|
@@ -272,6 +671,8 @@ class Tokenizer {
|
|
|
272
671
|
this.sectionStart = 0;
|
|
273
672
|
/** The index within the buffer that we are currently looking at. */
|
|
274
673
|
this.index = 0;
|
|
674
|
+
/** The start of the last entity. */
|
|
675
|
+
this.entityStart = 0;
|
|
275
676
|
/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
|
|
276
677
|
this.baseState = State$1.Text;
|
|
277
678
|
/** For special parsing behavior inside of script and style tags. */
|
|
@@ -282,14 +683,9 @@ class Tokenizer {
|
|
|
282
683
|
this.offset = 0;
|
|
283
684
|
this.currentSequence = undefined;
|
|
284
685
|
this.sequenceIndex = 0;
|
|
285
|
-
this.trieIndex = 0;
|
|
286
|
-
this.trieCurrent = 0;
|
|
287
|
-
/** For named entities, the index of the value. For numeric entities, the code point. */
|
|
288
|
-
this.entityResult = 0;
|
|
289
|
-
this.entityExcess = 0;
|
|
290
686
|
this.xmlMode = xmlMode;
|
|
291
687
|
this.decodeEntities = decodeEntities;
|
|
292
|
-
this.
|
|
688
|
+
this.entityDecoder = new EntityDecoder(xmlMode ? xmlDecodeTree : htmlDecodeTree, (cp, consumed) => this.emitCodePoint(cp, consumed));
|
|
293
689
|
}
|
|
294
690
|
reset() {
|
|
295
691
|
this.state = State$1.Text;
|
|
@@ -319,18 +715,6 @@ class Tokenizer {
|
|
|
319
715
|
this.parse();
|
|
320
716
|
}
|
|
321
717
|
}
|
|
322
|
-
/**
|
|
323
|
-
* The current index within all of the written data.
|
|
324
|
-
*/
|
|
325
|
-
getIndex() {
|
|
326
|
-
return this.index;
|
|
327
|
-
}
|
|
328
|
-
/**
|
|
329
|
-
* The start of the current section.
|
|
330
|
-
*/
|
|
331
|
-
getSectionStart() {
|
|
332
|
-
return this.sectionStart;
|
|
333
|
-
}
|
|
334
718
|
stateText(c) {
|
|
335
719
|
if (c === CharCodes.Lt ||
|
|
336
720
|
(!this.decodeEntities && this.fastForwardTo(CharCodes.Lt))) {
|
|
@@ -341,7 +725,7 @@ class Tokenizer {
|
|
|
341
725
|
this.sectionStart = this.index;
|
|
342
726
|
}
|
|
343
727
|
else if (this.decodeEntities && c === CharCodes.Amp) {
|
|
344
|
-
this.
|
|
728
|
+
this.startEntity();
|
|
345
729
|
}
|
|
346
730
|
}
|
|
347
731
|
stateSpecialStartSequence(c) {
|
|
@@ -388,7 +772,7 @@ class Tokenizer {
|
|
|
388
772
|
if (this.currentSequence === Sequences.TitleEnd) {
|
|
389
773
|
// We have to parse entities in <title> tags.
|
|
390
774
|
if (this.decodeEntities && c === CharCodes.Amp) {
|
|
391
|
-
this.
|
|
775
|
+
this.startEntity();
|
|
392
776
|
}
|
|
393
777
|
}
|
|
394
778
|
else if (this.fastForwardTo(CharCodes.Lt)) {
|
|
@@ -547,7 +931,6 @@ class Tokenizer {
|
|
|
547
931
|
// Skip everything until ">"
|
|
548
932
|
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
|
|
549
933
|
this.state = State$1.Text;
|
|
550
|
-
this.baseState = State$1.Text;
|
|
551
934
|
this.sectionStart = this.index + 1;
|
|
552
935
|
}
|
|
553
936
|
}
|
|
@@ -561,7 +944,6 @@ class Tokenizer {
|
|
|
561
944
|
else {
|
|
562
945
|
this.state = State$1.Text;
|
|
563
946
|
}
|
|
564
|
-
this.baseState = this.state;
|
|
565
947
|
this.sectionStart = this.index + 1;
|
|
566
948
|
}
|
|
567
949
|
else if (c === CharCodes.Slash) {
|
|
@@ -576,7 +958,6 @@ class Tokenizer {
|
|
|
576
958
|
if (c === CharCodes.Gt) {
|
|
577
959
|
this.cbs.onselfclosingtag(this.index);
|
|
578
960
|
this.state = State$1.Text;
|
|
579
|
-
this.baseState = State$1.Text;
|
|
580
961
|
this.sectionStart = this.index + 1;
|
|
581
962
|
this.isSpecial = false; // Reset special state, in case of self-closing special tags
|
|
582
963
|
}
|
|
@@ -634,8 +1015,7 @@ class Tokenizer {
|
|
|
634
1015
|
this.state = State$1.BeforeAttributeName;
|
|
635
1016
|
}
|
|
636
1017
|
else if (this.decodeEntities && c === CharCodes.Amp) {
|
|
637
|
-
this.
|
|
638
|
-
this.state = State$1.BeforeEntity;
|
|
1018
|
+
this.startEntity();
|
|
639
1019
|
}
|
|
640
1020
|
}
|
|
641
1021
|
stateInAttributeValueDoubleQuotes(c) {
|
|
@@ -653,8 +1033,7 @@ class Tokenizer {
|
|
|
653
1033
|
this.stateBeforeAttributeName(c);
|
|
654
1034
|
}
|
|
655
1035
|
else if (this.decodeEntities && c === CharCodes.Amp) {
|
|
656
|
-
this.
|
|
657
|
-
this.state = State$1.BeforeEntity;
|
|
1036
|
+
this.startEntity();
|
|
658
1037
|
}
|
|
659
1038
|
}
|
|
660
1039
|
stateBeforeDeclaration(c) {
|
|
@@ -715,147 +1094,30 @@ class Tokenizer {
|
|
|
715
1094
|
this.stateInTagName(c); // Consume the token again
|
|
716
1095
|
}
|
|
717
1096
|
}
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
this.
|
|
721
|
-
this.
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
if (this.trieIndex < 0) {
|
|
737
|
-
this.emitNamedEntity();
|
|
738
|
-
this.index--;
|
|
739
|
-
return;
|
|
740
|
-
}
|
|
741
|
-
this.trieCurrent = this.entityTrie[this.trieIndex];
|
|
742
|
-
const masked = this.trieCurrent & BinTrieFlags.VALUE_LENGTH;
|
|
743
|
-
// If the branch is a value, store it and continue
|
|
744
|
-
if (masked) {
|
|
745
|
-
// The mask is the number of bytes of the value, including the current byte.
|
|
746
|
-
const valueLength = (masked >> 14) - 1;
|
|
747
|
-
// If we have a legacy entity while parsing strictly, just skip the number of bytes
|
|
748
|
-
if (!this.allowLegacyEntity() && c !== CharCodes.Semi) {
|
|
749
|
-
this.trieIndex += valueLength;
|
|
750
|
-
}
|
|
751
|
-
else {
|
|
752
|
-
// Add 1 as we have already incremented the excess
|
|
753
|
-
const entityStart = this.index - this.entityExcess + 1;
|
|
754
|
-
if (entityStart > this.sectionStart) {
|
|
755
|
-
this.emitPartial(this.sectionStart, entityStart);
|
|
756
|
-
}
|
|
757
|
-
// If this is a surrogate pair, consume the next two bytes
|
|
758
|
-
this.entityResult = this.trieIndex;
|
|
759
|
-
this.trieIndex += valueLength;
|
|
760
|
-
this.entityExcess = 0;
|
|
761
|
-
this.sectionStart = this.index + 1;
|
|
762
|
-
if (valueLength === 0) {
|
|
763
|
-
this.emitNamedEntity();
|
|
764
|
-
}
|
|
1097
|
+
startEntity() {
|
|
1098
|
+
this.baseState = this.state;
|
|
1099
|
+
this.state = State$1.InEntity;
|
|
1100
|
+
this.entityStart = this.index;
|
|
1101
|
+
this.entityDecoder.startEntity(this.xmlMode
|
|
1102
|
+
? DecodingMode.Strict
|
|
1103
|
+
: this.baseState === State$1.Text ||
|
|
1104
|
+
this.baseState === State$1.InSpecialTag
|
|
1105
|
+
? DecodingMode.Legacy
|
|
1106
|
+
: DecodingMode.Attribute);
|
|
1107
|
+
}
|
|
1108
|
+
stateInEntity() {
|
|
1109
|
+
const length = this.entityDecoder.write(this.buffer, this.index - this.offset);
|
|
1110
|
+
// If `length` is non-negative, we are done with the entity.
|
|
1111
|
+
if (length >= 0) {
|
|
1112
|
+
this.state = this.baseState;
|
|
1113
|
+
if (length === 0) {
|
|
1114
|
+
this.index = this.entityStart;
|
|
765
1115
|
}
|
|
766
1116
|
}
|
|
767
|
-
}
|
|
768
|
-
emitNamedEntity() {
|
|
769
|
-
this.state = this.baseState;
|
|
770
|
-
if (this.entityResult === 0) {
|
|
771
|
-
return;
|
|
772
|
-
}
|
|
773
|
-
const valueLength = (this.entityTrie[this.entityResult] & BinTrieFlags.VALUE_LENGTH) >>
|
|
774
|
-
14;
|
|
775
|
-
switch (valueLength) {
|
|
776
|
-
case 1: {
|
|
777
|
-
this.emitCodePoint(this.entityTrie[this.entityResult] &
|
|
778
|
-
~BinTrieFlags.VALUE_LENGTH);
|
|
779
|
-
break;
|
|
780
|
-
}
|
|
781
|
-
case 2: {
|
|
782
|
-
this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
|
|
783
|
-
break;
|
|
784
|
-
}
|
|
785
|
-
case 3: {
|
|
786
|
-
this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
|
|
787
|
-
this.emitCodePoint(this.entityTrie[this.entityResult + 2]);
|
|
788
|
-
}
|
|
789
|
-
}
|
|
790
|
-
}
|
|
791
|
-
stateBeforeNumericEntity(c) {
|
|
792
|
-
if ((c | 0x20) === CharCodes.LowerX) {
|
|
793
|
-
this.entityExcess++;
|
|
794
|
-
this.state = State$1.InHexEntity;
|
|
795
|
-
}
|
|
796
1117
|
else {
|
|
797
|
-
|
|
798
|
-
this.
|
|
799
|
-
}
|
|
800
|
-
}
|
|
801
|
-
emitNumericEntity(strict) {
|
|
802
|
-
const entityStart = this.index - this.entityExcess - 1;
|
|
803
|
-
const numberStart = entityStart + 2 + Number(this.state === State$1.InHexEntity);
|
|
804
|
-
if (numberStart !== this.index) {
|
|
805
|
-
// Emit leading data if any
|
|
806
|
-
if (entityStart > this.sectionStart) {
|
|
807
|
-
this.emitPartial(this.sectionStart, entityStart);
|
|
808
|
-
}
|
|
809
|
-
this.sectionStart = this.index + Number(strict);
|
|
810
|
-
this.emitCodePoint(replaceCodePoint(this.entityResult));
|
|
1118
|
+
// Mark buffer as consumed.
|
|
1119
|
+
this.index = this.offset + this.buffer.length - 1;
|
|
811
1120
|
}
|
|
812
|
-
this.state = this.baseState;
|
|
813
|
-
}
|
|
814
|
-
stateInNumericEntity(c) {
|
|
815
|
-
if (c === CharCodes.Semi) {
|
|
816
|
-
this.emitNumericEntity(true);
|
|
817
|
-
}
|
|
818
|
-
else if (isNumber(c)) {
|
|
819
|
-
this.entityResult = this.entityResult * 10 + (c - CharCodes.Zero);
|
|
820
|
-
this.entityExcess++;
|
|
821
|
-
}
|
|
822
|
-
else {
|
|
823
|
-
if (this.allowLegacyEntity()) {
|
|
824
|
-
this.emitNumericEntity(false);
|
|
825
|
-
}
|
|
826
|
-
else {
|
|
827
|
-
this.state = this.baseState;
|
|
828
|
-
}
|
|
829
|
-
this.index--;
|
|
830
|
-
}
|
|
831
|
-
}
|
|
832
|
-
stateInHexEntity(c) {
|
|
833
|
-
if (c === CharCodes.Semi) {
|
|
834
|
-
this.emitNumericEntity(true);
|
|
835
|
-
}
|
|
836
|
-
else if (isNumber(c)) {
|
|
837
|
-
this.entityResult = this.entityResult * 16 + (c - CharCodes.Zero);
|
|
838
|
-
this.entityExcess++;
|
|
839
|
-
}
|
|
840
|
-
else if (isHexDigit$1(c)) {
|
|
841
|
-
this.entityResult =
|
|
842
|
-
this.entityResult * 16 + ((c | 0x20) - CharCodes.LowerA + 10);
|
|
843
|
-
this.entityExcess++;
|
|
844
|
-
}
|
|
845
|
-
else {
|
|
846
|
-
if (this.allowLegacyEntity()) {
|
|
847
|
-
this.emitNumericEntity(false);
|
|
848
|
-
}
|
|
849
|
-
else {
|
|
850
|
-
this.state = this.baseState;
|
|
851
|
-
}
|
|
852
|
-
this.index--;
|
|
853
|
-
}
|
|
854
|
-
}
|
|
855
|
-
allowLegacyEntity() {
|
|
856
|
-
return (!this.xmlMode &&
|
|
857
|
-
(this.baseState === State$1.Text ||
|
|
858
|
-
this.baseState === State$1.InSpecialTag));
|
|
859
1121
|
}
|
|
860
1122
|
/**
|
|
861
1123
|
* Remove data that has already been consumed from the buffer.
|
|
@@ -984,44 +1246,30 @@ class Tokenizer {
|
|
|
984
1246
|
this.stateInProcessingInstruction(c);
|
|
985
1247
|
break;
|
|
986
1248
|
}
|
|
987
|
-
case State$1.
|
|
988
|
-
this.
|
|
989
|
-
break;
|
|
990
|
-
}
|
|
991
|
-
case State$1.BeforeEntity: {
|
|
992
|
-
this.stateBeforeEntity(c);
|
|
1249
|
+
case State$1.InEntity: {
|
|
1250
|
+
this.stateInEntity();
|
|
993
1251
|
break;
|
|
994
1252
|
}
|
|
995
|
-
case State$1.InHexEntity: {
|
|
996
|
-
this.stateInHexEntity(c);
|
|
997
|
-
break;
|
|
998
|
-
}
|
|
999
|
-
case State$1.InNumericEntity: {
|
|
1000
|
-
this.stateInNumericEntity(c);
|
|
1001
|
-
break;
|
|
1002
|
-
}
|
|
1003
|
-
default: {
|
|
1004
|
-
// `this._state === State.BeforeNumericEntity`
|
|
1005
|
-
this.stateBeforeNumericEntity(c);
|
|
1006
|
-
}
|
|
1007
1253
|
}
|
|
1008
1254
|
this.index++;
|
|
1009
1255
|
}
|
|
1010
1256
|
this.cleanup();
|
|
1011
1257
|
}
|
|
1012
1258
|
finish() {
|
|
1013
|
-
if (this.state === State$1.
|
|
1014
|
-
this.
|
|
1015
|
-
|
|
1016
|
-
// If there is remaining data, emit it in a reasonable way
|
|
1017
|
-
if (this.sectionStart < this.index) {
|
|
1018
|
-
this.handleTrailingData();
|
|
1259
|
+
if (this.state === State$1.InEntity) {
|
|
1260
|
+
this.entityDecoder.end();
|
|
1261
|
+
this.state = this.baseState;
|
|
1019
1262
|
}
|
|
1263
|
+
this.handleTrailingData();
|
|
1020
1264
|
this.cbs.onend();
|
|
1021
1265
|
}
|
|
1022
1266
|
/** Handle any trailing data. */
|
|
1023
1267
|
handleTrailingData() {
|
|
1024
1268
|
const endIndex = this.buffer.length + this.offset;
|
|
1269
|
+
// If there is no remaining data, we are done.
|
|
1270
|
+
if (this.sectionStart >= endIndex) {
|
|
1271
|
+
return;
|
|
1272
|
+
}
|
|
1025
1273
|
if (this.state === State$1.InCommentLike) {
|
|
1026
1274
|
if (this.currentSequence === Sequences.CdataEnd) {
|
|
1027
1275
|
this.cbs.oncdata(this.sectionStart, endIndex, 0);
|
|
@@ -1030,16 +1278,6 @@ class Tokenizer {
|
|
|
1030
1278
|
this.cbs.oncomment(this.sectionStart, endIndex, 0);
|
|
1031
1279
|
}
|
|
1032
1280
|
}
|
|
1033
|
-
else if (this.state === State$1.InNumericEntity &&
|
|
1034
|
-
this.allowLegacyEntity()) {
|
|
1035
|
-
this.emitNumericEntity(false);
|
|
1036
|
-
// All trailing data will have been consumed
|
|
1037
|
-
}
|
|
1038
|
-
else if (this.state === State$1.InHexEntity &&
|
|
1039
|
-
this.allowLegacyEntity()) {
|
|
1040
|
-
this.emitNumericEntity(false);
|
|
1041
|
-
// All trailing data will have been consumed
|
|
1042
|
-
}
|
|
1043
1281
|
else if (this.state === State$1.InTagName ||
|
|
1044
1282
|
this.state === State$1.BeforeAttributeName ||
|
|
1045
1283
|
this.state === State$1.BeforeAttributeValue ||
|
|
@@ -1053,22 +1291,23 @@ class Tokenizer {
|
|
|
1053
1291
|
this.cbs.ontext(this.sectionStart, endIndex);
|
|
1054
1292
|
}
|
|
1055
1293
|
}
|
|
1056
|
-
|
|
1057
|
-
if (this.baseState !== State$1.Text &&
|
|
1058
|
-
this.baseState !== State$1.InSpecialTag) {
|
|
1059
|
-
this.cbs.onattribdata(start, endIndex);
|
|
1060
|
-
}
|
|
1061
|
-
else {
|
|
1062
|
-
this.cbs.ontext(start, endIndex);
|
|
1063
|
-
}
|
|
1064
|
-
}
|
|
1065
|
-
emitCodePoint(cp) {
|
|
1294
|
+
emitCodePoint(cp, consumed) {
|
|
1066
1295
|
if (this.baseState !== State$1.Text &&
|
|
1067
1296
|
this.baseState !== State$1.InSpecialTag) {
|
|
1297
|
+
if (this.sectionStart < this.entityStart) {
|
|
1298
|
+
this.cbs.onattribdata(this.sectionStart, this.entityStart);
|
|
1299
|
+
}
|
|
1300
|
+
this.sectionStart = this.entityStart + consumed;
|
|
1301
|
+
this.index = this.sectionStart - 1;
|
|
1068
1302
|
this.cbs.onattribentity(cp);
|
|
1069
1303
|
}
|
|
1070
1304
|
else {
|
|
1071
|
-
this.
|
|
1305
|
+
if (this.sectionStart < this.entityStart) {
|
|
1306
|
+
this.cbs.ontext(this.sectionStart, this.entityStart);
|
|
1307
|
+
}
|
|
1308
|
+
this.sectionStart = this.entityStart + consumed;
|
|
1309
|
+
this.index = this.sectionStart - 1;
|
|
1310
|
+
this.cbs.ontextentity(cp, this.sectionStart);
|
|
1072
1311
|
}
|
|
1073
1312
|
}
|
|
1074
1313
|
}
|
|
@@ -1187,7 +1426,6 @@ let Parser$1 = class Parser {
|
|
|
1187
1426
|
this.attribvalue = "";
|
|
1188
1427
|
this.attribs = null;
|
|
1189
1428
|
this.stack = [];
|
|
1190
|
-
this.foreignContext = [];
|
|
1191
1429
|
this.buffers = [];
|
|
1192
1430
|
this.bufferOffset = 0;
|
|
1193
1431
|
/** The index of the last written buffer. Used when resuming after a `pause()`. */
|
|
@@ -1195,10 +1433,12 @@ let Parser$1 = class Parser {
|
|
|
1195
1433
|
/** Indicates whether the parser has finished running / `.end` has been called. */
|
|
1196
1434
|
this.ended = false;
|
|
1197
1435
|
this.cbs = cbs !== null && cbs !== void 0 ? cbs : {};
|
|
1198
|
-
this.
|
|
1436
|
+
this.htmlMode = !this.options.xmlMode;
|
|
1437
|
+
this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : this.htmlMode;
|
|
1199
1438
|
this.lowerCaseAttributeNames =
|
|
1200
|
-
(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b :
|
|
1439
|
+
(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : this.htmlMode;
|
|
1201
1440
|
this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer)(this.options, this);
|
|
1441
|
+
this.foreignContext = [!this.htmlMode];
|
|
1202
1442
|
(_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this);
|
|
1203
1443
|
}
|
|
1204
1444
|
// Tokenizer event handlers
|
|
@@ -1211,19 +1451,18 @@ let Parser$1 = class Parser {
|
|
|
1211
1451
|
this.startIndex = endIndex;
|
|
1212
1452
|
}
|
|
1213
1453
|
/** @internal */
|
|
1214
|
-
ontextentity(cp) {
|
|
1454
|
+
ontextentity(cp, endIndex) {
|
|
1215
1455
|
var _a, _b;
|
|
1216
|
-
|
|
1217
|
-
* Entities can be emitted on the character, or directly after.
|
|
1218
|
-
* We use the section start here to get accurate indices.
|
|
1219
|
-
*/
|
|
1220
|
-
const index = this.tokenizer.getSectionStart();
|
|
1221
|
-
this.endIndex = index - 1;
|
|
1456
|
+
this.endIndex = endIndex - 1;
|
|
1222
1457
|
(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, fromCodePoint(cp));
|
|
1223
|
-
this.startIndex =
|
|
1458
|
+
this.startIndex = endIndex;
|
|
1224
1459
|
}
|
|
1460
|
+
/**
|
|
1461
|
+
* Checks if the current tag is a void element. Override this if you want
|
|
1462
|
+
* to specify your own additional void elements.
|
|
1463
|
+
*/
|
|
1225
1464
|
isVoidElement(name) {
|
|
1226
|
-
return
|
|
1465
|
+
return this.htmlMode && voidElements.has(name);
|
|
1227
1466
|
}
|
|
1228
1467
|
/** @internal */
|
|
1229
1468
|
onopentagname(start, endIndex) {
|
|
@@ -1238,21 +1477,22 @@ let Parser$1 = class Parser {
|
|
|
1238
1477
|
var _a, _b, _c, _d;
|
|
1239
1478
|
this.openTagStart = this.startIndex;
|
|
1240
1479
|
this.tagname = name;
|
|
1241
|
-
const impliesClose =
|
|
1480
|
+
const impliesClose = this.htmlMode && openImpliesClose.get(name);
|
|
1242
1481
|
if (impliesClose) {
|
|
1243
|
-
while (this.stack.length > 0 &&
|
|
1244
|
-
|
|
1245
|
-
const element = this.stack.pop();
|
|
1482
|
+
while (this.stack.length > 0 && impliesClose.has(this.stack[0])) {
|
|
1483
|
+
const element = this.stack.shift();
|
|
1246
1484
|
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, element, true);
|
|
1247
1485
|
}
|
|
1248
1486
|
}
|
|
1249
1487
|
if (!this.isVoidElement(name)) {
|
|
1250
|
-
this.stack.
|
|
1251
|
-
if (
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1488
|
+
this.stack.unshift(name);
|
|
1489
|
+
if (this.htmlMode) {
|
|
1490
|
+
if (foreignContextElements.has(name)) {
|
|
1491
|
+
this.foreignContext.unshift(true);
|
|
1492
|
+
}
|
|
1493
|
+
else if (htmlIntegrationElements.has(name)) {
|
|
1494
|
+
this.foreignContext.unshift(false);
|
|
1495
|
+
}
|
|
1256
1496
|
}
|
|
1257
1497
|
}
|
|
1258
1498
|
(_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, name);
|
|
@@ -1280,40 +1520,37 @@ let Parser$1 = class Parser {
|
|
|
1280
1520
|
}
|
|
1281
1521
|
/** @internal */
|
|
1282
1522
|
onclosetag(start, endIndex) {
|
|
1283
|
-
var _a, _b, _c, _d, _e, _f;
|
|
1523
|
+
var _a, _b, _c, _d, _e, _f, _g, _h;
|
|
1284
1524
|
this.endIndex = endIndex;
|
|
1285
1525
|
let name = this.getSlice(start, endIndex);
|
|
1286
1526
|
if (this.lowerCaseTagNames) {
|
|
1287
1527
|
name = name.toLowerCase();
|
|
1288
1528
|
}
|
|
1289
|
-
if (
|
|
1290
|
-
|
|
1291
|
-
|
|
1529
|
+
if (this.htmlMode &&
|
|
1530
|
+
(foreignContextElements.has(name) ||
|
|
1531
|
+
htmlIntegrationElements.has(name))) {
|
|
1532
|
+
this.foreignContext.shift();
|
|
1292
1533
|
}
|
|
1293
1534
|
if (!this.isVoidElement(name)) {
|
|
1294
|
-
const pos = this.stack.
|
|
1535
|
+
const pos = this.stack.indexOf(name);
|
|
1295
1536
|
if (pos !== -1) {
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
this.cbs.onclosetag(this.stack.pop(), count !== 0);
|
|
1301
|
-
}
|
|
1537
|
+
for (let index = 0; index <= pos; index++) {
|
|
1538
|
+
const element = this.stack.shift();
|
|
1539
|
+
// We know the stack has sufficient elements.
|
|
1540
|
+
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, element, index !== pos);
|
|
1302
1541
|
}
|
|
1303
|
-
else
|
|
1304
|
-
this.stack.length = pos;
|
|
1305
1542
|
}
|
|
1306
|
-
else if (
|
|
1543
|
+
else if (this.htmlMode && name === "p") {
|
|
1307
1544
|
// Implicit open before close
|
|
1308
1545
|
this.emitOpenTag("p");
|
|
1309
1546
|
this.closeCurrentTag(true);
|
|
1310
1547
|
}
|
|
1311
1548
|
}
|
|
1312
|
-
else if (
|
|
1549
|
+
else if (this.htmlMode && name === "br") {
|
|
1313
1550
|
// We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
|
|
1314
|
-
(
|
|
1315
|
-
(
|
|
1316
|
-
(
|
|
1551
|
+
(_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, "br");
|
|
1552
|
+
(_f = (_e = this.cbs).onopentag) === null || _f === void 0 ? void 0 : _f.call(_e, "br", {}, true);
|
|
1553
|
+
(_h = (_g = this.cbs).onclosetag) === null || _h === void 0 ? void 0 : _h.call(_g, "br", false);
|
|
1317
1554
|
}
|
|
1318
1555
|
// Set `startIndex` for next node
|
|
1319
1556
|
this.startIndex = endIndex + 1;
|
|
@@ -1321,9 +1558,7 @@ let Parser$1 = class Parser {
|
|
|
1321
1558
|
/** @internal */
|
|
1322
1559
|
onselfclosingtag(endIndex) {
|
|
1323
1560
|
this.endIndex = endIndex;
|
|
1324
|
-
if (this.options.
|
|
1325
|
-
this.options.recognizeSelfClosing ||
|
|
1326
|
-
this.foreignContext[this.foreignContext.length - 1]) {
|
|
1561
|
+
if (this.options.recognizeSelfClosing || this.foreignContext[0]) {
|
|
1327
1562
|
this.closeCurrentTag(false);
|
|
1328
1563
|
// Set `startIndex` for next node
|
|
1329
1564
|
this.startIndex = endIndex + 1;
|
|
@@ -1338,10 +1573,10 @@ let Parser$1 = class Parser {
|
|
|
1338
1573
|
const name = this.tagname;
|
|
1339
1574
|
this.endOpenTag(isOpenImplied);
|
|
1340
1575
|
// Self-closing tags will be on the top of the stack
|
|
1341
|
-
if (this.stack[
|
|
1576
|
+
if (this.stack[0] === name) {
|
|
1342
1577
|
// If the opening tag isn't implied, the closing tag has to be implied.
|
|
1343
1578
|
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name, !isOpenImplied);
|
|
1344
|
-
this.stack.
|
|
1579
|
+
this.stack.shift();
|
|
1345
1580
|
}
|
|
1346
1581
|
}
|
|
1347
1582
|
/** @internal */
|
|
@@ -1421,7 +1656,7 @@ let Parser$1 = class Parser {
|
|
|
1421
1656
|
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k;
|
|
1422
1657
|
this.endIndex = endIndex;
|
|
1423
1658
|
const value = this.getSlice(start, endIndex - offset);
|
|
1424
|
-
if (this.
|
|
1659
|
+
if (!this.htmlMode || this.options.recognizeCDATA) {
|
|
1425
1660
|
(_b = (_a = this.cbs).oncdatastart) === null || _b === void 0 ? void 0 : _b.call(_a);
|
|
1426
1661
|
(_d = (_c = this.cbs).ontext) === null || _d === void 0 ? void 0 : _d.call(_c, value);
|
|
1427
1662
|
(_f = (_e = this.cbs).oncdataend) === null || _f === void 0 ? void 0 : _f.call(_e);
|
|
@@ -1439,8 +1674,9 @@ let Parser$1 = class Parser {
|
|
|
1439
1674
|
if (this.cbs.onclosetag) {
|
|
1440
1675
|
// Set the end index for all remaining tags
|
|
1441
1676
|
this.endIndex = this.startIndex;
|
|
1442
|
-
for (let index =
|
|
1443
|
-
;
|
|
1677
|
+
for (let index = 0; index < this.stack.length; index++) {
|
|
1678
|
+
this.cbs.onclosetag(this.stack[index], true);
|
|
1679
|
+
}
|
|
1444
1680
|
}
|
|
1445
1681
|
(_b = (_a = this.cbs).onend) === null || _b === void 0 ? void 0 : _b.call(_a);
|
|
1446
1682
|
}
|
|
@@ -1459,6 +1695,8 @@ let Parser$1 = class Parser {
|
|
|
1459
1695
|
this.endIndex = 0;
|
|
1460
1696
|
(_d = (_c = this.cbs).onparserinit) === null || _d === void 0 ? void 0 : _d.call(_c, this);
|
|
1461
1697
|
this.buffers.length = 0;
|
|
1698
|
+
this.foreignContext.length = 0;
|
|
1699
|
+
this.foreignContext.unshift(!this.htmlMode);
|
|
1462
1700
|
this.bufferOffset = 0;
|
|
1463
1701
|
this.writeIndex = 0;
|
|
1464
1702
|
this.ended = false;
|
|
@@ -2140,6 +2378,16 @@ function encodeXML(str) {
|
|
|
2140
2378
|
}
|
|
2141
2379
|
return ret + str.substr(lastIdx);
|
|
2142
2380
|
}
|
|
2381
|
+
/**
|
|
2382
|
+
* Creates a function that escapes all characters matched by the given regular
|
|
2383
|
+
* expression using the given map of characters to escape to their entities.
|
|
2384
|
+
*
|
|
2385
|
+
* @param regex Regular expression to match characters to escape.
|
|
2386
|
+
* @param map Map of characters to escape to their entities.
|
|
2387
|
+
*
|
|
2388
|
+
* @returns Function that escapes all characters matched by the given regular
|
|
2389
|
+
* expression using the given map of characters to escape to their entities.
|
|
2390
|
+
*/
|
|
2143
2391
|
function getEscaper(regex, map) {
|
|
2144
2392
|
return function escape(data) {
|
|
2145
2393
|
let match;
|
|
@@ -2149,7 +2397,7 @@ function getEscaper(regex, map) {
|
|
|
2149
2397
|
if (lastIdx !== match.index) {
|
|
2150
2398
|
result += data.substring(lastIdx, match.index);
|
|
2151
2399
|
}
|
|
2152
|
-
// We know that this
|
|
2400
|
+
// We know that this character will be in the map.
|
|
2153
2401
|
result += map.get(match[0].charCodeAt(0));
|
|
2154
2402
|
// Every match will be of length 1
|
|
2155
2403
|
lastIdx = match.index + 1;
|
|
@@ -2486,7 +2734,7 @@ function getInnerHTML(node, options) {
|
|
|
2486
2734
|
: "";
|
|
2487
2735
|
}
|
|
2488
2736
|
/**
|
|
2489
|
-
* Get a node's inner text. Same as `textContent`, but inserts newlines for `<br>` tags.
|
|
2737
|
+
* Get a node's inner text. Same as `textContent`, but inserts newlines for `<br>` tags. Ignores comments.
|
|
2490
2738
|
*
|
|
2491
2739
|
* @category Stringify
|
|
2492
2740
|
* @deprecated Use `textContent` instead.
|
|
@@ -2505,7 +2753,7 @@ function getText(node) {
|
|
|
2505
2753
|
return "";
|
|
2506
2754
|
}
|
|
2507
2755
|
/**
|
|
2508
|
-
* Get a node's text content.
|
|
2756
|
+
* Get a node's text content. Ignores comments.
|
|
2509
2757
|
*
|
|
2510
2758
|
* @category Stringify
|
|
2511
2759
|
* @param node Node to get the text content of.
|
|
@@ -2523,7 +2771,7 @@ function textContent(node) {
|
|
|
2523
2771
|
return "";
|
|
2524
2772
|
}
|
|
2525
2773
|
/**
|
|
2526
|
-
* Get a node's inner text.
|
|
2774
|
+
* Get a node's inner text, ignoring `<script>` and `<style>` tags. Ignores comments.
|
|
2527
2775
|
*
|
|
2528
2776
|
* @category Stringify
|
|
2529
2777
|
* @param node Node to get the inner text of.
|
|
@@ -2556,7 +2804,7 @@ function getChildren(elem) {
|
|
|
2556
2804
|
*
|
|
2557
2805
|
* @category Traversal
|
|
2558
2806
|
* @param elem Node to get the parent of.
|
|
2559
|
-
* @returns `elem`'s parent node.
|
|
2807
|
+
* @returns `elem`'s parent node, or `null` if `elem` is a root node.
|
|
2560
2808
|
*/
|
|
2561
2809
|
function getParent(elem) {
|
|
2562
2810
|
return elem.parent || null;
|
|
@@ -2570,7 +2818,7 @@ function getParent(elem) {
|
|
|
2570
2818
|
*
|
|
2571
2819
|
* @category Traversal
|
|
2572
2820
|
* @param elem Element to get the siblings of.
|
|
2573
|
-
* @returns `elem`'s siblings
|
|
2821
|
+
* @returns `elem`'s siblings, including `elem`.
|
|
2574
2822
|
*/
|
|
2575
2823
|
function getSiblings(elem) {
|
|
2576
2824
|
const parent = getParent(elem);
|
|
@@ -2628,7 +2876,8 @@ function getName(elem) {
|
|
|
2628
2876
|
*
|
|
2629
2877
|
* @category Traversal
|
|
2630
2878
|
* @param elem The element to get the next sibling of.
|
|
2631
|
-
* @returns `elem`'s next sibling that is a tag
|
|
2879
|
+
* @returns `elem`'s next sibling that is a tag, or `null` if there is no next
|
|
2880
|
+
* sibling.
|
|
2632
2881
|
*/
|
|
2633
2882
|
function nextElementSibling(elem) {
|
|
2634
2883
|
let { next } = elem;
|
|
@@ -2641,7 +2890,8 @@ function nextElementSibling(elem) {
|
|
|
2641
2890
|
*
|
|
2642
2891
|
* @category Traversal
|
|
2643
2892
|
* @param elem The element to get the previous sibling of.
|
|
2644
|
-
* @returns `elem`'s previous sibling that is a tag
|
|
2893
|
+
* @returns `elem`'s previous sibling that is a tag, or `null` if there is no
|
|
2894
|
+
* previous sibling.
|
|
2645
2895
|
*/
|
|
2646
2896
|
function prevElementSibling(elem) {
|
|
2647
2897
|
let { prev } = elem;
|
|
@@ -2663,8 +2913,14 @@ function removeElement(elem) {
|
|
|
2663
2913
|
elem.next.prev = elem.prev;
|
|
2664
2914
|
if (elem.parent) {
|
|
2665
2915
|
const childs = elem.parent.children;
|
|
2666
|
-
childs.
|
|
2916
|
+
const childsIndex = childs.lastIndexOf(elem);
|
|
2917
|
+
if (childsIndex >= 0) {
|
|
2918
|
+
childs.splice(childsIndex, 1);
|
|
2919
|
+
}
|
|
2667
2920
|
}
|
|
2921
|
+
elem.next = null;
|
|
2922
|
+
elem.prev = null;
|
|
2923
|
+
elem.parent = null;
|
|
2668
2924
|
}
|
|
2669
2925
|
/**
|
|
2670
2926
|
* Replace an element in the dom
|
|
@@ -2693,15 +2949,15 @@ function replaceElement(elem, replacement) {
|
|
|
2693
2949
|
* Append a child to an element.
|
|
2694
2950
|
*
|
|
2695
2951
|
* @category Manipulation
|
|
2696
|
-
* @param
|
|
2952
|
+
* @param parent The element to append to.
|
|
2697
2953
|
* @param child The element to be added as a child.
|
|
2698
2954
|
*/
|
|
2699
|
-
function appendChild(
|
|
2955
|
+
function appendChild(parent, child) {
|
|
2700
2956
|
removeElement(child);
|
|
2701
2957
|
child.next = null;
|
|
2702
|
-
child.parent =
|
|
2703
|
-
if (
|
|
2704
|
-
const sibling =
|
|
2958
|
+
child.parent = parent;
|
|
2959
|
+
if (parent.children.push(child) > 1) {
|
|
2960
|
+
const sibling = parent.children[parent.children.length - 2];
|
|
2705
2961
|
sibling.next = child;
|
|
2706
2962
|
child.prev = sibling;
|
|
2707
2963
|
}
|
|
@@ -2739,15 +2995,15 @@ function append(elem, next) {
|
|
|
2739
2995
|
* Prepend a child to an element.
|
|
2740
2996
|
*
|
|
2741
2997
|
* @category Manipulation
|
|
2742
|
-
* @param
|
|
2998
|
+
* @param parent The element to prepend before.
|
|
2743
2999
|
* @param child The element to be added as a child.
|
|
2744
3000
|
*/
|
|
2745
|
-
function prependChild(
|
|
3001
|
+
function prependChild(parent, child) {
|
|
2746
3002
|
removeElement(child);
|
|
2747
|
-
child.parent =
|
|
3003
|
+
child.parent = parent;
|
|
2748
3004
|
child.prev = null;
|
|
2749
|
-
if (
|
|
2750
|
-
const sibling =
|
|
3005
|
+
if (parent.children.unshift(child) !== 1) {
|
|
3006
|
+
const sibling = parent.children[1];
|
|
2751
3007
|
sibling.prev = child;
|
|
2752
3008
|
child.next = sibling;
|
|
2753
3009
|
}
|
|
@@ -2779,7 +3035,7 @@ function prepend(elem, prev) {
|
|
|
2779
3035
|
}
|
|
2780
3036
|
|
|
2781
3037
|
/**
|
|
2782
|
-
* Search a node and its children for nodes passing a test function.
|
|
3038
|
+
* Search a node and its children for nodes passing a test function. If `node` is not an array, it will be wrapped in one.
|
|
2783
3039
|
*
|
|
2784
3040
|
* @category Querying
|
|
2785
3041
|
* @param test Function to test nodes on.
|
|
@@ -2789,12 +3045,10 @@ function prepend(elem, prev) {
|
|
|
2789
3045
|
* @returns All nodes passing `test`.
|
|
2790
3046
|
*/
|
|
2791
3047
|
function filter(test, node, recurse = true, limit = Infinity) {
|
|
2792
|
-
|
|
2793
|
-
node = [node];
|
|
2794
|
-
return find(test, node, recurse, limit);
|
|
3048
|
+
return find(test, Array.isArray(node) ? node : [node], recurse, limit);
|
|
2795
3049
|
}
|
|
2796
3050
|
/**
|
|
2797
|
-
* Search an array of
|
|
3051
|
+
* Search an array of nodes and their children for nodes passing a test function.
|
|
2798
3052
|
*
|
|
2799
3053
|
* @category Querying
|
|
2800
3054
|
* @param test Function to test nodes on.
|
|
@@ -2805,24 +3059,41 @@ function filter(test, node, recurse = true, limit = Infinity) {
|
|
|
2805
3059
|
*/
|
|
2806
3060
|
function find(test, nodes, recurse, limit) {
|
|
2807
3061
|
const result = [];
|
|
2808
|
-
|
|
3062
|
+
/** Stack of the arrays we are looking at. */
|
|
3063
|
+
const nodeStack = [nodes];
|
|
3064
|
+
/** Stack of the indices within the arrays. */
|
|
3065
|
+
const indexStack = [0];
|
|
3066
|
+
for (;;) {
|
|
3067
|
+
// First, check if the current array has any more elements to look at.
|
|
3068
|
+
if (indexStack[0] >= nodeStack[0].length) {
|
|
3069
|
+
// If we have no more arrays to look at, we are done.
|
|
3070
|
+
if (indexStack.length === 1) {
|
|
3071
|
+
return result;
|
|
3072
|
+
}
|
|
3073
|
+
// Otherwise, remove the current array from the stack.
|
|
3074
|
+
nodeStack.shift();
|
|
3075
|
+
indexStack.shift();
|
|
3076
|
+
// Loop back to the start to continue with the next array.
|
|
3077
|
+
continue;
|
|
3078
|
+
}
|
|
3079
|
+
const elem = nodeStack[0][indexStack[0]++];
|
|
2809
3080
|
if (test(elem)) {
|
|
2810
3081
|
result.push(elem);
|
|
2811
3082
|
if (--limit <= 0)
|
|
2812
|
-
|
|
3083
|
+
return result;
|
|
2813
3084
|
}
|
|
2814
3085
|
if (recurse && hasChildren(elem) && elem.children.length > 0) {
|
|
2815
|
-
|
|
2816
|
-
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
|
|
3086
|
+
/*
|
|
3087
|
+
* Add the children to the stack. We are depth-first, so this is
|
|
3088
|
+
* the next array we look at.
|
|
3089
|
+
*/
|
|
3090
|
+
indexStack.unshift(0);
|
|
3091
|
+
nodeStack.unshift(elem.children);
|
|
2820
3092
|
}
|
|
2821
3093
|
}
|
|
2822
|
-
return result;
|
|
2823
3094
|
}
|
|
2824
3095
|
/**
|
|
2825
|
-
* Finds the first element inside of an array that matches a test function.
|
|
3096
|
+
* Finds the first element inside of an array that matches a test function. This is an alias for `Array.prototype.find`.
|
|
2826
3097
|
*
|
|
2827
3098
|
* @category Querying
|
|
2828
3099
|
* @param test Function to test nodes on.
|
|
@@ -2838,27 +3109,29 @@ function findOneChild(test, nodes) {
|
|
|
2838
3109
|
*
|
|
2839
3110
|
* @category Querying
|
|
2840
3111
|
* @param test Function to test nodes on.
|
|
2841
|
-
* @param nodes
|
|
3112
|
+
* @param nodes Node or array of nodes to search.
|
|
2842
3113
|
* @param recurse Also consider child nodes.
|
|
2843
|
-
* @returns The first
|
|
3114
|
+
* @returns The first node that passes `test`.
|
|
2844
3115
|
*/
|
|
2845
3116
|
function findOne(test, nodes, recurse = true) {
|
|
2846
3117
|
let elem = null;
|
|
2847
3118
|
for (let i = 0; i < nodes.length && !elem; i++) {
|
|
2848
|
-
const
|
|
2849
|
-
if (!isTag(
|
|
3119
|
+
const node = nodes[i];
|
|
3120
|
+
if (!isTag(node)) {
|
|
2850
3121
|
continue;
|
|
2851
3122
|
}
|
|
2852
|
-
else if (test(
|
|
2853
|
-
elem =
|
|
3123
|
+
else if (test(node)) {
|
|
3124
|
+
elem = node;
|
|
2854
3125
|
}
|
|
2855
|
-
else if (recurse &&
|
|
2856
|
-
elem = findOne(test,
|
|
3126
|
+
else if (recurse && node.children.length > 0) {
|
|
3127
|
+
elem = findOne(test, node.children, true);
|
|
2857
3128
|
}
|
|
2858
3129
|
}
|
|
2859
3130
|
return elem;
|
|
2860
3131
|
}
|
|
2861
3132
|
/**
|
|
3133
|
+
* Checks if a tree of nodes contains at least one node passing a test.
|
|
3134
|
+
*
|
|
2862
3135
|
* @category Querying
|
|
2863
3136
|
* @param test Function to test nodes on.
|
|
2864
3137
|
* @param nodes Array of nodes to search.
|
|
@@ -2866,12 +3139,10 @@ function findOne(test, nodes, recurse = true) {
|
|
|
2866
3139
|
*/
|
|
2867
3140
|
function existsOne(test, nodes) {
|
|
2868
3141
|
return nodes.some((checked) => isTag(checked) &&
|
|
2869
|
-
(test(checked) ||
|
|
2870
|
-
(checked.children.length > 0 &&
|
|
2871
|
-
existsOne(test, checked.children))));
|
|
3142
|
+
(test(checked) || existsOne(test, checked.children)));
|
|
2872
3143
|
}
|
|
2873
3144
|
/**
|
|
2874
|
-
* Search
|
|
3145
|
+
* Search an array of nodes and their children for elements passing a test function.
|
|
2875
3146
|
*
|
|
2876
3147
|
* Same as `find`, but limited to elements and with less options, leading to reduced complexity.
|
|
2877
3148
|
*
|
|
@@ -2881,21 +3152,35 @@ function existsOne(test, nodes) {
|
|
|
2881
3152
|
* @returns All nodes passing `test`.
|
|
2882
3153
|
*/
|
|
2883
3154
|
function findAll(test, nodes) {
|
|
2884
|
-
var _a;
|
|
2885
3155
|
const result = [];
|
|
2886
|
-
const
|
|
2887
|
-
|
|
2888
|
-
|
|
2889
|
-
|
|
2890
|
-
|
|
2891
|
-
|
|
3156
|
+
const nodeStack = [nodes];
|
|
3157
|
+
const indexStack = [0];
|
|
3158
|
+
for (;;) {
|
|
3159
|
+
if (indexStack[0] >= nodeStack[0].length) {
|
|
3160
|
+
if (nodeStack.length === 1) {
|
|
3161
|
+
return result;
|
|
3162
|
+
}
|
|
3163
|
+
// Otherwise, remove the current array from the stack.
|
|
3164
|
+
nodeStack.shift();
|
|
3165
|
+
indexStack.shift();
|
|
3166
|
+
// Loop back to the start to continue with the next array.
|
|
3167
|
+
continue;
|
|
2892
3168
|
}
|
|
3169
|
+
const elem = nodeStack[0][indexStack[0]++];
|
|
3170
|
+
if (!isTag(elem))
|
|
3171
|
+
continue;
|
|
2893
3172
|
if (test(elem))
|
|
2894
3173
|
result.push(elem);
|
|
3174
|
+
if (elem.children.length > 0) {
|
|
3175
|
+
indexStack.unshift(0);
|
|
3176
|
+
nodeStack.unshift(elem.children);
|
|
3177
|
+
}
|
|
2895
3178
|
}
|
|
2896
|
-
return result;
|
|
2897
3179
|
}
|
|
2898
3180
|
|
|
3181
|
+
/**
|
|
3182
|
+
* A map of functions to check nodes against.
|
|
3183
|
+
*/
|
|
2899
3184
|
const Checks = {
|
|
2900
3185
|
tag_name(name) {
|
|
2901
3186
|
if (typeof name === "function") {
|
|
@@ -2920,6 +3205,9 @@ const Checks = {
|
|
|
2920
3205
|
},
|
|
2921
3206
|
};
|
|
2922
3207
|
/**
|
|
3208
|
+
* Returns a function to check whether a node has an attribute with a particular
|
|
3209
|
+
* value.
|
|
3210
|
+
*
|
|
2923
3211
|
* @param attrib Attribute to check.
|
|
2924
3212
|
* @param value Attribute value to look for.
|
|
2925
3213
|
* @returns A function to check whether the a node has an attribute with a
|
|
@@ -2932,6 +3220,9 @@ function getAttribCheck(attrib, value) {
|
|
|
2932
3220
|
return (elem) => isTag(elem) && elem.attribs[attrib] === value;
|
|
2933
3221
|
}
|
|
2934
3222
|
/**
|
|
3223
|
+
* Returns a function that returns `true` if either of the input functions
|
|
3224
|
+
* returns `true` for a node.
|
|
3225
|
+
*
|
|
2935
3226
|
* @param a First function to combine.
|
|
2936
3227
|
* @param b Second function to combine.
|
|
2937
3228
|
* @returns A function taking a node and returning `true` if either of the input
|
|
@@ -2941,9 +3232,12 @@ function combineFuncs(a, b) {
|
|
|
2941
3232
|
return (elem) => a(elem) || b(elem);
|
|
2942
3233
|
}
|
|
2943
3234
|
/**
|
|
3235
|
+
* Returns a function that executes all checks in `options` and returns `true`
|
|
3236
|
+
* if any of them match a node.
|
|
3237
|
+
*
|
|
2944
3238
|
* @param options An object describing nodes to look for.
|
|
2945
|
-
* @returns A function
|
|
2946
|
-
* any of them match a node.
|
|
3239
|
+
* @returns A function that executes all checks in `options` and returns `true`
|
|
3240
|
+
* if any of them match a node.
|
|
2947
3241
|
*/
|
|
2948
3242
|
function compileTest(options) {
|
|
2949
3243
|
const funcs = Object.keys(options).map((key) => {
|
|
@@ -2955,6 +3249,8 @@ function compileTest(options) {
|
|
|
2955
3249
|
return funcs.length === 0 ? null : funcs.reduce(combineFuncs);
|
|
2956
3250
|
}
|
|
2957
3251
|
/**
|
|
3252
|
+
* Checks whether a node matches the description in `options`.
|
|
3253
|
+
*
|
|
2958
3254
|
* @category Legacy Query Functions
|
|
2959
3255
|
* @param options An object describing nodes to look for.
|
|
2960
3256
|
* @param node The element to test.
|
|
@@ -2965,6 +3261,8 @@ function testElement(options, node) {
|
|
|
2965
3261
|
return test ? test(node) : true;
|
|
2966
3262
|
}
|
|
2967
3263
|
/**
|
|
3264
|
+
* Returns all nodes that match `options`.
|
|
3265
|
+
*
|
|
2968
3266
|
* @category Legacy Query Functions
|
|
2969
3267
|
* @param options An object describing nodes to look for.
|
|
2970
3268
|
* @param nodes Nodes to search through.
|
|
@@ -2977,6 +3275,8 @@ function getElements(options, nodes, recurse, limit = Infinity) {
|
|
|
2977
3275
|
return test ? filter(test, nodes, recurse, limit) : [];
|
|
2978
3276
|
}
|
|
2979
3277
|
/**
|
|
3278
|
+
* Returns the node with the supplied ID.
|
|
3279
|
+
*
|
|
2980
3280
|
* @category Legacy Query Functions
|
|
2981
3281
|
* @param id The unique ID attribute value to look for.
|
|
2982
3282
|
* @param nodes Nodes to search through.
|
|
@@ -2989,6 +3289,8 @@ function getElementById(id, nodes, recurse = true) {
|
|
|
2989
3289
|
return findOne(getAttribCheck("id", id), nodes, recurse);
|
|
2990
3290
|
}
|
|
2991
3291
|
/**
|
|
3292
|
+
* Returns all nodes with the supplied `tagName`.
|
|
3293
|
+
*
|
|
2992
3294
|
* @category Legacy Query Functions
|
|
2993
3295
|
* @param tagName Tag name to search for.
|
|
2994
3296
|
* @param nodes Nodes to search through.
|
|
@@ -3000,6 +3302,8 @@ function getElementsByTagName(tagName, nodes, recurse = true, limit = Infinity)
|
|
|
3000
3302
|
return filter(Checks["tag_name"](tagName), nodes, recurse, limit);
|
|
3001
3303
|
}
|
|
3002
3304
|
/**
|
|
3305
|
+
* Returns all nodes with the supplied `type`.
|
|
3306
|
+
*
|
|
3003
3307
|
* @category Legacy Query Functions
|
|
3004
3308
|
* @param type Element type to look for.
|
|
3005
3309
|
* @param nodes Nodes to search through.
|
|
@@ -3012,11 +3316,12 @@ function getElementsByTagType(type, nodes, recurse = true, limit = Infinity) {
|
|
|
3012
3316
|
}
|
|
3013
3317
|
|
|
3014
3318
|
/**
|
|
3015
|
-
* Given an array of nodes, remove any member that is contained by another
|
|
3319
|
+
* Given an array of nodes, remove any member that is contained by another
|
|
3320
|
+
* member.
|
|
3016
3321
|
*
|
|
3017
3322
|
* @category Helpers
|
|
3018
3323
|
* @param nodes Nodes to filter.
|
|
3019
|
-
* @returns Remaining nodes that aren't
|
|
3324
|
+
* @returns Remaining nodes that aren't contained by other nodes.
|
|
3020
3325
|
*/
|
|
3021
3326
|
function removeSubsets(nodes) {
|
|
3022
3327
|
let idx = nodes.length;
|
|
@@ -3057,8 +3362,8 @@ var DocumentPosition;
|
|
|
3057
3362
|
DocumentPosition[DocumentPosition["CONTAINED_BY"] = 16] = "CONTAINED_BY";
|
|
3058
3363
|
})(DocumentPosition || (DocumentPosition = {}));
|
|
3059
3364
|
/**
|
|
3060
|
-
* Compare the position of one node against another node in any other document
|
|
3061
|
-
*
|
|
3365
|
+
* Compare the position of one node against another node in any other document,
|
|
3366
|
+
* returning a bitmask with the values from {@link DocumentPosition}.
|
|
3062
3367
|
*
|
|
3063
3368
|
* Document order:
|
|
3064
3369
|
* > There is an ordering, document order, defined on all the nodes in the
|
|
@@ -3122,9 +3427,9 @@ function compareDocumentPosition(nodeA, nodeB) {
|
|
|
3122
3427
|
return DocumentPosition.PRECEDING;
|
|
3123
3428
|
}
|
|
3124
3429
|
/**
|
|
3125
|
-
* Sort an array of nodes based on their relative position in the document
|
|
3126
|
-
*
|
|
3127
|
-
* the same document, sort order is unspecified.
|
|
3430
|
+
* Sort an array of nodes based on their relative position in the document,
|
|
3431
|
+
* removing any duplicate nodes. If the array contains nodes that do not belong
|
|
3432
|
+
* to the same document, sort order is unspecified.
|
|
3128
3433
|
*
|
|
3129
3434
|
* @category Helpers
|
|
3130
3435
|
* @param nodes Array of DOM nodes.
|
|
@@ -3225,7 +3530,7 @@ function getRssFeed(feedRoot) {
|
|
|
3225
3530
|
addConditionally(entry, "title", "title", children);
|
|
3226
3531
|
addConditionally(entry, "link", "link", children);
|
|
3227
3532
|
addConditionally(entry, "description", "description", children);
|
|
3228
|
-
const pubDate = fetch("pubDate", children);
|
|
3533
|
+
const pubDate = fetch("pubDate", children) || fetch("dc:date", children);
|
|
3229
3534
|
if (pubDate)
|
|
3230
3535
|
entry.pubDate = new Date(pubDate);
|
|
3231
3536
|
return entry;
|
|
@@ -3376,7 +3681,7 @@ var DomUtils = /*#__PURE__*/Object.freeze({
|
|
|
3376
3681
|
* Parses the data, returns the resulting document.
|
|
3377
3682
|
*
|
|
3378
3683
|
* @param data The data that should be parsed.
|
|
3379
|
-
* @param options Optional options for the parser and DOM
|
|
3684
|
+
* @param options Optional options for the parser and DOM handler.
|
|
3380
3685
|
*/
|
|
3381
3686
|
function parseDocument(data, options) {
|
|
3382
3687
|
const handler = new DomHandler(undefined, options);
|
|
@@ -12020,13 +12325,14 @@ const HASH_CHARACTER_LENGTH = 16;
|
|
|
12020
12325
|
* finding the asset urls in the AST assumes that there will be no
|
|
12021
12326
|
* user code with the same structure as the asset urls. In other words:
|
|
12022
12327
|
* Don't define a property named "canvasKitWasmUrl" that refers to
|
|
12023
|
-
* a string literal, don't define a property named "
|
|
12024
|
-
* to an array expression of
|
|
12025
|
-
*
|
|
12026
|
-
*
|
|
12027
|
-
*
|
|
12028
|
-
*
|
|
12029
|
-
*
|
|
12328
|
+
* a string literal, don't define a property named "fonts" that refers
|
|
12329
|
+
* to an array expression of object expressions with properties named
|
|
12330
|
+
* "fontName" and "url", and don't define a property called "images" that
|
|
12331
|
+
* refers to an array expression of object expressions with properties named
|
|
12332
|
+
* "imageName", "height", "width", and "url". Otherwise, this plugin may alter
|
|
12333
|
+
* your code in unexpected ways (although most likely it will simply give
|
|
12334
|
+
* warnings, because it's unlikely there will be valid file assets that will
|
|
12335
|
+
* be found and hashed).
|
|
12030
12336
|
*
|
|
12031
12337
|
* @param rootDir - root directory of build, usually "dist" because you
|
|
12032
12338
|
* usually hash only production builds
|
|
@@ -12071,10 +12377,16 @@ function hashM2c2kitAssets(rootDir) {
|
|
|
12071
12377
|
const literal = node;
|
|
12072
12378
|
const originalUrlValue = literal.value;
|
|
12073
12379
|
try {
|
|
12074
|
-
const hashedUrlValue = addHashToUrl(originalUrlValue,
|
|
12380
|
+
const hashedUrlValue = addHashToUrl(originalUrlValue,
|
|
12381
|
+
/**
|
|
12382
|
+
* by our convention, the wasm file will be served from
|
|
12383
|
+
* the assets directory, so the location is
|
|
12384
|
+
* `assets/${canvasKitWasmUrl}` not `${canvasKitWasmUrl}`
|
|
12385
|
+
*/
|
|
12386
|
+
`${rootDir}/assets`);
|
|
12075
12387
|
literal.value = literal.value.replace(originalUrlValue, hashedUrlValue);
|
|
12076
12388
|
literal.raw = literal.raw.replace(originalUrlValue, hashedUrlValue);
|
|
12077
|
-
addFileToFilesToBeRenamed(rootDir
|
|
12389
|
+
addFileToFilesToBeRenamed(`${rootDir}/assets`, originalUrlValue, hashedUrlValue, fileRenames);
|
|
12078
12390
|
}
|
|
12079
12391
|
catch (_a) {
|
|
12080
12392
|
console.log(`warning: could not hash canvaskit.wasm resource because it was not found at ${originalUrlValue}`);
|
|
@@ -12082,25 +12394,48 @@ function hashM2c2kitAssets(rootDir) {
|
|
|
12082
12394
|
}
|
|
12083
12395
|
}
|
|
12084
12396
|
}
|
|
12085
|
-
|
|
12086
|
-
|
|
12087
|
-
|
|
12088
|
-
|
|
12089
|
-
|
|
12090
|
-
|
|
12091
|
-
|
|
12092
|
-
|
|
12093
|
-
|
|
12094
|
-
|
|
12095
|
-
const
|
|
12096
|
-
|
|
12097
|
-
|
|
12098
|
-
|
|
12099
|
-
|
|
12100
|
-
|
|
12101
|
-
|
|
12102
|
-
|
|
12103
|
-
|
|
12397
|
+
// we'll be looking back 5 levels
|
|
12398
|
+
if (ancestors.length >= 5) {
|
|
12399
|
+
const maybeProperty = ancestors.slice(-2)[0];
|
|
12400
|
+
if (maybeProperty.type === "Property") {
|
|
12401
|
+
const property = maybeProperty;
|
|
12402
|
+
if (property.key.type === "Identifier" &&
|
|
12403
|
+
property.key.name == "url") {
|
|
12404
|
+
// property is url
|
|
12405
|
+
const maybeObjExpression = ancestors.slice(-3)[0];
|
|
12406
|
+
if (maybeObjExpression.type === "ObjectExpression") {
|
|
12407
|
+
const objExpression = maybeObjExpression;
|
|
12408
|
+
const properties = objExpression.properties
|
|
12409
|
+
.filter((p) => p.type === "Property")
|
|
12410
|
+
.map((p) => p);
|
|
12411
|
+
const identifiers = properties
|
|
12412
|
+
.filter((p) => p.key.type === "Identifier")
|
|
12413
|
+
.map((p) => p.key.name);
|
|
12414
|
+
const urlFontAssetProperties = ["fontName", "url"];
|
|
12415
|
+
const propCount = identifiers.filter((i) => urlFontAssetProperties.indexOf(i) !== -1).length;
|
|
12416
|
+
if (propCount === 2) {
|
|
12417
|
+
// the object expression has the 2 properties
|
|
12418
|
+
const maybeArrayExpression = ancestors.slice(-4)[0];
|
|
12419
|
+
if (maybeArrayExpression.type === "ArrayExpression") {
|
|
12420
|
+
const maybeProperty = ancestors.slice(-5)[0];
|
|
12421
|
+
if (maybeProperty.type === "Property") {
|
|
12422
|
+
const property = maybeProperty;
|
|
12423
|
+
if (property.key.type === "Identifier" &&
|
|
12424
|
+
property.key.name == "fonts") {
|
|
12425
|
+
// property is fonts
|
|
12426
|
+
const literal = node;
|
|
12427
|
+
const originalUrlValue = literal.value;
|
|
12428
|
+
try {
|
|
12429
|
+
const hashedUrlValue = addHashToUrl(originalUrlValue, rootDir);
|
|
12430
|
+
literal.value = literal.value.replace(originalUrlValue, hashedUrlValue);
|
|
12431
|
+
literal.raw = literal.raw.replace(originalUrlValue, hashedUrlValue);
|
|
12432
|
+
addFileToFilesToBeRenamed(rootDir, originalUrlValue, hashedUrlValue, fileRenames);
|
|
12433
|
+
}
|
|
12434
|
+
catch (_b) {
|
|
12435
|
+
}
|
|
12436
|
+
}
|
|
12437
|
+
}
|
|
12438
|
+
}
|
|
12104
12439
|
}
|
|
12105
12440
|
}
|
|
12106
12441
|
}
|