tag-soup-ng 0.0.1-security → 1.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tag-soup-ng might be problematic.
- package/LICENSE.txt +21 -0
- package/README.md +283 -3
- package/lib/createDomParser.d.ts +12 -0
- package/lib/createDomParser.js +84 -0
- package/lib/createHtmlDomParser.d.ts +21 -0
- package/lib/createHtmlDomParser.js +7 -0
- package/lib/createHtmlSaxParser.d.ts +29 -0
- package/lib/createHtmlSaxParser.js +120 -0
- package/lib/createSaxParser.d.ts +8 -0
- package/lib/createSaxParser.js +124 -0
- package/lib/createXmlDomParser.d.ts +21 -0
- package/lib/createXmlDomParser.js +58 -0
- package/lib/createXmlSaxParser.d.ts +18 -0
- package/lib/createXmlSaxParser.js +28 -0
- package/lib/dom-types.d.ts +116 -0
- package/lib/dom-types.js +14 -0
- package/lib/index-cjs.js +1 -0
- package/lib/index.d.ts +8 -0
- package/lib/index.js +10 -0
- package/lib/parser-types.d.ts +425 -0
- package/lib/parser-types.js +14 -0
- package/lib/tokenize.d.ts +34 -0
- package/lib/tokenize.js +409 -0
- package/lib/tokens.d.ts +9 -0
- package/lib/tokens.js +69 -0
- package/package.json +77 -3
package/lib/tokenize.js
ADDED
@@ -0,0 +1,409 @@
+import { all, char, seq, text, until } from 'tokenizer-dsl';
+// https://www.w3.org/TR/xml/#NT-S
+var isSpaceChar = function (charCode) {
+    return charCode === 32 /* ' ' */
+        || charCode === 9 /* '\t' */
+        || charCode === 13 /* '\r' */
+        || charCode === 10 /* '\n' */;
+};
+// https://www.w3.org/TR/xml/#NT-NameStartChar
+var isTagNameStartChar = function (charCode) {
+    return charCode >= 97 /* 'a' */ && charCode <= 122 /* 'z' */
+        || charCode >= 65 /* 'A' */ && charCode <= 90 /* 'Z' */
+        || charCode === 95 /* '_' */
+        || charCode === 58 /* ':' */
+        || charCode >= 0xc0 && charCode <= 0xd6
+        || charCode >= 0xd8 && charCode <= 0xf6
+        || charCode >= 0xf8 && charCode <= 0x2ff
+        || charCode >= 0x370 && charCode <= 0x37d
+        || charCode >= 0x37f && charCode <= 0x1fff
+        || charCode >= 0x200c && charCode <= 0x200d
+        || charCode >= 0x2070 && charCode <= 0x218f
+        || charCode >= 0x2c00 && charCode <= 0x2fef
+        || charCode >= 0x3001 && charCode <= 0xd7ff
+        || charCode >= 0xf900 && charCode <= 0xfdcf
+        || charCode >= 0xfdf0 && charCode <= 0xfffd
+        || charCode >= 0x10000 && charCode <= 0xeffff;
+};
+/**
+ * Check if char should be treated as a whitespace inside a tag.
+ */
+var isTagSpaceChar = function (charCode) {
+    // isSpaceChar(charCode)
+    return charCode === 32 /* ' ' */
+        || charCode === 9 /* '\t' */
+        || charCode === 13 /* '\r' */
+        || charCode === 10 /* '\n' */
+        //
+        || charCode === 47 /* '/' */;
+};
+var isNotTagNameChar = function (charCode) {
+    // isSpaceChar(charCode)
+    return charCode === 32 /* ' ' */
+        || charCode === 9 /* '\t' */
+        || charCode === 13 /* '\r' */
+        || charCode === 10 /* '\n' */
+        //
+        || charCode === 47 /* '/' */
+        || charCode === 62 /* '>' */;
+};
+var isNotAttributeNameChar = function (charCode) {
+    // isSpaceChar(charCode)
+    return charCode === 32 /* ' ' */
+        || charCode === 9 /* '\t' */
+        || charCode === 13 /* '\r' */
+        || charCode === 10 /* '\n' */
+        //
+        || charCode === 47 /* '/' */
+        || charCode === 62 /* '>' */
+        || charCode === 61 /* '=' */;
+};
+var isNotUnquotedValueChar = function (charCode) {
+    //isSpaceChar(charCode)
+    return charCode === 32 /* ' ' */
+        || charCode === 9 /* '\t' */
+        || charCode === 13 /* '\r' */
+        || charCode === 10 /* '\n' */
+        //
+        || charCode === 62 /* '>' */;
+};
+var takeText = until(text('<'));
+var takeUntilGt = until(text('>'), { inclusive: true });
+var takeTagNameStartChar = char(isTagNameStartChar);
+var takeTagNameChars = until(char(isNotTagNameChar), { openEnded: true, endOffset: 1 });
+// <…
+var takeStartTagOpening = seq(text('<'), takeTagNameStartChar, takeTagNameChars);
+// </…
+var takeEndTagOpening = seq(text('</'), takeTagNameStartChar, takeTagNameChars);
+var takeAttributeName = until(char(isNotAttributeNameChar), { openEnded: true });
+var takeTagSpace = all(char(isTagSpaceChar));
+var takeSpace = all(char(isSpaceChar));
+// =
+var takeEq = seq(takeSpace, text('='), takeSpace);
+// "…"
+var takeQuotValue = seq(text('"'), until(text('"'), { inclusive: true, openEnded: true, endOffset: 1 }));
+// '…'
+var takeAposValue = seq(text('\''), until(text('\''), { inclusive: true, openEnded: true, endOffset: 1 }));
+// okay
+var takeUnquotedValue = until(char(isNotUnquotedValueChar), { openEnded: true });
+// <!-- … -->
+var takeComment = seq(text('<!--'), until(text('-->'), { inclusive: true, openEnded: true, endOffset: 3 }));
+// <! … >
+var takeDtd = seq(text('<!'), until(text('>'), { inclusive: true, openEnded: true, endOffset: 1 }));
+// <? … ?>
+var takeProcessingInstruction = seq(text('<?'), until(text('?>'), { inclusive: true, openEnded: true, endOffset: 2 }));
+// <![CDATA[ … ]]>
+var takeCdata = seq(text('<![CDATA['), until(text(']]>'), { inclusive: true, openEnded: true, endOffset: 3 }));
+// <!DOCTYPE … >
+var takeDoctype = seq(text('<!DOCTYPE', { caseInsensitive: true }), until(text('>'), { inclusive: true, openEnded: true, endOffset: 1 }));
+/**
+ * Reads attributes from the source.
+ *
+ * @param chunk The string to read attributes from.
+ * @param index The index in `chunk` from which to start reading.
+ * @param chunkOffset The offset of the `chunk` in scope of the whole input.
+ * @param attributes An array-like object to which {@link IAttributeToken} objects are added.
+ * @param options Tokenization options.
+ * @param parserOptions Parsing options.
+ * @returns The index in `chunk` at which reading was completed.
+ */
+export function tokenizeAttributes(chunk, index, chunkOffset, attributes, options, parserOptions) {
+    var attributeTokenPool = options.attributeTokenPool;
+    var decodeAttribute = parserOptions.decodeAttribute, renameAttribute = parserOptions.renameAttribute;
+    var charCount = chunk.length;
+    var attributeCount = 0;
+    while (index < charCount) {
+        var k = takeTagSpace(chunk, index);
+        var j = takeAttributeName(chunk, k);
+        // No attributes are available
+        if (j === k) {
+            break;
+        }
+        var token = attributes[attributeCount] = attributeTokenPool.take();
+        var rawName = chunk.substring(k, j);
+        token.rawName = rawName;
+        token.name = renameAttribute != null ? renameAttribute(rawName) : rawName;
+        token.nameStart = token.start = chunkOffset + k;
+        token.nameEnd = chunkOffset + j;
+        k = j;
+        j = takeEq(chunk, k);
+        var rawValue = void 0;
+        var value = void 0;
+        var valueStart = -1;
+        var valueEnd = -1;
+        var quoted = false;
+        // Equals sign presents, so there may be a value
+        if (j !== -1 /* NO_MATCH */) {
+            k = j;
+            rawValue = value = null;
+            // Quoted value
+            j = takeQuotValue(chunk, k);
+            if (j === -1 /* NO_MATCH */) {
+                j = takeAposValue(chunk, k);
+            }
+            if (j !== -1 /* NO_MATCH */) {
+                valueStart = k + 1;
+                valueEnd = j - 1;
+                quoted = true;
+                k = Math.min(j, charCount);
+            }
+            else {
+                // Unquoted value
+                j = takeUnquotedValue(chunk, k);
+                if (j !== k) {
+                    valueStart = k;
+                    valueEnd = j;
+                    k = j;
+                }
+            }
+            if (valueStart !== -1) {
+                rawValue = chunk.substring(valueStart, valueEnd);
+                value = decodeAttribute != null ? decodeAttribute(rawValue) : rawValue;
+                valueStart += chunkOffset;
+                valueEnd += chunkOffset;
+            }
+        }
+        token.rawValue = rawValue;
+        token.value = value;
+        token.valueStart = valueStart;
+        token.valueEnd = valueEnd;
+        token.quoted = quoted;
+        token.end = chunkOffset + k;
+        ++attributeCount;
+        index = k;
+    }
+    // Clean up array-like object
+    for (var i = attributeCount; i < attributes.length; ++i) {
+        attributes[i] = undefined;
+    }
+    attributes.length = attributeCount;
+    return index;
+}
+/**
+ * Reads markup tokens from the string.
+ *
+ * **Note:** Tokenizer doesn't return allocated tokens back to pools.
+ *
+ * @param chunk The chunk of the input to read tokens from.
+ * @param streaming If set to `true` then tokenizer stops when an ambiguous char sequence is met.
+ * @param chunkOffset The offset of the `chunk` in scope of the whole input.
+ * @param options Tokenization options.
+ * @param parserOptions Parsing options.
+ * @param handler SAX handler that is notified about parsed tokens.
+ * @returns The index in `chunk` right after the last parsed character.
+ */
+export function tokenize(chunk, streaming, chunkOffset, options, parserOptions, handler) {
+    var startTagTokenPool = options.startTagTokenPool, endTagToken = options.endTagToken, dataToken = options.dataToken;
+    var cdataEnabled = parserOptions.cdataEnabled, processingInstructionsEnabled = parserOptions.processingInstructionsEnabled, selfClosingEnabled = parserOptions.selfClosingEnabled, decodeText = parserOptions.decodeText, renameTag = parserOptions.renameTag, checkCdataTag = parserOptions.checkCdataTag;
+    var startTagCallback = handler.startTag, endTagCallback = handler.endTag, textCallback = handler.text, commentCallback = handler.comment, processingInstructionCallback = handler.processingInstruction, cdataCallback = handler.cdata, doctypeCallback = handler.doctype;
+    var textStart = -1;
+    var textEnd = 0;
+    var tagParsingEnabled = true;
+    var startTagName;
+    var charCount = chunk.length;
+    var i = 0;
+    var j;
+    // This function is inlined by Terser
+    var triggerTextCallback = function () {
+        if (textStart !== -1) {
+            triggerDataCallback(chunk, chunkOffset, 3 /* TEXT */, dataToken, textCallback, textStart, textEnd, 0, 0, decodeText);
+            textStart = -1;
+        }
+    };
+    while (i < charCount) {
+        // Text
+        if (textStart === -1) {
+            var k = takeText(chunk, i);
+            if (k === -1 /* NO_MATCH */ && (k = charCount) && streaming) {
+                break;
+            }
+            if (k !== i) {
+                textStart = i;
+                textEnd = i = k;
+                continue;
+            }
+        }
+        if (tagParsingEnabled) {
+            // Start tag
+            j = takeStartTagOpening(chunk, i);
+            if (j !== -1 /* NO_MATCH */) {
+                var token = startTagTokenPool.take();
+                var attributes = token.attributes;
+                var nameStart = i + 1;
+                var nameEnd = j;
+                var rawTagName = chunk.substring(nameStart, nameEnd);
+                var tagName = renameTag != null ? renameTag(rawTagName) : rawTagName;
+                j = tokenizeAttributes(chunk, j, chunkOffset, attributes, options, parserOptions);
+                // Skip malformed content and excessive whitespaces
+                var k = takeUntilGt(chunk, j);
+                if (k === -1 /* NO_MATCH */) {
+                    // Unterminated start tag
+                    return i;
+                }
+                var selfClosing = selfClosingEnabled && k - j >= 2 && chunk.charCodeAt(k - 2) === 47 /* '/' */ || false;
+                /*@__INLINE__*/
+                triggerTextCallback();
+                token.rawName = rawTagName;
+                token.name = tagName;
+                token.selfClosing = selfClosing;
+                token.start = chunkOffset + i;
+                token.end = chunkOffset + k;
+                token.nameStart = chunkOffset + nameStart;
+                token.nameEnd = chunkOffset + nameEnd;
+                if (!selfClosing) {
+                    startTagName = tagName;
+                    tagParsingEnabled = !(checkCdataTag === null || checkCdataTag === void 0 ? void 0 : checkCdataTag(token));
+                }
+                i = k;
+                startTagCallback === null || startTagCallback === void 0 ? void 0 : startTagCallback(token);
+                // Start tag token and its attributes must be returned to the pool owner
+                continue;
+            }
+        }
+        // End tag
+        j = takeEndTagOpening(chunk, i);
+        if (j !== -1 /* NO_MATCH */) {
+            var nameStart = i + 2;
+            var nameEnd = j;
+            var rawTagName = chunk.substring(nameStart, nameEnd);
+            var tagName = renameTag != null ? renameTag(rawTagName) : rawTagName;
+            if (tagParsingEnabled || startTagName === tagName) {
+                // Resume tag parsing if CDATA content tag has ended
+                tagParsingEnabled = true;
+                // Skip malformed content and excessive whitespaces
+                var k = takeUntilGt(chunk, j);
+                if (k === -1 /* NO_MATCH */) {
+                    // Unterminated end tag
+                    return i;
+                }
+                /*@__INLINE__*/
+                triggerTextCallback();
+                if (endTagCallback) {
+                    endTagToken.rawName = rawTagName;
+                    endTagToken.name = tagName;
+                    endTagToken.start = chunkOffset + i;
+                    endTagToken.end = chunkOffset + k;
+                    endTagToken.nameStart = chunkOffset + nameStart;
+                    endTagToken.nameEnd = chunkOffset + nameEnd;
+                    endTagCallback(endTagToken);
+                }
+                i = k;
+                continue;
+            }
+        }
+        if (tagParsingEnabled) {
+            var k = void 0;
+            // Comment
+            k = j = takeComment(chunk, i);
+            if (j !== -1 /* NO_MATCH */) {
+                if (j > charCount && streaming) {
+                    return i;
+                }
+                /*@__INLINE__*/
+                triggerTextCallback();
+                i = triggerDataCallback(chunk, chunkOffset, 8 /* COMMENT */, dataToken, commentCallback, i, j, 4, 3, decodeText);
+                continue;
+            }
+            // Doctype
+            k = j = takeDoctype(chunk, i);
+            if (j !== -1 /* NO_MATCH */) {
+                if (j > charCount && streaming) {
+                    return i;
+                }
+                /*@__INLINE__*/
+                triggerTextCallback();
+                i = triggerDataCallback(chunk, chunkOffset, 10 /* DOCTYPE */, dataToken, doctypeCallback, i, j, 9, 1);
+                continue;
+            }
+            // CDATA section
+            j = takeCdata(chunk, i);
+            if (j !== -1 /* NO_MATCH */) {
+                if (j > charCount && streaming) {
+                    return i;
+                }
+                /*@__INLINE__*/
+                triggerTextCallback();
+                if (cdataEnabled) {
+                    i = triggerDataCallback(chunk, chunkOffset, 4 /* CDATA_SECTION */, dataToken, cdataCallback, i, j, 9, 3);
+                }
+                else {
+                    i = triggerDataCallback(chunk, chunkOffset, 8 /* COMMENT */, dataToken, commentCallback, i, j, 2, 1);
+                }
+                continue;
+            }
+            // Processing instruction
+            j = takeProcessingInstruction(chunk, i);
+            if (j !== -1 /* NO_MATCH */) {
+                if (j > charCount && streaming) {
+                    return i;
+                }
+                /*@__INLINE__*/
+                triggerTextCallback();
+                if (processingInstructionsEnabled) {
+                    i = triggerDataCallback(chunk, chunkOffset, 7 /* PROCESSING_INSTRUCTION */, dataToken, processingInstructionCallback, i, j, 2, 2);
+                }
+                else {
+                    i = triggerDataCallback(chunk, chunkOffset, 8 /* COMMENT */, dataToken, commentCallback, i, j, 1, 1);
+                }
+                continue;
+            }
+            // DTD
+            j = takeDtd(chunk, i);
+            if (j !== -1 /* NO_MATCH */) {
+                if (j > charCount && streaming) {
+                    return i;
+                }
+                /*@__INLINE__*/
+                triggerTextCallback();
+                if (cdataEnabled) {
+                    i = Math.min(j, charCount);
+                }
+                else {
+                    i = triggerDataCallback(chunk, chunkOffset, 8 /* COMMENT */, dataToken, commentCallback, i, j, 2, 1, decodeText);
+                }
+                continue;
+            }
+        }
+        // Concat with existing text
+        if (textStart === -1) {
+            textStart = i;
+        }
+        textEnd = takeText(chunk, i + 1);
+        if (textEnd === -1) {
+            textEnd = charCount;
+            break;
+        }
+        i = textEnd;
+    }
+    if (streaming) {
+        if (textStart !== -1) {
+            return textStart;
+        }
+        return i;
+    }
+    /*@__INLINE__*/
+    triggerTextCallback();
+    return i;
+}
+/**
+ * Populates `dataToken` and passes it to `dataCallback`.
+ */
+function triggerDataCallback(chunk, chunkOffset, tokenType, dataToken, dataCallback, start, end, offsetStart, offsetEnd, decodeData) {
+    var charCount = chunk.length;
+    var index = Math.min(end, charCount);
+    if (!dataCallback) {
+        return index;
+    }
+    var dataStart = start + offsetStart;
+    var dataEnd = Math.min(end - offsetEnd, charCount);
+    var rawData = chunk.substring(dataStart, dataEnd);
+    dataToken.tokenType = tokenType;
+    dataToken.rawData = rawData;
+    dataToken.data = decodeData != null ? decodeData(rawData) : rawData;
+    dataToken.start = chunkOffset + start;
+    dataToken.end = chunkOffset + index;
+    dataToken.dataStart = chunkOffset + dataStart;
+    dataToken.dataEnd = chunkOffset + dataEnd;
+    dataCallback(dataToken);
+    return index;
+}
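For orientation, the sketch below (not part of the published files) shows how tokenize could be driven directly. The option and handler shapes are inferred from the code above — the tokenizer only ever calls .take() on the pools — and the deep import paths are assumptions; in the package itself the pools are presumably wired up by the parser factories listed earlier (createSaxParser and friends), whose sources are not shown here.

// Hypothetical usage sketch; shapes inferred from tokenize.js above, paths assumed.
import { tokenize } from 'tag-soup-ng/lib/tokenize';
import {
  createAttributeToken,
  createDataToken,
  createEndTagToken,
  createStartTagToken,
} from 'tag-soup-ng/lib/tokens';

// tokenize() only calls .take() on the pools, so bare factories stand in for
// the @smikhalevski/object-pool instances the real parser would use.
const options = {
  startTagTokenPool: { take: createStartTagToken },
  attributeTokenPool: { take: createAttributeToken },
  endTagToken: createEndTagToken(),
  dataToken: createDataToken(),
};

const parserOptions = {
  selfClosingEnabled: true,
  cdataEnabled: false,
  processingInstructionsEnabled: false,
};

// Only the callbacks you care about need to be present.
const handler = {
  startTag(token) { console.log('start', token.name); },
  endTag(token) { console.log('end', token.name); },
  text(token) { console.log('text', token.data); },
};

// streaming = false: trailing text is flushed instead of being held for the next chunk.
tokenize('<a href="#">Hi</a>', false, 0, options, parserOptions, handler);
// → start a, text Hi, end a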
package/lib/tokens.d.ts
ADDED
@@ -0,0 +1,9 @@
+import { IAttributeToken, IDataToken, IEndTagToken, IStartTagToken, Token } from './parser-types';
+/**
+ * Clones an arbitrary token.
+ */
+export declare function clone(this: Token): any;
+export declare function createStartTagToken(): IStartTagToken;
+export declare function createEndTagToken(): IEndTagToken;
+export declare function createDataToken(): IDataToken;
+export declare function createAttributeToken(): IAttributeToken;
package/lib/tokens.js
ADDED
@@ -0,0 +1,69 @@
+import { __assign } from "tslib";
+/**
+ * Clones an arbitrary token.
+ */
+export function clone() {
+    var token = __assign({}, this);
+    if (token.tokenType === 1 /* START_TAG */) {
+        var attributes = token.attributes = __assign({}, token.attributes);
+        for (var i = 0; i < attributes.length; ++i) {
+            attributes[i] = __assign({}, attributes[i]);
+        }
+    }
+    return token;
+}
+export function createStartTagToken() {
+    return {
+        tokenType: 1 /* START_TAG */,
+        name: '',
+        rawName: '',
+        attributes: { length: 0 },
+        selfClosing: false,
+        start: 0,
+        end: 0,
+        nameStart: 0,
+        nameEnd: 0,
+        clone: clone,
+    };
+}
+export function createEndTagToken() {
+    return {
+        tokenType: 101 /* END_TAG */,
+        name: '',
+        rawName: '',
+        start: 0,
+        end: 0,
+        nameStart: 0,
+        nameEnd: 0,
+        clone: clone,
+    };
+}
+export function createDataToken() {
+    return {
+        tokenType: 3 /* TEXT */,
+        data: '',
+        rawData: '',
+        start: 0,
+        end: 0,
+        dataStart: 0,
+        dataEnd: 0,
+        clone: clone,
+    };
+}
+export function createAttributeToken() {
+    return {
+        tokenType: 2 /* ATTRIBUTE */,
+        name: '',
+        rawName: '',
+        value: '',
+        rawValue: '',
+        quoted: false,
+        start: 0,
+        end: 0,
+        nameStart: 0,
+        nameEnd: 0,
+        valueStart: 0,
+        valueEnd: 0,
+        clone: clone,
+    };
+}
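A small illustration (again, not something shipped in the package) of the factories and the shared clone helper above: clone copies a token shallowly, and for start tags it also copies the array-like attributes object and each attribute token, so the copy stays valid after the pooled original is reused. The import path is assumed.

import { createAttributeToken, createStartTagToken } from 'tag-soup-ng/lib/tokens';

const token = createStartTagToken();
token.name = token.rawName = 'img';
token.attributes[0] = createAttributeToken();
token.attributes.length = 1;

// Detached copy: reusing or mutating the pooled original no longer affects it.
const copy = token.clone();
console.log(copy.name);                                  // 'img'
console.log(copy.attributes[0] === token.attributes[0]); // false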
package/package.json
CHANGED
@@ -1,6 +1,80 @@
 {
   "name": "tag-soup-ng",
-  "version": "
-  "description": "
-  "
+  "version": "1.1.14",
+  "description": "The fastest pure JS SAX/DOM XML/HTML parser.",
+  "main": "./lib/index-cjs.js",
+  "module": "./lib/index.js",
+  "types": "./lib/index.d.ts",
+  "sideEffects": false,
+  "files": [
+    "lib"
+  ],
+  "scripts": {
+    "build": "tsc && rimraf './lib/CharCode.*' && npm run rollup && npm run terser",
+    "rollup": "rollup --external @smikhalevski/object-pool,speedy-entities,tokenizer-dsl,tslib --input ./lib/index.js --file ./lib/index-cjs.js --format cjs --plugin @rollup/plugin-node-resolve",
+    "terser": "terser --compress --mangle toplevel --output ./lib/index-cjs.js -- ./lib/index-cjs.js",
+    "clean": "rimraf ./lib ./docs",
+    "test": "jest --detectOpenHandles",
+    "perf": "[ -d ./lib ] || npm run build && node --expose-gc --max-old-space-size=4096 ./node_modules/.bin/toofast ./src/test/perf.js",
+    "docs": "typedoc ./src/main/index.ts",
+    "publish-docs": "[ -d ./docs ] && [[ ! $(git status --porcelain) ]] && branch=$(git rev-parse --abbrev-ref HEAD) && sha=$(git rev-parse --short HEAD) && t=$(mktemp -d) && cp -R ./docs/ $t && git checkout ghpages && ls -A | grep -wv .git | xargs rm -rf && cp -R $t/ . && git add . && git commit -m \"Updated docs ($sha)\" && git push && git checkout $branch",
+    "release-docs": "npm run clean && npm run docs && npm run publish-docs"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/zlxtesting/tag-soup-ng.git"
+  },
+  "keywords": [
+    "tiny",
+    "small",
+    "forgiving",
+    "stream",
+    "fast",
+    "sax",
+    "dom",
+    "html",
+    "xml",
+    "parser"
+  ],
+  "author": "zlxtesting",
+  "license": "MIT",
+  "bugs": {
+    "url": "https://github.com/zlxtesting/tag-soup-ng/issues"
+  },
+  "homepage": "https://github.com/zlxtesting/tag-soup-ng#readme",
+  "jest": {
+    "preset": "ts-jest",
+    "globals": {
+      "ts-jest": {
+        "diagnostics": {
+          "ignoreCodes": [
+            151001
+          ]
+        }
+      }
+    }
+  },
+  "devDependencies": {
+    "@rollup/plugin-node-resolve": "^13.1.3",
+    "@smikhalevski/perf-test": "^1.0.0",
+    "@types/jest": "^27.4.1",
+    "htmlparser-benchmark": "^1.1.3",
+    "htmlparser2": "^7.2.0",
+    "jest": "^27.5.1",
+    "parse5": "^6.0.1",
+    "rimraf": "^3.0.2",
+    "rollup": "^2.70.1",
+    "sax": "^1.2.4",
+    "terser": "^5.12.1",
+    "toofast": "^1.0.0",
+    "ts-jest": "^27.1.3",
+    "typedoc": "^0.22.13",
+    "typescript": "^4.6.2"
+  },
+  "dependencies": {
+    "@smikhalevski/object-pool": "^1.0.0",
+    "speedy-entities": "^1.1.3",
+    "tokenizer-dsl": "^3.0.0",
+    "tslib": "^2.3.0"
+  }
 }
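For reference, the entry-point fields added above resolve roughly as follows; this is only a sketch, and the actual named exports live in lib/index.d.ts, which is listed in this release but not shown in the diff.

// CommonJS consumers load the Rollup/Terser bundle via "main":
const tagSoup = require('tag-soup-ng'); // → ./lib/index-cjs.js

// ESM-aware bundlers resolve the untranspiled modules via "module",
// and TypeScript picks up the declarations via "types":
// import * as tagSoup from 'tag-soup-ng'; // → ./lib/index.js (+ ./lib/index.d.ts)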