@markuplint/html-parser 3.13.0 → 4.0.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/lib/get-namespace.js +5 -9
- package/lib/index.d.ts +2 -6
- package/lib/index.js +2 -18
- package/lib/is-document-fragment.d.ts +1 -1
- package/lib/is-document-fragment.js +2 -5
- package/lib/optimize-starts-head-or-body.d.ts +9 -10
- package/lib/optimize-starts-head-or-body.js +34 -24
- package/lib/parser.d.ts +33 -0
- package/lib/parser.js +162 -0
- package/lib/types.d.ts +9 -0
- package/lib/types.js +1 -0
- package/package.json +11 -7
- package/lib/attr-tokenizer.d.ts +0 -2
- package/lib/attr-tokenizer.js +0 -84
- package/lib/create-tree.d.ts +0 -8
- package/lib/create-tree.js +0 -261
- package/lib/parse-raw-tag.d.ts +0 -17
- package/lib/parse-raw-tag.js +0 -56
- package/lib/parse.d.ts +0 -2
- package/lib/parse.js +0 -29
- package/test/attr-tokenizer.spec.js +0 -675
- package/test/get-namespace.spec.js +0 -21
- package/test/index.spec.js +0 -1246
- package/test/optimize-starts-head-or-body.spec.js +0 -25
- package/test/parse-raw-tag.spec.js +0 -483
package/LICENSE
CHANGED
package/lib/get-namespace.js
CHANGED
|
@@ -1,15 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.getNamespace = void 0;
|
|
4
|
-
const parse5_1 = require("parse5");
|
|
1
|
+
import { parse, parseFragment } from 'parse5';
|
|
5
2
|
const DEFAULT_NAMESPACE = 'http://www.w3.org/1999/xhtml';
|
|
6
|
-
function getNamespace(tagName, parentNamespace = DEFAULT_NAMESPACE) {
|
|
3
|
+
export function getNamespace(tagName, parentNamespace = DEFAULT_NAMESPACE) {
|
|
7
4
|
switch (parentNamespace) {
|
|
8
5
|
case 'http://www.w3.org/2000/svg':
|
|
9
6
|
case 'http://www.w3.org/1998/Math/MathML': {
|
|
10
7
|
const parent = parentNamespace === 'http://www.w3.org/2000/svg' ? 'svg' : 'math';
|
|
11
8
|
const tag = `<${parent}><${tagName}></${parent}>`;
|
|
12
|
-
const frag =
|
|
9
|
+
const frag = parseFragment(tag);
|
|
13
10
|
const node = frag.childNodes[0];
|
|
14
11
|
if (!node) {
|
|
15
12
|
return DEFAULT_NAMESPACE;
|
|
@@ -21,10 +18,10 @@ function getNamespace(tagName, parentNamespace = DEFAULT_NAMESPACE) {
|
|
|
21
18
|
}
|
|
22
19
|
}
|
|
23
20
|
const tag = `<${tagName}>`;
|
|
24
|
-
const frag =
|
|
21
|
+
const frag = parseFragment(tag);
|
|
25
22
|
let node = frag.childNodes[0];
|
|
26
23
|
if (!node) {
|
|
27
|
-
const doc =
|
|
24
|
+
const doc = parse(tag);
|
|
28
25
|
node = doc.childNodes[0];
|
|
29
26
|
}
|
|
30
27
|
if (node && 'namespaceURI' in node) {
|
|
@@ -32,4 +29,3 @@ function getNamespace(tagName, parentNamespace = DEFAULT_NAMESPACE) {
|
|
|
32
29
|
}
|
|
33
30
|
return DEFAULT_NAMESPACE;
|
|
34
31
|
}
|
|
35
|
-
exports.getNamespace = getNamespace;
|
package/lib/index.d.ts
CHANGED
|
@@ -1,6 +1,2 @@
|
|
|
1
|
-
export {
|
|
2
|
-
export {
|
|
3
|
-
export { default as parseRawTag } from './parse-raw-tag';
|
|
4
|
-
export { getNamespace } from './get-namespace';
|
|
5
|
-
export { parse } from './parse';
|
|
6
|
-
export { createTree } from './create-tree';
|
|
1
|
+
export { getNamespace } from './get-namespace.js';
|
|
2
|
+
export { parser, HtmlParser } from './parser.js';
|
package/lib/index.js
CHANGED
|
@@ -1,18 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.createTree = exports.parse = exports.getNamespace = exports.parseRawTag = exports.isDocumentFragment = exports.attrTokenizer = void 0;
|
|
7
|
-
var attr_tokenizer_1 = require("./attr-tokenizer");
|
|
8
|
-
Object.defineProperty(exports, "attrTokenizer", { enumerable: true, get: function () { return __importDefault(attr_tokenizer_1).default; } });
|
|
9
|
-
var is_document_fragment_1 = require("./is-document-fragment");
|
|
10
|
-
Object.defineProperty(exports, "isDocumentFragment", { enumerable: true, get: function () { return __importDefault(is_document_fragment_1).default; } });
|
|
11
|
-
var parse_raw_tag_1 = require("./parse-raw-tag");
|
|
12
|
-
Object.defineProperty(exports, "parseRawTag", { enumerable: true, get: function () { return __importDefault(parse_raw_tag_1).default; } });
|
|
13
|
-
var get_namespace_1 = require("./get-namespace");
|
|
14
|
-
Object.defineProperty(exports, "getNamespace", { enumerable: true, get: function () { return get_namespace_1.getNamespace; } });
|
|
15
|
-
var parse_1 = require("./parse");
|
|
16
|
-
Object.defineProperty(exports, "parse", { enumerable: true, get: function () { return parse_1.parse; } });
|
|
17
|
-
var create_tree_1 = require("./create-tree");
|
|
18
|
-
Object.defineProperty(exports, "createTree", { enumerable: true, get: function () { return create_tree_1.createTree; } });
|
|
1
|
+
export { getNamespace } from './get-namespace.js';
|
|
2
|
+
export { parser, HtmlParser } from './parser.js';
|
package/lib/is-document-fragment.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export
|
|
1
|
+
export declare function isDocumentFragment(html: string): boolean;
|
package/lib/is-document-fragment.js
CHANGED
|
@@ -1,6 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
function isDocumentFragment(html) {
|
|
4
|
-
return !/^\s*(<!doctype html(?:\s*.+)?>|<html(?:\s|>))/im.test(html);
|
|
1
|
+
export function isDocumentFragment(html) {
|
|
2
|
+
return !/^\s*(?:<!doctype html(?:\s*(?:\S.*|[\t\v\f \u00A0\u1680\u2000-\u200A\u202F\u205F\u3000\uFEFF]))?>|<html[\s>])/im.test(html);
|
|
5
3
|
}
|
|
6
|
-
exports.default = isDocumentFragment;
|
package/lib/optimize-starts-head-or-body.d.ts
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
|
|
3
|
-
export
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
1
|
+
import type { HtmlParser } from './parser.js';
|
|
2
|
+
import type { MLASTNodeTreeItem } from '@markuplint/ml-ast';
|
|
3
|
+
export type Replacements = {
|
|
4
|
+
readonly code: string;
|
|
5
|
+
readonly heads: readonly string[];
|
|
6
|
+
readonly bodies: readonly string[];
|
|
7
7
|
};
|
|
8
|
-
export declare function
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
): void;
|
|
8
|
+
export declare function isStartsHeadTagOrBodyTag(rawCode: string): boolean;
|
|
9
|
+
export declare function optimizeStartsHeadTagOrBodyTagSetup(rawCode: string): Replacements;
|
|
10
|
+
export declare function optimizeStartsHeadTagOrBodyTagResume(parser: HtmlParser, nodeList: readonly MLASTNodeTreeItem[], replacements: Replacements): readonly MLASTNodeTreeItem[];
|
package/lib/optimize-starts-head-or-body.js
CHANGED
|
@@ -1,17 +1,18 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.optimizeStartsHeadTagOrBodyTagResume = exports.optimizeStartsHeadTagOrBodyTagSetup = exports.isStartsHeadTagOrBodyTag = void 0;
|
|
4
1
|
const UNDUPLICATED_CHAR = '\uFFFD';
|
|
5
|
-
function isStartsHeadTagOrBodyTag(rawCode) {
|
|
6
|
-
return /^\s*<(?:head|body)
|
|
2
|
+
export function isStartsHeadTagOrBodyTag(rawCode) {
|
|
3
|
+
return /^\s*<(?:head|body)[\s>]/i.test(rawCode);
|
|
7
4
|
}
|
|
8
|
-
|
|
9
|
-
|
|
5
|
+
export function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
|
|
6
|
+
if (!isStartsHeadTagOrBodyTag(rawCode)) {
|
|
7
|
+
return {
|
|
8
|
+
code: rawCode,
|
|
9
|
+
heads: [],
|
|
10
|
+
bodies: [],
|
|
11
|
+
};
|
|
12
|
+
}
|
|
10
13
|
const heads = [];
|
|
11
14
|
const bodies = [];
|
|
12
|
-
const code = rawCode.
|
|
13
|
-
// eslint-disable-next-line no-control-regex
|
|
14
|
-
/(?<=<\/?)(?:head|body)(?=\u0009|\u000A|\u000C|\u0020|\/|>|\u0000)/gi, tag => {
|
|
15
|
+
const code = rawCode.replaceAll(/(?<=<\/?)(?:head|body)(?=[\0\t\n\f />])/gi, tag => {
|
|
15
16
|
const prefix = `x-${UNDUPLICATED_CHAR}`;
|
|
16
17
|
let name;
|
|
17
18
|
if (/^head$/i.test(tag)) {
|
|
@@ -33,25 +34,34 @@ function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
|
|
|
33
34
|
bodies,
|
|
34
35
|
};
|
|
35
36
|
}
|
|
36
|
-
|
|
37
|
-
function optimizeStartsHeadTagOrBodyTagResume(
|
|
37
|
+
export function optimizeStartsHeadTagOrBodyTagResume(
|
|
38
38
|
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
39
|
-
nodeList,
|
|
40
|
-
|
|
41
|
-
replacements
|
|
42
|
-
|
|
39
|
+
parser, nodeList, replacements) {
|
|
40
|
+
const heads = [...replacements.heads];
|
|
41
|
+
const bodies = [...replacements.bodies];
|
|
42
|
+
for (const node of nodeList) {
|
|
43
43
|
if (!node.nodeName.startsWith(`x-${UNDUPLICATED_CHAR}`)) {
|
|
44
|
-
|
|
44
|
+
continue;
|
|
45
45
|
}
|
|
46
|
-
const realName = node.nodeName === `x-${UNDUPLICATED_CHAR}h` ?
|
|
46
|
+
const realName = node.nodeName === `x-${UNDUPLICATED_CHAR}h` ? heads.shift() : bodies.shift();
|
|
47
47
|
if (!realName) {
|
|
48
|
-
|
|
48
|
+
continue;
|
|
49
|
+
}
|
|
50
|
+
if (node.type !== 'starttag' && node.type !== 'endtag') {
|
|
51
|
+
continue;
|
|
49
52
|
}
|
|
50
|
-
node
|
|
51
|
-
node.nodeName = realName;
|
|
53
|
+
parser.updateRaw(node, node.raw.replace(node.nodeName, realName));
|
|
52
54
|
if (node.type === 'starttag') {
|
|
53
|
-
node
|
|
55
|
+
parser.updateElement(node, {
|
|
56
|
+
nodeName: realName,
|
|
57
|
+
elementType: 'html',
|
|
58
|
+
});
|
|
59
|
+
continue;
|
|
54
60
|
}
|
|
55
|
-
|
|
61
|
+
parser.updateElement(node, {
|
|
62
|
+
nodeName: realName,
|
|
63
|
+
});
|
|
64
|
+
continue;
|
|
65
|
+
}
|
|
66
|
+
return nodeList;
|
|
56
67
|
}
|
|
57
|
-
exports.optimizeStartsHeadTagOrBodyTagResume = optimizeStartsHeadTagOrBodyTagResume;
|
package/lib/parser.d.ts
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { Replacements } from './optimize-starts-head-or-body.js';
|
|
2
|
+
import type { Node } from './types.js';
|
|
3
|
+
import type { MLASTNodeTreeItem, MLASTParentNode } from '@markuplint/ml-ast';
|
|
4
|
+
import type { ChildToken, ParseOptions, ParserOptions } from '@markuplint/parser-utils';
|
|
5
|
+
import { Parser } from '@markuplint/parser-utils';
|
|
6
|
+
type State = {
|
|
7
|
+
startsHeadTagOrBodyTag: Replacements | null;
|
|
8
|
+
afterPosition: {
|
|
9
|
+
endOffset: number;
|
|
10
|
+
endLine: number;
|
|
11
|
+
endCol: number;
|
|
12
|
+
depth: number;
|
|
13
|
+
};
|
|
14
|
+
};
|
|
15
|
+
type ExtendsOptions = Pick<ParserOptions, 'ignoreTags' | 'maskChar'>;
|
|
16
|
+
export declare class HtmlParser extends Parser<Node, State> {
|
|
17
|
+
constructor(options?: ExtendsOptions);
|
|
18
|
+
tokenize(): {
|
|
19
|
+
ast: import("node_modules/parse5/dist/tree-adapters/default.js").ChildNode[];
|
|
20
|
+
isFragment: boolean;
|
|
21
|
+
};
|
|
22
|
+
beforeParse(rawCode: string, options?: ParseOptions): string;
|
|
23
|
+
afterParse(nodeList: readonly MLASTNodeTreeItem[], options?: ParseOptions): readonly MLASTNodeTreeItem[];
|
|
24
|
+
nodeize(originNode: Node, parentNode: MLASTParentNode | null, depth: number): readonly MLASTNodeTreeItem[];
|
|
25
|
+
afterNodeize(siblings: readonly MLASTNodeTreeItem[], parentNode: MLASTParentNode | null, depth: number): {
|
|
26
|
+
siblings: import("@markuplint/ml-ast").MLASTChildNode[];
|
|
27
|
+
ancestors: MLASTNodeTreeItem[];
|
|
28
|
+
};
|
|
29
|
+
visitText(token: ChildToken): readonly MLASTNodeTreeItem[];
|
|
30
|
+
visitSpreadAttr(): null;
|
|
31
|
+
}
|
|
32
|
+
export declare const parser: HtmlParser;
|
|
33
|
+
export {};
|
package/lib/parser.js
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import { Parser } from '@markuplint/parser-utils';
|
|
2
|
+
import { parse, parseFragment } from 'parse5';
|
|
3
|
+
import { isDocumentFragment } from './is-document-fragment.js';
|
|
4
|
+
import { optimizeStartsHeadTagOrBodyTagResume, optimizeStartsHeadTagOrBodyTagSetup, } from './optimize-starts-head-or-body.js';
|
|
5
|
+
export class HtmlParser extends Parser {
|
|
6
|
+
constructor(options) {
|
|
7
|
+
super(options, {
|
|
8
|
+
startsHeadTagOrBodyTag: null,
|
|
9
|
+
afterPosition: {
|
|
10
|
+
endOffset: 0,
|
|
11
|
+
endLine: 1,
|
|
12
|
+
endCol: 1,
|
|
13
|
+
depth: 0,
|
|
14
|
+
},
|
|
15
|
+
});
|
|
16
|
+
}
|
|
17
|
+
tokenize() {
|
|
18
|
+
const isFragment = isDocumentFragment(this.rawCode);
|
|
19
|
+
const parseFn = isFragment ? parseFragment : parse;
|
|
20
|
+
const doc = parseFn(this.rawCode, {
|
|
21
|
+
scriptingEnabled: false,
|
|
22
|
+
sourceCodeLocationInfo: true,
|
|
23
|
+
});
|
|
24
|
+
const childNodes = doc.childNodes;
|
|
25
|
+
return {
|
|
26
|
+
ast: childNodes,
|
|
27
|
+
isFragment,
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
beforeParse(rawCode, options) {
|
|
31
|
+
rawCode = super.beforeParse(rawCode, options);
|
|
32
|
+
const replacements = optimizeStartsHeadTagOrBodyTagSetup(rawCode);
|
|
33
|
+
if (replacements?.code) {
|
|
34
|
+
this.state.startsHeadTagOrBodyTag = replacements;
|
|
35
|
+
return replacements.code;
|
|
36
|
+
}
|
|
37
|
+
this.state.afterPosition = {
|
|
38
|
+
endOffset: (options?.offsetOffset ?? 0) + this.state.afterPosition.endOffset,
|
|
39
|
+
endLine: (options?.offsetLine ?? 0) + this.state.afterPosition.endLine,
|
|
40
|
+
endCol: (options?.offsetColumn ?? 0) + this.state.afterPosition.endCol,
|
|
41
|
+
depth: this.state.afterPosition.depth,
|
|
42
|
+
};
|
|
43
|
+
return rawCode;
|
|
44
|
+
}
|
|
45
|
+
afterParse(nodeList, options) {
|
|
46
|
+
nodeList = super.afterParse(nodeList, options);
|
|
47
|
+
if (this.state.startsHeadTagOrBodyTag) {
|
|
48
|
+
return optimizeStartsHeadTagOrBodyTagResume(this, nodeList, this.state.startsHeadTagOrBodyTag);
|
|
49
|
+
}
|
|
50
|
+
return nodeList;
|
|
51
|
+
}
|
|
52
|
+
nodeize(
|
|
53
|
+
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
54
|
+
originNode, parentNode, depth) {
|
|
55
|
+
const namespace = 'namespaceURI' in originNode ? originNode.namespaceURI : '';
|
|
56
|
+
const location = originNode.sourceCodeLocation;
|
|
57
|
+
if (!location) {
|
|
58
|
+
// Ghost element
|
|
59
|
+
const afterNode = this.state.afterPosition.depth === depth ? this.state.afterPosition : parentNode;
|
|
60
|
+
const startOffset = afterNode?.endOffset ?? 0;
|
|
61
|
+
const startLine = afterNode?.endLine ?? 0;
|
|
62
|
+
const startCol = afterNode?.endCol ?? 0;
|
|
63
|
+
const childNodes = 'childNodes' in originNode ? originNode.childNodes : [];
|
|
64
|
+
return this.visitElement({
|
|
65
|
+
raw: '',
|
|
66
|
+
startOffset,
|
|
67
|
+
startLine,
|
|
68
|
+
startCol,
|
|
69
|
+
depth,
|
|
70
|
+
parentNode,
|
|
71
|
+
nodeName: originNode.nodeName,
|
|
72
|
+
namespace,
|
|
73
|
+
}, childNodes);
|
|
74
|
+
}
|
|
75
|
+
const { startOffset, endOffset } = location;
|
|
76
|
+
const token = this.sliceFragment(startOffset, endOffset ?? startOffset);
|
|
77
|
+
switch (originNode.nodeName) {
|
|
78
|
+
case '#documentType': {
|
|
79
|
+
if (!('name' in originNode)) {
|
|
80
|
+
throw new TypeError("DocumentType doesn't have name");
|
|
81
|
+
}
|
|
82
|
+
return this.visitDoctype({
|
|
83
|
+
...token,
|
|
84
|
+
depth,
|
|
85
|
+
name: originNode.name ?? '',
|
|
86
|
+
publicId: originNode.publicId ?? '',
|
|
87
|
+
systemId: originNode.systemId ?? '',
|
|
88
|
+
parentNode,
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
case '#text': {
|
|
92
|
+
return this.visitText({
|
|
93
|
+
...token,
|
|
94
|
+
depth,
|
|
95
|
+
parentNode,
|
|
96
|
+
});
|
|
97
|
+
}
|
|
98
|
+
case '#comment': {
|
|
99
|
+
return this.visitComment({
|
|
100
|
+
...token,
|
|
101
|
+
depth,
|
|
102
|
+
parentNode,
|
|
103
|
+
});
|
|
104
|
+
}
|
|
105
|
+
default: {
|
|
106
|
+
const tagLoc = 'startTag' in location ? location.startTag : null;
|
|
107
|
+
const offset = tagLoc?.startOffset ?? startOffset;
|
|
108
|
+
const endOffset = tagLoc?.endOffset ?? offset;
|
|
109
|
+
const startTagToken = this.sliceFragment(offset, endOffset);
|
|
110
|
+
const childNodes = 'childNodes' in originNode
|
|
111
|
+
? originNode.nodeName === 'template' && 'content' in originNode
|
|
112
|
+
? originNode.content.childNodes
|
|
113
|
+
: originNode.childNodes
|
|
114
|
+
: [];
|
|
115
|
+
return this.visitElement({
|
|
116
|
+
...startTagToken,
|
|
117
|
+
depth,
|
|
118
|
+
parentNode,
|
|
119
|
+
nodeName: originNode.nodeName,
|
|
120
|
+
namespace,
|
|
121
|
+
}, childNodes, {
|
|
122
|
+
createEndTagToken: () => {
|
|
123
|
+
const endTagLoc = 'endTag' in location ? location.endTag : null;
|
|
124
|
+
if (!endTagLoc) {
|
|
125
|
+
return null;
|
|
126
|
+
}
|
|
127
|
+
const { startOffset, endOffset } = endTagLoc;
|
|
128
|
+
const endTagToken = this.sliceFragment(startOffset, endOffset);
|
|
129
|
+
return {
|
|
130
|
+
...endTagToken,
|
|
131
|
+
depth,
|
|
132
|
+
parentNode,
|
|
133
|
+
};
|
|
134
|
+
},
|
|
135
|
+
});
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
afterNodeize(siblings, parentNode, depth) {
|
|
140
|
+
const after = super.afterNodeize(siblings, parentNode, depth);
|
|
141
|
+
const prevNode = after.siblings.at(-1) ?? after.ancestors.findLast(n => n.depth === depth);
|
|
142
|
+
if (prevNode) {
|
|
143
|
+
this.state.afterPosition = {
|
|
144
|
+
endOffset: prevNode.endOffset,
|
|
145
|
+
endLine: prevNode.endLine,
|
|
146
|
+
endCol: prevNode.endCol,
|
|
147
|
+
depth,
|
|
148
|
+
};
|
|
149
|
+
}
|
|
150
|
+
return after;
|
|
151
|
+
}
|
|
152
|
+
visitText(token) {
|
|
153
|
+
return super.visitText(token, {
|
|
154
|
+
researchTags: true,
|
|
155
|
+
invalidTagAsText: true,
|
|
156
|
+
});
|
|
157
|
+
}
|
|
158
|
+
visitSpreadAttr() {
|
|
159
|
+
return null;
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
export const parser = new HtmlParser();
|
package/lib/types.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export type { ParserOptions, DefaultTreeAdapterMap } from 'parse5';
|
|
2
|
+
import type { DefaultTreeAdapterMap } from 'parse5';
|
|
3
|
+
export type Node = DefaultTreeAdapterMap['node'];
|
|
4
|
+
export type TextNode = DefaultTreeAdapterMap['textNode'];
|
|
5
|
+
export type Element = DefaultTreeAdapterMap['element'];
|
|
6
|
+
export type CommentNode = DefaultTreeAdapterMap['commentNode'];
|
|
7
|
+
export type Document = DefaultTreeAdapterMap['document'];
|
|
8
|
+
export type DocumentFragment = DefaultTreeAdapterMap['documentFragment'];
|
|
9
|
+
export type ChildNode = DefaultTreeAdapterMap['childNode'];
|
package/lib/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@markuplint/html-parser",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "4.0.0-alpha.10",
|
|
4
4
|
"description": "HTML parser for markuplint",
|
|
5
5
|
"repository": "git@github.com:markuplint/markuplint.git",
|
|
6
6
|
"author": "Yusuke Hirao <yusukehirao@me.com>",
|
|
7
7
|
"license": "MIT",
|
|
8
8
|
"private": false,
|
|
9
|
-
"
|
|
9
|
+
"type": "module",
|
|
10
|
+
"exports": {
|
|
11
|
+
".": {
|
|
12
|
+
"import": "./lib/index.js"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
10
15
|
"types": "lib/index.d.ts",
|
|
11
16
|
"publishConfig": {
|
|
12
17
|
"access": "public"
|
|
@@ -20,11 +25,10 @@
|
|
|
20
25
|
"clean": "tsc --build --clean"
|
|
21
26
|
},
|
|
22
27
|
"dependencies": {
|
|
23
|
-
"@markuplint/ml-ast": "
|
|
24
|
-
"@markuplint/parser-utils": "
|
|
28
|
+
"@markuplint/ml-ast": "4.0.0-alpha.10",
|
|
29
|
+
"@markuplint/parser-utils": "4.0.0-alpha.10",
|
|
25
30
|
"parse5": "7.1.2",
|
|
26
|
-
"
|
|
27
|
-
"type-fest": "^4.8.2"
|
|
31
|
+
"type-fest": "^4.10.1"
|
|
28
32
|
},
|
|
29
|
-
"gitHead": "
|
|
33
|
+
"gitHead": "b41153ea665aa8f091daf6114a06047f4ccb8350"
|
|
30
34
|
}
|
package/lib/attr-tokenizer.d.ts
DELETED
package/lib/attr-tokenizer.js
DELETED
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const parser_utils_1 = require("@markuplint/parser-utils");
|
|
4
|
-
const reAttrsInStartTag =
|
|
5
|
-
// eslint-disable-next-line no-control-regex
|
|
6
|
-
/(\s*)([^\x00-\x1f\x7f-\x9f "'>/=]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s]*)))?/;
|
|
7
|
-
function attrTokenizer(raw, line, col, startOffset) {
|
|
8
|
-
var _a, _b, _c, _d, _e, _f, _g, _h;
|
|
9
|
-
const attrMatchedMap = raw.match(reAttrsInStartTag);
|
|
10
|
-
if (!attrMatchedMap) {
|
|
11
|
-
throw new SyntaxError('Illegal attribute token');
|
|
12
|
-
}
|
|
13
|
-
const spacesBeforeAttrString = (_a = attrMatchedMap[1]) !== null && _a !== void 0 ? _a : '';
|
|
14
|
-
const nameChars = (_b = attrMatchedMap[2]) !== null && _b !== void 0 ? _b : '';
|
|
15
|
-
const spacesBeforeEqualChars = (_c = attrMatchedMap[3]) !== null && _c !== void 0 ? _c : '';
|
|
16
|
-
const equalChars = (_d = attrMatchedMap[4]) !== null && _d !== void 0 ? _d : null;
|
|
17
|
-
const spacesAfterEqualChars = (_e = attrMatchedMap[5]) !== null && _e !== void 0 ? _e : '';
|
|
18
|
-
const quoteChars = attrMatchedMap[6] != null ? '"' : attrMatchedMap[7] != null ? "'" : null;
|
|
19
|
-
const valueChars = (_h = (_g = (_f = attrMatchedMap[6]) !== null && _f !== void 0 ? _f : attrMatchedMap[7]) !== null && _g !== void 0 ? _g : attrMatchedMap[8]) !== null && _h !== void 0 ? _h : (quoteChars ? '' : null);
|
|
20
|
-
let offset = startOffset;
|
|
21
|
-
const spacesBeforeName = (0, parser_utils_1.tokenizer)(spacesBeforeAttrString, line, col, offset);
|
|
22
|
-
line = spacesBeforeName.endLine;
|
|
23
|
-
col = spacesBeforeName.endCol;
|
|
24
|
-
offset = spacesBeforeName.endOffset;
|
|
25
|
-
const name = (0, parser_utils_1.tokenizer)(nameChars, line, col, offset);
|
|
26
|
-
line = name.endLine;
|
|
27
|
-
col = name.endCol;
|
|
28
|
-
offset = name.endOffset;
|
|
29
|
-
const spacesBeforeEqual = (0, parser_utils_1.tokenizer)(spacesBeforeEqualChars, line, col, offset);
|
|
30
|
-
line = spacesBeforeEqual.endLine;
|
|
31
|
-
col = spacesBeforeEqual.endCol;
|
|
32
|
-
offset = spacesBeforeEqual.endOffset;
|
|
33
|
-
const equal = (0, parser_utils_1.tokenizer)(equalChars, line, col, offset);
|
|
34
|
-
line = equal.endLine;
|
|
35
|
-
col = equal.endCol;
|
|
36
|
-
offset = equal.endOffset;
|
|
37
|
-
const spacesAfterEqual = (0, parser_utils_1.tokenizer)(spacesAfterEqualChars, line, col, offset);
|
|
38
|
-
line = spacesAfterEqual.endLine;
|
|
39
|
-
col = spacesAfterEqual.endCol;
|
|
40
|
-
offset = spacesAfterEqual.endOffset;
|
|
41
|
-
const startQuote = (0, parser_utils_1.tokenizer)(quoteChars, line, col, offset);
|
|
42
|
-
line = startQuote.endLine;
|
|
43
|
-
col = startQuote.endCol;
|
|
44
|
-
offset = startQuote.endOffset;
|
|
45
|
-
const value = (0, parser_utils_1.tokenizer)(valueChars, line, col, offset);
|
|
46
|
-
line = value.endLine;
|
|
47
|
-
col = value.endCol;
|
|
48
|
-
offset = value.endOffset;
|
|
49
|
-
const endQuote = (0, parser_utils_1.tokenizer)(quoteChars, line, col, offset);
|
|
50
|
-
const attrToken = (0, parser_utils_1.tokenizer)(nameChars +
|
|
51
|
-
spacesBeforeEqualChars +
|
|
52
|
-
(equalChars !== null && equalChars !== void 0 ? equalChars : '') +
|
|
53
|
-
spacesAfterEqualChars +
|
|
54
|
-
(quoteChars !== null && quoteChars !== void 0 ? quoteChars : '') +
|
|
55
|
-
(valueChars !== null && valueChars !== void 0 ? valueChars : '') +
|
|
56
|
-
(quoteChars !== null && quoteChars !== void 0 ? quoteChars : ''), name.startLine, name.startCol, name.startOffset);
|
|
57
|
-
return {
|
|
58
|
-
type: 'html-attr',
|
|
59
|
-
uuid: (0, parser_utils_1.uuid)(),
|
|
60
|
-
raw: attrToken.raw,
|
|
61
|
-
startOffset: attrToken.startOffset,
|
|
62
|
-
endOffset: attrToken.endOffset,
|
|
63
|
-
startLine: attrToken.startLine,
|
|
64
|
-
endLine: attrToken.endLine,
|
|
65
|
-
startCol: attrToken.startCol,
|
|
66
|
-
endCol: attrToken.endCol,
|
|
67
|
-
spacesBeforeName,
|
|
68
|
-
name,
|
|
69
|
-
spacesBeforeEqual,
|
|
70
|
-
equal,
|
|
71
|
-
spacesAfterEqual,
|
|
72
|
-
startQuote,
|
|
73
|
-
value,
|
|
74
|
-
endQuote,
|
|
75
|
-
isDuplicatable: false,
|
|
76
|
-
nodeName: name.raw,
|
|
77
|
-
parentNode: null,
|
|
78
|
-
prevNode: null,
|
|
79
|
-
nextNode: null,
|
|
80
|
-
isFragment: false,
|
|
81
|
-
isGhost: false,
|
|
82
|
-
};
|
|
83
|
-
}
|
|
84
|
-
exports.default = attrTokenizer;
|