@markuplint/html-parser 4.0.0-dev.28 → 4.0.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2017-2019 Yusuke Hirao
3
+ Copyright (c) 2017-2024 Yusuke Hirao
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
package/lib/index.d.ts CHANGED
@@ -1,6 +1,2 @@
1
- export { default as attrTokenizer } from './attr-tokenizer.js';
2
- export { default as isDocumentFragment } from './is-document-fragment.js';
3
- export { default as parseRawTag } from './parse-raw-tag.js';
4
1
  export { getNamespace } from './get-namespace.js';
5
- export { parse } from './parse.js';
6
- export { createTree } from './create-tree.js';
2
+ export { parser, HtmlParser } from './parser.js';
package/lib/index.js CHANGED
@@ -1,6 +1,2 @@
1
- export { default as attrTokenizer } from './attr-tokenizer.js';
2
- export { default as isDocumentFragment } from './is-document-fragment.js';
3
- export { default as parseRawTag } from './parse-raw-tag.js';
4
1
  export { getNamespace } from './get-namespace.js';
5
- export { parse } from './parse.js';
6
- export { createTree } from './create-tree.js';
2
+ export { parser, HtmlParser } from './parser.js';
@@ -1 +1 @@
1
- export default function isDocumentFragment(html: string): boolean;
1
+ export declare function isDocumentFragment(html: string): boolean;
@@ -1,3 +1,3 @@
1
- export default function isDocumentFragment(html) {
2
- return !/^\s*(<!doctype html(?:\s*.+)?>|<html[\s>])/im.test(html);
1
+ export function isDocumentFragment(html) {
2
+ return !/^\s*(?:<!doctype html(?:\s*(?:\S.*|[\t\v\f \u00A0\u1680\u2000-\u200A\u202F\u205F\u3000\uFEFF]))?>|<html[\s>])/im.test(html);
3
3
  }
@@ -1,8 +1,10 @@
1
- import type { MLASTNode } from '@markuplint/ml-ast';
2
- export declare function isStartsHeadTagOrBodyTag(rawCode: string): boolean;
3
- export declare function optimizeStartsHeadTagOrBodyTagSetup(rawCode: string): {
4
- code: string;
5
- heads: string[];
6
- bodies: string[];
1
+ import type { HtmlParser } from './parser.js';
2
+ import type { MLASTNodeTreeItem } from '@markuplint/ml-ast';
3
+ export type Replacements = {
4
+ readonly code: string;
5
+ readonly heads: readonly string[];
6
+ readonly bodies: readonly string[];
7
7
  };
8
- export declare function optimizeStartsHeadTagOrBodyTagResume(nodeList: MLASTNode[], replacements: ReturnType<typeof optimizeStartsHeadTagOrBodyTagSetup>): void;
8
+ export declare function isStartsHeadTagOrBodyTag(rawCode: string): boolean;
9
+ export declare function optimizeStartsHeadTagOrBodyTagSetup(rawCode: string): Replacements;
10
+ export declare function optimizeStartsHeadTagOrBodyTagResume(parser: HtmlParser, nodeList: readonly MLASTNodeTreeItem[], replacements: Replacements): readonly MLASTNodeTreeItem[];
@@ -1,13 +1,18 @@
1
1
  const UNDUPLICATED_CHAR = '\uFFFD';
2
2
  export function isStartsHeadTagOrBodyTag(rawCode) {
3
- return /^\s*<(?:head|body)>/i.test(rawCode);
3
+ return /^\s*<(?:head|body)[\s>]/i.test(rawCode);
4
4
  }
5
5
  export function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
6
+ if (!isStartsHeadTagOrBodyTag(rawCode)) {
7
+ return {
8
+ code: rawCode,
9
+ heads: [],
10
+ bodies: [],
11
+ };
12
+ }
6
13
  const heads = [];
7
14
  const bodies = [];
8
- const code = rawCode.replaceAll(
9
- // eslint-disable-next-line no-control-regex
10
- /(?<=<\/?)(?:head|body)(?=\u0009|\u000A|\u000C| |\/|>|\u0000)/gi, tag => {
15
+ const code = rawCode.replaceAll(/(?<=<\/?)(?:head|body)(?=[\0\t\n\f />])/gi, tag => {
11
16
  const prefix = `x-${UNDUPLICATED_CHAR}`;
12
17
  let name;
13
18
  if (/^head$/i.test(tag)) {
@@ -31,21 +36,32 @@ export function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
31
36
  }
32
37
  export function optimizeStartsHeadTagOrBodyTagResume(
33
38
  // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
34
- nodeList,
35
- // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
36
- replacements) {
39
+ parser, nodeList, replacements) {
40
+ const heads = [...replacements.heads];
41
+ const bodies = [...replacements.bodies];
37
42
  for (const node of nodeList) {
38
43
  if (!node.nodeName.startsWith(`x-${UNDUPLICATED_CHAR}`)) {
39
44
  continue;
40
45
  }
41
- const realName = node.nodeName === `x-${UNDUPLICATED_CHAR}h` ? replacements.heads.shift() : replacements.bodies.shift();
46
+ const realName = node.nodeName === `x-${UNDUPLICATED_CHAR}h` ? heads.shift() : bodies.shift();
42
47
  if (!realName) {
43
48
  continue;
44
49
  }
45
- node.raw = node.raw.replace(node.nodeName, realName);
46
- node.nodeName = realName;
50
+ if (node.type !== 'starttag' && node.type !== 'endtag') {
51
+ continue;
52
+ }
53
+ parser.updateRaw(node, node.raw.replace(node.nodeName, realName));
47
54
  if (node.type === 'starttag') {
48
- node.elementType = 'html';
55
+ parser.updateElement(node, {
56
+ nodeName: realName,
57
+ elementType: 'html',
58
+ });
59
+ continue;
49
60
  }
61
+ parser.updateElement(node, {
62
+ nodeName: realName,
63
+ });
64
+ continue;
50
65
  }
66
+ return nodeList;
51
67
  }
@@ -0,0 +1,33 @@
1
+ import type { Replacements } from './optimize-starts-head-or-body.js';
2
+ import type { Node } from './types.js';
3
+ import type { MLASTNodeTreeItem, MLASTParentNode } from '@markuplint/ml-ast';
4
+ import type { ChildToken, ParseOptions, ParserOptions } from '@markuplint/parser-utils';
5
+ import { Parser } from '@markuplint/parser-utils';
6
+ type State = {
7
+ startsHeadTagOrBodyTag: Replacements | null;
8
+ afterPosition: {
9
+ endOffset: number;
10
+ endLine: number;
11
+ endCol: number;
12
+ depth: number;
13
+ };
14
+ };
15
+ type ExtendsOptions = Pick<ParserOptions, 'ignoreTags' | 'maskChar'>;
16
+ export declare class HtmlParser extends Parser<Node, State> {
17
+ constructor(options?: ExtendsOptions);
18
+ tokenize(): {
19
+ ast: import("node_modules/parse5/dist/tree-adapters/default.js").ChildNode[];
20
+ isFragment: boolean;
21
+ };
22
+ beforeParse(rawCode: string, options?: ParseOptions): string;
23
+ afterParse(nodeList: readonly MLASTNodeTreeItem[], options?: ParseOptions): readonly MLASTNodeTreeItem[];
24
+ nodeize(originNode: Node, parentNode: MLASTParentNode | null, depth: number): readonly MLASTNodeTreeItem[];
25
+ afterNodeize(siblings: readonly MLASTNodeTreeItem[], parentNode: MLASTParentNode | null, depth: number): {
26
+ siblings: import("@markuplint/ml-ast").MLASTChildNode[];
27
+ ancestors: MLASTNodeTreeItem[];
28
+ };
29
+ visitText(token: ChildToken): readonly MLASTNodeTreeItem[];
30
+ visitSpreadAttr(): null;
31
+ }
32
+ export declare const parser: HtmlParser;
33
+ export {};
package/lib/parser.js ADDED
@@ -0,0 +1,162 @@
1
+ import { Parser } from '@markuplint/parser-utils';
2
+ import { parse, parseFragment } from 'parse5';
3
+ import { isDocumentFragment } from './is-document-fragment.js';
4
+ import { optimizeStartsHeadTagOrBodyTagResume, optimizeStartsHeadTagOrBodyTagSetup, } from './optimize-starts-head-or-body.js';
5
+ export class HtmlParser extends Parser {
6
+ constructor(options) {
7
+ super(options, {
8
+ startsHeadTagOrBodyTag: null,
9
+ afterPosition: {
10
+ endOffset: 0,
11
+ endLine: 1,
12
+ endCol: 1,
13
+ depth: 0,
14
+ },
15
+ });
16
+ }
17
+ tokenize() {
18
+ const isFragment = isDocumentFragment(this.rawCode);
19
+ const parseFn = isFragment ? parseFragment : parse;
20
+ const doc = parseFn(this.rawCode, {
21
+ scriptingEnabled: false,
22
+ sourceCodeLocationInfo: true,
23
+ });
24
+ const childNodes = doc.childNodes;
25
+ return {
26
+ ast: childNodes,
27
+ isFragment,
28
+ };
29
+ }
30
+ beforeParse(rawCode, options) {
31
+ rawCode = super.beforeParse(rawCode, options);
32
+ const replacements = optimizeStartsHeadTagOrBodyTagSetup(rawCode);
33
+ if (replacements?.code) {
34
+ this.state.startsHeadTagOrBodyTag = replacements;
35
+ return replacements.code;
36
+ }
37
+ this.state.afterPosition = {
38
+ endOffset: (options?.offsetOffset ?? 0) + this.state.afterPosition.endOffset,
39
+ endLine: (options?.offsetLine ?? 0) + this.state.afterPosition.endLine,
40
+ endCol: (options?.offsetColumn ?? 0) + this.state.afterPosition.endCol,
41
+ depth: this.state.afterPosition.depth,
42
+ };
43
+ return rawCode;
44
+ }
45
+ afterParse(nodeList, options) {
46
+ nodeList = super.afterParse(nodeList, options);
47
+ if (this.state.startsHeadTagOrBodyTag) {
48
+ return optimizeStartsHeadTagOrBodyTagResume(this, nodeList, this.state.startsHeadTagOrBodyTag);
49
+ }
50
+ return nodeList;
51
+ }
52
+ nodeize(
53
+ // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
54
+ originNode, parentNode, depth) {
55
+ const namespace = 'namespaceURI' in originNode ? originNode.namespaceURI : '';
56
+ const location = originNode.sourceCodeLocation;
57
+ if (!location) {
58
+ // Ghost element
59
+ const afterNode = this.state.afterPosition.depth === depth ? this.state.afterPosition : parentNode;
60
+ const startOffset = afterNode?.endOffset ?? 0;
61
+ const startLine = afterNode?.endLine ?? 0;
62
+ const startCol = afterNode?.endCol ?? 0;
63
+ const childNodes = 'childNodes' in originNode ? originNode.childNodes : [];
64
+ return this.visitElement({
65
+ raw: '',
66
+ startOffset,
67
+ startLine,
68
+ startCol,
69
+ depth,
70
+ parentNode,
71
+ nodeName: originNode.nodeName,
72
+ namespace,
73
+ }, childNodes);
74
+ }
75
+ const { startOffset, endOffset } = location;
76
+ const token = this.sliceFragment(startOffset, endOffset ?? startOffset);
77
+ switch (originNode.nodeName) {
78
+ case '#documentType': {
79
+ if (!('name' in originNode)) {
80
+ throw new TypeError("DocumentType doesn't have name");
81
+ }
82
+ return this.visitDoctype({
83
+ ...token,
84
+ depth,
85
+ name: originNode.name ?? '',
86
+ publicId: originNode.publicId ?? '',
87
+ systemId: originNode.systemId ?? '',
88
+ parentNode,
89
+ });
90
+ }
91
+ case '#text': {
92
+ return this.visitText({
93
+ ...token,
94
+ depth,
95
+ parentNode,
96
+ });
97
+ }
98
+ case '#comment': {
99
+ return this.visitComment({
100
+ ...token,
101
+ depth,
102
+ parentNode,
103
+ });
104
+ }
105
+ default: {
106
+ const tagLoc = 'startTag' in location ? location.startTag : null;
107
+ const offset = tagLoc?.startOffset ?? startOffset;
108
+ const endOffset = tagLoc?.endOffset ?? offset;
109
+ const startTagToken = this.sliceFragment(offset, endOffset);
110
+ const childNodes = 'childNodes' in originNode
111
+ ? originNode.nodeName === 'template' && 'content' in originNode
112
+ ? originNode.content.childNodes
113
+ : originNode.childNodes
114
+ : [];
115
+ return this.visitElement({
116
+ ...startTagToken,
117
+ depth,
118
+ parentNode,
119
+ nodeName: originNode.nodeName,
120
+ namespace,
121
+ }, childNodes, {
122
+ createEndTagToken: () => {
123
+ const endTagLoc = 'endTag' in location ? location.endTag : null;
124
+ if (!endTagLoc) {
125
+ return null;
126
+ }
127
+ const { startOffset, endOffset } = endTagLoc;
128
+ const endTagToken = this.sliceFragment(startOffset, endOffset);
129
+ return {
130
+ ...endTagToken,
131
+ depth,
132
+ parentNode,
133
+ };
134
+ },
135
+ });
136
+ }
137
+ }
138
+ }
139
+ afterNodeize(siblings, parentNode, depth) {
140
+ const after = super.afterNodeize(siblings, parentNode, depth);
141
+ const prevNode = after.siblings.at(-1) ?? after.ancestors.findLast(n => n.depth === depth);
142
+ if (prevNode) {
143
+ this.state.afterPosition = {
144
+ endOffset: prevNode.endOffset,
145
+ endLine: prevNode.endLine,
146
+ endCol: prevNode.endCol,
147
+ depth,
148
+ };
149
+ }
150
+ return after;
151
+ }
152
+ visitText(token) {
153
+ return super.visitText(token, {
154
+ researchTags: true,
155
+ invalidTagAsText: true,
156
+ });
157
+ }
158
+ visitSpreadAttr() {
159
+ return null;
160
+ }
161
+ }
162
+ export const parser = new HtmlParser();
package/lib/types.d.ts ADDED
@@ -0,0 +1,9 @@
1
+ export type { ParserOptions, DefaultTreeAdapterMap } from 'parse5';
2
+ import type { DefaultTreeAdapterMap } from 'parse5';
3
+ export type Node = DefaultTreeAdapterMap['node'];
4
+ export type TextNode = DefaultTreeAdapterMap['textNode'];
5
+ export type Element = DefaultTreeAdapterMap['element'];
6
+ export type CommentNode = DefaultTreeAdapterMap['commentNode'];
7
+ export type Document = DefaultTreeAdapterMap['document'];
8
+ export type DocumentFragment = DefaultTreeAdapterMap['documentFragment'];
9
+ export type ChildNode = DefaultTreeAdapterMap['childNode'];
package/lib/types.js ADDED
@@ -0,0 +1 @@
1
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@markuplint/html-parser",
3
- "version": "4.0.0-dev.28+0131de5e",
3
+ "version": "4.0.0-rc.0",
4
4
  "description": "HTML parser for markuplint",
5
5
  "repository": "git@github.com:markuplint/markuplint.git",
6
6
  "author": "Yusuke Hirao <yusukehirao@me.com>",
@@ -25,10 +25,10 @@
25
25
  "clean": "tsc --build --clean"
26
26
  },
27
27
  "dependencies": {
28
- "@markuplint/ml-ast": "4.0.0-dev.28+0131de5e",
29
- "@markuplint/parser-utils": "4.0.0-dev.28+0131de5e",
28
+ "@markuplint/ml-ast": "4.0.0-rc.0",
29
+ "@markuplint/parser-utils": "4.0.0-rc.0",
30
30
  "parse5": "7.1.2",
31
- "type-fest": "^4.5.0"
31
+ "type-fest": "^4.10.2"
32
32
  },
33
- "gitHead": "0131de5ea9dd6d3fd5472d7b414b66644c758881"
33
+ "gitHead": "3fdeb45cb69ed52b3a215a7520cea1181601443f"
34
34
  }
@@ -1,2 +0,0 @@
1
- import type { MLASTHTMLAttr } from '@markuplint/ml-ast';
2
- export default function attrTokenizer(raw: string, line: number, col: number, startOffset: number): MLASTHTMLAttr;
@@ -1,80 +0,0 @@
1
- import { tokenizer, uuid } from '@markuplint/parser-utils';
2
- const reAttrsInStartTag =
3
- // eslint-disable-next-line no-control-regex
4
- /(\s*)([^\u0000-\u001F "'/=>\u007F-\u009F]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|(\S*)))?/;
5
- export default function attrTokenizer(raw, line, col, startOffset) {
6
- const attrMatchedMap = raw.match(reAttrsInStartTag);
7
- if (!attrMatchedMap) {
8
- throw new SyntaxError('Illegal attribute token');
9
- }
10
- const spacesBeforeAttrString = attrMatchedMap[1] ?? '';
11
- const nameChars = attrMatchedMap[2] ?? '';
12
- const spacesBeforeEqualChars = attrMatchedMap[3] ?? '';
13
- const equalChars = attrMatchedMap[4] ?? null;
14
- const spacesAfterEqualChars = attrMatchedMap[5] ?? '';
15
- const quoteChars = attrMatchedMap[6] == null ? (attrMatchedMap[7] == null ? null : "'") : '"';
16
- const valueChars = attrMatchedMap[6] ?? attrMatchedMap[7] ?? attrMatchedMap[8] ?? (quoteChars ? '' : null);
17
- let offset = startOffset;
18
- const spacesBeforeName = tokenizer(spacesBeforeAttrString, line, col, offset);
19
- line = spacesBeforeName.endLine;
20
- col = spacesBeforeName.endCol;
21
- offset = spacesBeforeName.endOffset;
22
- const name = tokenizer(nameChars, line, col, offset);
23
- line = name.endLine;
24
- col = name.endCol;
25
- offset = name.endOffset;
26
- const spacesBeforeEqual = tokenizer(spacesBeforeEqualChars, line, col, offset);
27
- line = spacesBeforeEqual.endLine;
28
- col = spacesBeforeEqual.endCol;
29
- offset = spacesBeforeEqual.endOffset;
30
- const equal = tokenizer(equalChars, line, col, offset);
31
- line = equal.endLine;
32
- col = equal.endCol;
33
- offset = equal.endOffset;
34
- const spacesAfterEqual = tokenizer(spacesAfterEqualChars, line, col, offset);
35
- line = spacesAfterEqual.endLine;
36
- col = spacesAfterEqual.endCol;
37
- offset = spacesAfterEqual.endOffset;
38
- const startQuote = tokenizer(quoteChars, line, col, offset);
39
- line = startQuote.endLine;
40
- col = startQuote.endCol;
41
- offset = startQuote.endOffset;
42
- const value = tokenizer(valueChars, line, col, offset);
43
- line = value.endLine;
44
- col = value.endCol;
45
- offset = value.endOffset;
46
- const endQuote = tokenizer(quoteChars, line, col, offset);
47
- const attrToken = tokenizer(nameChars +
48
- spacesBeforeEqualChars +
49
- (equalChars ?? '') +
50
- spacesAfterEqualChars +
51
- (quoteChars ?? '') +
52
- (valueChars ?? '') +
53
- (quoteChars ?? ''), name.startLine, name.startCol, name.startOffset);
54
- return {
55
- type: 'html-attr',
56
- uuid: uuid(),
57
- raw: attrToken.raw,
58
- startOffset: attrToken.startOffset,
59
- endOffset: attrToken.endOffset,
60
- startLine: attrToken.startLine,
61
- endLine: attrToken.endLine,
62
- startCol: attrToken.startCol,
63
- endCol: attrToken.endCol,
64
- spacesBeforeName,
65
- name,
66
- spacesBeforeEqual,
67
- equal,
68
- spacesAfterEqual,
69
- startQuote,
70
- value,
71
- endQuote,
72
- isDuplicatable: false,
73
- nodeName: name.raw,
74
- parentNode: null,
75
- prevNode: null,
76
- nextNode: null,
77
- isFragment: false,
78
- isGhost: false,
79
- };
80
- }
@@ -1,2 +0,0 @@
1
- import type { MLASTNode } from '@markuplint/ml-ast';
2
- export declare function createTree(rawCode: string, isFragment: boolean, offsetOffset: number, offsetLine: number, offsetColumn: number): MLASTNode[];
@@ -1,255 +0,0 @@
1
- // @ts-nocheck TODO: Parse5(https://github.com/inikulin/parse5) supports to expose type definitions as submodules.
2
- import { detectElementType, getEndCol, getEndLine, sliceFragment, uuid } from '@markuplint/parser-utils';
3
- import { parse, parseFragment } from 'parse5';
4
- import parseRawTag from './parse-raw-tag.js';
5
- const P5_OPTIONS = {
6
- scriptingEnabled: false,
7
- sourceCodeLocationInfo: true,
8
- };
9
- export function createTree(rawCode, isFragment, offsetOffset, offsetLine, offsetColumn) {
10
- const doc = isFragment ? parseFragment(rawCode, P5_OPTIONS) : parse(rawCode, P5_OPTIONS);
11
- return createTreeRecursive(doc, null, rawCode, offsetOffset, offsetLine, offsetColumn);
12
- }
13
- function createTreeRecursive(rootNode,
14
- // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
15
- parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
16
- const nodeList = [];
17
- const childNodes = getChildNodes(rootNode);
18
- let prevNode = null;
19
- for (const p5node of childNodes) {
20
- const node = nodeize(p5node, prevNode, parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn);
21
- if (!node) {
22
- continue;
23
- }
24
- if (prevNode) {
25
- if (node.type !== 'endtag') {
26
- prevNode.nextNode = node;
27
- }
28
- node.prevNode = prevNode;
29
- }
30
- prevNode = node;
31
- nodeList.push(node);
32
- }
33
- return nodeList;
34
- }
35
- function nodeize(originNode,
36
- // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
37
- prevNode,
38
- // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
39
- parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
40
- const nextNode = null;
41
- const location = getLocation(originNode);
42
- if (!location) {
43
- const prevToken = prevNode ?? parentNode;
44
- const startOffset = prevToken ? prevToken.endOffset : 0;
45
- const endOffset = prevToken ? prevToken.endOffset : 0;
46
- const startLine = prevToken ? prevToken.endLine : 0;
47
- const endLine = prevToken ? prevToken.endLine : 0;
48
- const startCol = prevToken ? prevToken.endCol : 0;
49
- const endCol = prevToken ? prevToken.endCol : 0;
50
- const node = {
51
- uuid: uuid(),
52
- raw: '',
53
- startOffset: startOffset + offsetOffset,
54
- endOffset: endOffset + offsetOffset,
55
- startLine: startLine + offsetLine,
56
- endLine: endLine + offsetLine,
57
- startCol: startCol + (startLine === 1 ? offsetColumn : 0),
58
- endCol: endCol + (endLine === 1 ? offsetColumn : 0),
59
- nodeName: originNode.nodeName,
60
- type: 'starttag',
61
- namespace: getNamespace(originNode),
62
- elementType: 'html',
63
- attributes: [],
64
- hasSpreadAttr: false,
65
- pearNode: null,
66
- tagCloseChar: '',
67
- tagOpenChar: '',
68
- parentNode,
69
- prevNode,
70
- nextNode,
71
- isFragment: false,
72
- isGhost: true,
73
- };
74
- node.childNodes = createTreeRecursive(originNode, node, rawHtml, offsetOffset, offsetLine, offsetColumn);
75
- return node;
76
- }
77
- const { startOffset, endOffset, startLine, endLine, startCol, endCol } = location;
78
- const raw = rawHtml.slice(startOffset, endOffset ?? startOffset);
79
- switch (originNode.nodeName) {
80
- case '#documentType': {
81
- return {
82
- uuid: uuid(),
83
- raw,
84
- // @ts-ignore
85
- name: originNode.name ?? '',
86
- // @ts-ignore
87
- publicId: originNode.publicId ?? '',
88
- // @ts-ignore
89
- systemId: originNode.systemId ?? '',
90
- startOffset: startOffset + offsetOffset,
91
- endOffset: endOffset + offsetOffset,
92
- startLine: startLine + offsetLine,
93
- endLine: endLine + offsetLine,
94
- startCol: startCol + (startLine === 1 ? offsetColumn : 0),
95
- endCol: endCol + (endLine === 1 ? offsetColumn : 0),
96
- nodeName: '#doctype',
97
- type: 'doctype',
98
- parentNode,
99
- prevNode,
100
- _addPrevNode: 102,
101
- nextNode,
102
- isFragment: false,
103
- isGhost: false,
104
- };
105
- }
106
- case '#text': {
107
- const node = {
108
- uuid: uuid(),
109
- raw,
110
- startOffset: startOffset + offsetOffset,
111
- endOffset: endOffset + offsetOffset,
112
- startLine: startLine + offsetLine,
113
- endLine: endLine + offsetLine,
114
- startCol: startCol + (startLine === 1 ? offsetColumn : 0),
115
- endCol: endCol + (endLine === 1 ? offsetColumn : 0),
116
- nodeName: '#text',
117
- type: 'text',
118
- parentNode,
119
- prevNode,
120
- nextNode,
121
- isFragment: false,
122
- isGhost: false,
123
- };
124
- return node;
125
- }
126
- case '#comment': {
127
- return {
128
- uuid: uuid(),
129
- raw,
130
- startOffset: startOffset + offsetOffset,
131
- endOffset: endOffset + offsetOffset,
132
- startLine: startLine + offsetLine,
133
- endLine: endLine + offsetLine,
134
- startCol: startCol + (startLine === 1 ? offsetColumn : 0),
135
- endCol: endCol + (endLine === 1 ? offsetColumn : 0),
136
- nodeName: '#comment',
137
- type: 'comment',
138
- parentNode,
139
- prevNode,
140
- nextNode,
141
- isFragment: false,
142
- isGhost: false,
143
- };
144
- }
145
- default: {
146
- const tagLoc = 'startTag' in location ? location.startTag : null;
147
- const startTagRaw = tagLoc
148
- ? rawHtml.slice(tagLoc.startOffset, tagLoc.endOffset)
149
- : rawHtml.slice(startOffset, endOffset ?? startOffset);
150
- const tagTokens = parseRawTag(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
151
- const tagName = tagTokens.tagName;
152
- let endTag = null;
153
- let endTagLoc = 'endTag' in location ? location.endTag : null;
154
- /**
155
- * Patch: Create endTag for SVG Element
156
- * @see https://github.com/inikulin/parse5/issues/352
157
- */
158
- if (!endTagLoc &&
159
- 'namespaceURI' in originNode &&
160
- originNode.namespaceURI === 'http://www.w3.org/2000/svg') {
161
- const belowRawHTMLFromStartTagEnd = rawHtml.slice(location.endOffset);
162
- const endTagMatched = belowRawHTMLFromStartTagEnd.match(new RegExp(`^</\\s*${tagName}[^>]*>`, 'm'));
163
- const endTag = endTagMatched && endTagMatched[0];
164
- if (endTag) {
165
- endTagLoc = sliceFragment(rawHtml, location.endOffset, location.endOffset + endTag.length);
166
- }
167
- }
168
- if (endTagLoc) {
169
- const { startOffset, endOffset, startLine, endLine, startCol, endCol } = endTagLoc;
170
- const endTagRaw = rawHtml.slice(startOffset, endOffset);
171
- const endTagTokens = parseRawTag(endTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
172
- const endTagName = endTagTokens.tagName;
173
- endTag = {
174
- uuid: uuid(),
175
- raw: endTagRaw,
176
- startOffset: startOffset + offsetOffset,
177
- endOffset: endOffset + offsetOffset,
178
- startLine: startLine + offsetLine,
179
- endLine: endLine + offsetLine,
180
- startCol: startCol + (startLine === 1 ? offsetColumn : 0),
181
- endCol: endCol + (endLine === 1 ? offsetColumn : 0),
182
- nodeName: endTagName,
183
- type: 'endtag',
184
- namespace: getNamespace(originNode),
185
- attributes: endTagTokens.attrs,
186
- parentNode,
187
- prevNode,
188
- nextNode,
189
- pearNode: null,
190
- isFragment: false,
191
- isGhost: false,
192
- tagOpenChar: '</',
193
- tagCloseChar: '>',
194
- };
195
- }
196
- const _endOffset = startOffset + startTagRaw.length;
197
- const _endLine = getEndLine(startTagRaw, startLine);
198
- const _endCol = getEndCol(startTagRaw, startCol);
199
- const startTag = {
200
- uuid: uuid(),
201
- raw: startTagRaw,
202
- startOffset: startOffset + offsetOffset,
203
- endOffset: _endOffset + offsetOffset,
204
- startLine: startLine + offsetLine,
205
- endLine: _endLine + offsetLine,
206
- startCol: startCol + (startLine === 1 ? offsetColumn : 0),
207
- endCol: _endCol + (startLine === _endLine ? offsetColumn : 0),
208
- nodeName: tagName,
209
- type: 'starttag',
210
- namespace: getNamespace(originNode),
211
- elementType: detectElementType(tagName),
212
- attributes: tagTokens.attrs,
213
- hasSpreadAttr: false,
214
- parentNode,
215
- prevNode,
216
- nextNode,
217
- pearNode: endTag,
218
- selfClosingSolidus: tagTokens.selfClosingSolidus,
219
- endSpace: tagTokens.endSpace,
220
- isFragment: false,
221
- isGhost: false,
222
- tagOpenChar: '<',
223
- tagCloseChar: '>',
224
- };
225
- if (endTag) {
226
- endTag.pearNode = startTag;
227
- }
228
- startTag.childNodes = createTreeRecursive(originNode, startTag, rawHtml, offsetOffset, offsetLine, offsetColumn);
229
- return startTag;
230
- }
231
- }
232
- }
233
- /**
234
- * getChildNodes
235
- *
236
- * - If node has "content" property then parse as document fragment.
237
- */
238
- function getChildNodes(rootNode) {
239
- return rootNode.content ? rootNode.content.childNodes : rootNode.childNodes ?? [];
240
- }
241
- function hasLocation(node) {
242
- return 'sourceCodeLocation' in node;
243
- }
244
- function getLocation(node) {
245
- if (hasLocation(node) && node.sourceCodeLocation) {
246
- return node.sourceCodeLocation;
247
- }
248
- return null;
249
- }
250
- function getNamespace(node) {
251
- if ('namespaceURI' in node) {
252
- return node.namespaceURI;
253
- }
254
- return '';
255
- }
@@ -1,9 +0,0 @@
1
- import type { MLASTAttr, MLToken } from '@markuplint/ml-ast';
2
- type TagTokens = {
3
- tagName: string;
4
- attrs: MLASTAttr[];
5
- selfClosingSolidus: MLToken;
6
- endSpace: MLToken;
7
- };
8
- export default function parseRawTag(raw: string, startLine: number, startCol: number, startOffset: number, offsetOffset?: number, offsetLine?: number, offsetColumn?: number): TagTokens;
9
- export {};
@@ -1,51 +0,0 @@
1
- import { reTag, reTagName, isPotentialCustomElementName, tokenizer } from '@markuplint/parser-utils';
2
- import attrTokenizer from './attr-tokenizer.js';
3
- // eslint-disable-next-line no-control-regex
4
- const reAttrsInStartTag = /\s*[^\u0000-\u001F "'/=>\u007F-\u009F]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|\S*))?/;
5
- const reEndTokens = /(\s*\/)?(\s*)>$/;
6
- export default function parseRawTag(raw, startLine, startCol, startOffset, offsetOffset = 0, offsetLine = 0, offsetColumn = 0) {
7
- let offset = startOffset + offsetOffset;
8
- let line = startLine + offsetLine;
9
- let col = startCol + (startLine === 1 ? offsetColumn : 0);
10
- const matches = raw.match(reTag);
11
- const tagWithAttrs = matches?.[1];
12
- if (!tagWithAttrs) {
13
- throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
14
- }
15
- // eslint-disable-next-line no-control-regex
16
- const tagNameSplitted = tagWithAttrs.split(/[\u0000\u0009\u000A\u000C />]/);
17
- const tagName = tagNameSplitted[0] || tagNameSplitted[1];
18
- if (!tagName || (!reTagName.test(tagName) && !isPotentialCustomElementName(tagName))) {
19
- throw new SyntaxError(`Invalid tag name: "${tagName}" in <${tagWithAttrs}>`);
20
- }
21
- const tagStartPos = tagWithAttrs.indexOf(tagName);
22
- let rawAttrs = tagWithAttrs.slice(Math.max(0, tagStartPos + tagName.length));
23
- // console.log({ raw, tagStartPos, tagName, rawAttrs });
24
- col += tagName.length + 1 + tagStartPos;
25
- offset += tagName.length + 1 + tagStartPos;
26
- const attrs = [];
27
- while (reAttrsInStartTag.test(rawAttrs)) {
28
- const attrMatchedMap = rawAttrs.match(reAttrsInStartTag);
29
- if (attrMatchedMap && attrMatchedMap[0]) {
30
- const rawAttr = attrMatchedMap[0];
31
- const attr = attrTokenizer(rawAttr, line, col, offset);
32
- line = attr.endLine;
33
- col = attr.endCol;
34
- offset = attr.endOffset;
35
- rawAttrs = rawAttrs.slice(rawAttr.length);
36
- attrs.push(attr);
37
- }
38
- }
39
- const endTokens = reEndTokens.exec(raw);
40
- const selfClosingSolidus = tokenizer(endTokens?.[1] ?? '', line, col, offset);
41
- line = selfClosingSolidus.endLine;
42
- col = selfClosingSolidus.endCol;
43
- offset = selfClosingSolidus.endOffset;
44
- const endSpace = tokenizer(endTokens?.[2] ?? '', line, col, offset);
45
- return {
46
- tagName,
47
- attrs,
48
- selfClosingSolidus,
49
- endSpace,
50
- };
51
- }
package/lib/parse.d.ts DELETED
@@ -1,2 +0,0 @@
1
- import type { Parse } from '@markuplint/ml-ast';
2
- export declare const parse: Parse;
package/lib/parse.js DELETED
@@ -1,23 +0,0 @@
1
- import { ignoreFrontMatter, flattenNodes } from '@markuplint/parser-utils';
2
- import { createTree } from './create-tree.js';
3
- import isDocumentFragment from './is-document-fragment.js';
4
- import { isStartsHeadTagOrBodyTag, optimizeStartsHeadTagOrBodyTagResume, optimizeStartsHeadTagOrBodyTagSetup, } from './optimize-starts-head-or-body.js';
5
- export const parse = (rawCode, options) => {
6
- if (options?.ignoreFrontMatter) {
7
- rawCode = ignoreFrontMatter(rawCode);
8
- }
9
- const isFragment = isDocumentFragment(rawCode);
10
- const data = isStartsHeadTagOrBodyTag(rawCode) ? optimizeStartsHeadTagOrBodyTagSetup(rawCode) : null;
11
- if (data?.code) {
12
- rawCode = data.code;
13
- }
14
- const nodeTree = createTree(rawCode, isFragment, options?.offsetOffset ?? 0, options?.offsetLine ?? 0, options?.offsetColumn ?? 0);
15
- const nodeList = flattenNodes(nodeTree, rawCode);
16
- if (data) {
17
- optimizeStartsHeadTagOrBodyTagResume(nodeList, data);
18
- }
19
- return {
20
- nodeList,
21
- isFragment,
22
- };
23
- };