@markuplint/html-parser 4.6.23 → 5.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +12 -22
- package/CHANGELOG.md +23 -0
- package/lib/index.d.ts +0 -1
- package/lib/index.js +0 -1
- package/lib/parser.js +14 -13
- package/package.json +8 -5
package/ARCHITECTURE.md
CHANGED
|
@@ -8,10 +8,9 @@
|
|
|
8
8
|
|
|
9
9
|
```
|
|
10
10
|
src/
|
|
11
|
-
├── index.ts — Re-exports HtmlParser
|
|
11
|
+
├── index.ts — Re-exports HtmlParser and parser
|
|
12
12
|
├── parser.ts — HtmlParser class extending Parser<Node, State>
|
|
13
13
|
├── types.ts — Re-exports parse5 types (Node, Element, etc.)
|
|
14
|
-
├── get-namespace.ts — Namespace URI resolution (HTML/SVG/MathML)
|
|
15
14
|
├── is-document-fragment.ts — Regex-based fragment vs document detection
|
|
16
15
|
└── optimize-starts-head-or-body.ts — Head/body tag placeholder optimization
|
|
17
16
|
```
|
|
@@ -28,7 +27,6 @@ flowchart TD
|
|
|
28
27
|
|
|
29
28
|
subgraph pkg ["@markuplint/html-parser"]
|
|
30
29
|
htmlParser["HtmlParser\nextends Parser‹Node, State›"]
|
|
31
|
-
getNs["getNamespace()\nNamespace resolution"]
|
|
32
30
|
isFragment["isDocumentFragment()\nFragment detection"]
|
|
33
31
|
optimize["optimizeStartsHeadTagOrBodyTag\nHead/body optimization"]
|
|
34
32
|
types["types.ts\nparse5 type re-exports"]
|
|
@@ -44,11 +42,8 @@ flowchart TD
|
|
|
44
42
|
mlAst -->|"AST types"| htmlParser
|
|
45
43
|
parserUtils -->|"Parser base class"| htmlParser
|
|
46
44
|
parse5 -->|"parse / parseFragment"| htmlParser
|
|
47
|
-
parse5 -->|"parseFragment"| getNs
|
|
48
|
-
|
|
49
45
|
htmlParser --> isFragment
|
|
50
46
|
htmlParser --> optimize
|
|
51
|
-
htmlParser --> getNs
|
|
52
47
|
|
|
53
48
|
htmlParser -->|"extends / imports"| downstream
|
|
54
49
|
```
|
|
@@ -73,15 +68,15 @@ The parser maintains internal state through the `State` type:
|
|
|
73
68
|
|
|
74
69
|
### Override Methods
|
|
75
70
|
|
|
76
|
-
| Method | Purpose |
|
|
77
|
-
| ------------------- | ------------------------------------------------------------------------- |
|
|
78
|
-
| `tokenize()` | Invokes parse5 `parse()` or `parseFragment()` based on fragment detection |
|
|
79
|
-
| `beforeParse()` | Sets up head/body optimization and offset tracking |
|
|
80
|
-
| `afterParse()` | Restores original head/body tag names from placeholders |
|
|
81
|
-
| `nodeize()` | Converts parse5 nodes to markuplint AST nodes, handling ghost elements |
|
|
82
|
-
| `afterNodeize()` | Updates `afterPosition` state for ghost element positioning |
|
|
83
|
-
| `visitText()` | Delegates to parent with `researchTags: true` and `invalidTagAsText: true` |
|
|
84
|
-
| `visitSpreadAttr()` | Returns `null` (HTML does not support spread attributes) |
|
|
71
|
+
| Method | Purpose |
|
|
72
|
+
| ------------------- | ------------------------------------------------------------------------------------------- |
|
|
73
|
+
| `tokenize()` | Invokes parse5 `parse()` or `parseFragment()` based on fragment detection |
|
|
74
|
+
| `beforeParse()` | Sets up head/body optimization and offset tracking |
|
|
75
|
+
| `afterParse()` | Restores original head/body tag names from placeholders |
|
|
76
|
+
| `nodeize()` | Converts parse5 nodes to markuplint AST nodes, handling ghost elements and template content |
|
|
77
|
+
| `afterNodeize()` | Updates `afterPosition` state for ghost element positioning |
|
|
78
|
+
| `visitText()` | Delegates to parent with `researchTags: true` and `invalidTagAsText: true` |
|
|
79
|
+
| `visitSpreadAttr()` | Returns `null` (HTML does not support spread attributes) |
|
|
85
80
|
|
|
86
81
|
## Parse Pipeline
|
|
87
82
|
|
|
@@ -91,7 +86,7 @@ The HTML-specific pipeline extends the base `Parser` pipeline:
|
|
|
91
86
|
flowchart LR
|
|
92
87
|
A["beforeParse\n- super.beforeParse()\n- head/body optimization setup\n- offset tracking"]
|
|
93
88
|
B["tokenize\n- isDocumentFragment() check\n- parse5 parse/parseFragment"]
|
|
94
|
-
C["nodeize\n- Ghost element handling\n- Doctype/text/comment/element dispatch\n- Template content extraction
|
|
89
|
+
C["nodeize\n- Ghost element handling\n- Doctype/text/comment/element dispatch\n- Template content extraction"]
|
|
95
90
|
D["afterNodeize\n- Update afterPosition state"]
|
|
96
91
|
E["afterParse\n- Restore head/body names"]
|
|
97
92
|
|
|
@@ -132,12 +127,7 @@ The optimization uses a placeholder replacement strategy:
|
|
|
132
127
|
|
|
133
128
|
## Namespace Resolution
|
|
134
129
|
|
|
135
|
-
`getNamespace()`
|
|
136
|
-
|
|
137
|
-
- **Default**: `http://www.w3.org/1999/xhtml` (HTML namespace)
|
|
138
|
-
- **SVG context**: When the parent namespace is `http://www.w3.org/2000/svg`, wraps the tag in `<svg>` and parses to determine the resolved namespace
|
|
139
|
-
- **MathML context**: When the parent namespace is `http://www.w3.org/1998/Math/MathML`, wraps in `<math>` and parses
|
|
140
|
-
- **Fallback**: For tags that produce no nodes as fragments, falls back to `parse()` (full document mode)
|
|
130
|
+
Namespace resolution is handled by `getNamespace()` in `@markuplint/parser-utils`. The HTML parser delegates namespace detection to the base `Parser` class, which automatically determines namespaces from tag names and parent node context.
|
|
141
131
|
|
|
142
132
|
## Fragment vs Document Detection
|
|
143
133
|
|
package/CHANGELOG.md
CHANGED
|
@@ -3,6 +3,29 @@
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
See [Conventional Commits](https://conventionalcommits.org) for commit guidelines.
|
|
5
5
|
|
|
6
|
+
# [5.0.0-alpha.1](https://github.com/markuplint/markuplint/compare/v5.0.0-alpha.0...v5.0.0-alpha.1) (2026-02-22)
|
|
7
|
+
|
|
8
|
+
**Note:** Version bump only for package @markuplint/html-parser
|
|
9
|
+
|
|
10
|
+
# [5.0.0-alpha.0](https://github.com/markuplint/markuplint/compare/v4.14.1...v5.0.0-alpha.0) (2026-02-20)
|
|
11
|
+
|
|
12
|
+
### Bug Fixes
|
|
13
|
+
|
|
14
|
+
- **ml-core:** improve detection of namespace ([5b507ad](https://github.com/markuplint/markuplint/commit/5b507ad7c19c5015b8ce587845d901e31dfa6518))
|
|
15
|
+
|
|
16
|
+
- refactor(html-parser)!: update for simplified AST token properties ([524ce5d](https://github.com/markuplint/markuplint/commit/524ce5d6fc23c8bff73583ed4ac42fdff1759938))
|
|
17
|
+
|
|
18
|
+
### BREAKING CHANGES
|
|
19
|
+
|
|
20
|
+
- Adapt to renamed MLASTToken properties.
|
|
21
|
+
|
|
22
|
+
* Use getEndPosition() for ghost element position calculation
|
|
23
|
+
* Update test assertions: startCol -> col, startOffset -> offset,
|
|
24
|
+
startLine -> line
|
|
25
|
+
* Remove endOffset/endLine/endCol assertions from tests
|
|
26
|
+
|
|
27
|
+
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
|
|
28
|
+
|
|
6
29
|
## [4.6.23](https://github.com/markuplint/markuplint/compare/@markuplint/html-parser@4.6.22...@markuplint/html-parser@4.6.23) (2026-02-10)
|
|
7
30
|
|
|
8
31
|
**Note:** Version bump only for package @markuplint/html-parser
|
package/lib/index.d.ts
CHANGED
package/lib/index.js
CHANGED
package/lib/parser.js
CHANGED
|
@@ -2,6 +2,7 @@ import { Parser } from '@markuplint/parser-utils';
|
|
|
2
2
|
import { parse, parseFragment } from 'parse5';
|
|
3
3
|
import { isDocumentFragment } from './is-document-fragment.js';
|
|
4
4
|
import { optimizeStartsHeadTagOrBodyTagResume, optimizeStartsHeadTagOrBodyTagSetup, } from './optimize-starts-head-or-body.js';
|
|
5
|
+
import { getEndPosition } from '@markuplint/parser-utils/location';
|
|
5
6
|
/**
|
|
6
7
|
* Parser implementation for standard HTML, built on top of parse5.
|
|
7
8
|
* Handles document and fragment parsing, ghost elements (omitted tags),
|
|
@@ -57,24 +58,26 @@ export class HtmlParser extends Parser {
|
|
|
57
58
|
nodeize(
|
|
58
59
|
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
59
60
|
originNode, parentNode, depth) {
|
|
60
|
-
const namespace = 'namespaceURI' in originNode ? originNode.namespaceURI : '';
|
|
61
61
|
const location = originNode.sourceCodeLocation;
|
|
62
62
|
if (!location) {
|
|
63
63
|
// Ghost element
|
|
64
|
-
const afterNode = this.state.afterPosition.depth === depth
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
64
|
+
const afterNode = this.state.afterPosition.depth === depth
|
|
65
|
+
? this.state.afterPosition
|
|
66
|
+
: parentNode
|
|
67
|
+
? getEndPosition(parentNode.raw, parentNode.offset, parentNode.line, parentNode.col)
|
|
68
|
+
: null;
|
|
69
|
+
const offset = afterNode?.endOffset ?? 0;
|
|
70
|
+
const line = afterNode?.endLine ?? 0;
|
|
71
|
+
const col = afterNode?.endCol ?? 0;
|
|
68
72
|
const childNodes = 'childNodes' in originNode ? originNode.childNodes : [];
|
|
69
73
|
return this.visitElement({
|
|
70
74
|
raw: '',
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
75
|
+
offset,
|
|
76
|
+
line,
|
|
77
|
+
col,
|
|
74
78
|
depth,
|
|
75
79
|
parentNode,
|
|
76
80
|
nodeName: originNode.nodeName,
|
|
77
|
-
namespace,
|
|
78
81
|
}, childNodes);
|
|
79
82
|
}
|
|
80
83
|
const { startOffset, endOffset } = location;
|
|
@@ -122,7 +125,6 @@ export class HtmlParser extends Parser {
|
|
|
122
125
|
depth,
|
|
123
126
|
parentNode,
|
|
124
127
|
nodeName: originNode.nodeName,
|
|
125
|
-
namespace,
|
|
126
128
|
}, childNodes, {
|
|
127
129
|
createEndTagToken: () => {
|
|
128
130
|
const endTagLoc = 'endTag' in location ? location.endTag : null;
|
|
@@ -145,10 +147,9 @@ export class HtmlParser extends Parser {
|
|
|
145
147
|
const after = super.afterNodeize(siblings, parentNode, depth);
|
|
146
148
|
const prevNode = after.siblings.at(-1) ?? after.ancestors.findLast(n => n.depth === depth);
|
|
147
149
|
if (prevNode) {
|
|
150
|
+
const endPos = getEndPosition(prevNode.raw, prevNode.offset, prevNode.line, prevNode.col);
|
|
148
151
|
this.state.afterPosition = {
|
|
149
|
-
|
|
150
|
-
endLine: prevNode.endLine,
|
|
151
|
-
endCol: prevNode.endCol,
|
|
152
|
+
...endPos,
|
|
152
153
|
depth,
|
|
153
154
|
};
|
|
154
155
|
}
|
package/package.json
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@markuplint/html-parser",
|
|
3
|
-
"version": "4.6.23",
|
|
3
|
+
"version": "5.0.0-alpha.1",
|
|
4
4
|
"description": "HTML parser for markuplint",
|
|
5
5
|
"repository": "git@github.com:markuplint/markuplint.git",
|
|
6
6
|
"author": "Yusuke Hirao <yusukehirao@me.com>",
|
|
7
7
|
"license": "MIT",
|
|
8
|
+
"engines": {
|
|
9
|
+
"node": ">=22"
|
|
10
|
+
},
|
|
8
11
|
"type": "module",
|
|
9
12
|
"exports": {
|
|
10
13
|
".": {
|
|
@@ -24,10 +27,10 @@
|
|
|
24
27
|
"clean": "tsc --build --clean tsconfig.build.json"
|
|
25
28
|
},
|
|
26
29
|
"dependencies": {
|
|
27
|
-
"@markuplint/ml-ast": "
|
|
28
|
-
"@markuplint/parser-utils": "
|
|
30
|
+
"@markuplint/ml-ast": "5.0.0-alpha.1",
|
|
31
|
+
"@markuplint/parser-utils": "5.0.0-alpha.1",
|
|
29
32
|
"parse5": "8.0.0",
|
|
30
|
-
"type-fest": "4.
|
|
33
|
+
"type-fest": "5.4.4"
|
|
31
34
|
},
|
|
32
|
-
"gitHead": "
|
|
35
|
+
"gitHead": "78a295e73a097a1ce09c777c06fa21ab68136387"
|
|
33
36
|
}
|