justhtml-0.12.0-py3-none-any.whl → justhtml-0.38.0-py3-none-any.whl
This diff shows the changes between two publicly available versions of this package as they were released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of justhtml might be problematic. Click here for more details.
- justhtml/__init__.py +48 -0
- justhtml/__main__.py +86 -17
- justhtml/constants.py +12 -0
- justhtml/entities.py +45 -7
- justhtml/errors.py +17 -3
- justhtml/linkify.py +438 -0
- justhtml/node.py +385 -97
- justhtml/parser.py +139 -16
- justhtml/sanitize.py +992 -0
- justhtml/selector.py +117 -19
- justhtml/serialize.py +671 -41
- justhtml/tokenizer.py +364 -194
- justhtml/tokens.py +28 -5
- justhtml/transforms.py +2568 -0
- justhtml/treebuilder.py +297 -204
- justhtml/treebuilder_modes.py +208 -138
- justhtml-0.38.0.dist-info/METADATA +213 -0
- justhtml-0.38.0.dist-info/RECORD +26 -0
- {justhtml-0.12.0.dist-info → justhtml-0.38.0.dist-info}/licenses/LICENSE +4 -1
- justhtml-0.12.0.dist-info/METADATA +0 -164
- justhtml-0.12.0.dist-info/RECORD +0 -23
- {justhtml-0.12.0.dist-info → justhtml-0.38.0.dist-info}/WHEEL +0 -0
- {justhtml-0.12.0.dist-info → justhtml-0.38.0.dist-info}/entry_points.txt +0 -0
justhtml/tokens.py
CHANGED
|
@@ -4,7 +4,7 @@ from typing import Literal
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class Tag:
|
|
7
|
-
__slots__ = ("attrs", "kind", "name", "self_closing")
|
|
7
|
+
__slots__ = ("attrs", "end_pos", "kind", "name", "self_closing", "start_pos")
|
|
8
8
|
|
|
9
9
|
START: Literal[0] = 0
|
|
10
10
|
END: Literal[1] = 1
|
|
@@ -12,7 +12,9 @@ class Tag:
|
|
|
12
12
|
kind: int
|
|
13
13
|
name: str
|
|
14
14
|
attrs: dict[str, str | None]
|
|
15
|
+
end_pos: int | None
|
|
15
16
|
self_closing: bool
|
|
17
|
+
start_pos: int | None
|
|
16
18
|
|
|
17
19
|
def __init__(
|
|
18
20
|
self,
|
|
@@ -20,11 +22,15 @@ class Tag:
|
|
|
20
22
|
name: str,
|
|
21
23
|
attrs: dict[str, str | None] | None,
|
|
22
24
|
self_closing: bool = False,
|
|
25
|
+
start_pos: int | None = None,
|
|
26
|
+
end_pos: int | None = None,
|
|
23
27
|
) -> None:
|
|
24
28
|
self.kind = kind
|
|
25
29
|
self.name = name
|
|
26
30
|
self.attrs = attrs if attrs is not None else {}
|
|
27
31
|
self.self_closing = bool(self_closing)
|
|
32
|
+
self.start_pos = start_pos
|
|
33
|
+
self.end_pos = end_pos
|
|
28
34
|
|
|
29
35
|
|
|
30
36
|
class CharacterTokens:
|
|
@@ -37,12 +43,14 @@ class CharacterTokens:
|
|
|
37
43
|
|
|
38
44
|
|
|
39
45
|
class CommentToken:
|
|
40
|
-
__slots__ = ("data",)
|
|
46
|
+
__slots__ = ("data", "start_pos")
|
|
41
47
|
|
|
42
48
|
data: str
|
|
49
|
+
start_pos: int | None
|
|
43
50
|
|
|
44
|
-
def __init__(self, data: str) -> None:
|
|
51
|
+
def __init__(self, data: str, start_pos: int | None = None) -> None:
|
|
45
52
|
self.data = data
|
|
53
|
+
self.start_pos = start_pos
|
|
46
54
|
|
|
47
55
|
|
|
48
56
|
class Doctype:
|
|
@@ -79,6 +87,9 @@ class EOFToken:
|
|
|
79
87
|
__slots__ = ()
|
|
80
88
|
|
|
81
89
|
|
|
90
|
+
AnyToken = Tag | CharacterTokens | CommentToken | DoctypeToken | EOFToken
|
|
91
|
+
|
|
92
|
+
|
|
82
93
|
class TokenSinkResult:
|
|
83
94
|
__slots__ = ()
|
|
84
95
|
|
|
@@ -89,8 +100,9 @@ class TokenSinkResult:
|
|
|
89
100
|
class ParseError:
|
|
90
101
|
"""Represents a parse error with location information."""
|
|
91
102
|
|
|
92
|
-
__slots__ = ("_end_column", "_source_html", "code", "column", "line", "message")
|
|
103
|
+
__slots__ = ("_end_column", "_source_html", "category", "code", "column", "line", "message")
|
|
93
104
|
|
|
105
|
+
category: str
|
|
94
106
|
code: str
|
|
95
107
|
line: int | None
|
|
96
108
|
column: int | None
|
|
@@ -105,10 +117,12 @@ class ParseError:
|
|
|
105
117
|
code: str,
|
|
106
118
|
line: int | None = None,
|
|
107
119
|
column: int | None = None,
|
|
120
|
+
category: str = "parse",
|
|
108
121
|
message: str | None = None,
|
|
109
122
|
source_html: str | None = None,
|
|
110
123
|
end_column: int | None = None,
|
|
111
124
|
) -> None:
|
|
125
|
+
self.category = category
|
|
112
126
|
self.code = code
|
|
113
127
|
self.line = line
|
|
114
128
|
self.column = column
|
|
@@ -118,7 +132,11 @@ class ParseError:
|
|
|
118
132
|
|
|
119
133
|
def __repr__(self) -> str:
|
|
120
134
|
if self.line is not None and self.column is not None:
|
|
135
|
+
if self.category != "parse":
|
|
136
|
+
return f"ParseError({self.code!r}, line={self.line}, column={self.column}, category={self.category!r})"
|
|
121
137
|
return f"ParseError({self.code!r}, line={self.line}, column={self.column})"
|
|
138
|
+
if self.category != "parse":
|
|
139
|
+
return f"ParseError({self.code!r}, category={self.category!r})"
|
|
122
140
|
return f"ParseError({self.code!r})"
|
|
123
141
|
|
|
124
142
|
def __str__(self) -> str:
|
|
@@ -133,7 +151,12 @@ class ParseError:
|
|
|
133
151
|
def __eq__(self, other: object) -> bool:
|
|
134
152
|
if not isinstance(other, ParseError):
|
|
135
153
|
return NotImplemented
|
|
136
|
-
return
|
|
154
|
+
return (
|
|
155
|
+
self.category == other.category
|
|
156
|
+
and self.code == other.code
|
|
157
|
+
and self.line == other.line
|
|
158
|
+
and self.column == other.column
|
|
159
|
+
)
|
|
137
160
|
|
|
138
161
|
def as_exception(self, end_column: int | None = None) -> SyntaxError:
|
|
139
162
|
"""Convert to a SyntaxError-like exception with source highlighting.
|