justhtml 0.6.0__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- justhtml/__init__.py +28 -0
- justhtml/__main__.py +161 -13
- justhtml/constants.py +17 -1
- justhtml/context.py +7 -1
- justhtml/encoding.py +405 -0
- justhtml/entities.py +57 -17
- justhtml/errors.py +20 -4
- justhtml/linkify.py +438 -0
- justhtml/node.py +738 -41
- justhtml/parser.py +188 -21
- justhtml/py.typed +0 -0
- justhtml/sanitize.py +1141 -0
- justhtml/selector.py +240 -104
- justhtml/serialize.py +418 -57
- justhtml/stream.py +34 -10
- justhtml/tokenizer.py +433 -289
- justhtml/tokens.py +91 -23
- justhtml/transforms.py +690 -0
- justhtml/treebuilder.py +196 -111
- justhtml/treebuilder_modes.py +191 -117
- justhtml/treebuilder_utils.py +11 -4
- justhtml-0.33.0.dist-info/METADATA +196 -0
- justhtml-0.33.0.dist-info/RECORD +26 -0
- justhtml-0.33.0.dist-info/entry_points.txt +2 -0
- {justhtml-0.6.0.dist-info → justhtml-0.33.0.dist-info}/licenses/LICENSE +4 -1
- justhtml-0.6.0.dist-info/METADATA +0 -126
- justhtml-0.6.0.dist-info/RECORD +0 -20
- {justhtml-0.6.0.dist-info → justhtml-0.33.0.dist-info}/WHEEL +0 -0
justhtml/tokens.py
CHANGED
|
@@ -1,34 +1,70 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Literal
|
|
3
4
|
|
|
4
|
-
START = 0
|
|
5
|
-
END = 1
|
|
6
5
|
|
|
7
|
-
|
|
6
|
+
class Tag:
|
|
7
|
+
__slots__ = ("attrs", "kind", "name", "self_closing", "start_pos")
|
|
8
|
+
|
|
9
|
+
START: Literal[0] = 0
|
|
10
|
+
END: Literal[1] = 1
|
|
11
|
+
|
|
12
|
+
kind: int
|
|
13
|
+
name: str
|
|
14
|
+
attrs: dict[str, str | None]
|
|
15
|
+
self_closing: bool
|
|
16
|
+
start_pos: int | None
|
|
17
|
+
|
|
18
|
+
def __init__(
|
|
19
|
+
self,
|
|
20
|
+
kind: int,
|
|
21
|
+
name: str,
|
|
22
|
+
attrs: dict[str, str | None] | None,
|
|
23
|
+
self_closing: bool = False,
|
|
24
|
+
start_pos: int | None = None,
|
|
25
|
+
) -> None:
|
|
8
26
|
self.kind = kind
|
|
9
27
|
self.name = name
|
|
10
28
|
self.attrs = attrs if attrs is not None else {}
|
|
11
29
|
self.self_closing = bool(self_closing)
|
|
30
|
+
self.start_pos = start_pos
|
|
12
31
|
|
|
13
32
|
|
|
14
33
|
class CharacterTokens:
|
|
15
34
|
__slots__ = ("data",)
|
|
16
35
|
|
|
17
|
-
|
|
36
|
+
data: str
|
|
37
|
+
|
|
38
|
+
def __init__(self, data: str) -> None:
|
|
18
39
|
self.data = data
|
|
19
40
|
|
|
20
41
|
|
|
21
42
|
class CommentToken:
|
|
22
|
-
__slots__ = ("data",)
|
|
43
|
+
__slots__ = ("data", "start_pos")
|
|
44
|
+
|
|
45
|
+
data: str
|
|
46
|
+
start_pos: int | None
|
|
23
47
|
|
|
24
|
-
def __init__(self, data):
|
|
48
|
+
def __init__(self, data: str, start_pos: int | None = None) -> None:
|
|
25
49
|
self.data = data
|
|
50
|
+
self.start_pos = start_pos
|
|
26
51
|
|
|
27
52
|
|
|
28
53
|
class Doctype:
|
|
29
54
|
__slots__ = ("force_quirks", "name", "public_id", "system_id")
|
|
30
55
|
|
|
31
|
-
|
|
56
|
+
name: str | None
|
|
57
|
+
public_id: str | None
|
|
58
|
+
system_id: str | None
|
|
59
|
+
force_quirks: bool
|
|
60
|
+
|
|
61
|
+
def __init__(
|
|
62
|
+
self,
|
|
63
|
+
name: str | None = None,
|
|
64
|
+
public_id: str | None = None,
|
|
65
|
+
system_id: str | None = None,
|
|
66
|
+
force_quirks: bool = False,
|
|
67
|
+
) -> None:
|
|
32
68
|
self.name = name
|
|
33
69
|
self.public_id = public_id
|
|
34
70
|
self.system_id = system_id
|
|
@@ -38,7 +74,9 @@ class Doctype:
|
|
|
38
74
|
class DoctypeToken:
|
|
39
75
|
__slots__ = ("doctype",)
|
|
40
76
|
|
|
41
|
-
|
|
77
|
+
doctype: Doctype
|
|
78
|
+
|
|
79
|
+
def __init__(self, doctype: Doctype) -> None:
|
|
42
80
|
self.doctype = doctype
|
|
43
81
|
|
|
44
82
|
|
|
@@ -46,19 +84,42 @@ class EOFToken:
|
|
|
46
84
|
__slots__ = ()
|
|
47
85
|
|
|
48
86
|
|
|
87
|
+
AnyToken = Tag | CharacterTokens | CommentToken | DoctypeToken | EOFToken
|
|
88
|
+
|
|
89
|
+
|
|
49
90
|
class TokenSinkResult:
|
|
50
91
|
__slots__ = ()
|
|
51
92
|
|
|
52
|
-
Continue = 0
|
|
53
|
-
Plaintext = 1
|
|
93
|
+
Continue: Literal[0] = 0
|
|
94
|
+
Plaintext: Literal[1] = 1
|
|
54
95
|
|
|
55
96
|
|
|
56
97
|
class ParseError:
|
|
57
98
|
"""Represents a parse error with location information."""
|
|
58
99
|
|
|
59
|
-
__slots__ = ("_end_column", "_source_html", "code", "column", "line", "message")
|
|
60
|
-
|
|
61
|
-
|
|
100
|
+
__slots__ = ("_end_column", "_source_html", "category", "code", "column", "line", "message")
|
|
101
|
+
|
|
102
|
+
category: str
|
|
103
|
+
code: str
|
|
104
|
+
line: int | None
|
|
105
|
+
column: int | None
|
|
106
|
+
message: str
|
|
107
|
+
_source_html: str | None
|
|
108
|
+
_end_column: int | None
|
|
109
|
+
|
|
110
|
+
__hash__ = None # type: ignore[assignment] # Unhashable since we define __eq__
|
|
111
|
+
|
|
112
|
+
def __init__(
|
|
113
|
+
self,
|
|
114
|
+
code: str,
|
|
115
|
+
line: int | None = None,
|
|
116
|
+
column: int | None = None,
|
|
117
|
+
category: str = "parse",
|
|
118
|
+
message: str | None = None,
|
|
119
|
+
source_html: str | None = None,
|
|
120
|
+
end_column: int | None = None,
|
|
121
|
+
) -> None:
|
|
122
|
+
self.category = category
|
|
62
123
|
self.code = code
|
|
63
124
|
self.line = line
|
|
64
125
|
self.column = column
|
|
@@ -66,12 +127,16 @@ class ParseError:
|
|
|
66
127
|
self._source_html = source_html
|
|
67
128
|
self._end_column = end_column
|
|
68
129
|
|
|
69
|
-
def __repr__(self):
|
|
130
|
+
def __repr__(self) -> str:
|
|
70
131
|
if self.line is not None and self.column is not None:
|
|
132
|
+
if self.category != "parse":
|
|
133
|
+
return f"ParseError({self.code!r}, line={self.line}, column={self.column}, category={self.category!r})"
|
|
71
134
|
return f"ParseError({self.code!r}, line={self.line}, column={self.column})"
|
|
135
|
+
if self.category != "parse":
|
|
136
|
+
return f"ParseError({self.code!r}, category={self.category!r})"
|
|
72
137
|
return f"ParseError({self.code!r})"
|
|
73
138
|
|
|
74
|
-
def __str__(self):
|
|
139
|
+
def __str__(self) -> str:
|
|
75
140
|
if self.line is not None and self.column is not None:
|
|
76
141
|
if self.message != self.code:
|
|
77
142
|
return f"({self.line},{self.column}): {self.code} - {self.message}"
|
|
@@ -80,14 +145,17 @@ class ParseError:
|
|
|
80
145
|
return f"{self.code} - {self.message}"
|
|
81
146
|
return self.code
|
|
82
147
|
|
|
83
|
-
def __eq__(self, other):
|
|
148
|
+
def __eq__(self, other: object) -> bool:
|
|
84
149
|
if not isinstance(other, ParseError):
|
|
85
150
|
return NotImplemented
|
|
86
|
-
return
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
151
|
+
return (
|
|
152
|
+
self.category == other.category
|
|
153
|
+
and self.code == other.code
|
|
154
|
+
and self.line == other.line
|
|
155
|
+
and self.column == other.column
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
def as_exception(self, end_column: int | None = None) -> SyntaxError:
|
|
91
159
|
"""Convert to a SyntaxError-like exception with source highlighting.
|
|
92
160
|
|
|
93
161
|
This uses Python 3.11+ enhanced error display to show the exact
|