justhtml 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of justhtml might be problematic. Click here for more details.

justhtml/tokens.py ADDED
@@ -0,0 +1,223 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Literal
4
+
5
+
6
class Tag:
    """A tag token: a kind, a name, an attribute mapping and a self-closing flag."""

    __slots__ = ("attrs", "kind", "name", "self_closing")

    # Tag kind constants (presumably the values stored in ``kind`` -- confirm with callers).
    START: Literal[0] = 0
    END: Literal[1] = 1

    kind: int
    name: str
    attrs: dict[str, str | None]
    self_closing: bool

    def __init__(
        self,
        kind: int,
        name: str,
        attrs: dict[str, str | None] | None,
        self_closing: bool = False,
    ) -> None:
        """Store the tag fields.

        Args:
            kind: Integer tag kind.
            name: Tag name.
            attrs: Attribute mapping; ``None`` is replaced by a new empty dict.
            self_closing: Any value; it is coerced to ``bool``.
        """
        # A caller-supplied mapping is stored as-is (not copied); only a
        # missing one is replaced, with a fresh dict per instance.
        if attrs is None:
            attrs = {}
        self.kind, self.name = kind, name
        self.attrs = attrs
        self.self_closing = bool(self_closing)
28
+
29
+
30
class CharacterTokens:
    """Token whose only payload is a string stored in ``data``."""

    __slots__ = ("data",)

    data: str

    def __init__(self, data: str) -> None:
        """Keep *data* unchanged on the instance."""
        self.data = data
37
+
38
+
39
class CommentToken:
    """Token whose only payload is a string stored in ``data``."""

    __slots__ = ("data",)

    data: str

    def __init__(self, data: str) -> None:
        """Keep *data* unchanged on the instance."""
        self.data = data
46
+
47
+
48
class Doctype:
    """Doctype fields: name, public/system identifiers and a force-quirks flag."""

    __slots__ = ("force_quirks", "name", "public_id", "system_id")

    name: str | None
    public_id: str | None
    system_id: str | None
    force_quirks: bool

    def __init__(
        self,
        name: str | None = None,
        public_id: str | None = None,
        system_id: str | None = None,
        force_quirks: bool = False,
    ) -> None:
        """Store the doctype fields; every argument is optional.

        Args:
            name: Doctype name, or ``None``.
            public_id: Public identifier, or ``None``.
            system_id: System identifier, or ``None``.
            force_quirks: Any value; it is coerced to ``bool``.
        """
        self.name, self.public_id, self.system_id = name, public_id, system_id
        self.force_quirks = bool(force_quirks)
67
+
68
+
69
class DoctypeToken:
    """Token that wraps a single ``Doctype`` object in ``doctype``."""

    __slots__ = ("doctype",)

    doctype: Doctype

    def __init__(self, doctype: Doctype) -> None:
        """Keep a reference to *doctype* (it is not copied)."""
        self.doctype = doctype
76
+
77
+
78
class EOFToken:
    """Payload-free token; instances carry no attributes."""

    __slots__ = ()
80
+
81
+
82
class TokenSinkResult:
    """Namespace for the two integer result codes ``Continue`` and ``Plaintext``."""

    __slots__ = ()

    Continue: Literal[0] = 0
    Plaintext: Literal[1] = 1
87
+
88
+
89
class ParseError:
    """Represents a parse error with location information.

    Two errors compare equal when their ``code``, ``line`` and ``column``
    match; ``message`` and the stored source are ignored. Instances are
    unhashable.
    """

    __slots__ = ("_end_column", "_source_html", "code", "column", "line", "message")

    code: str  # error code
    line: int | None  # 1-indexed line of the error, if known
    column: int | None  # 1-indexed column of the error, if known
    message: str  # human-readable text; falls back to ``code``
    _source_html: str | None  # full source text, used only by as_exception()
    _end_column: int | None  # stored 1-indexed end of the highlighted range

    __hash__ = None  # type: ignore[assignment]  # Unhashable since we define __eq__

    def __init__(
        self,
        code: str,
        line: int | None = None,
        column: int | None = None,
        message: str | None = None,
        source_html: str | None = None,
        end_column: int | None = None,
    ) -> None:
        """Store the error details.

        Args:
            code: Error code.
            line: 1-indexed line number, or ``None`` when unknown.
            column: 1-indexed column number, or ``None`` when unknown.
            message: Descriptive text; an empty or missing message is
                replaced by ``code``.
            source_html: Source text used by :meth:`as_exception` to show
                the offending line.
            end_column: End column used by :meth:`as_exception` for
                highlighting a range.
        """
        self.code = code
        self.line = line
        self.column = column
        self.message = message or code
        self._source_html = source_html
        self._end_column = end_column

    def __repr__(self) -> str:
        if self.line is not None and self.column is not None:
            return f"ParseError({self.code!r}, line={self.line}, column={self.column})"
        return f"ParseError({self.code!r})"

    def __str__(self) -> str:
        # The message is appended only when it adds something beyond the code.
        text = self.code if self.message == self.code else f"{self.code} - {self.message}"
        if self.line is not None and self.column is not None:
            return f"({self.line},{self.column}): {text}"
        return text

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, ParseError):
            return NotImplemented
        return self.code == other.code and self.line == other.line and self.column == other.column

    @staticmethod
    def _tag_span(error_line: str, col_idx: int) -> tuple[int, int]:
        """Return the 0-indexed ``(start, end)`` span to highlight around *col_idx*.

        ``start`` is the nearest ``<`` at or at most 11 characters before
        *col_idx*, or *col_idx* itself when there is none. ``end`` is just
        past the first ``>`` at or after *col_idx*, or the end of the line
        when there is none.
        """
        # Clamp so that a column reported before the start or past the end of
        # the line cannot index outside the string.
        col_idx = min(max(col_idx, 0), len(error_line))

        # Bounded look-back: do not attribute the error to a far-away tag.
        lookback = 11
        tag_start = error_line.rfind("<", max(0, col_idx - lookback), col_idx + 1)
        start_idx = tag_start if tag_start != -1 else col_idx

        tag_end = error_line.find(">", col_idx)
        end_idx = tag_end + 1 if tag_end != -1 else len(error_line)
        return start_idx, end_idx

    def as_exception(self, end_column: int | None = None) -> SyntaxError:
        """Convert to a SyntaxError-like exception with source highlighting.

        This uses Python 3.11+ enhanced error display to show the exact
        location in the HTML source where the error occurred.

        Args:
            end_column: Optional end column for highlighting a range. It is
                used only when no end column was stored at construction
                time. If neither is available, attempts to highlight the
                full tag at the error position.

        Returns:
            A SyntaxError instance configured to display the error location.
            When the line, column or source is missing, or the line number
            is outside the source, the exception carries only the message.
        """
        exc = SyntaxError(self.message)
        exc.msg = self.message

        # Fall back to a plain exception if we don't have location info.
        if self.line is None or self.column is None or not self._source_html:
            return exc

        lines = self._source_html.split("\n")
        if not 1 <= self.line <= len(lines):
            # Invalid line number
            return exc

        # 1-indexed line -> 0-indexed list
        error_line = lines[self.line - 1]

        exc.filename = "<html>"
        exc.lineno = self.line
        exc.offset = self.column
        exc.text = error_line
        exc.end_lineno = self.line

        # Precedence: stored end column, then the parameter, then auto-detect.
        if self._end_column is not None:
            exc.end_offset = self._end_column
        elif end_column is not None:
            exc.end_offset = end_column
        else:
            start_idx, end_idx = self._tag_span(error_line, self.column - 1)
            # Convert back to 1-indexed offsets.
            exc.offset = start_idx + 1
            exc.end_offset = end_idx + 1

        return exc