selectolax 0.3.28__cp39-cp39-win_amd64.whl → 0.3.30__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

Binary file
selectolax/parser.pyi CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Any, Iterator, TypeVar, Literal, overload
1
+ from typing import Iterator, TypeVar, Literal, overload
2
2
 
3
3
  DefaultT = TypeVar("DefaultT")
4
4
 
@@ -146,17 +146,13 @@ class Node:
146
146
  ...
147
147
  @overload
148
148
  def css_first(
149
- self, query: str, default: Any = ..., strict: Literal[True] = ...
150
- ) -> Node: ...
151
- @overload
152
- def css_first(
153
- self, query: str, default: DefaultT, strict: bool = False
154
- ) -> Node | DefaultT: ...
149
+ self, query: str, default: DefaultT, strict: bool = False
150
+ ) -> Node | DefaultT:
151
+ ...
155
152
  @overload
156
153
  def css_first(
157
- self, query: str, default: None = ..., strict: bool = False
158
- ) -> Node | None:
159
- """Evaluate CSS selector against current node and its child nodes."""
154
+ self, query: str, default: None = None, strict: bool = False
155
+ ) -> Node | None | DefaultT:
160
156
  ...
161
157
  def decompose(self, recursive: bool = True) -> None:
162
158
  """Remove a Node from the tree."""
@@ -170,7 +166,7 @@ class Node:
170
166
  def strip_tags(self, tags: list[str], recursive: bool = False) -> None:
171
167
  """Remove specified tags from the HTML tree."""
172
168
  ...
173
- def unwrap_tags(self, tags: list[str]) -> None:
169
+ def unwrap_tags(self, tags: list[str], delete_empty: bool = False) -> None:
174
170
  """Unwraps specified tags from the HTML tree.
175
171
 
176
172
  Works the same as the unwrap method, but applied to a list of tags."""
@@ -236,18 +232,15 @@ class HTMLParser:
236
232
  Matches pattern query against HTML tree."""
237
233
  ...
238
234
  @overload
239
- def css_first(
240
- self, query: str, default: Any = ..., strict: Literal[True] = ...
241
- ) -> Node: ...
242
- @overload
243
235
  def css_first(
244
236
  self, query: str, default: DefaultT, strict: bool = False
245
- ) -> Node | DefaultT: ...
237
+ ) -> Node | DefaultT:
238
+ ...
239
+
246
240
  @overload
247
241
  def css_first(
248
- self, query: str, default: None = ..., strict: bool = False
249
- ) -> Node | None:
250
- """Same as css but returns only the first match."""
242
+ self, query: str, default: None = None, strict: bool = False
243
+ ) -> Node | None | DefaultT:
251
244
  ...
252
245
  @property
253
246
  def input_encoding(self) -> str:
@@ -274,7 +267,7 @@ class HTMLParser:
274
267
  """Returns the text of the node including text of all its child nodes."""
275
268
  ...
276
269
  def strip_tags(self, tags: list[str], recursive: bool = False) -> None: ...
277
- def unwrap_tags(self, tags: list[str]) -> None:
270
+ def unwrap_tags(self, tags: list[str], delete_empty: bool = False) -> None:
278
271
  """Unwraps specified tags from the HTML tree.
279
272
 
280
273
  Works the same as th unwrap method, but applied to a list of tags."""
selectolax/parser.pyx CHANGED
@@ -124,7 +124,7 @@ cdef class HTMLParser:
124
124
  status = myhtml_parse(self.html_tree, self._encoding, html, html_len)
125
125
 
126
126
  if status != 0:
127
- raise RuntimeError("Can't parse HTML:\n%s" % str(html))
127
+ raise RuntimeError("Can't parse HTML (status code: %d)" % status)
128
128
 
129
129
  assert self.html_tree.node_html != NULL
130
130
 
@@ -147,9 +147,13 @@ cdef class HTMLParser:
147
147
  def root(self):
148
148
  """Returns root node."""
149
149
  if self.html_tree and self.html_tree.node_html:
150
- node = Node()
151
- node._init(self.html_tree.node_html, self)
152
- return node
150
+ try:
151
+ node = Node()
152
+ node._init(self.html_tree.node_html, self)
153
+ return node
154
+ except Exception:
155
+ # If Node creation or initialization fails, return None
156
+ return None
153
157
  return None
154
158
 
155
159
  @property
@@ -185,6 +189,12 @@ cdef class HTMLParser:
185
189
  name : str (e.g. div)
186
190
 
187
191
  """
192
+ # Validate tag name
193
+ if not name:
194
+ raise ValueError("Tag name cannot be empty")
195
+ if len(name) > 100: # Reasonable limit for tag names
196
+ raise ValueError("Tag name is too long")
197
+
188
198
  cdef myhtml_collection_t* collection = NULL
189
199
  pybyte_name = name.encode('UTF-8')
190
200
  cdef mystatus_t status = 0;
@@ -269,7 +279,7 @@ cdef class HTMLParser:
269
279
  myhtml_collection_destroy(collection)
270
280
 
271
281
 
272
- def unwrap_tags(self, list tags):
282
+ def unwrap_tags(self, list tags, delete_empty : bool = False):
273
283
  """Unwraps specified tags from the HTML tree.
274
284
 
275
285
  Works the same as th `unwrap` method, but applied to a list of tags.
@@ -278,6 +288,8 @@ cdef class HTMLParser:
278
288
  ----------
279
289
  tags : list
280
290
  List of tags to remove.
291
+ delete_empty : bool, default False
292
+ If True, removes empty tags.
281
293
 
282
294
  Examples
283
295
  --------
@@ -288,7 +300,7 @@ cdef class HTMLParser:
288
300
  '<body><div>Hello world!</div></body>'
289
301
  """
290
302
  if self.root is not None:
291
- self.root.unwrap_tags(tags)
303
+ self.root.unwrap_tags(tags, delete_empty=delete_empty)
292
304
 
293
305
  @property
294
306
  def html(self):
@@ -426,6 +438,7 @@ cdef class HTMLParser:
426
438
  if self.html_tree != NULL:
427
439
  myhtml = self.html_tree.myhtml
428
440
  myhtml_tree_destroy(self.html_tree)
441
+ self.html_tree = NULL # Prevent double-free
429
442
  if myhtml != NULL:
430
443
  myhtml_destroy(myhtml)
431
444
 
@@ -1,13 +1,25 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: selectolax
3
- Version: 0.3.28
3
+ Version: 0.3.30
4
4
  Summary: Fast HTML5 parser with CSS selectors.
5
5
  Home-page: https://github.com/rushter/selectolax
6
6
  Author: Artem Golubin
7
- Author-email: me@rushter.com
8
- License: MIT license
9
- Project-URL: Source code, https://github.com/rushter/selectolax
10
- Keywords: selectolax
7
+ Author-email: Artem Golubin <me@rushter.com>
8
+ License:
9
+ MIT License
10
+
11
+ Copyright (c) 2018-2025, Artem Golubin
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18
+
19
+ Project-URL: Repository, https://github.com/rushter/selectolax
20
+ Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
21
+ Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.rst
22
+ Keywords: selectolax,html,parser,css,fast
11
23
  Classifier: Development Status :: 5 - Production/Stable
12
24
  Classifier: Topic :: Text Processing :: Markup :: HTML
13
25
  Classifier: Topic :: Internet
@@ -16,26 +28,18 @@ Classifier: Intended Audience :: Developers
16
28
  Classifier: License :: OSI Approved :: MIT License
17
29
  Classifier: Natural Language :: English
18
30
  Classifier: Programming Language :: Python :: 3
19
- Classifier: Programming Language :: Python :: 3.7
20
- Classifier: Programming Language :: Python :: 3.8
21
31
  Classifier: Programming Language :: Python :: 3.9
22
32
  Classifier: Programming Language :: Python :: 3.10
23
33
  Classifier: Programming Language :: Python :: 3.11
24
34
  Classifier: Programming Language :: Python :: 3.12
25
35
  Classifier: Programming Language :: Python :: 3.13
36
+ Requires-Python: >=3.9
37
+ Description-Content-Type: text/x-rst
26
38
  License-File: LICENSE
27
- Provides-Extra: cython
28
- Requires-Dist: Cython==3.0.11; extra == "cython"
39
+ Requires-Dist: Cython==3.0.11
29
40
  Dynamic: author
30
- Dynamic: author-email
31
- Dynamic: classifier
32
- Dynamic: description
33
41
  Dynamic: home-page
34
- Dynamic: keywords
35
- Dynamic: license
36
- Dynamic: project-url
37
- Dynamic: provides-extra
38
- Dynamic: summary
42
+ Dynamic: license-file
39
43
 
40
44
  .. image:: docs/logo.png
41
45
  :alt: selectolax logo
@@ -0,0 +1,26 @@
1
+ selectolax/__init__.py,sha256=QXGDeB8i55GmKcd_UXLaBjnqN76hPlaDFhHSTBgQwEc,185
2
+ selectolax/base.pxi,sha256=zOj3BrCA71xd-mJFtkMIAglP4ZybfrHVoCoy6ljTBDQ,93
3
+ selectolax/lexbor.c,sha256=t2OJwP7pIHYq-Ry6qHIIvivpAsJN4E-08FVdNmer5gU,2373583
4
+ selectolax/lexbor.cp39-win_amd64.pyd,sha256=GMHfHFgPETfWrtxvqo79w6TYUyY_pYaWjkvAMTscKRY,3191296
5
+ selectolax/lexbor.pxd,sha256=1d9nvZd9rZl27gwPwVV5BlbR2LAi6jDK69Xm9Guz5Kk,21538
6
+ selectolax/lexbor.pyi,sha256=BZSChBeL9ctyJk3IICpHyzmbFytD8sYvr4v3hSG7iIk,6877
7
+ selectolax/lexbor.pyx,sha256=-O-g03mLCQKc9F19eMvo3PyoLDtF09IIuFziXJAl6Ao,11520
8
+ selectolax/parser.c,sha256=pok6ajFRnMk_QBj0cFkvNk4C_jt7gGTbbqHoJ2j8yHk,2237176
9
+ selectolax/parser.cp39-win_amd64.pyd,sha256=cCYoIslma-B-7NGG4dgMTSDoWcsRNxIjxaeAVi98t9w,2145280
10
+ selectolax/parser.pxd,sha256=4pM_CcZlvJlaR8EMjZCnSmnCcJbwcYOldRTBEbfwm48,25145
11
+ selectolax/parser.pyi,sha256=jFQa0_av9w9HT4XtbJioI86Le0N_Wvbc7BFbdPqDuHM,11598
12
+ selectolax/parser.pyx,sha256=bS2n70o_5OPJ6JuXTBAVUTc-XhxqC4DXzPE4H3-e5Ek,13987
13
+ selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ selectolax/utils.pxi,sha256=4rtdRcLWuemxN1qe7Eul5jvAmHZ65r7Gvf67_Wg8Bt4,3566
15
+ selectolax/lexbor/attrs.pxi,sha256=r9DroDAkoxIvSMiDTRKpfYp503b7yUteDoYwglhQ0FM,3241
16
+ selectolax/lexbor/node.pxi,sha256=aLt7VzOvVErdkafncXR8LSwPBdODACh00kBtoaBBSQM,30736
17
+ selectolax/lexbor/selection.pxi,sha256=irSqUr6-csI3RNQMv11FNGd__GX1VjVmVhB7aSmj8Wg,6951
18
+ selectolax/lexbor/util.pxi,sha256=0I4ElWIwXxrZCfMmGCtyDU127oMsPCqC3IcUk4QmMAc,582
19
+ selectolax/modest/node.pxi,sha256=Da2b3cdmggCX736x0htGvac51SEeGCcY5l-LA5H4HNI,34376
20
+ selectolax/modest/selection.pxi,sha256=0elY7JwnpPVaw0QZE1T7A78s9FIph5uWIhwy4sEXGU8,6586
21
+ selectolax/modest/util.pxi,sha256=o2nPGGGtRlLqOCa7yPk94CfBzNlVr7ull7osFy6NRX4,570
22
+ selectolax-0.3.30.dist-info/licenses/LICENSE,sha256=A7Jb3WZcENcLfZRc7QPdm9zJdwfpIyPodPJu-kdMH6E,1087
23
+ selectolax-0.3.30.dist-info/METADATA,sha256=QPFdZ2KqmowrwhCaOA9B49GIKqpHzAEW6ObesnXTvZw,7582
24
+ selectolax-0.3.30.dist-info/WHEEL,sha256=XkFE14KmFh7mutkkb-qn_ueuH2lwfT8rLdfc5xpQ7wE,99
25
+ selectolax-0.3.30.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
26
+ selectolax-0.3.30.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp39-cp39-win_amd64
5
5
 
@@ -1,26 +0,0 @@
1
- selectolax/__init__.py,sha256=IMgHVlDkSCVB-D4ya7t6eqh3wvTxHwRpxOvV6ybw-Lw,185
2
- selectolax/base.pxi,sha256=zOj3BrCA71xd-mJFtkMIAglP4ZybfrHVoCoy6ljTBDQ,93
3
- selectolax/lexbor.c,sha256=Uwb33OzA7c6RxSct6extDwdzZZOGZrBjfnZqJklCvMk,2360252
4
- selectolax/lexbor.cp39-win_amd64.pyd,sha256=w0vu6_fj1PEEyaMc_t5uhXc4a-rXzxVXhMKfAgPMS2s,3197952
5
- selectolax/lexbor.pxd,sha256=1d9nvZd9rZl27gwPwVV5BlbR2LAi6jDK69Xm9Guz5Kk,21538
6
- selectolax/lexbor.pyi,sha256=32Ky5eZkOAPc4pN3m-eZQy4EJQV7O8xUwGHOWkOI06o,6724
7
- selectolax/lexbor.pyx,sha256=CYorl52H--UfKOihmZn7SxVIjl2GYvSVh3Lg9YMRACU,11231
8
- selectolax/parser.c,sha256=rGXXnE3kJJDElEps9cQMYaIfXCCxVpDk5ELR-RQF5nk,2215098
9
- selectolax/parser.cp39-win_amd64.pyd,sha256=oZLs5Zo0XpGKHLDcVmzIuQAXFPxpcXQI3WkDTkGCZJs,2141696
10
- selectolax/parser.pxd,sha256=4pM_CcZlvJlaR8EMjZCnSmnCcJbwcYOldRTBEbfwm48,25145
11
- selectolax/parser.pyi,sha256=XItuYMW5pdpTbTFMI1FjH0JvBao9VTwbtfvnnrLn0Pk,11891
12
- selectolax/parser.pyx,sha256=lQW4qJ6nCDraCupvcT61zUkgo-S-KIzl9JIGV6hh6hA,13386
13
- selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- selectolax/utils.pxi,sha256=4rtdRcLWuemxN1qe7Eul5jvAmHZ65r7Gvf67_Wg8Bt4,3566
15
- selectolax/lexbor/attrs.pxi,sha256=r9DroDAkoxIvSMiDTRKpfYp503b7yUteDoYwglhQ0FM,3241
16
- selectolax/lexbor/node.pxi,sha256=g5R3gZw-Xa7y8TkupE4mAyaorcdDArl5DeaKBBU35jA,30207
17
- selectolax/lexbor/selection.pxi,sha256=DVtVnaCwzXPPkDqgW0vEtkQa1zWJ0c2Ud3KGSrvK5PM,6755
18
- selectolax/lexbor/util.pxi,sha256=0I4ElWIwXxrZCfMmGCtyDU127oMsPCqC3IcUk4QmMAc,582
19
- selectolax/modest/node.pxi,sha256=xDJTFhcvkF7FGSObnCftZJSIBj7wV1--IdLJbFoIItw,33519
20
- selectolax/modest/selection.pxi,sha256=0elY7JwnpPVaw0QZE1T7A78s9FIph5uWIhwy4sEXGU8,6586
21
- selectolax/modest/util.pxi,sha256=o2nPGGGtRlLqOCa7yPk94CfBzNlVr7ull7osFy6NRX4,570
22
- selectolax-0.3.28.dist-info/LICENSE,sha256=A7Jb3WZcENcLfZRc7QPdm9zJdwfpIyPodPJu-kdMH6E,1087
23
- selectolax-0.3.28.dist-info/METADATA,sha256=KFj4io2mGglBQQmipQ0zmc_yAmwXu8kDVav_YS3SBZc,6448
24
- selectolax-0.3.28.dist-info/WHEEL,sha256=agy-BJge3afXwWznUXANATmKFW4eqelqRR0uf608A_0,99
25
- selectolax-0.3.28.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
26
- selectolax-0.3.28.dist-info/RECORD,,