selectolax 0.3.28__cp312-cp312-musllinux_1_2_i686.whl → 0.3.29__cp312-cp312-musllinux_1_2_i686.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of selectolax might be problematic. Click here for more details.
- selectolax/__init__.py +1 -1
- selectolax/lexbor/node.pxi +16 -4
- selectolax/lexbor.c +1734 -1602
- selectolax/lexbor.cpython-312-i386-linux-musl.so +0 -0
- selectolax/lexbor.pyi +2 -2
- selectolax/lexbor.pyx +4 -2
- selectolax/modest/node.pxi +21 -5
- selectolax/parser.c +1860 -1706
- selectolax/parser.cpython-312-i386-linux-musl.so +0 -0
- selectolax/parser.pyi +13 -20
- selectolax/parser.pyx +4 -2
- {selectolax-0.3.28.dist-info → selectolax-0.3.29.dist-info}/METADATA +1 -1
- selectolax-0.3.29.dist-info/RECORD +26 -0
- selectolax-0.3.28.dist-info/RECORD +0 -26
- {selectolax-0.3.28.dist-info → selectolax-0.3.29.dist-info}/LICENSE +0 -0
- {selectolax-0.3.28.dist-info → selectolax-0.3.29.dist-info}/WHEEL +0 -0
- {selectolax-0.3.28.dist-info → selectolax-0.3.29.dist-info}/top_level.txt +0 -0
|
Binary file
|
selectolax/lexbor.pyi
CHANGED
|
@@ -101,7 +101,7 @@ class LexborNode:
|
|
|
101
101
|
def id(self) -> str | None: ...
|
|
102
102
|
def iter(self, include_text: bool = False) -> Iterator[LexborNode]: ...
|
|
103
103
|
def unwrap(self) -> None: ...
|
|
104
|
-
def unwrap_tags(self, tags: list[str]) -> None: ...
|
|
104
|
+
def unwrap_tags(self, tags: list[str], delete_empty : bool = False) -> None: ...
|
|
105
105
|
def traverse(self, include_text: bool = False) -> Iterator[LexborNode]: ...
|
|
106
106
|
def replace_with(self, value: bytes | str | LexborNode) -> None: ...
|
|
107
107
|
def insert_before(self, value: bytes | str | LexborNode) -> None: ...
|
|
@@ -152,7 +152,7 @@ class LexborHTMLParser:
|
|
|
152
152
|
def scripts_srcs_contain(self, queries: tuple[str]) -> bool: ...
|
|
153
153
|
def css_matches(self, selector: str) -> bool: ...
|
|
154
154
|
def clone(self) -> LexborHTMLParser: ...
|
|
155
|
-
def unwrap_tags(self, tags: list[str]) -> None: ...
|
|
155
|
+
def unwrap_tags(self, tags: list[str], delete_empty : bool = False) -> None: ...
|
|
156
156
|
|
|
157
157
|
def create_tag(tag: str) -> LexborNode:
|
|
158
158
|
"""
|
selectolax/lexbor.pyx
CHANGED
|
@@ -327,7 +327,7 @@ cdef class LexborHTMLParser:
|
|
|
327
327
|
|
|
328
328
|
cls = LexborHTMLParser.from_document(cloned_document, self.raw_html)
|
|
329
329
|
return cls
|
|
330
|
-
def unwrap_tags(self, list tags):
|
|
330
|
+
def unwrap_tags(self, list tags, delete_empty = False):
|
|
331
331
|
"""Unwraps specified tags from the HTML tree.
|
|
332
332
|
|
|
333
333
|
Works the same as the ``unwrap`` method, but applied to a list of tags.
|
|
@@ -336,6 +336,8 @@ cdef class LexborHTMLParser:
|
|
|
336
336
|
----------
|
|
337
337
|
tags : list
|
|
338
338
|
List of tags to remove.
|
|
339
|
+
delete_empty : bool, default False
|
|
340
|
+
Whether to delete empty tags.
|
|
339
341
|
|
|
340
342
|
Examples
|
|
341
343
|
--------
|
|
@@ -346,4 +348,4 @@ cdef class LexborHTMLParser:
|
|
|
346
348
|
'<body><div>Hello world!</div></body>'
|
|
347
349
|
"""
|
|
348
350
|
if self.root is not None:
|
|
349
|
-
self.root.unwrap_tags(tags)
|
|
351
|
+
self.root.unwrap_tags(tags, delete_empty=delete_empty)
|
selectolax/modest/node.pxi
CHANGED
|
@@ -515,9 +515,14 @@ cdef class Node:
|
|
|
515
515
|
"""An alias for the decompose method."""
|
|
516
516
|
self.decompose(recursive)
|
|
517
517
|
|
|
518
|
-
def unwrap(self):
|
|
518
|
+
def unwrap(self, delete_empty = False):
|
|
519
519
|
"""Replace node with whatever is inside this node.
|
|
520
520
|
|
|
521
|
+
Parameters
|
|
522
|
+
----------
|
|
523
|
+
delete_empty : bool, default False
|
|
524
|
+
Whether to delete empty tags.
|
|
525
|
+
|
|
521
526
|
Examples
|
|
522
527
|
--------
|
|
523
528
|
|
|
@@ -526,8 +531,11 @@ cdef class Node:
|
|
|
526
531
|
>>> tree.html
|
|
527
532
|
'<html><head></head><body><div>Hello world!</div></body></html>'
|
|
528
533
|
|
|
534
|
+
Note: by default, empty tags are ignored; set "delete_empty" to "True" to change this.
|
|
529
535
|
"""
|
|
530
536
|
if self.node.child == NULL:
|
|
537
|
+
if delete_empty:
|
|
538
|
+
myhtml_node_delete(self.node)
|
|
531
539
|
return
|
|
532
540
|
cdef myhtml_tree_node_t* next_node;
|
|
533
541
|
cdef myhtml_tree_node_t* current_node;
|
|
@@ -568,7 +576,7 @@ cdef class Node:
|
|
|
568
576
|
for element in self.css(tag):
|
|
569
577
|
element.decompose(recursive=recursive)
|
|
570
578
|
|
|
571
|
-
def unwrap_tags(self, list tags):
|
|
579
|
+
def unwrap_tags(self, list tags, delete_empty = False):
|
|
572
580
|
"""Unwraps specified tags from the HTML tree.
|
|
573
581
|
|
|
574
582
|
Works the same as the ``unwrap`` method, but applied to a list of tags.
|
|
@@ -577,6 +585,8 @@ cdef class Node:
|
|
|
577
585
|
----------
|
|
578
586
|
tags : list
|
|
579
587
|
List of tags to remove.
|
|
588
|
+
delete_empty : bool, default False
|
|
589
|
+
Whether to delete empty tags.
|
|
580
590
|
|
|
581
591
|
Examples
|
|
582
592
|
--------
|
|
@@ -585,11 +595,13 @@ cdef class Node:
|
|
|
585
595
|
>>> tree.body.unwrap_tags(['i','a'])
|
|
586
596
|
>>> tree.body.html
|
|
587
597
|
'<body><div>Hello world!</div></body>'
|
|
598
|
+
|
|
599
|
+
Note: by default, empty tags are ignored; set "delete_empty" to "True" to change this.
|
|
588
600
|
"""
|
|
589
601
|
|
|
590
602
|
for tag in tags:
|
|
591
603
|
for element in self.css(tag):
|
|
592
|
-
element.unwrap()
|
|
604
|
+
element.unwrap(delete_empty)
|
|
593
605
|
|
|
594
606
|
def replace_with(self, str_or_Node value):
|
|
595
607
|
"""Replace current Node with specified value.
|
|
@@ -752,7 +764,7 @@ cdef class Node:
|
|
|
752
764
|
else:
|
|
753
765
|
raise TypeError("Expected a string or Node instance, but %s found" % type(value).__name__)
|
|
754
766
|
|
|
755
|
-
def unwrap_tags(self, list tags):
|
|
767
|
+
def unwrap_tags(self, list tags, delete_empty = False):
|
|
756
768
|
"""Unwraps specified tags from the HTML tree.
|
|
757
769
|
|
|
758
770
|
Works the same as the ``unwrap`` method, but applied to a list of tags.
|
|
@@ -761,6 +773,8 @@ cdef class Node:
|
|
|
761
773
|
----------
|
|
762
774
|
tags : list
|
|
763
775
|
List of tags to remove.
|
|
776
|
+
delete_empty : bool, default False
|
|
777
|
+
Whether to delete empty tags.
|
|
764
778
|
|
|
765
779
|
Examples
|
|
766
780
|
--------
|
|
@@ -769,11 +783,13 @@ cdef class Node:
|
|
|
769
783
|
>>> tree.body.unwrap_tags(['i','a'])
|
|
770
784
|
>>> tree.body.html
|
|
771
785
|
'<body><div>Hello world!</div></body>'
|
|
786
|
+
|
|
787
|
+
Note: by default, empty tags are ignored; set "delete_empty" to "True" to change this.
|
|
772
788
|
"""
|
|
773
789
|
|
|
774
790
|
for tag in tags:
|
|
775
791
|
for element in self.css(tag):
|
|
776
|
-
element.unwrap()
|
|
792
|
+
element.unwrap(delete_empty)
|
|
777
793
|
|
|
778
794
|
@property
|
|
779
795
|
def raw_value(self):
|