selectolax 0.3.27__cp39-cp39-musllinux_1_2_aarch64.whl → 0.3.29__cp39-cp39-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

selectolax/lexbor.pyi CHANGED
@@ -101,7 +101,7 @@ class LexborNode:
101
101
  def id(self) -> str | None: ...
102
102
  def iter(self, include_text: bool = False) -> Iterator[LexborNode]: ...
103
103
  def unwrap(self) -> None: ...
104
- def unwrap_tags(self, tags: list[str]) -> None: ...
104
+ def unwrap_tags(self, tags: list[str], delete_empty : bool = False) -> None: ...
105
105
  def traverse(self, include_text: bool = False) -> Iterator[LexborNode]: ...
106
106
  def replace_with(self, value: bytes | str | LexborNode) -> None: ...
107
107
  def insert_before(self, value: bytes | str | LexborNode) -> None: ...
@@ -117,7 +117,7 @@ class LexborNode:
117
117
  def text_content(self) -> str | None: ...
118
118
 
119
119
  class LexborHTMLParser:
120
- def __init__(self, html: str): ...
120
+ def __init__(self, html: str| bytes ): ...
121
121
  @property
122
122
  def selector(self) -> "LexborCSSSelector": ...
123
123
  @property
@@ -152,7 +152,7 @@ class LexborHTMLParser:
152
152
  def scripts_srcs_contain(self, queries: tuple[str]) -> bool: ...
153
153
  def css_matches(self, selector: str) -> bool: ...
154
154
  def clone(self) -> LexborHTMLParser: ...
155
- def unwrap_tags(self, tags: list[str]) -> None: ...
155
+ def unwrap_tags(self, tags: list[str], delete_empty : bool = False) -> None: ...
156
156
 
157
157
  def create_tag(tag: str) -> LexborNode:
158
158
  """
selectolax/lexbor.pyx CHANGED
@@ -112,6 +112,7 @@ cdef class LexborHTMLParser:
112
112
  len(pybyte_name)
113
113
  )
114
114
  if status != 0x0000:
115
+ lxb_dom_collection_destroy(collection, <bint> True)
115
116
  raise SelectolaxError("Can't locate elements.")
116
117
 
117
118
  for i in range(lxb_dom_collection_length_noi(collection)):
@@ -226,13 +227,14 @@ cdef class LexborHTMLParser:
226
227
  len(pybyte_name)
227
228
  )
228
229
  if status != 0x0000:
230
+ lxb_dom_collection_destroy(collection, <bint> True)
229
231
  raise SelectolaxError("Can't locate elements.")
230
232
 
231
233
  for i in range(lxb_dom_collection_length_noi(collection)):
232
234
  if recursive:
233
- lxb_dom_node_destroy( <lxb_dom_node_t*> lxb_dom_collection_element_noi(collection, i))
234
- else:
235
235
  lxb_dom_node_destroy_deep( <lxb_dom_node_t*> lxb_dom_collection_element_noi(collection, i))
236
+ else:
237
+ lxb_dom_node_destroy(<lxb_dom_node_t *> lxb_dom_collection_element_noi(collection, i))
236
238
  lxb_dom_collection_destroy(collection, <bint> True)
237
239
 
238
240
  def select(self, query=None):
@@ -325,7 +327,7 @@ cdef class LexborHTMLParser:
325
327
 
326
328
  cls = LexborHTMLParser.from_document(cloned_document, self.raw_html)
327
329
  return cls
328
- def unwrap_tags(self, list tags):
330
+ def unwrap_tags(self, list tags, delete_empty = False):
329
331
  """Unwraps specified tags from the HTML tree.
330
332
 
331
333
  Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -334,6 +336,8 @@ cdef class LexborHTMLParser:
334
336
  ----------
335
337
  tags : list
336
338
  List of tags to remove.
339
+ delete_empty : bool
340
+ Whether to delete empty tags.
337
341
 
338
342
  Examples
339
343
  --------
@@ -344,4 +348,4 @@ cdef class LexborHTMLParser:
344
348
  '<body><div>Hello world!</div></body>'
345
349
  """
346
350
  if self.root is not None:
347
- self.root.unwrap_tags(tags)
351
+ self.root.unwrap_tags(tags, delete_empty=delete_empty)
@@ -515,9 +515,14 @@ cdef class Node:
515
515
  """An alias for the decompose method."""
516
516
  self.decompose(recursive)
517
517
 
518
- def unwrap(self):
518
+ def unwrap(self, delete_empty = False):
519
519
  """Replace node with whatever is inside this node.
520
520
 
521
+ Parameters
522
+ ----------
523
+ delete_empty : bool, default False
524
+ Whether to delete empty tags.
525
+
521
526
  Examples
522
527
  --------
523
528
 
@@ -526,8 +531,11 @@ cdef class Node:
526
531
  >>> tree.html
527
532
  '<html><head></head><body><div>Hello world!</div></body></html>'
528
533
 
534
+ Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
529
535
  """
530
536
  if self.node.child == NULL:
537
+ if delete_empty:
538
+ myhtml_node_delete(self.node)
531
539
  return
532
540
  cdef myhtml_tree_node_t* next_node;
533
541
  cdef myhtml_tree_node_t* current_node;
@@ -568,7 +576,7 @@ cdef class Node:
568
576
  for element in self.css(tag):
569
577
  element.decompose(recursive=recursive)
570
578
 
571
- def unwrap_tags(self, list tags):
579
+ def unwrap_tags(self, list tags, delete_empty = False):
572
580
  """Unwraps specified tags from the HTML tree.
573
581
 
574
582
  Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -577,6 +585,8 @@ cdef class Node:
577
585
  ----------
578
586
  tags : list
579
587
  List of tags to remove.
588
+ delete_empty : bool, default False
589
+ Whether to delete empty tags.
580
590
 
581
591
  Examples
582
592
  --------
@@ -585,11 +595,13 @@ cdef class Node:
585
595
  >>> tree.body.unwrap_tags(['i','a'])
586
596
  >>> tree.body.html
587
597
  '<body><div>Hello world!</div></body>'
598
+
599
+ Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
588
600
  """
589
601
 
590
602
  for tag in tags:
591
603
  for element in self.css(tag):
592
- element.unwrap()
604
+ element.unwrap(delete_empty)
593
605
 
594
606
  def replace_with(self, str_or_Node value):
595
607
  """Replace current Node with specified value.
@@ -752,7 +764,7 @@ cdef class Node:
752
764
  else:
753
765
  raise TypeError("Expected a string or Node instance, but %s found" % type(value).__name__)
754
766
 
755
- def unwrap_tags(self, list tags):
767
+ def unwrap_tags(self, list tags, delete_empty = False):
756
768
  """Unwraps specified tags from the HTML tree.
757
769
 
758
770
  Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -761,6 +773,8 @@ cdef class Node:
761
773
  ----------
762
774
  tags : list
763
775
  List of tags to remove.
776
+ delete_empty : bool, default False
777
+ Whether to delete empty tags.
764
778
 
765
779
  Examples
766
780
  --------
@@ -769,11 +783,13 @@ cdef class Node:
769
783
  >>> tree.body.unwrap_tags(['i','a'])
770
784
  >>> tree.body.html
771
785
  '<body><div>Hello world!</div></body>'
786
+
787
+ Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
772
788
  """
773
789
 
774
790
  for tag in tags:
775
791
  for element in self.css(tag):
776
- element.unwrap()
792
+ element.unwrap(delete_empty)
777
793
 
778
794
  @property
779
795
  def raw_value(self):