selectolax 0.3.28__cp310-cp310-win_amd64.whl → 0.3.30__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

Binary file
selectolax/lexbor.pyi CHANGED
@@ -101,7 +101,7 @@ class LexborNode:
101
101
  def id(self) -> str | None: ...
102
102
  def iter(self, include_text: bool = False) -> Iterator[LexborNode]: ...
103
103
  def unwrap(self) -> None: ...
104
- def unwrap_tags(self, tags: list[str]) -> None: ...
104
+ def unwrap_tags(self, tags: list[str], delete_empty : bool = False) -> None: ...
105
105
  def traverse(self, include_text: bool = False) -> Iterator[LexborNode]: ...
106
106
  def replace_with(self, value: bytes | str | LexborNode) -> None: ...
107
107
  def insert_before(self, value: bytes | str | LexborNode) -> None: ...
@@ -152,7 +152,7 @@ class LexborHTMLParser:
152
152
  def scripts_srcs_contain(self, queries: tuple[str]) -> bool: ...
153
153
  def css_matches(self, selector: str) -> bool: ...
154
154
  def clone(self) -> LexborHTMLParser: ...
155
- def unwrap_tags(self, tags: list[str]) -> None: ...
155
+ def unwrap_tags(self, tags: list[str], delete_empty : bool = False) -> None: ...
156
156
 
157
157
  def create_tag(tag: str) -> LexborNode:
158
158
  """
@@ -170,3 +170,8 @@ def parse_fragment(html: str) -> list[LexborNode]:
170
170
  if they are missing. This function does not add these tags.
171
171
  """
172
172
  ...
173
+
174
+
175
+ class SelectolaxError(Exception):
176
+ """An exception that indicates error."""
177
+ pass
selectolax/lexbor.pyx CHANGED
@@ -96,6 +96,12 @@ cdef class LexborHTMLParser:
96
96
  name : str (e.g. div)
97
97
 
98
98
  """
99
+
100
+ if not name:
101
+ raise ValueError("Tag name cannot be empty")
102
+ if len(name) > 100:
103
+ raise ValueError("Tag name is too long")
104
+
99
105
  cdef lxb_dom_collection_t* collection = NULL
100
106
  cdef lxb_status_t status
101
107
  pybyte_name = name.encode('UTF-8')
@@ -327,7 +333,7 @@ cdef class LexborHTMLParser:
327
333
 
328
334
  cls = LexborHTMLParser.from_document(cloned_document, self.raw_html)
329
335
  return cls
330
- def unwrap_tags(self, list tags):
336
+ def unwrap_tags(self, list tags, delete_empty = False):
331
337
  """Unwraps specified tags from the HTML tree.
332
338
 
333
339
  Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -336,6 +342,8 @@ cdef class LexborHTMLParser:
336
342
  ----------
337
343
  tags : list
338
344
  List of tags to remove.
345
+ delete_empty : bool
346
+ Whenever to delete empty tags.
339
347
 
340
348
  Examples
341
349
  --------
@@ -346,4 +354,4 @@ cdef class LexborHTMLParser:
346
354
  '<body><div>Hello world!</div></body>'
347
355
  """
348
356
  if self.root is not None:
349
- self.root.unwrap_tags(tags)
357
+ self.root.unwrap_tags(tags, delete_empty=delete_empty)
@@ -14,6 +14,8 @@ cdef class Stack:
14
14
  self.capacity = capacity
15
15
  self.top = 0
16
16
  self._stack = <myhtml_tree_node_t**> malloc(capacity * sizeof(myhtml_tree_node_t))
17
+ if self._stack == NULL:
18
+ raise MemoryError("Failed to allocate memory for stack")
17
19
 
18
20
  def __dealloc__(self):
19
21
  free(self._stack)
@@ -131,7 +133,7 @@ cdef class _Attributes:
131
133
 
132
134
 
133
135
  ctypedef fused str_or_Node:
134
- basestring
136
+ str
135
137
  bytes
136
138
  Node
137
139
 
@@ -515,9 +517,14 @@ cdef class Node:
515
517
  """An alias for the decompose method."""
516
518
  self.decompose(recursive)
517
519
 
518
- def unwrap(self):
520
+ def unwrap(self, delete_empty = False):
519
521
  """Replace node with whatever is inside this node.
520
522
 
523
+ Parameters
524
+ ----------
525
+ delete_empty : bool, default False
526
+ Whenever to delete empty tags.
527
+
521
528
  Examples
522
529
  --------
523
530
 
@@ -526,8 +533,11 @@ cdef class Node:
526
533
  >>> tree.html
527
534
  '<html><head></head><body><div>Hello world!</div></body></html>'
528
535
 
536
+ Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
529
537
  """
530
538
  if self.node.child == NULL:
539
+ if delete_empty:
540
+ myhtml_node_delete(self.node)
531
541
  return
532
542
  cdef myhtml_tree_node_t* next_node;
533
543
  cdef myhtml_tree_node_t* current_node;
@@ -568,7 +578,7 @@ cdef class Node:
568
578
  for element in self.css(tag):
569
579
  element.decompose(recursive=recursive)
570
580
 
571
- def unwrap_tags(self, list tags):
581
+ def unwrap_tags(self, list tags, delete_empty = False):
572
582
  """Unwraps specified tags from the HTML tree.
573
583
 
574
584
  Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -577,6 +587,8 @@ cdef class Node:
577
587
  ----------
578
588
  tags : list
579
589
  List of tags to remove.
590
+ delete_empty : bool, default False
591
+ Whenever to delete empty tags.
580
592
 
581
593
  Examples
582
594
  --------
@@ -585,11 +597,13 @@ cdef class Node:
585
597
  >>> tree.body.unwrap_tags(['i','a'])
586
598
  >>> tree.body.html
587
599
  '<body><div>Hello world!</div></body>'
600
+
601
+ Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
588
602
  """
589
603
 
590
604
  for tag in tags:
591
605
  for element in self.css(tag):
592
- element.unwrap()
606
+ element.unwrap(delete_empty)
593
607
 
594
608
  def replace_with(self, str_or_Node value):
595
609
  """Replace current Node with specified value.
@@ -752,7 +766,7 @@ cdef class Node:
752
766
  else:
753
767
  raise TypeError("Expected a string or Node instance, but %s found" % type(value).__name__)
754
768
 
755
- def unwrap_tags(self, list tags):
769
+ def unwrap_tags(self, list tags, delete_empty = False):
756
770
  """Unwraps specified tags from the HTML tree.
757
771
 
758
772
  Works the same as th ``unwrap`` method, but applied to a list of tags.
@@ -761,6 +775,8 @@ cdef class Node:
761
775
  ----------
762
776
  tags : list
763
777
  List of tags to remove.
778
+ delete_empty : bool, default False
779
+ Whenever to delete empty tags.
764
780
 
765
781
  Examples
766
782
  --------
@@ -769,11 +785,13 @@ cdef class Node:
769
785
  >>> tree.body.unwrap_tags(['i','a'])
770
786
  >>> tree.body.html
771
787
  '<body><div>Hello world!</div></body>'
788
+
789
+ Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
772
790
  """
773
791
 
774
792
  for tag in tags:
775
793
  for element in self.css(tag):
776
- element.unwrap()
794
+ element.unwrap(delete_empty)
777
795
 
778
796
  @property
779
797
  def raw_value(self):
@@ -962,8 +980,8 @@ cdef inline str append_text(str text, str node_text, str separator='', bint stri
962
980
 
963
981
  cdef inline bytes to_bytes(str_or_Node value):
964
982
  cdef bytes bytes_val
965
- if isinstance(value, (str, unicode)):
966
- bytes_val = value.encode(_ENCODING)
983
+ if isinstance(value, unicode):
984
+ bytes_val = <bytes>value.encode("utf-8")
967
985
  elif isinstance(value, bytes):
968
- bytes_val = <char*> value
986
+ bytes_val = <bytes>value
969
987
  return bytes_val