selectolax 0.3.34__cp314-cp314t-win_amd64.whl → 0.4.0__cp314-cp314t-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

Binary file
selectolax/lexbor.pxd CHANGED
@@ -215,6 +215,8 @@ cdef extern from "lexbor/html/html.h" nogil:
215
215
 
216
216
  size_t ref_count
217
217
 
218
+ ctypedef struct lxb_html_element_t
219
+
218
220
  # Functions
219
221
  lxb_html_document_t * lxb_html_document_create()
220
222
  lxb_status_t lxb_html_document_parse(lxb_html_document_t *document, const lxb_char_t *html, size_t size)
@@ -223,6 +225,9 @@ cdef extern from "lexbor/html/html.h" nogil:
223
225
  lxb_dom_element_t * lxb_dom_document_element(lxb_dom_document_t *document)
224
226
 
225
227
  lxb_status_t lxb_html_serialize_tree_str(lxb_dom_node_t *node, lexbor_str_t *str)
228
+ lxb_status_t lxb_html_serialize_deep_str(lxb_dom_node_t *node, lexbor_str_t *str)
229
+ lxb_html_element_t* lxb_html_element_inner_html_set(lxb_html_element_t *element,
230
+ const lxb_char_t *html, size_t size)
226
231
 
227
232
  cdef class LexborNode:
228
233
  cdef:
@@ -241,6 +246,8 @@ cdef class LexborCSSSelector:
241
246
  cdef public LexborNode current_node
242
247
  cdef int _create_css_parser(self) except -1
243
248
  cpdef list find(self, str query, LexborNode node)
249
+ cpdef list find_first(self, str query, LexborNode node)
250
+ cpdef list _find(self, str query, LexborNode node, bint only_first)
244
251
  cpdef int any_matches(self, str query, LexborNode node) except -1
245
252
 
246
253
  cdef class LexborHTMLParser:
@@ -318,6 +325,7 @@ cdef extern from "lexbor/dom/dom.h" nogil:
318
325
  void lxb_dom_node_insert_after(lxb_dom_node_t *to, lxb_dom_node_t *node)
319
326
  lxb_dom_text_t * lxb_dom_document_create_text_node(lxb_dom_document_t *document, const lxb_char_t *data, size_t len)
320
327
  void lxb_dom_node_simple_walk(lxb_dom_node_t *root, lxb_dom_node_simple_walker_f walker_cb, void *ctx)
328
+ lxb_dom_node_t* lxb_dom_node_clone(lxb_dom_node_t *node, bint deep)
321
329
 
322
330
 
323
331
  cdef extern from "lexbor/dom/interfaces/element.h" nogil:
selectolax/lexbor.pyi CHANGED
@@ -71,6 +71,34 @@ class LexborSelector:
71
71
  """
72
72
  ...
73
73
 
74
+ @property
75
+ def inner_html(self) -> str | None:
76
+ """Return HTML representation of the child nodes.
77
+
78
+ Works similar to innerHTML in JavaScript.
79
+ Unlike the `.html` property, does not include the current node.
80
+ Can be used to set HTML as well. See the setter docstring.
81
+
82
+ Returns
83
+ -------
84
+ text : str or None
85
+ """
86
+ ...
87
+
88
+ @inner_html.setter
89
+ def inner_html(self, html: str):
90
+ """Set inner HTML to the specified HTML.
91
+
92
+ Replaces existing data inside the node.
93
+ Works similar to innerHTML in JavaScript.
94
+
95
+ Parameters
96
+ ----------
97
+ html : str
98
+
99
+ """
100
+ ...
101
+
74
102
  class LexborCSSSelector:
75
103
  def __init__(self): ...
76
104
  def find(self, query: str, node: LexborNode) -> list[LexborNode]: ...
@@ -84,7 +112,10 @@ class LexborNode:
84
112
  def mem_id(self) -> int: ...
85
113
  @property
86
114
  def child(self) -> LexborNode | None:
87
- """Alias for the first_child property."""
115
+ """Alias for the `first_child` property.
116
+
117
+ **Deprecated**. Please use `first_child` instead.
118
+ """
88
119
  ...
89
120
  @property
90
121
  def first_child(self) -> LexborNode | None:
@@ -173,7 +204,7 @@ class LexborNode:
173
204
  query : str
174
205
  default : bool, default None
175
206
  Default value to return if there is no match.
176
- strict: bool, default True
207
+ strict: bool, default False
177
208
  Set to True if you want to check if there is strictly only one match in the document.
178
209
 
179
210
 
@@ -194,7 +225,7 @@ class LexborNode:
194
225
  query : str
195
226
  default : bool, default None
196
227
  Default value to return if there is no match.
197
- strict: bool, default True
228
+ strict: bool, default False
198
229
  Set to True if you want to check if there is strictly only one match in the document.
199
230
 
200
231
 
@@ -215,7 +246,7 @@ class LexborNode:
215
246
  query : str
216
247
  default : bool, default None
217
248
  Default value to return if there is no match.
218
- strict: bool, default True
249
+ strict: bool, default False
219
250
  Set to True if you want to check if there is strictly only one match in the document.
220
251
 
221
252
 
@@ -236,6 +267,12 @@ class LexborNode:
236
267
  def tag(self) -> str | None:
237
268
  """Return the name of the current tag (e.g. div, p, img).
238
269
 
270
+ For non-tag nodes, returns the following names:
271
+
272
+ * `-text` - text node
273
+ * `-document` - document node
274
+ * `-comment` - comment node
275
+
239
276
  Returns
240
277
  -------
241
278
  text : str
@@ -351,6 +388,8 @@ class LexborNode:
351
388
  def unwrap(self, delete_empty: bool = False) -> None:
352
389
  """Replace node with whatever is inside this node.
353
390
 
391
+ Does nothing if you unwrap the same node a second time.
392
+
354
393
  Parameters
355
394
  ----------
356
395
  delete_empty : bool, default False
@@ -608,6 +647,44 @@ class LexborNode:
608
647
  """
609
648
  ...
610
649
 
650
+ @property
651
+ def inner_html(self) -> str | None:
652
+ """Return HTML representation of the child nodes.
653
+
654
+ Works similar to innerHTML in JavaScript.
655
+ Unlike the `.html` property, does not include the current node.
656
+ Can be used to set HTML as well. See the setter docstring.
657
+
658
+ Returns
659
+ -------
660
+ text : str or None
661
+ """
662
+ ...
663
+
664
+ @inner_html.setter
665
+ def inner_html(self, html: str):
666
+ """Set inner HTML to the specified HTML.
667
+
668
+ Replaces existing data inside the node.
669
+ Works similar to innerHTML in JavaScript.
670
+
671
+ Parameters
672
+ ----------
673
+ html : str
674
+
675
+ """
676
+ ...
677
+
678
+ def clone(self) -> LexborNode:
679
+ """Clone the current node.
680
+
681
+ You can use it to make temporary modifications without affecting the original HTML tree.
682
+
683
+ It is tied to the current parser instance.
684
+ Gets destroyed when parser instance is destroyed.
685
+ """
686
+ ...
687
+
611
688
  class LexborHTMLParser:
612
689
  """The lexbor HTML parser.
613
690
 
@@ -699,7 +776,7 @@ class LexborHTMLParser:
699
776
  query : str
700
777
  default : bool, default None
701
778
  Default value to return if there is no match.
702
- strict: bool, default True
779
+ strict: bool, default False
703
780
  Set to True if you want to check if there is strictly only one match in the document.
704
781
 
705
782
 
@@ -720,7 +797,7 @@ class LexborHTMLParser:
720
797
  query : str
721
798
  default : bool, default None
722
799
  Default value to return if there is no match.
723
- strict: bool, default True
800
+ strict: bool, default False
724
801
  Set to True if you want to check if there is strictly only one match in the document.
725
802
 
726
803
 
@@ -741,7 +818,7 @@ class LexborHTMLParser:
741
818
  query : str
742
819
  default : bool, default None
743
820
  Default value to return if there is no match.
744
- strict: bool, default True
821
+ strict: bool, default False
745
822
  Set to True if you want to check if there is strictly only one match in the document.
746
823
 
747
824
 
selectolax/lexbor.pyx CHANGED
@@ -1,6 +1,7 @@
1
1
  from cpython.bool cimport bool
2
2
  from cpython.exc cimport PyErr_SetObject
3
3
 
4
+
4
5
  _ENCODING = 'UTF-8'
5
6
 
6
7
  include "base.pxi"
@@ -9,6 +10,7 @@ include "lexbor/attrs.pxi"
9
10
  include "lexbor/node.pxi"
10
11
  include "lexbor/selection.pxi"
11
12
  include "lexbor/util.pxi"
13
+ include "lexbor/node_remove.pxi"
12
14
 
13
15
  # We don't inherit from HTMLParser here, because it also includes all the C code from Modest.
14
16
 
@@ -192,9 +194,9 @@ cdef class LexborHTMLParser:
192
194
  ----------
193
195
 
194
196
  query : str
195
- default : bool, default None
197
+ default : Any, default None
196
198
  Default value to return if there is no match.
197
- strict: bool, default True
199
+ strict: bool, default False
198
200
  Set to True if you want to check if there is strictly only one match in the document.
199
201
 
200
202
 
@@ -211,7 +213,7 @@ cdef class LexborHTMLParser:
211
213
  ----------
212
214
  tags : list of str
213
215
  List of tags to remove.
214
- recursive : bool, default True
216
+ recursive : bool, default False
215
217
  Whether to delete all its child nodes
216
218
 
217
219
  Examples
@@ -334,7 +336,13 @@ cdef class LexborHTMLParser:
334
336
  return obj
335
337
 
336
338
  def clone(self):
337
- """Clone the current tree."""
339
+ """Clone the current node.
340
+
341
+ You can use it to make temporary modifications without affecting the original HTML tree.
342
+
343
+ It is tied to the current parser instance.
344
+ Gets destroyed when parser instance is destroyed.
345
+ """
338
346
  cdef lxb_html_document_t* cloned_document
339
347
  cdef lxb_dom_node_t* cloned_node
340
348
  cdef LexborHTMLParser cls
@@ -386,3 +394,31 @@ cdef class LexborHTMLParser:
386
394
  # faster to check if the document is empty which should determine if we have a root
387
395
  if self.document != NULL:
388
396
  self.root.unwrap_tags(tags, delete_empty=delete_empty)
397
+
398
+ @property
399
+ def inner_html(self) -> str:
400
+ """Return HTML representation of the child nodes.
401
+
402
+ Works similar to innerHTML in JavaScript.
403
+ Unlike the `.html` property, does not include the current node.
404
+ Can be used to set HTML as well. See the setter docstring.
405
+
406
+ Returns
407
+ -------
408
+ text : str | None
409
+ """
410
+ return self.root.inner_html
411
+
412
+ @inner_html.setter
413
+ def inner_html(self, str html):
414
+ """Set inner HTML to the specified HTML.
415
+
416
+ Replaces existing data inside the node.
417
+ Works similar to innerHTML in JavaScript.
418
+
419
+ Parameters
420
+ ----------
421
+ html : str
422
+
423
+ """
424
+ self.root.inner_html = html
@@ -397,7 +397,10 @@ cdef class Node:
397
397
 
398
398
  @property
399
399
  def child(self):
400
- """Return the child node."""
400
+ """Alias for the `first_child` property.
401
+
402
+ **Deprecated**. Please use `first_child` instead.
403
+ """
401
404
  cdef Node node
402
405
  if self.node.child:
403
406
  node = Node.new(self.node.child, self.parser)