selectolax 0.3.33__cp313-cp313-win_amd64.whl → 0.4.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

Binary file
selectolax/lexbor.pxd CHANGED
@@ -215,6 +215,8 @@ cdef extern from "lexbor/html/html.h" nogil:
215
215
 
216
216
  size_t ref_count
217
217
 
218
+ ctypedef struct lxb_html_element_t
219
+
218
220
  # Functions
219
221
  lxb_html_document_t * lxb_html_document_create()
220
222
  lxb_status_t lxb_html_document_parse(lxb_html_document_t *document, const lxb_char_t *html, size_t size)
@@ -223,6 +225,9 @@ cdef extern from "lexbor/html/html.h" nogil:
223
225
  lxb_dom_element_t * lxb_dom_document_element(lxb_dom_document_t *document)
224
226
 
225
227
  lxb_status_t lxb_html_serialize_tree_str(lxb_dom_node_t *node, lexbor_str_t *str)
228
+ lxb_status_t lxb_html_serialize_deep_str(lxb_dom_node_t *node, lexbor_str_t *str)
229
+ lxb_html_element_t* lxb_html_element_inner_html_set(lxb_html_element_t *element,
230
+ const lxb_char_t *html, size_t size)
226
231
 
227
232
  cdef class LexborNode:
228
233
  cdef:
@@ -241,6 +246,8 @@ cdef class LexborCSSSelector:
241
246
  cdef public LexborNode current_node
242
247
  cdef int _create_css_parser(self) except -1
243
248
  cpdef list find(self, str query, LexborNode node)
249
+ cpdef list find_first(self, str query, LexborNode node)
250
+ cpdef list _find(self, str query, LexborNode node, bint only_first)
244
251
  cpdef int any_matches(self, str query, LexborNode node) except -1
245
252
 
246
253
  cdef class LexborHTMLParser:
@@ -318,6 +325,7 @@ cdef extern from "lexbor/dom/dom.h" nogil:
318
325
  void lxb_dom_node_insert_after(lxb_dom_node_t *to, lxb_dom_node_t *node)
319
326
  lxb_dom_text_t * lxb_dom_document_create_text_node(lxb_dom_document_t *document, const lxb_char_t *data, size_t len)
320
327
  void lxb_dom_node_simple_walk(lxb_dom_node_t *root, lxb_dom_node_simple_walker_f walker_cb, void *ctx)
328
+ lxb_dom_node_t* lxb_dom_node_clone(lxb_dom_node_t *node, bint deep)
321
329
 
322
330
 
323
331
  cdef extern from "lexbor/dom/interfaces/element.h" nogil:
selectolax/lexbor.pyi CHANGED
@@ -71,6 +71,34 @@ class LexborSelector:
71
71
  """
72
72
  ...
73
73
 
74
+ @property
75
+ def inner_html(self) -> str | None:
76
+ """Return HTML representation of the child nodes.
77
+
78
+ Works similarly to innerHTML in JavaScript.
79
+ Unlike the `.html` property, does not include the current node.
80
+ Can be used to set HTML as well. See the setter docstring.
81
+
82
+ Returns
83
+ -------
84
+ text : str or None
85
+ """
86
+ ...
87
+
88
+ @inner_html.setter
89
+ def inner_html(self, html: str):
90
+ """Set inner HTML to the specified HTML.
91
+
92
+ Replaces existing data inside the node.
93
+ Works similarly to innerHTML in JavaScript.
94
+
95
+ Parameters
96
+ ----------
97
+ html : str
98
+
99
+ """
100
+ ...
101
+
74
102
  class LexborCSSSelector:
75
103
  def __init__(self): ...
76
104
  def find(self, query: str, node: LexborNode) -> list[LexborNode]: ...
@@ -84,7 +112,10 @@ class LexborNode:
84
112
  def mem_id(self) -> int: ...
85
113
  @property
86
114
  def child(self) -> LexborNode | None:
87
- """Alias for the first_child property."""
115
+ """Alias for the `first_child` property.
116
+
117
+ **Deprecated**. Please use `first_child` instead.
118
+ """
88
119
  ...
89
120
  @property
90
121
  def first_child(self) -> LexborNode | None:
@@ -145,6 +176,12 @@ class LexborNode:
145
176
  Matches pattern `query` against HTML tree.
146
177
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
147
178
 
179
+ Special selectors:
180
+
181
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
182
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
183
+
184
+
148
185
  Parameters
149
186
  ----------
150
187
  query : str
@@ -167,7 +204,7 @@ class LexborNode:
167
204
  query : str
168
205
  default : bool, default None
169
206
  Default value to return if there is no match.
170
- strict: bool, default True
207
+ strict: bool, default False
171
208
  Set to True if you want to check if there is strictly only one match in the document.
172
209
 
173
210
 
@@ -188,7 +225,7 @@ class LexborNode:
188
225
  query : str
189
226
  default : bool, default None
190
227
  Default value to return if there is no match.
191
- strict: bool, default True
228
+ strict: bool, default False
192
229
  Set to True if you want to check if there is strictly only one match in the document.
193
230
 
194
231
 
@@ -209,7 +246,7 @@ class LexborNode:
209
246
  query : str
210
247
  default : bool, default None
211
248
  Default value to return if there is no match.
212
- strict: bool, default True
249
+ strict: bool, default False
213
250
  Set to True if you want to check if there is strictly only one match in the document.
214
251
 
215
252
 
@@ -230,6 +267,12 @@ class LexborNode:
230
267
  def tag(self) -> str | None:
231
268
  """Return the name of the current tag (e.g. div, p, img).
232
269
 
270
+ For non-tag nodes, returns the following names:
271
+
272
+ * `-text` - text node
273
+ * `-document` - document node
274
+ * `-comment` - comment node
275
+
233
276
  Returns
234
277
  -------
235
278
  text : str
@@ -345,6 +388,8 @@ class LexborNode:
345
388
  def unwrap(self, delete_empty: bool = False) -> None:
346
389
  """Replace node with whatever is inside this node.
347
390
 
391
+ Does nothing if you unwrap the same node a second time.
392
+
348
393
  Parameters
349
394
  ----------
350
395
  delete_empty : bool, default False
@@ -602,6 +647,44 @@ class LexborNode:
602
647
  """
603
648
  ...
604
649
 
650
+ @property
651
+ def inner_html(self) -> str | None:
652
+ """Return HTML representation of the child nodes.
653
+
654
+ Works similarly to innerHTML in JavaScript.
655
+ Unlike the `.html` property, does not include the current node.
656
+ Can be used to set HTML as well. See the setter docstring.
657
+
658
+ Returns
659
+ -------
660
+ text : str or None
661
+ """
662
+ ...
663
+
664
+ @inner_html.setter
665
+ def inner_html(self, html: str):
666
+ """Set inner HTML to the specified HTML.
667
+
668
+ Replaces existing data inside the node.
669
+ Works similarly to innerHTML in JavaScript.
670
+
671
+ Parameters
672
+ ----------
673
+ html : str
674
+
675
+ """
676
+ ...
677
+
678
+ def clone(self) -> LexborNode:
679
+ """Clone the current node.
680
+
681
+ You can use it to make temporary modifications without affecting the original HTML tree.
682
+
683
+ It is tied to the current parser instance.
684
+ Gets destroyed when parser instance is destroyed.
685
+ """
686
+ ...
687
+
605
688
  class LexborHTMLParser:
606
689
  """The lexbor HTML parser.
607
690
 
@@ -665,6 +748,12 @@ class LexborHTMLParser:
665
748
  Matches pattern `query` against HTML tree.
666
749
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
667
750
 
751
+ Special selectors:
752
+
753
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
754
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
755
+
756
+
668
757
  Parameters
669
758
  ----------
670
759
  query : str
@@ -687,7 +776,7 @@ class LexborHTMLParser:
687
776
  query : str
688
777
  default : bool, default None
689
778
  Default value to return if there is no match.
690
- strict: bool, default True
779
+ strict: bool, default False
691
780
  Set to True if you want to check if there is strictly only one match in the document.
692
781
 
693
782
 
@@ -708,7 +797,7 @@ class LexborHTMLParser:
708
797
  query : str
709
798
  default : bool, default None
710
799
  Default value to return if there is no match.
711
- strict: bool, default True
800
+ strict: bool, default False
712
801
  Set to True if you want to check if there is strictly only one match in the document.
713
802
 
714
803
 
@@ -729,7 +818,7 @@ class LexborHTMLParser:
729
818
  query : str
730
819
  default : bool, default None
731
820
  Default value to return if there is no match.
732
- strict: bool, default True
821
+ strict: bool, default False
733
822
  Set to True if you want to check if there is strictly only one match in the document.
734
823
 
735
824
 
selectolax/lexbor.pyx CHANGED
@@ -1,6 +1,7 @@
1
1
  from cpython.bool cimport bool
2
2
  from cpython.exc cimport PyErr_SetObject
3
3
 
4
+
4
5
  _ENCODING = 'UTF-8'
5
6
 
6
7
  include "base.pxi"
@@ -9,6 +10,7 @@ include "lexbor/attrs.pxi"
9
10
  include "lexbor/node.pxi"
10
11
  include "lexbor/selection.pxi"
11
12
  include "lexbor/util.pxi"
13
+ include "lexbor/node_remove.pxi"
12
14
 
13
15
  # We don't inherit from HTMLParser here, because it also includes all the C code from Modest.
14
16
 
@@ -169,6 +171,11 @@ cdef class LexborHTMLParser:
169
171
  Matches pattern `query` against HTML tree.
170
172
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
171
173
 
174
+ Special selectors:
175
+
176
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
177
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
178
+
172
179
  Parameters
173
180
  ----------
174
181
  query : str
@@ -187,9 +194,9 @@ cdef class LexborHTMLParser:
187
194
  ----------
188
195
 
189
196
  query : str
190
- default : bool, default None
197
+ default : Any, default None
191
198
  Default value to return if there is no match.
192
- strict: bool, default True
199
+ strict: bool, default False
193
200
  Set to True if you want to check if there is strictly only one match in the document.
194
201
 
195
202
 
@@ -206,7 +213,7 @@ cdef class LexborHTMLParser:
206
213
  ----------
207
214
  tags : list of str
208
215
  List of tags to remove.
209
- recursive : bool, default True
216
+ recursive : bool, default False
210
217
  Whether to delete all its child nodes
211
218
 
212
219
  Examples
@@ -329,7 +336,13 @@ cdef class LexborHTMLParser:
329
336
  return obj
330
337
 
331
338
  def clone(self):
332
- """Clone the current tree."""
339
+ """Clone the current node.
340
+
341
+ You can use it to make temporary modifications without affecting the original HTML tree.
342
+
343
+ It is tied to the current parser instance.
344
+ Gets destroyed when parser instance is destroyed.
345
+ """
333
346
  cdef lxb_html_document_t* cloned_document
334
347
  cdef lxb_dom_node_t* cloned_node
335
348
  cdef LexborHTMLParser cls
@@ -381,3 +394,31 @@ cdef class LexborHTMLParser:
381
394
  # faster to check if the document is empty which should determine if we have a root
382
395
  if self.document != NULL:
383
396
  self.root.unwrap_tags(tags, delete_empty=delete_empty)
397
+
398
+ @property
399
+ def inner_html(self) -> str:
400
+ """Return HTML representation of the child nodes.
401
+
402
+ Works similarly to innerHTML in JavaScript.
403
+ Unlike the `.html` property, does not include the current node.
404
+ Can be used to set HTML as well. See the setter docstring.
405
+
406
+ Returns
407
+ -------
408
+ text : str | None
409
+ """
410
+ return self.root.inner_html
411
+
412
+ @inner_html.setter
413
+ def inner_html(self, str html):
414
+ """Set inner HTML to the specified HTML.
415
+
416
+ Replaces existing data inside the node.
417
+ Works similar to innerHTML in JavaScript.
418
+
419
+ Parameters
420
+ ----------
421
+ html : str
422
+
423
+ """
424
+ self.root.inner_html = html
@@ -397,7 +397,10 @@ cdef class Node:
397
397
 
398
398
  @property
399
399
  def child(self):
400
- """Return the child node."""
400
+ """Alias for the `first_child` property.
401
+
402
+ **Deprecated**. Please use `first_child` instead.
403
+ """
401
404
  cdef Node node
402
405
  if self.node.child:
403
406
  node = Node.new(self.node.child, self.parser)