selectolax 0.3.32__cp311-cp311-win_arm64.whl → 0.3.34__cp311-cp311-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of selectolax might be problematic. Click here for more details.
- selectolax/__init__.py +3 -5
- selectolax/lexbor/attrs.pxi +0 -1
- selectolax/lexbor/node.pxi +99 -41
- selectolax/lexbor/selection.pxi +27 -25
- selectolax/lexbor/util.pxi +1 -0
- selectolax/lexbor.c +6412 -6702
- selectolax/lexbor.cp311-win_arm64.pyd +0 -0
- selectolax/lexbor.pxd +32 -35
- selectolax/lexbor.pyi +51 -1
- selectolax/lexbor.pyx +48 -17
- selectolax/modest/node.pxi +37 -36
- selectolax/modest/selection.pxi +24 -22
- selectolax/modest/util.pxi +1 -0
- selectolax/parser.c +4524 -5291
- selectolax/parser.cp311-win_arm64.pyd +0 -0
- selectolax/parser.pxd +17 -20
- selectolax/parser.pyi +2 -2
- selectolax/parser.pyx +28 -31
- selectolax/utils.pxi +13 -3
- selectolax-0.3.34.dist-info/METADATA +32 -0
- selectolax-0.3.34.dist-info/RECORD +26 -0
- selectolax-0.3.32.dist-info/METADATA +0 -187
- selectolax-0.3.32.dist-info/RECORD +0 -26
- {selectolax-0.3.32.dist-info → selectolax-0.3.34.dist-info}/WHEEL +0 -0
- {selectolax-0.3.32.dist-info → selectolax-0.3.34.dist-info}/licenses/LICENSE +0 -0
- {selectolax-0.3.32.dist-info → selectolax-0.3.34.dist-info}/top_level.txt +0 -0
selectolax/__init__.py
CHANGED
|
@@ -2,9 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
__author__ = """Artem Golubin"""
|
|
5
|
-
__email__ =
|
|
6
|
-
__version__ =
|
|
5
|
+
__email__ = "me@rushter.com"
|
|
6
|
+
__version__ = "0.3.34"
|
|
7
7
|
|
|
8
|
-
from . import parser
|
|
9
|
-
from . import lexbor
|
|
10
|
-
from . import modest
|
|
8
|
+
from . import lexbor, modest, parser
|
selectolax/lexbor/attrs.pxi
CHANGED
selectolax/lexbor/node.pxi
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
cimport cython
|
|
2
|
+
from cpython.exc cimport PyErr_SetNone
|
|
2
3
|
|
|
3
4
|
_TAG_TO_NAME = {
|
|
4
5
|
0x0005: "- doctype",
|
|
@@ -18,14 +19,17 @@ cdef inline bytes to_bytes(str_or_LexborNode value):
|
|
|
18
19
|
bytes_val = <bytes>value
|
|
19
20
|
return bytes_val
|
|
20
21
|
|
|
22
|
+
|
|
21
23
|
@cython.final
|
|
22
24
|
cdef class LexborNode:
|
|
23
25
|
"""A class that represents HTML node (element)."""
|
|
24
26
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
@staticmethod
|
|
28
|
+
cdef LexborNode new(lxb_dom_node_t *node, LexborHTMLParser parser):
|
|
29
|
+
cdef LexborNode lxbnode = LexborNode.__new__(LexborNode)
|
|
30
|
+
lxbnode.node = node
|
|
31
|
+
lxbnode.parser = parser
|
|
32
|
+
return lxbnode
|
|
29
33
|
|
|
30
34
|
@property
|
|
31
35
|
def mem_id(self):
|
|
@@ -41,8 +45,7 @@ cdef class LexborNode:
|
|
|
41
45
|
"""Return the first child node."""
|
|
42
46
|
cdef LexborNode node
|
|
43
47
|
if self.node.first_child:
|
|
44
|
-
node = LexborNode()
|
|
45
|
-
node._cinit(<lxb_dom_node_t *> self.node.first_child, self.parser)
|
|
48
|
+
node = LexborNode.new(<lxb_dom_node_t *> self.node.first_child, self.parser)
|
|
46
49
|
return node
|
|
47
50
|
return None
|
|
48
51
|
|
|
@@ -50,9 +53,8 @@ cdef class LexborNode:
|
|
|
50
53
|
def parent(self):
|
|
51
54
|
"""Return the parent node."""
|
|
52
55
|
cdef LexborNode node
|
|
53
|
-
if self.node.parent:
|
|
54
|
-
node = LexborNode()
|
|
55
|
-
node._cinit(<lxb_dom_node_t *> self.node.parent, self.parser)
|
|
56
|
+
if self.node.parent != NULL:
|
|
57
|
+
node = LexborNode.new(<lxb_dom_node_t *> self.node.parent, self.parser)
|
|
56
58
|
return node
|
|
57
59
|
return None
|
|
58
60
|
|
|
@@ -60,9 +62,8 @@ cdef class LexborNode:
|
|
|
60
62
|
def next(self):
|
|
61
63
|
"""Return next node."""
|
|
62
64
|
cdef LexborNode node
|
|
63
|
-
if self.node.next:
|
|
64
|
-
node = LexborNode()
|
|
65
|
-
node._cinit(<lxb_dom_node_t *> self.node.next, self.parser)
|
|
65
|
+
if self.node.next != NULL:
|
|
66
|
+
node = LexborNode.new(<lxb_dom_node_t *> self.node.next, self.parser)
|
|
66
67
|
return node
|
|
67
68
|
return None
|
|
68
69
|
|
|
@@ -70,9 +71,8 @@ cdef class LexborNode:
|
|
|
70
71
|
def prev(self):
|
|
71
72
|
"""Return previous node."""
|
|
72
73
|
cdef LexborNode node
|
|
73
|
-
if self.node.prev:
|
|
74
|
-
node = LexborNode()
|
|
75
|
-
node._cinit(<lxb_dom_node_t *> self.node.prev, self.parser)
|
|
74
|
+
if self.node.prev != NULL:
|
|
75
|
+
node = LexborNode.new(<lxb_dom_node_t *> self.node.prev, self.parser)
|
|
76
76
|
return node
|
|
77
77
|
return None
|
|
78
78
|
|
|
@@ -80,9 +80,8 @@ cdef class LexborNode:
|
|
|
80
80
|
def last_child(self):
|
|
81
81
|
"""Return last child node."""
|
|
82
82
|
cdef LexborNode node
|
|
83
|
-
if self.node.last_child:
|
|
84
|
-
node = LexborNode()
|
|
85
|
-
node._cinit(<lxb_dom_node_t *> self.node.last_child, self.parser)
|
|
83
|
+
if self.node.last_child != NULL:
|
|
84
|
+
node = LexborNode.new(<lxb_dom_node_t *> self.node.last_child, self.parser)
|
|
86
85
|
return node
|
|
87
86
|
return None
|
|
88
87
|
|
|
@@ -181,6 +180,12 @@ cdef class LexborNode:
|
|
|
181
180
|
Matches pattern `query` against HTML tree.
|
|
182
181
|
`CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
|
|
183
182
|
|
|
183
|
+
Special selectors:
|
|
184
|
+
|
|
185
|
+
- parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
|
|
186
|
+
- parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
|
|
187
|
+
|
|
188
|
+
|
|
184
189
|
Parameters
|
|
185
190
|
----------
|
|
186
191
|
query : str
|
|
@@ -256,7 +261,6 @@ cdef class LexborNode:
|
|
|
256
261
|
text = c_text.decode(_ENCODING)
|
|
257
262
|
return text
|
|
258
263
|
|
|
259
|
-
|
|
260
264
|
def decompose(self, bool recursive=True):
|
|
261
265
|
"""Remove the current node from the tree.
|
|
262
266
|
|
|
@@ -301,11 +305,11 @@ cdef class LexborNode:
|
|
|
301
305
|
'<html><body><div>Hello world!</div></body></html>'
|
|
302
306
|
|
|
303
307
|
"""
|
|
308
|
+
cdef LexborNode element
|
|
304
309
|
for tag in tags:
|
|
305
310
|
for element in self.css(tag):
|
|
306
311
|
element.decompose(recursive=recursive)
|
|
307
312
|
|
|
308
|
-
|
|
309
313
|
@property
|
|
310
314
|
def attributes(self):
|
|
311
315
|
"""Get all attributes that belong to the current node.
|
|
@@ -413,13 +417,11 @@ cdef class LexborNode:
|
|
|
413
417
|
node = node.next
|
|
414
418
|
continue
|
|
415
419
|
|
|
416
|
-
next_node = LexborNode()
|
|
417
|
-
next_node._cinit(<lxb_dom_node_t *> node, self.parser)
|
|
420
|
+
next_node = LexborNode.new(<lxb_dom_node_t *> node, self.parser)
|
|
418
421
|
yield next_node
|
|
419
422
|
node = node.next
|
|
420
423
|
|
|
421
|
-
|
|
422
|
-
def unwrap(self, delete_empty=False):
|
|
424
|
+
def unwrap(self, bint delete_empty=False):
|
|
423
425
|
"""Replace node with whatever is inside this node.
|
|
424
426
|
|
|
425
427
|
Parameters
|
|
@@ -441,8 +443,8 @@ cdef class LexborNode:
|
|
|
441
443
|
if delete_empty:
|
|
442
444
|
lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
|
|
443
445
|
return
|
|
444
|
-
cdef lxb_dom_node_t* next_node
|
|
445
|
-
cdef lxb_dom_node_t* current_node
|
|
446
|
+
cdef lxb_dom_node_t* next_node
|
|
447
|
+
cdef lxb_dom_node_t* current_node
|
|
446
448
|
|
|
447
449
|
if self.node.first_child.next != NULL:
|
|
448
450
|
current_node = self.node.first_child
|
|
@@ -456,7 +458,7 @@ cdef class LexborNode:
|
|
|
456
458
|
lxb_dom_node_insert_before(self.node, self.node.first_child)
|
|
457
459
|
lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
|
|
458
460
|
|
|
459
|
-
def unwrap_tags(self, list tags, delete_empty = False):
|
|
461
|
+
def unwrap_tags(self, list tags, bint delete_empty = False):
|
|
460
462
|
"""Unwraps specified tags from the HTML tree.
|
|
461
463
|
|
|
462
464
|
Works the same as the ``unwrap`` method, but applied to a list of tags.
|
|
@@ -478,11 +480,47 @@ cdef class LexborNode:
|
|
|
478
480
|
|
|
479
481
|
Note: by default, empty tags are ignored, use "delete_empty" to change this.
|
|
480
482
|
"""
|
|
481
|
-
|
|
483
|
+
cdef LexborNode element
|
|
482
484
|
for tag in tags:
|
|
483
485
|
for element in self.css(tag):
|
|
484
486
|
element.unwrap(delete_empty)
|
|
485
487
|
|
|
488
|
+
def merge_text_nodes(self):
|
|
489
|
+
"""Iterates over all text nodes and merges all text nodes that are close to each other.
|
|
490
|
+
|
|
491
|
+
This is useful for text extraction.
|
|
492
|
+
Use it when you need to strip HTML tags and merge "dangling" text.
|
|
493
|
+
|
|
494
|
+
Examples
|
|
495
|
+
--------
|
|
496
|
+
|
|
497
|
+
>>> tree = LexborHTMLParser("<div><p><strong>J</strong>ohn</p><p>Doe</p></div>")
|
|
498
|
+
>>> node = tree.css_first('div')
|
|
499
|
+
>>> tree.unwrap_tags(["strong"])
|
|
500
|
+
>>> tree.text(deep=True, separator=" ", strip=True)
|
|
501
|
+
"J ohn Doe" # Text extraction produces an extra space because the strong tag was removed.
|
|
502
|
+
>>> node.merge_text_nodes()
|
|
503
|
+
>>> tree.text(deep=True, separator=" ", strip=True)
|
|
504
|
+
"John Doe"
|
|
505
|
+
"""
|
|
506
|
+
cdef lxb_dom_node_t *node = self.node.first_child
|
|
507
|
+
cdef lxb_dom_node_t *next_node
|
|
508
|
+
cdef lxb_char_t *left_text
|
|
509
|
+
cdef lxb_char_t *right_text
|
|
510
|
+
cdef size_t left_length, right_length
|
|
511
|
+
|
|
512
|
+
while node != NULL:
|
|
513
|
+
next_node = node.next
|
|
514
|
+
if node.type == LXB_DOM_NODE_TYPE_TEXT and node.prev and node.prev.type == LXB_DOM_NODE_TYPE_TEXT:
|
|
515
|
+
left_text = lxb_dom_node_text_content(node.prev, &left_length)
|
|
516
|
+
right_text = lxb_dom_node_text_content(node, &right_length)
|
|
517
|
+
if left_text and right_text:
|
|
518
|
+
combined = (<bytes>left_text[:left_length]) + (<bytes>right_text[:right_length])
|
|
519
|
+
lxb_dom_node_text_content_set(node, combined, len(combined))
|
|
520
|
+
lxb_dom_node_remove(node.prev)
|
|
521
|
+
if node.first_child:
|
|
522
|
+
LexborNode.new(node, self.parser).merge_text_nodes()
|
|
523
|
+
node = next_node
|
|
486
524
|
|
|
487
525
|
def traverse(self, include_text=False):
|
|
488
526
|
"""Iterate over all child and next nodes starting from the current level.
|
|
@@ -502,8 +540,7 @@ cdef class LexborNode:
|
|
|
502
540
|
|
|
503
541
|
while node != NULL:
|
|
504
542
|
if not (not include_text and node.type == LXB_DOM_NODE_TYPE_TEXT):
|
|
505
|
-
lxb_node = LexborNode()
|
|
506
|
-
lxb_node._cinit(<lxb_dom_node_t *> node, self.parser)
|
|
543
|
+
lxb_node = LexborNode.new(<lxb_dom_node_t *> node, self.parser)
|
|
507
544
|
yield lxb_node
|
|
508
545
|
|
|
509
546
|
if node.first_child != NULL:
|
|
@@ -567,7 +604,6 @@ cdef class LexborNode:
|
|
|
567
604
|
else:
|
|
568
605
|
raise SelectolaxError("Expected a string or LexborNode instance, but %s found" % type(value).__name__)
|
|
569
606
|
|
|
570
|
-
|
|
571
607
|
def insert_before(self, str_or_LexborNode value):
|
|
572
608
|
"""
|
|
573
609
|
Insert a node before the current Node.
|
|
@@ -742,7 +778,7 @@ cdef class LexborNode:
|
|
|
742
778
|
>>> selector.child.raw_value
|
|
743
779
|
b'<test>'
|
|
744
780
|
"""
|
|
745
|
-
raise
|
|
781
|
+
raise NotImplementedError("This features is not supported by the lexbor backend. Please use Modest backend.")
|
|
746
782
|
|
|
747
783
|
def scripts_contain(self, str query):
|
|
748
784
|
"""Returns True if any of the script tags contain specified text.
|
|
@@ -755,6 +791,7 @@ cdef class LexborNode:
|
|
|
755
791
|
The query to check.
|
|
756
792
|
|
|
757
793
|
"""
|
|
794
|
+
cdef LexborNode node
|
|
758
795
|
if self.parser.cached_script_texts is None:
|
|
759
796
|
nodes = self.parser.selector.find('script', self)
|
|
760
797
|
text_nodes = []
|
|
@@ -779,6 +816,7 @@ cdef class LexborNode:
|
|
|
779
816
|
queries : tuple of str
|
|
780
817
|
|
|
781
818
|
"""
|
|
819
|
+
cdef LexborNode node
|
|
782
820
|
if self.parser.cached_script_srcs is None:
|
|
783
821
|
nodes = self.parser.selector.find('script', self)
|
|
784
822
|
src_nodes = []
|
|
@@ -834,31 +872,44 @@ cdef class LexborNode:
|
|
|
834
872
|
"""
|
|
835
873
|
cdef unsigned char * text
|
|
836
874
|
cdef lxb_dom_node_t* node = <lxb_dom_node_t*> self.node.first_child
|
|
837
|
-
|
|
838
|
-
container = TextContainer()
|
|
875
|
+
cdef TextContainer container
|
|
839
876
|
if self.node == NULL or self.node.type != LXB_DOM_NODE_TYPE_TEXT:
|
|
840
877
|
return None
|
|
878
|
+
|
|
841
879
|
text = <unsigned char *> lexbor_str_data_noi(&(<lxb_dom_character_data_t *> self.node).data)
|
|
842
880
|
if text != NULL:
|
|
881
|
+
container = TextContainer.new_with_defaults()
|
|
843
882
|
py_text = text.decode(_ENCODING)
|
|
844
883
|
container.append(py_text)
|
|
845
884
|
return container.text
|
|
885
|
+
|
|
886
|
+
|
|
887
|
+
@cython.internal
|
|
846
888
|
@cython.final
|
|
847
889
|
cdef class TextContainer:
|
|
848
890
|
cdef str _text
|
|
849
|
-
cdef
|
|
850
|
-
cdef
|
|
891
|
+
cdef str separator
|
|
892
|
+
cdef bint strip
|
|
893
|
+
|
|
894
|
+
@staticmethod
|
|
895
|
+
cdef TextContainer new_with_defaults():
|
|
896
|
+
cdef TextContainer cls = TextContainer.__new__(TextContainer)
|
|
897
|
+
cls._text = ''
|
|
898
|
+
cls.separator = ''
|
|
899
|
+
cls.strip = False
|
|
900
|
+
return cls
|
|
851
901
|
|
|
852
902
|
def __init__(self, str separator = '', bool strip = False):
|
|
853
903
|
self._text = ""
|
|
854
904
|
self.separator = separator
|
|
855
905
|
self.strip = strip
|
|
856
906
|
|
|
857
|
-
def append(self, node_text):
|
|
907
|
+
def append(self, str node_text):
|
|
858
908
|
if self.strip:
|
|
859
909
|
self._text += node_text.strip() + self.separator
|
|
860
910
|
else:
|
|
861
911
|
self._text += node_text + self.separator
|
|
912
|
+
|
|
862
913
|
@property
|
|
863
914
|
def text(self):
|
|
864
915
|
if self.separator and self._text and self._text.endswith(self.separator):
|
|
@@ -867,7 +918,7 @@ cdef class TextContainer:
|
|
|
867
918
|
|
|
868
919
|
|
|
869
920
|
cdef lexbor_action_t text_callback(lxb_dom_node_t *node, void *ctx):
|
|
870
|
-
cdef unsigned char *text
|
|
921
|
+
cdef unsigned char *text
|
|
871
922
|
cdef lxb_tag_id_t tag_id = lxb_dom_node_tag_id_noi(node)
|
|
872
923
|
if tag_id != LXB_TAG__TEXT:
|
|
873
924
|
return LEXBOR_ACTION_OK
|
|
@@ -875,8 +926,15 @@ cdef lexbor_action_t text_callback(lxb_dom_node_t *node, void *ctx):
|
|
|
875
926
|
text = <unsigned char*> lexbor_str_data_noi(&(<lxb_dom_text_t *> node).char_data.data)
|
|
876
927
|
if not text:
|
|
877
928
|
return LEXBOR_ACTION_OK
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
929
|
+
|
|
930
|
+
try:
|
|
931
|
+
py_str = text.decode(_ENCODING)
|
|
932
|
+
|
|
933
|
+
except Exception as e:
|
|
934
|
+
PyErr_SetNone(e)
|
|
935
|
+
return LEXBOR_ACTION_STOP
|
|
936
|
+
|
|
937
|
+
cdef TextContainer cls
|
|
938
|
+
cls = <TextContainer> ctx
|
|
881
939
|
cls.append(py_str)
|
|
882
940
|
return LEXBOR_ACTION_OK
|
selectolax/lexbor/selection.pxi
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
cimport cython
|
|
2
|
+
from cpython.exc cimport PyErr_SetObject
|
|
3
|
+
from cpython.list cimport PyList_GET_SIZE
|
|
4
|
+
|
|
2
5
|
|
|
3
6
|
@cython.final
|
|
4
7
|
cdef class LexborCSSSelector:
|
|
@@ -8,21 +11,22 @@ cdef class LexborCSSSelector:
|
|
|
8
11
|
self.results = []
|
|
9
12
|
self.current_node = None
|
|
10
13
|
|
|
11
|
-
cdef _create_css_parser(self):
|
|
14
|
+
cdef int _create_css_parser(self) except -1:
|
|
12
15
|
cdef lxb_status_t status
|
|
13
16
|
|
|
14
|
-
|
|
15
17
|
self.parser = lxb_css_parser_create()
|
|
16
18
|
status = lxb_css_parser_init(self.parser, NULL)
|
|
17
19
|
|
|
18
20
|
if status != LXB_STATUS_OK:
|
|
19
|
-
|
|
21
|
+
PyErr_SetObject(SelectolaxError, "Can't initialize CSS parser.")
|
|
22
|
+
return -1
|
|
20
23
|
|
|
21
24
|
self.css_selectors = lxb_css_selectors_create()
|
|
22
25
|
status = lxb_css_selectors_init(self.css_selectors)
|
|
23
26
|
|
|
24
27
|
if status != LXB_STATUS_OK:
|
|
25
|
-
|
|
28
|
+
PyErr_SetObject(SelectolaxError, "Can't initialize CSS selector.")
|
|
29
|
+
return -1
|
|
26
30
|
|
|
27
31
|
lxb_css_parser_selectors_set(self.parser, self.css_selectors)
|
|
28
32
|
|
|
@@ -30,10 +34,11 @@ cdef class LexborCSSSelector:
|
|
|
30
34
|
status = lxb_selectors_init(self.selectors)
|
|
31
35
|
lxb_selectors_opt_set(self.selectors, LXB_SELECTORS_OPT_MATCH_ROOT)
|
|
32
36
|
if status != LXB_STATUS_OK:
|
|
33
|
-
|
|
37
|
+
PyErr_SetObject(SelectolaxError, "Can't initialize CSS selector.")
|
|
38
|
+
return -1
|
|
39
|
+
return 0
|
|
34
40
|
|
|
35
|
-
|
|
36
|
-
cpdef find(self, str query, LexborNode node):
|
|
41
|
+
cpdef list find(self, str query, LexborNode node):
|
|
37
42
|
cdef lxb_css_selector_list_t* selectors
|
|
38
43
|
cdef lxb_char_t* c_selector
|
|
39
44
|
cdef lxb_css_selector_list_t * selectors_list
|
|
@@ -57,10 +62,11 @@ cdef class LexborCSSSelector:
|
|
|
57
62
|
lxb_css_selector_list_destroy_memory(selectors_list)
|
|
58
63
|
return results
|
|
59
64
|
|
|
60
|
-
cpdef any_matches(self, str query, LexborNode node):
|
|
65
|
+
cpdef int any_matches(self, str query, LexborNode node) except -1:
|
|
61
66
|
cdef lxb_css_selector_list_t * selectors
|
|
62
67
|
cdef lxb_char_t * c_selector
|
|
63
68
|
cdef lxb_css_selector_list_t * selectors_list
|
|
69
|
+
cdef int result
|
|
64
70
|
|
|
65
71
|
if not isinstance(query, str):
|
|
66
72
|
raise TypeError("Query must be a string.")
|
|
@@ -69,20 +75,19 @@ cdef class LexborCSSSelector:
|
|
|
69
75
|
selectors_list = lxb_css_selectors_parse(self.parser, <lxb_char_t *> bytes_query, <size_t> len(query))
|
|
70
76
|
|
|
71
77
|
if selectors_list == NULL:
|
|
72
|
-
|
|
78
|
+
PyErr_SetObject(SelectolaxError, "Can't parse CSS selector.")
|
|
73
79
|
|
|
74
80
|
self.results = []
|
|
75
81
|
status = lxb_selectors_find(self.selectors, node.node, selectors_list,
|
|
76
82
|
<lxb_selectors_cb_f> css_matcher_callback, <void *> self)
|
|
77
83
|
if status != LXB_STATUS_OK:
|
|
78
84
|
lxb_css_selector_list_destroy_memory(selectors_list)
|
|
79
|
-
|
|
80
|
-
result =
|
|
85
|
+
PyErr_SetObject(SelectolaxError, "Can't parse CSS selector.")
|
|
86
|
+
result = PyList_GET_SIZE(self.results) > 0
|
|
81
87
|
self.results = []
|
|
82
88
|
lxb_css_selector_list_destroy_memory(selectors_list)
|
|
83
89
|
return result
|
|
84
90
|
|
|
85
|
-
|
|
86
91
|
def __dealloc__(self):
|
|
87
92
|
if self.selectors != NULL:
|
|
88
93
|
lxb_selectors_destroy(self.selectors, True)
|
|
@@ -92,7 +97,6 @@ cdef class LexborCSSSelector:
|
|
|
92
97
|
lxb_css_selectors_destroy(self.css_selectors, True)
|
|
93
98
|
|
|
94
99
|
|
|
95
|
-
|
|
96
100
|
cdef class LexborSelector:
|
|
97
101
|
"""An advanced CSS selector that supports additional operations.
|
|
98
102
|
|
|
@@ -107,10 +111,9 @@ cdef class LexborSelector:
|
|
|
107
111
|
self.node = node
|
|
108
112
|
self.nodes = self.node.parser.selector.find(query, self.node) if query else [node, ]
|
|
109
113
|
|
|
110
|
-
|
|
111
114
|
cpdef css(self, str query):
|
|
112
115
|
"""Evaluate CSS selector against current scope."""
|
|
113
|
-
raise
|
|
116
|
+
raise NotImplementedError("This features is not supported by the lexbor backend. Please use Modest backend.")
|
|
114
117
|
|
|
115
118
|
@property
|
|
116
119
|
def matches(self) -> list:
|
|
@@ -124,7 +127,7 @@ cdef class LexborSelector:
|
|
|
124
127
|
|
|
125
128
|
def text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> LexborSelector:
|
|
126
129
|
"""Filter all current matches given text."""
|
|
127
|
-
nodes = []
|
|
130
|
+
cdef list nodes = []
|
|
128
131
|
for node in self.nodes:
|
|
129
132
|
node_text = node.text(deep=deep, separator=separator, strip=strip)
|
|
130
133
|
if node_text and text in node_text:
|
|
@@ -134,7 +137,7 @@ cdef class LexborSelector:
|
|
|
134
137
|
|
|
135
138
|
def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> bool:
|
|
136
139
|
"""Returns True if any node in the current search scope contains specified text"""
|
|
137
|
-
|
|
140
|
+
cdef LexborNode node
|
|
138
141
|
for node in self.nodes:
|
|
139
142
|
node_text = node.text(deep=deep, separator=separator, strip=strip)
|
|
140
143
|
if node_text and text in node_text:
|
|
@@ -146,7 +149,7 @@ cdef class LexborSelector:
|
|
|
146
149
|
|
|
147
150
|
Similar to `string-length` in XPath.
|
|
148
151
|
"""
|
|
149
|
-
nodes = []
|
|
152
|
+
cdef list nodes = []
|
|
150
153
|
for node in self.nodes:
|
|
151
154
|
attr = node.attributes.get(attribute)
|
|
152
155
|
if attr and start and start in attr:
|
|
@@ -161,7 +164,7 @@ cdef class LexborSelector:
|
|
|
161
164
|
|
|
162
165
|
Similar to `string-length` in XPath.
|
|
163
166
|
"""
|
|
164
|
-
|
|
167
|
+
cdef LexborNode node
|
|
165
168
|
for node in self.nodes:
|
|
166
169
|
attr = node.attributes.get(attribute)
|
|
167
170
|
if attr and start and start in attr:
|
|
@@ -176,16 +179,15 @@ cdef class LexborSelector:
|
|
|
176
179
|
|
|
177
180
|
cdef lxb_status_t css_finder_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx):
|
|
178
181
|
cdef LexborNode lxb_node
|
|
179
|
-
cdef
|
|
180
|
-
cls = <
|
|
181
|
-
lxb_node = LexborNode()
|
|
182
|
-
lxb_node._cinit(<lxb_dom_node_t *> node, cls.current_node.parser)
|
|
182
|
+
cdef LexborCSSSelector cls
|
|
183
|
+
cls = <LexborCSSSelector> ctx
|
|
184
|
+
lxb_node = LexborNode.new(<lxb_dom_node_t *> node, cls.current_node.parser)
|
|
183
185
|
cls.results.append(lxb_node)
|
|
184
186
|
return LXB_STATUS_OK
|
|
185
187
|
|
|
186
188
|
cdef lxb_status_t css_matcher_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx):
|
|
187
189
|
cdef LexborNode lxb_node
|
|
188
|
-
cdef
|
|
189
|
-
cls = <
|
|
190
|
+
cdef LexborCSSSelector cls
|
|
191
|
+
cls = <LexborCSSSelector> ctx
|
|
190
192
|
cls.results.append(True)
|
|
191
193
|
return LXB_STATUS_STOP
|