selectolax 0.3.15__cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 0.3.28__cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of selectolax might be problematic. Click here for more details.
- selectolax/__init__.py +1 -1
- selectolax/lexbor/attrs.pxi +2 -1
- selectolax/lexbor/node.pxi +59 -1
- selectolax/lexbor/selection.pxi +14 -11
- selectolax/lexbor/util.pxi +19 -0
- selectolax/lexbor.c +23543 -12863
- selectolax/lexbor.cpython-38-aarch64-linux-gnu.so +0 -0
- selectolax/lexbor.pxd +7 -1
- selectolax/lexbor.pyi +91 -38
- selectolax/lexbor.pyx +6 -3
- selectolax/modest/node.pxi +53 -0
- selectolax/modest/selection.pxi +1 -1
- selectolax/modest/util.pxi +19 -0
- selectolax/parser.c +16862 -6964
- selectolax/parser.cpython-38-aarch64-linux-gnu.so +0 -0
- selectolax/parser.pyi +101 -37
- selectolax/parser.pyx +1 -2
- selectolax/utils.pxi +95 -1
- {selectolax-0.3.15.dist-info → selectolax-0.3.28.dist-info}/LICENSE +1 -1
- {selectolax-0.3.15.dist-info → selectolax-0.3.28.dist-info}/METADATA +17 -4
- selectolax-0.3.28.dist-info/RECORD +26 -0
- {selectolax-0.3.15.dist-info → selectolax-0.3.28.dist-info}/WHEEL +1 -1
- selectolax-0.3.15.dist-info/RECORD +0 -24
- {selectolax-0.3.15.dist-info → selectolax-0.3.28.dist-info}/top_level.txt +0 -0
selectolax/__init__.py
CHANGED
selectolax/lexbor/attrs.pxi
CHANGED
|
@@ -19,8 +19,9 @@ cdef class LexborAttributes:
|
|
|
19
19
|
|
|
20
20
|
while attr != NULL:
|
|
21
21
|
key = lxb_dom_attr_local_name_noi(attr, &str_len)
|
|
22
|
+
if key is not NULL:
|
|
23
|
+
yield key.decode(_ENCODING)
|
|
22
24
|
attr = attr.next
|
|
23
|
-
yield key.decode(_ENCODING)
|
|
24
25
|
|
|
25
26
|
def __setitem__(self, str key, value):
|
|
26
27
|
value = str(value)
|
selectolax/lexbor/node.pxi
CHANGED
|
@@ -27,6 +27,10 @@ cdef class LexborNode:
|
|
|
27
27
|
self.node = node
|
|
28
28
|
return self
|
|
29
29
|
|
|
30
|
+
@property
|
|
31
|
+
def mem_id(self):
|
|
32
|
+
return <size_t> self.node
|
|
33
|
+
|
|
30
34
|
@property
|
|
31
35
|
def child(self):
|
|
32
36
|
"""Alias for the `first_child` property."""
|
|
@@ -91,7 +95,7 @@ cdef class LexborNode:
|
|
|
91
95
|
text : str
|
|
92
96
|
"""
|
|
93
97
|
cdef lexbor_str_t *lxb_str
|
|
94
|
-
cdef lxb_status_t
|
|
98
|
+
cdef lxb_status_t status
|
|
95
99
|
|
|
96
100
|
lxb_str = lexbor_str_create()
|
|
97
101
|
status = lxb_html_serialize_tree_str(self.node, lxb_str)
|
|
@@ -101,6 +105,9 @@ cdef class LexborNode:
|
|
|
101
105
|
return html
|
|
102
106
|
return None
|
|
103
107
|
|
|
108
|
+
def __hash__(self):
|
|
109
|
+
return self.mem_id
|
|
110
|
+
|
|
104
111
|
def text_lexbor(self):
|
|
105
112
|
"""Returns the text of the node including text of all its child nodes.
|
|
106
113
|
|
|
@@ -648,6 +655,57 @@ cdef class LexborNode:
|
|
|
648
655
|
else:
|
|
649
656
|
raise SelectolaxError("Expected a string or LexborNode instance, but %s found" % type(value).__name__)
|
|
650
657
|
|
|
658
|
+
def insert_child(self, str_or_LexborNode value):
|
|
659
|
+
"""
|
|
660
|
+
Insert a node inside (at the end of) the current Node.
|
|
661
|
+
|
|
662
|
+
Parameters
|
|
663
|
+
----------
|
|
664
|
+
value : str, bytes or Node
|
|
665
|
+
The text or Node instance to insert inside the Node.
|
|
666
|
+
When a text string is passed, it's treated as text. All HTML tags will be escaped.
|
|
667
|
+
Convert and pass the ``Node`` object when you want to work with HTML.
|
|
668
|
+
Does not clone the ``Node`` object.
|
|
669
|
+
All future changes to the passed ``Node`` object will also be taken into account.
|
|
670
|
+
|
|
671
|
+
Examples
|
|
672
|
+
--------
|
|
673
|
+
|
|
674
|
+
>>> tree = LexborHTMLParser('<div>Get <img src=""></div>')
|
|
675
|
+
>>> div = tree.css_first('div')
|
|
676
|
+
>>> div.insert_child('Laptop')
|
|
677
|
+
>>> tree.body.child.html
|
|
678
|
+
'<div>Get <img src="">Laptop</div>'
|
|
679
|
+
|
|
680
|
+
>>> html_parser = LexborHTMLParser('<div>Get <span alt="Laptop"> <div>Laptop</div> </span></div>')
|
|
681
|
+
>>> html_parser2 = LexborHTMLParser('<div>Test</div>')
|
|
682
|
+
>>> span_node = html_parser.css_first('span')
|
|
683
|
+
>>> span_node.insert_child(html_parser2.body.child)
|
|
684
|
+
'<div>Get <span alt="Laptop"> <div>Laptop</div> <div>Test</div> </span></div>'
|
|
685
|
+
"""
|
|
686
|
+
cdef lxb_dom_node_t * new_node
|
|
687
|
+
|
|
688
|
+
if isinstance(value, (str, bytes, unicode)):
|
|
689
|
+
bytes_val = to_bytes(value)
|
|
690
|
+
new_node = <lxb_dom_node_t *> lxb_dom_document_create_text_node(
|
|
691
|
+
&self.parser.document.dom_document,
|
|
692
|
+
<lxb_char_t *> bytes_val, len(bytes_val)
|
|
693
|
+
)
|
|
694
|
+
if new_node == NULL:
|
|
695
|
+
raise SelectolaxError("Can't create a new node")
|
|
696
|
+
lxb_dom_node_insert_child(self.node, new_node)
|
|
697
|
+
elif isinstance(value, LexborNode):
|
|
698
|
+
new_node = lxb_dom_document_import_node(
|
|
699
|
+
&self.parser.document.dom_document,
|
|
700
|
+
<lxb_dom_node_t *> value.node,
|
|
701
|
+
<bint> True
|
|
702
|
+
)
|
|
703
|
+
if new_node == NULL:
|
|
704
|
+
raise SelectolaxError("Can't create a new node")
|
|
705
|
+
lxb_dom_node_insert_child(self.node, <lxb_dom_node_t *> new_node)
|
|
706
|
+
else:
|
|
707
|
+
raise SelectolaxError("Expected a string or LexborNode instance, but %s found" % type(value).__name__)
|
|
708
|
+
|
|
651
709
|
@property
|
|
652
710
|
def raw_value(self):
|
|
653
711
|
"""Return the raw (unparsed, original) value of a node.
|
selectolax/lexbor/selection.pxi
CHANGED
|
@@ -28,7 +28,7 @@ cdef class LexborCSSSelector:
|
|
|
28
28
|
|
|
29
29
|
self.selectors = lxb_selectors_create()
|
|
30
30
|
status = lxb_selectors_init(self.selectors)
|
|
31
|
-
|
|
31
|
+
lxb_selectors_opt_set(self.selectors, LXB_SELECTORS_OPT_MATCH_ROOT)
|
|
32
32
|
if status != LXB_STATUS_OK:
|
|
33
33
|
raise SelectolaxError("Can't initialize CSS selector.")
|
|
34
34
|
|
|
@@ -72,14 +72,17 @@ cdef class LexborCSSSelector:
|
|
|
72
72
|
raise SelectolaxError("Can't parse CSS selector.")
|
|
73
73
|
result = bool(self.results)
|
|
74
74
|
self.results = []
|
|
75
|
+
lxb_css_selector_list_destroy_memory(selectors_list)
|
|
75
76
|
return result
|
|
76
77
|
|
|
77
78
|
|
|
78
79
|
def __dealloc__(self):
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
80
|
+
if self.selectors != NULL:
|
|
81
|
+
lxb_selectors_destroy(self.selectors, True)
|
|
82
|
+
if self.parser != NULL:
|
|
83
|
+
lxb_css_parser_destroy(self.parser, True)
|
|
84
|
+
if self.css_selectors != NULL:
|
|
85
|
+
lxb_css_selectors_destroy(self.css_selectors, True)
|
|
83
86
|
|
|
84
87
|
|
|
85
88
|
|
|
@@ -103,16 +106,16 @@ cdef class LexborSelector:
|
|
|
103
106
|
raise SelectolaxError("This features is not supported by the lexbor backend. Please use Modest backend.")
|
|
104
107
|
|
|
105
108
|
@property
|
|
106
|
-
def matches(self):
|
|
109
|
+
def matches(self) -> list:
|
|
107
110
|
"""Returns all possible matches"""
|
|
108
111
|
return self.nodes
|
|
109
112
|
|
|
110
113
|
@property
|
|
111
|
-
def any_matches(self):
|
|
114
|
+
def any_matches(self) -> bool:
|
|
112
115
|
"""Returns True if there are any matches"""
|
|
113
116
|
return bool(self.nodes)
|
|
114
117
|
|
|
115
|
-
def text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
|
|
118
|
+
def text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> LexborSelector:
|
|
116
119
|
"""Filter all current matches given text."""
|
|
117
120
|
nodes = []
|
|
118
121
|
for node in self.nodes:
|
|
@@ -122,7 +125,7 @@ cdef class LexborSelector:
|
|
|
122
125
|
self.nodes = nodes
|
|
123
126
|
return self
|
|
124
127
|
|
|
125
|
-
def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
|
|
128
|
+
def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> bool:
|
|
126
129
|
"""Returns True if any node in the current search scope contains specified text"""
|
|
127
130
|
nodes = []
|
|
128
131
|
for node in self.nodes:
|
|
@@ -131,7 +134,7 @@ cdef class LexborSelector:
|
|
|
131
134
|
return True
|
|
132
135
|
return False
|
|
133
136
|
|
|
134
|
-
def attribute_longer_than(self, str attribute, int length, str start = None):
|
|
137
|
+
def attribute_longer_than(self, str attribute, int length, str start = None) -> LexborSelector:
|
|
135
138
|
"""Filter all current matches by attribute length.
|
|
136
139
|
|
|
137
140
|
Similar to `string-length` in XPath.
|
|
@@ -146,7 +149,7 @@ cdef class LexborSelector:
|
|
|
146
149
|
self.nodes = nodes
|
|
147
150
|
return self
|
|
148
151
|
|
|
149
|
-
def any_attribute_longer_than(self, str attribute, int length, str start = None):
|
|
152
|
+
def any_attribute_longer_than(self, str attribute, int length, str start = None) -> bool:
|
|
150
153
|
"""Returns True if any matched attribute is longer than a specified length.
|
|
151
154
|
|
|
152
155
|
Similar to `string-length` in XPath.
|
|
selectolax/lexbor/util.pxi
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
include "../utils.pxi"
|
|
2
|
+
|
|
3
|
+
def create_tag(tag: str):
|
|
4
|
+
"""
|
|
5
|
+
Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,
|
|
6
|
+
e.g. `"<div></div>"`.
|
|
7
|
+
"""
|
|
8
|
+
return do_create_tag(tag, LexborHTMLParser)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def parse_fragment(html: str):
|
|
12
|
+
"""
|
|
13
|
+
Given HTML, parse it into a list of Nodes, such that the nodes
|
|
14
|
+
correspond to the given HTML.
|
|
15
|
+
|
|
16
|
+
For contrast, HTMLParser adds `<html>`, `<head>`, and `<body>` tags
|
|
17
|
+
if they are missing. This function does not add these tags.
|
|
18
|
+
"""
|
|
19
|
+
return do_parse_fragment(html, LexborHTMLParser)
|