selectolax 0.3.25__cp37-cp37m-macosx_10_9_x86_64.whl → 0.3.27__cp37-cp37m-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of selectolax might be problematic. Click here for more details.
- selectolax/__init__.py +1 -1
- selectolax/lexbor.c +14 -14
- selectolax/lexbor.cpython-37m-darwin.so +0 -0
- selectolax/lexbor.pyi +71 -40
- selectolax/parser.c +14 -14
- selectolax/parser.cpython-37m-darwin.so +0 -0
- selectolax/parser.pyi +69 -38
- selectolax/utils.pxi +1 -1
- {selectolax-0.3.25.dist-info → selectolax-0.3.27.dist-info}/METADATA +1 -1
- {selectolax-0.3.25.dist-info → selectolax-0.3.27.dist-info}/RECORD +13 -13
- {selectolax-0.3.25.dist-info → selectolax-0.3.27.dist-info}/LICENSE +0 -0
- {selectolax-0.3.25.dist-info → selectolax-0.3.27.dist-info}/WHEEL +0 -0
- {selectolax-0.3.25.dist-info → selectolax-0.3.27.dist-info}/top_level.txt +0 -0
selectolax/__init__.py
CHANGED
selectolax/lexbor.c
CHANGED
|
@@ -3494,7 +3494,6 @@ static const char __pyx_k_AssertionError[] = "AssertionError";
|
|
|
3494
3494
|
static const char __pyx_k_LexborNode_css[] = "LexborNode.css";
|
|
3495
3495
|
static const char __pyx_k_LexborSelector[] = "LexborSelector";
|
|
3496
3496
|
static const char __pyx_k_fused_sigindex[] = "_fused_sigindex";
|
|
3497
|
-
static const char __pyx_k_html_body_head[] = "<html|<body|<head";
|
|
3498
3497
|
static const char __pyx_k_parse_fragment[] = "parse_fragment";
|
|
3499
3498
|
static const char __pyx_k_LexborNode_iter[] = "LexborNode.iter";
|
|
3500
3499
|
static const char __pyx_k_LexborNode_text[] = "LexborNode.text";
|
|
@@ -3518,6 +3517,7 @@ static const char __pyx_k_LexborNode_unwrap[] = "LexborNode.unwrap";
|
|
|
3518
3517
|
static const char __pyx_k_any_text_contains[] = "any_text_contains";
|
|
3519
3518
|
static const char __pyx_k_do_parse_fragment[] = "do_parse_fragment";
|
|
3520
3519
|
static const char __pyx_k_get_fragment_type[] = "get_fragment_type";
|
|
3520
|
+
static const char __pyx_k_html_body_head_er[] = "<html|<body|<head(?!er)";
|
|
3521
3521
|
static const char __pyx_k_selectolax_lexbor[] = "selectolax.lexbor";
|
|
3522
3522
|
static const char __pyx_k_Can_t_extract_text[] = "Can't extract text";
|
|
3523
3523
|
static const char __pyx_k_LexborSelector_css[] = "LexborSelector.css";
|
|
@@ -4160,7 +4160,7 @@ typedef struct {
|
|
|
4160
4160
|
PyObject *__pyx_n_u_head_and_body;
|
|
4161
4161
|
PyObject *__pyx_n_s_html;
|
|
4162
4162
|
PyObject *__pyx_kp_u_html_2;
|
|
4163
|
-
PyObject *
|
|
4163
|
+
PyObject *__pyx_kp_u_html_body_head_er;
|
|
4164
4164
|
PyObject *__pyx_n_s_html_len;
|
|
4165
4165
|
PyObject *__pyx_n_s_html_re;
|
|
4166
4166
|
PyObject *__pyx_n_s_i;
|
|
@@ -4724,7 +4724,7 @@ static int __pyx_m_clear(PyObject *m) {
|
|
|
4724
4724
|
Py_CLEAR(clear_module_state->__pyx_n_u_head_and_body);
|
|
4725
4725
|
Py_CLEAR(clear_module_state->__pyx_n_s_html);
|
|
4726
4726
|
Py_CLEAR(clear_module_state->__pyx_kp_u_html_2);
|
|
4727
|
-
Py_CLEAR(clear_module_state->
|
|
4727
|
+
Py_CLEAR(clear_module_state->__pyx_kp_u_html_body_head_er);
|
|
4728
4728
|
Py_CLEAR(clear_module_state->__pyx_n_s_html_len);
|
|
4729
4729
|
Py_CLEAR(clear_module_state->__pyx_n_s_html_re);
|
|
4730
4730
|
Py_CLEAR(clear_module_state->__pyx_n_s_i);
|
|
@@ -5266,7 +5266,7 @@ static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) {
|
|
|
5266
5266
|
Py_VISIT(traverse_module_state->__pyx_n_u_head_and_body);
|
|
5267
5267
|
Py_VISIT(traverse_module_state->__pyx_n_s_html);
|
|
5268
5268
|
Py_VISIT(traverse_module_state->__pyx_kp_u_html_2);
|
|
5269
|
-
Py_VISIT(traverse_module_state->
|
|
5269
|
+
Py_VISIT(traverse_module_state->__pyx_kp_u_html_body_head_er);
|
|
5270
5270
|
Py_VISIT(traverse_module_state->__pyx_n_s_html_len);
|
|
5271
5271
|
Py_VISIT(traverse_module_state->__pyx_n_s_html_re);
|
|
5272
5272
|
Py_VISIT(traverse_module_state->__pyx_n_s_i);
|
|
@@ -5908,7 +5908,7 @@ static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) {
|
|
|
5908
5908
|
#define __pyx_n_u_head_and_body __pyx_mstate_global->__pyx_n_u_head_and_body
|
|
5909
5909
|
#define __pyx_n_s_html __pyx_mstate_global->__pyx_n_s_html
|
|
5910
5910
|
#define __pyx_kp_u_html_2 __pyx_mstate_global->__pyx_kp_u_html_2
|
|
5911
|
-
#define
|
|
5911
|
+
#define __pyx_kp_u_html_body_head_er __pyx_mstate_global->__pyx_kp_u_html_body_head_er
|
|
5912
5912
|
#define __pyx_n_s_html_len __pyx_mstate_global->__pyx_n_s_html_len
|
|
5913
5913
|
#define __pyx_n_s_html_re __pyx_mstate_global->__pyx_n_s_html_re
|
|
5914
5914
|
#define __pyx_n_s_i __pyx_mstate_global->__pyx_n_s_i
|
|
@@ -7298,7 +7298,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_4get_fragment_type(CYTHON_UNUSED
|
|
|
7298
7298
|
* tree = parser_cls(html)
|
|
7299
7299
|
*
|
|
7300
7300
|
* import re # <<<<<<<<<<<<<<
|
|
7301
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
7301
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
7302
7302
|
*
|
|
7303
7303
|
*/
|
|
7304
7304
|
__pyx_t_3 = __Pyx_ImportDottedModule(__pyx_n_s_re, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 36, __pyx_L1_error)
|
|
@@ -7309,7 +7309,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_4get_fragment_type(CYTHON_UNUSED
|
|
|
7309
7309
|
/* "selectolax/utils.pxi":37
|
|
7310
7310
|
*
|
|
7311
7311
|
* import re
|
|
7312
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
7312
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
7313
7313
|
*
|
|
7314
7314
|
* has_html = False
|
|
7315
7315
|
*/
|
|
@@ -7332,7 +7332,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_4get_fragment_type(CYTHON_UNUSED
|
|
|
7332
7332
|
}
|
|
7333
7333
|
#endif
|
|
7334
7334
|
{
|
|
7335
|
-
PyObject *__pyx_callargs[3] = {__pyx_t_7,
|
|
7335
|
+
PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_kp_u_html_body_head_er, __pyx_t_5};
|
|
7336
7336
|
__pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6);
|
|
7337
7337
|
__Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
|
|
7338
7338
|
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
|
|
@@ -7344,7 +7344,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_4get_fragment_type(CYTHON_UNUSED
|
|
|
7344
7344
|
__pyx_t_3 = 0;
|
|
7345
7345
|
|
|
7346
7346
|
/* "selectolax/utils.pxi":39
|
|
7347
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
7347
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
7348
7348
|
*
|
|
7349
7349
|
* has_html = False # <<<<<<<<<<<<<<
|
|
7350
7350
|
* has_head = False
|
|
@@ -33919,7 +33919,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_12get_fragment_type(CYTHON_UNUSED
|
|
|
33919
33919
|
* tree = parser_cls(html)
|
|
33920
33920
|
*
|
|
33921
33921
|
* import re # <<<<<<<<<<<<<<
|
|
33922
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
33922
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
33923
33923
|
*
|
|
33924
33924
|
*/
|
|
33925
33925
|
__pyx_t_3 = __Pyx_ImportDottedModule(__pyx_n_s_re, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(8, 36, __pyx_L1_error)
|
|
@@ -33930,7 +33930,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_12get_fragment_type(CYTHON_UNUSED
|
|
|
33930
33930
|
/* "selectolax/utils.pxi":37
|
|
33931
33931
|
*
|
|
33932
33932
|
* import re
|
|
33933
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
33933
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
33934
33934
|
*
|
|
33935
33935
|
* has_html = False
|
|
33936
33936
|
*/
|
|
@@ -33953,7 +33953,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_12get_fragment_type(CYTHON_UNUSED
|
|
|
33953
33953
|
}
|
|
33954
33954
|
#endif
|
|
33955
33955
|
{
|
|
33956
|
-
PyObject *__pyx_callargs[3] = {__pyx_t_7,
|
|
33956
|
+
PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_kp_u_html_body_head_er, __pyx_t_5};
|
|
33957
33957
|
__pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6);
|
|
33958
33958
|
__Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
|
|
33959
33959
|
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
|
|
@@ -33965,7 +33965,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_12get_fragment_type(CYTHON_UNUSED
|
|
|
33965
33965
|
__pyx_t_3 = 0;
|
|
33966
33966
|
|
|
33967
33967
|
/* "selectolax/utils.pxi":39
|
|
33968
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
33968
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
33969
33969
|
*
|
|
33970
33970
|
* has_html = False # <<<<<<<<<<<<<<
|
|
33971
33971
|
* has_head = False
|
|
@@ -43679,7 +43679,7 @@ static int __Pyx_CreateStringTabAndInitStrings(void) {
|
|
|
43679
43679
|
{&__pyx_n_u_head_and_body, __pyx_k_head_and_body, sizeof(__pyx_k_head_and_body), 0, 1, 0, 1},
|
|
43680
43680
|
{&__pyx_n_s_html, __pyx_k_html, sizeof(__pyx_k_html), 0, 0, 1, 1},
|
|
43681
43681
|
{&__pyx_kp_u_html_2, __pyx_k_html_2, sizeof(__pyx_k_html_2), 0, 1, 0, 0},
|
|
43682
|
-
{&
|
|
43682
|
+
{&__pyx_kp_u_html_body_head_er, __pyx_k_html_body_head_er, sizeof(__pyx_k_html_body_head_er), 0, 1, 0, 0},
|
|
43683
43683
|
{&__pyx_n_s_html_len, __pyx_k_html_len, sizeof(__pyx_k_html_len), 0, 0, 1, 1},
|
|
43684
43684
|
{&__pyx_n_s_html_re, __pyx_k_html_re, sizeof(__pyx_k_html_re), 0, 0, 1, 1},
|
|
43685
43685
|
{&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1},
|
|
Binary file
|
selectolax/lexbor.pyi
CHANGED
|
@@ -1,14 +1,27 @@
|
|
|
1
|
-
from typing import Iterator, TypeVar, NoReturn
|
|
1
|
+
from typing import Any, Iterator, Literal, TypeVar, NoReturn, overload
|
|
2
2
|
|
|
3
3
|
DefaultT = TypeVar("DefaultT")
|
|
4
4
|
|
|
5
5
|
class LexborAttributes:
|
|
6
6
|
@staticmethod
|
|
7
|
-
def create(node:
|
|
7
|
+
def create(node: LexborAttributes) -> LexborAttributes: ...
|
|
8
8
|
def keys(self) -> Iterator[str]: ...
|
|
9
|
-
def items(self) -> Iterator[tuple[str, str]]: ...
|
|
10
|
-
def values(self) -> Iterator[str]: ...
|
|
11
|
-
def
|
|
9
|
+
def items(self) -> Iterator[tuple[str, str | None]]: ...
|
|
10
|
+
def values(self) -> Iterator[str | None]: ...
|
|
11
|
+
def __iter__(self) -> Iterator[str]: ...
|
|
12
|
+
def __len__(self) -> int: ...
|
|
13
|
+
def __getitem__(self, key: str) -> str | None: ...
|
|
14
|
+
def __setitem__(self, key: str, value: str) -> None: ...
|
|
15
|
+
def __delitem__(self, key: str) -> None: ...
|
|
16
|
+
def __contains__(self, key: str) -> bool: ...
|
|
17
|
+
def __repr__(self) -> str: ...
|
|
18
|
+
@overload
|
|
19
|
+
def get(self, key: str, default: DefaultT) -> DefaultT | str | None: ...
|
|
20
|
+
@overload
|
|
21
|
+
def get(self, key: str, default: None = ...) -> str | None: ...
|
|
22
|
+
@overload
|
|
23
|
+
def sget(self, key: str, default: str | DefaultT) -> str | DefaultT: ...
|
|
24
|
+
@overload
|
|
12
25
|
def sget(self, key: str, default: str = "") -> str: ...
|
|
13
26
|
|
|
14
27
|
class LexborSelector:
|
|
@@ -20,38 +33,38 @@ class LexborSelector:
|
|
|
20
33
|
def any_matches(self) -> bool: ...
|
|
21
34
|
def text_contains(
|
|
22
35
|
self, text: str, deep: bool = True, separator: str = "", strip: bool = False
|
|
23
|
-
) ->
|
|
36
|
+
) -> LexborSelector: ...
|
|
24
37
|
def any_text_contains(
|
|
25
38
|
self, text: str, deep: bool = True, separator: str = "", strip: bool = False
|
|
26
39
|
) -> bool: ...
|
|
27
40
|
def attribute_longer_than(
|
|
28
41
|
self, attribute: str, length: int, start: str | None = None
|
|
29
|
-
) ->
|
|
42
|
+
) -> LexborSelector: ...
|
|
30
43
|
def any_attribute_longer_than(
|
|
31
44
|
self, attribute: str, length: int, start: str | None = None
|
|
32
45
|
) -> bool: ...
|
|
33
46
|
|
|
34
47
|
class LexborCSSSelector:
|
|
35
48
|
def __init__(self): ...
|
|
36
|
-
def find(self, query: str, node:
|
|
37
|
-
def any_matches(self, query: str, node:
|
|
49
|
+
def find(self, query: str, node: LexborNode) -> list[LexborNode]: ...
|
|
50
|
+
def any_matches(self, query: str, node: LexborNode) -> bool: ...
|
|
38
51
|
|
|
39
52
|
class LexborNode:
|
|
40
|
-
parser:
|
|
53
|
+
parser: LexborHTMLParser
|
|
41
54
|
@property
|
|
42
55
|
def mem_id(self) -> int: ...
|
|
43
56
|
@property
|
|
44
|
-
def child(self) ->
|
|
57
|
+
def child(self) -> LexborNode | None: ...
|
|
45
58
|
@property
|
|
46
|
-
def first_child(self) ->
|
|
59
|
+
def first_child(self) -> LexborNode | None: ...
|
|
47
60
|
@property
|
|
48
|
-
def parent(self) ->
|
|
61
|
+
def parent(self) -> LexborNode | None: ...
|
|
49
62
|
@property
|
|
50
|
-
def next(self) ->
|
|
63
|
+
def next(self) -> LexborNode | None: ...
|
|
51
64
|
@property
|
|
52
|
-
def prev(self) ->
|
|
65
|
+
def prev(self) -> LexborNode | None: ...
|
|
53
66
|
@property
|
|
54
|
-
def last_child(self) ->
|
|
67
|
+
def last_child(self) -> LexborNode | None: ...
|
|
55
68
|
@property
|
|
56
69
|
def html(self) -> str | None: ...
|
|
57
70
|
def __hash__(self) -> int: ...
|
|
@@ -59,10 +72,19 @@ class LexborNode:
|
|
|
59
72
|
def text(
|
|
60
73
|
self, deep: bool = True, separator: str = "", strip: bool = False
|
|
61
74
|
) -> str: ...
|
|
62
|
-
def css(self, query: str) -> list[
|
|
75
|
+
def css(self, query: str) -> list[LexborNode]: ...
|
|
76
|
+
@overload
|
|
63
77
|
def css_first(
|
|
64
|
-
self, query: str, default:
|
|
65
|
-
) ->
|
|
78
|
+
self, query: str, default: Any = ..., strict: Literal[True] = ...
|
|
79
|
+
) -> LexborNode: ...
|
|
80
|
+
@overload
|
|
81
|
+
def css_first(
|
|
82
|
+
self, query: str, default: DefaultT, strict: bool = False
|
|
83
|
+
) -> LexborNode | DefaultT: ...
|
|
84
|
+
@overload
|
|
85
|
+
def css_first(
|
|
86
|
+
self, query: str, default: None = ..., strict: bool = False
|
|
87
|
+
) -> LexborNode | None: ...
|
|
66
88
|
def any_css_matches(self, selectors: tuple[str]) -> bool: ...
|
|
67
89
|
def css_matches(self, selector: str) -> bool: ...
|
|
68
90
|
@property
|
|
@@ -74,23 +96,23 @@ class LexborNode:
|
|
|
74
96
|
@property
|
|
75
97
|
def attributes(self) -> dict[str, str | None]: ...
|
|
76
98
|
@property
|
|
77
|
-
def attrs(self) ->
|
|
99
|
+
def attrs(self) -> LexborAttributes: ...
|
|
78
100
|
@property
|
|
79
|
-
def id(self) ->
|
|
80
|
-
def iter(self, include_text: bool = False) -> Iterator[
|
|
101
|
+
def id(self) -> str | None: ...
|
|
102
|
+
def iter(self, include_text: bool = False) -> Iterator[LexborNode]: ...
|
|
81
103
|
def unwrap(self) -> None: ...
|
|
82
104
|
def unwrap_tags(self, tags: list[str]) -> None: ...
|
|
83
|
-
def traverse(self, include_text: bool = False) -> Iterator[
|
|
84
|
-
def replace_with(self, value: bytes | str |
|
|
85
|
-
def insert_before(self, value: bytes | str |
|
|
86
|
-
def insert_after(self, value: bytes | str |
|
|
87
|
-
def insert_child(self, value: bytes | str |
|
|
105
|
+
def traverse(self, include_text: bool = False) -> Iterator[LexborNode]: ...
|
|
106
|
+
def replace_with(self, value: bytes | str | LexborNode) -> None: ...
|
|
107
|
+
def insert_before(self, value: bytes | str | LexborNode) -> None: ...
|
|
108
|
+
def insert_after(self, value: bytes | str | LexborNode) -> None: ...
|
|
109
|
+
def insert_child(self, value: bytes | str | LexborNode) -> None: ...
|
|
88
110
|
@property
|
|
89
111
|
def raw_value(self) -> NoReturn: ...
|
|
90
112
|
def scripts_contain(self, query: str) -> bool: ...
|
|
91
113
|
def scripts_srcs_contain(self, queries: tuple[str]) -> bool: ...
|
|
92
114
|
def remove(self, recursive: bool = True) -> None: ...
|
|
93
|
-
def select(self, query: str | None = None) ->
|
|
115
|
+
def select(self, query: str | None = None) -> LexborSelector: ...
|
|
94
116
|
@property
|
|
95
117
|
def text_content(self) -> str | None: ...
|
|
96
118
|
|
|
@@ -99,38 +121,47 @@ class LexborHTMLParser:
|
|
|
99
121
|
@property
|
|
100
122
|
def selector(self) -> "LexborCSSSelector": ...
|
|
101
123
|
@property
|
|
102
|
-
def root(self) ->
|
|
124
|
+
def root(self) -> LexborNode | None: ...
|
|
103
125
|
@property
|
|
104
|
-
def body(self) ->
|
|
126
|
+
def body(self) -> LexborNode | None: ...
|
|
105
127
|
@property
|
|
106
|
-
def head(self) ->
|
|
107
|
-
def tags(self, name: str) -> list[
|
|
128
|
+
def head(self) -> LexborNode | None: ...
|
|
129
|
+
def tags(self, name: str) -> list[LexborNode]: ...
|
|
108
130
|
def text(
|
|
109
131
|
self, deep: bool = True, separator: str = "", strip: bool = False
|
|
110
132
|
) -> str: ...
|
|
111
133
|
@property
|
|
112
|
-
def html(self) ->
|
|
113
|
-
def css(self, query: str) -> list[
|
|
134
|
+
def html(self) -> str | None: ...
|
|
135
|
+
def css(self, query: str) -> list[LexborNode]: ...
|
|
136
|
+
@overload
|
|
137
|
+
def css_first(
|
|
138
|
+
self, query: str, default: Any = ..., strict: Literal[True] = ...
|
|
139
|
+
) -> LexborNode: ...
|
|
140
|
+
@overload
|
|
141
|
+
def css_first(
|
|
142
|
+
self, query: str, default: DefaultT, strict: bool = False
|
|
143
|
+
) -> LexborNode | DefaultT: ...
|
|
144
|
+
@overload
|
|
114
145
|
def css_first(
|
|
115
|
-
self, query: str, default:
|
|
116
|
-
) ->
|
|
146
|
+
self, query: str, default: None = ..., strict: bool = False
|
|
147
|
+
) -> LexborNode | None: ...
|
|
117
148
|
def strip_tags(self, tags: list[str], recursive: bool = False) -> None: ...
|
|
118
|
-
def select(self, query: str | None = None) ->
|
|
149
|
+
def select(self, query: str | None = None) -> LexborSelector | None: ...
|
|
119
150
|
def any_css_matches(self, selectors: tuple[str]) -> bool: ...
|
|
120
151
|
def scripts_contain(self, query: str) -> bool: ...
|
|
121
152
|
def scripts_srcs_contain(self, queries: tuple[str]) -> bool: ...
|
|
122
153
|
def css_matches(self, selector: str) -> bool: ...
|
|
123
|
-
def clone(self) ->
|
|
154
|
+
def clone(self) -> LexborHTMLParser: ...
|
|
124
155
|
def unwrap_tags(self, tags: list[str]) -> None: ...
|
|
125
156
|
|
|
126
|
-
def create_tag(tag: str) ->
|
|
157
|
+
def create_tag(tag: str) -> LexborNode:
|
|
127
158
|
"""
|
|
128
159
|
Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,
|
|
129
160
|
e.g. `"<div></div>"`.
|
|
130
161
|
"""
|
|
131
162
|
...
|
|
132
163
|
|
|
133
|
-
def parse_fragment(html: str) -> list[
|
|
164
|
+
def parse_fragment(html: str) -> list[LexborNode]:
|
|
134
165
|
"""
|
|
135
166
|
Given HTML, parse it into a list of Nodes, such that the nodes
|
|
136
167
|
correspond to the given HTML.
|
selectolax/parser.c
CHANGED
|
@@ -3482,7 +3482,6 @@ static const char __pyx_k_HTMLParser_css[] = "HTMLParser.css";
|
|
|
3482
3482
|
static const char __pyx_k_Node_css_first[] = "Node.css_first";
|
|
3483
3483
|
static const char __pyx_k_Node_decompose[] = "Node.decompose";
|
|
3484
3484
|
static const char __pyx_k_fused_sigindex[] = "_fused_sigindex";
|
|
3485
|
-
static const char __pyx_k_html_body_head[] = "<html|<body|<head";
|
|
3486
3485
|
static const char __pyx_k_parse_fragment[] = "parse_fragment";
|
|
3487
3486
|
static const char __pyx_k_Attributes_keys[] = "_Attributes.keys";
|
|
3488
3487
|
static const char __pyx_k_Attributes_sget[] = "_Attributes.sget";
|
|
@@ -3513,6 +3512,7 @@ static const char __pyx_k_Node_replace_with[] = "Node.replace_with";
|
|
|
3513
3512
|
static const char __pyx_k_any_text_contains[] = "any_text_contains";
|
|
3514
3513
|
static const char __pyx_k_do_parse_fragment[] = "do_parse_fragment";
|
|
3515
3514
|
static const char __pyx_k_get_fragment_type[] = "get_fragment_type";
|
|
3515
|
+
static const char __pyx_k_html_body_head_er[] = "<html|<body|<head(?!er)";
|
|
3516
3516
|
static const char __pyx_k_selectolax_parser[] = "selectolax.parser";
|
|
3517
3517
|
static const char __pyx_k_Can_t_parse_HTML_s[] = "Can't parse HTML:\n%s";
|
|
3518
3518
|
static const char __pyx_k_HTMLParser_chars_s[] = "<HTMLParser chars=%s>";
|
|
@@ -4119,7 +4119,7 @@ typedef struct {
|
|
|
4119
4119
|
PyObject *__pyx_n_u_head_and_body;
|
|
4120
4120
|
PyObject *__pyx_n_s_html;
|
|
4121
4121
|
PyObject *__pyx_kp_u_html_2;
|
|
4122
|
-
PyObject *
|
|
4122
|
+
PyObject *__pyx_kp_u_html_body_head_er;
|
|
4123
4123
|
PyObject *__pyx_n_s_html_len;
|
|
4124
4124
|
PyObject *__pyx_n_s_html_re;
|
|
4125
4125
|
PyObject *__pyx_n_s_html_tree;
|
|
@@ -4663,7 +4663,7 @@ static int __pyx_m_clear(PyObject *m) {
|
|
|
4663
4663
|
Py_CLEAR(clear_module_state->__pyx_n_u_head_and_body);
|
|
4664
4664
|
Py_CLEAR(clear_module_state->__pyx_n_s_html);
|
|
4665
4665
|
Py_CLEAR(clear_module_state->__pyx_kp_u_html_2);
|
|
4666
|
-
Py_CLEAR(clear_module_state->
|
|
4666
|
+
Py_CLEAR(clear_module_state->__pyx_kp_u_html_body_head_er);
|
|
4667
4667
|
Py_CLEAR(clear_module_state->__pyx_n_s_html_len);
|
|
4668
4668
|
Py_CLEAR(clear_module_state->__pyx_n_s_html_re);
|
|
4669
4669
|
Py_CLEAR(clear_module_state->__pyx_n_s_html_tree);
|
|
@@ -5185,7 +5185,7 @@ static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) {
|
|
|
5185
5185
|
Py_VISIT(traverse_module_state->__pyx_n_u_head_and_body);
|
|
5186
5186
|
Py_VISIT(traverse_module_state->__pyx_n_s_html);
|
|
5187
5187
|
Py_VISIT(traverse_module_state->__pyx_kp_u_html_2);
|
|
5188
|
-
Py_VISIT(traverse_module_state->
|
|
5188
|
+
Py_VISIT(traverse_module_state->__pyx_kp_u_html_body_head_er);
|
|
5189
5189
|
Py_VISIT(traverse_module_state->__pyx_n_s_html_len);
|
|
5190
5190
|
Py_VISIT(traverse_module_state->__pyx_n_s_html_re);
|
|
5191
5191
|
Py_VISIT(traverse_module_state->__pyx_n_s_html_tree);
|
|
@@ -5807,7 +5807,7 @@ static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) {
|
|
|
5807
5807
|
#define __pyx_n_u_head_and_body __pyx_mstate_global->__pyx_n_u_head_and_body
|
|
5808
5808
|
#define __pyx_n_s_html __pyx_mstate_global->__pyx_n_s_html
|
|
5809
5809
|
#define __pyx_kp_u_html_2 __pyx_mstate_global->__pyx_kp_u_html_2
|
|
5810
|
-
#define
|
|
5810
|
+
#define __pyx_kp_u_html_body_head_er __pyx_mstate_global->__pyx_kp_u_html_body_head_er
|
|
5811
5811
|
#define __pyx_n_s_html_len __pyx_mstate_global->__pyx_n_s_html_len
|
|
5812
5812
|
#define __pyx_n_s_html_re __pyx_mstate_global->__pyx_n_s_html_re
|
|
5813
5813
|
#define __pyx_n_s_html_tree __pyx_mstate_global->__pyx_n_s_html_tree
|
|
@@ -29017,7 +29017,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_4get_fragment_type(CYTHON_UNUSED
|
|
|
29017
29017
|
* tree = parser_cls(html)
|
|
29018
29018
|
*
|
|
29019
29019
|
* import re # <<<<<<<<<<<<<<
|
|
29020
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
29020
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
29021
29021
|
*
|
|
29022
29022
|
*/
|
|
29023
29023
|
__pyx_t_3 = __Pyx_ImportDottedModule(__pyx_n_s_re, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(5, 36, __pyx_L1_error)
|
|
@@ -29028,7 +29028,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_4get_fragment_type(CYTHON_UNUSED
|
|
|
29028
29028
|
/* "selectolax/utils.pxi":37
|
|
29029
29029
|
*
|
|
29030
29030
|
* import re
|
|
29031
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
29031
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
29032
29032
|
*
|
|
29033
29033
|
* has_html = False
|
|
29034
29034
|
*/
|
|
@@ -29051,7 +29051,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_4get_fragment_type(CYTHON_UNUSED
|
|
|
29051
29051
|
}
|
|
29052
29052
|
#endif
|
|
29053
29053
|
{
|
|
29054
|
-
PyObject *__pyx_callargs[3] = {__pyx_t_7,
|
|
29054
|
+
PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_kp_u_html_body_head_er, __pyx_t_5};
|
|
29055
29055
|
__pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6);
|
|
29056
29056
|
__Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
|
|
29057
29057
|
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
|
|
@@ -29063,7 +29063,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_4get_fragment_type(CYTHON_UNUSED
|
|
|
29063
29063
|
__pyx_t_3 = 0;
|
|
29064
29064
|
|
|
29065
29065
|
/* "selectolax/utils.pxi":39
|
|
29066
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
29066
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
29067
29067
|
*
|
|
29068
29068
|
* has_html = False # <<<<<<<<<<<<<<
|
|
29069
29069
|
* has_head = False
|
|
@@ -31536,7 +31536,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_16get_fragment_type(CYTHON_UNUSED
|
|
|
31536
31536
|
* tree = parser_cls(html)
|
|
31537
31537
|
*
|
|
31538
31538
|
* import re # <<<<<<<<<<<<<<
|
|
31539
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
31539
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
31540
31540
|
*
|
|
31541
31541
|
*/
|
|
31542
31542
|
__pyx_t_3 = __Pyx_ImportDottedModule(__pyx_n_s_re, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(7, 36, __pyx_L1_error)
|
|
@@ -31547,7 +31547,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_16get_fragment_type(CYTHON_UNUSED
|
|
|
31547
31547
|
/* "selectolax/utils.pxi":37
|
|
31548
31548
|
*
|
|
31549
31549
|
* import re
|
|
31550
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
31550
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
31551
31551
|
*
|
|
31552
31552
|
* has_html = False
|
|
31553
31553
|
*/
|
|
@@ -31570,7 +31570,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_16get_fragment_type(CYTHON_UNUSED
|
|
|
31570
31570
|
}
|
|
31571
31571
|
#endif
|
|
31572
31572
|
{
|
|
31573
|
-
PyObject *__pyx_callargs[3] = {__pyx_t_7,
|
|
31573
|
+
PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_kp_u_html_body_head_er, __pyx_t_5};
|
|
31574
31574
|
__pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6);
|
|
31575
31575
|
__Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
|
|
31576
31576
|
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
|
|
@@ -31582,7 +31582,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_16get_fragment_type(CYTHON_UNUSED
|
|
|
31582
31582
|
__pyx_t_3 = 0;
|
|
31583
31583
|
|
|
31584
31584
|
/* "selectolax/utils.pxi":39
|
|
31585
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
31585
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
31586
31586
|
*
|
|
31587
31587
|
* has_html = False # <<<<<<<<<<<<<<
|
|
31588
31588
|
* has_head = False
|
|
@@ -41216,7 +41216,7 @@ static int __Pyx_CreateStringTabAndInitStrings(void) {
|
|
|
41216
41216
|
{&__pyx_n_u_head_and_body, __pyx_k_head_and_body, sizeof(__pyx_k_head_and_body), 0, 1, 0, 1},
|
|
41217
41217
|
{&__pyx_n_s_html, __pyx_k_html, sizeof(__pyx_k_html), 0, 0, 1, 1},
|
|
41218
41218
|
{&__pyx_kp_u_html_2, __pyx_k_html_2, sizeof(__pyx_k_html_2), 0, 1, 0, 0},
|
|
41219
|
-
{&
|
|
41219
|
+
{&__pyx_kp_u_html_body_head_er, __pyx_k_html_body_head_er, sizeof(__pyx_k_html_body_head_er), 0, 1, 0, 0},
|
|
41220
41220
|
{&__pyx_n_s_html_len, __pyx_k_html_len, sizeof(__pyx_k_html_len), 0, 0, 1, 1},
|
|
41221
41221
|
{&__pyx_n_s_html_re, __pyx_k_html_re, sizeof(__pyx_k_html_re), 0, 0, 1, 1},
|
|
41222
41222
|
{&__pyx_n_s_html_tree, __pyx_k_html_tree, sizeof(__pyx_k_html_tree), 0, 0, 1, 1},
|
|
Binary file
|
selectolax/parser.pyi
CHANGED
|
@@ -1,15 +1,28 @@
|
|
|
1
|
-
from typing import Iterator, TypeVar, Literal
|
|
1
|
+
from typing import Any, Iterator, TypeVar, Literal, overload
|
|
2
2
|
|
|
3
3
|
DefaultT = TypeVar("DefaultT")
|
|
4
4
|
|
|
5
5
|
class _Attributes:
|
|
6
6
|
@staticmethod
|
|
7
|
-
def create(node:
|
|
7
|
+
def create(node: Node, decode_errors: str) -> _Attributes: ...
|
|
8
8
|
def keys(self) -> Iterator[str]: ...
|
|
9
|
-
def items(self) -> Iterator[tuple[str, str]]: ...
|
|
10
|
-
def values(self) -> Iterator[str]: ...
|
|
11
|
-
def
|
|
12
|
-
def
|
|
9
|
+
def items(self) -> Iterator[tuple[str, str | None]]: ...
|
|
10
|
+
def values(self) -> Iterator[str | None]: ...
|
|
11
|
+
def __iter__(self) -> Iterator[str]: ...
|
|
12
|
+
def __len__(self) -> int: ...
|
|
13
|
+
def __getitem__(self, key: str) -> str | None: ...
|
|
14
|
+
def __setitem__(self, key: str, value: str) -> None: ...
|
|
15
|
+
def __delitem__(self, key: str) -> None: ...
|
|
16
|
+
def __contains__(self, key: str) -> bool: ...
|
|
17
|
+
def __repr__(self) -> str: ...
|
|
18
|
+
@overload
|
|
19
|
+
def get(self, key: str, default: DefaultT) -> DefaultT | str | None: ...
|
|
20
|
+
@overload
|
|
21
|
+
def get(self, key: str, default: None = ...) -> str | None: ...
|
|
22
|
+
@overload
|
|
23
|
+
def sget(self, key: str, default: str | DefaultT) -> str | DefaultT: ...
|
|
24
|
+
@overload
|
|
25
|
+
def sget(self, key: str, default: str = "") -> str: ...
|
|
13
26
|
|
|
14
27
|
class Selector:
|
|
15
28
|
"""An advanced CSS selector that supports additional operations.
|
|
@@ -18,12 +31,12 @@ class Selector:
|
|
|
18
31
|
|
|
19
32
|
Please note, this is an experimental feature that can change in the future."""
|
|
20
33
|
|
|
21
|
-
def __init__(self, node:
|
|
22
|
-
def css(self, query: str) ->
|
|
34
|
+
def __init__(self, node: Node, query: str): ...
|
|
35
|
+
def css(self, query: str) -> Node:
|
|
23
36
|
"""Evaluate CSS selector against current scope."""
|
|
24
37
|
...
|
|
25
38
|
@property
|
|
26
|
-
def matches(self) -> list[
|
|
39
|
+
def matches(self) -> list[Node]:
|
|
27
40
|
"""Returns all possible selector matches"""
|
|
28
41
|
...
|
|
29
42
|
@property
|
|
@@ -32,7 +45,7 @@ class Selector:
|
|
|
32
45
|
...
|
|
33
46
|
def text_contains(
|
|
34
47
|
self, text: str, deep: bool = True, separator: str = "", strip: bool = False
|
|
35
|
-
) ->
|
|
48
|
+
) -> Selector:
|
|
36
49
|
"""Filter all current matches given text."""
|
|
37
50
|
...
|
|
38
51
|
def any_text_contains(
|
|
@@ -42,7 +55,7 @@ class Selector:
|
|
|
42
55
|
...
|
|
43
56
|
def attribute_long_than(
|
|
44
57
|
self, text: str, length: int, start: str | None = None
|
|
45
|
-
) ->
|
|
58
|
+
) -> Selector:
|
|
46
59
|
"""Filter all current matches by attribute length.
|
|
47
60
|
|
|
48
61
|
Similar to string-length in XPath."""
|
|
@@ -56,15 +69,15 @@ class Selector:
|
|
|
56
69
|
...
|
|
57
70
|
|
|
58
71
|
class Node:
|
|
59
|
-
parser:
|
|
72
|
+
parser: HTMLParser
|
|
60
73
|
@property
|
|
61
|
-
def attributes(self) -> dict[str,
|
|
74
|
+
def attributes(self) -> dict[str, str | None]:
|
|
62
75
|
"""Get all attributes that belong to the current node.
|
|
63
76
|
|
|
64
77
|
The value of empty attributes is None."""
|
|
65
78
|
...
|
|
66
79
|
@property
|
|
67
|
-
def attrs(self) ->
|
|
80
|
+
def attrs(self) -> _Attributes:
|
|
68
81
|
"""A dict-like object that is similar to the attributes property, but operates directly on the Node data."""
|
|
69
82
|
...
|
|
70
83
|
@property
|
|
@@ -88,10 +101,10 @@ class Node:
|
|
|
88
101
|
def text(self, deep: bool = True, separator: str = "", strip: bool = False) -> str:
|
|
89
102
|
"""Returns the text of the node including text of all its child nodes."""
|
|
90
103
|
...
|
|
91
|
-
def iter(self, include_text: bool = False) -> Iterator[
|
|
104
|
+
def iter(self, include_text: bool = False) -> Iterator[Node]:
|
|
92
105
|
"""Iterate over nodes on the current level."""
|
|
93
106
|
...
|
|
94
|
-
def traverse(self, include_text: bool = False) -> Iterator[
|
|
107
|
+
def traverse(self, include_text: bool = False) -> Iterator[Node]:
|
|
95
108
|
"""Iterate over all child and next nodes starting from the current level."""
|
|
96
109
|
...
|
|
97
110
|
@property
|
|
@@ -99,30 +112,30 @@ class Node:
|
|
|
99
112
|
"""Return the name of the current tag (e.g. div, p, img)."""
|
|
100
113
|
...
|
|
101
114
|
@property
|
|
102
|
-
def child(self) ->
|
|
115
|
+
def child(self) -> Node | None:
|
|
103
116
|
"""Return the child node."""
|
|
104
117
|
...
|
|
105
118
|
@property
|
|
106
|
-
def parent(self) ->
|
|
119
|
+
def parent(self) -> Node | None:
|
|
107
120
|
"""Return the parent node."""
|
|
108
121
|
...
|
|
109
122
|
@property
|
|
110
|
-
def next(self) ->
|
|
123
|
+
def next(self) -> Node | None:
|
|
111
124
|
"""Return next node."""
|
|
112
125
|
...
|
|
113
126
|
@property
|
|
114
|
-
def prev(self) ->
|
|
127
|
+
def prev(self) -> Node | None:
|
|
115
128
|
"""Return previous node."""
|
|
116
129
|
...
|
|
117
130
|
@property
|
|
118
|
-
def last_child(self) ->
|
|
131
|
+
def last_child(self) -> Node | None:
|
|
119
132
|
"""Return last child node."""
|
|
120
133
|
...
|
|
121
134
|
@property
|
|
122
|
-
def html(self) ->
|
|
135
|
+
def html(self) -> str | None:
|
|
123
136
|
"""Return HTML representation of the current node including all its child nodes."""
|
|
124
137
|
...
|
|
125
|
-
def css(self, query: str) -> list[
|
|
138
|
+
def css(self, query: str) -> list[Node]:
|
|
126
139
|
"""Evaluate CSS selector against current node and its child nodes."""
|
|
127
140
|
...
|
|
128
141
|
def any_css_matches(self, selectors: tuple[str]) -> bool:
|
|
@@ -131,9 +144,18 @@ class Node:
|
|
|
131
144
|
def css_matches(self, selector: str) -> bool:
|
|
132
145
|
"""Returns True if CSS selector matches a node."""
|
|
133
146
|
...
|
|
147
|
+
@overload
|
|
134
148
|
def css_first(
|
|
135
|
-
self, query: str, default:
|
|
136
|
-
) ->
|
|
149
|
+
self, query: str, default: Any = ..., strict: Literal[True] = ...
|
|
150
|
+
) -> Node: ...
|
|
151
|
+
@overload
|
|
152
|
+
def css_first(
|
|
153
|
+
self, query: str, default: DefaultT, strict: bool = False
|
|
154
|
+
) -> Node | DefaultT: ...
|
|
155
|
+
@overload
|
|
156
|
+
def css_first(
|
|
157
|
+
self, query: str, default: None = ..., strict: bool = False
|
|
158
|
+
) -> Node | None:
|
|
137
159
|
"""Evaluate CSS selector against current node and its child nodes."""
|
|
138
160
|
...
|
|
139
161
|
def decompose(self, recursive: bool = True) -> None:
|
|
@@ -171,7 +193,7 @@ class Node:
|
|
|
171
193
|
|
|
172
194
|
Currently, works on text nodes only."""
|
|
173
195
|
...
|
|
174
|
-
def select(self, query: str | None = None) ->
|
|
196
|
+
def select(self, query: str | None = None) -> Selector:
|
|
175
197
|
"""Select nodes given a CSS selector.
|
|
176
198
|
|
|
177
199
|
Works similarly to the css method, but supports chained filtering and extra features.
|
|
@@ -208,14 +230,23 @@ class HTMLParser:
|
|
|
208
230
|
use_meta_tags: bool = True,
|
|
209
231
|
decode_errors: Literal["strict", "ignore", "replace"] = "ignore",
|
|
210
232
|
): ...
|
|
211
|
-
def css(self, query: str) -> list[
|
|
233
|
+
def css(self, query: str) -> list[Node]:
|
|
212
234
|
"""A CSS selector.
|
|
213
235
|
|
|
214
236
|
Matches pattern query against HTML tree."""
|
|
215
237
|
...
|
|
238
|
+
@overload
|
|
239
|
+
def css_first(
|
|
240
|
+
self, query: str, default: Any = ..., strict: Literal[True] = ...
|
|
241
|
+
) -> Node: ...
|
|
242
|
+
@overload
|
|
243
|
+
def css_first(
|
|
244
|
+
self, query: str, default: DefaultT, strict: bool = False
|
|
245
|
+
) -> Node | DefaultT: ...
|
|
246
|
+
@overload
|
|
216
247
|
def css_first(
|
|
217
|
-
self, query: str, default:
|
|
218
|
-
) ->
|
|
248
|
+
self, query: str, default: None = ..., strict: bool = False
|
|
249
|
+
) -> Node | None:
|
|
219
250
|
"""Same as css but returns only the first match."""
|
|
220
251
|
...
|
|
221
252
|
@property
|
|
@@ -225,18 +256,18 @@ class HTMLParser:
|
|
|
225
256
|
Returns unknown in case the encoding is not determined."""
|
|
226
257
|
...
|
|
227
258
|
@property
|
|
228
|
-
def root(self) ->
|
|
259
|
+
def root(self) -> Node | None:
|
|
229
260
|
"""Returns root node."""
|
|
230
261
|
...
|
|
231
262
|
@property
|
|
232
|
-
def head(self) ->
|
|
263
|
+
def head(self) -> Node | None:
|
|
233
264
|
"""Returns head node."""
|
|
234
265
|
...
|
|
235
266
|
@property
|
|
236
|
-
def body(self) ->
|
|
267
|
+
def body(self) -> Node | None:
|
|
237
268
|
"""Returns document body."""
|
|
238
269
|
...
|
|
239
|
-
def tags(self, name: str) -> list[
|
|
270
|
+
def tags(self, name: str) -> list[Node]:
|
|
240
271
|
"""Returns a list of tags that match specified name."""
|
|
241
272
|
...
|
|
242
273
|
def text(self, deep: bool = True, separator: str = "", strip: bool = False) -> str:
|
|
@@ -249,10 +280,10 @@ class HTMLParser:
|
|
|
249
280
|
Works the same as th unwrap method, but applied to a list of tags."""
|
|
250
281
|
...
|
|
251
282
|
@property
|
|
252
|
-
def html(self) ->
|
|
283
|
+
def html(self) -> str | None:
|
|
253
284
|
"""Return HTML representation of the page."""
|
|
254
285
|
...
|
|
255
|
-
def select(self, query: str | None = None) ->
|
|
286
|
+
def select(self, query: str | None = None) -> Selector | None:
|
|
256
287
|
"""Select nodes given a CSS selector.
|
|
257
288
|
|
|
258
289
|
Works similarly to the css method, but supports chained filtering and extra features.
|
|
@@ -272,7 +303,7 @@ class HTMLParser:
|
|
|
272
303
|
Caches values on the first call to improve performance."""
|
|
273
304
|
...
|
|
274
305
|
def css_matches(self, selector: str) -> bool: ...
|
|
275
|
-
def clone(self) ->
|
|
306
|
+
def clone(self) -> HTMLParser:
|
|
276
307
|
"""Clone the current tree."""
|
|
277
308
|
...
|
|
278
309
|
def merge_text_nodes(self):
|
|
@@ -281,14 +312,14 @@ class HTMLParser:
|
|
|
281
312
|
This is useful for text extraction."""
|
|
282
313
|
...
|
|
283
314
|
|
|
284
|
-
def create_tag(tag: str) ->
|
|
315
|
+
def create_tag(tag: str) -> Node:
|
|
285
316
|
"""
|
|
286
317
|
Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,
|
|
287
318
|
e.g. `"<div></div>"`.
|
|
288
319
|
"""
|
|
289
320
|
...
|
|
290
321
|
|
|
291
|
-
def parse_fragment(html: str) -> list[
|
|
322
|
+
def parse_fragment(html: str) -> list[Node]:
|
|
292
323
|
"""
|
|
293
324
|
Given HTML, parse it into a list of Nodes, such that the nodes
|
|
294
325
|
correspond to the given HTML.
|
selectolax/utils.pxi
CHANGED
|
@@ -1,22 +1,17 @@
|
|
|
1
|
-
selectolax
|
|
2
|
-
selectolax-0.3.25.dist-info/LICENSE,sha256=kYggm2ZJzBgL79x1gCsYsx8rFIYP2IE-BdXRV3Rm0NU,1077
|
|
3
|
-
selectolax-0.3.25.dist-info/WHEEL,sha256=HS_2vgrXyXO7N4Q-5VnuMAgG-fG_r8eemMgfU9Aw3Kk,110
|
|
4
|
-
selectolax-0.3.25.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
|
|
5
|
-
selectolax-0.3.25.dist-info/METADATA,sha256=mAqPUpeDIXvme-DT-vJFgzhqKVKeU9o6mj0Op1TGtaw,5930
|
|
6
|
-
selectolax/lexbor.pyi,sha256=FFVEZfXI8BwvUI0AtNQRUaTTzf66sXq2PWXiggaglug,5543
|
|
1
|
+
selectolax/lexbor.pyi,sha256=X2PMQR2XLd2rOPliKSpeFZ_VEf6mOQFTcFm0ChQbzsQ,6544
|
|
7
2
|
selectolax/parser.pyx,sha256=o1HkYE_nQr3TS7EPlldJx2-ygU9B5FI2uWYFzdF-VaI,12953
|
|
8
|
-
selectolax/__init__.py,sha256=
|
|
3
|
+
selectolax/__init__.py,sha256=c_YcZI0XHUarueRv6JL0z5WjcrLiV5ygw8PGIGFQiKs,175
|
|
9
4
|
selectolax/lexbor.pxd,sha256=PwygBdb1blWAQcxXubZS5uffhgcXaqgySNMPFMT02-c,20958
|
|
10
|
-
selectolax/lexbor.cpython-37m-darwin.so,sha256=
|
|
5
|
+
selectolax/lexbor.cpython-37m-darwin.so,sha256=ff7zyxEFx27TOssJ_EuuITWTwIg8l9m5jbHgo2HNUtc,11509944
|
|
11
6
|
selectolax/lexbor.pyx,sha256=ffEzBnZjGTsI-H5qck7bfjVRE9vteOhQnDp6RjVD7G0,10750
|
|
12
|
-
selectolax/parser.pyi,sha256=
|
|
13
|
-
selectolax/utils.pxi,sha256=
|
|
14
|
-
selectolax/lexbor.c,sha256=
|
|
7
|
+
selectolax/parser.pyi,sha256=kbR5eWvkJEy-9Hx3L_4JmGy3caIl0ki4SiagWz-fnhw,11557
|
|
8
|
+
selectolax/utils.pxi,sha256=uB0-0naFQPy1JpR2DiIlKnyLyC76yWLnUHSuH11xg6s,3459
|
|
9
|
+
selectolax/lexbor.c,sha256=VXue3SvGjvljWCyWa2ntEKkF-sRWTRY2kWpNrkdI0PQ,2353665
|
|
15
10
|
selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
-
selectolax/parser.c,sha256=
|
|
11
|
+
selectolax/parser.c,sha256=B6KXGW2ItH2oWhyUpALnCxeuDspYkUHuWEsa4ChfbTE,2214827
|
|
17
12
|
selectolax/base.pxi,sha256=eiPKlY9gG3l49qJoRQVLl1Ljza6z1k0A-met6sDPcqE,89
|
|
18
13
|
selectolax/parser.pxd,sha256=zZlg1vHUg6o4MXaiwKAo5S5hO_DqBGc4_E10qJ2EcM4,24564
|
|
19
|
-
selectolax/parser.cpython-37m-darwin.so,sha256=
|
|
14
|
+
selectolax/parser.cpython-37m-darwin.so,sha256=oToNOizcGCXbNVVGp7-WV-aHLD0mSHIQumI1MUFu-ZI,3050416
|
|
20
15
|
selectolax/modest/selection.pxi,sha256=S55MMxEW2B1oPExB_DRwPM46WoWZU73J3rFRZU1URuQ,6393
|
|
21
16
|
selectolax/modest/util.pxi,sha256=aX9UnRNTITImHVBTlIs9efOd3EyugLq_Lwuo0zVTiuQ,551
|
|
22
17
|
selectolax/modest/node.pxi,sha256=NrMzJnQJDCmgTHpUxpMHDyAfQ_AS_n_Cr_2ryEKjyL0,32550
|
|
@@ -24,3 +19,8 @@ selectolax/lexbor/selection.pxi,sha256=PqjvpL6H9uFcmcQWVGfML8FDsTO7tGoZujpA00g9p
|
|
|
24
19
|
selectolax/lexbor/util.pxi,sha256=Zq7S-zlyU3wOo49wGHQHnmmhpbkrcJm59ZCTPENcZQA,563
|
|
25
20
|
selectolax/lexbor/node.pxi,sha256=1XNzUwCbTYXy4D6rZtHxMpoJ9M-xoprB9wjdsiaWhr0,29346
|
|
26
21
|
selectolax/lexbor/attrs.pxi,sha256=-518D5v70GgMJhtsxWrWcgIMnXg8afECpUubzq8kqqs,3102
|
|
22
|
+
selectolax-0.3.27.dist-info/RECORD,,
|
|
23
|
+
selectolax-0.3.27.dist-info/LICENSE,sha256=kYggm2ZJzBgL79x1gCsYsx8rFIYP2IE-BdXRV3Rm0NU,1077
|
|
24
|
+
selectolax-0.3.27.dist-info/WHEEL,sha256=HS_2vgrXyXO7N4Q-5VnuMAgG-fG_r8eemMgfU9Aw3Kk,110
|
|
25
|
+
selectolax-0.3.27.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
|
|
26
|
+
selectolax-0.3.27.dist-info/METADATA,sha256=CWQugZ3QvqtPgkQdQZ4Q912nsmLPfkodPksjraV0CFc,5930
|
|
File without changes
|
|
File without changes
|
|
File without changes
|