selectolax 0.3.25__cp37-cp37m-win_amd64.whl → 0.3.27__cp37-cp37m-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of selectolax might be problematic. Click here for more details.
- selectolax/__init__.py +1 -1
- selectolax/lexbor.c +14 -14
- selectolax/lexbor.cp37-win_amd64.pyd +0 -0
- selectolax/lexbor.pyi +71 -40
- selectolax/parser.c +14 -14
- selectolax/parser.cp37-win_amd64.pyd +0 -0
- selectolax/parser.pyi +69 -38
- selectolax/utils.pxi +1 -1
- {selectolax-0.3.25.dist-info → selectolax-0.3.27.dist-info}/METADATA +1 -1
- {selectolax-0.3.25.dist-info → selectolax-0.3.27.dist-info}/RECORD +13 -13
- {selectolax-0.3.25.dist-info → selectolax-0.3.27.dist-info}/LICENSE +0 -0
- {selectolax-0.3.25.dist-info → selectolax-0.3.27.dist-info}/WHEEL +0 -0
- {selectolax-0.3.25.dist-info → selectolax-0.3.27.dist-info}/top_level.txt +0 -0
selectolax/__init__.py
CHANGED
selectolax/lexbor.c
CHANGED
|
@@ -3489,7 +3489,6 @@ static const char __pyx_k_AssertionError[] = "AssertionError";
|
|
|
3489
3489
|
static const char __pyx_k_LexborNode_css[] = "LexborNode.css";
|
|
3490
3490
|
static const char __pyx_k_LexborSelector[] = "LexborSelector";
|
|
3491
3491
|
static const char __pyx_k_fused_sigindex[] = "_fused_sigindex";
|
|
3492
|
-
static const char __pyx_k_html_body_head[] = "<html|<body|<head";
|
|
3493
3492
|
static const char __pyx_k_parse_fragment[] = "parse_fragment";
|
|
3494
3493
|
static const char __pyx_k_LexborNode_iter[] = "LexborNode.iter";
|
|
3495
3494
|
static const char __pyx_k_LexborNode_text[] = "LexborNode.text";
|
|
@@ -3513,6 +3512,7 @@ static const char __pyx_k_LexborNode_unwrap[] = "LexborNode.unwrap";
|
|
|
3513
3512
|
static const char __pyx_k_any_text_contains[] = "any_text_contains";
|
|
3514
3513
|
static const char __pyx_k_do_parse_fragment[] = "do_parse_fragment";
|
|
3515
3514
|
static const char __pyx_k_get_fragment_type[] = "get_fragment_type";
|
|
3515
|
+
static const char __pyx_k_html_body_head_er[] = "<html|<body|<head(?!er)";
|
|
3516
3516
|
static const char __pyx_k_selectolax_lexbor[] = "selectolax.lexbor";
|
|
3517
3517
|
static const char __pyx_k_Can_t_extract_text[] = "Can't extract text";
|
|
3518
3518
|
static const char __pyx_k_LexborSelector_css[] = "LexborSelector.css";
|
|
@@ -4155,7 +4155,7 @@ typedef struct {
|
|
|
4155
4155
|
PyObject *__pyx_n_u_head_and_body;
|
|
4156
4156
|
PyObject *__pyx_n_s_html;
|
|
4157
4157
|
PyObject *__pyx_kp_u_html_2;
|
|
4158
|
-
PyObject *
|
|
4158
|
+
PyObject *__pyx_kp_u_html_body_head_er;
|
|
4159
4159
|
PyObject *__pyx_n_s_html_len;
|
|
4160
4160
|
PyObject *__pyx_n_s_html_re;
|
|
4161
4161
|
PyObject *__pyx_n_s_i;
|
|
@@ -4719,7 +4719,7 @@ static int __pyx_m_clear(PyObject *m) {
|
|
|
4719
4719
|
Py_CLEAR(clear_module_state->__pyx_n_u_head_and_body);
|
|
4720
4720
|
Py_CLEAR(clear_module_state->__pyx_n_s_html);
|
|
4721
4721
|
Py_CLEAR(clear_module_state->__pyx_kp_u_html_2);
|
|
4722
|
-
Py_CLEAR(clear_module_state->
|
|
4722
|
+
Py_CLEAR(clear_module_state->__pyx_kp_u_html_body_head_er);
|
|
4723
4723
|
Py_CLEAR(clear_module_state->__pyx_n_s_html_len);
|
|
4724
4724
|
Py_CLEAR(clear_module_state->__pyx_n_s_html_re);
|
|
4725
4725
|
Py_CLEAR(clear_module_state->__pyx_n_s_i);
|
|
@@ -5261,7 +5261,7 @@ static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) {
|
|
|
5261
5261
|
Py_VISIT(traverse_module_state->__pyx_n_u_head_and_body);
|
|
5262
5262
|
Py_VISIT(traverse_module_state->__pyx_n_s_html);
|
|
5263
5263
|
Py_VISIT(traverse_module_state->__pyx_kp_u_html_2);
|
|
5264
|
-
Py_VISIT(traverse_module_state->
|
|
5264
|
+
Py_VISIT(traverse_module_state->__pyx_kp_u_html_body_head_er);
|
|
5265
5265
|
Py_VISIT(traverse_module_state->__pyx_n_s_html_len);
|
|
5266
5266
|
Py_VISIT(traverse_module_state->__pyx_n_s_html_re);
|
|
5267
5267
|
Py_VISIT(traverse_module_state->__pyx_n_s_i);
|
|
@@ -5903,7 +5903,7 @@ static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) {
|
|
|
5903
5903
|
#define __pyx_n_u_head_and_body __pyx_mstate_global->__pyx_n_u_head_and_body
|
|
5904
5904
|
#define __pyx_n_s_html __pyx_mstate_global->__pyx_n_s_html
|
|
5905
5905
|
#define __pyx_kp_u_html_2 __pyx_mstate_global->__pyx_kp_u_html_2
|
|
5906
|
-
#define
|
|
5906
|
+
#define __pyx_kp_u_html_body_head_er __pyx_mstate_global->__pyx_kp_u_html_body_head_er
|
|
5907
5907
|
#define __pyx_n_s_html_len __pyx_mstate_global->__pyx_n_s_html_len
|
|
5908
5908
|
#define __pyx_n_s_html_re __pyx_mstate_global->__pyx_n_s_html_re
|
|
5909
5909
|
#define __pyx_n_s_i __pyx_mstate_global->__pyx_n_s_i
|
|
@@ -7293,7 +7293,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_4get_fragment_type(CYTHON_UNUSED
|
|
|
7293
7293
|
* tree = parser_cls(html)
|
|
7294
7294
|
*
|
|
7295
7295
|
* import re # <<<<<<<<<<<<<<
|
|
7296
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
7296
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
7297
7297
|
*
|
|
7298
7298
|
*/
|
|
7299
7299
|
__pyx_t_3 = __Pyx_ImportDottedModule(__pyx_n_s_re, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 36, __pyx_L1_error)
|
|
@@ -7304,7 +7304,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_4get_fragment_type(CYTHON_UNUSED
|
|
|
7304
7304
|
/* "selectolax/utils.pxi":37
|
|
7305
7305
|
*
|
|
7306
7306
|
* import re
|
|
7307
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
7307
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
7308
7308
|
*
|
|
7309
7309
|
* has_html = False
|
|
7310
7310
|
*/
|
|
@@ -7327,7 +7327,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_4get_fragment_type(CYTHON_UNUSED
|
|
|
7327
7327
|
}
|
|
7328
7328
|
#endif
|
|
7329
7329
|
{
|
|
7330
|
-
PyObject *__pyx_callargs[3] = {__pyx_t_7,
|
|
7330
|
+
PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_kp_u_html_body_head_er, __pyx_t_5};
|
|
7331
7331
|
__pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6);
|
|
7332
7332
|
__Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
|
|
7333
7333
|
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
|
|
@@ -7339,7 +7339,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_4get_fragment_type(CYTHON_UNUSED
|
|
|
7339
7339
|
__pyx_t_3 = 0;
|
|
7340
7340
|
|
|
7341
7341
|
/* "selectolax/utils.pxi":39
|
|
7342
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
7342
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
7343
7343
|
*
|
|
7344
7344
|
* has_html = False # <<<<<<<<<<<<<<
|
|
7345
7345
|
* has_head = False
|
|
@@ -33914,7 +33914,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_12get_fragment_type(CYTHON_UNUSED
|
|
|
33914
33914
|
* tree = parser_cls(html)
|
|
33915
33915
|
*
|
|
33916
33916
|
* import re # <<<<<<<<<<<<<<
|
|
33917
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
33917
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
33918
33918
|
*
|
|
33919
33919
|
*/
|
|
33920
33920
|
__pyx_t_3 = __Pyx_ImportDottedModule(__pyx_n_s_re, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(8, 36, __pyx_L1_error)
|
|
@@ -33925,7 +33925,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_12get_fragment_type(CYTHON_UNUSED
|
|
|
33925
33925
|
/* "selectolax/utils.pxi":37
|
|
33926
33926
|
*
|
|
33927
33927
|
* import re
|
|
33928
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
33928
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
33929
33929
|
*
|
|
33930
33930
|
* has_html = False
|
|
33931
33931
|
*/
|
|
@@ -33948,7 +33948,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_12get_fragment_type(CYTHON_UNUSED
|
|
|
33948
33948
|
}
|
|
33949
33949
|
#endif
|
|
33950
33950
|
{
|
|
33951
|
-
PyObject *__pyx_callargs[3] = {__pyx_t_7,
|
|
33951
|
+
PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_kp_u_html_body_head_er, __pyx_t_5};
|
|
33952
33952
|
__pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6);
|
|
33953
33953
|
__Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
|
|
33954
33954
|
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
|
|
@@ -33960,7 +33960,7 @@ static PyObject *__pyx_pf_10selectolax_6lexbor_12get_fragment_type(CYTHON_UNUSED
|
|
|
33960
33960
|
__pyx_t_3 = 0;
|
|
33961
33961
|
|
|
33962
33962
|
/* "selectolax/utils.pxi":39
|
|
33963
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
33963
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
33964
33964
|
*
|
|
33965
33965
|
* has_html = False # <<<<<<<<<<<<<<
|
|
33966
33966
|
* has_head = False
|
|
@@ -43674,7 +43674,7 @@ static int __Pyx_CreateStringTabAndInitStrings(void) {
|
|
|
43674
43674
|
{&__pyx_n_u_head_and_body, __pyx_k_head_and_body, sizeof(__pyx_k_head_and_body), 0, 1, 0, 1},
|
|
43675
43675
|
{&__pyx_n_s_html, __pyx_k_html, sizeof(__pyx_k_html), 0, 0, 1, 1},
|
|
43676
43676
|
{&__pyx_kp_u_html_2, __pyx_k_html_2, sizeof(__pyx_k_html_2), 0, 1, 0, 0},
|
|
43677
|
-
{&
|
|
43677
|
+
{&__pyx_kp_u_html_body_head_er, __pyx_k_html_body_head_er, sizeof(__pyx_k_html_body_head_er), 0, 1, 0, 0},
|
|
43678
43678
|
{&__pyx_n_s_html_len, __pyx_k_html_len, sizeof(__pyx_k_html_len), 0, 0, 1, 1},
|
|
43679
43679
|
{&__pyx_n_s_html_re, __pyx_k_html_re, sizeof(__pyx_k_html_re), 0, 0, 1, 1},
|
|
43680
43680
|
{&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1},
|
|
Binary file
|
selectolax/lexbor.pyi
CHANGED
|
@@ -1,14 +1,27 @@
|
|
|
1
|
-
from typing import Iterator, TypeVar, NoReturn
|
|
1
|
+
from typing import Any, Iterator, Literal, TypeVar, NoReturn, overload
|
|
2
2
|
|
|
3
3
|
DefaultT = TypeVar("DefaultT")
|
|
4
4
|
|
|
5
5
|
class LexborAttributes:
|
|
6
6
|
@staticmethod
|
|
7
|
-
def create(node:
|
|
7
|
+
def create(node: LexborAttributes) -> LexborAttributes: ...
|
|
8
8
|
def keys(self) -> Iterator[str]: ...
|
|
9
|
-
def items(self) -> Iterator[tuple[str, str]]: ...
|
|
10
|
-
def values(self) -> Iterator[str]: ...
|
|
11
|
-
def
|
|
9
|
+
def items(self) -> Iterator[tuple[str, str | None]]: ...
|
|
10
|
+
def values(self) -> Iterator[str | None]: ...
|
|
11
|
+
def __iter__(self) -> Iterator[str]: ...
|
|
12
|
+
def __len__(self) -> int: ...
|
|
13
|
+
def __getitem__(self, key: str) -> str | None: ...
|
|
14
|
+
def __setitem__(self, key: str, value: str) -> None: ...
|
|
15
|
+
def __delitem__(self, key: str) -> None: ...
|
|
16
|
+
def __contains__(self, key: str) -> bool: ...
|
|
17
|
+
def __repr__(self) -> str: ...
|
|
18
|
+
@overload
|
|
19
|
+
def get(self, key: str, default: DefaultT) -> DefaultT | str | None: ...
|
|
20
|
+
@overload
|
|
21
|
+
def get(self, key: str, default: None = ...) -> str | None: ...
|
|
22
|
+
@overload
|
|
23
|
+
def sget(self, key: str, default: str | DefaultT) -> str | DefaultT: ...
|
|
24
|
+
@overload
|
|
12
25
|
def sget(self, key: str, default: str = "") -> str: ...
|
|
13
26
|
|
|
14
27
|
class LexborSelector:
|
|
@@ -20,38 +33,38 @@ class LexborSelector:
|
|
|
20
33
|
def any_matches(self) -> bool: ...
|
|
21
34
|
def text_contains(
|
|
22
35
|
self, text: str, deep: bool = True, separator: str = "", strip: bool = False
|
|
23
|
-
) ->
|
|
36
|
+
) -> LexborSelector: ...
|
|
24
37
|
def any_text_contains(
|
|
25
38
|
self, text: str, deep: bool = True, separator: str = "", strip: bool = False
|
|
26
39
|
) -> bool: ...
|
|
27
40
|
def attribute_longer_than(
|
|
28
41
|
self, attribute: str, length: int, start: str | None = None
|
|
29
|
-
) ->
|
|
42
|
+
) -> LexborSelector: ...
|
|
30
43
|
def any_attribute_longer_than(
|
|
31
44
|
self, attribute: str, length: int, start: str | None = None
|
|
32
45
|
) -> bool: ...
|
|
33
46
|
|
|
34
47
|
class LexborCSSSelector:
|
|
35
48
|
def __init__(self): ...
|
|
36
|
-
def find(self, query: str, node:
|
|
37
|
-
def any_matches(self, query: str, node:
|
|
49
|
+
def find(self, query: str, node: LexborNode) -> list[LexborNode]: ...
|
|
50
|
+
def any_matches(self, query: str, node: LexborNode) -> bool: ...
|
|
38
51
|
|
|
39
52
|
class LexborNode:
|
|
40
|
-
parser:
|
|
53
|
+
parser: LexborHTMLParser
|
|
41
54
|
@property
|
|
42
55
|
def mem_id(self) -> int: ...
|
|
43
56
|
@property
|
|
44
|
-
def child(self) ->
|
|
57
|
+
def child(self) -> LexborNode | None: ...
|
|
45
58
|
@property
|
|
46
|
-
def first_child(self) ->
|
|
59
|
+
def first_child(self) -> LexborNode | None: ...
|
|
47
60
|
@property
|
|
48
|
-
def parent(self) ->
|
|
61
|
+
def parent(self) -> LexborNode | None: ...
|
|
49
62
|
@property
|
|
50
|
-
def next(self) ->
|
|
63
|
+
def next(self) -> LexborNode | None: ...
|
|
51
64
|
@property
|
|
52
|
-
def prev(self) ->
|
|
65
|
+
def prev(self) -> LexborNode | None: ...
|
|
53
66
|
@property
|
|
54
|
-
def last_child(self) ->
|
|
67
|
+
def last_child(self) -> LexborNode | None: ...
|
|
55
68
|
@property
|
|
56
69
|
def html(self) -> str | None: ...
|
|
57
70
|
def __hash__(self) -> int: ...
|
|
@@ -59,10 +72,19 @@ class LexborNode:
|
|
|
59
72
|
def text(
|
|
60
73
|
self, deep: bool = True, separator: str = "", strip: bool = False
|
|
61
74
|
) -> str: ...
|
|
62
|
-
def css(self, query: str) -> list[
|
|
75
|
+
def css(self, query: str) -> list[LexborNode]: ...
|
|
76
|
+
@overload
|
|
63
77
|
def css_first(
|
|
64
|
-
self, query: str, default:
|
|
65
|
-
) ->
|
|
78
|
+
self, query: str, default: Any = ..., strict: Literal[True] = ...
|
|
79
|
+
) -> LexborNode: ...
|
|
80
|
+
@overload
|
|
81
|
+
def css_first(
|
|
82
|
+
self, query: str, default: DefaultT, strict: bool = False
|
|
83
|
+
) -> LexborNode | DefaultT: ...
|
|
84
|
+
@overload
|
|
85
|
+
def css_first(
|
|
86
|
+
self, query: str, default: None = ..., strict: bool = False
|
|
87
|
+
) -> LexborNode | None: ...
|
|
66
88
|
def any_css_matches(self, selectors: tuple[str]) -> bool: ...
|
|
67
89
|
def css_matches(self, selector: str) -> bool: ...
|
|
68
90
|
@property
|
|
@@ -74,23 +96,23 @@ class LexborNode:
|
|
|
74
96
|
@property
|
|
75
97
|
def attributes(self) -> dict[str, str | None]: ...
|
|
76
98
|
@property
|
|
77
|
-
def attrs(self) ->
|
|
99
|
+
def attrs(self) -> LexborAttributes: ...
|
|
78
100
|
@property
|
|
79
|
-
def id(self) ->
|
|
80
|
-
def iter(self, include_text: bool = False) -> Iterator[
|
|
101
|
+
def id(self) -> str | None: ...
|
|
102
|
+
def iter(self, include_text: bool = False) -> Iterator[LexborNode]: ...
|
|
81
103
|
def unwrap(self) -> None: ...
|
|
82
104
|
def unwrap_tags(self, tags: list[str]) -> None: ...
|
|
83
|
-
def traverse(self, include_text: bool = False) -> Iterator[
|
|
84
|
-
def replace_with(self, value: bytes | str |
|
|
85
|
-
def insert_before(self, value: bytes | str |
|
|
86
|
-
def insert_after(self, value: bytes | str |
|
|
87
|
-
def insert_child(self, value: bytes | str |
|
|
105
|
+
def traverse(self, include_text: bool = False) -> Iterator[LexborNode]: ...
|
|
106
|
+
def replace_with(self, value: bytes | str | LexborNode) -> None: ...
|
|
107
|
+
def insert_before(self, value: bytes | str | LexborNode) -> None: ...
|
|
108
|
+
def insert_after(self, value: bytes | str | LexborNode) -> None: ...
|
|
109
|
+
def insert_child(self, value: bytes | str | LexborNode) -> None: ...
|
|
88
110
|
@property
|
|
89
111
|
def raw_value(self) -> NoReturn: ...
|
|
90
112
|
def scripts_contain(self, query: str) -> bool: ...
|
|
91
113
|
def scripts_srcs_contain(self, queries: tuple[str]) -> bool: ...
|
|
92
114
|
def remove(self, recursive: bool = True) -> None: ...
|
|
93
|
-
def select(self, query: str | None = None) ->
|
|
115
|
+
def select(self, query: str | None = None) -> LexborSelector: ...
|
|
94
116
|
@property
|
|
95
117
|
def text_content(self) -> str | None: ...
|
|
96
118
|
|
|
@@ -99,38 +121,47 @@ class LexborHTMLParser:
|
|
|
99
121
|
@property
|
|
100
122
|
def selector(self) -> "LexborCSSSelector": ...
|
|
101
123
|
@property
|
|
102
|
-
def root(self) ->
|
|
124
|
+
def root(self) -> LexborNode | None: ...
|
|
103
125
|
@property
|
|
104
|
-
def body(self) ->
|
|
126
|
+
def body(self) -> LexborNode | None: ...
|
|
105
127
|
@property
|
|
106
|
-
def head(self) ->
|
|
107
|
-
def tags(self, name: str) -> list[
|
|
128
|
+
def head(self) -> LexborNode | None: ...
|
|
129
|
+
def tags(self, name: str) -> list[LexborNode]: ...
|
|
108
130
|
def text(
|
|
109
131
|
self, deep: bool = True, separator: str = "", strip: bool = False
|
|
110
132
|
) -> str: ...
|
|
111
133
|
@property
|
|
112
|
-
def html(self) ->
|
|
113
|
-
def css(self, query: str) -> list[
|
|
134
|
+
def html(self) -> str | None: ...
|
|
135
|
+
def css(self, query: str) -> list[LexborNode]: ...
|
|
136
|
+
@overload
|
|
137
|
+
def css_first(
|
|
138
|
+
self, query: str, default: Any = ..., strict: Literal[True] = ...
|
|
139
|
+
) -> LexborNode: ...
|
|
140
|
+
@overload
|
|
141
|
+
def css_first(
|
|
142
|
+
self, query: str, default: DefaultT, strict: bool = False
|
|
143
|
+
) -> LexborNode | DefaultT: ...
|
|
144
|
+
@overload
|
|
114
145
|
def css_first(
|
|
115
|
-
self, query: str, default:
|
|
116
|
-
) ->
|
|
146
|
+
self, query: str, default: None = ..., strict: bool = False
|
|
147
|
+
) -> LexborNode | None: ...
|
|
117
148
|
def strip_tags(self, tags: list[str], recursive: bool = False) -> None: ...
|
|
118
|
-
def select(self, query: str | None = None) ->
|
|
149
|
+
def select(self, query: str | None = None) -> LexborSelector | None: ...
|
|
119
150
|
def any_css_matches(self, selectors: tuple[str]) -> bool: ...
|
|
120
151
|
def scripts_contain(self, query: str) -> bool: ...
|
|
121
152
|
def scripts_srcs_contain(self, queries: tuple[str]) -> bool: ...
|
|
122
153
|
def css_matches(self, selector: str) -> bool: ...
|
|
123
|
-
def clone(self) ->
|
|
154
|
+
def clone(self) -> LexborHTMLParser: ...
|
|
124
155
|
def unwrap_tags(self, tags: list[str]) -> None: ...
|
|
125
156
|
|
|
126
|
-
def create_tag(tag: str) ->
|
|
157
|
+
def create_tag(tag: str) -> LexborNode:
|
|
127
158
|
"""
|
|
128
159
|
Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,
|
|
129
160
|
e.g. `"<div></div>"`.
|
|
130
161
|
"""
|
|
131
162
|
...
|
|
132
163
|
|
|
133
|
-
def parse_fragment(html: str) -> list[
|
|
164
|
+
def parse_fragment(html: str) -> list[LexborNode]:
|
|
134
165
|
"""
|
|
135
166
|
Given HTML, parse it into a list of Nodes, such that the nodes
|
|
136
167
|
correspond to the given HTML.
|
selectolax/parser.c
CHANGED
|
@@ -3476,7 +3476,6 @@ static const char __pyx_k_HTMLParser_css[] = "HTMLParser.css";
|
|
|
3476
3476
|
static const char __pyx_k_Node_css_first[] = "Node.css_first";
|
|
3477
3477
|
static const char __pyx_k_Node_decompose[] = "Node.decompose";
|
|
3478
3478
|
static const char __pyx_k_fused_sigindex[] = "_fused_sigindex";
|
|
3479
|
-
static const char __pyx_k_html_body_head[] = "<html|<body|<head";
|
|
3480
3479
|
static const char __pyx_k_parse_fragment[] = "parse_fragment";
|
|
3481
3480
|
static const char __pyx_k_Attributes_keys[] = "_Attributes.keys";
|
|
3482
3481
|
static const char __pyx_k_Attributes_sget[] = "_Attributes.sget";
|
|
@@ -3507,6 +3506,7 @@ static const char __pyx_k_Node_replace_with[] = "Node.replace_with";
|
|
|
3507
3506
|
static const char __pyx_k_any_text_contains[] = "any_text_contains";
|
|
3508
3507
|
static const char __pyx_k_do_parse_fragment[] = "do_parse_fragment";
|
|
3509
3508
|
static const char __pyx_k_get_fragment_type[] = "get_fragment_type";
|
|
3509
|
+
static const char __pyx_k_html_body_head_er[] = "<html|<body|<head(?!er)";
|
|
3510
3510
|
static const char __pyx_k_selectolax_parser[] = "selectolax.parser";
|
|
3511
3511
|
static const char __pyx_k_Can_t_parse_HTML_s[] = "Can't parse HTML:\n%s";
|
|
3512
3512
|
static const char __pyx_k_HTMLParser_chars_s[] = "<HTMLParser chars=%s>";
|
|
@@ -4113,7 +4113,7 @@ typedef struct {
|
|
|
4113
4113
|
PyObject *__pyx_n_u_head_and_body;
|
|
4114
4114
|
PyObject *__pyx_n_s_html;
|
|
4115
4115
|
PyObject *__pyx_kp_u_html_2;
|
|
4116
|
-
PyObject *
|
|
4116
|
+
PyObject *__pyx_kp_u_html_body_head_er;
|
|
4117
4117
|
PyObject *__pyx_n_s_html_len;
|
|
4118
4118
|
PyObject *__pyx_n_s_html_re;
|
|
4119
4119
|
PyObject *__pyx_n_s_html_tree;
|
|
@@ -4657,7 +4657,7 @@ static int __pyx_m_clear(PyObject *m) {
|
|
|
4657
4657
|
Py_CLEAR(clear_module_state->__pyx_n_u_head_and_body);
|
|
4658
4658
|
Py_CLEAR(clear_module_state->__pyx_n_s_html);
|
|
4659
4659
|
Py_CLEAR(clear_module_state->__pyx_kp_u_html_2);
|
|
4660
|
-
Py_CLEAR(clear_module_state->
|
|
4660
|
+
Py_CLEAR(clear_module_state->__pyx_kp_u_html_body_head_er);
|
|
4661
4661
|
Py_CLEAR(clear_module_state->__pyx_n_s_html_len);
|
|
4662
4662
|
Py_CLEAR(clear_module_state->__pyx_n_s_html_re);
|
|
4663
4663
|
Py_CLEAR(clear_module_state->__pyx_n_s_html_tree);
|
|
@@ -5179,7 +5179,7 @@ static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) {
|
|
|
5179
5179
|
Py_VISIT(traverse_module_state->__pyx_n_u_head_and_body);
|
|
5180
5180
|
Py_VISIT(traverse_module_state->__pyx_n_s_html);
|
|
5181
5181
|
Py_VISIT(traverse_module_state->__pyx_kp_u_html_2);
|
|
5182
|
-
Py_VISIT(traverse_module_state->
|
|
5182
|
+
Py_VISIT(traverse_module_state->__pyx_kp_u_html_body_head_er);
|
|
5183
5183
|
Py_VISIT(traverse_module_state->__pyx_n_s_html_len);
|
|
5184
5184
|
Py_VISIT(traverse_module_state->__pyx_n_s_html_re);
|
|
5185
5185
|
Py_VISIT(traverse_module_state->__pyx_n_s_html_tree);
|
|
@@ -5801,7 +5801,7 @@ static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) {
|
|
|
5801
5801
|
#define __pyx_n_u_head_and_body __pyx_mstate_global->__pyx_n_u_head_and_body
|
|
5802
5802
|
#define __pyx_n_s_html __pyx_mstate_global->__pyx_n_s_html
|
|
5803
5803
|
#define __pyx_kp_u_html_2 __pyx_mstate_global->__pyx_kp_u_html_2
|
|
5804
|
-
#define
|
|
5804
|
+
#define __pyx_kp_u_html_body_head_er __pyx_mstate_global->__pyx_kp_u_html_body_head_er
|
|
5805
5805
|
#define __pyx_n_s_html_len __pyx_mstate_global->__pyx_n_s_html_len
|
|
5806
5806
|
#define __pyx_n_s_html_re __pyx_mstate_global->__pyx_n_s_html_re
|
|
5807
5807
|
#define __pyx_n_s_html_tree __pyx_mstate_global->__pyx_n_s_html_tree
|
|
@@ -29011,7 +29011,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_4get_fragment_type(CYTHON_UNUSED
|
|
|
29011
29011
|
* tree = parser_cls(html)
|
|
29012
29012
|
*
|
|
29013
29013
|
* import re # <<<<<<<<<<<<<<
|
|
29014
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
29014
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
29015
29015
|
*
|
|
29016
29016
|
*/
|
|
29017
29017
|
__pyx_t_3 = __Pyx_ImportDottedModule(__pyx_n_s_re, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(5, 36, __pyx_L1_error)
|
|
@@ -29022,7 +29022,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_4get_fragment_type(CYTHON_UNUSED
|
|
|
29022
29022
|
/* "selectolax/utils.pxi":37
|
|
29023
29023
|
*
|
|
29024
29024
|
* import re
|
|
29025
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
29025
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
29026
29026
|
*
|
|
29027
29027
|
* has_html = False
|
|
29028
29028
|
*/
|
|
@@ -29045,7 +29045,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_4get_fragment_type(CYTHON_UNUSED
|
|
|
29045
29045
|
}
|
|
29046
29046
|
#endif
|
|
29047
29047
|
{
|
|
29048
|
-
PyObject *__pyx_callargs[3] = {__pyx_t_7,
|
|
29048
|
+
PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_kp_u_html_body_head_er, __pyx_t_5};
|
|
29049
29049
|
__pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6);
|
|
29050
29050
|
__Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
|
|
29051
29051
|
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
|
|
@@ -29057,7 +29057,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_4get_fragment_type(CYTHON_UNUSED
|
|
|
29057
29057
|
__pyx_t_3 = 0;
|
|
29058
29058
|
|
|
29059
29059
|
/* "selectolax/utils.pxi":39
|
|
29060
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
29060
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
29061
29061
|
*
|
|
29062
29062
|
* has_html = False # <<<<<<<<<<<<<<
|
|
29063
29063
|
* has_head = False
|
|
@@ -31530,7 +31530,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_16get_fragment_type(CYTHON_UNUSED
|
|
|
31530
31530
|
* tree = parser_cls(html)
|
|
31531
31531
|
*
|
|
31532
31532
|
* import re # <<<<<<<<<<<<<<
|
|
31533
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
31533
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
31534
31534
|
*
|
|
31535
31535
|
*/
|
|
31536
31536
|
__pyx_t_3 = __Pyx_ImportDottedModule(__pyx_n_s_re, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(7, 36, __pyx_L1_error)
|
|
@@ -31541,7 +31541,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_16get_fragment_type(CYTHON_UNUSED
|
|
|
31541
31541
|
/* "selectolax/utils.pxi":37
|
|
31542
31542
|
*
|
|
31543
31543
|
* import re
|
|
31544
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
31544
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE) # <<<<<<<<<<<<<<
|
|
31545
31545
|
*
|
|
31546
31546
|
* has_html = False
|
|
31547
31547
|
*/
|
|
@@ -31564,7 +31564,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_16get_fragment_type(CYTHON_UNUSED
|
|
|
31564
31564
|
}
|
|
31565
31565
|
#endif
|
|
31566
31566
|
{
|
|
31567
|
-
PyObject *__pyx_callargs[3] = {__pyx_t_7,
|
|
31567
|
+
PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_kp_u_html_body_head_er, __pyx_t_5};
|
|
31568
31568
|
__pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6);
|
|
31569
31569
|
__Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
|
|
31570
31570
|
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
|
|
@@ -31576,7 +31576,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_16get_fragment_type(CYTHON_UNUSED
|
|
|
31576
31576
|
__pyx_t_3 = 0;
|
|
31577
31577
|
|
|
31578
31578
|
/* "selectolax/utils.pxi":39
|
|
31579
|
-
* html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
|
|
31579
|
+
* html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
|
|
31580
31580
|
*
|
|
31581
31581
|
* has_html = False # <<<<<<<<<<<<<<
|
|
31582
31582
|
* has_head = False
|
|
@@ -41210,7 +41210,7 @@ static int __Pyx_CreateStringTabAndInitStrings(void) {
|
|
|
41210
41210
|
{&__pyx_n_u_head_and_body, __pyx_k_head_and_body, sizeof(__pyx_k_head_and_body), 0, 1, 0, 1},
|
|
41211
41211
|
{&__pyx_n_s_html, __pyx_k_html, sizeof(__pyx_k_html), 0, 0, 1, 1},
|
|
41212
41212
|
{&__pyx_kp_u_html_2, __pyx_k_html_2, sizeof(__pyx_k_html_2), 0, 1, 0, 0},
|
|
41213
|
-
{&
|
|
41213
|
+
{&__pyx_kp_u_html_body_head_er, __pyx_k_html_body_head_er, sizeof(__pyx_k_html_body_head_er), 0, 1, 0, 0},
|
|
41214
41214
|
{&__pyx_n_s_html_len, __pyx_k_html_len, sizeof(__pyx_k_html_len), 0, 0, 1, 1},
|
|
41215
41215
|
{&__pyx_n_s_html_re, __pyx_k_html_re, sizeof(__pyx_k_html_re), 0, 0, 1, 1},
|
|
41216
41216
|
{&__pyx_n_s_html_tree, __pyx_k_html_tree, sizeof(__pyx_k_html_tree), 0, 0, 1, 1},
|
|
Binary file
|
selectolax/parser.pyi
CHANGED
|
@@ -1,15 +1,28 @@
|
|
|
1
|
-
from typing import Iterator, TypeVar, Literal
|
|
1
|
+
from typing import Any, Iterator, TypeVar, Literal, overload
|
|
2
2
|
|
|
3
3
|
DefaultT = TypeVar("DefaultT")
|
|
4
4
|
|
|
5
5
|
class _Attributes:
|
|
6
6
|
@staticmethod
|
|
7
|
-
def create(node:
|
|
7
|
+
def create(node: Node, decode_errors: str) -> _Attributes: ...
|
|
8
8
|
def keys(self) -> Iterator[str]: ...
|
|
9
|
-
def items(self) -> Iterator[tuple[str, str]]: ...
|
|
10
|
-
def values(self) -> Iterator[str]: ...
|
|
11
|
-
def
|
|
12
|
-
def
|
|
9
|
+
def items(self) -> Iterator[tuple[str, str | None]]: ...
|
|
10
|
+
def values(self) -> Iterator[str | None]: ...
|
|
11
|
+
def __iter__(self) -> Iterator[str]: ...
|
|
12
|
+
def __len__(self) -> int: ...
|
|
13
|
+
def __getitem__(self, key: str) -> str | None: ...
|
|
14
|
+
def __setitem__(self, key: str, value: str) -> None: ...
|
|
15
|
+
def __delitem__(self, key: str) -> None: ...
|
|
16
|
+
def __contains__(self, key: str) -> bool: ...
|
|
17
|
+
def __repr__(self) -> str: ...
|
|
18
|
+
@overload
|
|
19
|
+
def get(self, key: str, default: DefaultT) -> DefaultT | str | None: ...
|
|
20
|
+
@overload
|
|
21
|
+
def get(self, key: str, default: None = ...) -> str | None: ...
|
|
22
|
+
@overload
|
|
23
|
+
def sget(self, key: str, default: str | DefaultT) -> str | DefaultT: ...
|
|
24
|
+
@overload
|
|
25
|
+
def sget(self, key: str, default: str = "") -> str: ...
|
|
13
26
|
|
|
14
27
|
class Selector:
|
|
15
28
|
"""An advanced CSS selector that supports additional operations.
|
|
@@ -18,12 +31,12 @@ class Selector:
|
|
|
18
31
|
|
|
19
32
|
Please note, this is an experimental feature that can change in the future."""
|
|
20
33
|
|
|
21
|
-
def __init__(self, node:
|
|
22
|
-
def css(self, query: str) ->
|
|
34
|
+
def __init__(self, node: Node, query: str): ...
|
|
35
|
+
def css(self, query: str) -> Node:
|
|
23
36
|
"""Evaluate CSS selector against current scope."""
|
|
24
37
|
...
|
|
25
38
|
@property
|
|
26
|
-
def matches(self) -> list[
|
|
39
|
+
def matches(self) -> list[Node]:
|
|
27
40
|
"""Returns all possible selector matches"""
|
|
28
41
|
...
|
|
29
42
|
@property
|
|
@@ -32,7 +45,7 @@ class Selector:
|
|
|
32
45
|
...
|
|
33
46
|
def text_contains(
|
|
34
47
|
self, text: str, deep: bool = True, separator: str = "", strip: bool = False
|
|
35
|
-
) ->
|
|
48
|
+
) -> Selector:
|
|
36
49
|
"""Filter all current matches given text."""
|
|
37
50
|
...
|
|
38
51
|
def any_text_contains(
|
|
@@ -42,7 +55,7 @@ class Selector:
|
|
|
42
55
|
...
|
|
43
56
|
def attribute_long_than(
|
|
44
57
|
self, text: str, length: int, start: str | None = None
|
|
45
|
-
) ->
|
|
58
|
+
) -> Selector:
|
|
46
59
|
"""Filter all current matches by attribute length.
|
|
47
60
|
|
|
48
61
|
Similar to string-length in XPath."""
|
|
@@ -56,15 +69,15 @@ class Selector:
|
|
|
56
69
|
...
|
|
57
70
|
|
|
58
71
|
class Node:
|
|
59
|
-
parser:
|
|
72
|
+
parser: HTMLParser
|
|
60
73
|
@property
|
|
61
|
-
def attributes(self) -> dict[str,
|
|
74
|
+
def attributes(self) -> dict[str, str | None]:
|
|
62
75
|
"""Get all attributes that belong to the current node.
|
|
63
76
|
|
|
64
77
|
The value of empty attributes is None."""
|
|
65
78
|
...
|
|
66
79
|
@property
|
|
67
|
-
def attrs(self) ->
|
|
80
|
+
def attrs(self) -> _Attributes:
|
|
68
81
|
"""A dict-like object that is similar to the attributes property, but operates directly on the Node data."""
|
|
69
82
|
...
|
|
70
83
|
@property
|
|
@@ -88,10 +101,10 @@ class Node:
|
|
|
88
101
|
def text(self, deep: bool = True, separator: str = "", strip: bool = False) -> str:
|
|
89
102
|
"""Returns the text of the node including text of all its child nodes."""
|
|
90
103
|
...
|
|
91
|
-
def iter(self, include_text: bool = False) -> Iterator[
|
|
104
|
+
def iter(self, include_text: bool = False) -> Iterator[Node]:
|
|
92
105
|
"""Iterate over nodes on the current level."""
|
|
93
106
|
...
|
|
94
|
-
def traverse(self, include_text: bool = False) -> Iterator[
|
|
107
|
+
def traverse(self, include_text: bool = False) -> Iterator[Node]:
|
|
95
108
|
"""Iterate over all child and next nodes starting from the current level."""
|
|
96
109
|
...
|
|
97
110
|
@property
|
|
@@ -99,30 +112,30 @@ class Node:
|
|
|
99
112
|
"""Return the name of the current tag (e.g. div, p, img)."""
|
|
100
113
|
...
|
|
101
114
|
@property
|
|
102
|
-
def child(self) ->
|
|
115
|
+
def child(self) -> Node | None:
|
|
103
116
|
"""Return the child node."""
|
|
104
117
|
...
|
|
105
118
|
@property
|
|
106
|
-
def parent(self) ->
|
|
119
|
+
def parent(self) -> Node | None:
|
|
107
120
|
"""Return the parent node."""
|
|
108
121
|
...
|
|
109
122
|
@property
|
|
110
|
-
def next(self) ->
|
|
123
|
+
def next(self) -> Node | None:
|
|
111
124
|
"""Return next node."""
|
|
112
125
|
...
|
|
113
126
|
@property
|
|
114
|
-
def prev(self) ->
|
|
127
|
+
def prev(self) -> Node | None:
|
|
115
128
|
"""Return previous node."""
|
|
116
129
|
...
|
|
117
130
|
@property
|
|
118
|
-
def last_child(self) ->
|
|
131
|
+
def last_child(self) -> Node | None:
|
|
119
132
|
"""Return last child node."""
|
|
120
133
|
...
|
|
121
134
|
@property
|
|
122
|
-
def html(self) ->
|
|
135
|
+
def html(self) -> str | None:
|
|
123
136
|
"""Return HTML representation of the current node including all its child nodes."""
|
|
124
137
|
...
|
|
125
|
-
def css(self, query: str) -> list[
|
|
138
|
+
def css(self, query: str) -> list[Node]:
|
|
126
139
|
"""Evaluate CSS selector against current node and its child nodes."""
|
|
127
140
|
...
|
|
128
141
|
def any_css_matches(self, selectors: tuple[str]) -> bool:
|
|
@@ -131,9 +144,18 @@ class Node:
|
|
|
131
144
|
def css_matches(self, selector: str) -> bool:
|
|
132
145
|
"""Returns True if CSS selector matches a node."""
|
|
133
146
|
...
|
|
147
|
+
@overload
|
|
134
148
|
def css_first(
|
|
135
|
-
self, query: str, default:
|
|
136
|
-
) ->
|
|
149
|
+
self, query: str, default: Any = ..., strict: Literal[True] = ...
|
|
150
|
+
) -> Node: ...
|
|
151
|
+
@overload
|
|
152
|
+
def css_first(
|
|
153
|
+
self, query: str, default: DefaultT, strict: bool = False
|
|
154
|
+
) -> Node | DefaultT: ...
|
|
155
|
+
@overload
|
|
156
|
+
def css_first(
|
|
157
|
+
self, query: str, default: None = ..., strict: bool = False
|
|
158
|
+
) -> Node | None:
|
|
137
159
|
"""Evaluate CSS selector against current node and its child nodes."""
|
|
138
160
|
...
|
|
139
161
|
def decompose(self, recursive: bool = True) -> None:
|
|
@@ -171,7 +193,7 @@ class Node:
|
|
|
171
193
|
|
|
172
194
|
Currently, works on text nodes only."""
|
|
173
195
|
...
|
|
174
|
-
def select(self, query: str | None = None) ->
|
|
196
|
+
def select(self, query: str | None = None) -> Selector:
|
|
175
197
|
"""Select nodes given a CSS selector.
|
|
176
198
|
|
|
177
199
|
Works similarly to the css method, but supports chained filtering and extra features.
|
|
@@ -208,14 +230,23 @@ class HTMLParser:
|
|
|
208
230
|
use_meta_tags: bool = True,
|
|
209
231
|
decode_errors: Literal["strict", "ignore", "replace"] = "ignore",
|
|
210
232
|
): ...
|
|
211
|
-
def css(self, query: str) -> list[
|
|
233
|
+
def css(self, query: str) -> list[Node]:
|
|
212
234
|
"""A CSS selector.
|
|
213
235
|
|
|
214
236
|
Matches pattern query against HTML tree."""
|
|
215
237
|
...
|
|
238
|
+
@overload
|
|
239
|
+
def css_first(
|
|
240
|
+
self, query: str, default: Any = ..., strict: Literal[True] = ...
|
|
241
|
+
) -> Node: ...
|
|
242
|
+
@overload
|
|
243
|
+
def css_first(
|
|
244
|
+
self, query: str, default: DefaultT, strict: bool = False
|
|
245
|
+
) -> Node | DefaultT: ...
|
|
246
|
+
@overload
|
|
216
247
|
def css_first(
|
|
217
|
-
self, query: str, default:
|
|
218
|
-
) ->
|
|
248
|
+
self, query: str, default: None = ..., strict: bool = False
|
|
249
|
+
) -> Node | None:
|
|
219
250
|
"""Same as css but returns only the first match."""
|
|
220
251
|
...
|
|
221
252
|
@property
|
|
@@ -225,18 +256,18 @@ class HTMLParser:
|
|
|
225
256
|
Returns unknown in case the encoding is not determined."""
|
|
226
257
|
...
|
|
227
258
|
@property
|
|
228
|
-
def root(self) ->
|
|
259
|
+
def root(self) -> Node | None:
|
|
229
260
|
"""Returns root node."""
|
|
230
261
|
...
|
|
231
262
|
@property
|
|
232
|
-
def head(self) ->
|
|
263
|
+
def head(self) -> Node | None:
|
|
233
264
|
"""Returns head node."""
|
|
234
265
|
...
|
|
235
266
|
@property
|
|
236
|
-
def body(self) ->
|
|
267
|
+
def body(self) -> Node | None:
|
|
237
268
|
"""Returns document body."""
|
|
238
269
|
...
|
|
239
|
-
def tags(self, name: str) -> list[
|
|
270
|
+
def tags(self, name: str) -> list[Node]:
|
|
240
271
|
"""Returns a list of tags that match specified name."""
|
|
241
272
|
...
|
|
242
273
|
def text(self, deep: bool = True, separator: str = "", strip: bool = False) -> str:
|
|
@@ -249,10 +280,10 @@ class HTMLParser:
|
|
|
249
280
|
Works the same as th unwrap method, but applied to a list of tags."""
|
|
250
281
|
...
|
|
251
282
|
@property
|
|
252
|
-
def html(self) ->
|
|
283
|
+
def html(self) -> str | None:
|
|
253
284
|
"""Return HTML representation of the page."""
|
|
254
285
|
...
|
|
255
|
-
def select(self, query: str | None = None) ->
|
|
286
|
+
def select(self, query: str | None = None) -> Selector | None:
|
|
256
287
|
"""Select nodes given a CSS selector.
|
|
257
288
|
|
|
258
289
|
Works similarly to the css method, but supports chained filtering and extra features.
|
|
@@ -272,7 +303,7 @@ class HTMLParser:
|
|
|
272
303
|
Caches values on the first call to improve performance."""
|
|
273
304
|
...
|
|
274
305
|
def css_matches(self, selector: str) -> bool: ...
|
|
275
|
-
def clone(self) ->
|
|
306
|
+
def clone(self) -> HTMLParser:
|
|
276
307
|
"""Clone the current tree."""
|
|
277
308
|
...
|
|
278
309
|
def merge_text_nodes(self):
|
|
@@ -281,14 +312,14 @@ class HTMLParser:
|
|
|
281
312
|
This is useful for text extraction."""
|
|
282
313
|
...
|
|
283
314
|
|
|
284
|
-
def create_tag(tag: str) ->
|
|
315
|
+
def create_tag(tag: str) -> Node:
|
|
285
316
|
"""
|
|
286
317
|
Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,
|
|
287
318
|
e.g. `"<div></div>"`.
|
|
288
319
|
"""
|
|
289
320
|
...
|
|
290
321
|
|
|
291
|
-
def parse_fragment(html: str) -> list[
|
|
322
|
+
def parse_fragment(html: str) -> list[Node]:
|
|
292
323
|
"""
|
|
293
324
|
Given HTML, parse it into a list of Nodes, such that the nodes
|
|
294
325
|
correspond to the given HTML.
|
selectolax/utils.pxi
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
|
-
selectolax/__init__.py,sha256=
|
|
1
|
+
selectolax/__init__.py,sha256=H6D0G54OTy7vXxMudUXEbzIw-3rU9JcnyuRxJEv1L18,185
|
|
2
2
|
selectolax/base.pxi,sha256=zOj3BrCA71xd-mJFtkMIAglP4ZybfrHVoCoy6ljTBDQ,93
|
|
3
|
-
selectolax/lexbor.c,sha256=
|
|
4
|
-
selectolax/lexbor.cp37-win_amd64.pyd,sha256=
|
|
3
|
+
selectolax/lexbor.c,sha256=X-GrSY8qlNpnL0EHpftD_zn8QrfPVEwpj4IByZAb61c,2354397
|
|
4
|
+
selectolax/lexbor.cp37-win_amd64.pyd,sha256=LGg4WpQjGj9dm0GEjW_kSjA22ASrwVdIE-g5FMAruT4,6935040
|
|
5
5
|
selectolax/lexbor.pxd,sha256=1d9nvZd9rZl27gwPwVV5BlbR2LAi6jDK69Xm9Guz5Kk,21538
|
|
6
|
-
selectolax/lexbor.pyi,sha256=
|
|
6
|
+
selectolax/lexbor.pyi,sha256=DXQejzmWT7FbWCXcakLzOYGEs-rOMMdaLIZaNSu2uiM,6716
|
|
7
7
|
selectolax/lexbor.pyx,sha256=H3-Y78orz2Hop0Qqf8JulJo6f3yBR0kbHRqNPyDOsAc,11097
|
|
8
|
-
selectolax/parser.c,sha256=
|
|
9
|
-
selectolax/parser.cp37-win_amd64.pyd,sha256=
|
|
8
|
+
selectolax/parser.c,sha256=rGXXnE3kJJDElEps9cQMYaIfXCCxVpDk5ELR-RQF5nk,2215098
|
|
9
|
+
selectolax/parser.cp37-win_amd64.pyd,sha256=7HYfv4WZZeBncIFqppvPn8VyKdEi9Be63dyF10R2XFY,2144256
|
|
10
10
|
selectolax/parser.pxd,sha256=4pM_CcZlvJlaR8EMjZCnSmnCcJbwcYOldRTBEbfwm48,25145
|
|
11
|
-
selectolax/parser.pyi,sha256=
|
|
11
|
+
selectolax/parser.pyi,sha256=Ud_hBY54PJEVJX6WZ07L7s0uzs3u1FLD7TAGhES1y7Q,11887
|
|
12
12
|
selectolax/parser.pyx,sha256=lQW4qJ6nCDraCupvcT61zUkgo-S-KIzl9JIGV6hh6hA,13386
|
|
13
13
|
selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
-
selectolax/utils.pxi,sha256=
|
|
14
|
+
selectolax/utils.pxi,sha256=4rtdRcLWuemxN1qe7Eul5jvAmHZ65r7Gvf67_Wg8Bt4,3566
|
|
15
15
|
selectolax/lexbor/attrs.pxi,sha256=TEJUCGAkFwb14Emecyx4yljKSJMRmFbq8mOcNs35G_c,3204
|
|
16
16
|
selectolax/lexbor/node.pxi,sha256=P-KXzAk6fOo8ilEKAm0yjFQa90xkAXhWhSdce-YDauo,30213
|
|
17
17
|
selectolax/lexbor/selection.pxi,sha256=nRGiDYvpSAQcsWQ_2Z9-4kqebahIJmKWXQBtd3MPsis,6626
|
|
@@ -19,8 +19,8 @@ selectolax/lexbor/util.pxi,sha256=0I4ElWIwXxrZCfMmGCtyDU127oMsPCqC3IcUk4QmMAc,58
|
|
|
19
19
|
selectolax/modest/node.pxi,sha256=xDJTFhcvkF7FGSObnCftZJSIBj7wV1--IdLJbFoIItw,33519
|
|
20
20
|
selectolax/modest/selection.pxi,sha256=0elY7JwnpPVaw0QZE1T7A78s9FIph5uWIhwy4sEXGU8,6586
|
|
21
21
|
selectolax/modest/util.pxi,sha256=o2nPGGGtRlLqOCa7yPk94CfBzNlVr7ull7osFy6NRX4,570
|
|
22
|
-
selectolax-0.3.
|
|
23
|
-
selectolax-0.3.
|
|
24
|
-
selectolax-0.3.
|
|
25
|
-
selectolax-0.3.
|
|
26
|
-
selectolax-0.3.
|
|
22
|
+
selectolax-0.3.27.dist-info/LICENSE,sha256=Gy4WGsmAwV9QtqH0HaBHJQ35bt_0irn77fIt1iBncUo,1087
|
|
23
|
+
selectolax-0.3.27.dist-info/METADATA,sha256=IC4zgMREnUsRQLbqYPrDZJ1VkkSWkupV1VSxJ1zIkE4,6107
|
|
24
|
+
selectolax-0.3.27.dist-info/WHEEL,sha256=slqBGdqRnxanDn00BSYHhryEsWH_8CUurgRUvoMtK_Y,101
|
|
25
|
+
selectolax-0.3.27.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
|
|
26
|
+
selectolax-0.3.27.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|