selectolax 0.3.26__cp310-cp310-musllinux_1_2_i686.whl → 0.3.28__cp310-cp310-musllinux_1_2_i686.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

selectolax/lexbor.pyi CHANGED
@@ -117,7 +117,7 @@ class LexborNode:
117
117
  def text_content(self) -> str | None: ...
118
118
 
119
119
  class LexborHTMLParser:
120
- def __init__(self, html: str): ...
120
+ def __init__(self, html: str| bytes ): ...
121
121
  @property
122
122
  def selector(self) -> "LexborCSSSelector": ...
123
123
  @property
selectolax/lexbor.pyx CHANGED
@@ -112,6 +112,7 @@ cdef class LexborHTMLParser:
112
112
  len(pybyte_name)
113
113
  )
114
114
  if status != 0x0000:
115
+ lxb_dom_collection_destroy(collection, <bint> True)
115
116
  raise SelectolaxError("Can't locate elements.")
116
117
 
117
118
  for i in range(lxb_dom_collection_length_noi(collection)):
@@ -226,13 +227,14 @@ cdef class LexborHTMLParser:
226
227
  len(pybyte_name)
227
228
  )
228
229
  if status != 0x0000:
230
+ lxb_dom_collection_destroy(collection, <bint> True)
229
231
  raise SelectolaxError("Can't locate elements.")
230
232
 
231
233
  for i in range(lxb_dom_collection_length_noi(collection)):
232
234
  if recursive:
233
- lxb_dom_node_destroy( <lxb_dom_node_t*> lxb_dom_collection_element_noi(collection, i))
234
- else:
235
235
  lxb_dom_node_destroy_deep( <lxb_dom_node_t*> lxb_dom_collection_element_noi(collection, i))
236
+ else:
237
+ lxb_dom_node_destroy(<lxb_dom_node_t *> lxb_dom_collection_element_noi(collection, i))
236
238
  lxb_dom_collection_destroy(collection, <bint> True)
237
239
 
238
240
  def select(self, query=None):
selectolax/parser.c CHANGED
@@ -31,143 +31,143 @@
31
31
  "name": "selectolax.parser",
32
32
  "sources": [
33
33
  "selectolax/parser.pyx",
34
- "modest/source/mycore/thread_queue.c",
35
- "modest/source/mycore/myosi.c",
36
- "modest/source/mycore/utils.c",
37
- "modest/source/mycore/mystring.c",
38
- "modest/source/mycore/mythread.c",
39
- "modest/source/mycore/incoming.c",
40
- "modest/source/mycore/utils/mcobject.c",
41
- "modest/source/mycore/utils/mcobject_async.c",
42
- "modest/source/mycore/utils/mctree.c",
43
- "modest/source/mycore/utils/avl_tree.c",
44
- "modest/source/mycore/utils/mchar_async.c",
45
- "modest/source/mycore/utils/mcsync.c",
46
- "modest/source/mycore/utils/mcsimple.c",
47
- "modest/source/mycore/utils/mhash.c",
48
- "modest/source/myfont/name.c",
49
- "modest/source/myfont/hmtx.c",
50
- "modest/source/myfont/glyf.c",
51
- "modest/source/myfont/pclt.c",
52
- "modest/source/myfont/loca.c",
53
- "modest/source/myfont/myosi.c",
54
- "modest/source/myfont/head.c",
55
- "modest/source/myfont/myfont.c",
56
- "modest/source/myfont/vmtx.c",
57
- "modest/source/myfont/cmap.c",
58
- "modest/source/myfont/os_2.c",
59
- "modest/source/myfont/maxp.c",
60
- "modest/source/myfont/hhea.c",
61
- "modest/source/myfont/vhea.c",
62
- "modest/source/myunicode/myosi.c",
34
+ "modest/source/myencoding/mystring.c",
63
35
  "modest/source/myencoding/detect.c",
64
36
  "modest/source/myencoding/encoding.c",
65
- "modest/source/myencoding/mystring.c",
66
- "modest/source/modest/declaration.c",
67
- "modest/source/modest/modest.c",
68
- "modest/source/modest/glue.c",
69
- "modest/source/modest/finder/type.c",
70
- "modest/source/modest/finder/finder.c",
71
- "modest/source/modest/finder/match.c",
72
- "modest/source/modest/finder/thread.c",
73
- "modest/source/modest/finder/pseudo_class.c",
74
- "modest/source/modest/node/node.c",
75
- "modest/source/modest/node/property.c",
76
- "modest/source/modest/node/raw_property.c",
77
- "modest/source/modest/node/serialization.c",
78
- "modest/source/modest/style/type.c",
79
- "modest/source/modest/style/map.c",
80
- "modest/source/modest/style/sheet.c",
81
- "modest/source/modest/style/default.c",
82
- "modest/source/modest/style/raw.c",
83
- "modest/source/modest/render/tree.c",
84
- "modest/source/modest/render/tree_node.c",
85
- "modest/source/modest/render/binding.c",
86
- "modest/source/modest/render/begin.c",
87
- "modest/source/modest/layer/layer.c",
88
- "modest/source/mycss/mycss.c",
89
- "modest/source/mycss/tokenizer_end.c",
37
+ "modest/source/myurl/host.c",
38
+ "modest/source/myurl/utils.c",
39
+ "modest/source/myurl/scheme.c",
40
+ "modest/source/myurl/path.c",
41
+ "modest/source/myurl/parser.c",
42
+ "modest/source/myurl/url.c",
43
+ "modest/source/myurl/parser_end.c",
44
+ "modest/source/myurl/serialization.c",
45
+ "modest/source/myurl/myosi.c",
46
+ "modest/source/myurl/punycode.c",
90
47
  "modest/source/mycss/stack.c",
91
- "modest/source/mycss/check.c",
92
- "modest/source/mycss/convert.c",
93
- "modest/source/mycss/stylesheet.c",
94
- "modest/source/mycss/tokenizer.c",
95
- "modest/source/mycss/parser.c",
48
+ "modest/source/mycss/mycss.c",
96
49
  "modest/source/mycss/an_plus_b.c",
97
50
  "modest/source/mycss/entry.c",
51
+ "modest/source/mycss/parser.c",
98
52
  "modest/source/mycss/mystring.c",
99
53
  "modest/source/mycss/tokenizer_global.c",
100
- "modest/source/mycss/values/values.c",
101
- "modest/source/mycss/values/serialization.c",
102
- "modest/source/mycss/values/consume.c",
103
- "modest/source/mycss/values/units.c",
104
- "modest/source/mycss/values/image.c",
105
- "modest/source/mycss/values/color.c",
106
- "modest/source/mycss/values/destroy.c",
107
- "modest/source/mycss/values/color_parser.c",
108
- "modest/source/mycss/media/state.c",
109
- "modest/source/mycss/media/init.c",
110
- "modest/source/mycss/selectors/state.c",
54
+ "modest/source/mycss/check.c",
55
+ "modest/source/mycss/tokenizer.c",
56
+ "modest/source/mycss/convert.c",
57
+ "modest/source/mycss/stylesheet.c",
58
+ "modest/source/mycss/tokenizer_end.c",
59
+ "modest/source/mycss/selectors/list.c",
60
+ "modest/source/mycss/selectors/init.c",
111
61
  "modest/source/mycss/selectors/function.c",
62
+ "modest/source/mycss/selectors/parser.c",
63
+ "modest/source/mycss/selectors/state.c",
112
64
  "modest/source/mycss/selectors/serialization.c",
113
- "modest/source/mycss/selectors/pseudo.c",
114
- "modest/source/mycss/selectors/init.c",
115
65
  "modest/source/mycss/selectors/function_parser.c",
66
+ "modest/source/mycss/selectors/pseudo.c",
116
67
  "modest/source/mycss/selectors/value.c",
117
- "modest/source/mycss/selectors/parser.c",
118
- "modest/source/mycss/selectors/list.c",
68
+ "modest/source/mycss/values/image.c",
69
+ "modest/source/mycss/values/color_parser.c",
70
+ "modest/source/mycss/values/units.c",
71
+ "modest/source/mycss/values/serialization.c",
72
+ "modest/source/mycss/values/values.c",
73
+ "modest/source/mycss/values/destroy.c",
74
+ "modest/source/mycss/values/color.c",
75
+ "modest/source/mycss/values/consume.c",
76
+ "modest/source/mycss/media/init.c",
77
+ "modest/source/mycss/media/state.c",
78
+ "modest/source/mycss/declaration/entry_destroy.c",
79
+ "modest/source/mycss/declaration/init.c",
80
+ "modest/source/mycss/declaration/entry.c",
81
+ "modest/source/mycss/declaration/parser.c",
82
+ "modest/source/mycss/declaration/state.c",
83
+ "modest/source/mycss/declaration/serialization.c",
84
+ "modest/source/mycss/declaration/default.c",
85
+ "modest/source/mycss/namespace/init.c",
86
+ "modest/source/mycss/namespace/parser.c",
87
+ "modest/source/mycss/namespace/state.c",
88
+ "modest/source/mycss/namespace/serialization.c",
119
89
  "modest/source/mycss/property/parser_image.c",
120
- "modest/source/mycss/property/shared.c",
121
- "modest/source/mycss/property/parser_text_decoration.c",
122
- "modest/source/mycss/property/serialization.c",
123
90
  "modest/source/mycss/property/init.c",
124
- "modest/source/mycss/property/parser_url.c",
91
+ "modest/source/mycss/property/parser_text_decoration.c",
125
92
  "modest/source/mycss/property/parser.c",
126
93
  "modest/source/mycss/property/parser_background.c",
127
- "modest/source/mycss/namespace/state.c",
128
- "modest/source/mycss/namespace/serialization.c",
129
- "modest/source/mycss/namespace/init.c",
130
- "modest/source/mycss/namespace/parser.c",
131
- "modest/source/mycss/declaration/state.c",
132
- "modest/source/mycss/declaration/serialization.c",
133
- "modest/source/mycss/declaration/init.c",
134
- "modest/source/mycss/declaration/default.c",
135
- "modest/source/mycss/declaration/entry_destroy.c",
136
- "modest/source/mycss/declaration/parser.c",
137
- "modest/source/mycss/declaration/entry.c",
138
- "modest/source/myurl/serialization.c",
139
- "modest/source/myurl/parser_end.c",
140
- "modest/source/myurl/path.c",
141
- "modest/source/myurl/myosi.c",
142
- "modest/source/myurl/host.c",
143
- "modest/source/myurl/parser.c",
144
- "modest/source/myurl/utils.c",
145
- "modest/source/myurl/url.c",
146
- "modest/source/myurl/punycode.c",
147
- "modest/source/myurl/scheme.c",
148
- "modest/source/myport/posix/mycore/io.c",
149
- "modest/source/myport/posix/mycore/thread.c",
150
- "modest/source/myport/posix/mycore/perf.c",
151
- "modest/source/myport/posix/mycore/memory.c",
152
- "modest/source/myport/posix/mycore/utils/mcsync.c",
153
- "modest/source/myhtml/rules.c",
154
- "modest/source/myhtml/tree.c",
94
+ "modest/source/mycss/property/serialization.c",
95
+ "modest/source/mycss/property/parser_url.c",
96
+ "modest/source/mycss/property/shared.c",
155
97
  "modest/source/myhtml/tokenizer_script.c",
156
- "modest/source/myhtml/tokenizer_end.c",
157
98
  "modest/source/myhtml/tag.c",
99
+ "modest/source/myhtml/tag_init.c",
100
+ "modest/source/myhtml/mynamespace.c",
101
+ "modest/source/myhtml/callback.c",
102
+ "modest/source/myhtml/tokenizer_doctype.c",
103
+ "modest/source/myhtml/charef.c",
104
+ "modest/source/myhtml/rules.c",
105
+ "modest/source/myhtml/parser.c",
106
+ "modest/source/myhtml/mystring.c",
158
107
  "modest/source/myhtml/serialization.c",
159
- "modest/source/myhtml/data_process.c",
108
+ "modest/source/myhtml/tree.c",
160
109
  "modest/source/myhtml/token.c",
161
- "modest/source/myhtml/charef.c",
162
- "modest/source/myhtml/mynamespace.c",
163
110
  "modest/source/myhtml/tokenizer.c",
164
- "modest/source/myhtml/stream.c",
165
- "modest/source/myhtml/parser.c",
166
111
  "modest/source/myhtml/myhtml.c",
167
- "modest/source/myhtml/mystring.c",
168
- "modest/source/myhtml/tokenizer_doctype.c",
169
- "modest/source/myhtml/tag_init.c",
170
- "modest/source/myhtml/callback.c"
112
+ "modest/source/myhtml/data_process.c",
113
+ "modest/source/myhtml/stream.c",
114
+ "modest/source/myhtml/tokenizer_end.c",
115
+ "modest/source/myport/posix/mycore/memory.c",
116
+ "modest/source/myport/posix/mycore/thread.c",
117
+ "modest/source/myport/posix/mycore/perf.c",
118
+ "modest/source/myport/posix/mycore/io.c",
119
+ "modest/source/myport/posix/mycore/utils/mcsync.c",
120
+ "modest/source/modest/modest.c",
121
+ "modest/source/modest/declaration.c",
122
+ "modest/source/modest/glue.c",
123
+ "modest/source/modest/render/tree_node.c",
124
+ "modest/source/modest/render/tree.c",
125
+ "modest/source/modest/render/begin.c",
126
+ "modest/source/modest/render/binding.c",
127
+ "modest/source/modest/node/property.c",
128
+ "modest/source/modest/node/node.c",
129
+ "modest/source/modest/node/serialization.c",
130
+ "modest/source/modest/node/raw_property.c",
131
+ "modest/source/modest/finder/finder.c",
132
+ "modest/source/modest/finder/type.c",
133
+ "modest/source/modest/finder/thread.c",
134
+ "modest/source/modest/finder/pseudo_class.c",
135
+ "modest/source/modest/finder/match.c",
136
+ "modest/source/modest/layer/layer.c",
137
+ "modest/source/modest/style/map.c",
138
+ "modest/source/modest/style/type.c",
139
+ "modest/source/modest/style/default.c",
140
+ "modest/source/modest/style/sheet.c",
141
+ "modest/source/modest/style/raw.c",
142
+ "modest/source/mycore/utils.c",
143
+ "modest/source/mycore/incoming.c",
144
+ "modest/source/mycore/mystring.c",
145
+ "modest/source/mycore/thread_queue.c",
146
+ "modest/source/mycore/myosi.c",
147
+ "modest/source/mycore/mythread.c",
148
+ "modest/source/mycore/utils/mhash.c",
149
+ "modest/source/mycore/utils/mcsimple.c",
150
+ "modest/source/mycore/utils/mcsync.c",
151
+ "modest/source/mycore/utils/avl_tree.c",
152
+ "modest/source/mycore/utils/mcobject.c",
153
+ "modest/source/mycore/utils/mcobject_async.c",
154
+ "modest/source/mycore/utils/mctree.c",
155
+ "modest/source/mycore/utils/mchar_async.c",
156
+ "modest/source/myfont/maxp.c",
157
+ "modest/source/myfont/os_2.c",
158
+ "modest/source/myfont/name.c",
159
+ "modest/source/myfont/vmtx.c",
160
+ "modest/source/myfont/cmap.c",
161
+ "modest/source/myfont/vhea.c",
162
+ "modest/source/myfont/myosi.c",
163
+ "modest/source/myfont/pclt.c",
164
+ "modest/source/myfont/hhea.c",
165
+ "modest/source/myfont/glyf.c",
166
+ "modest/source/myfont/head.c",
167
+ "modest/source/myfont/loca.c",
168
+ "modest/source/myfont/myfont.c",
169
+ "modest/source/myfont/hmtx.c",
170
+ "modest/source/myunicode/myosi.c"
171
171
  ]
172
172
  },
173
173
  "module_name": "selectolax.parser"
@@ -3482,7 +3482,6 @@ static const char __pyx_k_HTMLParser_css[] = "HTMLParser.css";
3482
3482
  static const char __pyx_k_Node_css_first[] = "Node.css_first";
3483
3483
  static const char __pyx_k_Node_decompose[] = "Node.decompose";
3484
3484
  static const char __pyx_k_fused_sigindex[] = "_fused_sigindex";
3485
- static const char __pyx_k_html_body_head[] = "<html|<body|<head";
3486
3485
  static const char __pyx_k_parse_fragment[] = "parse_fragment";
3487
3486
  static const char __pyx_k_Attributes_keys[] = "_Attributes.keys";
3488
3487
  static const char __pyx_k_Attributes_sget[] = "_Attributes.sget";
@@ -3513,6 +3512,7 @@ static const char __pyx_k_Node_replace_with[] = "Node.replace_with";
3513
3512
  static const char __pyx_k_any_text_contains[] = "any_text_contains";
3514
3513
  static const char __pyx_k_do_parse_fragment[] = "do_parse_fragment";
3515
3514
  static const char __pyx_k_get_fragment_type[] = "get_fragment_type";
3515
+ static const char __pyx_k_html_body_head_er[] = "<html|<body|<head(?!er)";
3516
3516
  static const char __pyx_k_selectolax_parser[] = "selectolax.parser";
3517
3517
  static const char __pyx_k_Can_t_parse_HTML_s[] = "Can't parse HTML:\n%s";
3518
3518
  static const char __pyx_k_HTMLParser_chars_s[] = "<HTMLParser chars=%s>";
@@ -4119,7 +4119,7 @@ typedef struct {
4119
4119
  PyObject *__pyx_n_u_head_and_body;
4120
4120
  PyObject *__pyx_n_s_html;
4121
4121
  PyObject *__pyx_kp_u_html_2;
4122
- PyObject *__pyx_kp_u_html_body_head;
4122
+ PyObject *__pyx_kp_u_html_body_head_er;
4123
4123
  PyObject *__pyx_n_s_html_len;
4124
4124
  PyObject *__pyx_n_s_html_re;
4125
4125
  PyObject *__pyx_n_s_html_tree;
@@ -4663,7 +4663,7 @@ static int __pyx_m_clear(PyObject *m) {
4663
4663
  Py_CLEAR(clear_module_state->__pyx_n_u_head_and_body);
4664
4664
  Py_CLEAR(clear_module_state->__pyx_n_s_html);
4665
4665
  Py_CLEAR(clear_module_state->__pyx_kp_u_html_2);
4666
- Py_CLEAR(clear_module_state->__pyx_kp_u_html_body_head);
4666
+ Py_CLEAR(clear_module_state->__pyx_kp_u_html_body_head_er);
4667
4667
  Py_CLEAR(clear_module_state->__pyx_n_s_html_len);
4668
4668
  Py_CLEAR(clear_module_state->__pyx_n_s_html_re);
4669
4669
  Py_CLEAR(clear_module_state->__pyx_n_s_html_tree);
@@ -5185,7 +5185,7 @@ static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) {
5185
5185
  Py_VISIT(traverse_module_state->__pyx_n_u_head_and_body);
5186
5186
  Py_VISIT(traverse_module_state->__pyx_n_s_html);
5187
5187
  Py_VISIT(traverse_module_state->__pyx_kp_u_html_2);
5188
- Py_VISIT(traverse_module_state->__pyx_kp_u_html_body_head);
5188
+ Py_VISIT(traverse_module_state->__pyx_kp_u_html_body_head_er);
5189
5189
  Py_VISIT(traverse_module_state->__pyx_n_s_html_len);
5190
5190
  Py_VISIT(traverse_module_state->__pyx_n_s_html_re);
5191
5191
  Py_VISIT(traverse_module_state->__pyx_n_s_html_tree);
@@ -5807,7 +5807,7 @@ static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) {
5807
5807
  #define __pyx_n_u_head_and_body __pyx_mstate_global->__pyx_n_u_head_and_body
5808
5808
  #define __pyx_n_s_html __pyx_mstate_global->__pyx_n_s_html
5809
5809
  #define __pyx_kp_u_html_2 __pyx_mstate_global->__pyx_kp_u_html_2
5810
- #define __pyx_kp_u_html_body_head __pyx_mstate_global->__pyx_kp_u_html_body_head
5810
+ #define __pyx_kp_u_html_body_head_er __pyx_mstate_global->__pyx_kp_u_html_body_head_er
5811
5811
  #define __pyx_n_s_html_len __pyx_mstate_global->__pyx_n_s_html_len
5812
5812
  #define __pyx_n_s_html_re __pyx_mstate_global->__pyx_n_s_html_re
5813
5813
  #define __pyx_n_s_html_tree __pyx_mstate_global->__pyx_n_s_html_tree
@@ -29017,7 +29017,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_4get_fragment_type(CYTHON_UNUSED
29017
29017
  * tree = parser_cls(html)
29018
29018
  *
29019
29019
  * import re # <<<<<<<<<<<<<<
29020
- * html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
29020
+ * html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
29021
29021
  *
29022
29022
  */
29023
29023
  __pyx_t_3 = __Pyx_ImportDottedModule(__pyx_n_s_re, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(5, 36, __pyx_L1_error)
@@ -29028,7 +29028,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_4get_fragment_type(CYTHON_UNUSED
29028
29028
  /* "selectolax/utils.pxi":37
29029
29029
  *
29030
29030
  * import re
29031
- * html_re = re.compile(r"<html|<body|<head", re.IGNORECASE) # <<<<<<<<<<<<<<
29031
+ * html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE) # <<<<<<<<<<<<<<
29032
29032
  *
29033
29033
  * has_html = False
29034
29034
  */
@@ -29051,7 +29051,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_4get_fragment_type(CYTHON_UNUSED
29051
29051
  }
29052
29052
  #endif
29053
29053
  {
29054
- PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_kp_u_html_body_head, __pyx_t_5};
29054
+ PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_kp_u_html_body_head_er, __pyx_t_5};
29055
29055
  __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6);
29056
29056
  __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
29057
29057
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
@@ -29063,7 +29063,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_4get_fragment_type(CYTHON_UNUSED
29063
29063
  __pyx_t_3 = 0;
29064
29064
 
29065
29065
  /* "selectolax/utils.pxi":39
29066
- * html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
29066
+ * html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
29067
29067
  *
29068
29068
  * has_html = False # <<<<<<<<<<<<<<
29069
29069
  * has_head = False
@@ -31536,7 +31536,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_16get_fragment_type(CYTHON_UNUSED
31536
31536
  * tree = parser_cls(html)
31537
31537
  *
31538
31538
  * import re # <<<<<<<<<<<<<<
31539
- * html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
31539
+ * html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
31540
31540
  *
31541
31541
  */
31542
31542
  __pyx_t_3 = __Pyx_ImportDottedModule(__pyx_n_s_re, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(7, 36, __pyx_L1_error)
@@ -31547,7 +31547,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_16get_fragment_type(CYTHON_UNUSED
31547
31547
  /* "selectolax/utils.pxi":37
31548
31548
  *
31549
31549
  * import re
31550
- * html_re = re.compile(r"<html|<body|<head", re.IGNORECASE) # <<<<<<<<<<<<<<
31550
+ * html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE) # <<<<<<<<<<<<<<
31551
31551
  *
31552
31552
  * has_html = False
31553
31553
  */
@@ -31570,7 +31570,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_16get_fragment_type(CYTHON_UNUSED
31570
31570
  }
31571
31571
  #endif
31572
31572
  {
31573
- PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_kp_u_html_body_head, __pyx_t_5};
31573
+ PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_kp_u_html_body_head_er, __pyx_t_5};
31574
31574
  __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6);
31575
31575
  __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
31576
31576
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
@@ -31582,7 +31582,7 @@ static PyObject *__pyx_pf_10selectolax_6parser_16get_fragment_type(CYTHON_UNUSED
31582
31582
  __pyx_t_3 = 0;
31583
31583
 
31584
31584
  /* "selectolax/utils.pxi":39
31585
- * html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
31585
+ * html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
31586
31586
  *
31587
31587
  * has_html = False # <<<<<<<<<<<<<<
31588
31588
  * has_head = False
@@ -41216,7 +41216,7 @@ static int __Pyx_CreateStringTabAndInitStrings(void) {
41216
41216
  {&__pyx_n_u_head_and_body, __pyx_k_head_and_body, sizeof(__pyx_k_head_and_body), 0, 1, 0, 1},
41217
41217
  {&__pyx_n_s_html, __pyx_k_html, sizeof(__pyx_k_html), 0, 0, 1, 1},
41218
41218
  {&__pyx_kp_u_html_2, __pyx_k_html_2, sizeof(__pyx_k_html_2), 0, 1, 0, 0},
41219
- {&__pyx_kp_u_html_body_head, __pyx_k_html_body_head, sizeof(__pyx_k_html_body_head), 0, 1, 0, 0},
41219
+ {&__pyx_kp_u_html_body_head_er, __pyx_k_html_body_head_er, sizeof(__pyx_k_html_body_head_er), 0, 1, 0, 0},
41220
41220
  {&__pyx_n_s_html_len, __pyx_k_html_len, sizeof(__pyx_k_html_len), 0, 0, 1, 1},
41221
41221
  {&__pyx_n_s_html_re, __pyx_k_html_re, sizeof(__pyx_k_html_re), 0, 0, 1, 1},
41222
41222
  {&__pyx_n_s_html_tree, __pyx_k_html_tree, sizeof(__pyx_k_html_tree), 0, 0, 1, 1},
selectolax/parser.pyi CHANGED
@@ -53,14 +53,14 @@ class Selector:
53
53
  ) -> bool:
54
54
  """Returns True if any node in the current search scope contains specified text"""
55
55
  ...
56
- def attribute_long_than(
56
+ def attribute_longer_than(
57
57
  self, text: str, length: int, start: str | None = None
58
58
  ) -> Selector:
59
59
  """Filter all current matches by attribute length.
60
60
 
61
61
  Similar to string-length in XPath."""
62
62
  ...
63
- def any_attribute_long_than(
63
+ def any_attribute_longer_than(
64
64
  self, text: str, length: int, start: str | None = None
65
65
  ) -> bool:
66
66
  """Returns True any href attribute longer than a specified length.
selectolax/utils.pxi CHANGED
@@ -34,7 +34,7 @@ def get_fragment_type(
34
34
  tree = parser_cls(html)
35
35
 
36
36
  import re
37
- html_re = re.compile(r"<html|<body|<head", re.IGNORECASE)
37
+ html_re = re.compile(r"<html|<body|<head(?!er)", re.IGNORECASE)
38
38
 
39
39
  has_html = False
40
40
  has_head = False
@@ -1,7 +1,7 @@
1
1
 
2
2
  MIT License
3
3
 
4
- Copyright (c) 2018-2023, Artem Golubin
4
+ Copyright (c) 2018-2025, Artem Golubin
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
7
7
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: selectolax
3
- Version: 0.3.26
3
+ Version: 0.3.28
4
4
  Summary: Fast HTML5 parser with CSS selectors.
5
5
  Home-page: https://github.com/rushter/selectolax
6
6
  Author: Artem Golubin
@@ -75,6 +75,10 @@ How to compile selectolax while developing:
75
75
  Basic examples
76
76
  --------------
77
77
 
78
+ Here are some basic examples to get you started with selectolax:
79
+
80
+ Parsing HTML and extracting text:
81
+
78
82
  .. code:: python
79
83
 
80
84
  In [1]: from selectolax.parser import HTMLParser
@@ -97,6 +101,8 @@ Basic examples
97
101
  ['Lorem Ipsum is simply dummy text of the printing and typesetting industry. ',
98
102
  'Lorem ipsum dolor sit amet, consectetur adipiscing elit.']
99
103
 
104
+ Using advanced CSS selectors:
105
+
100
106
  .. code:: python
101
107
 
102
108
  In [1]: html = "<div><p id=p1><p id=p2><p id=p3><a>link</a><p id=p4><p id=p5>text<p id=p6></div>"
@@ -0,0 +1,26 @@
1
+ selectolax-0.3.28.dist-info/LICENSE,sha256=MYCcM-Cv_rC2-lQiwDumin0E-rMXAhK-qIGGA29434Y,1077
2
+ selectolax-0.3.28.dist-info/WHEEL,sha256=93QKMfwiVNqBx5omg2EF3x8R4QgFF-JxwrCMC0yhcqE,110
3
+ selectolax-0.3.28.dist-info/METADATA,sha256=vTdwlSLUc9k1DeDfD4h5ZcM0cB0aIgShNit3UMiniAU,6060
4
+ selectolax-0.3.28.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
5
+ selectolax-0.3.28.dist-info/RECORD,,
6
+ selectolax/parser.pxd,sha256=zZlg1vHUg6o4MXaiwKAo5S5hO_DqBGc4_E10qJ2EcM4,24564
7
+ selectolax/__init__.py,sha256=IhnQaAtBWz03SUIe66y78uQqmWBontg4z13rRupwa7Q,175
8
+ selectolax/base.pxi,sha256=eiPKlY9gG3l49qJoRQVLl1Ljza6z1k0A-met6sDPcqE,89
9
+ selectolax/lexbor.pyi,sha256=gf0rPd2B1EZyz_oN6EER-wFojg__Sz18GwjjVYo7SkU,6552
10
+ selectolax/parser.pyi,sha256=qLkvStGG4K3daXChLChzPHGV5w5gmIEMvFwRpC_Q4EM,11561
11
+ selectolax/lexbor.cpython-310-i386-linux-gnu.so,sha256=_wTCZYNADnXqvgcEoEs2yrR7uYFV12BG_0II02Kk5ho,7925104
12
+ selectolax/utils.pxi,sha256=uB0-0naFQPy1JpR2DiIlKnyLyC76yWLnUHSuH11xg6s,3459
13
+ selectolax/parser.c,sha256=yd8gEU0netTAe3rf5haknFFz-QtMHTi6nhKGW5GUTTE,2214825
14
+ selectolax/lexbor.pxd,sha256=PwygBdb1blWAQcxXubZS5uffhgcXaqgySNMPFMT02-c,20958
15
+ selectolax/lexbor.pyx,sha256=rpb32yQ2E_6nJeaPDQs3kb3GFoALZqQbVCN35kcUM-M,10882
16
+ selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ selectolax/parser.pyx,sha256=o1HkYE_nQr3TS7EPlldJx2-ygU9B5FI2uWYFzdF-VaI,12953
18
+ selectolax/parser.cpython-310-i386-linux-gnu.so,sha256=5ADXdjt9A2wtxPvMAF6Q0nZaFm6kf-vwthPZ4Oospc0,5795372
19
+ selectolax/lexbor.c,sha256=UkiarPPcp6IKa-HMZLnhap1CRltADEZdwsDWo5EviLQ,2359509
20
+ selectolax/modest/util.pxi,sha256=aX9UnRNTITImHVBTlIs9efOd3EyugLq_Lwuo0zVTiuQ,551
21
+ selectolax/modest/node.pxi,sha256=NrMzJnQJDCmgTHpUxpMHDyAfQ_AS_n_Cr_2ryEKjyL0,32550
22
+ selectolax/modest/selection.pxi,sha256=S55MMxEW2B1oPExB_DRwPM46WoWZU73J3rFRZU1URuQ,6393
23
+ selectolax/lexbor/attrs.pxi,sha256=Ol2RNzXZAcWaqJdDBUe0ChOCcA8HC990Hjncj98XAkw,3138
24
+ selectolax/lexbor/util.pxi,sha256=Zq7S-zlyU3wOo49wGHQHnmmhpbkrcJm59ZCTPENcZQA,563
25
+ selectolax/lexbor/node.pxi,sha256=-cqsA4gz9yL6hCte6uGgdQKvhIBZF_BZc_xHJn0rkCM,29340
26
+ selectolax/lexbor/selection.pxi,sha256=FA6npHtXjJjvS8H2_e_LS53i5zbpGYgb5zTh5Tf_XQY,6571
@@ -1,26 +0,0 @@
1
- selectolax/lexbor.cpython-310-i386-linux-gnu.so,sha256=JvFwrcMHmP_NyL5zXMicvgVWatw2eyEBfBu__GUzY08,16414856
2
- selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- selectolax/parser.pyi,sha256=kbR5eWvkJEy-9Hx3L_4JmGy3caIl0ki4SiagWz-fnhw,11557
4
- selectolax/parser.cpython-310-i386-linux-gnu.so,sha256=hxCl_O9B_zW0yh4zUdlom7SmLJNpkibPFaUE3mGBBNk,5795368
5
- selectolax/parser.pxd,sha256=zZlg1vHUg6o4MXaiwKAo5S5hO_DqBGc4_E10qJ2EcM4,24564
6
- selectolax/lexbor.pxd,sha256=PwygBdb1blWAQcxXubZS5uffhgcXaqgySNMPFMT02-c,20958
7
- selectolax/base.pxi,sha256=eiPKlY9gG3l49qJoRQVLl1Ljza6z1k0A-met6sDPcqE,89
8
- selectolax/lexbor.pyi,sha256=X2PMQR2XLd2rOPliKSpeFZ_VEf6mOQFTcFm0ChQbzsQ,6544
9
- selectolax/lexbor.c,sha256=Bqxtf923DtupnW7ZPAu3f0M1PjMJJSaofiSvOYrP69c,2353590
10
- selectolax/lexbor.pyx,sha256=ffEzBnZjGTsI-H5qck7bfjVRE9vteOhQnDp6RjVD7G0,10750
11
- selectolax/parser.pyx,sha256=o1HkYE_nQr3TS7EPlldJx2-ygU9B5FI2uWYFzdF-VaI,12953
12
- selectolax/utils.pxi,sha256=rPNMFqS0PRLkQPugwPfj-pnHCzkQzQ2cjIRMPZdR6R8,3453
13
- selectolax/parser.c,sha256=0jrrlXvB2Vu9s8REpwt4l6lu641e4FXVmMYXsaQN2XQ,2214750
14
- selectolax/__init__.py,sha256=r8TrGaSWTYcCt8yNkcf_pH1I7c7IPOMagr3wd9tCgy8,175
15
- selectolax/modest/util.pxi,sha256=aX9UnRNTITImHVBTlIs9efOd3EyugLq_Lwuo0zVTiuQ,551
16
- selectolax/modest/node.pxi,sha256=NrMzJnQJDCmgTHpUxpMHDyAfQ_AS_n_Cr_2ryEKjyL0,32550
17
- selectolax/modest/selection.pxi,sha256=S55MMxEW2B1oPExB_DRwPM46WoWZU73J3rFRZU1URuQ,6393
18
- selectolax/lexbor/util.pxi,sha256=Zq7S-zlyU3wOo49wGHQHnmmhpbkrcJm59ZCTPENcZQA,563
19
- selectolax/lexbor/node.pxi,sha256=1XNzUwCbTYXy4D6rZtHxMpoJ9M-xoprB9wjdsiaWhr0,29346
20
- selectolax/lexbor/selection.pxi,sha256=PqjvpL6H9uFcmcQWVGfML8FDsTO7tGoZujpA00g9pWk,6444
21
- selectolax/lexbor/attrs.pxi,sha256=-518D5v70GgMJhtsxWrWcgIMnXg8afECpUubzq8kqqs,3102
22
- selectolax-0.3.26.dist-info/LICENSE,sha256=kYggm2ZJzBgL79x1gCsYsx8rFIYP2IE-BdXRV3Rm0NU,1077
23
- selectolax-0.3.26.dist-info/METADATA,sha256=HqNPkDK0NqFA-N4HsTn3QtHYktNEwobUdCj7kw-e4VU,5928
24
- selectolax-0.3.26.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
25
- selectolax-0.3.26.dist-info/WHEEL,sha256=93QKMfwiVNqBx5omg2EF3x8R4QgFF-JxwrCMC0yhcqE,110
26
- selectolax-0.3.26.dist-info/RECORD,,