selectolax 0.3.33__cp310-cp310-win_arm64.whl → 0.3.34__cp310-cp310-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of selectolax might be problematic. Click here for more details.
- selectolax/__init__.py +1 -1
- selectolax/lexbor/node.pxi +6 -0
- selectolax/lexbor.c +1933 -1918
- selectolax/lexbor.cp310-win_arm64.pyd +0 -0
- selectolax/lexbor.pyi +12 -0
- selectolax/lexbor.pyx +5 -0
- selectolax/parser.c +45 -30
- selectolax/parser.cp310-win_arm64.pyd +0 -0
- selectolax-0.3.34.dist-info/METADATA +32 -0
- {selectolax-0.3.33.dist-info → selectolax-0.3.34.dist-info}/RECORD +13 -13
- selectolax-0.3.33.dist-info/METADATA +0 -187
- {selectolax-0.3.33.dist-info → selectolax-0.3.34.dist-info}/WHEEL +0 -0
- {selectolax-0.3.33.dist-info → selectolax-0.3.34.dist-info}/licenses/LICENSE +0 -0
- {selectolax-0.3.33.dist-info → selectolax-0.3.34.dist-info}/top_level.txt +0 -0
|
Binary file
|
selectolax/lexbor.pyi
CHANGED
|
@@ -145,6 +145,12 @@ class LexborNode:
|
|
|
145
145
|
Matches pattern `query` against HTML tree.
|
|
146
146
|
`CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
|
|
147
147
|
|
|
148
|
+
Special selectors:
|
|
149
|
+
|
|
150
|
+
- parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
|
|
151
|
+
- parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
|
|
152
|
+
|
|
153
|
+
|
|
148
154
|
Parameters
|
|
149
155
|
----------
|
|
150
156
|
query : str
|
|
@@ -665,6 +671,12 @@ class LexborHTMLParser:
|
|
|
665
671
|
Matches pattern `query` against HTML tree.
|
|
666
672
|
`CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
|
|
667
673
|
|
|
674
|
+
Special selectors:
|
|
675
|
+
|
|
676
|
+
- parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
|
|
677
|
+
- parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
|
|
678
|
+
|
|
679
|
+
|
|
668
680
|
Parameters
|
|
669
681
|
----------
|
|
670
682
|
query : str
|
selectolax/lexbor.pyx
CHANGED
|
@@ -169,6 +169,11 @@ cdef class LexborHTMLParser:
|
|
|
169
169
|
Matches pattern `query` against HTML tree.
|
|
170
170
|
`CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
|
|
171
171
|
|
|
172
|
+
Special selectors:
|
|
173
|
+
|
|
174
|
+
- parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
|
|
175
|
+
- parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
|
|
176
|
+
|
|
172
177
|
Parameters
|
|
173
178
|
----------
|
|
174
179
|
query : str
|
selectolax/parser.c
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
/* Generated by Cython 3.1.2 */
|
|
1
|
+
/* Generated by Cython 3.1.3 */
|
|
2
2
|
|
|
3
3
|
/* BEGIN: Cython Metadata
|
|
4
4
|
{
|
|
@@ -182,8 +182,8 @@ END: Cython Metadata */
|
|
|
182
182
|
#elif PY_VERSION_HEX < 0x03080000
|
|
183
183
|
#error Cython requires Python 3.8+.
|
|
184
184
|
#else
|
|
185
|
-
#define __PYX_ABI_VERSION "3_1_2"
|
|
186
|
-
#define CYTHON_HEX_VERSION 0x030102F0
|
|
185
|
+
#define __PYX_ABI_VERSION "3_1_3"
|
|
186
|
+
#define CYTHON_HEX_VERSION 0x030103F0
|
|
187
187
|
#define CYTHON_FUTURE_DIVISION 1
|
|
188
188
|
/* CModulePreamble */
|
|
189
189
|
#include <stddef.h>
|
|
@@ -546,6 +546,9 @@ END: Cython Metadata */
|
|
|
546
546
|
enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) };
|
|
547
547
|
#endif
|
|
548
548
|
#endif
|
|
549
|
+
#ifndef CYTHON_LOCK_AND_GIL_DEADLOCK_AVOIDANCE_TIME
|
|
550
|
+
#define CYTHON_LOCK_AND_GIL_DEADLOCK_AVOIDANCE_TIME 100
|
|
551
|
+
#endif
|
|
549
552
|
#ifndef __has_attribute
|
|
550
553
|
#define __has_attribute(x) 0
|
|
551
554
|
#endif
|
|
@@ -2768,22 +2771,22 @@ static int __Pyx__DelItemOnTypeDict(PyTypeObject *tp, PyObject *k);
|
|
|
2768
2771
|
static int __Pyx_setup_reduce(PyObject* type_obj);
|
|
2769
2772
|
|
|
2770
2773
|
/* TypeImport.proto */
|
|
2771
|
-
#ifndef __PYX_HAVE_RT_ImportType_proto_3_1_2
|
|
2772
|
-
#define __PYX_HAVE_RT_ImportType_proto_3_1_2
|
|
2774
|
+
#ifndef __PYX_HAVE_RT_ImportType_proto_3_1_3
|
|
2775
|
+
#define __PYX_HAVE_RT_ImportType_proto_3_1_3
|
|
2773
2776
|
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
|
|
2774
2777
|
#include <stdalign.h>
|
|
2775
2778
|
#endif
|
|
2776
2779
|
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || __cplusplus >= 201103L
|
|
2777
|
-
#define __PYX_GET_STRUCT_ALIGNMENT_3_1_2(s) alignof(s)
|
|
2780
|
+
#define __PYX_GET_STRUCT_ALIGNMENT_3_1_3(s) alignof(s)
|
|
2778
2781
|
#else
|
|
2779
|
-
#define __PYX_GET_STRUCT_ALIGNMENT_3_1_2(s) sizeof(void*)
|
|
2782
|
+
#define __PYX_GET_STRUCT_ALIGNMENT_3_1_3(s) sizeof(void*)
|
|
2780
2783
|
#endif
|
|
2781
|
-
enum __Pyx_ImportType_CheckSize_3_1_2 {
|
|
2782
|
-
|
|
2783
|
-
|
|
2784
|
-
|
|
2784
|
+
enum __Pyx_ImportType_CheckSize_3_1_3 {
|
|
2785
|
+
__Pyx_ImportType_CheckSize_Error_3_1_3 = 0,
|
|
2786
|
+
__Pyx_ImportType_CheckSize_Warn_3_1_3 = 1,
|
|
2787
|
+
__Pyx_ImportType_CheckSize_Ignore_3_1_3 = 2
|
|
2785
2788
|
};
|
|
2786
|
-
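static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_2 check_size);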
|
|
2789
|
+
static PyTypeObject *__Pyx_ImportType_3_1_3(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_3 check_size);
|
|
2787
2790
|
#endif
|
|
2788
2791
|
|
|
2789
2792
|
/* FetchSharedCythonModule.proto */
|
|
@@ -3567,7 +3570,7 @@ static const char __pyx_k_Node_strip_tags_line_554[] = "Node.strip_tags (line 55
|
|
|
3567
3570
|
static const char __pyx_k_Selector___reduce_cython[] = "Selector.__reduce_cython__";
|
|
3568
3571
|
static const char __pyx_k_Tag_name_cannot_be_empty[] = "Tag name cannot be empty";
|
|
3569
3572
|
static const char __pyx_k_document_no_head_no_body[] = "document_no_head_no_body";
|
|
3570
|
-
static const char __pyx_k_hk_A_1_vvxxy_881A_7_nA_1[] = "\200\001\360\006\000\005\010\200
|
|
3573
|
+
static const char __pyx_k_hk_A_1_vvxxy_881A_7_nA_1[] = "\200\001\360\006\000\005\010\200\177\220h\230k\250\033\260A\330\010\r\210^\2301\330\010\016\320\016!\320!v\320vx\320xy\330\004\023\2208\2308\2401\240A\330\004\007\200|\2207\230!\330\010*\250!\250;\260n\300A\330\004\013\2101";
|
|
3571
3574
|
static const char __pyx_k_Node_unwrap_tags_line_580[] = "Node.unwrap_tags (line 580)";
|
|
3572
3575
|
static const char __pyx_k_Node_unwrap_tags_line_768[] = "Node.unwrap_tags (line 768)";
|
|
3573
3576
|
static const char __pyx_k_any_attribute_longer_than[] = "any_attribute_longer_than";
|
|
@@ -38627,27 +38630,27 @@ static int __Pyx_modinit_type_import_code(__pyx_mstatetype *__pyx_mstate) {
|
|
|
38627
38630
|
/*--- Type import code ---*/
|
|
38628
38631
|
__pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(8, 8, __pyx_L1_error)
|
|
38629
38632
|
__Pyx_GOTREF(__pyx_t_1);
|
|
38630
|
-
__pyx_mstate->__pyx_ptype_7cpython_4bool_bool = __Pyx_ImportType_3_1_2(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "bool",
|
|
38633
|
+
__pyx_mstate->__pyx_ptype_7cpython_4bool_bool = __Pyx_ImportType_3_1_3(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "bool",
|
|
38631
38634
|
#if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000
|
|
38632
|
-
sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyLongObject),
|
|
38635
|
+
sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyLongObject),
|
|
38633
38636
|
#elif CYTHON_COMPILING_IN_LIMITED_API
|
|
38634
38637
|
0, 0,
|
|
38635
38638
|
#else
|
|
38636
|
-
sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyLongObject),
|
|
38639
|
+
sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyLongObject),
|
|
38637
38640
|
#endif
|
|
38638
|
-
|
|
38641
|
+
__Pyx_ImportType_CheckSize_Warn_3_1_3); if (!__pyx_mstate->__pyx_ptype_7cpython_4bool_bool) __PYX_ERR(8, 8, __pyx_L1_error)
|
|
38639
38642
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
|
38640
38643
|
__pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(9, 9, __pyx_L1_error)
|
|
38641
38644
|
__Pyx_GOTREF(__pyx_t_1);
|
|
38642
|
-
__pyx_mstate->__pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_1_2(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type",
|
|
38645
|
+
__pyx_mstate->__pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_1_3(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type",
|
|
38643
38646
|
#if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000
|
|
38644
|
-
sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyTypeObject),
|
|
38647
|
+
sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyTypeObject),
|
|
38645
38648
|
#elif CYTHON_COMPILING_IN_LIMITED_API
|
|
38646
38649
|
0, 0,
|
|
38647
38650
|
#else
|
|
38648
|
-
sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyHeapTypeObject),
|
|
38651
|
+
sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyHeapTypeObject),
|
|
38649
38652
|
#endif
|
|
38650
|
-
|
|
38653
|
+
__Pyx_ImportType_CheckSize_Warn_3_1_3); if (!__pyx_mstate->__pyx_ptype_7cpython_4type_type) __PYX_ERR(9, 9, __pyx_L1_error)
|
|
38651
38654
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
|
38652
38655
|
__Pyx_RefNannyFinishContext();
|
|
38653
38656
|
return 0;
|
|
@@ -43841,6 +43844,13 @@ try_unpack:
|
|
|
43841
43844
|
|
|
43842
43845
|
/* PyObjectCallMethod0 */
|
|
43843
43846
|
static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) {
|
|
43847
|
+
#if CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000))
|
|
43848
|
+
PyObject *args[1] = {obj};
|
|
43849
|
+
(void) __Pyx_PyObject_GetMethod;
|
|
43850
|
+
(void) __Pyx_PyObject_CallOneArg;
|
|
43851
|
+
(void) __Pyx_PyObject_CallNoArg;
|
|
43852
|
+
return PyObject_VectorcallMethod(method_name, args, 1 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
|
|
43853
|
+
#else
|
|
43844
43854
|
PyObject *method = NULL, *result = NULL;
|
|
43845
43855
|
int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method);
|
|
43846
43856
|
if (likely(is_method)) {
|
|
@@ -43853,6 +43863,7 @@ static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name
|
|
|
43853
43863
|
Py_DECREF(method);
|
|
43854
43864
|
bad:
|
|
43855
43865
|
return result;
|
|
43866
|
+
#endif
|
|
43856
43867
|
}
|
|
43857
43868
|
|
|
43858
43869
|
/* RaiseNeedMoreValuesToUnpack */
|
|
@@ -44663,6 +44674,7 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
|
|
|
44663
44674
|
changed = 1;
|
|
44664
44675
|
}
|
|
44665
44676
|
#endif // CYTHON_METH_FASTCALL
|
|
44677
|
+
#if !CYTHON_COMPILING_IN_PYPY
|
|
44666
44678
|
else if (strcmp(memb->name, "__module__") == 0) {
|
|
44667
44679
|
PyObject *descr;
|
|
44668
44680
|
assert(memb->type == T_OBJECT);
|
|
@@ -44677,11 +44689,13 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
|
|
|
44677
44689
|
}
|
|
44678
44690
|
changed = 1;
|
|
44679
44691
|
}
|
|
44692
|
+
#endif // !CYTHON_COMPILING_IN_PYPY
|
|
44680
44693
|
}
|
|
44681
44694
|
memb++;
|
|
44682
44695
|
}
|
|
44683
44696
|
}
|
|
44684
44697
|
#endif // !CYTHON_COMPILING_IN_LIMITED_API
|
|
44698
|
+
#if !CYTHON_COMPILING_IN_PYPY
|
|
44685
44699
|
slot = spec->slots;
|
|
44686
44700
|
while (slot && slot->slot && slot->slot != Py_tp_getset)
|
|
44687
44701
|
slot++;
|
|
@@ -44713,6 +44727,7 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
|
|
|
44713
44727
|
++getset;
|
|
44714
44728
|
}
|
|
44715
44729
|
}
|
|
44730
|
+
#endif // !CYTHON_COMPILING_IN_PYPY
|
|
44716
44731
|
if (changed)
|
|
44717
44732
|
PyType_Modified(type);
|
|
44718
44733
|
#endif // PY_VERSION_HEX > 0x030900B1
|
|
@@ -45146,10 +45161,10 @@ __PYX_GOOD:
|
|
|
45146
45161
|
}
|
|
45147
45162
|
|
|
45148
45163
|
/* TypeImport */
|
|
45149
|
-
#ifndef __PYX_HAVE_RT_ImportType_3_1_2
|
|
45150
|
-
#define __PYX_HAVE_RT_ImportType_3_1_2
|
|
45151
|
-
static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module_name, const char *class_name,
|
|
45152
|
-
size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_2 check_size)
|
|
45164
|
+
#ifndef __PYX_HAVE_RT_ImportType_3_1_3
|
|
45165
|
+
#define __PYX_HAVE_RT_ImportType_3_1_3
|
|
45166
|
+
static PyTypeObject *__Pyx_ImportType_3_1_3(PyObject *module, const char *module_name, const char *class_name,
|
|
45167
|
+
size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_3 check_size)
|
|
45153
45168
|
{
|
|
45154
45169
|
PyObject *result = 0;
|
|
45155
45170
|
Py_ssize_t basicsize;
|
|
@@ -45205,7 +45220,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module
|
|
|
45205
45220
|
module_name, class_name, size, basicsize+itemsize);
|
|
45206
45221
|
goto bad;
|
|
45207
45222
|
}
|
|
45208
|
-
if (check_size == __Pyx_ImportType_CheckSize_Error_3_1_2 &&
|
|
45223
|
+
if (check_size == __Pyx_ImportType_CheckSize_Error_3_1_3 &&
|
|
45209
45224
|
((size_t)basicsize > size || (size_t)(basicsize + itemsize) < size)) {
|
|
45210
45225
|
PyErr_Format(PyExc_ValueError,
|
|
45211
45226
|
"%.200s.%.200s size changed, may indicate binary incompatibility. "
|
|
@@ -45213,7 +45228,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module
|
|
|
45213
45228
|
module_name, class_name, size, basicsize, basicsize+itemsize);
|
|
45214
45229
|
goto bad;
|
|
45215
45230
|
}
|
|
45216
|
-
else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_1_2 && (size_t)basicsize > size) {
|
|
45231
|
+
else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_1_3 && (size_t)basicsize > size) {
|
|
45217
45232
|
if (PyErr_WarnFormat(NULL, 0,
|
|
45218
45233
|
"%.200s.%.200s size changed, may indicate binary incompatibility. "
|
|
45219
45234
|
"Expected %zd from C header, got %zd from PyObject",
|
|
@@ -45354,7 +45369,7 @@ bad:
|
|
|
45354
45369
|
}
|
|
45355
45370
|
|
|
45356
45371
|
/* CommonTypesMetaclass */
|
|
45357
|
-
PyObject* __pyx_CommonTypesMetaclass_get_module(CYTHON_UNUSED PyObject *self, CYTHON_UNUSED void* context) {
|
|
45372
|
+
static PyObject* __pyx_CommonTypesMetaclass_get_module(CYTHON_UNUSED PyObject *self, CYTHON_UNUSED void* context) {
|
|
45358
45373
|
return PyUnicode_FromString(__PYX_ABI_MODULE_NAME);
|
|
45359
45374
|
}
|
|
45360
45375
|
static PyGetSetDef __pyx_CommonTypesMetaclass_getset[] = {
|
|
@@ -48283,7 +48298,7 @@ static CYTHON_INLINE PyObject *__Pyx_PyIter_Next_Plain(PyObject *iterator) {
|
|
|
48283
48298
|
}
|
|
48284
48299
|
|
|
48285
48300
|
/* PyObjectCallMethod1 */
|
|
48286
|
-
#if !(CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C0000)
|
|
48301
|
+
#if !(CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000)))
|
|
48287
48302
|
static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) {
|
|
48288
48303
|
PyObject *result = __Pyx_PyObject_CallOneArg(method, arg);
|
|
48289
48304
|
Py_DECREF(method);
|
|
@@ -48291,7 +48306,7 @@ static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) {
|
|
|
48291
48306
|
}
|
|
48292
48307
|
#endif
|
|
48293
48308
|
static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg) {
|
|
48294
|
-
#if CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C0000
|
|
48309
|
+
#if CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000))
|
|
48295
48310
|
PyObject *args[2] = {obj, arg};
|
|
48296
48311
|
(void) __Pyx_PyObject_GetMethod;
|
|
48297
48312
|
(void) __Pyx_PyObject_CallOneArg;
|
|
Binary file
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: selectolax
|
|
3
|
+
Version: 0.3.34
|
|
4
|
+
Summary: Fast HTML5 parser with CSS selectors.
|
|
5
|
+
Home-page: https://github.com/rushter/selectolax
|
|
6
|
+
Author: Artem Golubin
|
|
7
|
+
Author-email: Artem Golubin <me@rushter.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Project-URL: Repository, https://github.com/rushter/selectolax
|
|
10
|
+
Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
|
|
11
|
+
Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.md
|
|
12
|
+
Keywords: selectolax,html,parser,css,fast
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: Topic :: Text Processing :: Markup :: HTML
|
|
15
|
+
Classifier: Topic :: Internet
|
|
16
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
17
|
+
Classifier: Intended Audience :: Developers
|
|
18
|
+
Classifier: Natural Language :: English
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Requires-Python: >=3.9
|
|
26
|
+
Description-Content-Type: text/x-rst
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Provides-Extra: cython
|
|
29
|
+
Requires-Dist: Cython; extra == "cython"
|
|
30
|
+
Dynamic: author
|
|
31
|
+
Dynamic: home-page
|
|
32
|
+
Dynamic: license-file
|
|
@@ -1,26 +1,26 @@
|
|
|
1
|
-
selectolax/__init__.py,sha256=
|
|
1
|
+
selectolax/__init__.py,sha256=iI6pQ10gimevS2gTf4K4_1cXh4NBRFj_5GjkmhrvU94,157
|
|
2
2
|
selectolax/base.pxi,sha256=zOj3BrCA71xd-mJFtkMIAglP4ZybfrHVoCoy6ljTBDQ,93
|
|
3
|
-
selectolax/lexbor.c,sha256=
|
|
4
|
-
selectolax/lexbor.cp310-win_arm64.pyd,sha256=
|
|
3
|
+
selectolax/lexbor.c,sha256=Kz7IFiUGbVTJvAH3WTwu188zD4xQm08Fs6ab6Jo6jyE,2419433
|
|
4
|
+
selectolax/lexbor.cp310-win_arm64.pyd,sha256=H2bvDCP1rB-nckBX8pn1iuuDV4DVmgiWeCRgPtCKBl8,3189760
|
|
5
5
|
selectolax/lexbor.pxd,sha256=BcqAzhlUVq0GVWiJHWXNhs4jY-gi6k0BELEnQtSYJAI,21720
|
|
6
|
-
selectolax/lexbor.pyi,sha256=
|
|
7
|
-
selectolax/lexbor.pyx,sha256=
|
|
8
|
-
selectolax/parser.c,sha256=
|
|
9
|
-
selectolax/parser.cp310-win_arm64.pyd,sha256=
|
|
6
|
+
selectolax/lexbor.pyi,sha256=dRNzLXJEbFRR7QcItuX8Ews9E9I6h6G4vA3X1hijzj4,28990
|
|
7
|
+
selectolax/lexbor.pyx,sha256=XLZ2vGwLoWdctnmU-gfizjD6tMjehR_bzNOapDJ_YOQ,12891
|
|
8
|
+
selectolax/parser.c,sha256=zUJAqFbI1vy5-cjgPwJVfYassgbP7Gdnr2eRYv5D3W4,2259231
|
|
9
|
+
selectolax/parser.cp310-win_arm64.pyd,sha256=ZaDfALnFRf-iGgHSzvwu_hmzi6hcTfSDoLZ3z8N8GUA,2132480
|
|
10
10
|
selectolax/parser.pxd,sha256=T7GoQdaOkhp_W2TBlRY0tZqom97PkHrytYaXQlyVnbI,25196
|
|
11
11
|
selectolax/parser.pyi,sha256=-qutpjrK1dD4rrl3SsHWQt2FT5lv6meaACkQzk1Bt6o,25612
|
|
12
12
|
selectolax/parser.pyx,sha256=nIWuhaEFRwlfo64WmgrSOM0A8mUw0eWw9j_fWyLV-Ro,14127
|
|
13
13
|
selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
selectolax/utils.pxi,sha256=hkzKfycdpwH1P-E_pP-9NTGsmiajt6EJNZSlkxlRapA,3623
|
|
15
15
|
selectolax/lexbor/attrs.pxi,sha256=d59V77aGkpp7YsYsd6t_z4-tRnUoQTJZKsvMC8nyttM,3978
|
|
16
|
-
selectolax/lexbor/node.pxi,sha256=
|
|
16
|
+
selectolax/lexbor/node.pxi,sha256=KODqPk3yZ_owwdSxqNr2Ih6qAOhu9CJ-jrHtqQJcWmY,33407
|
|
17
17
|
selectolax/lexbor/selection.pxi,sha256=BeUDypw5_P0CTmi-ACLcd7pK2NnG9ASrwWOdLdweAZY,7378
|
|
18
18
|
selectolax/lexbor/util.pxi,sha256=q2EYVNdnROg9y30mWpGwlNA0W00nJ7ZRNEEDrOEG14s,584
|
|
19
19
|
selectolax/modest/node.pxi,sha256=iX_yRPIPVkG0ALW7hEfmXiVperw6RjkSGATkxzLokz0,34691
|
|
20
20
|
selectolax/modest/selection.pxi,sha256=PfHUN1uuNA7YfcxTu7JZjhxevVbFRP1bHd3kyyFdO7E,6703
|
|
21
21
|
selectolax/modest/util.pxi,sha256=zab67Wzo8FcipA2VS8ClptaC19lZirbNqFEGQ3hW2Is,572
|
|
22
|
-
selectolax-0.3.
|
|
23
|
-
selectolax-0.3.
|
|
24
|
-
selectolax-0.3.
|
|
25
|
-
selectolax-0.3.
|
|
26
|
-
selectolax-0.3.33.dist-info/RECORD,,
|
|
22
|
+
selectolax-0.3.34.dist-info/licenses/LICENSE,sha256=A7Jb3WZcENcLfZRc7QPdm9zJdwfpIyPodPJu-kdMH6E,1087
|
|
23
|
+
selectolax-0.3.34.dist-info/METADATA,sha256=rAqskRB9wMSn7tEZLxZswUJD4wFzN4fizyXjiBv4L4o,1318
|
|
24
|
+
selectolax-0.3.34.dist-info/WHEEL,sha256=3VTbmYeim8AL3qTE9jWZKYc9au-xBJE3sdtKTLGutAo,101
|
|
25
|
+
selectolax-0.3.34.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
|
|
26
|
+
selectolax-0.3.34.dist-info/RECORD,,
|
|
@@ -1,187 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: selectolax
|
|
3
|
-
Version: 0.3.33
|
|
4
|
-
Summary: Fast HTML5 parser with CSS selectors.
|
|
5
|
-
Home-page: https://github.com/rushter/selectolax
|
|
6
|
-
Author: Artem Golubin
|
|
7
|
-
Author-email: Artem Golubin <me@rushter.com>
|
|
8
|
-
License: MIT
|
|
9
|
-
Project-URL: Repository, https://github.com/rushter/selectolax
|
|
10
|
-
Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
|
|
11
|
-
Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.rst
|
|
12
|
-
Keywords: selectolax,html,parser,css,fast
|
|
13
|
-
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
-
Classifier: Topic :: Text Processing :: Markup :: HTML
|
|
15
|
-
Classifier: Topic :: Internet
|
|
16
|
-
Classifier: Topic :: Internet :: WWW/HTTP
|
|
17
|
-
Classifier: Intended Audience :: Developers
|
|
18
|
-
Classifier: Natural Language :: English
|
|
19
|
-
Classifier: Programming Language :: Python :: 3
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
-
Requires-Python: >=3.9
|
|
26
|
-
Description-Content-Type: text/x-rst
|
|
27
|
-
License-File: LICENSE
|
|
28
|
-
Provides-Extra: cython
|
|
29
|
-
Requires-Dist: Cython; extra == "cython"
|
|
30
|
-
Dynamic: author
|
|
31
|
-
Dynamic: home-page
|
|
32
|
-
Dynamic: license-file
|
|
33
|
-
|
|
34
|
-
.. image:: docs/logo.png
|
|
35
|
-
:alt: selectolax logo
|
|
36
|
-
|
|
37
|
-
-------------------------
|
|
38
|
-
|
|
39
|
-
.. image:: https://img.shields.io/pypi/v/selectolax.svg
|
|
40
|
-
:target: https://pypi.python.org/pypi/selectolax
|
|
41
|
-
|
|
42
|
-
A fast HTML5 parser with CSS selectors using `Modest <https://github.com/lexborisov/Modest/>`_ and
|
|
43
|
-
`Lexbor <https://github.com/lexbor/lexbor>`_ engines.
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
Installation
|
|
47
|
-
------------
|
|
48
|
-
From PyPI using pip:
|
|
49
|
-
|
|
50
|
-
.. code-block:: bash
|
|
51
|
-
|
|
52
|
-
pip install selectolax
|
|
53
|
-
|
|
54
|
-
If installation fails due to compilation errors, you may need to install `Cython <https://github.com/cython/cython>`_:
|
|
55
|
-
|
|
56
|
-
.. code-block:: bash
|
|
57
|
-
|
|
58
|
-
pip install selectolax[cython]
|
|
59
|
-
|
|
60
|
-
This usually happens when you try to install an outdated version of selectolax on a newer version of Python.
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
Development version from GitHub:
|
|
64
|
-
|
|
65
|
-
.. code-block:: bash
|
|
66
|
-
|
|
67
|
-
git clone --recursive https://github.com/rushter/selectolax
|
|
68
|
-
cd selectolax
|
|
69
|
-
pip install -r requirements_dev.txt
|
|
70
|
-
python setup.py install
|
|
71
|
-
|
|
72
|
-
How to compile selectolax while developing:
|
|
73
|
-
|
|
74
|
-
.. code-block:: bash
|
|
75
|
-
|
|
76
|
-
make clean
|
|
77
|
-
make dev
|
|
78
|
-
|
|
79
|
-
Basic examples
|
|
80
|
-
--------------
|
|
81
|
-
|
|
82
|
-
Here are some basic examples to get you started with selectolax:
|
|
83
|
-
|
|
84
|
-
Parsing HTML and extracting text:
|
|
85
|
-
|
|
86
|
-
.. code:: python
|
|
87
|
-
|
|
88
|
-
In [1]: from selectolax.parser import HTMLParser
|
|
89
|
-
...:
|
|
90
|
-
...: html = """
|
|
91
|
-
...: <h1 id="title" data-updated="20201101">Hi there</h1>
|
|
92
|
-
...: <div class="post">Lorem Ipsum is simply dummy text of the printing and typesetting industry. </div>
|
|
93
|
-
...: <div class="post">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</div>
|
|
94
|
-
...: """
|
|
95
|
-
...: tree = HTMLParser(html)
|
|
96
|
-
|
|
97
|
-
In [2]: tree.css_first('h1#title').text()
|
|
98
|
-
Out[2]: 'Hi there'
|
|
99
|
-
|
|
100
|
-
In [3]: tree.css_first('h1#title').attributes
|
|
101
|
-
Out[3]: {'id': 'title', 'data-updated': '20201101'}
|
|
102
|
-
|
|
103
|
-
In [4]: [node.text() for node in tree.css('.post')]
|
|
104
|
-
Out[4]:
|
|
105
|
-
['Lorem Ipsum is simply dummy text of the printing and typesetting industry. ',
|
|
106
|
-
'Lorem ipsum dolor sit amet, consectetur adipiscing elit.']
|
|
107
|
-
|
|
108
|
-
Using advanced CSS selectors:
|
|
109
|
-
|
|
110
|
-
.. code:: python
|
|
111
|
-
|
|
112
|
-
In [1]: html = "<div><p id=p1><p id=p2><p id=p3><a>link</a><p id=p4><p id=p5>text<p id=p6></div>"
|
|
113
|
-
...: selector = "div > :nth-child(2n+1):not(:has(a))"
|
|
114
|
-
|
|
115
|
-
In [2]: for node in HTMLParser(html).css(selector):
|
|
116
|
-
...: print(node.attributes, node.text(), node.tag)
|
|
117
|
-
...: print(node.parent.tag)
|
|
118
|
-
...: print(node.html)
|
|
119
|
-
...:
|
|
120
|
-
{'id': 'p1'} p
|
|
121
|
-
div
|
|
122
|
-
<p id="p1"></p>
|
|
123
|
-
{'id': 'p5'} text p
|
|
124
|
-
div
|
|
125
|
-
<p id="p5">text</p>
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
* `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
|
|
129
|
-
|
|
130
|
-
Available backends
|
|
131
|
-
------------------
|
|
132
|
-
|
|
133
|
-
Selectolax supports two backends: ``Modest`` and ``Lexbor``. By default, all examples use the Modest backend.
|
|
134
|
-
Most of the features between backends are almost identical, but there are still some differences.
|
|
135
|
-
|
|
136
|
-
As of 2024, the preferred backend is ``Lexbor``. The ``Modest`` backend is still available for compatibility reasons
|
|
137
|
-
and the underlying C library that selectolax uses is not maintained anymore.
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
To use ``lexbor``, just import the parser and use it in the similar way to the `HTMLParser`.
|
|
141
|
-
|
|
142
|
-
.. code:: python
|
|
143
|
-
|
|
144
|
-
In [1]: from selectolax.lexbor import LexborHTMLParser
|
|
145
|
-
|
|
146
|
-
In [2]: html = """
|
|
147
|
-
...: <title>Hi there</title>
|
|
148
|
-
...: <div id="updated">2021-08-15</div>
|
|
149
|
-
...: """
|
|
150
|
-
|
|
151
|
-
In [3]: parser = LexborHTMLParser(html)
|
|
152
|
-
In [4]: parser.root.css_first("#updated").text()
|
|
153
|
-
Out[4]: '2021-08-15'
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
Simple Benchmark
|
|
157
|
-
----------------
|
|
158
|
-
|
|
159
|
-
* Extract title, links, scripts and a meta tag from main pages of top 754 domains. See ``examples/benchmark.py`` for more information.
|
|
160
|
-
|
|
161
|
-
============================ ===========
|
|
162
|
-
Package Time
|
|
163
|
-
============================ ===========
|
|
164
|
-
Beautiful Soup (html.parser) 61.02 sec.
|
|
165
|
-
lxml / Beautiful Soup (lxml) 9.09 sec.
|
|
166
|
-
html5_parser 16.10 sec.
|
|
167
|
-
selectolax (Modest) 2.94 sec.
|
|
168
|
-
selectolax (Lexbor) 2.39 sec.
|
|
169
|
-
============================ ===========
|
|
170
|
-
|
|
171
|
-
Links
|
|
172
|
-
-----
|
|
173
|
-
|
|
174
|
-
* `selectolax API reference <https://selectolax.readthedocs.io/en/latest/index.html>`_
|
|
175
|
-
* `Video introduction to web scraping using selectolax <https://youtu.be/HpRsfpPuUzE>`_
|
|
176
|
-
* `How to Scrape 7k Products with Python using selectolax and httpx <https://www.youtube.com/watch?v=XpGvq755J2U>`_
|
|
177
|
-
* `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
|
|
178
|
-
* `Modest introduction <https://lexborisov.github.io/Modest/>`_
|
|
179
|
-
* `Modest benchmark <https://lexborisov.github.io/benchmark-html-parsers/>`_
|
|
180
|
-
* `Python benchmark <https://rushter.com/blog/python-fast-html-parser/>`_
|
|
181
|
-
* `Another Python benchmark <https://www.peterbe.com/plog/selectolax-or-pyquery>`_
|
|
182
|
-
|
|
183
|
-
License
|
|
184
|
-
-------
|
|
185
|
-
|
|
186
|
-
* Modest engine — `LGPL2.1 <https://github.com/lexborisov/Modest/blob/master/LICENSE>`_
|
|
187
|
-
* selectolax - `MIT <https://github.com/rushter/selectolax/blob/master/LICENSE>`_
|
|
File without changes
|
|
File without changes
|
|
File without changes
|