selectolax 0.3.33__cp313-cp313-macosx_11_0_arm64.whl → 0.3.34__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of selectolax might be problematic. Click here for more details.
- selectolax/__init__.py +1 -1
- selectolax/lexbor/node.pxi +6 -0
- selectolax/lexbor.c +1935 -1919
- selectolax/lexbor.cpython-313-darwin.so +0 -0
- selectolax/lexbor.pyi +12 -0
- selectolax/lexbor.pyx +5 -0
- selectolax/parser.c +47 -31
- selectolax/parser.cpython-313-darwin.so +0 -0
- selectolax-0.3.34.dist-info/METADATA +32 -0
- {selectolax-0.3.33.dist-info → selectolax-0.3.34.dist-info}/RECORD +13 -13
- selectolax-0.3.33.dist-info/METADATA +0 -187
- {selectolax-0.3.33.dist-info → selectolax-0.3.34.dist-info}/WHEEL +0 -0
- {selectolax-0.3.33.dist-info → selectolax-0.3.34.dist-info}/licenses/LICENSE +0 -0
- {selectolax-0.3.33.dist-info → selectolax-0.3.34.dist-info}/top_level.txt +0 -0
|
Binary file
|
selectolax/lexbor.pyi
CHANGED
|
@@ -145,6 +145,12 @@ class LexborNode:
|
|
|
145
145
|
Matches pattern `query` against HTML tree.
|
|
146
146
|
`CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
|
|
147
147
|
|
|
148
|
+
Special selectors:
|
|
149
|
+
|
|
150
|
+
- parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
|
|
151
|
+
- parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
|
|
152
|
+
|
|
153
|
+
|
|
148
154
|
Parameters
|
|
149
155
|
----------
|
|
150
156
|
query : str
|
|
@@ -665,6 +671,12 @@ class LexborHTMLParser:
|
|
|
665
671
|
Matches pattern `query` against HTML tree.
|
|
666
672
|
`CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
|
|
667
673
|
|
|
674
|
+
Special selectors:
|
|
675
|
+
|
|
676
|
+
- parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
|
|
677
|
+
- parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
|
|
678
|
+
|
|
679
|
+
|
|
668
680
|
Parameters
|
|
669
681
|
----------
|
|
670
682
|
query : str
|
selectolax/lexbor.pyx
CHANGED
|
@@ -169,6 +169,11 @@ cdef class LexborHTMLParser:
|
|
|
169
169
|
Matches pattern `query` against HTML tree.
|
|
170
170
|
`CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
|
|
171
171
|
|
|
172
|
+
Special selectors:
|
|
173
|
+
|
|
174
|
+
- parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
|
|
175
|
+
- parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
|
|
176
|
+
|
|
172
177
|
Parameters
|
|
173
178
|
----------
|
|
174
179
|
query : str
|
selectolax/parser.c
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
/* Generated by Cython 3.1.2 */
|
|
1
|
+
/* Generated by Cython 3.1.3 */
|
|
2
2
|
|
|
3
3
|
/* BEGIN: Cython Metadata
|
|
4
4
|
{
|
|
@@ -22,7 +22,8 @@
|
|
|
22
22
|
"-Wno-unused-variable",
|
|
23
23
|
"-Wno-unused-function",
|
|
24
24
|
"-std=c99",
|
|
25
|
-
"-O2"
|
|
25
|
+
"-O2",
|
|
26
|
+
"-g0"
|
|
26
27
|
],
|
|
27
28
|
"include_dirs": [
|
|
28
29
|
"modest/include/"
|
|
@@ -188,8 +189,8 @@ END: Cython Metadata */
|
|
|
188
189
|
#elif PY_VERSION_HEX < 0x03080000
|
|
189
190
|
#error Cython requires Python 3.8+.
|
|
190
191
|
#else
|
|
191
|
-
#define __PYX_ABI_VERSION "3_1_2"
|
|
192
|
-
#define CYTHON_HEX_VERSION 0x030102F0
|
|
192
|
+
#define __PYX_ABI_VERSION "3_1_3"
|
|
193
|
+
#define CYTHON_HEX_VERSION 0x030103F0
|
|
193
194
|
#define CYTHON_FUTURE_DIVISION 1
|
|
194
195
|
/* CModulePreamble */
|
|
195
196
|
#include <stddef.h>
|
|
@@ -552,6 +553,9 @@ END: Cython Metadata */
|
|
|
552
553
|
enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) };
|
|
553
554
|
#endif
|
|
554
555
|
#endif
|
|
556
|
+
#ifndef CYTHON_LOCK_AND_GIL_DEADLOCK_AVOIDANCE_TIME
|
|
557
|
+
#define CYTHON_LOCK_AND_GIL_DEADLOCK_AVOIDANCE_TIME 100
|
|
558
|
+
#endif
|
|
555
559
|
#ifndef __has_attribute
|
|
556
560
|
#define __has_attribute(x) 0
|
|
557
561
|
#endif
|
|
@@ -2774,22 +2778,22 @@ static int __Pyx__DelItemOnTypeDict(PyTypeObject *tp, PyObject *k);
|
|
|
2774
2778
|
static int __Pyx_setup_reduce(PyObject* type_obj);
|
|
2775
2779
|
|
|
2776
2780
|
/* TypeImport.proto */
|
|
2777
|
-
#ifndef __PYX_HAVE_RT_ImportType_proto_3_1_2
|
|
2778
|
-
#define __PYX_HAVE_RT_ImportType_proto_3_1_2
|
|
2781
|
+
#ifndef __PYX_HAVE_RT_ImportType_proto_3_1_3
|
|
2782
|
+
#define __PYX_HAVE_RT_ImportType_proto_3_1_3
|
|
2779
2783
|
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
|
|
2780
2784
|
#include <stdalign.h>
|
|
2781
2785
|
#endif
|
|
2782
2786
|
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || __cplusplus >= 201103L
|
|
2783
|
-
#define __PYX_GET_STRUCT_ALIGNMENT_3_1_2(s) alignof(s)
|
|
2787
|
+
#define __PYX_GET_STRUCT_ALIGNMENT_3_1_3(s) alignof(s)
|
|
2784
2788
|
#else
|
|
2785
|
-
#define __PYX_GET_STRUCT_ALIGNMENT_3_1_2(s) sizeof(void*)
|
|
2789
|
+
#define __PYX_GET_STRUCT_ALIGNMENT_3_1_3(s) sizeof(void*)
|
|
2786
2790
|
#endif
|
|
2787
|
-
enum __Pyx_ImportType_CheckSize_3_1_2 {
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
+
enum __Pyx_ImportType_CheckSize_3_1_3 {
|
|
2792
|
+
__Pyx_ImportType_CheckSize_Error_3_1_3 = 0,
|
|
2793
|
+
__Pyx_ImportType_CheckSize_Warn_3_1_3 = 1,
|
|
2794
|
+
__Pyx_ImportType_CheckSize_Ignore_3_1_3 = 2
|
|
2791
2795
|
};
|
|
2792
|
-
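static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_2 check_size);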
|
|
2796
|
+
static PyTypeObject *__Pyx_ImportType_3_1_3(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_3 check_size);
|
|
2793
2797
|
#endif
|
|
2794
2798
|
|
|
2795
2799
|
/* FetchSharedCythonModule.proto */
|
|
@@ -3573,7 +3577,7 @@ static const char __pyx_k_Node_strip_tags_line_554[] = "Node.strip_tags (line 55
|
|
|
3573
3577
|
static const char __pyx_k_Selector___reduce_cython[] = "Selector.__reduce_cython__";
|
|
3574
3578
|
static const char __pyx_k_Tag_name_cannot_be_empty[] = "Tag name cannot be empty";
|
|
3575
3579
|
static const char __pyx_k_document_no_head_no_body[] = "document_no_head_no_body";
|
|
3576
|
-
static const char __pyx_k_hk_A_1_vvxxy_881A_7_nA_1[] = "\200\001\360\006\000\005\010\200
|
|
3580
|
+
static const char __pyx_k_hk_A_1_vvxxy_881A_7_nA_1[] = "\200\001\360\006\000\005\010\200\177\220h\230k\250\033\260A\330\010\r\210^\2301\330\010\016\320\016!\320!v\320vx\320xy\330\004\023\2208\2308\2401\240A\330\004\007\200|\2207\230!\330\010*\250!\250;\260n\300A\330\004\013\2101";
|
|
3577
3581
|
static const char __pyx_k_Node_unwrap_tags_line_580[] = "Node.unwrap_tags (line 580)";
|
|
3578
3582
|
static const char __pyx_k_Node_unwrap_tags_line_768[] = "Node.unwrap_tags (line 768)";
|
|
3579
3583
|
static const char __pyx_k_any_attribute_longer_than[] = "any_attribute_longer_than";
|
|
@@ -38633,27 +38637,27 @@ static int __Pyx_modinit_type_import_code(__pyx_mstatetype *__pyx_mstate) {
|
|
|
38633
38637
|
/*--- Type import code ---*/
|
|
38634
38638
|
__pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(8, 8, __pyx_L1_error)
|
|
38635
38639
|
__Pyx_GOTREF(__pyx_t_1);
|
|
38636
|
-
__pyx_mstate->__pyx_ptype_7cpython_4bool_bool = __Pyx_ImportType_3_1_2(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "bool",
|
|
38640
|
+
__pyx_mstate->__pyx_ptype_7cpython_4bool_bool = __Pyx_ImportType_3_1_3(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "bool",
|
|
38637
38641
|
#if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000
|
|
38638
|
-
sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyLongObject),
|
|
38642
|
+
sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyLongObject),
|
|
38639
38643
|
#elif CYTHON_COMPILING_IN_LIMITED_API
|
|
38640
38644
|
0, 0,
|
|
38641
38645
|
#else
|
|
38642
|
-
sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyLongObject),
|
|
38646
|
+
sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyLongObject),
|
|
38643
38647
|
#endif
|
|
38644
|
-
|
|
38648
|
+
__Pyx_ImportType_CheckSize_Warn_3_1_3); if (!__pyx_mstate->__pyx_ptype_7cpython_4bool_bool) __PYX_ERR(8, 8, __pyx_L1_error)
|
|
38645
38649
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
|
38646
38650
|
__pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(9, 9, __pyx_L1_error)
|
|
38647
38651
|
__Pyx_GOTREF(__pyx_t_1);
|
|
38648
|
-
__pyx_mstate->__pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_1_2(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type",
|
|
38652
|
+
__pyx_mstate->__pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_1_3(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type",
|
|
38649
38653
|
#if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000
|
|
38650
|
-
sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyTypeObject),
|
|
38654
|
+
sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyTypeObject),
|
|
38651
38655
|
#elif CYTHON_COMPILING_IN_LIMITED_API
|
|
38652
38656
|
0, 0,
|
|
38653
38657
|
#else
|
|
38654
|
-
sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyHeapTypeObject),
|
|
38658
|
+
sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyHeapTypeObject),
|
|
38655
38659
|
#endif
|
|
38656
|
-
|
|
38660
|
+
__Pyx_ImportType_CheckSize_Warn_3_1_3); if (!__pyx_mstate->__pyx_ptype_7cpython_4type_type) __PYX_ERR(9, 9, __pyx_L1_error)
|
|
38657
38661
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
|
38658
38662
|
__Pyx_RefNannyFinishContext();
|
|
38659
38663
|
return 0;
|
|
@@ -43848,6 +43852,13 @@ try_unpack:
|
|
|
43848
43852
|
|
|
43849
43853
|
/* PyObjectCallMethod0 */
|
|
43850
43854
|
static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) {
|
|
43855
|
+
#if CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000))
|
|
43856
|
+
PyObject *args[1] = {obj};
|
|
43857
|
+
(void) __Pyx_PyObject_GetMethod;
|
|
43858
|
+
(void) __Pyx_PyObject_CallOneArg;
|
|
43859
|
+
(void) __Pyx_PyObject_CallNoArg;
|
|
43860
|
+
return PyObject_VectorcallMethod(method_name, args, 1 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
|
|
43861
|
+
#else
|
|
43851
43862
|
PyObject *method = NULL, *result = NULL;
|
|
43852
43863
|
int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method);
|
|
43853
43864
|
if (likely(is_method)) {
|
|
@@ -43860,6 +43871,7 @@ static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name
|
|
|
43860
43871
|
Py_DECREF(method);
|
|
43861
43872
|
bad:
|
|
43862
43873
|
return result;
|
|
43874
|
+
#endif
|
|
43863
43875
|
}
|
|
43864
43876
|
|
|
43865
43877
|
/* RaiseNeedMoreValuesToUnpack */
|
|
@@ -44670,6 +44682,7 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
|
|
|
44670
44682
|
changed = 1;
|
|
44671
44683
|
}
|
|
44672
44684
|
#endif // CYTHON_METH_FASTCALL
|
|
44685
|
+
#if !CYTHON_COMPILING_IN_PYPY
|
|
44673
44686
|
else if (strcmp(memb->name, "__module__") == 0) {
|
|
44674
44687
|
PyObject *descr;
|
|
44675
44688
|
assert(memb->type == T_OBJECT);
|
|
@@ -44684,11 +44697,13 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
|
|
|
44684
44697
|
}
|
|
44685
44698
|
changed = 1;
|
|
44686
44699
|
}
|
|
44700
|
+
#endif // !CYTHON_COMPILING_IN_PYPY
|
|
44687
44701
|
}
|
|
44688
44702
|
memb++;
|
|
44689
44703
|
}
|
|
44690
44704
|
}
|
|
44691
44705
|
#endif // !CYTHON_COMPILING_IN_LIMITED_API
|
|
44706
|
+
#if !CYTHON_COMPILING_IN_PYPY
|
|
44692
44707
|
slot = spec->slots;
|
|
44693
44708
|
while (slot && slot->slot && slot->slot != Py_tp_getset)
|
|
44694
44709
|
slot++;
|
|
@@ -44720,6 +44735,7 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
|
|
|
44720
44735
|
++getset;
|
|
44721
44736
|
}
|
|
44722
44737
|
}
|
|
44738
|
+
#endif // !CYTHON_COMPILING_IN_PYPY
|
|
44723
44739
|
if (changed)
|
|
44724
44740
|
PyType_Modified(type);
|
|
44725
44741
|
#endif // PY_VERSION_HEX > 0x030900B1
|
|
@@ -45153,10 +45169,10 @@ __PYX_GOOD:
|
|
|
45153
45169
|
}
|
|
45154
45170
|
|
|
45155
45171
|
/* TypeImport */
|
|
45156
|
-
#ifndef __PYX_HAVE_RT_ImportType_3_1_2
|
|
45157
|
-
#define __PYX_HAVE_RT_ImportType_3_1_2
|
|
45158
|
-
static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module_name, const char *class_name,
|
|
45159
|
-
size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_2 check_size)
|
|
45172
|
+
#ifndef __PYX_HAVE_RT_ImportType_3_1_3
|
|
45173
|
+
#define __PYX_HAVE_RT_ImportType_3_1_3
|
|
45174
|
+
static PyTypeObject *__Pyx_ImportType_3_1_3(PyObject *module, const char *module_name, const char *class_name,
|
|
45175
|
+
size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_3 check_size)
|
|
45160
45176
|
{
|
|
45161
45177
|
PyObject *result = 0;
|
|
45162
45178
|
Py_ssize_t basicsize;
|
|
@@ -45212,7 +45228,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module
|
|
|
45212
45228
|
module_name, class_name, size, basicsize+itemsize);
|
|
45213
45229
|
goto bad;
|
|
45214
45230
|
}
|
|
45215
|
-
if (check_size == __Pyx_ImportType_CheckSize_Error_3_1_2 &&
|
|
45231
|
+
if (check_size == __Pyx_ImportType_CheckSize_Error_3_1_3 &&
|
|
45216
45232
|
((size_t)basicsize > size || (size_t)(basicsize + itemsize) < size)) {
|
|
45217
45233
|
PyErr_Format(PyExc_ValueError,
|
|
45218
45234
|
"%.200s.%.200s size changed, may indicate binary incompatibility. "
|
|
@@ -45220,7 +45236,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module
|
|
|
45220
45236
|
module_name, class_name, size, basicsize, basicsize+itemsize);
|
|
45221
45237
|
goto bad;
|
|
45222
45238
|
}
|
|
45223
|
-
else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_1_2 && (size_t)basicsize > size) {
|
|
45239
|
+
else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_1_3 && (size_t)basicsize > size) {
|
|
45224
45240
|
if (PyErr_WarnFormat(NULL, 0,
|
|
45225
45241
|
"%.200s.%.200s size changed, may indicate binary incompatibility. "
|
|
45226
45242
|
"Expected %zd from C header, got %zd from PyObject",
|
|
@@ -45361,7 +45377,7 @@ bad:
|
|
|
45361
45377
|
}
|
|
45362
45378
|
|
|
45363
45379
|
/* CommonTypesMetaclass */
|
|
45364
|
-
PyObject* __pyx_CommonTypesMetaclass_get_module(CYTHON_UNUSED PyObject *self, CYTHON_UNUSED void* context) {
|
|
45380
|
+
static PyObject* __pyx_CommonTypesMetaclass_get_module(CYTHON_UNUSED PyObject *self, CYTHON_UNUSED void* context) {
|
|
45365
45381
|
return PyUnicode_FromString(__PYX_ABI_MODULE_NAME);
|
|
45366
45382
|
}
|
|
45367
45383
|
static PyGetSetDef __pyx_CommonTypesMetaclass_getset[] = {
|
|
@@ -48290,7 +48306,7 @@ static CYTHON_INLINE PyObject *__Pyx_PyIter_Next_Plain(PyObject *iterator) {
|
|
|
48290
48306
|
}
|
|
48291
48307
|
|
|
48292
48308
|
/* PyObjectCallMethod1 */
|
|
48293
|
-
#if !(CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C0000)
|
|
48309
|
+
#if !(CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000)))
|
|
48294
48310
|
static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) {
|
|
48295
48311
|
PyObject *result = __Pyx_PyObject_CallOneArg(method, arg);
|
|
48296
48312
|
Py_DECREF(method);
|
|
@@ -48298,7 +48314,7 @@ static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) {
|
|
|
48298
48314
|
}
|
|
48299
48315
|
#endif
|
|
48300
48316
|
static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg) {
|
|
48301
|
-
#if CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C0000
|
|
48317
|
+
#if CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000))
|
|
48302
48318
|
PyObject *args[2] = {obj, arg};
|
|
48303
48319
|
(void) __Pyx_PyObject_GetMethod;
|
|
48304
48320
|
(void) __Pyx_PyObject_CallOneArg;
|
|
Binary file
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: selectolax
|
|
3
|
+
Version: 0.3.34
|
|
4
|
+
Summary: Fast HTML5 parser with CSS selectors.
|
|
5
|
+
Home-page: https://github.com/rushter/selectolax
|
|
6
|
+
Author: Artem Golubin
|
|
7
|
+
Author-email: Artem Golubin <me@rushter.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Project-URL: Repository, https://github.com/rushter/selectolax
|
|
10
|
+
Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
|
|
11
|
+
Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.md
|
|
12
|
+
Keywords: selectolax,html,parser,css,fast
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: Topic :: Text Processing :: Markup :: HTML
|
|
15
|
+
Classifier: Topic :: Internet
|
|
16
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
17
|
+
Classifier: Intended Audience :: Developers
|
|
18
|
+
Classifier: Natural Language :: English
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Requires-Python: >=3.9
|
|
26
|
+
Description-Content-Type: text/x-rst
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Provides-Extra: cython
|
|
29
|
+
Requires-Dist: Cython; extra == "cython"
|
|
30
|
+
Dynamic: author
|
|
31
|
+
Dynamic: home-page
|
|
32
|
+
Dynamic: license-file
|
|
@@ -1,15 +1,20 @@
|
|
|
1
|
-
selectolax/
|
|
1
|
+
selectolax-0.3.34.dist-info/RECORD,,
|
|
2
|
+
selectolax-0.3.34.dist-info/WHEEL,sha256=oqGJCpG61FZJmvyZ3C_0aCv-2mdfcY9e3fXvyUNmWfM,136
|
|
3
|
+
selectolax-0.3.34.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
|
|
4
|
+
selectolax-0.3.34.dist-info/METADATA,sha256=fGDgNbvrKw6lxT4PHH__IVb0blQ7eFbIEoaKV8ZWDY0,1286
|
|
5
|
+
selectolax-0.3.34.dist-info/licenses/LICENSE,sha256=MYCcM-Cv_rC2-lQiwDumin0E-rMXAhK-qIGGA29434Y,1077
|
|
6
|
+
selectolax/lexbor.pyi,sha256=peGOaeg01KBrh585hFk6z6X95WqrqjEdjKYlh88jZPs,28113
|
|
2
7
|
selectolax/parser.pyx,sha256=i2bCslGrqYARZvCab4mIdRFyA9k7AuvJSVjZsqrT4SM,13684
|
|
3
|
-
selectolax/__init__.py,sha256=
|
|
8
|
+
selectolax/__init__.py,sha256=d0wQ0XsnuynVKlPMOAMCB0yBbra7iisjAESTv1WTgEI,149
|
|
4
9
|
selectolax/lexbor.pxd,sha256=FTx2SphXsIHCCmEouzGSp9j2CBgrOc12Wf9E7DPIbrI,21144
|
|
5
|
-
selectolax/lexbor.pyx,sha256=
|
|
10
|
+
selectolax/lexbor.pyx,sha256=0LuaQJczlwSYeEq3GzljhcWUOIkWZ7ymCYZP5Kf44HY,12503
|
|
6
11
|
selectolax/parser.pyi,sha256=tCCAWcaeBJrhctUKpdTKVX6z4_tsxPz8D5GpbxO3Hz0,24845
|
|
7
12
|
selectolax/utils.pxi,sha256=_g-ZLprPgbqv7BLs-WEe8IhbDd_QTcfirz_NEyR1Yww,3506
|
|
8
|
-
selectolax/lexbor.c,sha256=
|
|
9
|
-
selectolax/parser.cpython-313-darwin.so,sha256=
|
|
13
|
+
selectolax/lexbor.c,sha256=oAopXm-RClFKji_ud7VrMWoU71Rq6FCgFVPao9QkwKg,2365835
|
|
14
|
+
selectolax/parser.cpython-313-darwin.so,sha256=KfYNQVP0ncUT0-adA7AWO6Da3TAMUa9P0vp8jiNCQ0Y,2842144
|
|
10
15
|
selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
selectolax/lexbor.cpython-313-darwin.so,sha256=
|
|
12
|
-
selectolax/parser.c,sha256=
|
|
16
|
+
selectolax/lexbor.cpython-313-darwin.so,sha256=sz8pp215oqr5GhRSrratyXesslMrk9xN56LxQrNAQ8w,3527904
|
|
17
|
+
selectolax/parser.c,sha256=unTLg-rvs-I3YJfMZMC270A9atuSnLRjP-oEu9Wvhvk,2208895
|
|
13
18
|
selectolax/base.pxi,sha256=eiPKlY9gG3l49qJoRQVLl1Ljza6z1k0A-met6sDPcqE,89
|
|
14
19
|
selectolax/parser.pxd,sha256=BQSlDGibVfqFDhfqX6l5sBnfkHEETxlj-eSpGWERKEs,24618
|
|
15
20
|
selectolax/modest/selection.pxi,sha256=m4GDpl0aI7lSWHFeBBheroUKDrZgJcc6uVubtzrXL1M,6508
|
|
@@ -17,10 +22,5 @@ selectolax/modest/util.pxi,sha256=di9cLmAyuGFXmiuptZ7Fz1SgkCf7hmiZLnpKCKEKsUc,55
|
|
|
17
22
|
selectolax/modest/node.pxi,sha256=9D-o4Kdd-e_ebc-z8eAVxlVDdmfN5g2fUn6rj4sUSxg,33703
|
|
18
23
|
selectolax/lexbor/selection.pxi,sha256=aYna6zEH9vBGxtcYe8AdOGseEh4ieymFt3Zu4cQssTs,7185
|
|
19
24
|
selectolax/lexbor/util.pxi,sha256=hqMQU1O_5O82ThjUzk8NxQPl-Kg29DDGFFpC46LcejI,564
|
|
20
|
-
selectolax/lexbor/node.pxi,sha256=
|
|
25
|
+
selectolax/lexbor/node.pxi,sha256=f-Illaw8ozBBzLOWDbRBCaUaK4wH4JgSHuc2xxYsFtI,32467
|
|
21
26
|
selectolax/lexbor/attrs.pxi,sha256=eH90zJYHicffTzC7peIitHkOqyIw3xzomhJHxJv9hP8,3858
|
|
22
|
-
selectolax-0.3.33.dist-info/RECORD,,
|
|
23
|
-
selectolax-0.3.33.dist-info/WHEEL,sha256=oqGJCpG61FZJmvyZ3C_0aCv-2mdfcY9e3fXvyUNmWfM,136
|
|
24
|
-
selectolax-0.3.33.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
|
|
25
|
-
selectolax-0.3.33.dist-info/METADATA,sha256=sQZmiyZ9oiWmyUNzYHd_cnkmVPjk7cK8ef0Wyyf7h_A,6215
|
|
26
|
-
selectolax-0.3.33.dist-info/licenses/LICENSE,sha256=MYCcM-Cv_rC2-lQiwDumin0E-rMXAhK-qIGGA29434Y,1077
|
|
@@ -1,187 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: selectolax
|
|
3
|
-
Version: 0.3.33
|
|
4
|
-
Summary: Fast HTML5 parser with CSS selectors.
|
|
5
|
-
Home-page: https://github.com/rushter/selectolax
|
|
6
|
-
Author: Artem Golubin
|
|
7
|
-
Author-email: Artem Golubin <me@rushter.com>
|
|
8
|
-
License: MIT
|
|
9
|
-
Project-URL: Repository, https://github.com/rushter/selectolax
|
|
10
|
-
Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
|
|
11
|
-
Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.rst
|
|
12
|
-
Keywords: selectolax,html,parser,css,fast
|
|
13
|
-
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
-
Classifier: Topic :: Text Processing :: Markup :: HTML
|
|
15
|
-
Classifier: Topic :: Internet
|
|
16
|
-
Classifier: Topic :: Internet :: WWW/HTTP
|
|
17
|
-
Classifier: Intended Audience :: Developers
|
|
18
|
-
Classifier: Natural Language :: English
|
|
19
|
-
Classifier: Programming Language :: Python :: 3
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
-
Requires-Python: >=3.9
|
|
26
|
-
Description-Content-Type: text/x-rst
|
|
27
|
-
License-File: LICENSE
|
|
28
|
-
Provides-Extra: cython
|
|
29
|
-
Requires-Dist: Cython; extra == "cython"
|
|
30
|
-
Dynamic: author
|
|
31
|
-
Dynamic: home-page
|
|
32
|
-
Dynamic: license-file
|
|
33
|
-
|
|
34
|
-
.. image:: docs/logo.png
|
|
35
|
-
:alt: selectolax logo
|
|
36
|
-
|
|
37
|
-
-------------------------
|
|
38
|
-
|
|
39
|
-
.. image:: https://img.shields.io/pypi/v/selectolax.svg
|
|
40
|
-
:target: https://pypi.python.org/pypi/selectolax
|
|
41
|
-
|
|
42
|
-
A fast HTML5 parser with CSS selectors using `Modest <https://github.com/lexborisov/Modest/>`_ and
|
|
43
|
-
`Lexbor <https://github.com/lexbor/lexbor>`_ engines.
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
Installation
|
|
47
|
-
------------
|
|
48
|
-
From PyPI using pip:
|
|
49
|
-
|
|
50
|
-
.. code-block:: bash
|
|
51
|
-
|
|
52
|
-
pip install selectolax
|
|
53
|
-
|
|
54
|
-
If installation fails due to compilation errors, you may need to install `Cython <https://github.com/cython/cython>`_:
|
|
55
|
-
|
|
56
|
-
.. code-block:: bash
|
|
57
|
-
|
|
58
|
-
pip install selectolax[cython]
|
|
59
|
-
|
|
60
|
-
This usually happens when you try to install an outdated version of selectolax on a newer version of Python.
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
Development version from GitHub:
|
|
64
|
-
|
|
65
|
-
.. code-block:: bash
|
|
66
|
-
|
|
67
|
-
git clone --recursive https://github.com/rushter/selectolax
|
|
68
|
-
cd selectolax
|
|
69
|
-
pip install -r requirements_dev.txt
|
|
70
|
-
python setup.py install
|
|
71
|
-
|
|
72
|
-
How to compile selectolax while developing:
|
|
73
|
-
|
|
74
|
-
.. code-block:: bash
|
|
75
|
-
|
|
76
|
-
make clean
|
|
77
|
-
make dev
|
|
78
|
-
|
|
79
|
-
Basic examples
|
|
80
|
-
--------------
|
|
81
|
-
|
|
82
|
-
Here are some basic examples to get you started with selectolax:
|
|
83
|
-
|
|
84
|
-
Parsing HTML and extracting text:
|
|
85
|
-
|
|
86
|
-
.. code:: python
|
|
87
|
-
|
|
88
|
-
In [1]: from selectolax.parser import HTMLParser
|
|
89
|
-
...:
|
|
90
|
-
...: html = """
|
|
91
|
-
...: <h1 id="title" data-updated="20201101">Hi there</h1>
|
|
92
|
-
...: <div class="post">Lorem Ipsum is simply dummy text of the printing and typesetting industry. </div>
|
|
93
|
-
...: <div class="post">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</div>
|
|
94
|
-
...: """
|
|
95
|
-
...: tree = HTMLParser(html)
|
|
96
|
-
|
|
97
|
-
In [2]: tree.css_first('h1#title').text()
|
|
98
|
-
Out[2]: 'Hi there'
|
|
99
|
-
|
|
100
|
-
In [3]: tree.css_first('h1#title').attributes
|
|
101
|
-
Out[3]: {'id': 'title', 'data-updated': '20201101'}
|
|
102
|
-
|
|
103
|
-
In [4]: [node.text() for node in tree.css('.post')]
|
|
104
|
-
Out[4]:
|
|
105
|
-
['Lorem Ipsum is simply dummy text of the printing and typesetting industry. ',
|
|
106
|
-
'Lorem ipsum dolor sit amet, consectetur adipiscing elit.']
|
|
107
|
-
|
|
108
|
-
Using advanced CSS selectors:
|
|
109
|
-
|
|
110
|
-
.. code:: python
|
|
111
|
-
|
|
112
|
-
In [1]: html = "<div><p id=p1><p id=p2><p id=p3><a>link</a><p id=p4><p id=p5>text<p id=p6></div>"
|
|
113
|
-
...: selector = "div > :nth-child(2n+1):not(:has(a))"
|
|
114
|
-
|
|
115
|
-
In [2]: for node in HTMLParser(html).css(selector):
|
|
116
|
-
...: print(node.attributes, node.text(), node.tag)
|
|
117
|
-
...: print(node.parent.tag)
|
|
118
|
-
...: print(node.html)
|
|
119
|
-
...:
|
|
120
|
-
{'id': 'p1'} p
|
|
121
|
-
div
|
|
122
|
-
<p id="p1"></p>
|
|
123
|
-
{'id': 'p5'} text p
|
|
124
|
-
div
|
|
125
|
-
<p id="p5">text</p>
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
* `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
|
|
129
|
-
|
|
130
|
-
Available backends
|
|
131
|
-
------------------
|
|
132
|
-
|
|
133
|
-
Selectolax supports two backends: ``Modest`` and ``Lexbor``. By default, all examples use the Modest backend.
|
|
134
|
-
Most of the features between backends are almost identical, but there are still some differences.
|
|
135
|
-
|
|
136
|
-
As of 2024, the preferred backend is ``Lexbor``. The ``Modest`` backend is still available for compatibility reasons
|
|
137
|
-
and the underlying C library that selectolax uses is not maintained anymore.
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
To use ``lexbor``, just import the parser and use it in the similar way to the `HTMLParser`.
|
|
141
|
-
|
|
142
|
-
.. code:: python
|
|
143
|
-
|
|
144
|
-
In [1]: from selectolax.lexbor import LexborHTMLParser
|
|
145
|
-
|
|
146
|
-
In [2]: html = """
|
|
147
|
-
...: <title>Hi there</title>
|
|
148
|
-
...: <div id="updated">2021-08-15</div>
|
|
149
|
-
...: """
|
|
150
|
-
|
|
151
|
-
In [3]: parser = LexborHTMLParser(html)
|
|
152
|
-
In [4]: parser.root.css_first("#updated").text()
|
|
153
|
-
Out[4]: '2021-08-15'
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
Simple Benchmark
|
|
157
|
-
----------------
|
|
158
|
-
|
|
159
|
-
* Extract title, links, scripts and a meta tag from main pages of top 754 domains. See ``examples/benchmark.py`` for more information.
|
|
160
|
-
|
|
161
|
-
============================ ===========
|
|
162
|
-
Package Time
|
|
163
|
-
============================ ===========
|
|
164
|
-
Beautiful Soup (html.parser) 61.02 sec.
|
|
165
|
-
lxml / Beautiful Soup (lxml) 9.09 sec.
|
|
166
|
-
html5_parser 16.10 sec.
|
|
167
|
-
selectolax (Modest) 2.94 sec.
|
|
168
|
-
selectolax (Lexbor) 2.39 sec.
|
|
169
|
-
============================ ===========
|
|
170
|
-
|
|
171
|
-
Links
|
|
172
|
-
-----
|
|
173
|
-
|
|
174
|
-
* `selectolax API reference <https://selectolax.readthedocs.io/en/latest/index.html>`_
|
|
175
|
-
* `Video introduction to web scraping using selectolax <https://youtu.be/HpRsfpPuUzE>`_
|
|
176
|
-
* `How to Scrape 7k Products with Python using selectolax and httpx <https://www.youtube.com/watch?v=XpGvq755J2U>`_
|
|
177
|
-
* `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
|
|
178
|
-
* `Modest introduction <https://lexborisov.github.io/Modest/>`_
|
|
179
|
-
* `Modest benchmark <https://lexborisov.github.io/benchmark-html-parsers/>`_
|
|
180
|
-
* `Python benchmark <https://rushter.com/blog/python-fast-html-parser/>`_
|
|
181
|
-
* `Another Python benchmark <https://www.peterbe.com/plog/selectolax-or-pyquery>`_
|
|
182
|
-
|
|
183
|
-
License
|
|
184
|
-
-------
|
|
185
|
-
|
|
186
|
-
* Modest engine — `LGPL2.1 <https://github.com/lexborisov/Modest/blob/master/LICENSE>`_
|
|
187
|
-
* selectolax - `MIT <https://github.com/rushter/selectolax/blob/master/LICENSE>`_
|
|
File without changes
|
|
File without changes
|
|
File without changes
|