selectolax 0.3.33__cp313-cp313-macosx_11_0_arm64.whl → 0.3.34__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

Binary file
selectolax/lexbor.pyi CHANGED
@@ -145,6 +145,12 @@ class LexborNode:
145
145
  Matches pattern `query` against HTML tree.
146
146
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
147
147
 
148
+ Special selectors:
149
+
150
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
151
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
152
+
153
+
148
154
  Parameters
149
155
  ----------
150
156
  query : str
@@ -665,6 +671,12 @@ class LexborHTMLParser:
665
671
  Matches pattern `query` against HTML tree.
666
672
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
667
673
 
674
+ Special selectors:
675
+
676
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
677
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
678
+
679
+
668
680
  Parameters
669
681
  ----------
670
682
  query : str
selectolax/lexbor.pyx CHANGED
@@ -169,6 +169,11 @@ cdef class LexborHTMLParser:
169
169
  Matches pattern `query` against HTML tree.
170
170
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
171
171
 
172
+ Special selectors:
173
+
174
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
175
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
176
+
172
177
  Parameters
173
178
  ----------
174
179
  query : str
selectolax/parser.c CHANGED
@@ -1,4 +1,4 @@
1
- /* Generated by Cython 3.1.2 */
1
+ /* Generated by Cython 3.1.3 */
2
2
 
3
3
  /* BEGIN: Cython Metadata
4
4
  {
@@ -22,7 +22,8 @@
22
22
  "-Wno-unused-variable",
23
23
  "-Wno-unused-function",
24
24
  "-std=c99",
25
- "-O2"
25
+ "-O2",
26
+ "-g0"
26
27
  ],
27
28
  "include_dirs": [
28
29
  "modest/include/"
@@ -188,8 +189,8 @@ END: Cython Metadata */
188
189
  #elif PY_VERSION_HEX < 0x03080000
189
190
  #error Cython requires Python 3.8+.
190
191
  #else
191
- #define __PYX_ABI_VERSION "3_1_2"
192
- #define CYTHON_HEX_VERSION 0x030102F0
192
+ #define __PYX_ABI_VERSION "3_1_3"
193
+ #define CYTHON_HEX_VERSION 0x030103F0
193
194
  #define CYTHON_FUTURE_DIVISION 1
194
195
  /* CModulePreamble */
195
196
  #include <stddef.h>
@@ -552,6 +553,9 @@ END: Cython Metadata */
552
553
  enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) };
553
554
  #endif
554
555
  #endif
556
+ #ifndef CYTHON_LOCK_AND_GIL_DEADLOCK_AVOIDANCE_TIME
557
+ #define CYTHON_LOCK_AND_GIL_DEADLOCK_AVOIDANCE_TIME 100
558
+ #endif
555
559
  #ifndef __has_attribute
556
560
  #define __has_attribute(x) 0
557
561
  #endif
@@ -2774,22 +2778,22 @@ static int __Pyx__DelItemOnTypeDict(PyTypeObject *tp, PyObject *k);
2774
2778
  static int __Pyx_setup_reduce(PyObject* type_obj);
2775
2779
 
2776
2780
  /* TypeImport.proto */
2777
- #ifndef __PYX_HAVE_RT_ImportType_proto_3_1_2
2778
- #define __PYX_HAVE_RT_ImportType_proto_3_1_2
2781
+ #ifndef __PYX_HAVE_RT_ImportType_proto_3_1_3
2782
+ #define __PYX_HAVE_RT_ImportType_proto_3_1_3
2779
2783
  #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
2780
2784
  #include <stdalign.h>
2781
2785
  #endif
2782
2786
  #if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || __cplusplus >= 201103L
2783
- #define __PYX_GET_STRUCT_ALIGNMENT_3_1_2(s) alignof(s)
2787
+ #define __PYX_GET_STRUCT_ALIGNMENT_3_1_3(s) alignof(s)
2784
2788
  #else
2785
- #define __PYX_GET_STRUCT_ALIGNMENT_3_1_2(s) sizeof(void*)
2789
+ #define __PYX_GET_STRUCT_ALIGNMENT_3_1_3(s) sizeof(void*)
2786
2790
  #endif
2787
- enum __Pyx_ImportType_CheckSize_3_1_2 {
2788
- __Pyx_ImportType_CheckSize_Error_3_1_2 = 0,
2789
- __Pyx_ImportType_CheckSize_Warn_3_1_2 = 1,
2790
- __Pyx_ImportType_CheckSize_Ignore_3_1_2 = 2
2791
+ enum __Pyx_ImportType_CheckSize_3_1_3 {
2792
+ __Pyx_ImportType_CheckSize_Error_3_1_3 = 0,
2793
+ __Pyx_ImportType_CheckSize_Warn_3_1_3 = 1,
2794
+ __Pyx_ImportType_CheckSize_Ignore_3_1_3 = 2
2791
2795
  };
2792
- static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_2 check_size);
2796
+ static PyTypeObject *__Pyx_ImportType_3_1_3(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_3 check_size);
2793
2797
  #endif
2794
2798
 
2795
2799
  /* FetchSharedCythonModule.proto */
@@ -3573,7 +3577,7 @@ static const char __pyx_k_Node_strip_tags_line_554[] = "Node.strip_tags (line 55
3573
3577
  static const char __pyx_k_Selector___reduce_cython[] = "Selector.__reduce_cython__";
3574
3578
  static const char __pyx_k_Tag_name_cannot_be_empty[] = "Tag name cannot be empty";
3575
3579
  static const char __pyx_k_document_no_head_no_body[] = "document_no_head_no_body";
3576
- static const char __pyx_k_hk_A_1_vvxxy_881A_7_nA_1[] = "\200\001\360\006\000\005\010\200\220h\230k\250\033\260A\330\010\r\210^\2301\330\010\016\320\016!\320!v\320vx\320xy\330\004\023\2208\2308\2401\240A\330\004\007\200|\2207\230!\330\010*\250!\250;\260n\300A\330\004\013\2101";
3580
+ static const char __pyx_k_hk_A_1_vvxxy_881A_7_nA_1[] = "\200\001\360\006\000\005\010\200\177\220h\230k\250\033\260A\330\010\r\210^\2301\330\010\016\320\016!\320!v\320vx\320xy\330\004\023\2208\2308\2401\240A\330\004\007\200|\2207\230!\330\010*\250!\250;\260n\300A\330\004\013\2101";
3577
3581
  static const char __pyx_k_Node_unwrap_tags_line_580[] = "Node.unwrap_tags (line 580)";
3578
3582
  static const char __pyx_k_Node_unwrap_tags_line_768[] = "Node.unwrap_tags (line 768)";
3579
3583
  static const char __pyx_k_any_attribute_longer_than[] = "any_attribute_longer_than";
@@ -38633,27 +38637,27 @@ static int __Pyx_modinit_type_import_code(__pyx_mstatetype *__pyx_mstate) {
38633
38637
  /*--- Type import code ---*/
38634
38638
  __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(8, 8, __pyx_L1_error)
38635
38639
  __Pyx_GOTREF(__pyx_t_1);
38636
- __pyx_mstate->__pyx_ptype_7cpython_4bool_bool = __Pyx_ImportType_3_1_2(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "bool",
38640
+ __pyx_mstate->__pyx_ptype_7cpython_4bool_bool = __Pyx_ImportType_3_1_3(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "bool",
38637
38641
  #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000
38638
- sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyLongObject),
38642
+ sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyLongObject),
38639
38643
  #elif CYTHON_COMPILING_IN_LIMITED_API
38640
38644
  0, 0,
38641
38645
  #else
38642
- sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyLongObject),
38646
+ sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyLongObject),
38643
38647
  #endif
38644
- __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_7cpython_4bool_bool) __PYX_ERR(8, 8, __pyx_L1_error)
38648
+ __Pyx_ImportType_CheckSize_Warn_3_1_3); if (!__pyx_mstate->__pyx_ptype_7cpython_4bool_bool) __PYX_ERR(8, 8, __pyx_L1_error)
38645
38649
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
38646
38650
  __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(9, 9, __pyx_L1_error)
38647
38651
  __Pyx_GOTREF(__pyx_t_1);
38648
- __pyx_mstate->__pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_1_2(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type",
38652
+ __pyx_mstate->__pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_1_3(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type",
38649
38653
  #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000
38650
- sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyTypeObject),
38654
+ sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyTypeObject),
38651
38655
  #elif CYTHON_COMPILING_IN_LIMITED_API
38652
38656
  0, 0,
38653
38657
  #else
38654
- sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyHeapTypeObject),
38658
+ sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyHeapTypeObject),
38655
38659
  #endif
38656
- __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_7cpython_4type_type) __PYX_ERR(9, 9, __pyx_L1_error)
38660
+ __Pyx_ImportType_CheckSize_Warn_3_1_3); if (!__pyx_mstate->__pyx_ptype_7cpython_4type_type) __PYX_ERR(9, 9, __pyx_L1_error)
38657
38661
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
38658
38662
  __Pyx_RefNannyFinishContext();
38659
38663
  return 0;
@@ -43848,6 +43852,13 @@ try_unpack:
43848
43852
 
43849
43853
  /* PyObjectCallMethod0 */
43850
43854
  static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) {
43855
+ #if CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000))
43856
+ PyObject *args[1] = {obj};
43857
+ (void) __Pyx_PyObject_GetMethod;
43858
+ (void) __Pyx_PyObject_CallOneArg;
43859
+ (void) __Pyx_PyObject_CallNoArg;
43860
+ return PyObject_VectorcallMethod(method_name, args, 1 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
43861
+ #else
43851
43862
  PyObject *method = NULL, *result = NULL;
43852
43863
  int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method);
43853
43864
  if (likely(is_method)) {
@@ -43860,6 +43871,7 @@ static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name
43860
43871
  Py_DECREF(method);
43861
43872
  bad:
43862
43873
  return result;
43874
+ #endif
43863
43875
  }
43864
43876
 
43865
43877
  /* RaiseNeedMoreValuesToUnpack */
@@ -44670,6 +44682,7 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
44670
44682
  changed = 1;
44671
44683
  }
44672
44684
  #endif // CYTHON_METH_FASTCALL
44685
+ #if !CYTHON_COMPILING_IN_PYPY
44673
44686
  else if (strcmp(memb->name, "__module__") == 0) {
44674
44687
  PyObject *descr;
44675
44688
  assert(memb->type == T_OBJECT);
@@ -44684,11 +44697,13 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
44684
44697
  }
44685
44698
  changed = 1;
44686
44699
  }
44700
+ #endif // !CYTHON_COMPILING_IN_PYPY
44687
44701
  }
44688
44702
  memb++;
44689
44703
  }
44690
44704
  }
44691
44705
  #endif // !CYTHON_COMPILING_IN_LIMITED_API
44706
+ #if !CYTHON_COMPILING_IN_PYPY
44692
44707
  slot = spec->slots;
44693
44708
  while (slot && slot->slot && slot->slot != Py_tp_getset)
44694
44709
  slot++;
@@ -44720,6 +44735,7 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
44720
44735
  ++getset;
44721
44736
  }
44722
44737
  }
44738
+ #endif // !CYTHON_COMPILING_IN_PYPY
44723
44739
  if (changed)
44724
44740
  PyType_Modified(type);
44725
44741
  #endif // PY_VERSION_HEX > 0x030900B1
@@ -45153,10 +45169,10 @@ __PYX_GOOD:
45153
45169
  }
45154
45170
 
45155
45171
  /* TypeImport */
45156
- #ifndef __PYX_HAVE_RT_ImportType_3_1_2
45157
- #define __PYX_HAVE_RT_ImportType_3_1_2
45158
- static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module_name, const char *class_name,
45159
- size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_2 check_size)
45172
+ #ifndef __PYX_HAVE_RT_ImportType_3_1_3
45173
+ #define __PYX_HAVE_RT_ImportType_3_1_3
45174
+ static PyTypeObject *__Pyx_ImportType_3_1_3(PyObject *module, const char *module_name, const char *class_name,
45175
+ size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_3 check_size)
45160
45176
  {
45161
45177
  PyObject *result = 0;
45162
45178
  Py_ssize_t basicsize;
@@ -45212,7 +45228,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module
45212
45228
  module_name, class_name, size, basicsize+itemsize);
45213
45229
  goto bad;
45214
45230
  }
45215
- if (check_size == __Pyx_ImportType_CheckSize_Error_3_1_2 &&
45231
+ if (check_size == __Pyx_ImportType_CheckSize_Error_3_1_3 &&
45216
45232
  ((size_t)basicsize > size || (size_t)(basicsize + itemsize) < size)) {
45217
45233
  PyErr_Format(PyExc_ValueError,
45218
45234
  "%.200s.%.200s size changed, may indicate binary incompatibility. "
@@ -45220,7 +45236,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module
45220
45236
  module_name, class_name, size, basicsize, basicsize+itemsize);
45221
45237
  goto bad;
45222
45238
  }
45223
- else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_1_2 && (size_t)basicsize > size) {
45239
+ else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_1_3 && (size_t)basicsize > size) {
45224
45240
  if (PyErr_WarnFormat(NULL, 0,
45225
45241
  "%.200s.%.200s size changed, may indicate binary incompatibility. "
45226
45242
  "Expected %zd from C header, got %zd from PyObject",
@@ -45361,7 +45377,7 @@ bad:
45361
45377
  }
45362
45378
 
45363
45379
  /* CommonTypesMetaclass */
45364
- PyObject* __pyx_CommonTypesMetaclass_get_module(CYTHON_UNUSED PyObject *self, CYTHON_UNUSED void* context) {
45380
+ static PyObject* __pyx_CommonTypesMetaclass_get_module(CYTHON_UNUSED PyObject *self, CYTHON_UNUSED void* context) {
45365
45381
  return PyUnicode_FromString(__PYX_ABI_MODULE_NAME);
45366
45382
  }
45367
45383
  static PyGetSetDef __pyx_CommonTypesMetaclass_getset[] = {
@@ -48290,7 +48306,7 @@ static CYTHON_INLINE PyObject *__Pyx_PyIter_Next_Plain(PyObject *iterator) {
48290
48306
  }
48291
48307
 
48292
48308
  /* PyObjectCallMethod1 */
48293
- #if !(CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C0000)
48309
+ #if !(CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000)))
48294
48310
  static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) {
48295
48311
  PyObject *result = __Pyx_PyObject_CallOneArg(method, arg);
48296
48312
  Py_DECREF(method);
@@ -48298,7 +48314,7 @@ static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) {
48298
48314
  }
48299
48315
  #endif
48300
48316
  static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg) {
48301
- #if CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C0000
48317
+ #if CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000))
48302
48318
  PyObject *args[2] = {obj, arg};
48303
48319
  (void) __Pyx_PyObject_GetMethod;
48304
48320
  (void) __Pyx_PyObject_CallOneArg;
Binary file
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.4
2
+ Name: selectolax
3
+ Version: 0.3.34
4
+ Summary: Fast HTML5 parser with CSS selectors.
5
+ Home-page: https://github.com/rushter/selectolax
6
+ Author: Artem Golubin
7
+ Author-email: Artem Golubin <me@rushter.com>
8
+ License-Expression: MIT
9
+ Project-URL: Repository, https://github.com/rushter/selectolax
10
+ Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
11
+ Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.md
12
+ Keywords: selectolax,html,parser,css,fast
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Topic :: Text Processing :: Markup :: HTML
15
+ Classifier: Topic :: Internet
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Intended Audience :: Developers
18
+ Classifier: Natural Language :: English
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Requires-Python: >=3.9
26
+ Description-Content-Type: text/x-rst
27
+ License-File: LICENSE
28
+ Provides-Extra: cython
29
+ Requires-Dist: Cython; extra == "cython"
30
+ Dynamic: author
31
+ Dynamic: home-page
32
+ Dynamic: license-file
@@ -1,15 +1,20 @@
1
- selectolax/lexbor.pyi,sha256=CX_14MMiljOLZcoRVy96bEmGbqLTXTzSv12flrVzGDg,27721
1
+ selectolax-0.3.34.dist-info/RECORD,,
2
+ selectolax-0.3.34.dist-info/WHEEL,sha256=oqGJCpG61FZJmvyZ3C_0aCv-2mdfcY9e3fXvyUNmWfM,136
3
+ selectolax-0.3.34.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
4
+ selectolax-0.3.34.dist-info/METADATA,sha256=fGDgNbvrKw6lxT4PHH__IVb0blQ7eFbIEoaKV8ZWDY0,1286
5
+ selectolax-0.3.34.dist-info/licenses/LICENSE,sha256=MYCcM-Cv_rC2-lQiwDumin0E-rMXAhK-qIGGA29434Y,1077
6
+ selectolax/lexbor.pyi,sha256=peGOaeg01KBrh585hFk6z6X95WqrqjEdjKYlh88jZPs,28113
2
7
  selectolax/parser.pyx,sha256=i2bCslGrqYARZvCab4mIdRFyA9k7AuvJSVjZsqrT4SM,13684
3
- selectolax/__init__.py,sha256=MLVYxSOb4b5Rkz6I8O35L8iUn-Z691F1THazWpmf-iw,149
8
+ selectolax/__init__.py,sha256=d0wQ0XsnuynVKlPMOAMCB0yBbra7iisjAESTv1WTgEI,149
4
9
  selectolax/lexbor.pxd,sha256=FTx2SphXsIHCCmEouzGSp9j2CBgrOc12Wf9E7DPIbrI,21144
5
- selectolax/lexbor.pyx,sha256=g_6kYxAlqCOsMoihwOuRVgyf-F-TIbgxksHYPNlNE3s,12308
10
+ selectolax/lexbor.pyx,sha256=0LuaQJczlwSYeEq3GzljhcWUOIkWZ7ymCYZP5Kf44HY,12503
6
11
  selectolax/parser.pyi,sha256=tCCAWcaeBJrhctUKpdTKVX6z4_tsxPz8D5GpbxO3Hz0,24845
7
12
  selectolax/utils.pxi,sha256=_g-ZLprPgbqv7BLs-WEe8IhbDd_QTcfirz_NEyR1Yww,3506
8
- selectolax/lexbor.c,sha256=O5VPB3zfyb-QP557RGxvP6j--S1tJX25JBEIsdgV8cg,2364654
9
- selectolax/parser.cpython-313-darwin.so,sha256=ivGup0LI5cgxR4sF2RIjW1R3EisVMK-B7C7Dz-iNyrk,3066400
13
+ selectolax/lexbor.c,sha256=oAopXm-RClFKji_ud7VrMWoU71Rq6FCgFVPao9QkwKg,2365835
14
+ selectolax/parser.cpython-313-darwin.so,sha256=KfYNQVP0ncUT0-adA7AWO6Da3TAMUa9P0vp8jiNCQ0Y,2842144
10
15
  selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- selectolax/lexbor.cpython-313-darwin.so,sha256=gPd7PbfCU9vPFdz3nKjtjsETsFz9TtT326iCK1z9Bfw,3755312
12
- selectolax/parser.c,sha256=SMGGm8vfvCFgwdxab4GgZwZYJguoEsFY3BhHtuFCCT4,2208079
16
+ selectolax/lexbor.cpython-313-darwin.so,sha256=sz8pp215oqr5GhRSrratyXesslMrk9xN56LxQrNAQ8w,3527904
17
+ selectolax/parser.c,sha256=unTLg-rvs-I3YJfMZMC270A9atuSnLRjP-oEu9Wvhvk,2208895
13
18
  selectolax/base.pxi,sha256=eiPKlY9gG3l49qJoRQVLl1Ljza6z1k0A-met6sDPcqE,89
14
19
  selectolax/parser.pxd,sha256=BQSlDGibVfqFDhfqX6l5sBnfkHEETxlj-eSpGWERKEs,24618
15
20
  selectolax/modest/selection.pxi,sha256=m4GDpl0aI7lSWHFeBBheroUKDrZgJcc6uVubtzrXL1M,6508
@@ -17,10 +22,5 @@ selectolax/modest/util.pxi,sha256=di9cLmAyuGFXmiuptZ7Fz1SgkCf7hmiZLnpKCKEKsUc,55
17
22
  selectolax/modest/node.pxi,sha256=9D-o4Kdd-e_ebc-z8eAVxlVDdmfN5g2fUn6rj4sUSxg,33703
18
23
  selectolax/lexbor/selection.pxi,sha256=aYna6zEH9vBGxtcYe8AdOGseEh4ieymFt3Zu4cQssTs,7185
19
24
  selectolax/lexbor/util.pxi,sha256=hqMQU1O_5O82ThjUzk8NxQPl-Kg29DDGFFpC46LcejI,564
20
- selectolax/lexbor/node.pxi,sha256=1lpb3t-lGSeGMBkCT3a0IML-MonNJzTKDgiOWPfPpbA,32271
25
+ selectolax/lexbor/node.pxi,sha256=f-Illaw8ozBBzLOWDbRBCaUaK4wH4JgSHuc2xxYsFtI,32467
21
26
  selectolax/lexbor/attrs.pxi,sha256=eH90zJYHicffTzC7peIitHkOqyIw3xzomhJHxJv9hP8,3858
22
- selectolax-0.3.33.dist-info/RECORD,,
23
- selectolax-0.3.33.dist-info/WHEEL,sha256=oqGJCpG61FZJmvyZ3C_0aCv-2mdfcY9e3fXvyUNmWfM,136
24
- selectolax-0.3.33.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
25
- selectolax-0.3.33.dist-info/METADATA,sha256=sQZmiyZ9oiWmyUNzYHd_cnkmVPjk7cK8ef0Wyyf7h_A,6215
26
- selectolax-0.3.33.dist-info/licenses/LICENSE,sha256=MYCcM-Cv_rC2-lQiwDumin0E-rMXAhK-qIGGA29434Y,1077
@@ -1,187 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: selectolax
3
- Version: 0.3.33
4
- Summary: Fast HTML5 parser with CSS selectors.
5
- Home-page: https://github.com/rushter/selectolax
6
- Author: Artem Golubin
7
- Author-email: Artem Golubin <me@rushter.com>
8
- License: MIT
9
- Project-URL: Repository, https://github.com/rushter/selectolax
10
- Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
11
- Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.rst
12
- Keywords: selectolax,html,parser,css,fast
13
- Classifier: Development Status :: 5 - Production/Stable
14
- Classifier: Topic :: Text Processing :: Markup :: HTML
15
- Classifier: Topic :: Internet
16
- Classifier: Topic :: Internet :: WWW/HTTP
17
- Classifier: Intended Audience :: Developers
18
- Classifier: Natural Language :: English
19
- Classifier: Programming Language :: Python :: 3
20
- Classifier: Programming Language :: Python :: 3.9
21
- Classifier: Programming Language :: Python :: 3.10
22
- Classifier: Programming Language :: Python :: 3.11
23
- Classifier: Programming Language :: Python :: 3.12
24
- Classifier: Programming Language :: Python :: 3.13
25
- Requires-Python: >=3.9
26
- Description-Content-Type: text/x-rst
27
- License-File: LICENSE
28
- Provides-Extra: cython
29
- Requires-Dist: Cython; extra == "cython"
30
- Dynamic: author
31
- Dynamic: home-page
32
- Dynamic: license-file
33
-
34
- .. image:: docs/logo.png
35
- :alt: selectolax logo
36
-
37
- -------------------------
38
-
39
- .. image:: https://img.shields.io/pypi/v/selectolax.svg
40
- :target: https://pypi.python.org/pypi/selectolax
41
-
42
- A fast HTML5 parser with CSS selectors using `Modest <https://github.com/lexborisov/Modest/>`_ and
43
- `Lexbor <https://github.com/lexbor/lexbor>`_ engines.
44
-
45
-
46
- Installation
47
- ------------
48
- From PyPI using pip:
49
-
50
- .. code-block:: bash
51
-
52
- pip install selectolax
53
-
54
- If installation fails due to compilation errors, you may need to install `Cython <https://github.com/cython/cython>`_:
55
-
56
- .. code-block:: bash
57
-
58
- pip install selectolax[cython]
59
-
60
- This usually happens when you try to install an outdated version of selectolax on a newer version of Python.
61
-
62
-
63
- Development version from GitHub:
64
-
65
- .. code-block:: bash
66
-
67
- git clone --recursive https://github.com/rushter/selectolax
68
- cd selectolax
69
- pip install -r requirements_dev.txt
70
- python setup.py install
71
-
72
- How to compile selectolax while developing:
73
-
74
- .. code-block:: bash
75
-
76
- make clean
77
- make dev
78
-
79
- Basic examples
80
- --------------
81
-
82
- Here are some basic examples to get you started with selectolax:
83
-
84
- Parsing HTML and extracting text:
85
-
86
- .. code:: python
87
-
88
- In [1]: from selectolax.parser import HTMLParser
89
- ...:
90
- ...: html = """
91
- ...: <h1 id="title" data-updated="20201101">Hi there</h1>
92
- ...: <div class="post">Lorem Ipsum is simply dummy text of the printing and typesetting industry. </div>
93
- ...: <div class="post">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</div>
94
- ...: """
95
- ...: tree = HTMLParser(html)
96
-
97
- In [2]: tree.css_first('h1#title').text()
98
- Out[2]: 'Hi there'
99
-
100
- In [3]: tree.css_first('h1#title').attributes
101
- Out[3]: {'id': 'title', 'data-updated': '20201101'}
102
-
103
- In [4]: [node.text() for node in tree.css('.post')]
104
- Out[4]:
105
- ['Lorem Ipsum is simply dummy text of the printing and typesetting industry. ',
106
- 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.']
107
-
108
- Using advanced CSS selectors:
109
-
110
- .. code:: python
111
-
112
- In [1]: html = "<div><p id=p1><p id=p2><p id=p3><a>link</a><p id=p4><p id=p5>text<p id=p6></div>"
113
- ...: selector = "div > :nth-child(2n+1):not(:has(a))"
114
-
115
- In [2]: for node in HTMLParser(html).css(selector):
116
- ...: print(node.attributes, node.text(), node.tag)
117
- ...: print(node.parent.tag)
118
- ...: print(node.html)
119
- ...:
120
- {'id': 'p1'} p
121
- div
122
- <p id="p1"></p>
123
- {'id': 'p5'} text p
124
- div
125
- <p id="p5">text</p>
126
-
127
-
128
- * `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
129
-
130
- Available backends
131
- ------------------
132
-
133
- Selectolax supports two backends: ``Modest`` and ``Lexbor``. By default, all examples use the Modest backend.
134
- Most of the features between backends are almost identical, but there are still some differences.
135
-
136
- As of 2024, the preferred backend is ``Lexbor``. The ``Modest`` backend is still available for compatibility reasons
137
- and the underlying C library that selectolax uses is not maintained anymore.
138
-
139
-
140
- To use ``lexbor``, just import the parser and use it in the similar way to the `HTMLParser`.
141
-
142
- .. code:: python
143
-
144
- In [1]: from selectolax.lexbor import LexborHTMLParser
145
-
146
- In [2]: html = """
147
- ...: <title>Hi there</title>
148
- ...: <div id="updated">2021-08-15</div>
149
- ...: """
150
-
151
- In [3]: parser = LexborHTMLParser(html)
152
- In [4]: parser.root.css_first("#updated").text()
153
- Out[4]: '2021-08-15'
154
-
155
-
156
- Simple Benchmark
157
- ----------------
158
-
159
- * Extract title, links, scripts and a meta tag from main pages of top 754 domains. See ``examples/benchmark.py`` for more information.
160
-
161
- ============================ ===========
162
- Package Time
163
- ============================ ===========
164
- Beautiful Soup (html.parser) 61.02 sec.
165
- lxml / Beautiful Soup (lxml) 9.09 sec.
166
- html5_parser 16.10 sec.
167
- selectolax (Modest) 2.94 sec.
168
- selectolax (Lexbor) 2.39 sec.
169
- ============================ ===========
170
-
171
- Links
172
- -----
173
-
174
- * `selectolax API reference <https://selectolax.readthedocs.io/en/latest/index.html>`_
175
- * `Video introduction to web scraping using selectolax <https://youtu.be/HpRsfpPuUzE>`_
176
- * `How to Scrape 7k Products with Python using selectolax and httpx <https://www.youtube.com/watch?v=XpGvq755J2U>`_
177
- * `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
178
- * `Modest introduction <https://lexborisov.github.io/Modest/>`_
179
- * `Modest benchmark <https://lexborisov.github.io/benchmark-html-parsers/>`_
180
- * `Python benchmark <https://rushter.com/blog/python-fast-html-parser/>`_
181
- * `Another Python benchmark <https://www.peterbe.com/plog/selectolax-or-pyquery>`_
182
-
183
- License
184
- -------
185
-
186
- * Modest engine — `LGPL2.1 <https://github.com/lexborisov/Modest/blob/master/LICENSE>`_
187
- * selectolax - `MIT <https://github.com/rushter/selectolax/blob/master/LICENSE>`_