selectolax 0.3.33__cp312-cp312-win_amd64.whl → 0.3.34__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

Binary file
selectolax/lexbor.pyi CHANGED
@@ -145,6 +145,12 @@ class LexborNode:
145
145
  Matches pattern `query` against HTML tree.
146
146
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
147
147
 
148
+ Special selectors:
149
+
150
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
151
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
152
+
153
+
148
154
  Parameters
149
155
  ----------
150
156
  query : str
@@ -665,6 +671,12 @@ class LexborHTMLParser:
665
671
  Matches pattern `query` against HTML tree.
666
672
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
667
673
 
674
+ Special selectors:
675
+
676
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
677
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
678
+
679
+
668
680
  Parameters
669
681
  ----------
670
682
  query : str
selectolax/lexbor.pyx CHANGED
@@ -169,6 +169,11 @@ cdef class LexborHTMLParser:
169
169
  Matches pattern `query` against HTML tree.
170
170
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
171
171
 
172
+ Special selectors:
173
+
174
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
175
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
176
+
172
177
  Parameters
173
178
  ----------
174
179
  query : str
selectolax/parser.c CHANGED
@@ -1,4 +1,4 @@
1
- /* Generated by Cython 3.1.2 */
1
+ /* Generated by Cython 3.1.3 */
2
2
 
3
3
  /* BEGIN: Cython Metadata
4
4
  {
@@ -182,8 +182,8 @@ END: Cython Metadata */
182
182
  #elif PY_VERSION_HEX < 0x03080000
183
183
  #error Cython requires Python 3.8+.
184
184
  #else
185
- #define __PYX_ABI_VERSION "3_1_2"
186
- #define CYTHON_HEX_VERSION 0x030102F0
185
+ #define __PYX_ABI_VERSION "3_1_3"
186
+ #define CYTHON_HEX_VERSION 0x030103F0
187
187
  #define CYTHON_FUTURE_DIVISION 1
188
188
  /* CModulePreamble */
189
189
  #include <stddef.h>
@@ -546,6 +546,9 @@ END: Cython Metadata */
546
546
  enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) };
547
547
  #endif
548
548
  #endif
549
+ #ifndef CYTHON_LOCK_AND_GIL_DEADLOCK_AVOIDANCE_TIME
550
+ #define CYTHON_LOCK_AND_GIL_DEADLOCK_AVOIDANCE_TIME 100
551
+ #endif
549
552
  #ifndef __has_attribute
550
553
  #define __has_attribute(x) 0
551
554
  #endif
@@ -2768,22 +2771,22 @@ static int __Pyx__DelItemOnTypeDict(PyTypeObject *tp, PyObject *k);
2768
2771
  static int __Pyx_setup_reduce(PyObject* type_obj);
2769
2772
 
2770
2773
  /* TypeImport.proto */
2771
- #ifndef __PYX_HAVE_RT_ImportType_proto_3_1_2
2772
- #define __PYX_HAVE_RT_ImportType_proto_3_1_2
2774
+ #ifndef __PYX_HAVE_RT_ImportType_proto_3_1_3
2775
+ #define __PYX_HAVE_RT_ImportType_proto_3_1_3
2773
2776
  #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
2774
2777
  #include <stdalign.h>
2775
2778
  #endif
2776
2779
  #if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || __cplusplus >= 201103L
2777
- #define __PYX_GET_STRUCT_ALIGNMENT_3_1_2(s) alignof(s)
2780
+ #define __PYX_GET_STRUCT_ALIGNMENT_3_1_3(s) alignof(s)
2778
2781
  #else
2779
- #define __PYX_GET_STRUCT_ALIGNMENT_3_1_2(s) sizeof(void*)
2782
+ #define __PYX_GET_STRUCT_ALIGNMENT_3_1_3(s) sizeof(void*)
2780
2783
  #endif
2781
- enum __Pyx_ImportType_CheckSize_3_1_2 {
2782
- __Pyx_ImportType_CheckSize_Error_3_1_2 = 0,
2783
- __Pyx_ImportType_CheckSize_Warn_3_1_2 = 1,
2784
- __Pyx_ImportType_CheckSize_Ignore_3_1_2 = 2
2784
+ enum __Pyx_ImportType_CheckSize_3_1_3 {
2785
+ __Pyx_ImportType_CheckSize_Error_3_1_3 = 0,
2786
+ __Pyx_ImportType_CheckSize_Warn_3_1_3 = 1,
2787
+ __Pyx_ImportType_CheckSize_Ignore_3_1_3 = 2
2785
2788
  };
2786
- static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_2 check_size);
2789
+ static PyTypeObject *__Pyx_ImportType_3_1_3(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_3 check_size);
2787
2790
  #endif
2788
2791
 
2789
2792
  /* FetchSharedCythonModule.proto */
@@ -3567,7 +3570,7 @@ static const char __pyx_k_Node_strip_tags_line_554[] = "Node.strip_tags (line 55
3567
3570
  static const char __pyx_k_Selector___reduce_cython[] = "Selector.__reduce_cython__";
3568
3571
  static const char __pyx_k_Tag_name_cannot_be_empty[] = "Tag name cannot be empty";
3569
3572
  static const char __pyx_k_document_no_head_no_body[] = "document_no_head_no_body";
3570
- static const char __pyx_k_hk_A_1_vvxxy_881A_7_nA_1[] = "\200\001\360\006\000\005\010\200\220h\230k\250\033\260A\330\010\r\210^\2301\330\010\016\320\016!\320!v\320vx\320xy\330\004\023\2208\2308\2401\240A\330\004\007\200|\2207\230!\330\010*\250!\250;\260n\300A\330\004\013\2101";
3573
+ static const char __pyx_k_hk_A_1_vvxxy_881A_7_nA_1[] = "\200\001\360\006\000\005\010\200\177\220h\230k\250\033\260A\330\010\r\210^\2301\330\010\016\320\016!\320!v\320vx\320xy\330\004\023\2208\2308\2401\240A\330\004\007\200|\2207\230!\330\010*\250!\250;\260n\300A\330\004\013\2101";
3571
3574
  static const char __pyx_k_Node_unwrap_tags_line_580[] = "Node.unwrap_tags (line 580)";
3572
3575
  static const char __pyx_k_Node_unwrap_tags_line_768[] = "Node.unwrap_tags (line 768)";
3573
3576
  static const char __pyx_k_any_attribute_longer_than[] = "any_attribute_longer_than";
@@ -38627,27 +38630,27 @@ static int __Pyx_modinit_type_import_code(__pyx_mstatetype *__pyx_mstate) {
38627
38630
  /*--- Type import code ---*/
38628
38631
  __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(8, 8, __pyx_L1_error)
38629
38632
  __Pyx_GOTREF(__pyx_t_1);
38630
- __pyx_mstate->__pyx_ptype_7cpython_4bool_bool = __Pyx_ImportType_3_1_2(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "bool",
38633
+ __pyx_mstate->__pyx_ptype_7cpython_4bool_bool = __Pyx_ImportType_3_1_3(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "bool",
38631
38634
  #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000
38632
- sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyLongObject),
38635
+ sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyLongObject),
38633
38636
  #elif CYTHON_COMPILING_IN_LIMITED_API
38634
38637
  0, 0,
38635
38638
  #else
38636
- sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyLongObject),
38639
+ sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyLongObject),
38637
38640
  #endif
38638
- __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_7cpython_4bool_bool) __PYX_ERR(8, 8, __pyx_L1_error)
38641
+ __Pyx_ImportType_CheckSize_Warn_3_1_3); if (!__pyx_mstate->__pyx_ptype_7cpython_4bool_bool) __PYX_ERR(8, 8, __pyx_L1_error)
38639
38642
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
38640
38643
  __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(9, 9, __pyx_L1_error)
38641
38644
  __Pyx_GOTREF(__pyx_t_1);
38642
- __pyx_mstate->__pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_1_2(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type",
38645
+ __pyx_mstate->__pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_1_3(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type",
38643
38646
  #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000
38644
- sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyTypeObject),
38647
+ sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyTypeObject),
38645
38648
  #elif CYTHON_COMPILING_IN_LIMITED_API
38646
38649
  0, 0,
38647
38650
  #else
38648
- sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyHeapTypeObject),
38651
+ sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyHeapTypeObject),
38649
38652
  #endif
38650
- __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_7cpython_4type_type) __PYX_ERR(9, 9, __pyx_L1_error)
38653
+ __Pyx_ImportType_CheckSize_Warn_3_1_3); if (!__pyx_mstate->__pyx_ptype_7cpython_4type_type) __PYX_ERR(9, 9, __pyx_L1_error)
38651
38654
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
38652
38655
  __Pyx_RefNannyFinishContext();
38653
38656
  return 0;
@@ -43842,6 +43845,13 @@ try_unpack:
43842
43845
 
43843
43846
  /* PyObjectCallMethod0 */
43844
43847
  static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) {
43848
+ #if CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000))
43849
+ PyObject *args[1] = {obj};
43850
+ (void) __Pyx_PyObject_GetMethod;
43851
+ (void) __Pyx_PyObject_CallOneArg;
43852
+ (void) __Pyx_PyObject_CallNoArg;
43853
+ return PyObject_VectorcallMethod(method_name, args, 1 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
43854
+ #else
43845
43855
  PyObject *method = NULL, *result = NULL;
43846
43856
  int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method);
43847
43857
  if (likely(is_method)) {
@@ -43854,6 +43864,7 @@ static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name
43854
43864
  Py_DECREF(method);
43855
43865
  bad:
43856
43866
  return result;
43867
+ #endif
43857
43868
  }
43858
43869
 
43859
43870
  /* RaiseNeedMoreValuesToUnpack */
@@ -44664,6 +44675,7 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
44664
44675
  changed = 1;
44665
44676
  }
44666
44677
  #endif // CYTHON_METH_FASTCALL
44678
+ #if !CYTHON_COMPILING_IN_PYPY
44667
44679
  else if (strcmp(memb->name, "__module__") == 0) {
44668
44680
  PyObject *descr;
44669
44681
  assert(memb->type == T_OBJECT);
@@ -44678,11 +44690,13 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
44678
44690
  }
44679
44691
  changed = 1;
44680
44692
  }
44693
+ #endif // !CYTHON_COMPILING_IN_PYPY
44681
44694
  }
44682
44695
  memb++;
44683
44696
  }
44684
44697
  }
44685
44698
  #endif // !CYTHON_COMPILING_IN_LIMITED_API
44699
+ #if !CYTHON_COMPILING_IN_PYPY
44686
44700
  slot = spec->slots;
44687
44701
  while (slot && slot->slot && slot->slot != Py_tp_getset)
44688
44702
  slot++;
@@ -44714,6 +44728,7 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
44714
44728
  ++getset;
44715
44729
  }
44716
44730
  }
44731
+ #endif // !CYTHON_COMPILING_IN_PYPY
44717
44732
  if (changed)
44718
44733
  PyType_Modified(type);
44719
44734
  #endif // PY_VERSION_HEX > 0x030900B1
@@ -45147,10 +45162,10 @@ __PYX_GOOD:
45147
45162
  }
45148
45163
 
45149
45164
  /* TypeImport */
45150
- #ifndef __PYX_HAVE_RT_ImportType_3_1_2
45151
- #define __PYX_HAVE_RT_ImportType_3_1_2
45152
- static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module_name, const char *class_name,
45153
- size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_2 check_size)
45165
+ #ifndef __PYX_HAVE_RT_ImportType_3_1_3
45166
+ #define __PYX_HAVE_RT_ImportType_3_1_3
45167
+ static PyTypeObject *__Pyx_ImportType_3_1_3(PyObject *module, const char *module_name, const char *class_name,
45168
+ size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_3 check_size)
45154
45169
  {
45155
45170
  PyObject *result = 0;
45156
45171
  Py_ssize_t basicsize;
@@ -45206,7 +45221,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module
45206
45221
  module_name, class_name, size, basicsize+itemsize);
45207
45222
  goto bad;
45208
45223
  }
45209
- if (check_size == __Pyx_ImportType_CheckSize_Error_3_1_2 &&
45224
+ if (check_size == __Pyx_ImportType_CheckSize_Error_3_1_3 &&
45210
45225
  ((size_t)basicsize > size || (size_t)(basicsize + itemsize) < size)) {
45211
45226
  PyErr_Format(PyExc_ValueError,
45212
45227
  "%.200s.%.200s size changed, may indicate binary incompatibility. "
@@ -45214,7 +45229,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module
45214
45229
  module_name, class_name, size, basicsize, basicsize+itemsize);
45215
45230
  goto bad;
45216
45231
  }
45217
- else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_1_2 && (size_t)basicsize > size) {
45232
+ else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_1_3 && (size_t)basicsize > size) {
45218
45233
  if (PyErr_WarnFormat(NULL, 0,
45219
45234
  "%.200s.%.200s size changed, may indicate binary incompatibility. "
45220
45235
  "Expected %zd from C header, got %zd from PyObject",
@@ -45355,7 +45370,7 @@ bad:
45355
45370
  }
45356
45371
 
45357
45372
  /* CommonTypesMetaclass */
45358
- PyObject* __pyx_CommonTypesMetaclass_get_module(CYTHON_UNUSED PyObject *self, CYTHON_UNUSED void* context) {
45373
+ static PyObject* __pyx_CommonTypesMetaclass_get_module(CYTHON_UNUSED PyObject *self, CYTHON_UNUSED void* context) {
45359
45374
  return PyUnicode_FromString(__PYX_ABI_MODULE_NAME);
45360
45375
  }
45361
45376
  static PyGetSetDef __pyx_CommonTypesMetaclass_getset[] = {
@@ -48284,7 +48299,7 @@ static CYTHON_INLINE PyObject *__Pyx_PyIter_Next_Plain(PyObject *iterator) {
48284
48299
  }
48285
48300
 
48286
48301
  /* PyObjectCallMethod1 */
48287
- #if !(CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C0000)
48302
+ #if !(CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000)))
48288
48303
  static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) {
48289
48304
  PyObject *result = __Pyx_PyObject_CallOneArg(method, arg);
48290
48305
  Py_DECREF(method);
@@ -48292,7 +48307,7 @@ static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) {
48292
48307
  }
48293
48308
  #endif
48294
48309
  static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg) {
48295
- #if CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C0000
48310
+ #if CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000))
48296
48311
  PyObject *args[2] = {obj, arg};
48297
48312
  (void) __Pyx_PyObject_GetMethod;
48298
48313
  (void) __Pyx_PyObject_CallOneArg;
Binary file
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.4
2
+ Name: selectolax
3
+ Version: 0.3.34
4
+ Summary: Fast HTML5 parser with CSS selectors.
5
+ Home-page: https://github.com/rushter/selectolax
6
+ Author: Artem Golubin
7
+ Author-email: Artem Golubin <me@rushter.com>
8
+ License-Expression: MIT
9
+ Project-URL: Repository, https://github.com/rushter/selectolax
10
+ Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
11
+ Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.md
12
+ Keywords: selectolax,html,parser,css,fast
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Topic :: Text Processing :: Markup :: HTML
15
+ Classifier: Topic :: Internet
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Intended Audience :: Developers
18
+ Classifier: Natural Language :: English
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Requires-Python: >=3.9
26
+ Description-Content-Type: text/x-rst
27
+ License-File: LICENSE
28
+ Provides-Extra: cython
29
+ Requires-Dist: Cython; extra == "cython"
30
+ Dynamic: author
31
+ Dynamic: home-page
32
+ Dynamic: license-file
@@ -1,26 +1,26 @@
1
- selectolax/__init__.py,sha256=_qbVtaupjc0xeV8F8cU3lbDgckEoaUlhy2N013uLu4Y,157
1
+ selectolax/__init__.py,sha256=iI6pQ10gimevS2gTf4K4_1cXh4NBRFj_5GjkmhrvU94,157
2
2
  selectolax/base.pxi,sha256=zOj3BrCA71xd-mJFtkMIAglP4ZybfrHVoCoy6ljTBDQ,93
3
- selectolax/lexbor.c,sha256=K806oXPqw4y4LoljPDmj7d7weW0xG58gTRQ7UR1jNU8,2418331
4
- selectolax/lexbor.cp312-win_amd64.pyd,sha256=taZuJC4eimciag03siYg4_zNevl-XEYm-3AC1D_HLvQ,3150848
3
+ selectolax/lexbor.c,sha256=ohdT--aXa69etvB2pMZU-vBU8P0urBCVeNIsKJzIe3k,2419508
4
+ selectolax/lexbor.cp312-win_amd64.pyd,sha256=km3OqMJRruHnZGPjj0ljCQRr9a7Y-S6TIkTBdM70WLc,3152896
5
5
  selectolax/lexbor.pxd,sha256=BcqAzhlUVq0GVWiJHWXNhs4jY-gi6k0BELEnQtSYJAI,21720
6
- selectolax/lexbor.pyi,sha256=6oQU-9-LE_OAhwsETmynUYhCEE3lhxnEzj36TefQheo,28586
7
- selectolax/lexbor.pyx,sha256=mgP31DQHkRVAOvu0Eax29sCEq57OWUA51LoPOAFZ6VU,12691
8
- selectolax/parser.c,sha256=PABrCk3JLEVb3nf02FtAplokuPQDSFVuuoV54MBKu1s,2258494
9
- selectolax/parser.cp312-win_amd64.pyd,sha256=f6DFqa--SjcXte89Qua_E2mPD5jR5xjUqDu6yZ740s8,2119680
6
+ selectolax/lexbor.pyi,sha256=dRNzLXJEbFRR7QcItuX8Ews9E9I6h6G4vA3X1hijzj4,28990
7
+ selectolax/lexbor.pyx,sha256=XLZ2vGwLoWdctnmU-gfizjD6tMjehR_bzNOapDJ_YOQ,12891
8
+ selectolax/parser.c,sha256=E6Jy4u2RriHcudgyhxtAxpEyCvwH3lPxxjPIfemuK7A,2259306
9
+ selectolax/parser.cp312-win_amd64.pyd,sha256=suNKFZxxkFyBhK1KrC93vlrhUl0sizP4CPMMuqYQmTU,2119680
10
10
  selectolax/parser.pxd,sha256=T7GoQdaOkhp_W2TBlRY0tZqom97PkHrytYaXQlyVnbI,25196
11
11
  selectolax/parser.pyi,sha256=-qutpjrK1dD4rrl3SsHWQt2FT5lv6meaACkQzk1Bt6o,25612
12
12
  selectolax/parser.pyx,sha256=nIWuhaEFRwlfo64WmgrSOM0A8mUw0eWw9j_fWyLV-Ro,14127
13
13
  selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  selectolax/utils.pxi,sha256=hkzKfycdpwH1P-E_pP-9NTGsmiajt6EJNZSlkxlRapA,3623
15
15
  selectolax/lexbor/attrs.pxi,sha256=d59V77aGkpp7YsYsd6t_z4-tRnUoQTJZKsvMC8nyttM,3978
16
- selectolax/lexbor/node.pxi,sha256=aOH-pr0DtXOZ63YyfVVd3pppXL3mAbuugNT9HhKvoE0,33205
16
+ selectolax/lexbor/node.pxi,sha256=KODqPk3yZ_owwdSxqNr2Ih6qAOhu9CJ-jrHtqQJcWmY,33407
17
17
  selectolax/lexbor/selection.pxi,sha256=BeUDypw5_P0CTmi-ACLcd7pK2NnG9ASrwWOdLdweAZY,7378
18
18
  selectolax/lexbor/util.pxi,sha256=q2EYVNdnROg9y30mWpGwlNA0W00nJ7ZRNEEDrOEG14s,584
19
19
  selectolax/modest/node.pxi,sha256=iX_yRPIPVkG0ALW7hEfmXiVperw6RjkSGATkxzLokz0,34691
20
20
  selectolax/modest/selection.pxi,sha256=PfHUN1uuNA7YfcxTu7JZjhxevVbFRP1bHd3kyyFdO7E,6703
21
21
  selectolax/modest/util.pxi,sha256=zab67Wzo8FcipA2VS8ClptaC19lZirbNqFEGQ3hW2Is,572
22
- selectolax-0.3.33.dist-info/licenses/LICENSE,sha256=A7Jb3WZcENcLfZRc7QPdm9zJdwfpIyPodPJu-kdMH6E,1087
23
- selectolax-0.3.33.dist-info/METADATA,sha256=HmxKX5O__XxvqspHStldQCxGb22RSRndWDtpxBSntl8,6402
24
- selectolax-0.3.33.dist-info/WHEEL,sha256=8UP9x9puWI0P1V_d7K2oMTBqfeLNm21CTzZ_Ptr0NXU,101
25
- selectolax-0.3.33.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
26
- selectolax-0.3.33.dist-info/RECORD,,
22
+ selectolax-0.3.34.dist-info/licenses/LICENSE,sha256=A7Jb3WZcENcLfZRc7QPdm9zJdwfpIyPodPJu-kdMH6E,1087
23
+ selectolax-0.3.34.dist-info/METADATA,sha256=rAqskRB9wMSn7tEZLxZswUJD4wFzN4fizyXjiBv4L4o,1318
24
+ selectolax-0.3.34.dist-info/WHEEL,sha256=8UP9x9puWI0P1V_d7K2oMTBqfeLNm21CTzZ_Ptr0NXU,101
25
+ selectolax-0.3.34.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
26
+ selectolax-0.3.34.dist-info/RECORD,,
@@ -1,187 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: selectolax
3
- Version: 0.3.33
4
- Summary: Fast HTML5 parser with CSS selectors.
5
- Home-page: https://github.com/rushter/selectolax
6
- Author: Artem Golubin
7
- Author-email: Artem Golubin <me@rushter.com>
8
- License: MIT
9
- Project-URL: Repository, https://github.com/rushter/selectolax
10
- Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
11
- Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.rst
12
- Keywords: selectolax,html,parser,css,fast
13
- Classifier: Development Status :: 5 - Production/Stable
14
- Classifier: Topic :: Text Processing :: Markup :: HTML
15
- Classifier: Topic :: Internet
16
- Classifier: Topic :: Internet :: WWW/HTTP
17
- Classifier: Intended Audience :: Developers
18
- Classifier: Natural Language :: English
19
- Classifier: Programming Language :: Python :: 3
20
- Classifier: Programming Language :: Python :: 3.9
21
- Classifier: Programming Language :: Python :: 3.10
22
- Classifier: Programming Language :: Python :: 3.11
23
- Classifier: Programming Language :: Python :: 3.12
24
- Classifier: Programming Language :: Python :: 3.13
25
- Requires-Python: >=3.9
26
- Description-Content-Type: text/x-rst
27
- License-File: LICENSE
28
- Provides-Extra: cython
29
- Requires-Dist: Cython; extra == "cython"
30
- Dynamic: author
31
- Dynamic: home-page
32
- Dynamic: license-file
33
-
34
- .. image:: docs/logo.png
35
- :alt: selectolax logo
36
-
37
- -------------------------
38
-
39
- .. image:: https://img.shields.io/pypi/v/selectolax.svg
40
- :target: https://pypi.python.org/pypi/selectolax
41
-
42
- A fast HTML5 parser with CSS selectors using `Modest <https://github.com/lexborisov/Modest/>`_ and
43
- `Lexbor <https://github.com/lexbor/lexbor>`_ engines.
44
-
45
-
46
- Installation
47
- ------------
48
- From PyPI using pip:
49
-
50
- .. code-block:: bash
51
-
52
- pip install selectolax
53
-
54
- If installation fails due to compilation errors, you may need to install `Cython <https://github.com/cython/cython>`_:
55
-
56
- .. code-block:: bash
57
-
58
- pip install selectolax[cython]
59
-
60
- This usually happens when you try to install an outdated version of selectolax on a newer version of Python.
61
-
62
-
63
- Development version from GitHub:
64
-
65
- .. code-block:: bash
66
-
67
- git clone --recursive https://github.com/rushter/selectolax
68
- cd selectolax
69
- pip install -r requirements_dev.txt
70
- python setup.py install
71
-
72
- How to compile selectolax while developing:
73
-
74
- .. code-block:: bash
75
-
76
- make clean
77
- make dev
78
-
79
- Basic examples
80
- --------------
81
-
82
- Here are some basic examples to get you started with selectolax:
83
-
84
- Parsing HTML and extracting text:
85
-
86
- .. code:: python
87
-
88
- In [1]: from selectolax.parser import HTMLParser
89
- ...:
90
- ...: html = """
91
- ...: <h1 id="title" data-updated="20201101">Hi there</h1>
92
- ...: <div class="post">Lorem Ipsum is simply dummy text of the printing and typesetting industry. </div>
93
- ...: <div class="post">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</div>
94
- ...: """
95
- ...: tree = HTMLParser(html)
96
-
97
- In [2]: tree.css_first('h1#title').text()
98
- Out[2]: 'Hi there'
99
-
100
- In [3]: tree.css_first('h1#title').attributes
101
- Out[3]: {'id': 'title', 'data-updated': '20201101'}
102
-
103
- In [4]: [node.text() for node in tree.css('.post')]
104
- Out[4]:
105
- ['Lorem Ipsum is simply dummy text of the printing and typesetting industry. ',
106
- 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.']
107
-
108
- Using advanced CSS selectors:
109
-
110
- .. code:: python
111
-
112
- In [1]: html = "<div><p id=p1><p id=p2><p id=p3><a>link</a><p id=p4><p id=p5>text<p id=p6></div>"
113
- ...: selector = "div > :nth-child(2n+1):not(:has(a))"
114
-
115
- In [2]: for node in HTMLParser(html).css(selector):
116
- ...: print(node.attributes, node.text(), node.tag)
117
- ...: print(node.parent.tag)
118
- ...: print(node.html)
119
- ...:
120
- {'id': 'p1'} p
121
- div
122
- <p id="p1"></p>
123
- {'id': 'p5'} text p
124
- div
125
- <p id="p5">text</p>
126
-
127
-
128
- * `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
129
-
130
- Available backends
131
- ------------------
132
-
133
- Selectolax supports two backends: ``Modest`` and ``Lexbor``. By default, all examples use the Modest backend.
134
- Most of the features between backends are almost identical, but there are still some differences.
135
-
136
- As of 2024, the preferred backend is ``Lexbor``. The ``Modest`` backend is still available for compatibility reasons
137
- and the underlying C library that selectolax uses is not maintained anymore.
138
-
139
-
140
- To use ``lexbor``, just import the parser and use it in the similar way to the `HTMLParser`.
141
-
142
- .. code:: python
143
-
144
- In [1]: from selectolax.lexbor import LexborHTMLParser
145
-
146
- In [2]: html = """
147
- ...: <title>Hi there</title>
148
- ...: <div id="updated">2021-08-15</div>
149
- ...: """
150
-
151
- In [3]: parser = LexborHTMLParser(html)
152
- In [4]: parser.root.css_first("#updated").text()
153
- Out[4]: '2021-08-15'
154
-
155
-
156
- Simple Benchmark
157
- ----------------
158
-
159
- * Extract title, links, scripts and a meta tag from main pages of top 754 domains. See ``examples/benchmark.py`` for more information.
160
-
161
- ============================ ===========
162
- Package Time
163
- ============================ ===========
164
- Beautiful Soup (html.parser) 61.02 sec.
165
- lxml / Beautiful Soup (lxml) 9.09 sec.
166
- html5_parser 16.10 sec.
167
- selectolax (Modest) 2.94 sec.
168
- selectolax (Lexbor) 2.39 sec.
169
- ============================ ===========
170
-
171
- Links
172
- -----
173
-
174
- * `selectolax API reference <https://selectolax.readthedocs.io/en/latest/index.html>`_
175
- * `Video introduction to web scraping using selectolax <https://youtu.be/HpRsfpPuUzE>`_
176
- * `How to Scrape 7k Products with Python using selectolax and httpx <https://www.youtube.com/watch?v=XpGvq755J2U>`_
177
- * `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
178
- * `Modest introduction <https://lexborisov.github.io/Modest/>`_
179
- * `Modest benchmark <https://lexborisov.github.io/benchmark-html-parsers/>`_
180
- * `Python benchmark <https://rushter.com/blog/python-fast-html-parser/>`_
181
- * `Another Python benchmark <https://www.peterbe.com/plog/selectolax-or-pyquery>`_
182
-
183
- License
184
- -------
185
-
186
- * Modest engine — `LGPL2.1 <https://github.com/lexborisov/Modest/blob/master/LICENSE>`_
187
- * selectolax - `MIT <https://github.com/rushter/selectolax/blob/master/LICENSE>`_