selectolax 0.3.33__cp39-cp39-win_amd64.whl → 0.3.34__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
selectolax/lexbor.pyi CHANGED
@@ -145,6 +145,12 @@ class LexborNode:
145
145
  Matches pattern `query` against HTML tree.
146
146
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
147
147
 
148
+ Special selectors:
149
+
150
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
151
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
152
+
153
+
148
154
  Parameters
149
155
  ----------
150
156
  query : str
@@ -665,6 +671,12 @@ class LexborHTMLParser:
665
671
  Matches pattern `query` against HTML tree.
666
672
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
667
673
 
674
+ Special selectors:
675
+
676
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
677
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
678
+
679
+
668
680
  Parameters
669
681
  ----------
670
682
  query : str
selectolax/lexbor.pyx CHANGED
@@ -169,6 +169,11 @@ cdef class LexborHTMLParser:
169
169
  Matches pattern `query` against HTML tree.
170
170
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
171
171
 
172
+ Special selectors:
173
+
174
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
175
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
176
+
172
177
  Parameters
173
178
  ----------
174
179
  query : str
selectolax/parser.c CHANGED
@@ -1,4 +1,4 @@
1
- /* Generated by Cython 3.1.2 */
1
+ /* Generated by Cython 3.1.3 */
2
2
 
3
3
  /* BEGIN: Cython Metadata
4
4
  {
@@ -182,8 +182,8 @@ END: Cython Metadata */
182
182
  #elif PY_VERSION_HEX < 0x03080000
183
183
  #error Cython requires Python 3.8+.
184
184
  #else
185
- #define __PYX_ABI_VERSION "3_1_2"
186
- #define CYTHON_HEX_VERSION 0x030102F0
185
+ #define __PYX_ABI_VERSION "3_1_3"
186
+ #define CYTHON_HEX_VERSION 0x030103F0
187
187
  #define CYTHON_FUTURE_DIVISION 1
188
188
  /* CModulePreamble */
189
189
  #include <stddef.h>
@@ -546,6 +546,9 @@ END: Cython Metadata */
546
546
  enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) };
547
547
  #endif
548
548
  #endif
549
+ #ifndef CYTHON_LOCK_AND_GIL_DEADLOCK_AVOIDANCE_TIME
550
+ #define CYTHON_LOCK_AND_GIL_DEADLOCK_AVOIDANCE_TIME 100
551
+ #endif
549
552
  #ifndef __has_attribute
550
553
  #define __has_attribute(x) 0
551
554
  #endif
@@ -2768,22 +2771,22 @@ static int __Pyx__DelItemOnTypeDict(PyTypeObject *tp, PyObject *k);
2768
2771
  static int __Pyx_setup_reduce(PyObject* type_obj);
2769
2772
 
2770
2773
  /* TypeImport.proto */
2771
- #ifndef __PYX_HAVE_RT_ImportType_proto_3_1_2
2772
- #define __PYX_HAVE_RT_ImportType_proto_3_1_2
2774
+ #ifndef __PYX_HAVE_RT_ImportType_proto_3_1_3
2775
+ #define __PYX_HAVE_RT_ImportType_proto_3_1_3
2773
2776
  #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
2774
2777
  #include <stdalign.h>
2775
2778
  #endif
2776
2779
  #if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || __cplusplus >= 201103L
2777
- #define __PYX_GET_STRUCT_ALIGNMENT_3_1_2(s) alignof(s)
2780
+ #define __PYX_GET_STRUCT_ALIGNMENT_3_1_3(s) alignof(s)
2778
2781
  #else
2779
- #define __PYX_GET_STRUCT_ALIGNMENT_3_1_2(s) sizeof(void*)
2782
+ #define __PYX_GET_STRUCT_ALIGNMENT_3_1_3(s) sizeof(void*)
2780
2783
  #endif
2781
- enum __Pyx_ImportType_CheckSize_3_1_2 {
2782
- __Pyx_ImportType_CheckSize_Error_3_1_2 = 0,
2783
- __Pyx_ImportType_CheckSize_Warn_3_1_2 = 1,
2784
- __Pyx_ImportType_CheckSize_Ignore_3_1_2 = 2
2784
+ enum __Pyx_ImportType_CheckSize_3_1_3 {
2785
+ __Pyx_ImportType_CheckSize_Error_3_1_3 = 0,
2786
+ __Pyx_ImportType_CheckSize_Warn_3_1_3 = 1,
2787
+ __Pyx_ImportType_CheckSize_Ignore_3_1_3 = 2
2785
2788
  };
2786
- static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_2 check_size);
2789
+ static PyTypeObject *__Pyx_ImportType_3_1_3(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_3 check_size);
2787
2790
  #endif
2788
2791
 
2789
2792
  /* FetchSharedCythonModule.proto */
@@ -3567,7 +3570,7 @@ static const char __pyx_k_Node_strip_tags_line_554[] = "Node.strip_tags (line 55
3567
3570
  static const char __pyx_k_Selector___reduce_cython[] = "Selector.__reduce_cython__";
3568
3571
  static const char __pyx_k_Tag_name_cannot_be_empty[] = "Tag name cannot be empty";
3569
3572
  static const char __pyx_k_document_no_head_no_body[] = "document_no_head_no_body";
3570
- static const char __pyx_k_hk_A_1_vvxxy_881A_7_nA_1[] = "\200\001\360\006\000\005\010\200\220h\230k\250\033\260A\330\010\r\210^\2301\330\010\016\320\016!\320!v\320vx\320xy\330\004\023\2208\2308\2401\240A\330\004\007\200|\2207\230!\330\010*\250!\250;\260n\300A\330\004\013\2101";
3573
+ static const char __pyx_k_hk_A_1_vvxxy_881A_7_nA_1[] = "\200\001\360\006\000\005\010\200\177\220h\230k\250\033\260A\330\010\r\210^\2301\330\010\016\320\016!\320!v\320vx\320xy\330\004\023\2208\2308\2401\240A\330\004\007\200|\2207\230!\330\010*\250!\250;\260n\300A\330\004\013\2101";
3571
3574
  static const char __pyx_k_Node_unwrap_tags_line_580[] = "Node.unwrap_tags (line 580)";
3572
3575
  static const char __pyx_k_Node_unwrap_tags_line_768[] = "Node.unwrap_tags (line 768)";
3573
3576
  static const char __pyx_k_any_attribute_longer_than[] = "any_attribute_longer_than";
@@ -38627,27 +38630,27 @@ static int __Pyx_modinit_type_import_code(__pyx_mstatetype *__pyx_mstate) {
38627
38630
  /*--- Type import code ---*/
38628
38631
  __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(8, 8, __pyx_L1_error)
38629
38632
  __Pyx_GOTREF(__pyx_t_1);
38630
- __pyx_mstate->__pyx_ptype_7cpython_4bool_bool = __Pyx_ImportType_3_1_2(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "bool",
38633
+ __pyx_mstate->__pyx_ptype_7cpython_4bool_bool = __Pyx_ImportType_3_1_3(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "bool",
38631
38634
  #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000
38632
- sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyLongObject),
38635
+ sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyLongObject),
38633
38636
  #elif CYTHON_COMPILING_IN_LIMITED_API
38634
38637
  0, 0,
38635
38638
  #else
38636
- sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyLongObject),
38639
+ sizeof(PyLongObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyLongObject),
38637
38640
  #endif
38638
- __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_7cpython_4bool_bool) __PYX_ERR(8, 8, __pyx_L1_error)
38641
+ __Pyx_ImportType_CheckSize_Warn_3_1_3); if (!__pyx_mstate->__pyx_ptype_7cpython_4bool_bool) __PYX_ERR(8, 8, __pyx_L1_error)
38639
38642
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
38640
38643
  __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(9, 9, __pyx_L1_error)
38641
38644
  __Pyx_GOTREF(__pyx_t_1);
38642
- __pyx_mstate->__pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_1_2(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type",
38645
+ __pyx_mstate->__pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_1_3(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type",
38643
38646
  #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000
38644
- sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyTypeObject),
38647
+ sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyTypeObject),
38645
38648
  #elif CYTHON_COMPILING_IN_LIMITED_API
38646
38649
  0, 0,
38647
38650
  #else
38648
- sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyHeapTypeObject),
38651
+ sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_3(PyHeapTypeObject),
38649
38652
  #endif
38650
- __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_7cpython_4type_type) __PYX_ERR(9, 9, __pyx_L1_error)
38653
+ __Pyx_ImportType_CheckSize_Warn_3_1_3); if (!__pyx_mstate->__pyx_ptype_7cpython_4type_type) __PYX_ERR(9, 9, __pyx_L1_error)
38651
38654
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
38652
38655
  __Pyx_RefNannyFinishContext();
38653
38656
  return 0;
@@ -40752,16 +40755,15 @@ static int __Pyx_InitConstants(__pyx_mstatetype *__pyx_mstate) {
40752
40755
  return -1;
40753
40756
  }
40754
40757
  /* #### Code section: init_codeobjects ### */
40755
- \
40756
- typedef struct {
40757
- unsigned int argcount : 3;
40758
- unsigned int num_posonly_args : 1;
40759
- unsigned int num_kwonly_args : 1;
40760
- unsigned int nlocals : 4;
40761
- unsigned int flags : 10;
40762
- unsigned int first_line : 10;
40763
- unsigned int line_table_length : 13;
40764
- } __Pyx_PyCode_New_function_description;
40758
+ typedef struct {
40759
+ unsigned int argcount : 3;
40760
+ unsigned int num_posonly_args : 1;
40761
+ unsigned int num_kwonly_args : 1;
40762
+ unsigned int nlocals : 4;
40763
+ unsigned int flags : 10;
40764
+ unsigned int first_line : 10;
40765
+ unsigned int line_table_length : 13;
40766
+ } __Pyx_PyCode_New_function_description;
40765
40767
  /* NewCodeObj.proto */
40766
40768
  static PyObject* __Pyx_PyCode_New(
40767
40769
  const __Pyx_PyCode_New_function_description descr,
@@ -43842,6 +43844,13 @@ try_unpack:
43842
43844
 
43843
43845
  /* PyObjectCallMethod0 */
43844
43846
  static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) {
43847
+ #if CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000))
43848
+ PyObject *args[1] = {obj};
43849
+ (void) __Pyx_PyObject_GetMethod;
43850
+ (void) __Pyx_PyObject_CallOneArg;
43851
+ (void) __Pyx_PyObject_CallNoArg;
43852
+ return PyObject_VectorcallMethod(method_name, args, 1 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
43853
+ #else
43845
43854
  PyObject *method = NULL, *result = NULL;
43846
43855
  int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method);
43847
43856
  if (likely(is_method)) {
@@ -43854,6 +43863,7 @@ static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name
43854
43863
  Py_DECREF(method);
43855
43864
  bad:
43856
43865
  return result;
43866
+ #endif
43857
43867
  }
43858
43868
 
43859
43869
  /* RaiseNeedMoreValuesToUnpack */
@@ -44664,6 +44674,7 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
44664
44674
  changed = 1;
44665
44675
  }
44666
44676
  #endif // CYTHON_METH_FASTCALL
44677
+ #if !CYTHON_COMPILING_IN_PYPY
44667
44678
  else if (strcmp(memb->name, "__module__") == 0) {
44668
44679
  PyObject *descr;
44669
44680
  assert(memb->type == T_OBJECT);
@@ -44678,11 +44689,13 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
44678
44689
  }
44679
44690
  changed = 1;
44680
44691
  }
44692
+ #endif // !CYTHON_COMPILING_IN_PYPY
44681
44693
  }
44682
44694
  memb++;
44683
44695
  }
44684
44696
  }
44685
44697
  #endif // !CYTHON_COMPILING_IN_LIMITED_API
44698
+ #if !CYTHON_COMPILING_IN_PYPY
44686
44699
  slot = spec->slots;
44687
44700
  while (slot && slot->slot && slot->slot != Py_tp_getset)
44688
44701
  slot++;
@@ -44714,6 +44727,7 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject
44714
44727
  ++getset;
44715
44728
  }
44716
44729
  }
44730
+ #endif // !CYTHON_COMPILING_IN_PYPY
44717
44731
  if (changed)
44718
44732
  PyType_Modified(type);
44719
44733
  #endif // PY_VERSION_HEX > 0x030900B1
@@ -45147,10 +45161,10 @@ __PYX_GOOD:
45147
45161
  }
45148
45162
 
45149
45163
  /* TypeImport */
45150
- #ifndef __PYX_HAVE_RT_ImportType_3_1_2
45151
- #define __PYX_HAVE_RT_ImportType_3_1_2
45152
- static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module_name, const char *class_name,
45153
- size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_2 check_size)
45164
+ #ifndef __PYX_HAVE_RT_ImportType_3_1_3
45165
+ #define __PYX_HAVE_RT_ImportType_3_1_3
45166
+ static PyTypeObject *__Pyx_ImportType_3_1_3(PyObject *module, const char *module_name, const char *class_name,
45167
+ size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_3 check_size)
45154
45168
  {
45155
45169
  PyObject *result = 0;
45156
45170
  Py_ssize_t basicsize;
@@ -45206,7 +45220,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module
45206
45220
  module_name, class_name, size, basicsize+itemsize);
45207
45221
  goto bad;
45208
45222
  }
45209
- if (check_size == __Pyx_ImportType_CheckSize_Error_3_1_2 &&
45223
+ if (check_size == __Pyx_ImportType_CheckSize_Error_3_1_3 &&
45210
45224
  ((size_t)basicsize > size || (size_t)(basicsize + itemsize) < size)) {
45211
45225
  PyErr_Format(PyExc_ValueError,
45212
45226
  "%.200s.%.200s size changed, may indicate binary incompatibility. "
@@ -45214,7 +45228,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module
45214
45228
  module_name, class_name, size, basicsize, basicsize+itemsize);
45215
45229
  goto bad;
45216
45230
  }
45217
- else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_1_2 && (size_t)basicsize > size) {
45231
+ else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_1_3 && (size_t)basicsize > size) {
45218
45232
  if (PyErr_WarnFormat(NULL, 0,
45219
45233
  "%.200s.%.200s size changed, may indicate binary incompatibility. "
45220
45234
  "Expected %zd from C header, got %zd from PyObject",
@@ -45355,7 +45369,7 @@ bad:
45355
45369
  }
45356
45370
 
45357
45371
  /* CommonTypesMetaclass */
45358
- PyObject* __pyx_CommonTypesMetaclass_get_module(CYTHON_UNUSED PyObject *self, CYTHON_UNUSED void* context) {
45372
+ static PyObject* __pyx_CommonTypesMetaclass_get_module(CYTHON_UNUSED PyObject *self, CYTHON_UNUSED void* context) {
45359
45373
  return PyUnicode_FromString(__PYX_ABI_MODULE_NAME);
45360
45374
  }
45361
45375
  static PyGetSetDef __pyx_CommonTypesMetaclass_getset[] = {
@@ -48284,7 +48298,7 @@ static CYTHON_INLINE PyObject *__Pyx_PyIter_Next_Plain(PyObject *iterator) {
48284
48298
  }
48285
48299
 
48286
48300
  /* PyObjectCallMethod1 */
48287
- #if !(CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C0000)
48301
+ #if !(CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000)))
48288
48302
  static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) {
48289
48303
  PyObject *result = __Pyx_PyObject_CallOneArg(method, arg);
48290
48304
  Py_DECREF(method);
@@ -48292,7 +48306,7 @@ static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) {
48292
48306
  }
48293
48307
  #endif
48294
48308
  static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg) {
48295
- #if CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C0000
48309
+ #if CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000))
48296
48310
  PyObject *args[2] = {obj, arg};
48297
48311
  (void) __Pyx_PyObject_GetMethod;
48298
48312
  (void) __Pyx_PyObject_CallOneArg;
Binary file
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.4
2
+ Name: selectolax
3
+ Version: 0.3.34
4
+ Summary: Fast HTML5 parser with CSS selectors.
5
+ Home-page: https://github.com/rushter/selectolax
6
+ Author: Artem Golubin
7
+ Author-email: Artem Golubin <me@rushter.com>
8
+ License-Expression: MIT
9
+ Project-URL: Repository, https://github.com/rushter/selectolax
10
+ Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
11
+ Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.md
12
+ Keywords: selectolax,html,parser,css,fast
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Topic :: Text Processing :: Markup :: HTML
15
+ Classifier: Topic :: Internet
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Intended Audience :: Developers
18
+ Classifier: Natural Language :: English
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Requires-Python: >=3.9
26
+ Description-Content-Type: text/x-rst
27
+ License-File: LICENSE
28
+ Provides-Extra: cython
29
+ Requires-Dist: Cython; extra == "cython"
30
+ Dynamic: author
31
+ Dynamic: home-page
32
+ Dynamic: license-file
@@ -1,26 +1,26 @@
1
- selectolax/__init__.py,sha256=_qbVtaupjc0xeV8F8cU3lbDgckEoaUlhy2N013uLu4Y,157
1
+ selectolax/__init__.py,sha256=iI6pQ10gimevS2gTf4K4_1cXh4NBRFj_5GjkmhrvU94,157
2
2
  selectolax/base.pxi,sha256=zOj3BrCA71xd-mJFtkMIAglP4ZybfrHVoCoy6ljTBDQ,93
3
- selectolax/lexbor.c,sha256=K806oXPqw4y4LoljPDmj7d7weW0xG58gTRQ7UR1jNU8,2418331
4
- selectolax/lexbor.cp39-win_amd64.pyd,sha256=AItD7G2SeAdn1lH5tyZVS7V3aErclTri_3SOoZRCQTk,3148800
3
+ selectolax/lexbor.c,sha256=Kz7IFiUGbVTJvAH3WTwu188zD4xQm08Fs6ab6Jo6jyE,2419433
4
+ selectolax/lexbor.cp39-win_amd64.pyd,sha256=oyBtlI6N_kbB3PWwszUu2wMnj5QGzzkHfRszq7w2KLE,3149824
5
5
  selectolax/lexbor.pxd,sha256=BcqAzhlUVq0GVWiJHWXNhs4jY-gi6k0BELEnQtSYJAI,21720
6
- selectolax/lexbor.pyi,sha256=6oQU-9-LE_OAhwsETmynUYhCEE3lhxnEzj36TefQheo,28586
7
- selectolax/lexbor.pyx,sha256=mgP31DQHkRVAOvu0Eax29sCEq57OWUA51LoPOAFZ6VU,12691
8
- selectolax/parser.c,sha256=PABrCk3JLEVb3nf02FtAplokuPQDSFVuuoV54MBKu1s,2258494
9
- selectolax/parser.cp39-win_amd64.pyd,sha256=4Fojc7eCok0WAfkHNuPujh0_mq3Dnc3X0gWcQKk8h6U,2106368
6
+ selectolax/lexbor.pyi,sha256=dRNzLXJEbFRR7QcItuX8Ews9E9I6h6G4vA3X1hijzj4,28990
7
+ selectolax/lexbor.pyx,sha256=XLZ2vGwLoWdctnmU-gfizjD6tMjehR_bzNOapDJ_YOQ,12891
8
+ selectolax/parser.c,sha256=zUJAqFbI1vy5-cjgPwJVfYassgbP7Gdnr2eRYv5D3W4,2259231
9
+ selectolax/parser.cp39-win_amd64.pyd,sha256=ewom3_ckmFHulhsszlIczbpzbw6m4lBNRnitepuNkJE,2105856
10
10
  selectolax/parser.pxd,sha256=T7GoQdaOkhp_W2TBlRY0tZqom97PkHrytYaXQlyVnbI,25196
11
11
  selectolax/parser.pyi,sha256=-qutpjrK1dD4rrl3SsHWQt2FT5lv6meaACkQzk1Bt6o,25612
12
12
  selectolax/parser.pyx,sha256=nIWuhaEFRwlfo64WmgrSOM0A8mUw0eWw9j_fWyLV-Ro,14127
13
13
  selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  selectolax/utils.pxi,sha256=hkzKfycdpwH1P-E_pP-9NTGsmiajt6EJNZSlkxlRapA,3623
15
15
  selectolax/lexbor/attrs.pxi,sha256=d59V77aGkpp7YsYsd6t_z4-tRnUoQTJZKsvMC8nyttM,3978
16
- selectolax/lexbor/node.pxi,sha256=aOH-pr0DtXOZ63YyfVVd3pppXL3mAbuugNT9HhKvoE0,33205
16
+ selectolax/lexbor/node.pxi,sha256=KODqPk3yZ_owwdSxqNr2Ih6qAOhu9CJ-jrHtqQJcWmY,33407
17
17
  selectolax/lexbor/selection.pxi,sha256=BeUDypw5_P0CTmi-ACLcd7pK2NnG9ASrwWOdLdweAZY,7378
18
18
  selectolax/lexbor/util.pxi,sha256=q2EYVNdnROg9y30mWpGwlNA0W00nJ7ZRNEEDrOEG14s,584
19
19
  selectolax/modest/node.pxi,sha256=iX_yRPIPVkG0ALW7hEfmXiVperw6RjkSGATkxzLokz0,34691
20
20
  selectolax/modest/selection.pxi,sha256=PfHUN1uuNA7YfcxTu7JZjhxevVbFRP1bHd3kyyFdO7E,6703
21
21
  selectolax/modest/util.pxi,sha256=zab67Wzo8FcipA2VS8ClptaC19lZirbNqFEGQ3hW2Is,572
22
- selectolax-0.3.33.dist-info/licenses/LICENSE,sha256=A7Jb3WZcENcLfZRc7QPdm9zJdwfpIyPodPJu-kdMH6E,1087
23
- selectolax-0.3.33.dist-info/METADATA,sha256=HmxKX5O__XxvqspHStldQCxGb22RSRndWDtpxBSntl8,6402
24
- selectolax-0.3.33.dist-info/WHEEL,sha256=XkFE14KmFh7mutkkb-qn_ueuH2lwfT8rLdfc5xpQ7wE,99
25
- selectolax-0.3.33.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
26
- selectolax-0.3.33.dist-info/RECORD,,
22
+ selectolax-0.3.34.dist-info/licenses/LICENSE,sha256=A7Jb3WZcENcLfZRc7QPdm9zJdwfpIyPodPJu-kdMH6E,1087
23
+ selectolax-0.3.34.dist-info/METADATA,sha256=rAqskRB9wMSn7tEZLxZswUJD4wFzN4fizyXjiBv4L4o,1318
24
+ selectolax-0.3.34.dist-info/WHEEL,sha256=XkFE14KmFh7mutkkb-qn_ueuH2lwfT8rLdfc5xpQ7wE,99
25
+ selectolax-0.3.34.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
26
+ selectolax-0.3.34.dist-info/RECORD,,
@@ -1,187 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: selectolax
3
- Version: 0.3.33
4
- Summary: Fast HTML5 parser with CSS selectors.
5
- Home-page: https://github.com/rushter/selectolax
6
- Author: Artem Golubin
7
- Author-email: Artem Golubin <me@rushter.com>
8
- License: MIT
9
- Project-URL: Repository, https://github.com/rushter/selectolax
10
- Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
11
- Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.rst
12
- Keywords: selectolax,html,parser,css,fast
13
- Classifier: Development Status :: 5 - Production/Stable
14
- Classifier: Topic :: Text Processing :: Markup :: HTML
15
- Classifier: Topic :: Internet
16
- Classifier: Topic :: Internet :: WWW/HTTP
17
- Classifier: Intended Audience :: Developers
18
- Classifier: Natural Language :: English
19
- Classifier: Programming Language :: Python :: 3
20
- Classifier: Programming Language :: Python :: 3.9
21
- Classifier: Programming Language :: Python :: 3.10
22
- Classifier: Programming Language :: Python :: 3.11
23
- Classifier: Programming Language :: Python :: 3.12
24
- Classifier: Programming Language :: Python :: 3.13
25
- Requires-Python: >=3.9
26
- Description-Content-Type: text/x-rst
27
- License-File: LICENSE
28
- Provides-Extra: cython
29
- Requires-Dist: Cython; extra == "cython"
30
- Dynamic: author
31
- Dynamic: home-page
32
- Dynamic: license-file
33
-
34
- .. image:: docs/logo.png
35
- :alt: selectolax logo
36
-
37
- -------------------------
38
-
39
- .. image:: https://img.shields.io/pypi/v/selectolax.svg
40
- :target: https://pypi.python.org/pypi/selectolax
41
-
42
- A fast HTML5 parser with CSS selectors using `Modest <https://github.com/lexborisov/Modest/>`_ and
43
- `Lexbor <https://github.com/lexbor/lexbor>`_ engines.
44
-
45
-
46
- Installation
47
- ------------
48
- From PyPI using pip:
49
-
50
- .. code-block:: bash
51
-
52
- pip install selectolax
53
-
54
- If installation fails due to compilation errors, you may need to install `Cython <https://github.com/cython/cython>`_:
55
-
56
- .. code-block:: bash
57
-
58
- pip install selectolax[cython]
59
-
60
- This usually happens when you try to install an outdated version of selectolax on a newer version of Python.
61
-
62
-
63
- Development version from GitHub:
64
-
65
- .. code-block:: bash
66
-
67
- git clone --recursive https://github.com/rushter/selectolax
68
- cd selectolax
69
- pip install -r requirements_dev.txt
70
- python setup.py install
71
-
72
- How to compile selectolax while developing:
73
-
74
- .. code-block:: bash
75
-
76
- make clean
77
- make dev
78
-
79
- Basic examples
80
- --------------
81
-
82
- Here are some basic examples to get you started with selectolax:
83
-
84
- Parsing HTML and extracting text:
85
-
86
- .. code:: python
87
-
88
- In [1]: from selectolax.parser import HTMLParser
89
- ...:
90
- ...: html = """
91
- ...: <h1 id="title" data-updated="20201101">Hi there</h1>
92
- ...: <div class="post">Lorem Ipsum is simply dummy text of the printing and typesetting industry. </div>
93
- ...: <div class="post">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</div>
94
- ...: """
95
- ...: tree = HTMLParser(html)
96
-
97
- In [2]: tree.css_first('h1#title').text()
98
- Out[2]: 'Hi there'
99
-
100
- In [3]: tree.css_first('h1#title').attributes
101
- Out[3]: {'id': 'title', 'data-updated': '20201101'}
102
-
103
- In [4]: [node.text() for node in tree.css('.post')]
104
- Out[4]:
105
- ['Lorem Ipsum is simply dummy text of the printing and typesetting industry. ',
106
- 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.']
107
-
108
- Using advanced CSS selectors:
109
-
110
- .. code:: python
111
-
112
- In [1]: html = "<div><p id=p1><p id=p2><p id=p3><a>link</a><p id=p4><p id=p5>text<p id=p6></div>"
113
- ...: selector = "div > :nth-child(2n+1):not(:has(a))"
114
-
115
- In [2]: for node in HTMLParser(html).css(selector):
116
- ...: print(node.attributes, node.text(), node.tag)
117
- ...: print(node.parent.tag)
118
- ...: print(node.html)
119
- ...:
120
- {'id': 'p1'} p
121
- div
122
- <p id="p1"></p>
123
- {'id': 'p5'} text p
124
- div
125
- <p id="p5">text</p>
126
-
127
-
128
- * `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
129
-
130
- Available backends
131
- ------------------
132
-
133
- Selectolax supports two backends: ``Modest`` and ``Lexbor``. By default, all examples use the Modest backend.
134
- Most of the features between backends are almost identical, but there are still some differences.
135
-
136
- As of 2024, the preferred backend is ``Lexbor``. The ``Modest`` backend is still available for compatibility reasons
137
- and the underlying C library that selectolax uses is not maintained anymore.
138
-
139
-
140
- To use ``lexbor``, just import the parser and use it in the similar way to the `HTMLParser`.
141
-
142
- .. code:: python
143
-
144
- In [1]: from selectolax.lexbor import LexborHTMLParser
145
-
146
- In [2]: html = """
147
- ...: <title>Hi there</title>
148
- ...: <div id="updated">2021-08-15</div>
149
- ...: """
150
-
151
- In [3]: parser = LexborHTMLParser(html)
152
- In [4]: parser.root.css_first("#updated").text()
153
- Out[4]: '2021-08-15'
154
-
155
-
156
- Simple Benchmark
157
- ----------------
158
-
159
- * Extract title, links, scripts and a meta tag from main pages of top 754 domains. See ``examples/benchmark.py`` for more information.
160
-
161
- ============================ ===========
162
- Package Time
163
- ============================ ===========
164
- Beautiful Soup (html.parser) 61.02 sec.
165
- lxml / Beautiful Soup (lxml) 9.09 sec.
166
- html5_parser 16.10 sec.
167
- selectolax (Modest) 2.94 sec.
168
- selectolax (Lexbor) 2.39 sec.
169
- ============================ ===========
170
-
171
- Links
172
- -----
173
-
174
- * `selectolax API reference <https://selectolax.readthedocs.io/en/latest/index.html>`_
175
- * `Video introduction to web scraping using selectolax <https://youtu.be/HpRsfpPuUzE>`_
176
- * `How to Scrape 7k Products with Python using selectolax and httpx <https://www.youtube.com/watch?v=XpGvq755J2U>`_
177
- * `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
178
- * `Modest introduction <https://lexborisov.github.io/Modest/>`_
179
- * `Modest benchmark <https://lexborisov.github.io/benchmark-html-parsers/>`_
180
- * `Python benchmark <https://rushter.com/blog/python-fast-html-parser/>`_
181
- * `Another Python benchmark <https://www.peterbe.com/plog/selectolax-or-pyquery>`_
182
-
183
- License
184
- -------
185
-
186
- * Modest engine — `LGPL2.1 <https://github.com/lexborisov/Modest/blob/master/LICENSE>`_
187
- * selectolax - `MIT <https://github.com/rushter/selectolax/blob/master/LICENSE>`_