selectolax 0.3.33__cp312-cp312-macosx_11_0_arm64.whl → 0.4.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

Binary file
selectolax/parser.pyi CHANGED
@@ -203,7 +203,10 @@ class Node:
203
203
  ...
204
204
  @property
205
205
  def child(self) -> Node | None:
206
- """Return the child node."""
206
+ """Alias for the `first_child` property.
207
+
208
+ **Deprecated**. Please use `first_child` instead.
209
+ """
207
210
  ...
208
211
  @property
209
212
  def parent(self) -> Node | None:
@@ -588,7 +591,7 @@ class HTMLParser:
588
591
  query : str
589
592
  default : bool, default None
590
593
  Default value to return if there is no match.
591
- strict: bool, default True
594
+ strict: bool, default False
592
595
  Set to True if you want to check if there is strictly only one match in the document.
593
596
 
594
597
 
selectolax/parser.pyx CHANGED
@@ -72,9 +72,9 @@ cdef class HTMLParser:
72
72
  ----------
73
73
 
74
74
  query : str
75
- default : bool, default None
75
+ default : Any, default None
76
76
  Default value to return if there is no match.
77
- strict: bool, default True
77
+ strict: bool, default False
78
78
  Set to True if you want to check if there is strictly only one match in the document.
79
79
 
80
80
 
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.4
2
+ Name: selectolax
3
+ Version: 0.4.0
4
+ Summary: Fast HTML5 parser with CSS selectors.
5
+ Home-page: https://github.com/rushter/selectolax
6
+ Author: Artem Golubin
7
+ Author-email: Artem Golubin <me@rushter.com>
8
+ License-Expression: MIT
9
+ Project-URL: Repository, https://github.com/rushter/selectolax
10
+ Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
11
+ Project-URL: Changelog, https://github.com/rushter/selectolax/blob/master/CHANGES.md
12
+ Keywords: selectolax,html,parser,css,fast
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Topic :: Text Processing :: Markup :: HTML
15
+ Classifier: Topic :: Internet
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Intended Audience :: Developers
18
+ Classifier: Natural Language :: English
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Requires-Python: >=3.9
26
+ Description-Content-Type: text/x-rst
27
+ License-File: LICENSE
28
+ Provides-Extra: cython
29
+ Requires-Dist: Cython; extra == "cython"
30
+ Dynamic: author
31
+ Dynamic: home-page
32
+ Dynamic: license-file
@@ -0,0 +1,27 @@
1
+ selectolax-0.4.0.dist-info/RECORD,,
2
+ selectolax-0.4.0.dist-info/WHEEL,sha256=V1loQ6TpxABu1APUg0MoTRBOzSKT5xVc3skizX-ovCU,136
3
+ selectolax-0.4.0.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
4
+ selectolax-0.4.0.dist-info/METADATA,sha256=xFNpvUeepMxneDCjZoP5kTv4nBQccf5_wyWieI3BFnY,1287
5
+ selectolax-0.4.0.dist-info/licenses/LICENSE,sha256=MYCcM-Cv_rC2-lQiwDumin0E-rMXAhK-qIGGA29434Y,1077
6
+ selectolax/lexbor.pyi,sha256=MDQ4YQWcywG3oeSITWifMkCsa09MmPbyXMQq06wqwAY,30092
7
+ selectolax/parser.pyx,sha256=sBq2_HR83Ek8yqnFBYrG2xBBCM4S6Jiiie_xq5O_twE,13684
8
+ selectolax/__init__.py,sha256=CnY6a5BeJexKaFN_b2L28F5AVD1jPM1lFz9kfS6RC5w,148
9
+ selectolax/lexbor.pxd,sha256=cAitQeHgGxp5Aac-o5aaOyg6_IiOpp3Rg0JNlz8Cstk,21652
10
+ selectolax/lexbor.pyx,sha256=-QsF8Ru8DvWEEy3AIjXDdoKTG5saocX-HkTE_feS6tQ,13468
11
+ selectolax/parser.pyi,sha256=qi9AHy_DWalANSOVTN6gbtbf-YJZCfE7i12aWJbrUp8,24929
12
+ selectolax/utils.pxi,sha256=_g-ZLprPgbqv7BLs-WEe8IhbDd_QTcfirz_NEyR1Yww,3506
13
+ selectolax/lexbor.cpython-312-darwin.so,sha256=CVrn2QYhMAfOUUuZOD6OLa7pfYBBMdIHKo7IHfnuOu4,3623072
14
+ selectolax/lexbor.c,sha256=lSvE0yZPdmEMord00GqpDiLYKz8_iZriiTy3TwO86Cc,2445661
15
+ selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ selectolax/parser.c,sha256=V2KgVlllg6gKjnW99qB4snuKg0XYpS2f2YDMsoaFLt0,2210174
17
+ selectolax/parser.cpython-312-darwin.so,sha256=dMAbSK0KJAl1t-Mt0OQ7HakBZriAUCH3dTsItwh0OB4,2872576
18
+ selectolax/base.pxi,sha256=eiPKlY9gG3l49qJoRQVLl1Ljza6z1k0A-met6sDPcqE,89
19
+ selectolax/parser.pxd,sha256=BQSlDGibVfqFDhfqX6l5sBnfkHEETxlj-eSpGWERKEs,24618
20
+ selectolax/modest/selection.pxi,sha256=m4GDpl0aI7lSWHFeBBheroUKDrZgJcc6uVubtzrXL1M,6508
21
+ selectolax/modest/util.pxi,sha256=di9cLmAyuGFXmiuptZ7Fz1SgkCf7hmiZLnpKCKEKsUc,552
22
+ selectolax/modest/node.pxi,sha256=l0aQf2Ojpzxh-L-0KxLetG7uGgGhkV7Cqgfy8O_5ch4,33786
23
+ selectolax/lexbor/selection.pxi,sha256=ZJ5ed7YgxvcsOW_qPbMhUQRKgChl9cih1n1d5elfTZ8,8030
24
+ selectolax/lexbor/util.pxi,sha256=hqMQU1O_5O82ThjUzk8NxQPl-Kg29DDGFFpC46LcejI,564
25
+ selectolax/lexbor/node.pxi,sha256=Sj5Kx_I2vBarZRNrYhPk2TufhEYYNlV9wnSbLACyZMQ,35311
26
+ selectolax/lexbor/node_remove.pxi,sha256=iqJ2PPNvQmK2dq8kJLXiZawoGf1Az3MpbrlQI6k4jDM,760
27
+ selectolax/lexbor/attrs.pxi,sha256=eH90zJYHicffTzC7peIitHkOqyIw3xzomhJHxJv9hP8,3858
@@ -1,187 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: selectolax
3
- Version: 0.3.33
4
- Summary: Fast HTML5 parser with CSS selectors.
5
- Home-page: https://github.com/rushter/selectolax
6
- Author: Artem Golubin
7
- Author-email: Artem Golubin <me@rushter.com>
8
- License: MIT
9
- Project-URL: Repository, https://github.com/rushter/selectolax
10
- Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
11
- Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.rst
12
- Keywords: selectolax,html,parser,css,fast
13
- Classifier: Development Status :: 5 - Production/Stable
14
- Classifier: Topic :: Text Processing :: Markup :: HTML
15
- Classifier: Topic :: Internet
16
- Classifier: Topic :: Internet :: WWW/HTTP
17
- Classifier: Intended Audience :: Developers
18
- Classifier: Natural Language :: English
19
- Classifier: Programming Language :: Python :: 3
20
- Classifier: Programming Language :: Python :: 3.9
21
- Classifier: Programming Language :: Python :: 3.10
22
- Classifier: Programming Language :: Python :: 3.11
23
- Classifier: Programming Language :: Python :: 3.12
24
- Classifier: Programming Language :: Python :: 3.13
25
- Requires-Python: >=3.9
26
- Description-Content-Type: text/x-rst
27
- License-File: LICENSE
28
- Provides-Extra: cython
29
- Requires-Dist: Cython; extra == "cython"
30
- Dynamic: author
31
- Dynamic: home-page
32
- Dynamic: license-file
33
-
34
- .. image:: docs/logo.png
35
- :alt: selectolax logo
36
-
37
- -------------------------
38
-
39
- .. image:: https://img.shields.io/pypi/v/selectolax.svg
40
- :target: https://pypi.python.org/pypi/selectolax
41
-
42
- A fast HTML5 parser with CSS selectors using `Modest <https://github.com/lexborisov/Modest/>`_ and
43
- `Lexbor <https://github.com/lexbor/lexbor>`_ engines.
44
-
45
-
46
- Installation
47
- ------------
48
- From PyPI using pip:
49
-
50
- .. code-block:: bash
51
-
52
- pip install selectolax
53
-
54
- If installation fails due to compilation errors, you may need to install `Cython <https://github.com/cython/cython>`_:
55
-
56
- .. code-block:: bash
57
-
58
- pip install selectolax[cython]
59
-
60
- This usually happens when you try to install an outdated version of selectolax on a newer version of Python.
61
-
62
-
63
- Development version from GitHub:
64
-
65
- .. code-block:: bash
66
-
67
- git clone --recursive https://github.com/rushter/selectolax
68
- cd selectolax
69
- pip install -r requirements_dev.txt
70
- python setup.py install
71
-
72
- How to compile selectolax while developing:
73
-
74
- .. code-block:: bash
75
-
76
- make clean
77
- make dev
78
-
79
- Basic examples
80
- --------------
81
-
82
- Here are some basic examples to get you started with selectolax:
83
-
84
- Parsing HTML and extracting text:
85
-
86
- .. code:: python
87
-
88
- In [1]: from selectolax.parser import HTMLParser
89
- ...:
90
- ...: html = """
91
- ...: <h1 id="title" data-updated="20201101">Hi there</h1>
92
- ...: <div class="post">Lorem Ipsum is simply dummy text of the printing and typesetting industry. </div>
93
- ...: <div class="post">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</div>
94
- ...: """
95
- ...: tree = HTMLParser(html)
96
-
97
- In [2]: tree.css_first('h1#title').text()
98
- Out[2]: 'Hi there'
99
-
100
- In [3]: tree.css_first('h1#title').attributes
101
- Out[3]: {'id': 'title', 'data-updated': '20201101'}
102
-
103
- In [4]: [node.text() for node in tree.css('.post')]
104
- Out[4]:
105
- ['Lorem Ipsum is simply dummy text of the printing and typesetting industry. ',
106
- 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.']
107
-
108
- Using advanced CSS selectors:
109
-
110
- .. code:: python
111
-
112
- In [1]: html = "<div><p id=p1><p id=p2><p id=p3><a>link</a><p id=p4><p id=p5>text<p id=p6></div>"
113
- ...: selector = "div > :nth-child(2n+1):not(:has(a))"
114
-
115
- In [2]: for node in HTMLParser(html).css(selector):
116
- ...: print(node.attributes, node.text(), node.tag)
117
- ...: print(node.parent.tag)
118
- ...: print(node.html)
119
- ...:
120
- {'id': 'p1'} p
121
- div
122
- <p id="p1"></p>
123
- {'id': 'p5'} text p
124
- div
125
- <p id="p5">text</p>
126
-
127
-
128
- * `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
129
-
130
- Available backends
131
- ------------------
132
-
133
- Selectolax supports two backends: ``Modest`` and ``Lexbor``. By default, all examples use the Modest backend.
134
- Most of the features between backends are almost identical, but there are still some differences.
135
-
136
- As of 2024, the preferred backend is ``Lexbor``. The ``Modest`` backend is still available for compatibility reasons
137
- and the underlying C library that selectolax uses is not maintained anymore.
138
-
139
-
140
- To use ``lexbor``, just import the parser and use it in the similar way to the `HTMLParser`.
141
-
142
- .. code:: python
143
-
144
- In [1]: from selectolax.lexbor import LexborHTMLParser
145
-
146
- In [2]: html = """
147
- ...: <title>Hi there</title>
148
- ...: <div id="updated">2021-08-15</div>
149
- ...: """
150
-
151
- In [3]: parser = LexborHTMLParser(html)
152
- In [4]: parser.root.css_first("#updated").text()
153
- Out[4]: '2021-08-15'
154
-
155
-
156
- Simple Benchmark
157
- ----------------
158
-
159
- * Extract title, links, scripts and a meta tag from main pages of top 754 domains. See ``examples/benchmark.py`` for more information.
160
-
161
- ============================ ===========
162
- Package Time
163
- ============================ ===========
164
- Beautiful Soup (html.parser) 61.02 sec.
165
- lxml / Beautiful Soup (lxml) 9.09 sec.
166
- html5_parser 16.10 sec.
167
- selectolax (Modest) 2.94 sec.
168
- selectolax (Lexbor) 2.39 sec.
169
- ============================ ===========
170
-
171
- Links
172
- -----
173
-
174
- * `selectolax API reference <https://selectolax.readthedocs.io/en/latest/index.html>`_
175
- * `Video introduction to web scraping using selectolax <https://youtu.be/HpRsfpPuUzE>`_
176
- * `How to Scrape 7k Products with Python using selectolax and httpx <https://www.youtube.com/watch?v=XpGvq755J2U>`_
177
- * `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
178
- * `Modest introduction <https://lexborisov.github.io/Modest/>`_
179
- * `Modest benchmark <https://lexborisov.github.io/benchmark-html-parsers/>`_
180
- * `Python benchmark <https://rushter.com/blog/python-fast-html-parser/>`_
181
- * `Another Python benchmark <https://www.peterbe.com/plog/selectolax-or-pyquery>`_
182
-
183
- License
184
- -------
185
-
186
- * Modest engine — `LGPL2.1 <https://github.com/lexborisov/Modest/blob/master/LICENSE>`_
187
- * selectolax - `MIT <https://github.com/rushter/selectolax/blob/master/LICENSE>`_
@@ -1,26 +0,0 @@
1
- selectolax/lexbor.pyi,sha256=CX_14MMiljOLZcoRVy96bEmGbqLTXTzSv12flrVzGDg,27721
2
- selectolax/parser.pyx,sha256=i2bCslGrqYARZvCab4mIdRFyA9k7AuvJSVjZsqrT4SM,13684
3
- selectolax/__init__.py,sha256=MLVYxSOb4b5Rkz6I8O35L8iUn-Z691F1THazWpmf-iw,149
4
- selectolax/lexbor.pxd,sha256=FTx2SphXsIHCCmEouzGSp9j2CBgrOc12Wf9E7DPIbrI,21144
5
- selectolax/lexbor.pyx,sha256=g_6kYxAlqCOsMoihwOuRVgyf-F-TIbgxksHYPNlNE3s,12308
6
- selectolax/parser.pyi,sha256=tCCAWcaeBJrhctUKpdTKVX6z4_tsxPz8D5GpbxO3Hz0,24845
7
- selectolax/utils.pxi,sha256=_g-ZLprPgbqv7BLs-WEe8IhbDd_QTcfirz_NEyR1Yww,3506
8
- selectolax/lexbor.cpython-312-darwin.so,sha256=odl7lLRIChnazm0n1uFf0u_5QiAdMdwln58w6EwlbZA,3755648
9
- selectolax/lexbor.c,sha256=O5VPB3zfyb-QP557RGxvP6j--S1tJX25JBEIsdgV8cg,2364654
10
- selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- selectolax/parser.c,sha256=SMGGm8vfvCFgwdxab4GgZwZYJguoEsFY3BhHtuFCCT4,2208079
12
- selectolax/parser.cpython-312-darwin.so,sha256=XKrY-z0BAWNNqDwyPTP0261kwsf8TRt8UnD2OPMxyJk,3066656
13
- selectolax/base.pxi,sha256=eiPKlY9gG3l49qJoRQVLl1Ljza6z1k0A-met6sDPcqE,89
14
- selectolax/parser.pxd,sha256=BQSlDGibVfqFDhfqX6l5sBnfkHEETxlj-eSpGWERKEs,24618
15
- selectolax/modest/selection.pxi,sha256=m4GDpl0aI7lSWHFeBBheroUKDrZgJcc6uVubtzrXL1M,6508
16
- selectolax/modest/util.pxi,sha256=di9cLmAyuGFXmiuptZ7Fz1SgkCf7hmiZLnpKCKEKsUc,552
17
- selectolax/modest/node.pxi,sha256=9D-o4Kdd-e_ebc-z8eAVxlVDdmfN5g2fUn6rj4sUSxg,33703
18
- selectolax/lexbor/selection.pxi,sha256=aYna6zEH9vBGxtcYe8AdOGseEh4ieymFt3Zu4cQssTs,7185
19
- selectolax/lexbor/util.pxi,sha256=hqMQU1O_5O82ThjUzk8NxQPl-Kg29DDGFFpC46LcejI,564
20
- selectolax/lexbor/node.pxi,sha256=1lpb3t-lGSeGMBkCT3a0IML-MonNJzTKDgiOWPfPpbA,32271
21
- selectolax/lexbor/attrs.pxi,sha256=eH90zJYHicffTzC7peIitHkOqyIw3xzomhJHxJv9hP8,3858
22
- selectolax-0.3.33.dist-info/RECORD,,
23
- selectolax-0.3.33.dist-info/WHEEL,sha256=V1loQ6TpxABu1APUg0MoTRBOzSKT5xVc3skizX-ovCU,136
24
- selectolax-0.3.33.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
25
- selectolax-0.3.33.dist-info/METADATA,sha256=sQZmiyZ9oiWmyUNzYHd_cnkmVPjk7cK8ef0Wyyf7h_A,6215
26
- selectolax-0.3.33.dist-info/licenses/LICENSE,sha256=MYCcM-Cv_rC2-lQiwDumin0E-rMXAhK-qIGGA29434Y,1077