selectolax 0.3.33__cp311-cp311-win_amd64.whl → 0.4.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

Binary file
selectolax/parser.pyi CHANGED
@@ -203,7 +203,10 @@ class Node:
203
203
  ...
204
204
  @property
205
205
  def child(self) -> Node | None:
206
- """Return the child node."""
206
+ """Alias for the `first_child` property.
207
+
208
+ **Deprecated**. Please use `first_child` instead.
209
+ """
207
210
  ...
208
211
  @property
209
212
  def parent(self) -> Node | None:
@@ -588,7 +591,7 @@ class HTMLParser:
588
591
  query : str
589
592
  default : bool, default None
590
593
  Default value to return if there is no match.
591
- strict: bool, default True
594
+ strict: bool, default False
592
595
  Set to True if you want to check if there is strictly only one match in the document.
593
596
 
594
597
 
selectolax/parser.pyx CHANGED
@@ -72,9 +72,9 @@ cdef class HTMLParser:
72
72
  ----------
73
73
 
74
74
  query : str
75
- default : bool, default None
75
+ default : Any, default None
76
76
  Default value to return if there is no match.
77
- strict: bool, default True
77
+ strict: bool, default False
78
78
  Set to True if you want to check if there is strictly only one match in the document.
79
79
 
80
80
 
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.4
2
+ Name: selectolax
3
+ Version: 0.4.0
4
+ Summary: Fast HTML5 parser with CSS selectors.
5
+ Home-page: https://github.com/rushter/selectolax
6
+ Author: Artem Golubin
7
+ Author-email: Artem Golubin <me@rushter.com>
8
+ License-Expression: MIT
9
+ Project-URL: Repository, https://github.com/rushter/selectolax
10
+ Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
11
+ Project-URL: Changelog, https://github.com/rushter/selectolax/blob/master/CHANGES.md
12
+ Keywords: selectolax,html,parser,css,fast
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Topic :: Text Processing :: Markup :: HTML
15
+ Classifier: Topic :: Internet
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Intended Audience :: Developers
18
+ Classifier: Natural Language :: English
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Requires-Python: >=3.9
26
+ Description-Content-Type: text/x-rst
27
+ License-File: LICENSE
28
+ Provides-Extra: cython
29
+ Requires-Dist: Cython; extra == "cython"
30
+ Dynamic: author
31
+ Dynamic: home-page
32
+ Dynamic: license-file
@@ -0,0 +1,27 @@
1
+ selectolax/__init__.py,sha256=-050n6lsikxxW1ZltsXOBwrx_z-ULnFcxLZ0xTjoJQg,156
2
+ selectolax/base.pxi,sha256=zOj3BrCA71xd-mJFtkMIAglP4ZybfrHVoCoy6ljTBDQ,93
3
+ selectolax/lexbor.c,sha256=6syPUDLP3Kua9c9D13ahscLTX2LmSbru1gzqcTpwDVs,2501237
4
+ selectolax/lexbor.cp311-win_amd64.pyd,sha256=JyfVbOUZzC_lLyFMcJ-7AejJWXpTJimVbfqFxXrzyBU,3186176
5
+ selectolax/lexbor.pxd,sha256=jZugG2ClrbWzJohWWeqblIVmbvFHkbX5WB-Wvc8tkn4,22236
6
+ selectolax/lexbor.pyi,sha256=hu1HMzR3mWfraGM-XNyiX3eHU_xljVEPg0AoRF27a_w,31046
7
+ selectolax/lexbor.pyx,sha256=P5Q3it_Apj3rFhywvpSlcZXofOhuq9upHzbOAEDBcuA,13892
8
+ selectolax/parser.c,sha256=gWdDUJkfF1K75R15HA17AWaV0JF2jSXM5zYEa2WWVIk,2260668
9
+ selectolax/parser.cp311-win_amd64.pyd,sha256=XmUeV74Xr6JfABdbnIhYXX80Jj8h3Mnn5ErWcl2mB3A,2107392
10
+ selectolax/parser.pxd,sha256=T7GoQdaOkhp_W2TBlRY0tZqom97PkHrytYaXQlyVnbI,25196
11
+ selectolax/parser.pyi,sha256=wO1pkE5efgtWLGt0ULEqHEZ_XyxcBy_YWzFZmU5mJdQ,25699
12
+ selectolax/parser.pyx,sha256=wy6GHgv__oa5gt1kQYGJGNc4cwVIU1NdJUJ8O4FKqmI,14127
13
+ selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ selectolax/utils.pxi,sha256=hkzKfycdpwH1P-E_pP-9NTGsmiajt6EJNZSlkxlRapA,3623
15
+ selectolax/lexbor/attrs.pxi,sha256=d59V77aGkpp7YsYsd6t_z4-tRnUoQTJZKsvMC8nyttM,3978
16
+ selectolax/lexbor/node.pxi,sha256=98M-9bRzKfnbHn0-vT1v3fSUrXqojpPK2yK4FI2BEJk,36345
17
+ selectolax/lexbor/node_remove.pxi,sha256=6h7KwWCxkLWtl71VROlLDA6EukHK745iyKK-uNw78fI,789
18
+ selectolax/lexbor/selection.pxi,sha256=H2-oPgLAEBwhwXMmk_-49YKah5z5hGgwBK0gciQtBak,8245
19
+ selectolax/lexbor/util.pxi,sha256=q2EYVNdnROg9y30mWpGwlNA0W00nJ7ZRNEEDrOEG14s,584
20
+ selectolax/modest/node.pxi,sha256=M52G3TlJrHIErR8CLqIXWUjSlNSCanjSJ9aFKn1p74Y,34777
21
+ selectolax/modest/selection.pxi,sha256=PfHUN1uuNA7YfcxTu7JZjhxevVbFRP1bHd3kyyFdO7E,6703
22
+ selectolax/modest/util.pxi,sha256=zab67Wzo8FcipA2VS8ClptaC19lZirbNqFEGQ3hW2Is,572
23
+ selectolax-0.4.0.dist-info/licenses/LICENSE,sha256=A7Jb3WZcENcLfZRc7QPdm9zJdwfpIyPodPJu-kdMH6E,1087
24
+ selectolax-0.4.0.dist-info/METADATA,sha256=tbBfJ2_fHMj95Pm8aazPr0U2Jvon511S9bn5FKDMWSs,1319
25
+ selectolax-0.4.0.dist-info/WHEEL,sha256=JLOMsP7F5qtkAkINx5UnzbFguf8CqZeraV8o04b0I8I,101
26
+ selectolax-0.4.0.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
27
+ selectolax-0.4.0.dist-info/RECORD,,
@@ -1,187 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: selectolax
3
- Version: 0.3.33
4
- Summary: Fast HTML5 parser with CSS selectors.
5
- Home-page: https://github.com/rushter/selectolax
6
- Author: Artem Golubin
7
- Author-email: Artem Golubin <me@rushter.com>
8
- License: MIT
9
- Project-URL: Repository, https://github.com/rushter/selectolax
10
- Project-URL: Documentation, https://selectolax.readthedocs.io/en/latest/parser.html
11
- Project-URL: Changelog, https://github.com/rushter/selectolax/blob/main/CHANGES.rst
12
- Keywords: selectolax,html,parser,css,fast
13
- Classifier: Development Status :: 5 - Production/Stable
14
- Classifier: Topic :: Text Processing :: Markup :: HTML
15
- Classifier: Topic :: Internet
16
- Classifier: Topic :: Internet :: WWW/HTTP
17
- Classifier: Intended Audience :: Developers
18
- Classifier: Natural Language :: English
19
- Classifier: Programming Language :: Python :: 3
20
- Classifier: Programming Language :: Python :: 3.9
21
- Classifier: Programming Language :: Python :: 3.10
22
- Classifier: Programming Language :: Python :: 3.11
23
- Classifier: Programming Language :: Python :: 3.12
24
- Classifier: Programming Language :: Python :: 3.13
25
- Requires-Python: >=3.9
26
- Description-Content-Type: text/x-rst
27
- License-File: LICENSE
28
- Provides-Extra: cython
29
- Requires-Dist: Cython; extra == "cython"
30
- Dynamic: author
31
- Dynamic: home-page
32
- Dynamic: license-file
33
-
34
- .. image:: docs/logo.png
35
- :alt: selectolax logo
36
-
37
- -------------------------
38
-
39
- .. image:: https://img.shields.io/pypi/v/selectolax.svg
40
- :target: https://pypi.python.org/pypi/selectolax
41
-
42
- A fast HTML5 parser with CSS selectors using `Modest <https://github.com/lexborisov/Modest/>`_ and
43
- `Lexbor <https://github.com/lexbor/lexbor>`_ engines.
44
-
45
-
46
- Installation
47
- ------------
48
- From PyPI using pip:
49
-
50
- .. code-block:: bash
51
-
52
- pip install selectolax
53
-
54
- If installation fails due to compilation errors, you may need to install `Cython <https://github.com/cython/cython>`_:
55
-
56
- .. code-block:: bash
57
-
58
- pip install selectolax[cython]
59
-
60
- This usually happens when you try to install an outdated version of selectolax on a newer version of Python.
61
-
62
-
63
- Development version from GitHub:
64
-
65
- .. code-block:: bash
66
-
67
- git clone --recursive https://github.com/rushter/selectolax
68
- cd selectolax
69
- pip install -r requirements_dev.txt
70
- python setup.py install
71
-
72
- How to compile selectolax while developing:
73
-
74
- .. code-block:: bash
75
-
76
- make clean
77
- make dev
78
-
79
- Basic examples
80
- --------------
81
-
82
- Here are some basic examples to get you started with selectolax:
83
-
84
- Parsing HTML and extracting text:
85
-
86
- .. code:: python
87
-
88
- In [1]: from selectolax.parser import HTMLParser
89
- ...:
90
- ...: html = """
91
- ...: <h1 id="title" data-updated="20201101">Hi there</h1>
92
- ...: <div class="post">Lorem Ipsum is simply dummy text of the printing and typesetting industry. </div>
93
- ...: <div class="post">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</div>
94
- ...: """
95
- ...: tree = HTMLParser(html)
96
-
97
- In [2]: tree.css_first('h1#title').text()
98
- Out[2]: 'Hi there'
99
-
100
- In [3]: tree.css_first('h1#title').attributes
101
- Out[3]: {'id': 'title', 'data-updated': '20201101'}
102
-
103
- In [4]: [node.text() for node in tree.css('.post')]
104
- Out[4]:
105
- ['Lorem Ipsum is simply dummy text of the printing and typesetting industry. ',
106
- 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.']
107
-
108
- Using advanced CSS selectors:
109
-
110
- .. code:: python
111
-
112
- In [1]: html = "<div><p id=p1><p id=p2><p id=p3><a>link</a><p id=p4><p id=p5>text<p id=p6></div>"
113
- ...: selector = "div > :nth-child(2n+1):not(:has(a))"
114
-
115
- In [2]: for node in HTMLParser(html).css(selector):
116
- ...: print(node.attributes, node.text(), node.tag)
117
- ...: print(node.parent.tag)
118
- ...: print(node.html)
119
- ...:
120
- {'id': 'p1'} p
121
- div
122
- <p id="p1"></p>
123
- {'id': 'p5'} text p
124
- div
125
- <p id="p5">text</p>
126
-
127
-
128
- * `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
129
-
130
- Available backends
131
- ------------------
132
-
133
- Selectolax supports two backends: ``Modest`` and ``Lexbor``. By default, all examples use the Modest backend.
134
- Most of the features between backends are almost identical, but there are still some differences.
135
-
136
- As of 2024, the preferred backend is ``Lexbor``. The ``Modest`` backend is still available for compatibility reasons
137
- and the underlying C library that selectolax uses is not maintained anymore.
138
-
139
-
140
- To use ``lexbor``, just import the parser and use it in the similar way to the `HTMLParser`.
141
-
142
- .. code:: python
143
-
144
- In [1]: from selectolax.lexbor import LexborHTMLParser
145
-
146
- In [2]: html = """
147
- ...: <title>Hi there</title>
148
- ...: <div id="updated">2021-08-15</div>
149
- ...: """
150
-
151
- In [3]: parser = LexborHTMLParser(html)
152
- In [4]: parser.root.css_first("#updated").text()
153
- Out[4]: '2021-08-15'
154
-
155
-
156
- Simple Benchmark
157
- ----------------
158
-
159
- * Extract title, links, scripts and a meta tag from main pages of top 754 domains. See ``examples/benchmark.py`` for more information.
160
-
161
- ============================ ===========
162
- Package Time
163
- ============================ ===========
164
- Beautiful Soup (html.parser) 61.02 sec.
165
- lxml / Beautiful Soup (lxml) 9.09 sec.
166
- html5_parser 16.10 sec.
167
- selectolax (Modest) 2.94 sec.
168
- selectolax (Lexbor) 2.39 sec.
169
- ============================ ===========
170
-
171
- Links
172
- -----
173
-
174
- * `selectolax API reference <https://selectolax.readthedocs.io/en/latest/index.html>`_
175
- * `Video introduction to web scraping using selectolax <https://youtu.be/HpRsfpPuUzE>`_
176
- * `How to Scrape 7k Products with Python using selectolax and httpx <https://www.youtube.com/watch?v=XpGvq755J2U>`_
177
- * `Detailed overview <https://github.com/rushter/selectolax/blob/master/examples/walkthrough.ipynb>`_
178
- * `Modest introduction <https://lexborisov.github.io/Modest/>`_
179
- * `Modest benchmark <https://lexborisov.github.io/benchmark-html-parsers/>`_
180
- * `Python benchmark <https://rushter.com/blog/python-fast-html-parser/>`_
181
- * `Another Python benchmark <https://www.peterbe.com/plog/selectolax-or-pyquery>`_
182
-
183
- License
184
- -------
185
-
186
- * Modest engine — `LGPL2.1 <https://github.com/lexborisov/Modest/blob/master/LICENSE>`_
187
- * selectolax - `MIT <https://github.com/rushter/selectolax/blob/master/LICENSE>`_
@@ -1,26 +0,0 @@
1
- selectolax/__init__.py,sha256=_qbVtaupjc0xeV8F8cU3lbDgckEoaUlhy2N013uLu4Y,157
2
- selectolax/base.pxi,sha256=zOj3BrCA71xd-mJFtkMIAglP4ZybfrHVoCoy6ljTBDQ,93
3
- selectolax/lexbor.c,sha256=K806oXPqw4y4LoljPDmj7d7weW0xG58gTRQ7UR1jNU8,2418331
4
- selectolax/lexbor.cp311-win_amd64.pyd,sha256=QOWQAtYxtfrdPZxVqJheyPBGh5RXjE4lqs_uvTk_Qqg,3149824
5
- selectolax/lexbor.pxd,sha256=BcqAzhlUVq0GVWiJHWXNhs4jY-gi6k0BELEnQtSYJAI,21720
6
- selectolax/lexbor.pyi,sha256=6oQU-9-LE_OAhwsETmynUYhCEE3lhxnEzj36TefQheo,28586
7
- selectolax/lexbor.pyx,sha256=mgP31DQHkRVAOvu0Eax29sCEq57OWUA51LoPOAFZ6VU,12691
8
- selectolax/parser.c,sha256=PABrCk3JLEVb3nf02FtAplokuPQDSFVuuoV54MBKu1s,2258494
9
- selectolax/parser.cp311-win_amd64.pyd,sha256=aO22XnknHTaPZyt3AC3Anglz_H8MZ71I1bGsUfpLAlo,2107392
10
- selectolax/parser.pxd,sha256=T7GoQdaOkhp_W2TBlRY0tZqom97PkHrytYaXQlyVnbI,25196
11
- selectolax/parser.pyi,sha256=-qutpjrK1dD4rrl3SsHWQt2FT5lv6meaACkQzk1Bt6o,25612
12
- selectolax/parser.pyx,sha256=nIWuhaEFRwlfo64WmgrSOM0A8mUw0eWw9j_fWyLV-Ro,14127
13
- selectolax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- selectolax/utils.pxi,sha256=hkzKfycdpwH1P-E_pP-9NTGsmiajt6EJNZSlkxlRapA,3623
15
- selectolax/lexbor/attrs.pxi,sha256=d59V77aGkpp7YsYsd6t_z4-tRnUoQTJZKsvMC8nyttM,3978
16
- selectolax/lexbor/node.pxi,sha256=aOH-pr0DtXOZ63YyfVVd3pppXL3mAbuugNT9HhKvoE0,33205
17
- selectolax/lexbor/selection.pxi,sha256=BeUDypw5_P0CTmi-ACLcd7pK2NnG9ASrwWOdLdweAZY,7378
18
- selectolax/lexbor/util.pxi,sha256=q2EYVNdnROg9y30mWpGwlNA0W00nJ7ZRNEEDrOEG14s,584
19
- selectolax/modest/node.pxi,sha256=iX_yRPIPVkG0ALW7hEfmXiVperw6RjkSGATkxzLokz0,34691
20
- selectolax/modest/selection.pxi,sha256=PfHUN1uuNA7YfcxTu7JZjhxevVbFRP1bHd3kyyFdO7E,6703
21
- selectolax/modest/util.pxi,sha256=zab67Wzo8FcipA2VS8ClptaC19lZirbNqFEGQ3hW2Is,572
22
- selectolax-0.3.33.dist-info/licenses/LICENSE,sha256=A7Jb3WZcENcLfZRc7QPdm9zJdwfpIyPodPJu-kdMH6E,1087
23
- selectolax-0.3.33.dist-info/METADATA,sha256=HmxKX5O__XxvqspHStldQCxGb22RSRndWDtpxBSntl8,6402
24
- selectolax-0.3.33.dist-info/WHEEL,sha256=JLOMsP7F5qtkAkINx5UnzbFguf8CqZeraV8o04b0I8I,101
25
- selectolax-0.3.33.dist-info/top_level.txt,sha256=e5MuEM2PrQzoDlWetkFli9uXSlxa_ktW5jJEihhaI1c,11
26
- selectolax-0.3.33.dist-info/RECORD,,