scrapling 0.2.91__tar.gz → 0.2.92__tar.gz

Files changed (56)
  1. {scrapling-0.2.91 → scrapling-0.2.92}/MANIFEST.in +3 -0
  2. {scrapling-0.2.91/scrapling.egg-info → scrapling-0.2.92}/PKG-INFO +7 -40
  3. {scrapling-0.2.91 → scrapling-0.2.92}/README.md +5 -39
  4. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/__init__.py +1 -1
  5. scrapling-0.2.92/scrapling/cli.py +37 -0
  6. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/camo.py +10 -12
  7. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/pw.py +10 -12
  8. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/parser.py +2 -2
  9. {scrapling-0.2.91 → scrapling-0.2.92/scrapling.egg-info}/PKG-INFO +7 -40
  10. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling.egg-info/SOURCES.txt +2 -0
  11. scrapling-0.2.92/scrapling.egg-info/entry_points.txt +2 -0
  12. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling.egg-info/requires.txt +1 -0
  13. {scrapling-0.2.91 → scrapling-0.2.92}/setup.cfg +1 -1
  14. {scrapling-0.2.91 → scrapling-0.2.92}/setup.py +7 -1
  15. {scrapling-0.2.91 → scrapling-0.2.92}/LICENSE +0 -0
  16. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/core/__init__.py +0 -0
  17. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/core/_types.py +0 -0
  18. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/core/custom_types.py +0 -0
  19. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/core/mixins.py +0 -0
  20. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/core/storage_adaptors.py +0 -0
  21. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/core/translator.py +0 -0
  22. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/core/utils.py +0 -0
  23. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/defaults.py +0 -0
  24. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/__init__.py +0 -0
  25. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/constants.py +0 -0
  26. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/static.py +0 -0
  27. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/toolbelt/__init__.py +0 -0
  28. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/toolbelt/bypasses/navigator_plugins.js +0 -0
  29. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/toolbelt/bypasses/notification_permission.js +0 -0
  30. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/toolbelt/bypasses/pdf_viewer.js +0 -0
  31. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js +0 -0
  32. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/toolbelt/bypasses/screen_props.js +0 -0
  33. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/toolbelt/bypasses/webdriver_fully.js +0 -0
  34. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/toolbelt/bypasses/window_chrome.js +0 -0
  35. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/toolbelt/custom.py +0 -0
  36. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/toolbelt/fingerprints.py +0 -0
  37. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/toolbelt/navigation.py +0 -0
  38. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/fetchers.py +0 -0
  39. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling/py.typed +0 -0
  40. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling.egg-info/dependency_links.txt +0 -0
  41. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling.egg-info/not-zip-safe +0 -0
  42. {scrapling-0.2.91 → scrapling-0.2.92}/scrapling.egg-info/top_level.txt +0 -0
  43. {scrapling-0.2.91 → scrapling-0.2.92}/tests/__init__.py +0 -0
  44. {scrapling-0.2.91 → scrapling-0.2.92}/tests/fetchers/__init__.py +0 -0
  45. {scrapling-0.2.91 → scrapling-0.2.92}/tests/fetchers/async/__init__.py +0 -0
  46. {scrapling-0.2.91 → scrapling-0.2.92}/tests/fetchers/async/test_camoufox.py +0 -0
  47. {scrapling-0.2.91 → scrapling-0.2.92}/tests/fetchers/async/test_httpx.py +0 -0
  48. {scrapling-0.2.91 → scrapling-0.2.92}/tests/fetchers/async/test_playwright.py +0 -0
  49. {scrapling-0.2.91 → scrapling-0.2.92}/tests/fetchers/sync/__init__.py +0 -0
  50. {scrapling-0.2.91 → scrapling-0.2.92}/tests/fetchers/sync/test_camoufox.py +0 -0
  51. {scrapling-0.2.91 → scrapling-0.2.92}/tests/fetchers/sync/test_httpx.py +0 -0
  52. {scrapling-0.2.91 → scrapling-0.2.92}/tests/fetchers/sync/test_playwright.py +0 -0
  53. {scrapling-0.2.91 → scrapling-0.2.92}/tests/fetchers/test_utils.py +0 -0
  54. {scrapling-0.2.91 → scrapling-0.2.92}/tests/parser/__init__.py +0 -0
  55. {scrapling-0.2.91 → scrapling-0.2.92}/tests/parser/test_automatch.py +0 -0
  56. {scrapling-0.2.91 → scrapling-0.2.92}/tests/parser/test_general.py +0 -0
{scrapling-0.2.91 → scrapling-0.2.92}/MANIFEST.in
@@ -4,7 +4,10 @@ include *.js
 include scrapling/engines/toolbelt/bypasses/*.js
 include scrapling/*.db
 include scrapling/*.db*
+include scrapling/*.db-*
 include scrapling/py.typed
+include scrapling/.scrapling_dependencies_installed
+include .scrapling_dependencies_installed

 recursive-exclude * __pycache__
 recursive-exclude * *.py[co]
{scrapling-0.2.91/scrapling.egg-info → scrapling-0.2.92}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scrapling
-Version: 0.2.91
+Version: 0.2.92
 Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -34,6 +34,7 @@ License-File: LICENSE
 Requires-Dist: requests>=2.3
 Requires-Dist: lxml>=4.5
 Requires-Dist: cssselect>=1.2
+Requires-Dist: click
 Requires-Dist: w3lib
 Requires-Dist: orjson>=3
 Requires-Dist: tldextract
@@ -211,52 +212,18 @@ Scrapling can find elements with more methods and it returns full element `Adapt
 > All benchmarks' results are an average of 100 runs. See our [benchmarks.py](https://github.com/D4Vinci/Scrapling/blob/main/benchmarks.py) for methodology and to run your comparisons.

 ## Installation
-Scrapling is a breeze to get started with - Starting from version 0.2.9, we require at least Python 3.9 to work.
+Scrapling is a breeze to get started with; Starting from version 0.2.9, we require at least Python 3.9 to work.
 ```bash
 pip3 install scrapling
 ```
-- For using the `StealthyFetcher`, go to the command line and download the browser with
-<details><summary>Windows OS</summary>
-
-```bash
-camoufox fetch --browserforge
-```
-</details>
-<details><summary>MacOS</summary>
-
-```bash
-python3 -m camoufox fetch --browserforge
-```
-</details>
-<details><summary>Linux</summary>
-
+Then run this command to install browsers' dependencies needed to use Fetcher classes
 ```bash
-python -m camoufox fetch --browserforge
-```
-On a fresh installation of Linux, you may also need the following Firefox dependencies:
-- Debian-based distros
-```bash
-sudo apt install -y libgtk-3-0 libx11-xcb1 libasound2
-```
-- Arch-based distros
-```bash
-sudo pacman -S gtk3 libx11 libxcb cairo libasound alsa-lib
-```
-</details>
-
-<small> See the official <a href="https://camoufox.com/python/installation/#download-the-browser">Camoufox documentation</a> for more info on installation</small>
-
-- If you are going to use the `PlayWrightFetcher` options, then install Playwright's Chromium browser with:
-```commandline
-playwright install chromium
-```
-- If you are going to use normal requests only with the `Fetcher` class then update the fingerprints files with:
-```commandline
-python -m browserforge update
+scrapling install
 ```
+If you have any installation issues, please open an issue.

 ## Fetching Websites
-Fetchers are basically interfaces that do requests or fetch pages for you in a single request fashion and then return an `Adaptor` object for you. This feature was introduced because the only option we had before was to fetch the page as you wanted it, then pass it manually to the `Adaptor` class to create an `Adaptor` instance and start playing around with the page.
+Fetchers are interfaces built on top of other libraries with added features that do requests or fetch pages for you in a single request fashion and then return an `Adaptor` object. This feature was introduced because the only option we had before was to fetch the page as you wanted it, then pass it manually to the `Adaptor` class to create an `Adaptor` instance and start playing around with the page.

 ### Features
 You might be slightly confused by now so let me clear things up. All fetcher-type classes are imported in the same way
{scrapling-0.2.91 → scrapling-0.2.92}/README.md
@@ -167,52 +167,18 @@ Scrapling can find elements with more methods and it returns full element `Adapt
 > All benchmarks' results are an average of 100 runs. See our [benchmarks.py](https://github.com/D4Vinci/Scrapling/blob/main/benchmarks.py) for methodology and to run your comparisons.

 ## Installation
-Scrapling is a breeze to get started with - Starting from version 0.2.9, we require at least Python 3.9 to work.
+Scrapling is a breeze to get started with; Starting from version 0.2.9, we require at least Python 3.9 to work.
 ```bash
 pip3 install scrapling
 ```
-- For using the `StealthyFetcher`, go to the command line and download the browser with
-<details><summary>Windows OS</summary>
-
-```bash
-camoufox fetch --browserforge
-```
-</details>
-<details><summary>MacOS</summary>
-
-```bash
-python3 -m camoufox fetch --browserforge
-```
-</details>
-<details><summary>Linux</summary>
-
+Then run this command to install browsers' dependencies needed to use Fetcher classes
 ```bash
-python -m camoufox fetch --browserforge
-```
-On a fresh installation of Linux, you may also need the following Firefox dependencies:
-- Debian-based distros
-```bash
-sudo apt install -y libgtk-3-0 libx11-xcb1 libasound2
-```
-- Arch-based distros
-```bash
-sudo pacman -S gtk3 libx11 libxcb cairo libasound alsa-lib
-```
-</details>
-
-<small> See the official <a href="https://camoufox.com/python/installation/#download-the-browser">Camoufox documentation</a> for more info on installation</small>
-
-- If you are going to use the `PlayWrightFetcher` options, then install Playwright's Chromium browser with:
-```commandline
-playwright install chromium
-```
-- If you are going to use normal requests only with the `Fetcher` class then update the fingerprints files with:
-```commandline
-python -m browserforge update
+scrapling install
 ```
+If you have any installation issues, please open an issue.

 ## Fetching Websites
-Fetchers are basically interfaces that do requests or fetch pages for you in a single request fashion and then return an `Adaptor` object for you. This feature was introduced because the only option we had before was to fetch the page as you wanted it, then pass it manually to the `Adaptor` class to create an `Adaptor` instance and start playing around with the page.
+Fetchers are interfaces built on top of other libraries with added features that do requests or fetch pages for you in a single request fashion and then return an `Adaptor` object. This feature was introduced because the only option we had before was to fetch the page as you wanted it, then pass it manually to the `Adaptor` class to create an `Adaptor` instance and start playing around with the page.

 ### Features
 You might be slightly confused by now so let me clear things up. All fetcher-type classes are imported in the same way
{scrapling-0.2.91 → scrapling-0.2.92}/scrapling/__init__.py
@@ -5,7 +5,7 @@ from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
 from scrapling.parser import Adaptor, Adaptors

 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.91"
+__version__ = "0.2.92"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"


scrapling-0.2.92/scrapling/cli.py
@@ -0,0 +1,37 @@
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+import click
+
+
+def get_package_dir():
+    return Path(os.path.dirname(__file__))
+
+
+def run_command(command, line):
+    print(f"Installing {line}...")
+    _ = subprocess.check_call(command, shell=True)
+    # I meant to not use try except here
+
+
+@click.command(help="Install all Scrapling's Fetchers dependencies")
+def install():
+    if not get_package_dir().joinpath(".scrapling_dependencies_installed").exists():
+        run_command([sys.executable, "-m", "playwright", "install", 'chromium'], 'Playwright browsers')
+        run_command([sys.executable, "-m", "playwright", "install-deps", 'chromium', 'firefox'], 'Playwright dependencies')
+        run_command([sys.executable, "-m", "camoufox", "fetch", '--browserforge'], 'Camoufox browser and databases')
+        # if no errors raised by above commands, then we add below file
+        get_package_dir().joinpath(".scrapling_dependencies_installed").touch()
+    else:
+        print('The dependencies are already installed')
+
+
+@click.group()
+def main():
+    pass
+
+
+# Adding commands
+main.add_command(install)
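
The new `scrapling` console command is a click group with a single `install` subcommand. For anyone wanting to exercise it in-process rather than from a shell, click's public testing API works against this module directly; a minimal sketch (not part of Scrapling itself):

```python
# Minimal sketch: invoke the new CLI in-process via click's test runner.
# Equivalent to running `scrapling install` in a shell once the package
# and its console script are installed.
from click.testing import CliRunner

from scrapling.cli import main

runner = CliRunner()
result = runner.invoke(main, ["install"])  # dispatches to the install() command
print(result.exit_code, result.output)
```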
{scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/camo.py
@@ -89,7 +89,7 @@ class CamoufoxEngine:

         def handle_response(finished_response):
             nonlocal final_response
-            if finished_response.request.resource_type == "document":
+            if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
                 final_response = finished_response

         with Camoufox(
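
For context, `is_navigation_request()` is part of Playwright's request API; the added check narrows the capture from "any finished response whose request has `resource_type == 'document'`" to responses that actually drive a frame navigation. A standalone sketch of the pattern against plain sync Playwright (not Scrapling's engine; the URL is a placeholder):

```python
# Sketch of the tightened response filter using plain sync Playwright.
from playwright.sync_api import sync_playwright

final_response = None

def handle_response(finished_response):
    global final_response
    # Keep only document responses that drive a navigation, as in the hunk above.
    if (finished_response.request.resource_type == "document"
            and finished_response.request.is_navigation_request()):
        final_response = finished_response

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    page.on("response", handle_response)  # fires for every finished response
    page.goto("https://example.com")
    print(final_response.url if final_response else "no navigation document seen")
    browser.close()
```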
@@ -133,7 +133,6 @@ class CamoufoxEngine:
             if self.network_idle:
                 page.wait_for_load_state('networkidle')

-            response_bytes = final_response.body() if final_response else page.content().encode('utf-8')
             # In case we didn't catch a document type somehow
             final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
@@ -142,15 +141,15 @@ class CamoufoxEngine:
             status_text = final_response.status_text or StatusText.get(final_response.status)

             response = Response(
-                url=final_response.url,
+                url=page.url,
                 text=page.content(),
-                body=response_bytes,
+                body=page.content().encode('utf-8'),
                 status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
-                headers=final_response.all_headers(),
-                request_headers=final_response.request.all_headers(),
+                headers=first_response.all_headers(),
+                request_headers=first_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             page.close()
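
Net effect of these two hunks: `Response.body` is now always the serialized live DOM (`page.content()`) rather than the raw bytes of the captured network response, the URL is taken from the page after any redirects, and headers come from the first response. A small standalone sketch of the body distinction, again with plain sync Playwright and a placeholder URL:

```python
# Sketch: raw network body vs. serialized DOM after JavaScript has run.
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    network_response = page.goto("https://example.com")
    raw_bytes = network_response.body()        # bytes exactly as served
    rendered = page.content().encode("utf-8")  # DOM after scripts mutated it
    # On JS-heavy pages these differ; the diff above switches Response.body
    # to the rendered form.
    print(len(raw_bytes), len(rendered))
    browser.close()
```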
@@ -169,7 +168,7 @@ class CamoufoxEngine:

         async def handle_response(finished_response):
             nonlocal final_response
-            if finished_response.request.resource_type == "document":
+            if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
                 final_response = finished_response

         async with AsyncCamoufox(
@@ -213,7 +212,6 @@ class CamoufoxEngine:
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')

-            response_bytes = await final_response.body() if final_response else (await page.content()).encode('utf-8')
             # In case we didn't catch a document type somehow
             final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
@@ -222,15 +220,15 @@ class CamoufoxEngine:
             status_text = final_response.status_text or StatusText.get(final_response.status)

             response = Response(
-                url=final_response.url,
+                url=page.url,
                 text=await page.content(),
-                body=response_bytes,
+                body=(await page.content()).encode('utf-8'),
                 status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
-                headers=await final_response.all_headers(),
-                request_headers=await final_response.request.all_headers(),
+                headers=await first_response.all_headers(),
+                request_headers=await first_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             await page.close()
{scrapling-0.2.91 → scrapling-0.2.92}/scrapling/engines/pw.py
@@ -206,7 +206,7 @@ class PlaywrightEngine:

         def handle_response(finished_response: PlaywrightResponse):
             nonlocal final_response
-            if finished_response.request.resource_type == "document":
+            if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
                 final_response = finished_response

         with sync_playwright() as p:
@@ -252,7 +252,6 @@ class PlaywrightEngine:
             if self.network_idle:
                 page.wait_for_load_state('networkidle')

-            response_bytes = final_response.body() if final_response else page.content().encode('utf-8')
             # In case we didn't catch a document type somehow
             final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
@@ -261,15 +260,15 @@ class PlaywrightEngine:
             status_text = final_response.status_text or StatusText.get(final_response.status)

             response = Response(
-                url=final_response.url,
+                url=page.url,
                 text=page.content(),
-                body=response_bytes,
+                body=page.content().encode('utf-8'),
                 status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
-                headers=final_response.all_headers(),
-                request_headers=final_response.request.all_headers(),
+                headers=first_response.all_headers(),
+                request_headers=first_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             page.close()
@@ -293,7 +292,7 @@ class PlaywrightEngine:

         async def handle_response(finished_response: PlaywrightResponse):
             nonlocal final_response
-            if finished_response.request.resource_type == "document":
+            if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
                 final_response = finished_response

         async with async_playwright() as p:
@@ -339,7 +338,6 @@ class PlaywrightEngine:
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')

-            response_bytes = await final_response.body() if final_response else (await page.content()).encode('utf-8')
             # In case we didn't catch a document type somehow
             final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
@@ -348,15 +346,15 @@ class PlaywrightEngine:
             status_text = final_response.status_text or StatusText.get(final_response.status)

             response = Response(
-                url=final_response.url,
+                url=page.url,
                 text=await page.content(),
-                body=response_bytes,
+                body=(await page.content()).encode('utf-8'),
                 status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
-                headers=await final_response.all_headers(),
-                request_headers=await final_response.request.all_headers(),
+                headers=await first_response.all_headers(),
+                request_headers=await first_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             await page.close()
{scrapling-0.2.91 → scrapling-0.2.92}/scrapling/parser.py
@@ -474,7 +474,7 @@ class Adaptor(SelectorsGeneration):

     def css(self, selector: str, identifier: str = '',
             auto_match: bool = False, auto_save: bool = False, percentage: int = 0
-            ) -> Union['Adaptors[Adaptor]', List]:
+            ) -> Union['Adaptors[Adaptor]', List, 'TextHandlers[TextHandler]']:
         """Search current tree with CSS3 selectors

         **Important:
@@ -517,7 +517,7 @@ class Adaptor(SelectorsGeneration):

     def xpath(self, selector: str, identifier: str = '',
               auto_match: bool = False, auto_save: bool = False, percentage: int = 0, **kwargs: Any
-              ) -> Union['Adaptors[Adaptor]', List]:
+              ) -> Union['Adaptors[Adaptor]', List, 'TextHandlers[TextHandler]']:
         """Search current tree with XPath selectors

         **Important:
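
The widened return annotations reflect that `css`/`xpath` can yield text nodes rather than elements, for example with parsel-style `::text` selectors or XPath `text()`. A hedged sketch of the distinction (the exact `Adaptor` constructor arguments are an assumption and may differ between versions):

```python
# Sketch: element selection vs. text-node selection on an Adaptor.
from scrapling import Adaptor

page = Adaptor('<html><body><p>Hello</p><p>World</p></body></html>')
elements = page.css('p')      # Adaptors[Adaptor]: element nodes
texts = page.css('p::text')   # TextHandlers[TextHandler]: text nodes
print(texts)
```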
{scrapling-0.2.91 → scrapling-0.2.92/scrapling.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scrapling
-Version: 0.2.91
+Version: 0.2.92
 Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -34,6 +34,7 @@ License-File: LICENSE
 Requires-Dist: requests>=2.3
 Requires-Dist: lxml>=4.5
 Requires-Dist: cssselect>=1.2
+Requires-Dist: click
 Requires-Dist: w3lib
 Requires-Dist: orjson>=3
 Requires-Dist: tldextract
@@ -211,52 +212,18 @@ Scrapling can find elements with more methods and it returns full element `Adapt
 > All benchmarks' results are an average of 100 runs. See our [benchmarks.py](https://github.com/D4Vinci/Scrapling/blob/main/benchmarks.py) for methodology and to run your comparisons.

 ## Installation
-Scrapling is a breeze to get started with - Starting from version 0.2.9, we require at least Python 3.9 to work.
+Scrapling is a breeze to get started with; Starting from version 0.2.9, we require at least Python 3.9 to work.
 ```bash
 pip3 install scrapling
 ```
-- For using the `StealthyFetcher`, go to the command line and download the browser with
-<details><summary>Windows OS</summary>
-
-```bash
-camoufox fetch --browserforge
-```
-</details>
-<details><summary>MacOS</summary>
-
-```bash
-python3 -m camoufox fetch --browserforge
-```
-</details>
-<details><summary>Linux</summary>
-
+Then run this command to install browsers' dependencies needed to use Fetcher classes
 ```bash
-python -m camoufox fetch --browserforge
-```
-On a fresh installation of Linux, you may also need the following Firefox dependencies:
-- Debian-based distros
-```bash
-sudo apt install -y libgtk-3-0 libx11-xcb1 libasound2
-```
-- Arch-based distros
-```bash
-sudo pacman -S gtk3 libx11 libxcb cairo libasound alsa-lib
-```
-</details>
-
-<small> See the official <a href="https://camoufox.com/python/installation/#download-the-browser">Camoufox documentation</a> for more info on installation</small>
-
-- If you are going to use the `PlayWrightFetcher` options, then install Playwright's Chromium browser with:
-```commandline
-playwright install chromium
-```
-- If you are going to use normal requests only with the `Fetcher` class then update the fingerprints files with:
-```commandline
-python -m browserforge update
+scrapling install
 ```
+If you have any installation issues, please open an issue.

 ## Fetching Websites
-Fetchers are basically interfaces that do requests or fetch pages for you in a single request fashion and then return an `Adaptor` object for you. This feature was introduced because the only option we had before was to fetch the page as you wanted it, then pass it manually to the `Adaptor` class to create an `Adaptor` instance and start playing around with the page.
+Fetchers are interfaces built on top of other libraries with added features that do requests or fetch pages for you in a single request fashion and then return an `Adaptor` object. This feature was introduced because the only option we had before was to fetch the page as you wanted it, then pass it manually to the `Adaptor` class to create an `Adaptor` instance and start playing around with the page.

 ### Features
 You might be slightly confused by now so let me clear things up. All fetcher-type classes are imported in the same way
{scrapling-0.2.91 → scrapling-0.2.92}/scrapling.egg-info/SOURCES.txt
@@ -4,6 +4,7 @@ README.md
 setup.cfg
 setup.py
 scrapling/__init__.py
+scrapling/cli.py
 scrapling/defaults.py
 scrapling/fetchers.py
 scrapling/parser.py
@@ -11,6 +12,7 @@ scrapling/py.typed
 scrapling.egg-info/PKG-INFO
 scrapling.egg-info/SOURCES.txt
 scrapling.egg-info/dependency_links.txt
+scrapling.egg-info/entry_points.txt
 scrapling.egg-info/not-zip-safe
 scrapling.egg-info/requires.txt
 scrapling.egg-info/top_level.txt
scrapling-0.2.92/scrapling.egg-info/entry_points.txt
@@ -0,0 +1,2 @@
+[console_scripts]
+scrapling = scrapling.cli:main
{scrapling-0.2.91 → scrapling-0.2.92}/scrapling.egg-info/requires.txt
@@ -1,6 +1,7 @@
 requests>=2.3
 lxml>=4.5
 cssselect>=1.2
+click
 w3lib
 orjson>=3
 tldextract
{scrapling-0.2.91 → scrapling-0.2.92}/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = scrapling
-version = 0.2.91
+version = 0.2.92
 author = Karim Shoair
 author_email = karim.shoair@pm.me
 description = Scrapling is an undetectable, powerful, flexible, adaptive, and high-performance web scraping library for Python.
{scrapling-0.2.91 → scrapling-0.2.92}/setup.py
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:

 setup(
     name="scrapling",
-    version="0.2.91",
+    version="0.2.92",
     description="""Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 simplifies the process of extracting data from websites, even when they undergo structural changes, and offers
 impressive speed improvements over many popular scraping tools.""",
@@ -20,6 +20,11 @@ setup(
     package_dir={
         "scrapling": "scrapling",
     },
+    entry_points={
+        'console_scripts': [
+            'scrapling=scrapling.cli:main'
+        ],
+    },
     include_package_data=True,
     classifiers=[
         "Operating System :: OS Independent",
@@ -50,6 +55,7 @@ setup(
         "requests>=2.3",
         "lxml>=4.5",
         "cssselect>=1.2",
+        'click',
         "w3lib",
         "orjson>=3",
         "tldextract",