PlaywrightCapture 1.28.1__tar.gz → 1.28.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: PlaywrightCapture
3
- Version: 1.28.1
3
+ Version: 1.28.3
4
4
  Summary: A simple library to capture websites using playwright
5
5
  License: BSD-3-Clause
6
6
  Author: Raphaël Vinot
@@ -18,7 +18,7 @@ Classifier: Programming Language :: Python :: 3.13
18
18
  Classifier: Topic :: Internet
19
19
  Classifier: Topic :: Security
20
20
  Provides-Extra: recaptcha
21
- Requires-Dist: SpeechRecognition (>=3.14.1) ; extra == "recaptcha"
21
+ Requires-Dist: SpeechRecognition (>=3.14.2) ; extra == "recaptcha"
22
22
  Requires-Dist: aiohttp-socks (>=0.10.1)
23
23
  Requires-Dist: aiohttp[speedups] (>=3.11.14)
24
24
  Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
@@ -28,8 +28,9 @@ Requires-Dist: playwright (>=1.51.0)
28
28
  Requires-Dist: playwright-stealth (>=1.0.6)
29
29
  Requires-Dist: puremagic (>=1.28)
30
30
  Requires-Dist: pydub (>=0.25.1) ; extra == "recaptcha"
31
- Requires-Dist: setuptools (>=77.0.1)
32
- Requires-Dist: tzdata (>=2025.1)
31
+ Requires-Dist: setuptools (>=78.1.0)
32
+ Requires-Dist: typing-extensions (>=4.12.2,<5.0.0) ; python_version < "3.12"
33
+ Requires-Dist: tzdata (>=2025.2)
33
34
  Requires-Dist: w3lib (>=2.3.1)
34
35
  Project-URL: Issues, https://github.com/Lookyloo/PlaywrightCapture/issues
35
36
  Project-URL: Repository, https://github.com/Lookyloo/PlaywrightCapture
@@ -17,7 +17,7 @@ from dataclasses import dataclass
17
17
  from io import BytesIO
18
18
  from logging import LoggerAdapter, Logger
19
19
  from tempfile import NamedTemporaryFile
20
- from typing import Any, TypedDict, Literal, TYPE_CHECKING
20
+ from typing import Any, Literal, TYPE_CHECKING
21
21
  from collections.abc import MutableMapping, Iterator
22
22
  from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit
23
23
  from zipfile import ZipFile
@@ -46,6 +46,11 @@ if sys.version_info < (3, 11):
46
46
  else:
47
47
  from asyncio import timeout
48
48
 
49
+ if sys.version_info < (3, 12):
50
+ from typing_extensions import TypedDict
51
+ else:
52
+ from typing import TypedDict
53
+
49
54
  if TYPE_CHECKING:
50
55
  from playwright._impl._api_structures import (SetCookieParam, Geolocation,
51
56
  HttpCredentials, Headers,
@@ -915,6 +920,7 @@ class Capture():
915
920
  referer: str | None=None,
916
921
  page: Page | None=None, depth: int=0,
917
922
  rendered_hostname_only: bool=True,
923
+ with_screenshot: bool=True,
918
924
  with_favicon: bool=False,
919
925
  allow_tracking: bool=False,
920
926
  clock_set: bool=False
@@ -1094,7 +1100,9 @@ class Capture():
1094
1100
  self.logger.warning(f'Unable to get favicons: {e}')
1095
1101
 
1096
1102
  to_return['last_redirected_url'] = page.url
1097
- to_return['png'] = await self._failsafe_get_screenshot(page)
1103
+
1104
+ if with_screenshot:
1105
+ to_return['png'] = await self._failsafe_get_screenshot(page)
1098
1106
 
1099
1107
  self._already_captured.add(url)
1100
1108
  if depth > 0 and to_return.get('html') and to_return['html']:
@@ -1126,7 +1134,7 @@ class Capture():
1126
1134
  page=page, depth=depth,
1127
1135
  rendered_hostname_only=rendered_hostname_only,
1128
1136
  max_depth_capture_time=max_capture_time,
1129
- clock_set=clock_set)
1137
+ clock_set=clock_set, with_screenshot=with_screenshot)
1130
1138
  to_return['children'].append(child_capture) # type: ignore[union-attr]
1131
1139
  except (TimeoutError, asyncio.TimeoutError):
1132
1140
  self.logger.info(f'Timeout error, took more than {max_capture_time}s. Unable to capture {url}.')
@@ -2,13 +2,19 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import sys
6
+
5
7
  from collections import defaultdict
6
- from typing import TypedDict
7
8
 
8
9
  from playwright.sync_api import sync_playwright
9
10
 
10
11
  from .exceptions import UnknownPlaywrightDeviceType
11
12
 
13
+ if sys.version_info < (3, 12):
14
+ from typing_extensions import TypedDict
15
+ else:
16
+ from typing import TypedDict
17
+
12
18
 
13
19
  class PlaywrightDevice(TypedDict):
14
20
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "PlaywrightCapture"
3
- version = "1.28.1"
3
+ version = "1.28.3"
4
4
  description = "A simple library to capture websites using playwright"
5
5
  authors = [
6
6
  {name="Raphaël Vinot", email= "raphael.vinot@circl.lu"}
@@ -16,13 +16,14 @@ dependencies = [
16
16
  "dateparser (>=1.2.1)",
17
17
  "beautifulsoup4[charset-normalizer,lxml] (>=4.13.3)",
18
18
  "w3lib (>=2.3.1)",
19
- "tzdata (>=2025.1)",
19
+ "tzdata (>=2025.2)",
20
20
  "playwright-stealth (>=1.0.6)",
21
- "setuptools (>=77.0.1)",
21
+ "setuptools (>=78.1.0)",
22
22
  "puremagic (>=1.28)",
23
23
  "async-timeout (>=5.0.1) ; python_version < \"3.11\"",
24
24
  "aiohttp[speedups] (>=3.11.14)",
25
- "aiohttp-socks (>=0.10.1)"
25
+ "aiohttp-socks (>=0.10.1)",
26
+ "typing-extensions (>=4.12.2,<5.0.0) ; python_version < \"3.12\""
26
27
  ]
27
28
 
28
29
  [project.urls]
@@ -41,7 +42,7 @@ classifiers=[
41
42
  [project.optional-dependencies]
42
43
  recaptcha = [
43
44
  "pydub (>=0.25.1)",
44
- "SpeechRecognition (>=3.14.1)"
45
+ "SpeechRecognition (>=3.14.2)"
45
46
  ]
46
47
 
47
48
  [tool.poetry.group.dev.dependencies]
@@ -49,7 +50,7 @@ types-beautifulsoup4 = "^4.12.0.20250204"
49
50
  pytest = "^8.3.5"
50
51
  mypy = "^1.15.0"
51
52
  types-dateparser = "^1.2.0.20250208"
52
- types-pytz = "^2025.1.0.20250318"
53
+ types-pytz = "^2025.2.0.20250326"
53
54
 
54
55
 
55
56
  [build-system]