PyPI - PlaywrightCapture - Versions diffs - 1.32.1__tar.gz → 1.32.3__tar.gz - Mend

PlaywrightCapture 1.32.1__tar.gz → 1.32.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

{playwrightcapture-1.32.1 → playwrightcapture-1.32.3}/PKG-INFO RENAMED Viewed

@@ -1,16 +1,15 @@
 Metadata-Version: 2.3
 Name: PlaywrightCapture
-Version: 1.32.1
+Version: 1.32.3
 Summary: A simple library to capture websites using playwright
 License: BSD-3-Clause
 Author: Raphaël Vinot
 Author-email: raphael.vinot@circl.lu
-Requires-Python: >=3.9,<4.0
+Requires-Python: >=3.9.2,<4.0
 Classifier: Intended Audience :: Science/Research
 Classifier: Intended Audience :: Telecommunications Industry
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
@@ -31,6 +30,7 @@ Requires-Dist: puremagic (>=1.30)
 Requires-Dist: pydub (>=0.25.1) ; (python_version < "3.10") and (extra == "recaptcha")
 Requires-Dist: pydub-ng (>=0.2.0) ; (python_version >= "3.10") and (extra == "recaptcha")
 Requires-Dist: python-socks (>=2.7.1,<3.0.0)
+Requires-Dist: rfc3161-client (>=1.0.4,<2.0.0)
 Requires-Dist: setuptools (>=80.9.0)
 Requires-Dist: typing-extensions (>=4.12.2,<5.0.0) ; python_version < "3.12"
 Requires-Dist: tzdata (>=2025.2)

{playwrightcapture-1.32.1 → playwrightcapture-1.32.3}/playwrightcapture/__init__.py RENAMED Viewed

@@ -1,6 +1,7 @@
 from .capture import Capture  # noqa
 from .capture import CaptureResponse  # noqa
 from .capture import SetCookieParam, Cookie  # noqa
+from .capture import TrustedTimestampSettings # noqa
 from .helpers import get_devices  # noqa
 from .exceptions import (PlaywrightCaptureException, UnknownPlaywrightDeviceType,  # noqa
                          UnknownPlaywrightBrowser, UnknownPlaywrightDevice,
@@ -9,6 +10,7 @@ from .exceptions import (PlaywrightCaptureException, UnknownPlaywrightDeviceType
 __all__ = [
     'Capture',
     'CaptureResponse',
+    'TrustedTimestampSettings',
     'SetCookieParam', 'Cookie',
     'get_devices',
     'PlaywrightCaptureException',

{playwrightcapture-1.32.1 → playwrightcapture-1.32.3}/playwrightcapture/capture.py RENAMED Viewed

@@ -4,6 +4,7 @@ from __future__ import annotations
 import asyncio
 import binascii
+# import hashlib
 import json
 import logging
 import os
@@ -12,13 +13,13 @@ import re
 import sys
 import time
-from base64 import b64decode
+from base64 import b64decode, b64encode
 from io import BytesIO
 from logging import LoggerAdapter, Logger
 from tempfile import NamedTemporaryFile
 from typing import Any, Literal, TYPE_CHECKING
 from collections.abc import MutableMapping
-from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit
+from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit, unquote_plus
 from zipfile import ZipFile
 import aiohttp
@@ -32,6 +33,7 @@ from playwright.async_api import async_playwright, Frame, Error, Page, Download,
 from playwright.async_api import TimeoutError as PlaywrightTimeoutError
 from playwright_stealth import Stealth, ALL_EVASIONS_DISABLED_KWARGS  # type: ignore[attr-defined]
 from puremagic import PureError, from_string
+from rfc3161_client import TimestampRequestBuilder, TimeStampRequest, HashAlgorithm
 from w3lib.html import strip_html5_whitespace
 from w3lib.url import canonicalize_url, safe_url_string
@@ -119,6 +121,10 @@ class CaptureResponse(TypedDict, total=False):
     downloaded_file: bytes | None
     children: list[CaptureResponse] | None
+    # if the capture is triggered with with_trusted_timestamps, the response contains a
+    # dict[<entry name>] = '<base64 encoded timestamp response>'
+    trusted_timestamps: dict[str, str] | None
     # One day, playwright will support getting the favicon from the capture itself
     # favicon: Optional[bytes]
     # in the meantime, we need a workaround: https://github.com/Lookyloo/PlaywrightCapture/issues/45
@@ -135,6 +141,13 @@ class PlaywrightCaptureLogAdapter(LoggerAdapter):  # type: ignore[type-arg]
         return msg, kwargs
+class TrustedTimestampSettings(TypedDict, total=False):
+    url: str
+    hash_algorithm: HashAlgorithm | None
+    # NOTE: can add other settings such as auth mechanism, if needed.
 # good test pages:
 # https://kaliiiiiiiiii.github.io/brotector/?crash=false
 # https://www.browserscan.net/bot-detection
@@ -155,7 +168,7 @@ class Capture():
                  socks5_dns_resolver: str | list[str] | None=None,
                  general_timeout_in_sec: int | None=None, loglevel: str | int='INFO',
                  uuid: str | None=None, headless: bool=True,
-                 *, init_script: str | None=None):
+                 *, init_script: str | None=None, tt_settings: TrustedTimestampSettings | None=None):
         """Captures a page with Playwright.
         :param browser: The browser to use for the capture.
@@ -219,6 +232,8 @@ class Capture():
         self._init_script = init_script
+        self.tt_settings = tt_settings
     def __prepare_proxy_playwright(self, proxy: str) -> ProxySettings:
         splitted = urlsplit(proxy)
         if splitted.username and splitted.password:
@@ -1007,6 +1022,7 @@ class Capture():
                            with_screenshot: bool=True,
                            with_favicon: bool=False,
                            allow_tracking: bool=False,
+                           with_trusted_timestamps: bool=False,
                            ) -> CaptureResponse:
         to_return: CaptureResponse = {}
@@ -1163,7 +1179,7 @@ class Capture():
                     except Exception as e:
                         self.logger.warning(f'Unable to get favicons: {e}')
-                to_return['last_redirected_url'] = page.url
+                to_return['last_redirected_url'] = unquote_plus(page.url)
                 if with_screenshot:
                     to_return['png'] = await self._failsafe_get_screenshot(page)
@@ -1199,6 +1215,11 @@ class Capture():
                                         rendered_hostname_only=rendered_hostname_only,
                                         max_depth_capture_time=max_capture_time,
                                         with_screenshot=with_screenshot)
+                                    if with_trusted_timestamps:
+                                        try:
+                                            await self._get_trusted_timestamps(child_capture)
+                                        except Exception as e:
+                                            self.logger.warning(f'Unable to get the trusted timestamps for the clild capture : {e}.')
                                     to_return['children'].append(child_capture)  # type: ignore[union-attr]
                             except (TimeoutError, asyncio.TimeoutError):
                                 self.logger.info(f'Timeout error, took more than {max_capture_time}s. Unable to capture {url}.')
@@ -1331,8 +1352,56 @@ class Capture():
         self.logger.debug('Capture done')
         if errors:
             to_return['error'] = '\n'.join(errors)
+        if with_trusted_timestamps:
+            try:
+                await self._get_trusted_timestamps(to_return)
+            except Exception as e:
+                self.logger.warning(f'Unable to get trusted timestamps: {e}')
         return to_return
+    async def _get_trusted_timestamps(self, capture_response: CaptureResponse) -> None:
+        """Get trusted timestamps for the relevant values in the response"""
+        if not self.tt_settings:
+            self.logger.warning('The remote timestamper is not configured.')
+            return None
+        to_timestamp: dict[str, TimestampRequestBuilder] = {}
+        if last_redirected_url := capture_response.get('last_redirected_url'):
+            to_timestamp['last_redirected_url'] = TimestampRequestBuilder().data(last_redirected_url.encode())
+        if har := capture_response.get('har'):
+            to_timestamp['har'] = TimestampRequestBuilder().data(json.dumps(har).encode())
+        if storage := capture_response.get('storage'):
+            to_timestamp['storage'] = TimestampRequestBuilder().data(json.dumps(storage).encode())
+        if html := capture_response.get('html'):
+            to_timestamp['html'] = TimestampRequestBuilder().data(html.encode())
+        if png := capture_response.get('png'):
+            to_timestamp['png'] = TimestampRequestBuilder().data(png)
+        if downloaded_filename := capture_response.get('downloaded_filename'):
+            to_timestamp['downloaded_filename'] = TimestampRequestBuilder().data(downloaded_filename.encode())
+        if downloaded_file := capture_response.get('downloaded_file'):
+            to_timestamp['downloaded_file'] = TimestampRequestBuilder().data(downloaded_file)
+        # if potential_favicons := capture_response.get('potential_favicons'):
+        #    to_timestamp['potential_favicons'] = TimestampRequestBuilder().data(potential_favicons)
+        tt_requests: dict[str, TimeStampRequest] = {}
+        for k, trb in to_timestamp.items():
+            if h_algo := self.tt_settings.get('hash_algorithm'):
+                trb.hash_algorithm(h_algo)
+            tt_requests[k] = trb.build()
+        trusted_timestamps: dict[str, bytes] = {}
+        async with aiohttp.ClientSession() as session:
+            for k, tsr in tt_requests.items():
+                async with session.post(self.tt_settings['url'], data=tsr.as_bytes(),
+                                        headers={"Content-Type": "application/timestamp-query"}) as response:
+                    try:
+                        response.raise_for_status()
+                    except aiohttp.ClientResponseError as e:
+                        self.logger.warning(f'Unable to get Trusted Timestamp for {k}: {e}')
+                        continue
+                    trusted_timestamps[k] = await response.read()
+        capture_response['trusted_timestamps'] = {k: b64encode(v).decode() for k, v in trusted_timestamps.items()}
     async def _failsafe_get_screenshot(self, page: Page) -> bytes:
         self.logger.debug("Capturing a screenshot of the full page.")
         try:

{playwrightcapture-1.32.1 → playwrightcapture-1.32.3}/pyproject.toml RENAMED Viewed

@@ -1,13 +1,13 @@
 [project]
 name = "PlaywrightCapture"
-version = "1.32.1"
+version = "1.32.3"
 description = "A simple library to capture websites using playwright"
 authors = [
     {name="Raphaël Vinot", email= "raphael.vinot@circl.lu"}
 ]
 license = "BSD-3-Clause"
 readme = "README.md"
-requires-python = ">=3.9,<4.0"
+requires-python = ">=3.9.2,<4.0"
 dynamic = [ "classifiers" ]
@@ -25,7 +25,8 @@ dependencies = [
     "aiohttp-socks (>=0.10.1)",
     "typing-extensions (>=4.12.2,<5.0.0) ; python_version < \"3.12\"",
     "dnspython (>=2.7.0,<3.0.0)",
-    "python-socks (>=2.7.1,<3.0.0)"
+    "python-socks (>=2.7.1,<3.0.0)",
+    "rfc3161-client (>=1.0.4,<2.0.0)"
 ]
 [project.urls]
@@ -50,7 +51,7 @@ recaptcha = [
 [tool.poetry.group.dev.dependencies]
 types-beautifulsoup4 = "^4.12.0.20250516"
-pytest = "^8.4.1"
+pytest = "^8.4.2"
 mypy = "^1.17.1"
 types-dateparser = "^1.2.2.20250809"
 types-pytz = "^2025.2.0.20250809"

{playwrightcapture-1.32.1 → playwrightcapture-1.32.3}/LICENSE RENAMED Viewed

File without changes

{playwrightcapture-1.32.1 → playwrightcapture-1.32.3}/README.md RENAMED Viewed

File without changes

{playwrightcapture-1.32.1 → playwrightcapture-1.32.3}/playwrightcapture/exceptions.py RENAMED Viewed

File without changes

{playwrightcapture-1.32.1 → playwrightcapture-1.32.3}/playwrightcapture/helpers.py RENAMED Viewed

File without changes

{playwrightcapture-1.32.1 → playwrightcapture-1.32.3}/playwrightcapture/py.typed RENAMED Viewed

File without changes

{playwrightcapture-1.32.1 → playwrightcapture-1.32.3}/playwrightcapture/socks5dnslookup.py RENAMED Viewed

File without changes

PlaywrightCapture 1.32.1__tar.gz → 1.32.3__tar.gz

PlaywrightCapture 1.32.1__tar.gz → 1.32.3__tar.gz