hishel 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hishel/__init__.py +41 -1
- hishel/_async/_client.py +1 -1
- hishel/_async/_storages.py +17 -17
- hishel/_async/_transports.py +9 -4
- hishel/_controller.py +2 -3
- hishel/_lmdb_types_.pyi +53 -0
- hishel/_s3.py +19 -8
- hishel/_serializers.py +2 -2
- hishel/_sync/_client.py +1 -1
- hishel/_sync/_storages.py +16 -16
- hishel/_sync/_transports.py +9 -4
- hishel/_utils.py +340 -0
- hishel/beta/__init__.py +59 -0
- hishel/beta/_async_cache.py +167 -0
- hishel/beta/_core/__init__.py +0 -0
- hishel/beta/_core/_async/_storages/_sqlite.py +411 -0
- hishel/beta/_core/_base/_storages/_base.py +260 -0
- hishel/beta/_core/_base/_storages/_packing.py +165 -0
- hishel/beta/_core/_headers.py +301 -0
- hishel/beta/_core/_spec.py +2291 -0
- hishel/beta/_core/_sync/_storages/_sqlite.py +411 -0
- hishel/beta/_core/models.py +176 -0
- hishel/beta/_sync_cache.py +167 -0
- hishel/beta/httpx.py +317 -0
- hishel/beta/requests.py +193 -0
- {hishel-0.1.2.dist-info → hishel-0.1.4.dist-info}/METADATA +50 -6
- hishel-0.1.4.dist-info/RECORD +41 -0
- hishel-0.1.2.dist-info/RECORD +0 -27
- {hishel-0.1.2.dist-info → hishel-0.1.4.dist-info}/WHEEL +0 -0
- {hishel-0.1.2.dist-info → hishel-0.1.4.dist-info}/licenses/LICENSE +0 -0
hishel/beta/_sync_cache.py
ADDED
@@ -0,0 +1,167 @@
+from __future__ import annotations
+
+import hashlib
+import logging
+import time
+from dataclasses import replace
+from typing import Iterator, Awaitable, Callable
+
+from typing_extensions import assert_never
+
+from hishel.beta import (
+    AnyState,
+    SyncBaseStorage,
+    SyncSqliteStorage,
+    CacheMiss,
+    CacheOptions,
+    CouldNotBeStored,
+    FromCache,
+    IdleClient,
+    NeedRevalidation,
+    NeedToBeUpdated,
+    Request,
+    Response,
+    StoreAndUse,
+    create_idle_state,
+)
+from hishel.beta._core._spec import InvalidatePairs, vary_headers_match
+from hishel.beta._core.models import CompletePair
+
+logger = logging.getLogger("hishel.integrations.clients")
+
+
+class SyncCacheProxy:
+    """
+    A proxy for HTTP caching in clients.
+
+    This class is independent of any specific HTTP library and works only with internal models.
+    It delegates request execution to a user-provided callable, making it compatible with any
+    HTTP client. Caching behavior can be configured to either fully respect HTTP
+    caching rules or bypass them entirely.
+    """
+
+    def __init__(
+        self,
+        send_request: Callable[[Request], Response],
+        storage: SyncBaseStorage | None = None,
+        cache_options: CacheOptions | None = None,
+        ignore_specification: bool = False,
+    ) -> None:
+        self.send_request = send_request
+        self.storage = storage if storage is not None else SyncSqliteStorage()
+        self.cache_options = cache_options if cache_options is not None else CacheOptions()
+        self.ignore_specification = ignore_specification
+
+    def handle_request(self, request: Request) -> Response:
+        if self.ignore_specification or request.metadata.get("hishel_spec_ignore"):
+            return self._handle_request_ignoring_spec(request)
+        return self._handle_request_respecting_spec(request)
+
+    def _get_key_for_request(self, request: Request) -> str:
+        if request.metadata.get("hishel_body_key"):
+            assert isinstance(request.stream, Iterator)
+            collected = b"".join([chunk for chunk in request.stream])
+            hash_ = hashlib.sha256(collected).hexdigest()
+            return f"{str(request.url)}-{hash_}"
+        return str(request.url)
+
+    def _maybe_refresh_pair_ttl(self, pair: CompletePair) -> None:
+        if pair.request.metadata.get("hishel_refresh_ttl_on_access"):
+            self.storage.update_pair(
+                pair.id,
+                lambda complete_pair: replace(complete_pair, meta=replace(complete_pair.meta, created_at=time.time())),
+            )
+
+    def _handle_request_ignoring_spec(self, request: Request) -> Response:
+        logger.debug("Trying to get cached response ignoring specification")
+        pairs = self.storage.get_pairs(self._get_key_for_request(request))
+
+        logger.debug(f"Found {len(pairs)} cached pairs for the request")
+
+        for pair in pairs:
+            if (
+                str(pair.request.url) == str(request.url)
+                and pair.request.method == request.method
+                and vary_headers_match(
+                    request,
+                    pair,
+                )
+            ):
+                logger.debug(
+                    "Found matching cached response for the request",
+                )
+                pair.response.metadata["hishel_from_cache"] = True  # type: ignore
+                self._maybe_refresh_pair_ttl(pair)
+                return pair.response
+
+        incomplete_pair = self.storage.create_pair(
+            request,
+        )
+        response = self.send_request(incomplete_pair.request)
+
+        logger.debug("Storing response in cache ignoring specification")
+        complete_pair = self.storage.add_response(
+            incomplete_pair.id, response, self._get_key_for_request(request)
+        )
+        return complete_pair.response
+
+    def _handle_request_respecting_spec(self, request: Request) -> Response:
+        state: AnyState = create_idle_state("client", self.cache_options)
+
+        while state:
+            logger.debug(f"Handling state: {state.__class__.__name__}")
+            if isinstance(state, IdleClient):
+                state = self._handle_idle_state(state, request)
+            elif isinstance(state, CacheMiss):
+                state = self._handle_cache_miss(state)
+            elif isinstance(state, StoreAndUse):
+                return self._handle_store_and_use(state, request)
+            elif isinstance(state, CouldNotBeStored):
+                return state.response
+            elif isinstance(state, NeedRevalidation):
+                state = self._handle_revalidation(state)
+            elif isinstance(state, FromCache):
+                self._maybe_refresh_pair_ttl(state.pair)
+                return state.pair.response
+            elif isinstance(state, NeedToBeUpdated):
+                state = self._handle_update(state)
+            elif isinstance(state, InvalidatePairs):
+                state = self._handle_invalidate_pairs(state)
+            else:
+                assert_never(state)
+
+        raise RuntimeError("Unreachable")
+
+    def _handle_idle_state(self, state: IdleClient, request: Request) -> AnyState:
+        stored_pairs = self.storage.get_pairs(self._get_key_for_request(request))
+        return state.next(request, stored_pairs)
+
+    def _handle_cache_miss(self, state: CacheMiss) -> AnyState:
+        incomplete_pair = self.storage.create_pair(state.request)
+        response = self.send_request(incomplete_pair.request)
+        return state.next(response, incomplete_pair.id)
+
+    def _handle_store_and_use(self, state: StoreAndUse, request: Request) -> Response:
+        complete_pair = self.storage.add_response(
+            state.pair_id, state.response, self._get_key_for_request(request)
+        )
+        return complete_pair.response
+
+    def _handle_revalidation(self, state: NeedRevalidation) -> AnyState:
+        revalidation_response = self.send_request(state.request)
+        return state.next(revalidation_response)
+
+    def _handle_update(self, state: NeedToBeUpdated) -> AnyState:
+        for pair in state.updating_pairs:
+            self.storage.update_pair(
+                pair.id,
+                lambda complete_pair: replace(
+                    complete_pair, response=replace(pair.response, headers=pair.response.headers)
+                ),
+            )
+        return state.next()
+
+    def _handle_invalidate_pairs(self, state: InvalidatePairs) -> AnyState:
+        for pair_id in state.pair_ids:
+            self.storage.remove(pair_id)
+        return state.next()
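Not part of the diff: a minimal usage sketch for the new SyncCacheProxy above. It relies only on constructors and keyword arguments that appear verbatim in this release (SyncCacheProxy, Request, Response, Headers); the stub origin, header values, and URL are illustrative assumptions.

from hishel.beta import Headers, Request, Response
from hishel.beta._sync_cache import SyncCacheProxy


def send_request(request: Request) -> Response:
    # Illustrative stub origin; a real integration performs the network call
    # here (see the httpx and requests adapters later in this diff).
    return Response(
        status_code=200,
        headers=Headers({"Cache-Control": "max-age=3600"}),
        stream=iter([b"hello"]),
        metadata={},
    )


proxy = SyncCacheProxy(send_request=send_request)  # storage defaults to SyncSqliteStorage()
response = proxy.handle_request(
    Request(
        method="GET",
        url="https://example.com/",
        headers=Headers({}),
        stream=iter([b""]),
        metadata={},
    )
)

Because send_request is an ordinary callable, the same proxy drives both the httpx and requests integrations added later in this diff.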
hishel/beta/httpx.py
ADDED
@@ -0,0 +1,317 @@
+from __future__ import annotations
+
+import ssl
+import typing as t
+from typing import AsyncIterator, Iterable, Iterator, Union, overload
+
+import httpx
+
+from hishel.beta import Headers, Request, Response
+from hishel.beta._async_cache import AsyncCacheProxy
+from hishel.beta._core._base._storages._base import AsyncBaseStorage, SyncBaseStorage
+from hishel.beta._core._spec import (
+    CacheOptions,
+)
+from hishel.beta._core.models import AnyIterable
+from hishel.beta._sync_cache import SyncCacheProxy
+
+SOCKET_OPTION = t.Union[
+    t.Tuple[int, int, int],
+    t.Tuple[int, int, t.Union[bytes, bytearray]],
+    t.Tuple[int, int, None, int],
+]
+
+
+class IteratorStream(httpx.SyncByteStream, httpx.AsyncByteStream):
+    def __init__(self, iterator: Iterator[bytes] | AsyncIterator[bytes]) -> None:
+        self.iterator = iterator
+
+    def __iter__(self) -> Iterator[bytes]:
+        assert isinstance(self.iterator, (Iterator))
+        yield from self.iterator
+
+    async def __aiter__(self) -> AsyncIterator[bytes]:
+        assert isinstance(self.iterator, (AsyncIterator))
+        async for chunk in self.iterator:
+            yield chunk
+
+
+@overload
+def internal_to_httpx(
+    value: Request,
+) -> httpx.Request: ...
+@overload
+def internal_to_httpx(
+    value: Response,
+) -> httpx.Response: ...
+def internal_to_httpx(
+    value: Union[Request, Response],
+) -> Union[httpx.Request, httpx.Response]:
+    """
+    Convert internal Request/Response to httpx.Request/httpx.Response.
+    """
+    if isinstance(value, Request):
+        return httpx.Request(
+            method=value.method,
+            url=value.url,
+            headers=value.headers,
+            stream=IteratorStream(value.stream),
+            extensions=value.metadata,
+        )
+    elif isinstance(value, Response):
+        return httpx.Response(
+            status_code=value.status_code,
+            headers=value.headers,
+            stream=IteratorStream(value.stream),
+            extensions=value.metadata,
+        )
+
+
+@overload
+def httpx_to_internal(
+    value: httpx.Request,
+) -> Request: ...
+@overload
+def httpx_to_internal(
+    value: httpx.Response,
+) -> Response: ...
+def httpx_to_internal(
+    value: Union[httpx.Request, httpx.Response],
+) -> Union[Request, Response]:
+    """
+    Convert httpx.Request/httpx.Response to internal Request/Response.
+    """
+    stream: Union[Iterator[bytes], AsyncIterator[bytes]]
+    try:
+        stream = AnyIterable(value.content)
+    except (httpx.RequestNotRead, httpx.ResponseNotRead):
+        if isinstance(value, httpx.Response):
+            stream = value.iter_raw() if isinstance(value.stream, Iterable) else value.aiter_raw()
+        else:
+            stream = value.stream  # type: ignore
+    if isinstance(value, httpx.Request):
+        return Request(
+            method=value.method,
+            url=str(value.url),
+            headers=Headers({key: value for key, value in value.headers.items()}),
+            stream=stream,
+            metadata={
+                "hishel_refresh_ttl_on_access": value.extensions.get("hishel_refresh_ttl_on_access"),
+                "hishel_ttl": value.extensions.get("hishel_ttl"),
+                "hishel_spec_ignore": value.extensions.get("hishel_spec_ignore"),
+            },
+        )
+    elif isinstance(value, httpx.Response):
+        return Response(
+            status_code=value.status_code,
+            headers=Headers({key: value for key, value in value.headers.items()}),
+            stream=stream,
+            metadata={},
+        )
+
+
+class SyncCacheTransport(httpx.BaseTransport):
+    def __init__(
+        self,
+        next_transport: httpx.BaseTransport,
+        storage: SyncBaseStorage | None = None,
+        cache_options: CacheOptions | None = None,
+        ignore_specification: bool = False,
+    ) -> None:
+        self.next_transport = next_transport
+        self._cache_proxy: SyncCacheProxy = SyncCacheProxy(
+            send_request=self.sync_send_request,
+            storage=storage,
+            cache_options=cache_options,
+            ignore_specification=ignore_specification,
+        )
+
+    def handle_request(
+        self,
+        request: httpx.Request,
+    ) -> httpx.Response:
+        internal_request = httpx_to_internal(request)
+        internal_response = self._cache_proxy.handle_request(internal_request)
+        response = internal_to_httpx(internal_response)
+        return response
+
+    def close(self) -> None:
+        self.next_transport.close()
+        super().close()
+
+    def sync_send_request(self, request: Request) -> Response:
+        httpx_request = internal_to_httpx(request)
+        httpx_response = self.next_transport.handle_request(httpx_request)
+        return httpx_to_internal(httpx_response)
+
+
+class SyncCacheClient(httpx.Client):
+    @overload
+    def __init__(
+        self,
+        *,
+        storage: SyncBaseStorage | None = None,
+        cache_options: CacheOptions | None = None,
+        **kwargs: t.Any,
+    ) -> None: ...
+    @overload
+    def __init__(
+        self,
+        *args: t.Any,
+        **kwargs: t.Any,
+    ) -> None: ...
+    def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
+        self.storage: SyncBaseStorage | None = kwargs.pop("storage", None)
+        self.cache_options: CacheOptions | None = kwargs.pop("cache_options", None)
+        super().__init__(*args, **kwargs)
+
+    def _init_transport(
+        self,
+        verify: ssl.SSLContext | str | bool = True,
+        cert: t.Union[str, t.Tuple[str, str], t.Tuple[str, str, str], None] = None,
+        trust_env: bool = True,
+        http1: bool = True,
+        http2: bool = False,
+        limits: httpx.Limits = httpx.Limits(max_connections=100, max_keepalive_connections=20),
+        transport: httpx.BaseTransport | None = None,
+        **kwargs: t.Any,
+    ) -> httpx.BaseTransport:
+        if transport is not None:
+            return transport
+
+        return SyncCacheTransport(
+            next_transport=httpx.HTTPTransport(
+                verify=verify,
+                cert=cert,
+                trust_env=trust_env,
+                http1=http1,
+                http2=http2,
+                limits=limits,
+            ),
+            storage=self.storage,
+            cache_options=self.cache_options,
+            ignore_specification=False,
+        )
+
+    def _init_proxy_transport(
+        self,
+        proxy: httpx.Proxy,
+        verify: ssl.SSLContext | str | bool = True,
+        cert: t.Union[str, t.Tuple[str, str], t.Tuple[str, str, str], None] = None,
+        trust_env: bool = True,
+        http1: bool = True,
+        http2: bool = False,
+        limits: httpx.Limits = httpx.Limits(max_connections=100, max_keepalive_connections=20),
+        **kwargs: t.Any,
+    ) -> httpx.BaseTransport:
+        return SyncCacheTransport(
+            next_transport=httpx.HTTPTransport(
+                verify=verify,
+                cert=cert,
+                trust_env=trust_env,
+                http1=http1,
+                http2=http2,
+                limits=limits,
+                proxy=proxy,
+            ),
+            storage=self.storage,
+            cache_options=self.cache_options,
+            ignore_specification=False,
+        )
+
+
+class AsyncCacheTransport(httpx.AsyncBaseTransport):
+    def __init__(
+        self,
+        next_transport: httpx.AsyncBaseTransport,
+        storage: AsyncBaseStorage | None = None,
+        cache_options: CacheOptions | None = None,
+        ignore_specification: bool = False,
+    ) -> None:
+        self.next_transport = next_transport
+        self._cache_proxy: AsyncCacheProxy = AsyncCacheProxy(
+            send_request=self.async_send_request,
+            storage=storage,
+            cache_options=cache_options,
+            ignore_specification=ignore_specification,
+        )
+
+    async def handle_async_request(
+        self,
+        request: httpx.Request,
+    ) -> httpx.Response:
+        internal_request = httpx_to_internal(request)
+        internal_response = await self._cache_proxy.handle_request(internal_request)
+        response = internal_to_httpx(internal_response)
+        return response
+
+    async def aclose(self) -> None:
+        await self.next_transport.aclose()
+        await super().aclose()
+
+    async def async_send_request(self, request: Request) -> Response:
+        httpx_request = internal_to_httpx(request)
+        httpx_response = await self.next_transport.handle_async_request(httpx_request)
+        return httpx_to_internal(httpx_response)
+
+
+class AsyncCacheClient(httpx.AsyncClient):
+    def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
+        self.storage: AsyncBaseStorage | None = kwargs.pop("storage", None)
+        self.cache_options: CacheOptions | None = kwargs.pop("cache_options", None)
+        self.ignore_specification: bool = kwargs.pop("ignore_specification", False)
+        super().__init__(*args, **kwargs)
+
+    def _init_transport(
+        self,
+        verify: ssl.SSLContext | str | bool = True,
+        cert: t.Union[str, t.Tuple[str, str], t.Tuple[str, str, str], None] = None,
+        trust_env: bool = True,
+        http1: bool = True,
+        http2: bool = False,
+        limits: httpx.Limits = httpx.Limits(max_connections=100, max_keepalive_connections=20),
+        transport: httpx.AsyncBaseTransport | None = None,
+        **kwargs: t.Any,
+    ) -> httpx.AsyncBaseTransport:
+        if transport is not None:
+            return transport
+
+        return AsyncCacheTransport(
+            next_transport=httpx.AsyncHTTPTransport(
+                verify=verify,
+                cert=cert,
+                trust_env=trust_env,
+                http1=http1,
+                http2=http2,
+                limits=limits,
+            ),
+            storage=self.storage,
+            cache_options=self.cache_options,
+            ignore_specification=False,
+        )
+
+    def _init_proxy_transport(
+        self,
+        proxy: httpx.Proxy,
+        verify: ssl.SSLContext | str | bool = True,
+        cert: t.Union[str, t.Tuple[str, str], t.Tuple[str, str, str], None] = None,
+        trust_env: bool = True,
+        http1: bool = True,
+        http2: bool = False,
+        limits: httpx.Limits = httpx.Limits(max_connections=100, max_keepalive_connections=20),
+        **kwargs: t.Any,
+    ) -> httpx.AsyncBaseTransport:
+        return AsyncCacheTransport(
+            next_transport=httpx.AsyncHTTPTransport(
+                verify=verify,
+                cert=cert,
+                trust_env=trust_env,
+                http1=http1,
+                http2=http2,
+                limits=limits,
+                proxy=proxy,
+            ),
+            storage=self.storage,
+            cache_options=self.cache_options,
+            ignore_specification=self.ignore_specification,
+        )
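Not part of the diff: a short usage sketch for the httpx integration above. The class and parameter names come from the listing; the URLs and the choice of storage are illustrative, and the extension keys are the ones read by httpx_to_internal.

import httpx

from hishel.beta import SyncSqliteStorage
from hishel.beta.httpx import AsyncCacheClient, SyncCacheClient, SyncCacheTransport

# Drop-in client: the overridden _init_transport wraps httpx's default transport
# in a SyncCacheTransport.
client = SyncCacheClient(storage=SyncSqliteStorage())
r1 = client.get("https://example.com/")

# Per-request controls travel through httpx request extensions.
r2 = client.get("https://example.com/", extensions={"hishel_spec_ignore": True})

# The transport can also wrap an existing transport explicitly.
explicit = httpx.Client(transport=SyncCacheTransport(next_transport=httpx.HTTPTransport()))


# Async variant.
async def fetch() -> httpx.Response:
    async with AsyncCacheClient() as aclient:
        return await aclient.get("https://example.com/")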
hishel/beta/requests.py
ADDED
@@ -0,0 +1,193 @@
+from __future__ import annotations
+
+from io import RawIOBase
+from typing import Iterator, Mapping, Optional, overload
+
+from typing_extensions import assert_never
+
+from hishel._utils import snake_to_header
+from hishel.beta import Headers, Request, Response as Response
+from hishel.beta._core._base._storages._base import SyncBaseStorage
+from hishel.beta._core._spec import CacheOptions
+from hishel.beta._core.models import extract_metadata_from_headers
+from hishel.beta._sync_cache import SyncCacheProxy
+
+try:
+    import requests
+    from requests.adapters import HTTPAdapter
+    from urllib3 import HTTPResponse
+    from urllib3.util.retry import Retry as Retry
+except ImportError:  # pragma: no cover
+    raise ImportError(
+        "The 'requests' library is required to use the requests integration. "
+        "Install hishel with 'pip install hishel[requests]'."
+    )
+
+
+class IteratorStream(RawIOBase):
+    def __init__(self, iterator: Iterator[bytes]):
+        self.iterator = iterator
+        self.leftover = b""
+
+    def readable(self) -> bool:
+        return True
+
+    def readinto(self, b: bytearray) -> Optional[int]:  # type: ignore
+        chunk = self.read(len(b))
+        if not chunk:
+            return 0
+        n = len(chunk)
+        b[:n] = chunk
+        return n
+
+    def read(self, size: int = -1) -> bytes:
+        if size is None or size < 0:
+            result = self.leftover + b"".join(self.iterator)
+            self.leftover = b""
+            return result
+
+        while len(self.leftover) < size:
+            try:
+                self.leftover += next(self.iterator)
+            except StopIteration:
+                break
+
+        result = self.leftover[:size]
+        self.leftover = self.leftover[size:]
+        return result
+
+
+@overload
+def requests_to_internal(
+    model: requests.models.PreparedRequest,
+) -> Request: ...
+
+
+@overload
+def requests_to_internal(
+    model: requests.models.Response,
+) -> Response: ...
+
+
+def requests_to_internal(
+    model: requests.models.PreparedRequest | requests.models.Response,
+) -> Request | Response:
+    if isinstance(model, requests.models.PreparedRequest):
+        body: bytes
+        if isinstance(model.body, str):
+            body = model.body.encode("utf-8")
+        elif isinstance(model.body, bytes):
+            body = model.body
+        else:
+            body = b""
+        assert model.method
+        return Request(
+            method=model.method,
+            url=str(model.url),
+            headers=Headers(model.headers),
+            stream=iter([body]),
+            metadata=extract_metadata_from_headers(model.headers),
+        )
+    elif isinstance(model, requests.models.Response):
+        try:
+            stream = model.raw.stream(amt=8192)
+        except requests.exceptions.StreamConsumedError:
+            stream = iter([model.content])
+
+        return Response(
+            status_code=model.status_code,
+            headers=Headers(model.headers),
+            stream=stream,
+        )
+    else:
+        assert_never(model)
+        raise RuntimeError("This line should never be reached, but is here to satisfy type checkers.")
+
+
+@overload
+def internal_to_requests(model: Request) -> requests.models.PreparedRequest: ...
+@overload
+def internal_to_requests(model: Response) -> requests.models.Response: ...
+def internal_to_requests(model: Request | Response) -> requests.models.Response | requests.models.PreparedRequest:
+    if isinstance(model, Response):
+        response = requests.models.Response()
+
+        assert isinstance(model.stream, Iterator)
+        # Collect all chunks from the internal stream
+        stream = IteratorStream(model.stream)
+
+        urllib_response = HTTPResponse(
+            body=stream,
+            headers={**model.headers, **{snake_to_header(k): str(v) for k, v in model.metadata.items()}},
+            status=model.status_code,
+            preload_content=False,
+            decode_content=True,
+        )
+
+        # Set up the response object
+        response.raw = urllib_response
+        response.status_code = model.status_code
+        response.headers.update(model.headers)
+        response.headers.update({snake_to_header(k): str(v) for k, v in model.metadata.items()})
+        response.url = ""  # Will be set by requests
+        response.encoding = response.apparent_encoding
+
+        return response
+    else:
+        assert isinstance(model.stream, Iterator)
+        request = requests.Request(
+            method=model.method,
+            url=model.url,
+            headers=model.headers,
+            data=b"".join(model.stream) if model.stream else None,
+        )
+        return request.prepare()
+
+
+class CacheAdapter(HTTPAdapter):
+    """
+    A custom HTTPAdapter that can be used with requests to capture HTTP interactions
+    for snapshot testing.
+    """
+
+    def __init__(
+        self,
+        pool_connections: int = 10,
+        pool_maxsize: int = 10,
+        max_retries: int = 0,
+        pool_block: bool = False,
+        storage: SyncBaseStorage | None = None,
+        cache_options: CacheOptions | None = None,
+        ignore_specification: bool = False,
+    ):
+        super().__init__(pool_connections, pool_maxsize, max_retries, pool_block)
+        self._cache_proxy = SyncCacheProxy(
+            send_request=self.send_request,
+            storage=storage,
+            cache_options=cache_options,
+            ignore_specification=ignore_specification,
+        )
+
+    def send(
+        self,
+        request: requests.models.PreparedRequest,
+        stream: bool = False,
+        timeout: None | float | tuple[float, float] | tuple[float, None] = None,
+        verify: bool | str = True,
+        cert: None | bytes | str | tuple[bytes | str, bytes | str] = None,
+        proxies: Mapping[str, str] | None = None,
+    ) -> requests.models.Response:
+        internal_request = requests_to_internal(request)
+        internal_response = self._cache_proxy.handle_request(internal_request)
+        response = internal_to_requests(internal_response)
+
+        # Set the original request on the response
+        response.request = request
+        response.connection = self  # type: ignore
+
+        return response
+
+    def send_request(self, request: Request) -> Response:
+        requests_request = internal_to_requests(request)
+        response = super().send(requests_request, stream=True)
+        return requests_to_internal(response)
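Not part of the diff: a minimal sketch of mounting the new CacheAdapter on a requests Session, as with any other HTTPAdapter; the URL is illustrative.

import requests

from hishel.beta.requests import CacheAdapter

session = requests.Session()
adapter = CacheAdapter()  # default storage and cache options, specification respected

# Route both schemes through the caching adapter.
session.mount("https://", adapter)
session.mount("http://", adapter)

response = session.get("https://example.com/")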