hishel 1.0.0.dev3__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hishel/_policies.py ADDED
@@ -0,0 +1,49 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ import typing as t
5
+ from dataclasses import dataclass, field
6
+ from typing import Generic
7
+
8
+ from hishel import Request, Response
9
+ from hishel._core._spec import (
10
+ CacheOptions,
11
+ )
12
+
13
+ logger = __import__("logging").getLogger(__name__)
14
+
15
+ T = t.TypeVar("T", Request, Response)
16
+
17
+
18
+ class CachePolicy(abc.ABC):
19
+ use_body_key: bool = False
20
+ """Whether to include request body in cache key calculation."""
21
+
22
+
23
+ class BaseFilter(abc.ABC, Generic[T]):
24
+ @abc.abstractmethod
25
+ def needs_body(self) -> bool:
26
+ pass
27
+
28
+ @abc.abstractmethod
29
+ def apply(self, item: T, body: bytes | None) -> bool:
30
+ pass
31
+
32
+
33
+ @dataclass
34
+ class SpecificationPolicy(CachePolicy):
35
+ """
36
+ Caching policy that respects HTTP caching specification.
37
+ """
38
+
39
+ cache_options: CacheOptions = field(default_factory=CacheOptions)
40
+
41
+
42
+ @dataclass
43
+ class FilterPolicy(CachePolicy):
44
+ """
45
+ Caching policy that applies user-defined filtering logic.
46
+ """
47
+
48
+ request_filters: list[BaseFilter[Request]] = field(default_factory=list)
49
+ response_filters: list[BaseFilter[Response]] = field(default_factory=list)
hishel/_sync_cache.py CHANGED
@@ -13,7 +13,6 @@ from hishel import (
13
13
  SyncBaseStorage,
14
14
  SyncSqliteStorage,
15
15
  CacheMiss,
16
- CacheOptions,
17
16
  CouldNotBeStored,
18
17
  FromCache,
19
18
  IdleClient,
@@ -22,10 +21,10 @@ from hishel import (
22
21
  Request,
23
22
  Response,
24
23
  StoreAndUse,
25
- create_idle_state,
26
24
  )
27
25
  from hishel._core._spec import InvalidateEntries, vary_headers_match
28
26
  from hishel._core.models import Entry, ResponseMetadata
27
+ from hishel._policies import CachePolicy, FilterPolicy, SpecificationPolicy
29
28
  from hishel._utils import make_sync_iterator
30
29
 
31
30
  logger = logging.getLogger("hishel.integrations.clients")
@@ -37,87 +36,121 @@ class SyncCacheProxy:
37
36
 
38
37
  This class is independent of any specific HTTP library and works only with internal models.
39
38
  It delegates request execution to a user-provided callable, making it compatible with any
40
- HTTP client. Caching behavior can be configured to either fully respect HTTP
41
- caching rules or bypass them entirely.
39
+ HTTP client. Caching behavior is determined by the policy object.
40
+
41
+ Args:
42
+ request_sender: Callable that sends HTTP requests and returns responses.
43
+ storage: Storage backend for cache entries. Defaults to SyncSqliteStorage.
44
+ policy: Caching policy to use. Can be SpecificationPolicy (respects RFC 9111) or
45
+ FilterPolicy (user-defined filtering). Defaults to SpecificationPolicy().
42
46
  """
43
47
 
44
48
  def __init__(
45
49
  self,
46
50
  request_sender: Callable[[Request], Response],
47
51
  storage: SyncBaseStorage | None = None,
48
- cache_options: CacheOptions | None = None,
49
- ignore_specification: bool = False,
52
+ policy: CachePolicy | None = None,
50
53
  ) -> None:
51
54
  self.send_request = request_sender
52
55
  self.storage = storage if storage is not None else SyncSqliteStorage()
53
- self.cache_options = cache_options if cache_options is not None else CacheOptions()
54
- self.ignore_specification = ignore_specification
56
+ self.policy = policy if policy is not None else SpecificationPolicy()
55
57
 
56
58
  def handle_request(self, request: Request) -> Response:
57
- if self.ignore_specification or request.metadata.get("hishel_spec_ignore"):
58
- return self._handle_request_ignoring_spec(request)
59
+ if isinstance(self.policy, FilterPolicy):
60
+ return self._handle_request_with_filters(request)
59
61
  return self._handle_request_respecting_spec(request)
60
62
 
61
63
  def _get_key_for_request(self, request: Request) -> str:
62
- if request.metadata.get("hishel_body_key"):
64
+ if self.policy.use_body_key or request.metadata.get("hishel_body_key"):
63
65
  assert isinstance(request.stream, (Iterator, Iterable))
64
66
  collected = b"".join([chunk for chunk in request.stream])
65
67
  hash_ = hashlib.sha256(collected).hexdigest()
66
68
  request.stream = make_sync_iterator([collected])
67
- return f"{str(request.url)}-{hash_}"
69
+ return hash_
68
70
  return hashlib.sha256(str(request.url).encode("utf-8")).hexdigest()
69
71
 
70
- def _maybe_refresh_pair_ttl(self, pair: Entry) -> None:
71
- if pair.request.metadata.get("hishel_refresh_ttl_on_access"):
72
+ def _maybe_refresh_entry_ttl(self, entry: Entry) -> None:
73
+ if entry.request.metadata.get("hishel_refresh_ttl_on_access"):
72
74
  self.storage.update_entry(
73
- pair.id,
74
- lambda complete_pair: replace(
75
- complete_pair,
76
- meta=replace(complete_pair.meta, created_at=time.time()),
75
+ entry.id,
76
+ lambda current_entry: replace(
77
+ current_entry,
78
+ meta=replace(current_entry.meta, created_at=time.time()),
77
79
  ),
78
80
  )
79
81
 
80
- def _handle_request_ignoring_spec(self, request: Request) -> Response:
82
+ def _handle_request_with_filters(self, request: Request) -> Response:
83
+ assert isinstance(self.policy, FilterPolicy)
84
+
85
+ for request_filter in self.policy.request_filters:
86
+ if request_filter.needs_body():
87
+ body = request.read()
88
+ if not request_filter.apply(request, body):
89
+ logger.debug("Request filtered out by request filter")
90
+ return self.send_request(request)
91
+ else:
92
+ if not request_filter.apply(request, None):
93
+ logger.debug("Request filtered out by request filter")
94
+ return self.send_request(request)
95
+
81
96
  logger.debug("Trying to get cached response ignoring specification")
82
- entries = self.storage.get_entries(self._get_key_for_request(request))
97
+ cache_key = self._get_key_for_request(request)
98
+ entries = self.storage.get_entries(cache_key)
83
99
 
84
100
  logger.debug(f"Found {len(entries)} cached entries for the request")
85
101
 
86
- for pair in entries:
102
+ for entry in entries:
87
103
  if (
88
- str(pair.request.url) == str(request.url)
89
- and pair.request.method == request.method
104
+ str(entry.request.url) == str(request.url)
105
+ and entry.request.method == request.method
90
106
  and vary_headers_match(
91
107
  request,
92
- pair,
108
+ entry,
93
109
  )
94
110
  ):
95
111
  logger.debug(
96
112
  "Found matching cached response for the request",
97
113
  )
98
114
  response_meta = ResponseMetadata(
99
- hishel_spec_ignored=True,
100
115
  hishel_from_cache=True,
101
- hishel_created_at=pair.meta.created_at,
116
+ hishel_created_at=entry.meta.created_at,
102
117
  hishel_revalidated=False,
103
118
  hishel_stored=False,
104
119
  )
105
- pair.response.metadata.update(response_meta) # type: ignore
106
- self._maybe_refresh_pair_ttl(pair)
107
- return pair.response
120
+ entry.response.metadata.update(response_meta) # type: ignore
121
+ self._maybe_refresh_entry_ttl(entry)
122
+ return entry.response
108
123
 
109
124
  response = self.send_request(request)
125
+ for response_filter in self.policy.response_filters:
126
+ if response_filter.needs_body():
127
+ body = response.read()
128
+ if not response_filter.apply(response, body):
129
+ logger.debug("Response filtered out by response filter")
130
+ return response
131
+ else:
132
+ if not response_filter.apply(response, None):
133
+ logger.debug("Response filtered out by response filter")
134
+ return response
135
+ response_meta = ResponseMetadata(
136
+ hishel_from_cache=False,
137
+ hishel_created_at=time.time(),
138
+ hishel_revalidated=False,
139
+ hishel_stored=True,
140
+ )
141
+ response.metadata.update(response_meta) # type: ignore
110
142
 
111
143
  logger.debug("Storing response in cache ignoring specification")
112
144
  entry = self.storage.create_entry(
113
145
  request,
114
146
  response,
115
- self._get_key_for_request(request),
147
+ cache_key,
116
148
  )
117
149
  return entry.response
118
150
 
119
151
  def _handle_request_respecting_spec(self, request: Request) -> Response:
120
- state: AnyState = create_idle_state("client", self.cache_options)
152
+ assert isinstance(self.policy, SpecificationPolicy)
153
+ state: AnyState = IdleClient(options=self.policy.cache_options)
121
154
 
122
155
  while state:
123
156
  logger.debug(f"Handling state: {state.__class__.__name__}")
@@ -132,8 +165,8 @@ class SyncCacheProxy:
132
165
  elif isinstance(state, NeedRevalidation):
133
166
  state = self._handle_revalidation(state)
134
167
  elif isinstance(state, FromCache):
135
- self._maybe_refresh_pair_ttl(state.pair)
136
- return state.pair.response
168
+ self._maybe_refresh_entry_ttl(state.entry)
169
+ return state.entry.response
137
170
  elif isinstance(state, NeedToBeUpdated):
138
171
  state = self._handle_update(state)
139
172
  elif isinstance(state, InvalidateEntries):
@@ -152,12 +185,12 @@ class SyncCacheProxy:
152
185
  return state.next(response)
153
186
 
154
187
  def _handle_store_and_use(self, state: StoreAndUse, request: Request) -> Response:
155
- complete_pair = self.storage.create_entry(
188
+ entry = self.storage.create_entry(
156
189
  request,
157
190
  state.response,
158
191
  self._get_key_for_request(request),
159
192
  )
160
- return complete_pair.response
193
+ return entry.response
161
194
 
162
195
  def _handle_revalidation(self, state: NeedRevalidation) -> AnyState:
163
196
  revalidation_response = self.send_request(state.request)
@@ -167,8 +200,8 @@ class SyncCacheProxy:
167
200
  for entry in state.updating_entries:
168
201
  self.storage.update_entry(
169
202
  entry.id,
170
- lambda complete_pair: replace(
171
- complete_pair,
203
+ lambda entry: replace(
204
+ entry,
172
205
  response=replace(entry.response, headers=entry.response.headers),
173
206
  ),
174
207
  )
hishel/_sync_httpx.py CHANGED
@@ -14,11 +14,9 @@ from typing import (
14
14
  from httpx import RequestNotRead
15
15
 
16
16
  from hishel import SyncCacheProxy, Headers, Request, Response
17
- from hishel._core._spec import (
18
- CacheOptions,
19
- )
20
17
  from hishel._core._storages._sync_base import SyncBaseStorage
21
18
  from hishel._core.models import RequestMetadata, extract_metadata_from_headers
19
+ from hishel._policies import CachePolicy
22
20
  from hishel._utils import (
23
21
  filter_mapping,
24
22
  make_sync_iterator,
@@ -62,14 +60,14 @@ def _internal_to_httpx(
62
60
  method=value.method,
63
61
  url=value.url,
64
62
  headers=value.headers,
65
- stream=_IteratorStream(value.iter_stream()),
63
+ stream=_IteratorStream(value._iter_stream()),
66
64
  extensions=value.metadata,
67
65
  )
68
66
  elif isinstance(value, Response):
69
67
  return httpx.Response(
70
68
  status_code=value.status_code,
71
69
  headers=value.headers,
72
- stream=_IteratorStream(value.iter_stream()),
70
+ stream=_IteratorStream(value._iter_stream()),
73
71
  extensions=value.metadata,
74
72
  )
75
73
 
@@ -149,15 +147,13 @@ class SyncCacheTransport(httpx.BaseTransport):
149
147
  self,
150
148
  next_transport: httpx.BaseTransport,
151
149
  storage: SyncBaseStorage | None = None,
152
- cache_options: CacheOptions | None = None,
153
- ignore_specification: bool = False,
150
+ policy: CachePolicy | None = None,
154
151
  ) -> None:
155
152
  self.next_transport = next_transport
156
153
  self._cache_proxy: SyncCacheProxy = SyncCacheProxy(
157
154
  request_sender=self.request_sender,
158
155
  storage=storage,
159
- cache_options=cache_options,
160
- ignore_specification=ignore_specification,
156
+ policy=policy,
161
157
  )
162
158
  self.storage = self._cache_proxy.storage
163
159
 
@@ -184,8 +180,7 @@ class SyncCacheTransport(httpx.BaseTransport):
184
180
  class SyncCacheClient(httpx.Client):
185
181
  def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
186
182
  self.storage: SyncBaseStorage | None = kwargs.pop("storage", None)
187
- self.cache_options: CacheOptions | None = kwargs.pop("cache_options", None)
188
- self.ignore_specification: bool = kwargs.pop("ignore_specification", False)
183
+ self.policy: CachePolicy | None = kwargs.pop("policy", None)
189
184
  super().__init__(*args, **kwargs)
190
185
 
191
186
  def _init_transport(
@@ -212,8 +207,7 @@ class SyncCacheClient(httpx.Client):
212
207
  limits=limits,
213
208
  ),
214
209
  storage=self.storage,
215
- cache_options=self.cache_options,
216
- ignore_specification=False,
210
+ policy=self.policy,
217
211
  )
218
212
 
219
213
  def _init_proxy_transport(
@@ -238,6 +232,5 @@ class SyncCacheClient(httpx.Client):
238
232
  proxy=proxy,
239
233
  ),
240
234
  storage=self.storage,
241
- cache_options=self.cache_options,
242
- ignore_specification=self.ignore_specification,
235
+ policy=self.policy,
243
236
  )
hishel/_utils.py CHANGED
@@ -107,8 +107,8 @@ def snake_to_header(text: str) -> str:
107
107
  return "X-" + "-".join(word.capitalize() for word in text.split("_"))
108
108
 
109
109
 
110
- def ensure_cache_dict(base_path: str | None = None) -> Path:
111
- _base_path = Path(base_path) if base_path is not None else Path(".cache/hishel")
110
+ def ensure_cache_dict(base_path: Path | None = None) -> Path:
111
+ _base_path = base_path if base_path is not None else Path(".cache/hishel")
112
112
  _gitignore_file = _base_path / ".gitignore"
113
113
 
114
114
  _base_path.mkdir(parents=True, exist_ok=True)
hishel/asgi.py CHANGED
@@ -5,8 +5,10 @@ import typing as t
5
5
  from email.utils import formatdate
6
6
  from typing import AsyncIterator
7
7
 
8
- from hishel import AsyncBaseStorage, CacheOptions, Headers, Request, Response
8
+ from hishel import AsyncBaseStorage, Headers, Request, Response
9
9
  from hishel._async_cache import AsyncCacheProxy
10
+ from hishel._policies import CachePolicy
11
+ from hishel._utils import filter_mapping
10
12
 
11
13
  # Configure logger for this module
12
14
  logger = logging.getLogger(__name__)
@@ -54,19 +56,20 @@ class ASGICacheMiddleware:
54
56
  Args:
55
57
  app: The ASGI application to wrap.
56
58
  storage: The storage backend to use for caching. Defaults to AsyncSqliteStorage.
57
- cache_options: Configuration options for caching behavior.
58
- ignore_specification: If True, bypasses HTTP caching rules and caches all responses.
59
+ policy: Caching policy to use. Can be SpecificationPolicy (respects RFC 9111) or
60
+ FilterPolicy (user-defined filtering). Defaults to SpecificationPolicy().
59
61
 
60
62
  Example:
61
63
  ```python
62
64
  from hishel.asgi import ASGICacheMiddleware
63
- from hishel import AsyncSqliteStorage, CacheOptions
65
+ from hishel import AsyncSqliteStorage
66
+ from hishel._policies import SpecificationPolicy, CacheOptions
64
67
 
65
68
  # Wrap your ASGI app
66
69
  app = ASGICacheMiddleware(
67
70
  app=my_asgi_app,
68
71
  storage=AsyncSqliteStorage(),
69
- cache_options=CacheOptions(),
72
+ policy=SpecificationPolicy(cache_options=CacheOptions()),
70
73
  )
71
74
  ```
72
75
  """
@@ -75,18 +78,16 @@ class ASGICacheMiddleware:
75
78
  self,
76
79
  app: _ASGIApp,
77
80
  storage: AsyncBaseStorage | None = None,
78
- cache_options: CacheOptions | None = None,
79
- ignore_specification: bool = False,
81
+ policy: CachePolicy | None = None,
80
82
  ) -> None:
81
83
  self.app = app
82
84
  self.storage = storage
83
- self._cache_options = cache_options
84
- self._ignore_specification = ignore_specification
85
+ self._policy = policy
85
86
 
86
87
  logger.info(
87
- "Initialized ASGICacheMiddleware with storage=%s, ignore_specification=%s",
88
+ "Initialized ASGICacheMiddleware with storage=%s, policy=%s",
88
89
  type(storage).__name__ if storage else "None",
89
- ignore_specification,
90
+ type(policy).__name__ if policy else "None",
90
91
  )
91
92
 
92
93
  async def __call__(self, scope: _Scope, receive: _Receive, send: _Send) -> None:
@@ -122,7 +123,7 @@ class ASGICacheMiddleware:
122
123
  logger.debug("Sending request to wrapped application: url=%s", request.url)
123
124
 
124
125
  # Create a buffered receive callable that replays the request body from the stream
125
- body_iterator = request.aiter_stream()
126
+ body_iterator = request._aiter_stream()
126
127
  body_exhausted = False
127
128
  bytes_received = 0
128
129
 
@@ -209,7 +210,7 @@ class ASGICacheMiddleware:
209
210
 
210
211
  return Response(
211
212
  status_code=status_code,
212
- headers=Headers(headers_dict),
213
+ headers=Headers(filter_mapping(headers_dict, ["Transfer-Encoding"])),
213
214
  stream=response_stream(),
214
215
  metadata={},
215
216
  )
@@ -219,8 +220,7 @@ class ASGICacheMiddleware:
219
220
  cache_proxy = AsyncCacheProxy(
220
221
  request_sender=send_request_to_app,
221
222
  storage=self.storage,
222
- cache_options=self._cache_options,
223
- ignore_specification=self._ignore_specification,
223
+ policy=self._policy,
224
224
  )
225
225
 
226
226
  # Convert ASGI request to internal Request (using async iterator, not reading into memory)
@@ -352,7 +352,7 @@ class ASGICacheMiddleware:
352
352
  # Send response body in chunks
353
353
  bytes_sent = 0
354
354
  chunk_count = 0
355
- async for chunk in response.aiter_stream():
355
+ async for chunk in response._aiter_stream():
356
356
  await send(
357
357
  {
358
358
  "type": "http.response.body",
hishel/requests.py CHANGED
@@ -6,9 +6,9 @@ from typing import Any, Iterator, Mapping, Optional, overload
6
6
  from typing_extensions import assert_never
7
7
 
8
8
  from hishel import Headers, Request, Response as Response
9
- from hishel._core._spec import CacheOptions
10
9
  from hishel._core._storages._sync_base import SyncBaseStorage
11
10
  from hishel._core.models import extract_metadata_from_headers
11
+ from hishel._policies import CachePolicy
12
12
  from hishel._sync_cache import SyncCacheProxy
13
13
  from hishel._utils import snake_to_header
14
14
 
@@ -163,15 +163,13 @@ class CacheAdapter(HTTPAdapter):
163
163
  max_retries: int = 0,
164
164
  pool_block: bool = False,
165
165
  storage: SyncBaseStorage | None = None,
166
- cache_options: CacheOptions | None = None,
167
- ignore_specification: bool = False,
166
+ policy: CachePolicy | None = None,
168
167
  ):
169
168
  super().__init__(pool_connections, pool_maxsize, max_retries, pool_block)
170
169
  self._cache_proxy = SyncCacheProxy(
171
170
  request_sender=self._send_request,
172
171
  storage=storage,
173
- cache_options=cache_options,
174
- ignore_specification=ignore_specification,
172
+ policy=policy,
175
173
  )
176
174
  self.storage = self._cache_proxy.storage
177
175