hishel-0.1.5-py3-none-any.whl → hishel-1.0.0-py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (55)
  1. hishel/__init__.py +63 -52
  2. hishel/_async_cache.py +213 -0
  3. hishel/_async_httpx.py +236 -0
  4. hishel/{beta/_core → _core}/_headers.py +11 -1
  5. hishel/{beta/_core → _core}/_spec.py +270 -136
  6. hishel/_core/_storages/_async_base.py +71 -0
  7. hishel/_core/_storages/_async_sqlite.py +420 -0
  8. hishel/_core/_storages/_packing.py +144 -0
  9. hishel/_core/_storages/_sync_base.py +71 -0
  10. hishel/_core/_storages/_sync_sqlite.py +420 -0
  11. hishel/{beta/_core → _core}/models.py +100 -37
  12. hishel/_policies.py +49 -0
  13. hishel/_sync_cache.py +213 -0
  14. hishel/_sync_httpx.py +236 -0
  15. hishel/_utils.py +37 -366
  16. hishel/asgi.py +400 -0
  17. hishel/fastapi.py +263 -0
  18. hishel/httpx.py +12 -0
  19. hishel/{beta/requests.py → requests.py} +31 -25
  20. hishel-1.0.0.dist-info/METADATA +513 -0
  21. hishel-1.0.0.dist-info/RECORD +24 -0
  22. hishel/_async/__init__.py +0 -5
  23. hishel/_async/_client.py +0 -30
  24. hishel/_async/_mock.py +0 -43
  25. hishel/_async/_pool.py +0 -201
  26. hishel/_async/_storages.py +0 -768
  27. hishel/_async/_transports.py +0 -282
  28. hishel/_controller.py +0 -581
  29. hishel/_exceptions.py +0 -10
  30. hishel/_files.py +0 -54
  31. hishel/_headers.py +0 -215
  32. hishel/_lfu_cache.py +0 -71
  33. hishel/_lmdb_types_.pyi +0 -53
  34. hishel/_s3.py +0 -122
  35. hishel/_serializers.py +0 -329
  36. hishel/_sync/__init__.py +0 -5
  37. hishel/_sync/_client.py +0 -30
  38. hishel/_sync/_mock.py +0 -43
  39. hishel/_sync/_pool.py +0 -201
  40. hishel/_sync/_storages.py +0 -768
  41. hishel/_sync/_transports.py +0 -282
  42. hishel/_synchronization.py +0 -37
  43. hishel/beta/__init__.py +0 -59
  44. hishel/beta/_async_cache.py +0 -167
  45. hishel/beta/_core/__init__.py +0 -0
  46. hishel/beta/_core/_async/_storages/_sqlite.py +0 -411
  47. hishel/beta/_core/_base/_storages/_base.py +0 -272
  48. hishel/beta/_core/_base/_storages/_packing.py +0 -165
  49. hishel/beta/_core/_sync/_storages/_sqlite.py +0 -411
  50. hishel/beta/_sync_cache.py +0 -167
  51. hishel/beta/httpx.py +0 -328
  52. hishel-0.1.5.dist-info/METADATA +0 -258
  53. hishel-0.1.5.dist-info/RECORD +0 -41
  54. {hishel-0.1.5.dist-info → hishel-1.0.0.dist-info}/WHEEL +0 -0
  55. {hishel-0.1.5.dist-info → hishel-1.0.0.dist-info}/licenses/LICENSE +0 -0
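
The rename entries above (items 4, 5, 11, and 19) show the provisional hishel.beta namespace being promoted to the package root in 1.0.0. A minimal compatibility sketch for code that must import the core models under either layout; the module paths are taken from the rename entries, and the availability of Request and Response in both layouts is inferred from the models.py hunk further down:

    try:
        # 1.0.0 layout: the former beta core now lives at the package root.
        from hishel._core.models import Request, Response
    except ImportError:
        # 0.1.5 layout: the same module under the provisional beta namespace.
        from hishel.beta._core.models import Request, Response

Application code should rarely need the underscored modules at all: the new hishel/_policies.py below imports the same names via `from hishel import Request, Response`, so the top-level package re-export is the stable entry point.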
hishel/_core/_storages/_sync_sqlite.py ADDED
@@ -0,0 +1,420 @@
+ from __future__ import annotations
+
+ import time
+ import uuid
+ from dataclasses import replace
+ from typing import (
+     Any,
+     Iterable,
+     Iterator,
+     Callable,
+     List,
+     Optional,
+     Union,
+ )
+
+ from hishel._core._storages._sync_base import SyncBaseStorage
+ from hishel._core._storages._packing import pack, unpack
+ from hishel._core.models import (
+     Entry,
+     EntryMeta,
+     Request,
+     Response,
+ )
+ from hishel._utils import ensure_cache_dict
+
+ # Batch cleanup configuration
+ # How often to run cleanup (seconds). Default: 1 hour.
+ BATCH_CLEANUP_INTERVAL = 3600
+ # How long to wait after storage creation before allowing the first cleanup (seconds)
+ BATCH_CLEANUP_START_DELAY = 5 * 60
+ # Number of rows to process per chunk when cleaning
+ BATCH_CLEANUP_CHUNK_SIZE = 200
+
+
+ try:
+     import sqlite3
+
+     class SyncSqliteStorage(SyncBaseStorage):
+         _COMPLETE_CHUNK_NUMBER = -1
+
+         def __init__(
+             self,
+             *,
+             connection: Optional[sqlite3.Connection] = None,
+             database_path: str = "hishel_cache.db",
+             default_ttl: Optional[float] = None,
+             refresh_ttl_on_access: bool = True,
+         ) -> None:
+             base_path = ensure_cache_dict()
+
+             self.connection = connection
+             self.database_path = base_path / database_path
+             self.default_ttl = default_ttl
+             self.refresh_ttl_on_access = refresh_ttl_on_access
+             self.last_cleanup = time.time() - BATCH_CLEANUP_INTERVAL + BATCH_CLEANUP_START_DELAY
+             # When this storage instance was created. Used to delay the first cleanup.
+             self._start_time = time.time()
+             self._initialized = False
+
+         def _ensure_connection(self) -> sqlite3.Connection:
+             """Ensure connection is established and database is initialized."""
+             if self.connection is None:
+                 self.connection = sqlite3.connect(str(self.database_path))
+             if not self._initialized:
+                 self._initialize_database()
+                 self._initialized = True
+             return self.connection
+
+         def _initialize_database(self) -> None:
+             """Initialize the database schema."""
+             assert self.connection is not None
+             cursor = self.connection.cursor()
+
+             # Table for storing request/response pairs
+             cursor.execute("""
+                 CREATE TABLE IF NOT EXISTS entries (
+                     id BLOB PRIMARY KEY,
+                     cache_key BLOB,
+                     data BLOB NOT NULL,
+                     created_at REAL NOT NULL,
+                     deleted_at REAL
+                 )
+             """)
+
+             # Table for storing response stream chunks only
+             cursor.execute("""
+                 CREATE TABLE IF NOT EXISTS streams (
+                     entry_id BLOB NOT NULL,
+                     chunk_number INTEGER NOT NULL,
+                     chunk_data BLOB NOT NULL,
+                     PRIMARY KEY (entry_id, chunk_number),
+                     FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
+                 )
+             """)
+
+             # Indexes for performance
+             cursor.execute("CREATE INDEX IF NOT EXISTS idx_entries_deleted_at ON entries(deleted_at)")
+             cursor.execute("CREATE INDEX IF NOT EXISTS idx_entries_cache_key ON entries(cache_key)")
+
+             self.connection.commit()
+
+         def create_entry(
+             self, request: Request, response: Response, key: str, id_: uuid.UUID | None = None
+         ) -> Entry:
+             key_bytes = key.encode("utf-8")
+
+             connection = self._ensure_connection()
+             cursor = connection.cursor()
+
+             # Create a new entry directly with both request and response
+             pair_id = id_ if id_ is not None else uuid.uuid4()
+             pair_meta = EntryMeta(
+                 created_at=time.time(),
+             )
+
+             assert isinstance(response.stream, (Iterator, Iterable))
+             response_with_stream = replace(
+                 response,
+                 stream=self._save_stream(response.stream, pair_id.bytes),
+             )
+
+             complete_entry = Entry(
+                 id=pair_id,
+                 request=request,
+                 response=response_with_stream,
+                 meta=pair_meta,
+                 cache_key=key_bytes,
+             )
+
+             # Insert the complete entry into the database
+             cursor.execute(
+                 "INSERT INTO entries (id, cache_key, data, created_at, deleted_at) VALUES (?, ?, ?, ?, ?)",
+                 (pair_id.bytes, key_bytes, pack(complete_entry, kind="pair"), pair_meta.created_at, None),
+             )
+             connection.commit()
+
+             return complete_entry
+
+         def get_entries(self, key: str) -> List[Entry]:
+             final_pairs: List[Entry] = []
+
+             now = time.time()
+             if now - self.last_cleanup >= BATCH_CLEANUP_INTERVAL:
+                 try:
+                     self._batch_cleanup()
+                 except Exception:
+                     # don't let cleanup prevent reads; failures are non-fatal
+                     pass
+
+             connection = self._ensure_connection()
+             cursor = connection.cursor()
+             # Query entries directly by cache_key
+             cursor.execute(
+                 "SELECT id, data FROM entries WHERE cache_key = ?",
+                 (key.encode("utf-8"),),
+             )
+
+             for row in cursor.fetchall():
+                 pair_data = unpack(row[1], kind="pair")
+
+                 # Skip entries without a response (incomplete)
+                 if not isinstance(pair_data, Entry) or pair_data.response is None:
+                     continue
+
+                 final_pairs.append(pair_data)
+
+             pairs_with_streams: List[Entry] = []
+
+             # Only restore response streams from cache
+             for pair in final_pairs:
+                 pairs_with_streams.append(
+                     replace(
+                         pair,
+                         response=replace(
+                             pair.response,
+                             stream=self._stream_data_from_cache(pair.id.bytes),
+                         ),
+                     )
+                 )
+             return pairs_with_streams
+
+         def update_entry(
+             self,
+             id: uuid.UUID,
+             new_pair: Union[Entry, Callable[[Entry], Entry]],
+         ) -> Optional[Entry]:
+             connection = self._ensure_connection()
+             cursor = connection.cursor()
+             cursor.execute("SELECT data FROM entries WHERE id = ?", (id.bytes,))
+             result = cursor.fetchone()
+
+             if result is None:
+                 return None
+
+             pair = unpack(result[0], kind="pair")
+
+             # Skip entries without a response (incomplete)
+             if not isinstance(pair, Entry) or pair.response is None:
+                 return None
+
+             if isinstance(new_pair, Entry):
+                 complete_pair = new_pair
+             else:
+                 complete_pair = new_pair(pair)
+
+             if pair.id != complete_pair.id:
+                 raise ValueError("Pair ID mismatch")
+
+             cursor.execute(
+                 "UPDATE entries SET data = ? WHERE id = ?",
+                 (pack(complete_pair, kind="pair"), id.bytes),
+             )
+
+             if pair.cache_key != complete_pair.cache_key:
+                 cursor.execute(
+                     "UPDATE entries SET cache_key = ? WHERE id = ?",
+                     (complete_pair.cache_key, complete_pair.id.bytes),
+                 )
+
+             connection.commit()
+
+             return complete_pair
+
+         def remove_entry(self, id: uuid.UUID) -> None:
+             connection = self._ensure_connection()
+             cursor = connection.cursor()
+             cursor.execute("SELECT data FROM entries WHERE id = ?", (id.bytes,))
+             result = cursor.fetchone()
+
+             if result is None:
+                 return None
+
+             pair = unpack(result[0], kind="pair")
+             self._soft_delete_pair(pair, cursor)
+             connection.commit()
+
+         def _is_stream_complete(self, pair_id: uuid.UUID, cursor: sqlite3.Cursor) -> bool:
+             # Check if there's a completion marker (chunk_number = -1) for response stream
+             cursor.execute(
+                 "SELECT 1 FROM streams WHERE entry_id = ? AND chunk_number = ? LIMIT 1",
+                 (pair_id.bytes, self._COMPLETE_CHUNK_NUMBER),
+             )
+             return cursor.fetchone() is not None
+
+         def _soft_delete_pair(
+             self,
+             pair: Entry,
+             cursor: sqlite3.Cursor,
+         ) -> None:
+             """
+             Mark the pair as deleted by setting the deleted_at timestamp.
+             """
+             marked_pair = self.mark_pair_as_deleted(pair)
+             cursor.execute(
+                 "UPDATE entries SET data = ?, deleted_at = ? WHERE id = ?",
+                 (
+                     pack(marked_pair, kind="pair"),
+                     marked_pair.meta.deleted_at,
+                     pair.id.bytes,
+                 ),
+             )
+
+         def _is_pair_expired(self, pair: Entry, cursor: sqlite3.Cursor) -> bool:
+             """
+             Check if the pair is expired.
+             """
+             ttl = pair.request.metadata["hishel_ttl"] if "hishel_ttl" in pair.request.metadata else self.default_ttl
+             created_at = pair.meta.created_at
+             if ttl is None:
+                 return False
+             return created_at + ttl < time.time()
+
+         def _batch_cleanup(
+             self,
+         ) -> None:
+             """
+             Cleanup expired entries in the database.
+             """
+             should_mark_as_deleted: List[Entry] = []
+             should_hard_delete: List[Entry] = []
+
+             connection = self._ensure_connection()
+             cursor = connection.cursor()
+
+             # Process entries in chunks to avoid loading the entire table into memory.
+             chunk_size = BATCH_CLEANUP_CHUNK_SIZE
+             offset = 0
+             while True:
+                 cursor.execute(
+                     "SELECT id, data FROM entries LIMIT ? OFFSET ?",
+                     (chunk_size, offset),
+                 )
+                 rows = cursor.fetchall()
+                 if not rows:
+                     break
+
+                 for row in rows:
+                     pair = unpack(row[1], kind="pair")
+                     if pair is None:
+                         continue
+
+                     # expired but not yet soft-deleted
+                     if self._is_pair_expired(pair, cursor) and not self.is_soft_deleted(pair):
+                         should_mark_as_deleted.append(pair)
+
+                     # soft-deleted and safe to hard delete, or corrupted pair
+                     if (self.is_soft_deleted(pair) and self.is_safe_to_hard_delete(pair)) or self._is_corrupted(
+                         pair, cursor
+                     ):
+                         should_hard_delete.append(pair)
+
+                 # advance pagination
+                 offset += len(rows)
+
+             for pair in should_mark_as_deleted:
+                 self._soft_delete_pair(pair, cursor)
+
+             for pair in should_hard_delete:
+                 self._hard_delete_pair(pair, cursor)
+
+             connection.commit()
+
+         def _is_corrupted(self, pair: Entry, cursor: sqlite3.Cursor) -> bool:
+             # if entry was created more than 1 hour ago and still has no response (incomplete)
+             if pair.meta.created_at + 3600 < time.time() and pair.response is None:
+                 return True
+
+             # Check if response stream is complete for Entry with response
+             if (
+                 isinstance(pair, Entry)
+                 and pair.response is not None
+                 and not self._is_stream_complete(pair.id, cursor)
+             ):
+                 return True
+             return False
+
+         def _hard_delete_pair(self, pair: Entry, cursor: sqlite3.Cursor) -> None:
+             """
+             Permanently delete the pair from the database.
+             """
+             cursor.execute("DELETE FROM entries WHERE id = ?", (pair.id.bytes,))
+
+             # Delete response stream for this entry
+             self._delete_stream(pair.id.bytes, cursor)
+
+         def _delete_stream(
+             self,
+             entry_id: bytes,
+             cursor: sqlite3.Cursor,
+         ) -> None:
+             """
+             Delete response stream associated with the given entry ID.
+             """
+             cursor.execute("DELETE FROM streams WHERE entry_id = ?", (entry_id,))
+
+         def _save_stream(
+             self,
+             stream: Iterator[bytes],
+             entry_id: bytes,
+         ) -> Iterator[bytes]:
+             """
+             Wrapper around an iterator that also saves the response data to the cache in chunks.
+             """
+             chunk_number = 0
+             content_length = 0
+             for chunk in stream:
+                 content_length += len(chunk)
+                 connection = self._ensure_connection()
+                 cursor = connection.cursor()
+                 cursor.execute(
+                     "INSERT INTO streams (entry_id, chunk_number, chunk_data) VALUES (?, ?, ?)",
+                     (entry_id, chunk_number, chunk),
+                 )
+                 connection.commit()
+                 chunk_number += 1
+                 yield chunk
+
+             # Mark end of stream with chunk_number = -1
+             connection = self._ensure_connection()
+             cursor = connection.cursor()
+             cursor.execute(
+                 "INSERT INTO streams (entry_id, chunk_number, chunk_data) VALUES (?, ?, ?)",
+                 (entry_id, self._COMPLETE_CHUNK_NUMBER, b""),
+             )
+             connection.commit()
+
+         def _stream_data_from_cache(
+             self,
+             entry_id: bytes,
+         ) -> Iterator[bytes]:
+             """
+             Get an iterator that yields the response stream data from the cache.
+             """
+             chunk_number = 0
+
+             connection = self._ensure_connection()
+             while True:
+                 cursor = connection.cursor()
+                 cursor.execute(
+                     "SELECT chunk_data FROM streams WHERE entry_id = ? AND chunk_number = ?",
+                     (entry_id, chunk_number),
+                 )
+                 result = cursor.fetchone()
+
+                 if result is None:
+                     break
+                 chunk = result[0]
+                 # chunk_number = -1 is the completion marker with empty data
+                 if chunk == b"":
+                     break
+                 yield chunk
+                 chunk_number += 1
+ except ImportError:
+
+     class SyncSqliteStorage:  # type: ignore[no-redef]
+         def __init__(self, *args: Any, **kwargs: Any) -> None:
+             raise ImportError(
+                 "The 'sqlite3' module is required to use the `SyncSqliteStorage` integration, "
+                 "but it is not available in this Python installation."
+             )
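
Taken together, the storage above keeps packed entry metadata in the entries table and lazily tees response bodies into the streams table as the caller consumes the wrapped iterator. A usage sketch under stated assumptions: the constructor arguments are exactly those of __init__ above, while the request/response objects and the "GET:https://example.com/" cache key are placeholders, since Request/Response construction and key derivation are outside this hunk:

    from hishel._core._storages._sync_sqlite import SyncSqliteStorage

    storage = SyncSqliteStorage(
        database_path="my_cache.db",  # created under the directory from ensure_cache_dict()
        default_ttl=300.0,            # entries count as expired 5 minutes after creation
    )

    # `request` and `response` are hishel._core.models objects built elsewhere.
    entry = storage.create_entry(request, response, key="GET:https://example.com/")

    # Draining the returned stream is what writes the body chunks into the
    # `streams` table, terminated by the chunk_number = -1 completion marker.
    body = b"".join(entry.response.stream)

    # Later lookups rebuild the response stream from the `streams` table.
    for cached in storage.get_entries("GET:https://example.com/"):
        assert b"".join(cached.response.stream) == body

    storage.remove_entry(entry.id)  # soft delete; _batch_cleanup() hard-deletes later

Note the soft-delete lifecycle: remove_entry only stamps deleted_at, and rows are physically removed by _batch_cleanup, which runs opportunistically from get_entries at most once per BATCH_CLEANUP_INTERVAL.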
hishel/{beta/_core → _core}/models.py RENAMED
@@ -5,14 +5,18 @@ import uuid
  from dataclasses import dataclass, field
  from typing import (
      Any,
+     AsyncIterable,
      AsyncIterator,
+     Iterable,
      Iterator,
      Mapping,
      Optional,
      TypedDict,
+     cast,
  )

- from hishel.beta._core._headers import Headers
+ from hishel._core._headers import Headers
+ from hishel._utils import make_async_iterator, make_sync_iterator


  class AnyIterable:
@@ -64,7 +68,9 @@ class RequestMetadata(TypedDict, total=False):
      """


- def extract_metadata_from_headers(headers: Mapping[str, str]) -> RequestMetadata:
+ def extract_metadata_from_headers(
+     headers: Mapping[str, str],
+ ) -> RequestMetadata:
      metadata: RequestMetadata = {}
      if "X-Hishel-Ttl" in headers:
          try:
@@ -94,33 +100,71 @@ class Request:
      stream: Iterator[bytes] | AsyncIterator[bytes] = field(default_factory=lambda: iter(AnyIterable()))
      metadata: RequestMetadata | Mapping[str, Any] = field(default_factory=dict)

-     def iter_stream(self) -> Iterator[bytes]:
-         if isinstance(self.stream, Iterator):
-             return self.stream
+     def _iter_stream(self) -> Iterator[bytes]:
+         if hasattr(self, "collected_body"):
+             yield getattr(self, "collected_body")
+             return
+         if isinstance(self.stream, (Iterator, Iterable)):
+             yield from self.stream
+             return
          raise TypeError("Request stream is not an Iterator")

-     async def aiter_stream(self) -> AsyncIterator[bytes]:
-         if isinstance(self.stream, AsyncIterator):
+     async def _aiter_stream(self) -> AsyncIterator[bytes]:
+         if hasattr(self, "collected_body"):
+             yield getattr(self, "collected_body")
+             return
+         if isinstance(self.stream, (AsyncIterator, AsyncIterable)):
              async for chunk in self.stream:
                  yield chunk
+             return
          else:
              raise TypeError("Request stream is not an AsyncIterator")

+     def read(self) -> bytes:
+         """
+         Synchronously reads the entire request body without consuming the stream.
+         """
+         if not isinstance(self.stream, Iterator):
+             raise TypeError("Request stream is not an Iterator")
+
+         if hasattr(self, "collected_body"):
+             return cast(bytes, getattr(self, "collected_body"))
+
+         collected = b"".join([chunk for chunk in self.stream])
+         setattr(self, "collected_body", collected)
+         self.stream = make_sync_iterator([collected])
+         return collected
+
+     async def aread(self) -> bytes:
+         """
+         Asynchronously reads the entire request body without consuming the stream.
+         """
+         if not isinstance(self.stream, AsyncIterator):
+             raise TypeError("Request stream is not an AsyncIterator")
+
+         if hasattr(self, "collected_body"):
+             return cast(bytes, getattr(self, "collected_body"))
+
+         collected = b"".join([chunk async for chunk in self.stream])
+         setattr(self, "collected_body", collected)
+         self.stream = make_async_iterator([collected])
+         return collected
+


  class ResponseMetadata(TypedDict, total=False):
      # All the names here should be prefixed with "hishel_" to avoid collisions with user data
-     hishel_from_cache: bool | None
+     hishel_from_cache: bool
      """Indicates whether the response was served from cache."""

-     hishel_revalidated: bool | None
+     hishel_revalidated: bool
      """Indicates whether the response was revalidated with the origin server."""

-     hishel_spec_ignored: bool | None
-     """Indicates whether the caching specification was ignored for this response."""
-
-     hishel_stored: bool | None
+     hishel_stored: bool
      """Indicates whether the response was stored in cache."""

+     hishel_created_at: float
+     """Timestamp when the response was cached."""
+


  @dataclass
  class Response:
@@ -129,48 +173,67 @@ class Response:
      stream: Iterator[bytes] | AsyncIterator[bytes] = field(default_factory=lambda: iter(AnyIterable()))
      metadata: ResponseMetadata | Mapping[str, Any] = field(default_factory=dict)

-     def iter_stream(self) -> Iterator[bytes]:
+     def _iter_stream(self) -> Iterator[bytes]:
+         if hasattr(self, "collected_body"):
+             yield getattr(self, "collected_body")
+             return
          if isinstance(self.stream, Iterator):
-             return self.stream
+             yield from self.stream
+             return
          raise TypeError("Response stream is not an Iterator")

-     async def aiter_stream(self) -> AsyncIterator[bytes]:
+     async def _aiter_stream(self) -> AsyncIterator[bytes]:
+         if hasattr(self, "collected_body"):
+             yield getattr(self, "collected_body")
+             return
          if isinstance(self.stream, AsyncIterator):
              async for chunk in self.stream:
                  yield chunk
          else:
              raise TypeError("Response stream is not an AsyncIterator")

+     def read(self) -> bytes:
+         """
+         Synchronously reads the entire response body without consuming the stream.
+         """
+         if not isinstance(self.stream, Iterator):
+             raise TypeError("Response stream is not an Iterator")
+
+         if hasattr(self, "collected_body"):
+             return cast(bytes, getattr(self, "collected_body"))
+
+         collected = b"".join([chunk for chunk in self.stream])
+         setattr(self, "collected_body", collected)
+         self.stream = make_sync_iterator([collected])
+         return collected
+
+     async def aread(self) -> bytes:
+         """
+         Asynchronously reads the entire response body without consuming the stream.
+         """
+         if not isinstance(self.stream, AsyncIterator):
+             raise TypeError("Response stream is not an AsyncIterator")
+
+         if hasattr(self, "collected_body"):
+             return cast(bytes, getattr(self, "collected_body"))
+
+         collected = b"".join([chunk async for chunk in self.stream])
+         setattr(self, "collected_body", collected)
+         self.stream = make_async_iterator([collected])
+         return collected
+

  @dataclass
- class PairMeta:
+ class EntryMeta:
      created_at: float = field(default_factory=time.time)
      deleted_at: Optional[float] = None


  @dataclass
- class Pair:
+ class Entry:
      id: uuid.UUID
      request: Request
-     meta: PairMeta
-
-
- # class used by storage
- @dataclass
- class IncompletePair(Pair):
-     extra: Mapping[str, Any] = field(default_factory=dict)
-
-
- @dataclass
- class CompletePair(Pair):
+     meta: EntryMeta
      response: Response
      cache_key: bytes
      extra: Mapping[str, Any] = field(default_factory=dict)
-
-     @classmethod
-     def create(
-         cls,
-         response: Response,
-         request: Request,
-     ) -> "CompletePair":  # pragma: nocover
-         return cls(id=uuid.uuid4(), request=request, response=response, meta=PairMeta(), cache_key=b"")
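
The new read()/aread() methods above implement a replay pattern: the first call drains the stream, stashes the result in collected_body, and swaps self.stream for a fresh single-chunk iterator so the body can be iterated again. A standalone sketch of the same pattern, in plain Python rather than the hishel classes (whose remaining constructor fields are not part of this hunk):

    from typing import Iterator


    class BufferedStream:
        """Replays a one-shot byte iterator after the first full read."""

        def __init__(self, stream: Iterator[bytes]) -> None:
            self.stream = stream

        def read(self) -> bytes:
            if not hasattr(self, "collected_body"):
                # First call: drain the underlying iterator and cache the bytes.
                self.collected_body = b"".join(self.stream)
                # Swap in a fresh iterator so later iteration still works.
                self.stream = iter([self.collected_body])
            return self.collected_body


    buffered = BufferedStream(iter([b"hello, ", b"world"]))
    assert buffered.read() == b"hello, world"
    assert buffered.read() == b"hello, world"  # replayed from collected_body
    assert b"".join(buffered.stream) == b"hello, world"

This is also why _iter_stream and _aiter_stream check collected_body first: once a body has been read, iteration must serve the buffered bytes rather than the exhausted original stream.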
hishel/_policies.py ADDED
@@ -0,0 +1,49 @@
+ from __future__ import annotations
+
+ import abc
+ import typing as t
+ from dataclasses import dataclass, field
+ from typing import Generic
+
+ from hishel import Request, Response
+ from hishel._core._spec import (
+     CacheOptions,
+ )
+
+ logger = __import__("logging").getLogger(__name__)
+
+ T = t.TypeVar("T", Request, Response)
+
+
+ class CachePolicy(abc.ABC):
+     use_body_key: bool = False
+     """Whether to include request body in cache key calculation."""
+
+
+ class BaseFilter(abc.ABC, Generic[T]):
+     @abc.abstractmethod
+     def needs_body(self) -> bool:
+         pass
+
+     @abc.abstractmethod
+     def apply(self, item: T, body: bytes | None) -> bool:
+         pass
+
+
+ @dataclass
+ class SpecificationPolicy(CachePolicy):
+     """
+     Caching policy that respects HTTP caching specification.
+     """
+
+     cache_options: CacheOptions = field(default_factory=CacheOptions)
+
+
+ @dataclass
+ class FilterPolicy(CachePolicy):
+     """
+     Caching policy that applies user-defined filtering logic.
+     """
+
+     request_filters: list[BaseFilter[Request]] = field(default_factory=list)
+     response_filters: list[BaseFilter[Response]] = field(default_factory=list)
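
To make the filter API concrete, here is a sketch of a custom filter plugged into FilterPolicy. The two overridden methods are the abstract ones declared on BaseFilter above; the request.method attribute and the way a policy is ultimately attached to a cache client are assumptions, since neither appears in this diff:

    from hishel import Request
    from hishel._policies import BaseFilter, FilterPolicy


    class MethodFilter(BaseFilter[Request]):
        """Admits a request into the cache only if its HTTP method is allowed."""

        def __init__(self, allowed: frozenset[str] = frozenset({"GET", "HEAD"})) -> None:
            self.allowed = allowed

        def needs_body(self) -> bool:
            # The decision uses the method alone, so the policy never
            # needs to buffer the request body for this filter.
            return False

        def apply(self, item: Request, body: bytes | None) -> bool:
            return item.method in self.allowed  # `method` attribute is assumed


    policy = FilterPolicy(request_filters=[MethodFilter()])

needs_body() presumably exists so the policy only buffers a body (via the read()/aread() machinery in models.py) when at least one filter actually inspects it.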