hishel 0.1.4__py3-none-any.whl → 1.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hishel/__init__.py +59 -52
- hishel/_async_cache.py +213 -0
- hishel/_async_httpx.py +236 -0
- hishel/_core/_headers.py +646 -0
- hishel/{beta/_core → _core}/_spec.py +270 -136
- hishel/_core/_storages/_async_base.py +71 -0
- hishel/_core/_storages/_async_sqlite.py +420 -0
- hishel/_core/_storages/_packing.py +144 -0
- hishel/_core/_storages/_sync_base.py +71 -0
- hishel/_core/_storages/_sync_sqlite.py +420 -0
- hishel/{beta/_core → _core}/models.py +100 -37
- hishel/_policies.py +49 -0
- hishel/_sync_cache.py +213 -0
- hishel/_sync_httpx.py +236 -0
- hishel/_utils.py +37 -366
- hishel/asgi.py +400 -0
- hishel/fastapi.py +263 -0
- hishel/httpx.py +12 -0
- hishel/{beta/requests.py → requests.py} +41 -30
- hishel-1.0.0b1.dist-info/METADATA +509 -0
- hishel-1.0.0b1.dist-info/RECORD +24 -0
- hishel/_async/__init__.py +0 -5
- hishel/_async/_client.py +0 -30
- hishel/_async/_mock.py +0 -43
- hishel/_async/_pool.py +0 -201
- hishel/_async/_storages.py +0 -768
- hishel/_async/_transports.py +0 -282
- hishel/_controller.py +0 -581
- hishel/_exceptions.py +0 -10
- hishel/_files.py +0 -54
- hishel/_headers.py +0 -215
- hishel/_lfu_cache.py +0 -71
- hishel/_lmdb_types_.pyi +0 -53
- hishel/_s3.py +0 -122
- hishel/_serializers.py +0 -329
- hishel/_sync/__init__.py +0 -5
- hishel/_sync/_client.py +0 -30
- hishel/_sync/_mock.py +0 -43
- hishel/_sync/_pool.py +0 -201
- hishel/_sync/_storages.py +0 -768
- hishel/_sync/_transports.py +0 -282
- hishel/_synchronization.py +0 -37
- hishel/beta/__init__.py +0 -59
- hishel/beta/_async_cache.py +0 -167
- hishel/beta/_core/__init__.py +0 -0
- hishel/beta/_core/_async/_storages/_sqlite.py +0 -411
- hishel/beta/_core/_base/_storages/_base.py +0 -260
- hishel/beta/_core/_base/_storages/_packing.py +0 -165
- hishel/beta/_core/_headers.py +0 -301
- hishel/beta/_core/_sync/_storages/_sqlite.py +0 -411
- hishel/beta/_sync_cache.py +0 -167
- hishel/beta/httpx.py +0 -317
- hishel-0.1.4.dist-info/METADATA +0 -404
- hishel-0.1.4.dist-info/RECORD +0 -41
- {hishel-0.1.4.dist-info → hishel-1.0.0b1.dist-info}/WHEEL +0 -0
- {hishel-0.1.4.dist-info → hishel-1.0.0b1.dist-info}/licenses/LICENSE +0 -0
hishel/_core/_storages/_sync_sqlite.py
ADDED
@@ -0,0 +1,420 @@
from __future__ import annotations

import time
import uuid
from dataclasses import replace
from typing import (
    Any,
    Iterable,
    Iterator,
    Callable,
    List,
    Optional,
    Union,
)

from hishel._core._storages._sync_base import SyncBaseStorage
from hishel._core._storages._packing import pack, unpack
from hishel._core.models import (
    Entry,
    EntryMeta,
    Request,
    Response,
)
from hishel._utils import ensure_cache_dict

# Batch cleanup configuration
# How often to run cleanup (seconds). Default: 1 hour.
BATCH_CLEANUP_INTERVAL = 3600
# How long to wait after storage creation before allowing the first cleanup (seconds)
BATCH_CLEANUP_START_DELAY = 5 * 60
# Number of rows to process per chunk when cleaning
BATCH_CLEANUP_CHUNK_SIZE = 200


try:
    import sqlite3

    class SyncSqliteStorage(SyncBaseStorage):
        _COMPLETE_CHUNK_NUMBER = -1

        def __init__(
            self,
            *,
            connection: Optional[sqlite3.Connection] = None,
            database_path: str = "hishel_cache.db",
            default_ttl: Optional[float] = None,
            refresh_ttl_on_access: bool = True,
        ) -> None:
            base_path = ensure_cache_dict()

            self.connection = connection
            self.database_path = base_path / database_path
            self.default_ttl = default_ttl
            self.refresh_ttl_on_access = refresh_ttl_on_access
            self.last_cleanup = time.time() - BATCH_CLEANUP_INTERVAL + BATCH_CLEANUP_START_DELAY
            # When this storage instance was created. Used to delay the first cleanup.
            self._start_time = time.time()
            self._initialized = False

        def _ensure_connection(self) -> sqlite3.Connection:
            """Ensure connection is established and database is initialized."""
            if self.connection is None:
                self.connection = sqlite3.connect(str(self.database_path))
            if not self._initialized:
                self._initialize_database()
                self._initialized = True
            return self.connection

        def _initialize_database(self) -> None:
            """Initialize the database schema."""
            assert self.connection is not None
            cursor = self.connection.cursor()

            # Table for storing request/response pairs
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS entries (
                    id BLOB PRIMARY KEY,
                    cache_key BLOB,
                    data BLOB NOT NULL,
                    created_at REAL NOT NULL,
                    deleted_at REAL
                )
            """)

            # Table for storing response stream chunks only
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS streams (
                    entry_id BLOB NOT NULL,
                    chunk_number INTEGER NOT NULL,
                    chunk_data BLOB NOT NULL,
                    PRIMARY KEY (entry_id, chunk_number),
                    FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
                )
            """)

            # Indexes for performance
            cursor.execute("CREATE INDEX IF NOT EXISTS idx_entries_deleted_at ON entries(deleted_at)")
            cursor.execute("CREATE INDEX IF NOT EXISTS idx_entries_cache_key ON entries(cache_key)")

            self.connection.commit()

        def create_entry(
            self, request: Request, response: Response, key: str, id_: uuid.UUID | None = None
        ) -> Entry:
            key_bytes = key.encode("utf-8")

            connection = self._ensure_connection()
            cursor = connection.cursor()

            # Create a new entry directly with both request and response
            pair_id = id_ if id_ is not None else uuid.uuid4()
            pair_meta = EntryMeta(
                created_at=time.time(),
            )

            assert isinstance(response.stream, (Iterator, Iterable))
            response_with_stream = replace(
                response,
                stream=self._save_stream(response.stream, pair_id.bytes),
            )

            complete_entry = Entry(
                id=pair_id,
                request=request,
                response=response_with_stream,
                meta=pair_meta,
                cache_key=key_bytes,
            )

            # Insert the complete entry into the database
            cursor.execute(
                "INSERT INTO entries (id, cache_key, data, created_at, deleted_at) VALUES (?, ?, ?, ?, ?)",
                (pair_id.bytes, key_bytes, pack(complete_entry, kind="pair"), pair_meta.created_at, None),
            )
            connection.commit()

            return complete_entry

        def get_entries(self, key: str) -> List[Entry]:
            final_pairs: List[Entry] = []

            now = time.time()
            if now - self.last_cleanup >= BATCH_CLEANUP_INTERVAL:
                try:
                    self._batch_cleanup()
                except Exception:
                    # don't let cleanup prevent reads; failures are non-fatal
                    pass

            connection = self._ensure_connection()
            cursor = connection.cursor()
            # Query entries directly by cache_key
            cursor.execute(
                "SELECT id, data FROM entries WHERE cache_key = ?",
                (key.encode("utf-8"),),
            )

            for row in cursor.fetchall():
                pair_data = unpack(row[1], kind="pair")

                # Skip entries without a response (incomplete)
                if not isinstance(pair_data, Entry) or pair_data.response is None:
                    continue

                final_pairs.append(pair_data)

            pairs_with_streams: List[Entry] = []

            # Only restore response streams from cache
            for pair in final_pairs:
                pairs_with_streams.append(
                    replace(
                        pair,
                        response=replace(
                            pair.response,
                            stream=self._stream_data_from_cache(pair.id.bytes),
                        ),
                    )
                )
            return pairs_with_streams

        def update_entry(
            self,
            id: uuid.UUID,
            new_pair: Union[Entry, Callable[[Entry], Entry]],
        ) -> Optional[Entry]:
            connection = self._ensure_connection()
            cursor = connection.cursor()
            cursor.execute("SELECT data FROM entries WHERE id = ?", (id.bytes,))
            result = cursor.fetchone()

            if result is None:
                return None

            pair = unpack(result[0], kind="pair")

            # Skip entries without a response (incomplete)
            if not isinstance(pair, Entry) or pair.response is None:
                return None

            if isinstance(new_pair, Entry):
                complete_pair = new_pair
            else:
                complete_pair = new_pair(pair)

            if pair.id != complete_pair.id:
                raise ValueError("Pair ID mismatch")

            cursor.execute(
                "UPDATE entries SET data = ? WHERE id = ?",
                (pack(complete_pair, kind="pair"), id.bytes),
            )

            if pair.cache_key != complete_pair.cache_key:
                cursor.execute(
                    "UPDATE entries SET cache_key = ? WHERE id = ?",
                    (complete_pair.cache_key, complete_pair.id.bytes),
                )

            connection.commit()

            return complete_pair

        def remove_entry(self, id: uuid.UUID) -> None:
            connection = self._ensure_connection()
            cursor = connection.cursor()
            cursor.execute("SELECT data FROM entries WHERE id = ?", (id.bytes,))
            result = cursor.fetchone()

            if result is None:
                return None

            pair = unpack(result[0], kind="pair")
            self._soft_delete_pair(pair, cursor)
            connection.commit()

        def _is_stream_complete(self, pair_id: uuid.UUID, cursor: sqlite3.Cursor) -> bool:
            # Check if there's a completion marker (chunk_number = -1) for response stream
            cursor.execute(
                "SELECT 1 FROM streams WHERE entry_id = ? AND chunk_number = ? LIMIT 1",
                (pair_id.bytes, self._COMPLETE_CHUNK_NUMBER),
            )
            return cursor.fetchone() is not None

        def _soft_delete_pair(
            self,
            pair: Entry,
            cursor: sqlite3.Cursor,
        ) -> None:
            """
            Mark the pair as deleted by setting the deleted_at timestamp.
            """
            marked_pair = self.mark_pair_as_deleted(pair)
            cursor.execute(
                "UPDATE entries SET data = ?, deleted_at = ? WHERE id = ?",
                (
                    pack(marked_pair, kind="pair"),
                    marked_pair.meta.deleted_at,
                    pair.id.bytes,
                ),
            )

        def _is_pair_expired(self, pair: Entry, cursor: sqlite3.Cursor) -> bool:
            """
            Check if the pair is expired.
            """
            ttl = pair.request.metadata["hishel_ttl"] if "hishel_ttl" in pair.request.metadata else self.default_ttl
            created_at = pair.meta.created_at
            if ttl is None:
                return False
            return created_at + ttl < time.time()

        def _batch_cleanup(
            self,
        ) -> None:
            """
            Cleanup expired entries in the database.
            """
            should_mark_as_deleted: List[Entry] = []
            should_hard_delete: List[Entry] = []

            connection = self._ensure_connection()
            cursor = connection.cursor()

            # Process entries in chunks to avoid loading the entire table into memory.
            chunk_size = BATCH_CLEANUP_CHUNK_SIZE
            offset = 0
            while True:
                cursor.execute(
                    "SELECT id, data FROM entries LIMIT ? OFFSET ?",
                    (chunk_size, offset),
                )
                rows = cursor.fetchall()
                if not rows:
                    break

                for row in rows:
                    pair = unpack(row[1], kind="pair")
                    if pair is None:
                        continue

                    # expired but not yet soft-deleted
                    if self._is_pair_expired(pair, cursor) and not self.is_soft_deleted(pair):
                        should_mark_as_deleted.append(pair)

                    # soft-deleted and safe to hard delete, or corrupted pair
                    if (self.is_soft_deleted(pair) and self.is_safe_to_hard_delete(pair)) or self._is_corrupted(
                        pair, cursor
                    ):
                        should_hard_delete.append(pair)

                # advance pagination
                offset += len(rows)

            for pair in should_mark_as_deleted:
                self._soft_delete_pair(pair, cursor)

            for pair in should_hard_delete:
                self._hard_delete_pair(pair, cursor)

            connection.commit()

        def _is_corrupted(self, pair: Entry, cursor: sqlite3.Cursor) -> bool:
            # if entry was created more than 1 hour ago and still has no response (incomplete)
            if pair.meta.created_at + 3600 < time.time() and pair.response is None:
                return True

            # Check if response stream is complete for Entry with response
            if (
                isinstance(pair, Entry)
                and pair.response is not None
                and not self._is_stream_complete(pair.id, cursor)
            ):
                return True
            return False

        def _hard_delete_pair(self, pair: Entry, cursor: sqlite3.Cursor) -> None:
            """
            Permanently delete the pair from the database.
            """
            cursor.execute("DELETE FROM entries WHERE id = ?", (pair.id.bytes,))

            # Delete response stream for this entry
            self._delete_stream(pair.id.bytes, cursor)

        def _delete_stream(
            self,
            entry_id: bytes,
            cursor: sqlite3.Cursor,
        ) -> None:
            """
            Delete response stream associated with the given entry ID.
            """
            cursor.execute("DELETE FROM streams WHERE entry_id = ?", (entry_id,))

        def _save_stream(
            self,
            stream: Iterator[bytes],
            entry_id: bytes,
        ) -> Iterator[bytes]:
            """
            Wrapper around an iterator that also saves the response data to the cache in chunks.
            """
            chunk_number = 0
            content_length = 0
            for chunk in stream:
                content_length += len(chunk)
                connection = self._ensure_connection()
                cursor = connection.cursor()
                cursor.execute(
                    "INSERT INTO streams (entry_id, chunk_number, chunk_data) VALUES (?, ?, ?)",
                    (entry_id, chunk_number, chunk),
                )
                connection.commit()
                chunk_number += 1
                yield chunk

            # Mark end of stream with chunk_number = -1
            connection = self._ensure_connection()
            cursor = connection.cursor()
            cursor.execute(
                "INSERT INTO streams (entry_id, chunk_number, chunk_data) VALUES (?, ?, ?)",
                (entry_id, self._COMPLETE_CHUNK_NUMBER, b""),
            )
            connection.commit()

        def _stream_data_from_cache(
            self,
            entry_id: bytes,
        ) -> Iterator[bytes]:
            """
            Get an iterator that yields the response stream data from the cache.
            """
            chunk_number = 0

            connection = self._ensure_connection()
            while True:
                cursor = connection.cursor()
                cursor.execute(
                    "SELECT chunk_data FROM streams WHERE entry_id = ? AND chunk_number = ?",
                    (entry_id, chunk_number),
                )
                result = cursor.fetchone()

                if result is None:
                    break
                chunk = result[0]
                # chunk_number = -1 is the completion marker with empty data
                if chunk == b"":
                    break
                yield chunk
                chunk_number += 1

except ImportError:

    class SyncSqliteStorage:  # type: ignore[no-redef]
        def __init__(self, *args: Any, **kwargs: Any) -> None:
            raise ImportError(
                "The 'sqlite3' library is required to use the `SyncSqliteStorage` integration. "
                "Install hishel with 'pip install hishel[async]'."
            )
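In the storage above, a response body is written to the streams table one numbered chunk at a time, and a final row with chunk_number = -1 and empty data marks the stream as fully written; _is_stream_complete (and, through it, _is_corrupted) looks for that marker, while _stream_data_from_cache simply walks chunk numbers upward until a row is missing or empty. A minimal, standalone sketch of that layout, written against plain sqlite3 with a simplified table and a made-up entry id rather than hishel's API:

import sqlite3
import uuid

COMPLETE_CHUNK_NUMBER = -1  # mirrors SyncSqliteStorage._COMPLETE_CHUNK_NUMBER

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE streams ("
    " entry_id BLOB NOT NULL,"
    " chunk_number INTEGER NOT NULL,"
    " chunk_data BLOB NOT NULL,"
    " PRIMARY KEY (entry_id, chunk_number))"
)

entry_id = uuid.uuid4().bytes

# Write: number each chunk as it arrives, then add the empty chunk_number = -1 marker.
for number, chunk in enumerate([b"hello ", b"world"]):
    conn.execute("INSERT INTO streams VALUES (?, ?, ?)", (entry_id, number, chunk))
conn.execute("INSERT INTO streams VALUES (?, ?, ?)", (entry_id, COMPLETE_CHUNK_NUMBER, b""))
conn.commit()

def is_complete(entry_id: bytes) -> bool:
    # Same check as _is_stream_complete: is the completion marker row present?
    row = conn.execute(
        "SELECT 1 FROM streams WHERE entry_id = ? AND chunk_number = ? LIMIT 1",
        (entry_id, COMPLETE_CHUNK_NUMBER),
    ).fetchone()
    return row is not None

def read_chunks(entry_id: bytes):
    # Same walk as _stream_data_from_cache: ascending chunk numbers until a row is missing or empty.
    number = 0
    while True:
        row = conn.execute(
            "SELECT chunk_data FROM streams WHERE entry_id = ? AND chunk_number = ?",
            (entry_id, number),
        ).fetchone()
        if row is None or row[0] == b"":
            break
        yield row[0]
        number += 1

assert is_complete(entry_id)
assert b"".join(read_chunks(entry_id)) == b"hello world"

Because the marker is only written after the last chunk, a download that dies midway leaves no marker behind, and _batch_cleanup can later classify that entry as corrupted and hard-delete it together with its chunks.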
hishel/{beta/_core → _core}/models.py
@@ -5,14 +5,18 @@ import uuid
 from dataclasses import dataclass, field
 from typing import (
     Any,
+    AsyncIterable,
     AsyncIterator,
+    Iterable,
     Iterator,
     Mapping,
     Optional,
     TypedDict,
+    cast,
 )

-from hishel.
+from hishel._core._headers import Headers
+from hishel._utils import make_async_iterator, make_sync_iterator


 class AnyIterable:
@@ -64,7 +68,9 @@ class RequestMetadata(TypedDict, total=False):
     """


-def extract_metadata_from_headers(
+def extract_metadata_from_headers(
+    headers: Mapping[str, str],
+) -> RequestMetadata:
     metadata: RequestMetadata = {}
     if "X-Hishel-Ttl" in headers:
         try:
@@ -94,33 +100,71 @@ class Request:
     stream: Iterator[bytes] | AsyncIterator[bytes] = field(default_factory=lambda: iter(AnyIterable()))
     metadata: RequestMetadata | Mapping[str, Any] = field(default_factory=dict)

-    def
-    if
-
+    def _iter_stream(self) -> Iterator[bytes]:
+        if hasattr(self, "collected_body"):
+            yield getattr(self, "collected_body")
+            return
+        if isinstance(self.stream, (Iterator, Iterable)):
+            yield from self.stream
+            return
         raise TypeError("Request stream is not an Iterator")

-    async def
-    if
+    async def _aiter_stream(self) -> AsyncIterator[bytes]:
+        if hasattr(self, "collected_body"):
+            yield getattr(self, "collected_body")
+            return
+        if isinstance(self.stream, (AsyncIterator, AsyncIterable)):
             async for chunk in self.stream:
                 yield chunk
+            return
         else:
             raise TypeError("Request stream is not an AsyncIterator")

+    def read(self) -> bytes:
+        """
+        Synchronously reads the entire request body without consuming the stream.
+        """
+        if not isinstance(self.stream, Iterator):
+            raise TypeError("Request stream is not an Iterator")
+
+        if hasattr(self, "collected_body"):
+            return cast(bytes, getattr(self, "collected_body"))
+
+        collected = b"".join([chunk for chunk in self.stream])
+        setattr(self, "collected_body", collected)
+        self.stream = make_sync_iterator([collected])
+        return collected
+
+    async def aread(self) -> bytes:
+        """
+        Asynchronously reads the entire request body without consuming the stream.
+        """
+        if not isinstance(self.stream, AsyncIterator):
+            raise TypeError("Request stream is not an AsyncIterator")
+
+        if hasattr(self, "collected_body"):
+            return cast(bytes, getattr(self, "collected_body"))
+
+        collected = b"".join([chunk async for chunk in self.stream])
+        setattr(self, "collected_body", collected)
+        self.stream = make_async_iterator([collected])
+        return collected
+

 class ResponseMetadata(TypedDict, total=False):
     # All the names here should be prefixed with "hishel_" to avoid collisions with user data
-    hishel_from_cache: bool
+    hishel_from_cache: bool
     """Indicates whether the response was served from cache."""

-    hishel_revalidated: bool
+    hishel_revalidated: bool
     """Indicates whether the response was revalidated with the origin server."""

-
-    """Indicates whether the caching specification was ignored for this response."""
-
-    hishel_stored: bool | None
+    hishel_stored: bool
     """Indicates whether the response was stored in cache."""

+    hishel_created_at: float
+    """Timestamp when the response was cached."""
+

 @dataclass
 class Response:
@@ -129,48 +173,67 @@ class Response:
     stream: Iterator[bytes] | AsyncIterator[bytes] = field(default_factory=lambda: iter(AnyIterable()))
     metadata: ResponseMetadata | Mapping[str, Any] = field(default_factory=dict)

-    def
+    def _iter_stream(self) -> Iterator[bytes]:
+        if hasattr(self, "collected_body"):
+            yield getattr(self, "collected_body")
+            return
         if isinstance(self.stream, Iterator):
-
+            yield from self.stream
+            return
         raise TypeError("Response stream is not an Iterator")

-    async def
+    async def _aiter_stream(self) -> AsyncIterator[bytes]:
+        if hasattr(self, "collected_body"):
+            yield getattr(self, "collected_body")
+            return
         if isinstance(self.stream, AsyncIterator):
             async for chunk in self.stream:
                 yield chunk
         else:
             raise TypeError("Response stream is not an AsyncIterator")

+    def read(self) -> bytes:
+        """
+        Synchronously reads the entire response body without consuming the stream.
+        """
+        if not isinstance(self.stream, Iterator):
+            raise TypeError("Response stream is not an Iterator")
+
+        if hasattr(self, "collected_body"):
+            return cast(bytes, getattr(self, "collected_body"))
+
+        collected = b"".join([chunk for chunk in self.stream])
+        setattr(self, "collected_body", collected)
+        self.stream = make_sync_iterator([collected])
+        return collected
+
+    async def aread(self) -> bytes:
+        """
+        Asynchronously reads the entire response body without consuming the stream.
+        """
+        if not isinstance(self.stream, AsyncIterator):
+            raise TypeError("Response stream is not an AsyncIterator")
+
+        if hasattr(self, "collected_body"):
+            return cast(bytes, getattr(self, "collected_body"))
+
+        collected = b"".join([chunk async for chunk in self.stream])
+        setattr(self, "collected_body", collected)
+        self.stream = make_async_iterator([collected])
+        return collected
+

 @dataclass
-class
+class EntryMeta:
     created_at: float = field(default_factory=time.time)
     deleted_at: Optional[float] = None


 @dataclass
-class
+class Entry:
     id: uuid.UUID
     request: Request
-    meta:
-
-
-# class used by storage
-@dataclass
-class IncompletePair(Pair):
-    extra: Mapping[str, Any] = field(default_factory=dict)
-
-
-@dataclass
-class CompletePair(Pair):
+    meta: EntryMeta
     response: Response
     cache_key: bytes
     extra: Mapping[str, Any] = field(default_factory=dict)
-
-    @classmethod
-    def create(
-        cls,
-        response: Response,
-        request: Request,
-    ) -> "CompletePair":  # pragma: nocover
-        return cls(id=uuid.uuid4(), request=request, response=response, meta=PairMeta(), cache_key=b"")
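The new read() and aread() helpers added to Request and Response above drain the stream once, memoize the result on the instance as collected_body, and then swap the stream for a fresh single-chunk iterator, so the body can be read repeatedly and the stream can still be iterated afterwards. A rough standalone sketch of that pattern, using a made-up Body class and a local stand-in for hishel._utils.make_sync_iterator rather than the actual models:

from dataclasses import dataclass, field
from typing import Iterator


def make_sync_iterator(chunks: list) -> Iterator[bytes]:
    # Stand-in for hishel._utils.make_sync_iterator.
    return iter(chunks)


@dataclass
class Body:
    stream: Iterator[bytes] = field(default_factory=lambda: iter(()))

    def read(self) -> bytes:
        # First call drains the iterator; the result is memoized on the instance
        # and the stream is replaced so it can be iterated again afterwards.
        if hasattr(self, "collected_body"):
            return getattr(self, "collected_body")
        collected = b"".join(self.stream)
        setattr(self, "collected_body", collected)
        self.stream = make_sync_iterator([collected])
        return collected


body = Body(stream=iter([b"chunk-1 ", b"chunk-2"]))
assert body.read() == b"chunk-1 chunk-2"
assert body.read() == b"chunk-1 chunk-2"            # second read served from collected_body
assert b"".join(body.stream) == b"chunk-1 chunk-2"  # stream was reset, not left consumed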
hishel/_policies.py
ADDED
@@ -0,0 +1,49 @@
from __future__ import annotations

import abc
import typing as t
from dataclasses import dataclass, field
from typing import Generic

from hishel import Request, Response
from hishel._core._spec import (
    CacheOptions,
)

logger = __import__("logging").getLogger(__name__)

T = t.TypeVar("T", Request, Response)


class CachePolicy(abc.ABC):
    use_body_key: bool = False
    """Whether to include request body in cache key calculation."""


class BaseFilter(abc.ABC, Generic[T]):
    @abc.abstractmethod
    def needs_body(self) -> bool:
        pass

    @abc.abstractmethod
    def apply(self, item: T, body: bytes | None) -> bool:
        pass


@dataclass
class SpecificationPolicy(CachePolicy):
    """
    Caching policy that respects HTTP caching specification.
    """

    cache_options: CacheOptions = field(default_factory=CacheOptions)


@dataclass
class FilterPolicy(CachePolicy):
    """
    Caching policy that applies user-defined filtering logic.
    """

    request_filters: list[BaseFilter[Request]] = field(default_factory=list)
    response_filters: list[BaseFilter[Response]] = field(default_factory=list)