hishel 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,411 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ import uuid
5
+ from dataclasses import replace
6
+ from typing import (
7
+ Iterable,
8
+ Iterator,
9
+ Callable,
10
+ List,
11
+ Literal,
12
+ Optional,
13
+ Union,
14
+ )
15
+
16
+ import sqlite3
17
+
18
+ from hishel.beta._core._base._storages._base import SyncBaseStorage, ensure_cache_dict
19
+ from hishel.beta._core._base._storages._packing import pack, unpack
20
+ from hishel.beta._core.models import (
21
+ CompletePair,
22
+ IncompletePair,
23
+ Pair,
24
+ PairMeta,
25
+ Request,
26
+ Response,
27
+ )
28
+
29
+
30
class SyncSqliteStorage(SyncBaseStorage):
    """
    Synchronous cache storage backed by a SQLite database.

    Packed pair metadata lives in the ``entries`` table. Request and response
    bodies are written chunk by chunk into the ``streams`` table, keyed by
    ``(entry_id, kind, chunk_number)``. A row whose ``chunk_number`` equals
    ``_COMPLETE_CHUNK_NUMBER`` marks a stream that was written to its end.
    """

    # Integer codes stored in ``streams.kind``.
    _STREAM_KIND = {"request": 0, "response": 1}
    # Sentinel chunk number of the end-of-stream marker row.
    _COMPLETE_CHUNK_NUMBER = -1

    def __init__(
        self,
        *,
        connection: Optional[sqlite3.Connection] = None,
        database_path: str = "hishel_cache.db",
        default_ttl: Optional[float] = None,
        refresh_ttl_on_access: bool = True,
    ) -> None:
        """
        Configure the storage; no database work happens here.

        Args:
            connection: Existing connection to use. When ``None`` a connection
                is opened lazily on first use.
            database_path: Path joined onto the directory returned by
                ``ensure_cache_dict()``; only used when ``connection`` is ``None``.
            default_ttl: TTL in seconds applied when the request metadata has no
                ``"hishel_ttl"`` key. ``None`` means entries never expire.
            refresh_ttl_on_access: Stored on the instance; no method visible in
                this class reads it.
        """
        base_path = ensure_cache_dict()

        self.connection = connection
        self.database_path = base_path / database_path
        self.default_ttl = default_ttl
        self.refresh_ttl_on_access = refresh_ttl_on_access
        self.last_cleanup = float("-inf")
        self._initialized = False

    def _ensure_connection(self) -> sqlite3.Connection:
        """Return the connection, opening it and creating the schema on first use."""
        if self.connection is None:
            self.connection = sqlite3.connect(str(self.database_path))
        if not self._initialized:
            # Also runs for caller-supplied connections, so the tables always exist.
            self._initialize_database()
            self._initialized = True
        return self.connection

    def _initialize_database(self) -> None:
        """Create the ``entries`` and ``streams`` tables and their indexes if missing."""
        assert self.connection is not None
        cursor = self.connection.cursor()

        # Table for storing request/response pairs
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS entries (
                id BLOB PRIMARY KEY,
                cache_key BLOB,
                data BLOB NOT NULL,
                created_at REAL NOT NULL,
                deleted_at REAL
            )
        """)

        # Table for storing stream chunks
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS streams (
                entry_id BLOB NOT NULL,
                kind INTEGER NOT NULL,
                chunk_number INTEGER NOT NULL,
                chunk_data BLOB NOT NULL,
                PRIMARY KEY (entry_id, kind, chunk_number),
                FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
            )
        """)

        # Indexes for performance
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_entries_deleted_at ON entries(deleted_at)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_entries_cache_key ON entries(cache_key)")
        # Note: PRIMARY KEY (entry_id, kind, chunk_number) already provides an index
        # for queries like: entry_id = ? AND kind = ? AND chunk_number = ?

        self.connection.commit()

    def create_pair(
        self,
        request: Request,
        id: uuid.UUID | None = None,
    ) -> IncompletePair:
        """
        Insert a new entry for ``request`` and return it as an incomplete pair.

        The returned pair's request stream is a wrapper that stores every chunk
        in the ``streams`` table as the caller consumes it.

        Args:
            request: Request to store; its stream must be a synchronous iterable.
            id: Identifier for the entry; a random UUID is generated when ``None``.

        Returns:
            The incomplete pair with the recording request stream attached.

        Raises:
            AssertionError: If the request stream is not a synchronous iterable.
        """
        # Checked before the INSERT so a rejected stream does not leave an orphan row.
        assert isinstance(request.stream, Iterable), "Request stream must be an Iterable, not an AsyncIterable"

        pair_id = id if id is not None else uuid.uuid4()
        pair_meta = PairMeta(
            created_at=time.time(),
        )
        pair = IncompletePair(id=pair_id, request=request, meta=pair_meta)
        packed_pair = pack(pair, kind="pair")

        connection = self._ensure_connection()
        cursor = connection.cursor()
        cursor.execute(
            "INSERT INTO entries (id, cache_key, data, created_at, deleted_at) VALUES (?, ?, ?, ?, ?)",
            (pair_id.bytes, None, packed_pair, pair_meta.created_at, None),
        )
        connection.commit()

        recording_request = replace(
            request,
            stream=self._save_stream(request.stream, pair_id.bytes, "request"),
        )
        return replace(pair, request=recording_request)

    def add_response(
        self,
        pair_id: uuid.UUID,
        response: Response,
        key: str | bytes,
    ) -> CompletePair:
        """
        Attach ``response`` to an existing entry and index it under ``key``.

        Args:
            pair_id: Identifier of the entry created by ``create_pair``.
            response: Response to store; its stream must be synchronous.
            key: Cache key; ``str`` keys are encoded as UTF-8.

        Returns:
            The complete pair, whose response stream stores each chunk in the
            ``streams`` table as it is consumed.

        Raises:
            ValueError: If no entry with ``pair_id`` exists.
            AssertionError: If the response stream is not a synchronous iterable.
        """
        if isinstance(key, str):
            key = key.encode("utf-8")

        connection = self._ensure_connection()
        cursor = connection.cursor()

        # Get the existing pair
        cursor.execute("SELECT data FROM entries WHERE id = ?", (pair_id.bytes,))
        result = cursor.fetchone()

        if result is None:
            raise ValueError(f"Entry with ID {pair_id} not found.")

        pair = unpack(result[0], kind="pair")

        assert isinstance(response.stream, (Iterator, Iterable)), "Response stream must be a synchronous iterable"
        # _save_stream is a generator: nothing is written until the caller iterates,
        # so dropping the previously stored response chunks below cannot race with it.
        response = replace(response, stream=self._save_stream(response.stream, pair_id.bytes, "response"))

        self._delete_stream(pair.id.bytes, cursor, type="response")
        complete_pair = CompletePair(id=pair.id, request=pair.request, response=response, meta=pair.meta, cache_key=key)

        # Update the entry with the complete pair and set cache_key
        cursor.execute(
            "UPDATE entries SET data = ?, cache_key = ? WHERE id = ?",
            (pack(complete_pair, kind="pair"), key, pair_id.bytes),
        )
        connection.commit()

        return complete_pair

    def get_pairs(self, key: str | bytes) -> List[CompletePair]:
        """
        Return the usable complete pairs stored under ``key``.

        Entries are skipped when they are soft-deleted (``deleted_at`` is set),
        expired according to ``_is_pair_expired``, not complete pairs, or when
        their response stream has no completion marker (the body was never
        fully written).

        Args:
            key: Cache key; ``str`` keys are encoded as UTF-8, matching
                ``add_response``.

        Returns:
            Pairs whose request and response streams lazily read the stored chunks.
        """
        if isinstance(key, str):
            key = key.encode("utf-8")

        connection = self._ensure_connection()
        cursor = connection.cursor()
        # Query entries directly by cache_key, leaving out soft-deleted rows
        cursor.execute(
            "SELECT id, data FROM entries WHERE cache_key = ? AND deleted_at IS NULL",
            (key,),
        )
        # Materialise the rows first: the cursor is reused by the checks below.
        rows = cursor.fetchall()

        usable_pairs: List[CompletePair] = []
        for row in rows:
            pair_data = unpack(row[1], kind="pair")

            if not isinstance(pair_data, CompletePair):
                continue
            if self._is_pair_expired(pair_data, cursor):
                continue
            if not self._is_stream_complete("response", pair_data.id, cursor):
                continue

            usable_pairs.append(pair_data)

        return [
            replace(
                pair,
                response=replace(
                    pair.response,
                    stream=self._stream_data_from_cache(pair.id.bytes, "response"),
                ),
                request=replace(
                    pair.request,
                    stream=self._stream_data_from_cache(pair.id.bytes, "request"),
                ),
            )
            for pair in usable_pairs
        ]

    def update_pair(
        self,
        id: uuid.UUID,
        new_pair: Union[CompletePair, Callable[[CompletePair], CompletePair]],
    ) -> Optional[CompletePair]:
        """
        Replace the stored data of a complete pair.

        Args:
            id: Identifier of the entry to update.
            new_pair: Either the replacement pair or a callable that receives
                the stored pair and returns the replacement.

        Returns:
            The stored replacement, or ``None`` when the entry does not exist or
            is still incomplete.

        Raises:
            ValueError: If the replacement's id differs from the stored pair's id.
        """
        connection = self._ensure_connection()
        cursor = connection.cursor()
        cursor.execute("SELECT data FROM entries WHERE id = ?", (id.bytes,))
        result = cursor.fetchone()

        if result is None:
            return None

        pair = unpack(result[0], kind="pair")

        if isinstance(pair, IncompletePair):
            return None

        if isinstance(new_pair, CompletePair):
            complete_pair = new_pair
        else:
            complete_pair = new_pair(pair)

        if pair.id != complete_pair.id:
            raise ValueError("Pair ID mismatch")

        cursor.execute("UPDATE entries SET data = ? WHERE id = ?", (pack(complete_pair, kind="pair"), id.bytes))

        # Keep the indexed cache_key column in step with the packed pair.
        if pair.cache_key != complete_pair.cache_key:
            cursor.execute(
                "UPDATE entries SET cache_key = ? WHERE id = ?",
                (complete_pair.cache_key, complete_pair.id.bytes),
            )

        connection.commit()

        return complete_pair

    def remove(self, id: uuid.UUID) -> None:
        """Soft-delete the entry with the given id; do nothing if it does not exist."""
        connection = self._ensure_connection()
        cursor = connection.cursor()
        cursor.execute("SELECT data FROM entries WHERE id = ?", (id.bytes,))
        result = cursor.fetchone()

        if result is None:
            return None

        pair = unpack(result[0], kind="pair")
        self._soft_delete_pair(pair, cursor)
        connection.commit()

    def _is_stream_complete(
        self, kind: Literal["request", "response"], pair_id: uuid.UUID, cursor: sqlite3.Cursor
    ) -> bool:
        """Return True when the completion marker row exists for the given stream."""
        kind_id = self._STREAM_KIND[kind]
        # Check if there's a completion marker (chunk_number = -1)
        cursor.execute(
            "SELECT 1 FROM streams WHERE entry_id = ? AND kind = ? AND chunk_number = ? LIMIT 1",
            (pair_id.bytes, kind_id, self._COMPLETE_CHUNK_NUMBER),
        )
        return cursor.fetchone() is not None

    def _soft_delete_pair(self, pair: Union[CompletePair, IncompletePair], cursor: sqlite3.Cursor) -> None:
        """
        Mark the pair as deleted by setting the deleted_at timestamp.

        Both the packed pair and the ``deleted_at`` column are updated. The
        caller is responsible for committing.
        """
        marked_pair = self.mark_pair_as_deleted(pair)
        cursor.execute(
            "UPDATE entries SET data = ?, deleted_at = ? WHERE id = ?",
            (pack(marked_pair, kind="pair"), marked_pair.meta.deleted_at, pair.id.bytes),
        )

    def _is_pair_expired(self, pair: Pair, cursor: sqlite3.Cursor) -> bool:
        """
        Check if the pair is expired.

        The TTL comes from the request metadata key ``"hishel_ttl"`` when that
        key is present (even if its value is ``None``), otherwise from
        ``default_ttl``. A ``None`` TTL never expires. ``cursor`` is unused.
        """
        ttl = pair.request.metadata.get("hishel_ttl", self.default_ttl)
        if ttl is None:
            return False
        return pair.meta.created_at + ttl < time.time()

    def _batch_cleanup(
        self,
    ) -> None:
        """
        Cleanup expired pairs in the database.

        Expired pairs that are not yet soft-deleted get soft-deleted. Pairs that
        are soft-deleted and safe to hard-delete, or that are corrupted, are
        removed permanently. No method visible in this class calls this one.
        """
        should_mark_as_deleted: List[Union[CompletePair, IncompletePair]] = []
        should_hard_delete: List[Union[CompletePair, IncompletePair]] = []

        connection = self._ensure_connection()
        cursor = connection.cursor()
        cursor.execute("SELECT id, data FROM entries")

        # fetchall() up front: the helper checks below reuse this cursor.
        for row in cursor.fetchall():
            pair = unpack(row[1], kind="pair")
            if pair is None:
                continue
            if self._is_pair_expired(pair, cursor) and not self.is_soft_deleted(pair):
                should_mark_as_deleted.append(pair)

            if (self.is_soft_deleted(pair) and self.is_safe_to_hard_delete(pair)) or self._is_corrupted(
                pair, cursor
            ):
                should_hard_delete.append(pair)

        for pair in should_mark_as_deleted:
            self._soft_delete_pair(pair, cursor)

        for pair in should_hard_delete:
            self._hard_delete_pair(pair, cursor)

        connection.commit()

    def _is_corrupted(self, pair: IncompletePair | CompletePair, cursor: sqlite3.Cursor) -> bool:
        """
        Return True for pairs that can no longer become usable.

        That is an incomplete pair created more than an hour ago, or a complete
        pair whose request stream has no completion marker.
        """
        # if pair was created more than 1 hour ago and still not completed
        if pair.meta.created_at + 3600 < time.time() and isinstance(pair, IncompletePair):
            return True

        if isinstance(pair, CompletePair) and not self._is_stream_complete("request", pair.id, cursor):
            return True
        return False

    def _hard_delete_pair(self, pair: CompletePair | IncompletePair, cursor: sqlite3.Cursor) -> None:
        """
        Permanently delete the pair from the database.

        The caller is responsible for committing.
        """
        cursor.execute("DELETE FROM entries WHERE id = ?", (pair.id.bytes,))

        # Delete all streams (both request and response) for this entry.
        # Done explicitly: SQLite applies ON DELETE CASCADE only when foreign-key
        # enforcement is enabled, and this class never enables it.
        self._delete_stream(pair.id.bytes, cursor)

    def _delete_stream(
        self,
        entry_id: bytes,
        cursor: sqlite3.Cursor,
        type: Literal["request", "response", "all"] = "all",
    ) -> None:
        """
        Delete stored stream chunks for the given entry ID.

        ``type`` selects the request stream, the response stream, or both
        (``"all"``, the default). Any other value deletes nothing.
        """
        if type == "all":
            cursor.execute("DELETE FROM streams WHERE entry_id = ?", (entry_id,))
        elif type in self._STREAM_KIND:
            cursor.execute(
                "DELETE FROM streams WHERE entry_id = ? AND kind = ?", (entry_id, self._STREAM_KIND[type])
            )

    def _save_stream(
        self,
        stream: Iterator[bytes],
        entry_id: bytes,
        kind: Literal["response", "request"],
    ) -> Iterator[bytes]:
        """
        Wrapper around an iterator that also saves the data to the cache in chunks.

        Every chunk is yielded unchanged. Non-empty chunks are stored and
        committed before being yielded; empty chunks carry no data and are not
        stored. The completion marker is written only once the wrapped stream is
        exhausted, so a consumer that stops early leaves the stream incomplete.
        """
        kind_id = self._STREAM_KIND[kind]
        connection = self._ensure_connection()
        chunk_number = 0
        for chunk in stream:
            if chunk:
                cursor = connection.cursor()
                cursor.execute(
                    "INSERT INTO streams (entry_id, kind, chunk_number, chunk_data) VALUES (?, ?, ?, ?)",
                    (entry_id, kind_id, chunk_number, chunk),
                )
                connection.commit()
                chunk_number += 1
            yield chunk

        # Mark end of stream with chunk_number = -1
        cursor = connection.cursor()
        cursor.execute(
            "INSERT INTO streams (entry_id, kind, chunk_number, chunk_data) VALUES (?, ?, ?, ?)",
            (entry_id, kind_id, self._COMPLETE_CHUNK_NUMBER, b""),
        )
        connection.commit()

    def _stream_data_from_cache(
        self,
        entry_id: bytes,
        kind: Literal["response", "request"],
    ) -> Iterator[bytes]:
        """
        Get an iterator that yields the stream data from the cache.

        Chunks are read one query at a time, counting up from chunk 0 until a
        chunk number is missing. The completion marker lives at chunk number -1
        and is therefore never visited here. Stored empty chunks are skipped
        instead of ending the stream, so the data after them is not lost.
        """
        kind_id = self._STREAM_KIND[kind]
        chunk_number = 0

        connection = self._ensure_connection()
        while True:
            cursor = connection.cursor()
            cursor.execute(
                "SELECT chunk_data FROM streams WHERE entry_id = ? AND kind = ? AND chunk_number = ?",
                (entry_id, kind_id, chunk_number),
            )
            result = cursor.fetchone()

            if result is None:
                break
            chunk_number += 1
            chunk = result[0]
            if chunk:
                yield chunk
@@ -0,0 +1,176 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ import uuid
5
+ from dataclasses import dataclass, field
6
+ from typing import (
7
+ Any,
8
+ AsyncIterator,
9
+ Iterator,
10
+ Mapping,
11
+ Optional,
12
+ TypedDict,
13
+ )
14
+
15
+ from hishel.beta._core._headers import Headers
16
+
17
+
18
class AnyIterable:
    """
    One-shot byte stream usable as both a sync and an async iterator.

    Yields ``content`` exactly once when it is not ``None`` and is exhausted
    afterwards. Sync and async iteration share the same ``consumed`` flag.
    Any two instances compare equal regardless of content.
    """

    def __init__(self, content: bytes | None = None) -> None:
        self.consumed = False
        self.content = content

    def __iter__(self) -> Iterator[bytes]:
        return self

    def __next__(self) -> bytes:
        # Guard clause: nothing to give when empty or already handed out.
        if self.content is None or self.consumed:
            raise StopIteration()
        self.consumed = True
        return self.content

    def __aiter__(self) -> AsyncIterator[bytes]:
        return self

    async def __anext__(self) -> bytes:
        if self.content is None or self.consumed:
            raise StopAsyncIteration()
        self.consumed = True
        return self.content

    def __eq__(self, value: Any) -> bool:
        # Equality is purely type-based; content and consumed state are ignored.
        return isinstance(value, AnyIterable)
43
+
44
+
45
class RequestMetadata(TypedDict, total=False):
    """
    Per-request caching options understood by hishel.

    Declared with ``total=False``, so every key is optional.
    """

    # All the names here should be prefixed with "hishel_" to avoid collisions with user data
    hishel_ttl: float | None
    """When specified, hishel will remove the cached response after specified number of seconds."""

    hishel_refresh_ttl_on_access: bool | None
    """
    When True, accessing this entry refreshes its TTL. When False, the TTL remains fixed (default).
    """

    hishel_spec_ignore: bool | None
    """
    When True, hishel will ignore the caching specification for this request.
    """

    hishel_body_key: bool | None
    """
    When True, the request body is included in the cache key generation.
    This is useful for caching POST or QUERY requests with different bodies.
    """
65
+
66
+
67
def _parse_bool_header(raw: str) -> bool | None:
    """Return True/False for a recognised boolean header value, or None if unrecognised."""
    value = raw.strip().lower()
    if value in ("1", "true", "yes", "on"):
        return True
    if value in ("0", "false", "no", "off"):
        return False
    return None


def extract_metadata_from_headers(headers: Mapping[str, str]) -> RequestMetadata:
    """
    Build request metadata from the ``X-Hishel-*`` control headers.

    Recognised headers:

    * ``X-Hishel-Ttl`` -> ``hishel_ttl`` (parsed with ``float``)
    * ``X-Hishel-Refresh-Ttl-On-Access`` -> ``hishel_refresh_ttl_on_access``
    * ``X-Hishel-Spec-Ignore`` -> ``hishel_spec_ignore``
    * ``X-Hishel-Body-Key`` -> ``hishel_body_key``

    Boolean headers accept ``1/true/yes/on`` and ``0/false/no/off``, ignoring
    case and surrounding whitespace. Header names are looked up exactly as
    spelled above, so case-insensitivity depends on the mapping passed in.

    Args:
        headers: Mapping of header names to values.

    Returns:
        Metadata containing only the keys whose header was present and valid;
        malformed values are ignored.
    """
    metadata: RequestMetadata = {}

    if "X-Hishel-Ttl" in headers:
        try:
            metadata["hishel_ttl"] = float(headers["X-Hishel-Ttl"])
        except ValueError:
            # A malformed TTL is treated as "no TTL given" rather than an error.
            pass

    boolean_headers = (
        ("X-Hishel-Refresh-Ttl-On-Access", "hishel_refresh_ttl_on_access"),
        ("X-Hishel-Spec-Ignore", "hishel_spec_ignore"),
        ("X-Hishel-Body-Key", "hishel_body_key"),
    )
    for header_name, metadata_key in boolean_headers:
        if header_name not in headers:
            continue
        flag = _parse_bool_header(headers[header_name])
        if flag is not None:
            metadata[metadata_key] = flag  # type: ignore[literal-required]

    return metadata
87
+
88
+
89
@dataclass
class Request:
    """
    An HTTP request as seen by the cache.

    ``stream`` holds the body as either a sync or an async iterator of bytes;
    use ``iter_stream`` or ``aiter_stream`` to read it with a type check.
    """

    method: str
    url: str
    headers: Headers = field(default_factory=lambda: Headers({}))
    stream: Iterator[bytes] | AsyncIterator[bytes] = field(default_factory=lambda: iter(AnyIterable()))
    metadata: RequestMetadata | Mapping[str, Any] = field(default_factory=dict)

    def iter_stream(self) -> Iterator[bytes]:
        """Return the body stream, raising ``TypeError`` unless it is a sync iterator."""
        body = self.stream
        if not isinstance(body, Iterator):
            raise TypeError("Request stream is not an Iterator")
        return body

    async def aiter_stream(self) -> AsyncIterator[bytes]:
        """
        Yield the body chunks from an async stream.

        This is an async generator, so the ``TypeError`` for a non-async stream
        surfaces on first iteration rather than at call time.
        """
        body = self.stream
        if not isinstance(body, AsyncIterator):
            raise TypeError("Request stream is not an AsyncIterator")
        async for chunk in body:
            yield chunk
108
+
109
+
110
class ResponseMetadata(TypedDict, total=False):
    """
    Flags describing how hishel handled a response.

    Declared with ``total=False``, so every key is optional.
    """

    # All the names here should be prefixed with "hishel_" to avoid collisions with user data
    hishel_from_cache: bool | None
    """Indicates whether the response was served from cache."""

    hishel_revalidated: bool | None
    """Indicates whether the response was revalidated with the origin server."""

    hishel_spec_ignored: bool | None
    """Indicates whether the caching specification was ignored for this response."""

    hishel_stored: bool | None
    """Indicates whether the response was stored in cache."""
123
+
124
+
125
@dataclass
class Response:
    """
    An HTTP response as seen by the cache.

    ``stream`` holds the body as either a sync or an async iterator of bytes;
    use ``iter_stream`` or ``aiter_stream`` to read it with a type check.
    """

    status_code: int
    headers: Headers = field(default_factory=lambda: Headers({}))
    stream: Iterator[bytes] | AsyncIterator[bytes] = field(default_factory=lambda: iter(AnyIterable()))
    metadata: ResponseMetadata | Mapping[str, Any] = field(default_factory=dict)

    def iter_stream(self) -> Iterator[bytes]:
        """Return the body stream, raising ``TypeError`` unless it is a sync iterator."""
        body = self.stream
        if not isinstance(body, Iterator):
            raise TypeError("Response stream is not an Iterator")
        return body

    async def aiter_stream(self) -> AsyncIterator[bytes]:
        """
        Yield the body chunks from an async stream.

        This is an async generator, so the ``TypeError`` for a non-async stream
        surfaces on first iteration rather than at call time.
        """
        body = self.stream
        if not isinstance(body, AsyncIterator):
            raise TypeError("Response stream is not an AsyncIterator")
        async for chunk in body:
            yield chunk
143
+
144
+
145
@dataclass
class PairMeta:
    """Timestamps attached to a stored request/response pair."""

    # Defaults to the wall-clock time (time.time()) at construction.
    created_at: float = field(default_factory=time.time)
    # None by default; presumably set when a storage soft-deletes the pair (confirm against the storage code).
    deleted_at: Optional[float] = None
149
+
150
+
151
@dataclass
class Pair:
    """Common base of stored pairs: an identifier, the request and its metadata."""

    id: uuid.UUID
    request: Request
    meta: PairMeta
156
+
157
+
158
+ # class used by storage
159
@dataclass
class IncompletePair(Pair):
    """A pair that carries a request but no response field."""

    # Free-form mapping, empty by default; its intended contents are not shown in this module.
    extra: Mapping[str, Any] = field(default_factory=dict)
162
+
163
+
164
@dataclass
class CompletePair(Pair):
    """A pair that carries both the request and its response, plus the cache key."""

    response: Response
    cache_key: bytes
    extra: Mapping[str, Any] = field(default_factory=dict)

    @classmethod
    def create(
        cls,
        response: Response,
        request: Request,
    ) -> "CompletePair":  # pragma: nocover
        """Build a pair with a random id, fresh metadata and an empty cache key."""
        fresh_id = uuid.uuid4()
        fresh_meta = PairMeta()
        return cls(
            id=fresh_id,
            request=request,
            response=response,
            meta=fresh_meta,
            cache_key=b"",
        )