hishel 1.0.0.dev0__py3-none-any.whl → 1.0.0.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ import time
4
4
  import uuid
5
5
  from dataclasses import replace
6
6
  from typing import (
7
+ Any,
7
8
  AsyncIterable,
8
9
  AsyncIterator,
9
10
  Callable,
@@ -13,8 +14,6 @@ from typing import (
13
14
  Union,
14
15
  )
15
16
 
16
- import anysqlite
17
-
18
17
  from hishel._core._base._storages._base import AsyncBaseStorage, ensure_cache_dict
19
18
  from hishel._core._base._storages._packing import pack, unpack
20
19
  from hishel._core.models import (
@@ -26,386 +25,433 @@ from hishel._core.models import (
26
25
  Response,
27
26
  )
28
27
 
28
+ # Batch cleanup configuration
29
+ # How often to run cleanup (seconds). Default: 1 hour.
30
+ BATCH_CLEANUP_INTERVAL = 3600
31
+ # How long to wait after storage creation before allowing the first cleanup (seconds)
32
+ BATCH_CLEANUP_START_DELAY = 5 * 60
33
+ # Number of rows to process per chunk when cleaning
34
+ BATCH_CLEANUP_CHUNK_SIZE = 200
35
+
36
+
37
+ try:
38
+ import anysqlite
39
+
40
+ class AsyncSqliteStorage(AsyncBaseStorage):
41
+ _STREAM_KIND = {"request": 0, "response": 1}
42
+ _COMPLETE_CHUNK_NUMBER = -1
43
+
44
+ def __init__(
45
+ self,
46
+ *,
47
+ connection: Optional[anysqlite.Connection] = None,
48
+ database_path: str = "hishel_cache.db",
49
+ default_ttl: Optional[float] = None,
50
+ refresh_ttl_on_access: bool = True,
51
+ ) -> None:
52
+ base_path = ensure_cache_dict()
53
+
54
+ self.connection = connection
55
+ self.database_path = base_path / database_path
56
+ self.default_ttl = default_ttl
57
+ self.refresh_ttl_on_access = refresh_ttl_on_access
58
+ self.last_cleanup = time.time() - BATCH_CLEANUP_INTERVAL + BATCH_CLEANUP_START_DELAY
59
+ # When this storage instance was created. Used to delay the first cleanup.
60
+ self._start_time = time.time()
61
+ self._initialized = False
62
+
63
+ async def _ensure_connection(self) -> anysqlite.Connection:
64
+ """Ensure connection is established and database is initialized."""
65
+ if self.connection is None:
66
+ self.connection = await anysqlite.connect(str(self.database_path))
67
+ if not self._initialized:
68
+ await self._initialize_database()
69
+ self._initialized = True
70
+ return self.connection
71
+
72
+ async def _initialize_database(self) -> None:
73
+ """Initialize the database schema."""
74
+ assert self.connection is not None
75
+ cursor = await self.connection.cursor()
76
+
77
+ # Table for storing request/response pairs
78
+ await cursor.execute("""
79
+ CREATE TABLE IF NOT EXISTS entries (
80
+ id BLOB PRIMARY KEY,
81
+ cache_key BLOB,
82
+ data BLOB NOT NULL,
83
+ created_at REAL NOT NULL,
84
+ deleted_at REAL
85
+ )
86
+ """)
87
+
88
+ # Table for storing stream chunks
89
+ await cursor.execute("""
90
+ CREATE TABLE IF NOT EXISTS streams (
91
+ entry_id BLOB NOT NULL,
92
+ kind INTEGER NOT NULL,
93
+ chunk_number INTEGER NOT NULL,
94
+ chunk_data BLOB NOT NULL,
95
+ PRIMARY KEY (entry_id, kind, chunk_number),
96
+ FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
97
+ )
98
+ """)
99
+
100
+ # Indexes for performance
101
+ await cursor.execute("CREATE INDEX IF NOT EXISTS idx_entries_deleted_at ON entries(deleted_at)")
102
+ await cursor.execute("CREATE INDEX IF NOT EXISTS idx_entries_cache_key ON entries(cache_key)")
103
+ # Note: PRIMARY KEY (entry_id, kind, chunk_number) already provides an index
104
+ # for queries like: entry_id = ? AND kind = ? AND chunk_number = ?
105
+
106
+ await self.connection.commit()
107
+
108
+ async def create_pair(
109
+ self,
110
+ request: Request,
111
+ id: uuid.UUID | None = None,
112
+ ) -> IncompletePair:
113
+ pair_id = id if id is not None else uuid.uuid4()
114
+ pair_meta = PairMeta(
115
+ created_at=time.time(),
116
+ )
117
+
118
+ pair = IncompletePair(id=pair_id, request=request, meta=pair_meta)
119
+
120
+ packed_pair = pack(pair, kind="pair")
29
121
 
30
- class AsyncSqliteStorage(AsyncBaseStorage):
31
- _STREAM_KIND = {"request": 0, "response": 1}
32
- _COMPLETE_CHUNK_NUMBER = -1
33
-
34
- def __init__(
35
- self,
36
- *,
37
- connection: Optional[anysqlite.Connection] = None,
38
- database_path: str = "hishel_cache.db",
39
- default_ttl: Optional[float] = None,
40
- refresh_ttl_on_access: bool = True,
41
- ) -> None:
42
- base_path = ensure_cache_dict()
43
-
44
- self.connection = connection
45
- self.database_path = base_path / database_path
46
- self.default_ttl = default_ttl
47
- self.refresh_ttl_on_access = refresh_ttl_on_access
48
- self.last_cleanup = float("-inf")
49
- self._initialized = False
50
-
51
- async def _ensure_connection(self) -> anysqlite.Connection:
52
- """Ensure connection is established and database is initialized."""
53
- if self.connection is None:
54
- self.connection = await anysqlite.connect(str(self.database_path))
55
- if not self._initialized:
56
- await self._initialize_database()
57
- self._initialized = True
58
- return self.connection
59
-
60
- async def _initialize_database(self) -> None:
61
- """Initialize the database schema."""
62
- assert self.connection is not None
63
- cursor = await self.connection.cursor()
64
-
65
- # Table for storing request/response pairs
66
- await cursor.execute("""
67
- CREATE TABLE IF NOT EXISTS entries (
68
- id BLOB PRIMARY KEY,
69
- cache_key BLOB,
70
- data BLOB NOT NULL,
71
- created_at REAL NOT NULL,
72
- deleted_at REAL
122
+ connection = await self._ensure_connection()
123
+ cursor = await connection.cursor()
124
+ await cursor.execute(
125
+ "INSERT INTO entries (id, cache_key, data, created_at, deleted_at) VALUES (?, ?, ?, ?, ?)",
126
+ (pair_id.bytes, None, packed_pair, pair_meta.created_at, None),
73
127
  )
74
- """)
75
-
76
- # Table for storing stream chunks
77
- await cursor.execute("""
78
- CREATE TABLE IF NOT EXISTS streams (
79
- entry_id BLOB NOT NULL,
80
- kind INTEGER NOT NULL,
81
- chunk_number INTEGER NOT NULL,
82
- chunk_data BLOB NOT NULL,
83
- PRIMARY KEY (entry_id, kind, chunk_number),
84
- FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
128
+ await connection.commit()
129
+
130
+ assert isinstance(request.stream, AsyncIterable), "Request stream must be an AsyncIterable, not Iterable"
131
+
132
+ request = Request(
133
+ method=request.method,
134
+ url=request.url,
135
+ headers=request.headers,
136
+ metadata=request.metadata,
137
+ stream=self._save_stream(request.stream, pair_id.bytes, "request"),
85
138
  )
86
- """)
87
139
 
88
- # Indexes for performance
89
- await cursor.execute("CREATE INDEX IF NOT EXISTS idx_entries_deleted_at ON entries(deleted_at)")
90
- await cursor.execute("CREATE INDEX IF NOT EXISTS idx_entries_cache_key ON entries(cache_key)")
91
- # Note: PRIMARY KEY (entry_id, kind, chunk_number) already provides an index
92
- # for queries like: entry_id = ? AND kind = ? AND chunk_number = ?
93
-
94
- await self.connection.commit()
95
-
96
- async def create_pair(
97
- self,
98
- request: Request,
99
- id: uuid.UUID | None = None,
100
- ) -> IncompletePair:
101
- pair_id = id if id is not None else uuid.uuid4()
102
- pair_meta = PairMeta(
103
- created_at=time.time(),
104
- )
105
-
106
- pair = IncompletePair(id=pair_id, request=request, meta=pair_meta)
107
-
108
- packed_pair = pack(pair, kind="pair")
109
-
110
- connection = await self._ensure_connection()
111
- cursor = await connection.cursor()
112
- await cursor.execute(
113
- "INSERT INTO entries (id, cache_key, data, created_at, deleted_at) VALUES (?, ?, ?, ?, ?)",
114
- (pair_id.bytes, None, packed_pair, pair_meta.created_at, None),
115
- )
116
- await connection.commit()
117
-
118
- assert isinstance(request.stream, AsyncIterable), "Request stream must be an AsyncIterable, not Iterable"
119
-
120
- request = Request(
121
- method=request.method,
122
- url=request.url,
123
- headers=request.headers,
124
- metadata=request.metadata,
125
- stream=self._save_stream(request.stream, pair_id.bytes, "request"),
126
- )
127
-
128
- return replace(pair, request=request)
129
-
130
- async def add_response(
131
- self,
132
- pair_id: uuid.UUID,
133
- response: Response,
134
- key: str | bytes,
135
- ) -> CompletePair:
136
- if isinstance(key, str):
137
- key = key.encode("utf-8")
138
-
139
- connection = await self._ensure_connection()
140
- cursor = await connection.cursor()
141
-
142
- # Get the existing pair
143
- await cursor.execute("SELECT data FROM entries WHERE id = ?", (pair_id.bytes,))
144
- result = await cursor.fetchone()
145
-
146
- if result is None:
147
- raise ValueError(f"Entry with ID {pair_id} not found.")
148
-
149
- pair = unpack(result[0], kind="pair")
150
-
151
- assert isinstance(response.stream, (AsyncIterator, AsyncIterable))
152
- response = replace(response, stream=self._save_stream(response.stream, pair_id.bytes, "response"))
153
-
154
- await self._delete_stream(pair.id.bytes, cursor, type="response")
155
- complete_pair = CompletePair(id=pair.id, request=pair.request, response=response, meta=pair.meta, cache_key=key)
156
-
157
- # Update the entry with the complete pair and set cache_key
158
- await cursor.execute(
159
- "UPDATE entries SET data = ?, cache_key = ? WHERE id = ?",
160
- (pack(complete_pair, kind="pair"), key, pair_id.bytes),
161
- )
162
- await connection.commit()
163
-
164
- return complete_pair
165
-
166
- async def get_pairs(self, key: str) -> List[CompletePair]:
167
- final_pairs: List[CompletePair] = []
168
-
169
- connection = await self._ensure_connection()
170
- cursor = await connection.cursor()
171
- # Query entries directly by cache_key
172
- await cursor.execute("SELECT id, data FROM entries WHERE cache_key = ?", (key.encode("utf-8"),))
173
-
174
- for row in await cursor.fetchall():
175
- pair_data = unpack(row[1], kind="pair")
176
-
177
- if isinstance(pair_data, IncompletePair):
178
- continue
179
-
180
- final_pairs.append(pair_data)
181
-
182
- pairs_with_streams: List[CompletePair] = []
183
-
184
- for pair in final_pairs:
185
- pairs_with_streams.append(
186
- replace(
187
- pair,
188
- response=replace(
189
- pair.response,
190
- stream=self._stream_data_from_cache(pair.id.bytes, "response"),
191
- ),
192
- request=replace(
193
- pair.request,
194
- stream=self._stream_data_from_cache(pair.id.bytes, "request"),
195
- ),
196
- )
140
+ return replace(pair, request=request)
141
+
142
+ async def add_response(
143
+ self,
144
+ pair_id: uuid.UUID,
145
+ response: Response,
146
+ key: str | bytes,
147
+ ) -> CompletePair:
148
+ if isinstance(key, str):
149
+ key = key.encode("utf-8")
150
+
151
+ connection = await self._ensure_connection()
152
+ cursor = await connection.cursor()
153
+
154
+ # Get the existing pair
155
+ await cursor.execute("SELECT data FROM entries WHERE id = ?", (pair_id.bytes,))
156
+ result = await cursor.fetchone()
157
+
158
+ if result is None:
159
+ raise ValueError(f"Entry with ID {pair_id} not found.")
160
+
161
+ pair = unpack(result[0], kind="pair")
162
+
163
+ assert isinstance(response.stream, (AsyncIterator, AsyncIterable))
164
+ response = replace(response, stream=self._save_stream(response.stream, pair_id.bytes, "response"))
165
+
166
+ await self._delete_stream(pair.id.bytes, cursor, type="response")
167
+ complete_pair = CompletePair(
168
+ id=pair.id, request=pair.request, response=response, meta=pair.meta, cache_key=key
169
+ )
170
+
171
+ # Update the entry with the complete pair and set cache_key
172
+ await cursor.execute(
173
+ "UPDATE entries SET data = ?, cache_key = ? WHERE id = ?",
174
+ (pack(complete_pair, kind="pair"), key, pair_id.bytes),
197
175
  )
198
- return pairs_with_streams
176
+ await connection.commit()
177
+
178
+ return complete_pair
179
+
180
+ async def get_pairs(self, key: str) -> List[CompletePair]:
181
+ final_pairs: List[CompletePair] = []
182
+
183
+ now = time.time()
184
+ if now - self.last_cleanup >= BATCH_CLEANUP_INTERVAL:
185
+ try:
186
+ await self._batch_cleanup()
187
+ except Exception:
188
+ # don't let cleanup prevent reads; failures are non-fatal
189
+ pass
199
190
 
200
- async def update_pair(
201
- self,
202
- id: uuid.UUID,
203
- new_pair: Union[CompletePair, Callable[[CompletePair], CompletePair]],
204
- ) -> Optional[CompletePair]:
205
- connection = await self._ensure_connection()
206
- cursor = await connection.cursor()
207
- await cursor.execute("SELECT data FROM entries WHERE id = ?", (id.bytes,))
208
- result = await cursor.fetchone()
191
+ connection = await self._ensure_connection()
192
+ cursor = await connection.cursor()
193
+ # Query entries directly by cache_key
194
+ await cursor.execute("SELECT id, data FROM entries WHERE cache_key = ?", (key.encode("utf-8"),))
195
+
196
+ for row in await cursor.fetchall():
197
+ pair_data = unpack(row[1], kind="pair")
198
+
199
+ if isinstance(pair_data, IncompletePair):
200
+ continue
201
+
202
+ final_pairs.append(pair_data)
203
+
204
+ pairs_with_streams: List[CompletePair] = []
205
+
206
+ for pair in final_pairs:
207
+ pairs_with_streams.append(
208
+ replace(
209
+ pair,
210
+ response=replace(
211
+ pair.response,
212
+ stream=self._stream_data_from_cache(pair.id.bytes, "response"),
213
+ ),
214
+ request=replace(
215
+ pair.request,
216
+ stream=self._stream_data_from_cache(pair.id.bytes, "request"),
217
+ ),
218
+ )
219
+ )
220
+ return pairs_with_streams
209
221
 
210
- if result is None:
211
- return None
222
+ async def update_pair(
223
+ self,
224
+ id: uuid.UUID,
225
+ new_pair: Union[CompletePair, Callable[[CompletePair], CompletePair]],
226
+ ) -> Optional[CompletePair]:
227
+ connection = await self._ensure_connection()
228
+ cursor = await connection.cursor()
229
+ await cursor.execute("SELECT data FROM entries WHERE id = ?", (id.bytes,))
230
+ result = await cursor.fetchone()
212
231
 
213
- pair = unpack(result[0], kind="pair")
232
+ if result is None:
233
+ return None
214
234
 
215
- if isinstance(pair, IncompletePair):
216
- return None
235
+ pair = unpack(result[0], kind="pair")
217
236
 
218
- if isinstance(new_pair, CompletePair):
219
- complete_pair = new_pair
220
- else:
221
- complete_pair = new_pair(pair)
237
+ if isinstance(pair, IncompletePair):
238
+ return None
222
239
 
223
- if pair.id != complete_pair.id:
224
- raise ValueError("Pair ID mismatch")
240
+ if isinstance(new_pair, CompletePair):
241
+ complete_pair = new_pair
242
+ else:
243
+ complete_pair = new_pair(pair)
225
244
 
226
- await cursor.execute("UPDATE entries SET data = ? WHERE id = ?", (pack(complete_pair, kind="pair"), id.bytes))
245
+ if pair.id != complete_pair.id:
246
+ raise ValueError("Pair ID mismatch")
227
247
 
228
- if pair.cache_key != complete_pair.cache_key:
229
248
  await cursor.execute(
230
- "UPDATE entries SET cache_key = ? WHERE id = ?",
231
- (complete_pair.cache_key, complete_pair.id.bytes),
249
+ "UPDATE entries SET data = ? WHERE id = ?", (pack(complete_pair, kind="pair"), id.bytes)
232
250
  )
233
251
 
234
- await connection.commit()
235
-
236
- return complete_pair
237
-
238
- async def remove(self, id: uuid.UUID) -> None:
239
- connection = await self._ensure_connection()
240
- cursor = await connection.cursor()
241
- await cursor.execute("SELECT data FROM entries WHERE id = ?", (id.bytes,))
242
- result = await cursor.fetchone()
243
-
244
- if result is None:
245
- return None
246
-
247
- pair = unpack(result[0], kind="pair")
248
- await self._soft_delete_pair(pair, cursor)
249
- await connection.commit()
250
-
251
- async def _is_stream_complete(
252
- self, kind: Literal["request", "response"], pair_id: uuid.UUID, cursor: anysqlite.Cursor
253
- ) -> bool:
254
- kind_id = self._STREAM_KIND[kind]
255
- # Check if there's a completion marker (chunk_number = -1)
256
- await cursor.execute(
257
- "SELECT 1 FROM streams WHERE entry_id = ? AND kind = ? AND chunk_number = ? LIMIT 1",
258
- (pair_id.bytes, kind_id, self._COMPLETE_CHUNK_NUMBER),
259
- )
260
- return await cursor.fetchone() is not None
261
-
262
- async def _soft_delete_pair(self, pair: Union[CompletePair, IncompletePair], cursor: anysqlite.Cursor) -> None:
263
- """
264
- Mark the pair as deleted by setting the deleted_at timestamp.
265
- """
266
- marked_pair = self.mark_pair_as_deleted(pair)
267
- await cursor.execute(
268
- "UPDATE entries SET data = ?, deleted_at = ? WHERE id = ?",
269
- (pack(marked_pair, kind="pair"), marked_pair.meta.deleted_at, pair.id.bytes),
270
- )
271
-
272
- async def _is_pair_expired(self, pair: Pair, cursor: anysqlite.Cursor) -> bool:
273
- """
274
- Check if the pair is expired.
275
- """
276
- ttl = pair.request.metadata["hishel_ttl"] if "hishel_ttl" in pair.request.metadata else self.default_ttl
277
- created_at = pair.meta.created_at
278
- if ttl is None:
279
- return False
280
- return created_at + ttl < time.time()
281
-
282
- async def _batch_cleanup(
283
- self,
284
- ) -> None:
285
- """
286
- Cleanup expired pairs in the database.
287
- """
288
- should_mark_as_deleted: List[Union[CompletePair, IncompletePair]] = []
289
- should_hard_delete: List[Union[CompletePair, IncompletePair]] = []
290
-
291
- connection = await self._ensure_connection()
292
- cursor = await connection.cursor()
293
- await cursor.execute("SELECT id, data FROM entries")
294
-
295
- for row in await cursor.fetchall():
296
- pair = unpack(row[1], kind="pair")
297
- if pair is None:
298
- continue
299
- if await self._is_pair_expired(pair, cursor) and not self.is_soft_deleted(pair):
300
- should_mark_as_deleted.append(pair)
301
-
302
- if (self.is_soft_deleted(pair) and self.is_safe_to_hard_delete(pair)) or await self._is_corrupted(
303
- pair, cursor
304
- ):
305
- should_hard_delete.append(pair)
306
-
307
- for pair in should_mark_as_deleted:
252
+ if pair.cache_key != complete_pair.cache_key:
253
+ await cursor.execute(
254
+ "UPDATE entries SET cache_key = ? WHERE id = ?",
255
+ (complete_pair.cache_key, complete_pair.id.bytes),
256
+ )
257
+
258
+ await connection.commit()
259
+
260
+ return complete_pair
261
+
262
+ async def remove(self, id: uuid.UUID) -> None:
263
+ connection = await self._ensure_connection()
264
+ cursor = await connection.cursor()
265
+ await cursor.execute("SELECT data FROM entries WHERE id = ?", (id.bytes,))
266
+ result = await cursor.fetchone()
267
+
268
+ if result is None:
269
+ return None
270
+
271
+ pair = unpack(result[0], kind="pair")
308
272
  await self._soft_delete_pair(pair, cursor)
273
+ await connection.commit()
309
274
 
310
- for pair in should_hard_delete:
311
- await self._hard_delete_pair(pair, cursor)
312
-
313
- await connection.commit()
314
-
315
- async def _is_corrupted(self, pair: IncompletePair | CompletePair, cursor: anysqlite.Cursor) -> bool:
316
- # if pair was created more than 1 hour ago and still not completed
317
- if pair.meta.created_at + 3600 < time.time() and isinstance(pair, IncompletePair):
318
- return True
319
-
320
- if isinstance(pair, CompletePair) and not await self._is_stream_complete("request", pair.id, cursor):
321
- return True
322
- return False
323
-
324
- async def _hard_delete_pair(self, pair: CompletePair | IncompletePair, cursor: anysqlite.Cursor) -> None:
325
- """
326
- Permanently delete the pair from the database.
327
- """
328
- await cursor.execute("DELETE FROM entries WHERE id = ?", (pair.id.bytes,))
329
-
330
- # Delete all streams (both request and response) for this entry
331
- await self._delete_stream(pair.id.bytes, cursor)
332
-
333
- async def _delete_stream(
334
- self,
335
- entry_id: bytes,
336
- cursor: anysqlite.Cursor,
337
- type: Literal["request", "response", "all"] = "all",
338
- ) -> None:
339
- """
340
- Delete all streams (both request and response) associated with the given entry ID.
341
- """
342
- if type == "request":
275
+ async def _is_stream_complete(
276
+ self, kind: Literal["request", "response"], pair_id: uuid.UUID, cursor: anysqlite.Cursor
277
+ ) -> bool:
278
+ kind_id = self._STREAM_KIND[kind]
279
+ # Check if there's a completion marker (chunk_number = -1)
343
280
  await cursor.execute(
344
- "DELETE FROM streams WHERE entry_id = ? AND kind = ?", (entry_id, self._STREAM_KIND["request"])
281
+ "SELECT 1 FROM streams WHERE entry_id = ? AND kind = ? AND chunk_number = ? LIMIT 1",
282
+ (pair_id.bytes, kind_id, self._COMPLETE_CHUNK_NUMBER),
345
283
  )
346
- elif type == "response":
284
+ return await cursor.fetchone() is not None
285
+
286
+ async def _soft_delete_pair(self, pair: Union[CompletePair, IncompletePair], cursor: anysqlite.Cursor) -> None:
287
+ """
288
+ Mark the pair as deleted by setting the deleted_at timestamp.
289
+ """
290
+ marked_pair = self.mark_pair_as_deleted(pair)
347
291
  await cursor.execute(
348
- "DELETE FROM streams WHERE entry_id = ? AND kind = ?", (entry_id, self._STREAM_KIND["response"])
292
+ "UPDATE entries SET data = ?, deleted_at = ? WHERE id = ?",
293
+ (pack(marked_pair, kind="pair"), marked_pair.meta.deleted_at, pair.id.bytes),
349
294
  )
350
- elif type == "all":
351
- await cursor.execute("DELETE FROM streams WHERE entry_id = ?", (entry_id,))
352
-
353
- async def _save_stream(
354
- self,
355
- stream: AsyncIterator[bytes],
356
- entry_id: bytes,
357
- kind: Literal["response", "request"],
358
- ) -> AsyncIterator[bytes]:
359
- """
360
- Wrapper around an async iterator that also saves the data to the cache in chunks.
361
- """
362
- kind_id = self._STREAM_KIND[kind]
363
- chunk_number = 0
364
- async for chunk in stream:
295
+
296
+ async def _is_pair_expired(self, pair: Pair, cursor: anysqlite.Cursor) -> bool:
297
+ """
298
+ Check if the pair is expired.
299
+ """
300
+ ttl = pair.request.metadata["hishel_ttl"] if "hishel_ttl" in pair.request.metadata else self.default_ttl
301
+ created_at = pair.meta.created_at
302
+ if ttl is None:
303
+ return False
304
+ return created_at + ttl < time.time()
305
+
306
+ async def _batch_cleanup(
307
+ self,
308
+ ) -> None:
309
+ """
310
+ Cleanup expired pairs in the database.
311
+ """
312
+ should_mark_as_deleted: List[Union[CompletePair, IncompletePair]] = []
313
+ should_hard_delete: List[Union[CompletePair, IncompletePair]] = []
314
+
365
315
  connection = await self._ensure_connection()
366
316
  cursor = await connection.cursor()
367
- await cursor.execute(
368
- "INSERT INTO streams (entry_id, kind, chunk_number, chunk_data) VALUES (?, ?, ?, ?)",
369
- (entry_id, kind_id, chunk_number, chunk),
370
- )
317
+
318
+ # Process entries in chunks to avoid loading the entire table into memory.
319
+ chunk_size = BATCH_CLEANUP_CHUNK_SIZE
320
+ offset = 0
321
+ while True:
322
+ await cursor.execute("SELECT id, data FROM entries LIMIT ? OFFSET ?", (chunk_size, offset))
323
+ rows = await cursor.fetchall()
324
+ if not rows:
325
+ break
326
+
327
+ for row in rows:
328
+ pair = unpack(row[1], kind="pair")
329
+ if pair is None:
330
+ continue
331
+
332
+ # expired but not yet soft-deleted
333
+ if await self._is_pair_expired(pair, cursor) and not self.is_soft_deleted(pair):
334
+ should_mark_as_deleted.append(pair)
335
+
336
+ # soft-deleted and safe to hard delete, or corrupted pair
337
+ if (self.is_soft_deleted(pair) and self.is_safe_to_hard_delete(pair)) or await self._is_corrupted(
338
+ pair, cursor
339
+ ):
340
+ should_hard_delete.append(pair)
341
+
342
+ # advance pagination
343
+ offset += len(rows)
344
+
345
+ for pair in should_mark_as_deleted:
346
+ await self._soft_delete_pair(pair, cursor)
347
+
348
+ for pair in should_hard_delete:
349
+ await self._hard_delete_pair(pair, cursor)
350
+
371
351
  await connection.commit()
372
- chunk_number += 1
373
- yield chunk
374
-
375
- # Mark end of stream with chunk_number = -1
376
- connection = await self._ensure_connection()
377
- cursor = await connection.cursor()
378
- await cursor.execute(
379
- "INSERT INTO streams (entry_id, kind, chunk_number, chunk_data) VALUES (?, ?, ?, ?)",
380
- (entry_id, kind_id, self._COMPLETE_CHUNK_NUMBER, b""),
381
- )
382
- await connection.commit()
383
-
384
- async def _stream_data_from_cache(
385
- self,
386
- entry_id: bytes,
387
- kind: Literal["response", "request"],
388
- ) -> AsyncIterator[bytes]:
389
- """
390
- Get an async iterator that yields the stream data from the cache.
391
- """
392
- kind_id = self._STREAM_KIND[kind]
393
- chunk_number = 0
394
-
395
- connection = await self._ensure_connection()
396
- while True:
352
+
353
+ async def _is_corrupted(self, pair: IncompletePair | CompletePair, cursor: anysqlite.Cursor) -> bool:
354
+ # if pair was created more than 1 hour ago and still not completed
355
+ if pair.meta.created_at + 3600 < time.time() and isinstance(pair, IncompletePair):
356
+ return True
357
+
358
+ if isinstance(pair, CompletePair) and not await self._is_stream_complete("request", pair.id, cursor):
359
+ return True
360
+ return False
361
+
362
+ async def _hard_delete_pair(self, pair: CompletePair | IncompletePair, cursor: anysqlite.Cursor) -> None:
363
+ """
364
+ Permanently delete the pair from the database.
365
+ """
366
+ await cursor.execute("DELETE FROM entries WHERE id = ?", (pair.id.bytes,))
367
+
368
+ # Delete all streams (both request and response) for this entry
369
+ await self._delete_stream(pair.id.bytes, cursor)
370
+
371
+ async def _delete_stream(
372
+ self,
373
+ entry_id: bytes,
374
+ cursor: anysqlite.Cursor,
375
+ type: Literal["request", "response", "all"] = "all",
376
+ ) -> None:
377
+ """
378
+ Delete all streams (both request and response) associated with the given entry ID.
379
+ """
380
+ if type == "request":
381
+ await cursor.execute(
382
+ "DELETE FROM streams WHERE entry_id = ? AND kind = ?", (entry_id, self._STREAM_KIND["request"])
383
+ )
384
+ elif type == "response":
385
+ await cursor.execute(
386
+ "DELETE FROM streams WHERE entry_id = ? AND kind = ?", (entry_id, self._STREAM_KIND["response"])
387
+ )
388
+ elif type == "all":
389
+ await cursor.execute("DELETE FROM streams WHERE entry_id = ?", (entry_id,))
390
+
391
+ async def _save_stream(
392
+ self,
393
+ stream: AsyncIterator[bytes],
394
+ entry_id: bytes,
395
+ kind: Literal["response", "request"],
396
+ ) -> AsyncIterator[bytes]:
397
+ """
398
+ Wrapper around an async iterator that also saves the data to the cache in chunks.
399
+ """
400
+ kind_id = self._STREAM_KIND[kind]
401
+ chunk_number = 0
402
+ async for chunk in stream:
403
+ connection = await self._ensure_connection()
404
+ cursor = await connection.cursor()
405
+ await cursor.execute(
406
+ "INSERT INTO streams (entry_id, kind, chunk_number, chunk_data) VALUES (?, ?, ?, ?)",
407
+ (entry_id, kind_id, chunk_number, chunk),
408
+ )
409
+ await connection.commit()
410
+ chunk_number += 1
411
+ yield chunk
412
+
413
+ # Mark end of stream with chunk_number = -1
414
+ connection = await self._ensure_connection()
397
415
  cursor = await connection.cursor()
398
416
  await cursor.execute(
399
- "SELECT chunk_data FROM streams WHERE entry_id = ? AND kind = ? AND chunk_number = ?",
400
- (entry_id, kind_id, chunk_number),
417
+ "INSERT INTO streams (entry_id, kind, chunk_number, chunk_data) VALUES (?, ?, ?, ?)",
418
+ (entry_id, kind_id, self._COMPLETE_CHUNK_NUMBER, b""),
401
419
  )
402
- result = await cursor.fetchone()
420
+ await connection.commit()
403
421
 
404
- if result is None:
405
- break
406
- chunk = result[0]
407
- # chunk_number = -1 is the completion marker with empty data
408
- if chunk == b"":
409
- break
410
- yield chunk
411
- chunk_number += 1
422
+ async def _stream_data_from_cache(
423
+ self,
424
+ entry_id: bytes,
425
+ kind: Literal["response", "request"],
426
+ ) -> AsyncIterator[bytes]:
427
+ """
428
+ Get an async iterator that yields the stream data from the cache.
429
+ """
430
+ kind_id = self._STREAM_KIND[kind]
431
+ chunk_number = 0
432
+
433
+ connection = await self._ensure_connection()
434
+ while True:
435
+ cursor = await connection.cursor()
436
+ await cursor.execute(
437
+ "SELECT chunk_data FROM streams WHERE entry_id = ? AND kind = ? AND chunk_number = ?",
438
+ (entry_id, kind_id, chunk_number),
439
+ )
440
+ result = await cursor.fetchone()
441
+
442
+ if result is None:
443
+ break
444
+ chunk = result[0]
445
+ # chunk_number = -1 is the completion marker with empty data
446
+ if chunk == b"":
447
+ break
448
+ yield chunk
449
+ chunk_number += 1
450
+ except ImportError:
451
+
452
+ class AsyncSqliteStorage(AsyncBaseStorage): # type: ignore[no-redef]
453
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
454
+ raise ImportError(
455
+ "The 'anysqlite' library is required to use the `AsyncSqliteStorage` integration. "
456
+ "Install hishel with 'pip install hishel[async]'."
457
+ )