hishel 1.0.0.dev0__tar.gz → 1.0.0.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hishel-1.0.0.dev1/CHANGELOG.md +45 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/PKG-INFO +30 -53
- hishel-1.0.0.dev1/hishel/_core/_async/_storages/_sqlite.py +457 -0
- hishel-1.0.0.dev1/hishel/_core/_sync/_storages/_sqlite.py +457 -0
- hishel-1.0.0.dev1/hishel/_utils.py +218 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/hishel/httpx.py +9 -2
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/pyproject.toml +10 -4
- hishel-1.0.0.dev0/CHANGELOG.md +0 -70
- hishel-1.0.0.dev0/hishel/_core/_async/_storages/_sqlite.py +0 -411
- hishel-1.0.0.dev0/hishel/_core/_sync/_storages/_sqlite.py +0 -411
- hishel-1.0.0.dev0/hishel/_utils.py +0 -458
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/.gitignore +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/LICENSE +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/README.md +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/hishel/__init__.py +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/hishel/_async_cache.py +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/hishel/_core/__init__.py +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/hishel/_core/_base/_storages/_base.py +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/hishel/_core/_base/_storages/_packing.py +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/hishel/_core/_headers.py +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/hishel/_core/_spec.py +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/hishel/_core/models.py +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/hishel/_sync_cache.py +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/hishel/py.typed +0 -0
- {hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/hishel/requests.py +0 -0
hishel-1.0.0.dev1/CHANGELOG.md
@@ -0,0 +1,45 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+## 1.0.0dev1 - 2025-10-21
+### <!-- 7 -->⚙️ Miscellaneous Tasks
+- Remove some redundant utils methods
+
+## 1.0.0.dev0 - 2025-10-19
+### <!-- 7 -->⚙️ Miscellaneous Tasks
+- Use mike powered versioning
+- Improve docs versioning, deploy dev doc on ci
+
+## 0.1.5 - 2025-10-18
+### <!-- 0 -->🚀 Features
+- Set chunk size to 128KB for httpx to reduce SQLite read/writes
+- Better cache-control parsing
+- Add close method to storages API (#384)
+- Increase requests buffer size to 128KB, disable charset detection
+
+### <!-- 1 -->🐛 Bug Fixes
+- Fix some line breaks
+
+### <!-- 7 -->⚙️ Miscellaneous Tasks
+- Remove some redundant files from repo
+
+## 0.1.4 - 2025-10-14
+### <!-- 0 -->🚀 Features
+- Add support for a sans-IO API (#366)
+- Allow already consumed streams with `CacheTransport` (#377)
+- Add sqlite storage for beta storages
+- Get rid of some locks from sqlite storage
+- Better async implemetation for sqlite storage
+
+### <!-- 1 -->🐛 Bug Fixes
+- Create an sqlite file in a cache folder
+- Fix beta imports
+
+### <!-- 7 -->⚙️ Miscellaneous Tasks
+- Improve CI (#369)
+- Remove src folder (#373)
+- Temporary remove python3.14 from CI
+- Add sqlite tests for new storage
+- Move some tests to beta
+
{hishel-1.0.0.dev0 → hishel-1.0.0.dev1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hishel
-Version: 1.0.0.dev0
+Version: 1.0.0.dev1
 Summary: Elegant HTTP Caching for Python
 Project-URL: Homepage, https://hishel.com
 Project-URL: Source, https://github.com/karpetrosyan/hishel
@@ -24,12 +24,14 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
 Classifier: Topic :: Internet :: WWW/HTTP
 Requires-Python: >=3.9
-Requires-Dist: anyio>=4.9.0
-Requires-Dist: anysqlite>=0.0.5
-Requires-Dist: httpx>=0.28.0
 Requires-Dist: msgpack>=1.1.2
 Requires-Dist: typing-extensions>=4.14.1
+Provides-Extra: async
+Requires-Dist: anyio>=4.9.0; extra == 'async'
+Requires-Dist: anysqlite>=0.0.5; extra == 'async'
 Provides-Extra: httpx
+Requires-Dist: anyio>=4.9.0; extra == 'httpx'
+Requires-Dist: anysqlite>=0.0.5; extra == 'httpx'
 Requires-Dist: httpx>=0.28.1; extra == 'httpx'
 Provides-Extra: requests
 Requires-Dist: requests>=2.32.5; extra == 'requests'
@@ -249,73 +251,48 @@ Hishel is inspired by and builds upon the excellent work in the Python HTTP ecos
   <strong>Made with ❤️ by <a href="https://github.com/karpetrosyan">Kar Petrosyan</a></strong>
 </p>
 
-
+# Changelog
 
-
+All notable changes to this project will be documented in this file.
 
-
-
-
+## 1.0.0dev1 - 2025-10-21
+### <!-- 7 -->⚙️ Miscellaneous Tasks
+- Remove some redundant utils methods
 
-
+## 1.0.0.dev0 - 2025-10-19
+### <!-- 7 -->⚙️ Miscellaneous Tasks
+- Use mike powered versioning
+- Improve docs versioning, deploy dev doc on ci
 
-
+## 0.1.5 - 2025-10-18
+### <!-- 0 -->🚀 Features
+- Set chunk size to 128KB for httpx to reduce SQLite read/writes
 - Better cache-control parsing
 - Add close method to storages API (#384)
--
+- Increase requests buffer size to 128KB, disable charset detection
 
-###
-
-- *(docs)* Fix some line breaks
-
-### ⚙️ Miscellaneous Tasks
+### <!-- 1 -->🐛 Bug Fixes
+- Fix some line breaks
 
+### <!-- 7 -->⚙️ Miscellaneous Tasks
 - Remove some redundant files from repo
-## [0.1.4] - 2025-10-14
-
-### 🚀 Features
 
+## 0.1.4 - 2025-10-14
+### <!-- 0 -->🚀 Features
 - Add support for a sans-IO API (#366)
 - Allow already consumed streams with `CacheTransport` (#377)
 - Add sqlite storage for beta storages
 - Get rid of some locks from sqlite storage
 - Better async implemetation for sqlite storage
 
-###
-
+### <!-- 1 -->🐛 Bug Fixes
 - Create an sqlite file in a cache folder
 - Fix beta imports
 
-###
-
+### <!-- 7 -->⚙️ Miscellaneous Tasks
 - Improve CI (#369)
--
--
--
--
-## [0.1.3] - 2025-07-06
-
-### 🚀 Features
-
-- Support providing a path prefix to S3 storage (#342)
-
-### 📚 Documentation
-
-- Update link to httpx transports page (#337)
-## [0.1.2] - 2025-04-04
-
-### 🐛 Bug Fixes
-
-- Requirements.txt to reduce vulnerabilities (#263)
-## [0.0.30] - 2024-07-12
-
-### 🐛 Bug Fixes
-
-- Requirements.txt to reduce vulnerabilities (#245)
-- Requirements.txt to reduce vulnerabilities (#255)
-## [0.0.27] - 2024-05-31
-
-### 🐛 Bug Fixes
+- Remove src folder (#373)
+- Temporary remove python3.14 from CI
+- Add sqlite tests for new storage
+- Move some tests to beta
 
-- *(redis)* Do not update metadata with negative ttl (#231)
-## [0.0.1] - 2023-07-22
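A note on the dependency change above: `anyio` and `anysqlite` are no longer unconditional dependencies of hishel; they are now pulled in only through the new `async` extra (or the `httpx` extra). The sketch below shows the practical effect, based on the `ImportError` fallback stub at the end of the new `_sqlite.py` module that follows; the internal import path is taken from this diff and is not necessarily a documented public API.

```python
# Sketch: what a bare `pip install hishel` (no extras, so no anysqlite)
# would see, assuming the internal module path from this diff.
from hishel._core._async._storages._sqlite import AsyncSqliteStorage

try:
    AsyncSqliteStorage()
except ImportError as exc:
    # The stub advises: "Install hishel with 'pip install hishel[async]'."
    print(exc)
```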
hishel-1.0.0.dev1/hishel/_core/_async/_storages/_sqlite.py
@@ -0,0 +1,457 @@
+from __future__ import annotations
+
+import time
+import uuid
+from dataclasses import replace
+from typing import (
+    Any,
+    AsyncIterable,
+    AsyncIterator,
+    Callable,
+    List,
+    Literal,
+    Optional,
+    Union,
+)
+
+from hishel._core._base._storages._base import AsyncBaseStorage, ensure_cache_dict
+from hishel._core._base._storages._packing import pack, unpack
+from hishel._core.models import (
+    CompletePair,
+    IncompletePair,
+    Pair,
+    PairMeta,
+    Request,
+    Response,
+)
+
+# Batch cleanup configuration
+# How often to run cleanup (seconds). Default: 1 hour.
+BATCH_CLEANUP_INTERVAL = 3600
+# How long to wait after storage creation before allowing the first cleanup (seconds)
+BATCH_CLEANUP_START_DELAY = 5 * 60
+# Number of rows to process per chunk when cleaning
+BATCH_CLEANUP_CHUNK_SIZE = 200
+
+
+try:
+    import anysqlite
+
+    class AsyncSqliteStorage(AsyncBaseStorage):
+        _STREAM_KIND = {"request": 0, "response": 1}
+        _COMPLETE_CHUNK_NUMBER = -1
+
+        def __init__(
+            self,
+            *,
+            connection: Optional[anysqlite.Connection] = None,
+            database_path: str = "hishel_cache.db",
+            default_ttl: Optional[float] = None,
+            refresh_ttl_on_access: bool = True,
+        ) -> None:
+            base_path = ensure_cache_dict()
+
+            self.connection = connection
+            self.database_path = base_path / database_path
+            self.default_ttl = default_ttl
+            self.refresh_ttl_on_access = refresh_ttl_on_access
+            self.last_cleanup = time.time() - BATCH_CLEANUP_INTERVAL + BATCH_CLEANUP_START_DELAY
+            # When this storage instance was created. Used to delay the first cleanup.
+            self._start_time = time.time()
+            self._initialized = False
+
+        async def _ensure_connection(self) -> anysqlite.Connection:
+            """Ensure connection is established and database is initialized."""
+            if self.connection is None:
+                self.connection = await anysqlite.connect(str(self.database_path))
+            if not self._initialized:
+                await self._initialize_database()
+                self._initialized = True
+            return self.connection
+
+        async def _initialize_database(self) -> None:
+            """Initialize the database schema."""
+            assert self.connection is not None
+            cursor = await self.connection.cursor()
+
+            # Table for storing request/response pairs
+            await cursor.execute("""
+                CREATE TABLE IF NOT EXISTS entries (
+                    id BLOB PRIMARY KEY,
+                    cache_key BLOB,
+                    data BLOB NOT NULL,
+                    created_at REAL NOT NULL,
+                    deleted_at REAL
+                )
+            """)
+
+            # Table for storing stream chunks
+            await cursor.execute("""
+                CREATE TABLE IF NOT EXISTS streams (
+                    entry_id BLOB NOT NULL,
+                    kind INTEGER NOT NULL,
+                    chunk_number INTEGER NOT NULL,
+                    chunk_data BLOB NOT NULL,
+                    PRIMARY KEY (entry_id, kind, chunk_number),
+                    FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
+                )
+            """)
+
+            # Indexes for performance
+            await cursor.execute("CREATE INDEX IF NOT EXISTS idx_entries_deleted_at ON entries(deleted_at)")
+            await cursor.execute("CREATE INDEX IF NOT EXISTS idx_entries_cache_key ON entries(cache_key)")
+            # Note: PRIMARY KEY (entry_id, kind, chunk_number) already provides an index
+            # for queries like: entry_id = ? AND kind = ? AND chunk_number = ?
+
+            await self.connection.commit()
+
+        async def create_pair(
+            self,
+            request: Request,
+            id: uuid.UUID | None = None,
+        ) -> IncompletePair:
+            pair_id = id if id is not None else uuid.uuid4()
+            pair_meta = PairMeta(
+                created_at=time.time(),
+            )
+
+            pair = IncompletePair(id=pair_id, request=request, meta=pair_meta)
+
+            packed_pair = pack(pair, kind="pair")
+
+            connection = await self._ensure_connection()
+            cursor = await connection.cursor()
+            await cursor.execute(
+                "INSERT INTO entries (id, cache_key, data, created_at, deleted_at) VALUES (?, ?, ?, ?, ?)",
+                (pair_id.bytes, None, packed_pair, pair_meta.created_at, None),
+            )
+            await connection.commit()
+
+            assert isinstance(request.stream, AsyncIterable), "Request stream must be an AsyncIterable, not Iterable"
+
+            request = Request(
+                method=request.method,
+                url=request.url,
+                headers=request.headers,
+                metadata=request.metadata,
+                stream=self._save_stream(request.stream, pair_id.bytes, "request"),
+            )
+
+            return replace(pair, request=request)
+
+        async def add_response(
+            self,
+            pair_id: uuid.UUID,
+            response: Response,
+            key: str | bytes,
+        ) -> CompletePair:
+            if isinstance(key, str):
+                key = key.encode("utf-8")
+
+            connection = await self._ensure_connection()
+            cursor = await connection.cursor()
+
+            # Get the existing pair
+            await cursor.execute("SELECT data FROM entries WHERE id = ?", (pair_id.bytes,))
+            result = await cursor.fetchone()
+
+            if result is None:
+                raise ValueError(f"Entry with ID {pair_id} not found.")
+
+            pair = unpack(result[0], kind="pair")
+
+            assert isinstance(response.stream, (AsyncIterator, AsyncIterable))
+            response = replace(response, stream=self._save_stream(response.stream, pair_id.bytes, "response"))
+
+            await self._delete_stream(pair.id.bytes, cursor, type="response")
+            complete_pair = CompletePair(
+                id=pair.id, request=pair.request, response=response, meta=pair.meta, cache_key=key
+            )
+
+            # Update the entry with the complete pair and set cache_key
+            await cursor.execute(
+                "UPDATE entries SET data = ?, cache_key = ? WHERE id = ?",
+                (pack(complete_pair, kind="pair"), key, pair_id.bytes),
+            )
+            await connection.commit()
+
+            return complete_pair
+
+        async def get_pairs(self, key: str) -> List[CompletePair]:
+            final_pairs: List[CompletePair] = []
+
+            now = time.time()
+            if now - self.last_cleanup >= BATCH_CLEANUP_INTERVAL:
+                try:
+                    await self._batch_cleanup()
+                except Exception:
+                    # don't let cleanup prevent reads; failures are non-fatal
+                    pass
+
+            connection = await self._ensure_connection()
+            cursor = await connection.cursor()
+            # Query entries directly by cache_key
+            await cursor.execute("SELECT id, data FROM entries WHERE cache_key = ?", (key.encode("utf-8"),))
+
+            for row in await cursor.fetchall():
+                pair_data = unpack(row[1], kind="pair")
+
+                if isinstance(pair_data, IncompletePair):
+                    continue
+
+                final_pairs.append(pair_data)
+
+            pairs_with_streams: List[CompletePair] = []
+
+            for pair in final_pairs:
+                pairs_with_streams.append(
+                    replace(
+                        pair,
+                        response=replace(
+                            pair.response,
+                            stream=self._stream_data_from_cache(pair.id.bytes, "response"),
+                        ),
+                        request=replace(
+                            pair.request,
+                            stream=self._stream_data_from_cache(pair.id.bytes, "request"),
+                        ),
+                    )
+                )
+            return pairs_with_streams
+
+        async def update_pair(
+            self,
+            id: uuid.UUID,
+            new_pair: Union[CompletePair, Callable[[CompletePair], CompletePair]],
+        ) -> Optional[CompletePair]:
+            connection = await self._ensure_connection()
+            cursor = await connection.cursor()
+            await cursor.execute("SELECT data FROM entries WHERE id = ?", (id.bytes,))
+            result = await cursor.fetchone()
+
+            if result is None:
+                return None
+
+            pair = unpack(result[0], kind="pair")
+
+            if isinstance(pair, IncompletePair):
+                return None
+
+            if isinstance(new_pair, CompletePair):
+                complete_pair = new_pair
+            else:
+                complete_pair = new_pair(pair)
+
+            if pair.id != complete_pair.id:
+                raise ValueError("Pair ID mismatch")
+
+            await cursor.execute(
+                "UPDATE entries SET data = ? WHERE id = ?", (pack(complete_pair, kind="pair"), id.bytes)
+            )
+
+            if pair.cache_key != complete_pair.cache_key:
+                await cursor.execute(
+                    "UPDATE entries SET cache_key = ? WHERE id = ?",
+                    (complete_pair.cache_key, complete_pair.id.bytes),
+                )
+
+            await connection.commit()
+
+            return complete_pair
+
+        async def remove(self, id: uuid.UUID) -> None:
+            connection = await self._ensure_connection()
+            cursor = await connection.cursor()
+            await cursor.execute("SELECT data FROM entries WHERE id = ?", (id.bytes,))
+            result = await cursor.fetchone()
+
+            if result is None:
+                return None
+
+            pair = unpack(result[0], kind="pair")
+            await self._soft_delete_pair(pair, cursor)
+            await connection.commit()
+
+        async def _is_stream_complete(
+            self, kind: Literal["request", "response"], pair_id: uuid.UUID, cursor: anysqlite.Cursor
+        ) -> bool:
+            kind_id = self._STREAM_KIND[kind]
+            # Check if there's a completion marker (chunk_number = -1)
+            await cursor.execute(
+                "SELECT 1 FROM streams WHERE entry_id = ? AND kind = ? AND chunk_number = ? LIMIT 1",
+                (pair_id.bytes, kind_id, self._COMPLETE_CHUNK_NUMBER),
+            )
+            return await cursor.fetchone() is not None
+
+        async def _soft_delete_pair(self, pair: Union[CompletePair, IncompletePair], cursor: anysqlite.Cursor) -> None:
+            """
+            Mark the pair as deleted by setting the deleted_at timestamp.
+            """
+            marked_pair = self.mark_pair_as_deleted(pair)
+            await cursor.execute(
+                "UPDATE entries SET data = ?, deleted_at = ? WHERE id = ?",
+                (pack(marked_pair, kind="pair"), marked_pair.meta.deleted_at, pair.id.bytes),
+            )
+
+        async def _is_pair_expired(self, pair: Pair, cursor: anysqlite.Cursor) -> bool:
+            """
+            Check if the pair is expired.
+            """
+            ttl = pair.request.metadata["hishel_ttl"] if "hishel_ttl" in pair.request.metadata else self.default_ttl
+            created_at = pair.meta.created_at
+            if ttl is None:
+                return False
+            return created_at + ttl < time.time()
+
+        async def _batch_cleanup(
+            self,
+        ) -> None:
+            """
+            Cleanup expired pairs in the database.
+            """
+            should_mark_as_deleted: List[Union[CompletePair, IncompletePair]] = []
+            should_hard_delete: List[Union[CompletePair, IncompletePair]] = []
+
+            connection = await self._ensure_connection()
+            cursor = await connection.cursor()
+
+            # Process entries in chunks to avoid loading the entire table into memory.
+            chunk_size = BATCH_CLEANUP_CHUNK_SIZE
+            offset = 0
+            while True:
+                await cursor.execute("SELECT id, data FROM entries LIMIT ? OFFSET ?", (chunk_size, offset))
+                rows = await cursor.fetchall()
+                if not rows:
+                    break
+
+                for row in rows:
+                    pair = unpack(row[1], kind="pair")
+                    if pair is None:
+                        continue
+
+                    # expired but not yet soft-deleted
+                    if await self._is_pair_expired(pair, cursor) and not self.is_soft_deleted(pair):
+                        should_mark_as_deleted.append(pair)
+
+                    # soft-deleted and safe to hard delete, or corrupted pair
+                    if (self.is_soft_deleted(pair) and self.is_safe_to_hard_delete(pair)) or await self._is_corrupted(
+                        pair, cursor
+                    ):
+                        should_hard_delete.append(pair)
+
+                # advance pagination
+                offset += len(rows)
+
+            for pair in should_mark_as_deleted:
+                await self._soft_delete_pair(pair, cursor)
+
+            for pair in should_hard_delete:
+                await self._hard_delete_pair(pair, cursor)
+
+            await connection.commit()
+
+        async def _is_corrupted(self, pair: IncompletePair | CompletePair, cursor: anysqlite.Cursor) -> bool:
+            # if pair was created more than 1 hour ago and still not completed
+            if pair.meta.created_at + 3600 < time.time() and isinstance(pair, IncompletePair):
+                return True
+
+            if isinstance(pair, CompletePair) and not await self._is_stream_complete("request", pair.id, cursor):
+                return True
+            return False
+
+        async def _hard_delete_pair(self, pair: CompletePair | IncompletePair, cursor: anysqlite.Cursor) -> None:
+            """
+            Permanently delete the pair from the database.
+            """
+            await cursor.execute("DELETE FROM entries WHERE id = ?", (pair.id.bytes,))
+
+            # Delete all streams (both request and response) for this entry
+            await self._delete_stream(pair.id.bytes, cursor)
+
+        async def _delete_stream(
+            self,
+            entry_id: bytes,
+            cursor: anysqlite.Cursor,
+            type: Literal["request", "response", "all"] = "all",
+        ) -> None:
+            """
+            Delete all streams (both request and response) associated with the given entry ID.
+            """
+            if type == "request":
+                await cursor.execute(
+                    "DELETE FROM streams WHERE entry_id = ? AND kind = ?", (entry_id, self._STREAM_KIND["request"])
+                )
+            elif type == "response":
+                await cursor.execute(
+                    "DELETE FROM streams WHERE entry_id = ? AND kind = ?", (entry_id, self._STREAM_KIND["response"])
+                )
+            elif type == "all":
+                await cursor.execute("DELETE FROM streams WHERE entry_id = ?", (entry_id,))
+
+        async def _save_stream(
+            self,
+            stream: AsyncIterator[bytes],
+            entry_id: bytes,
+            kind: Literal["response", "request"],
+        ) -> AsyncIterator[bytes]:
+            """
+            Wrapper around an async iterator that also saves the data to the cache in chunks.
+            """
+            kind_id = self._STREAM_KIND[kind]
+            chunk_number = 0
+            async for chunk in stream:
+                connection = await self._ensure_connection()
+                cursor = await connection.cursor()
+                await cursor.execute(
+                    "INSERT INTO streams (entry_id, kind, chunk_number, chunk_data) VALUES (?, ?, ?, ?)",
+                    (entry_id, kind_id, chunk_number, chunk),
+                )
+                await connection.commit()
+                chunk_number += 1
+                yield chunk
+
+            # Mark end of stream with chunk_number = -1
+            connection = await self._ensure_connection()
+            cursor = await connection.cursor()
+            await cursor.execute(
+                "INSERT INTO streams (entry_id, kind, chunk_number, chunk_data) VALUES (?, ?, ?, ?)",
+                (entry_id, kind_id, self._COMPLETE_CHUNK_NUMBER, b""),
+            )
+            await connection.commit()
+
+        async def _stream_data_from_cache(
+            self,
+            entry_id: bytes,
+            kind: Literal["response", "request"],
+        ) -> AsyncIterator[bytes]:
+            """
+            Get an async iterator that yields the stream data from the cache.
+            """
+            kind_id = self._STREAM_KIND[kind]
+            chunk_number = 0
+
+            connection = await self._ensure_connection()
+            while True:
+                cursor = await connection.cursor()
+                await cursor.execute(
+                    "SELECT chunk_data FROM streams WHERE entry_id = ? AND kind = ? AND chunk_number = ?",
+                    (entry_id, kind_id, chunk_number),
+                )
+                result = await cursor.fetchone()
+
+                if result is None:
+                    break
+                chunk = result[0]
+                # chunk_number = -1 is the completion marker with empty data
+                if chunk == b"":
+                    break
+                yield chunk
+                chunk_number += 1
+except ImportError:
+
+    class AsyncSqliteStorage(AsyncBaseStorage):  # type: ignore[no-redef]
+        def __init__(self, *args: Any, **kwargs: Any) -> None:
+            raise ImportError(
+                "The 'anysqlite' library is required to use the `AsyncSqliteStorage` integration. "
+                "Install hishel with 'pip install hishel[async]'."
+            )