PyperCache 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyperCache/__init__.py +25 -0
- PyperCache/core/__init__.py +7 -0
- PyperCache/core/cache.py +126 -0
- PyperCache/core/cache_record.py +217 -0
- PyperCache/core/request_logger.py +107 -0
- PyperCache/models/apimodel.py +49 -0
- PyperCache/py.typed +1 -0
- PyperCache/query/__init__.py +10 -0
- PyperCache/query/json_injester.py +436 -0
- PyperCache/storage/__init__.py +28 -0
- PyperCache/storage/backends.py +106 -0
- PyperCache/storage/base.py +103 -0
- PyperCache/storage/chunked_dictionary.py +297 -0
- PyperCache/storage/factory.py +40 -0
- PyperCache/storage/sqlite_storage.py +485 -0
- PyperCache/utils/__init__.py +25 -0
- PyperCache/utils/collections.py +28 -0
- PyperCache/utils/fs.py +46 -0
- PyperCache/utils/patterns.py +97 -0
- PyperCache/utils/profiling.py +44 -0
- PyperCache/utils/sentinel.py +26 -0
- PyperCache/utils/serialization.py +175 -0
- PyperCache/utils/typing_cast.py +72 -0
- pypercache-0.1.0.dist-info/METADATA +92 -0
- pypercache-0.1.0.dist-info/RECORD +28 -0
- pypercache-0.1.0.dist-info/WHEEL +5 -0
- pypercache-0.1.0.dist-info/licenses/LICENSE +21 -0
- pypercache-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""Disk-backed dictionary that splits its contents across multiple pickle chunk files.
|
|
2
|
+
|
|
3
|
+
A JSON manifest file tracks which chunk holds each key, allowing large datasets
|
|
4
|
+
to be stored and accessed without loading everything into memory at once.
|
|
5
|
+
|
|
6
|
+
Typical usage::
|
|
7
|
+
|
|
8
|
+
# Build from an in-memory dict
|
|
9
|
+
store = ChunkedDictionary.from_dict(data, "/path/to/dir", chunk_size_in_bytes=1_000_000)
|
|
10
|
+
|
|
11
|
+
# Re-open an existing store
|
|
12
|
+
store = ChunkedDictionary.from_disk("/path/to/dir/chunks.manifest")
|
|
13
|
+
|
|
14
|
+
# Use like a regular dict
|
|
15
|
+
store["my_key"] = {"some": "value"}
|
|
16
|
+
value = store["my_key"]
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import json
|
|
20
|
+
import math
|
|
21
|
+
import os
|
|
22
|
+
import sys
|
|
23
|
+
import threading
|
|
24
|
+
from functools import cached_property as lazy_property
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any, Dict, Generator, Iterator, List, Optional
|
|
27
|
+
|
|
28
|
+
from PyperCache.utils.fs import ensure_dirs_exist
|
|
29
|
+
from PyperCache.utils.serialization import PickleStore
|
|
30
|
+
|
|
31
|
+
# Private sentinel: distinguishes "no default supplied" from None in get().
|
|
32
|
+
_UNSET = object()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
# Helpers
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
def get_size_of_dict(d: dict) -> int:
    """Return the character length of *d* serialised with :func:`json.dumps`.

    This is a cheap size *estimate* used to decide chunk boundaries; it is not
    an exact on-disk byte count (chunks are pickled, not JSON-encoded, and
    ``len`` of the string counts characters, not encoded bytes).
    """
    return len(json.dumps(d))


def chunk_dictionary(
    data: dict,
    chunk_size_in_bytes: int,
) -> Generator[dict, None, None]:
    """Split *data* into sub-dictionaries whose estimated size stays under the limit.

    Args:
        data: Source mapping to split. Values must be JSON-serialisable so
            their size can be estimated via :func:`get_size_of_dict`.
        chunk_size_in_bytes: Soft upper bound on each chunk's estimated size.
            A single item larger than the limit still gets its own chunk.

    Yields:
        Consecutive non-empty sub-dictionaries covering all of *data*.
        An empty *data* yields exactly one empty chunk so callers always
        receive at least one dictionary.
    """
    chunk: dict = {}
    total_size: int = 0

    for key, value in data.items():
        item_size = sys.getsizeof(key) + get_size_of_dict(value)

        # Only flush a non-empty chunk; otherwise an item larger than the
        # limit would emit a spurious empty chunk before it.
        if chunk and total_size + item_size > chunk_size_in_bytes:
            yield chunk
            chunk = {}
            total_size = 0

        chunk[key] = value
        total_size += item_size

    if chunk or not data:
        yield chunk
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# Manifest
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
class ChunkedDictionaryManifest:
    """Loads, mutates, and persists the JSON manifest of a :class:`ChunkedDictionary`.

    The manifest records which chunk file owns each key (``chunks_map``), how
    many chunk files exist, where they live, and the target chunk size.
    """

    def __init__(self, manifest_filepath: str) -> None:
        self.lock = threading.Lock()
        self.filepath: Path = Path(manifest_filepath)

        with open(manifest_filepath, "r") as handle:
            payload: dict = json.load(handle)

        # Mirror the manifest fields onto the instance for direct access.
        self.chunks_map: Dict[str, str] = payload["chunks_map"]
        self.chunk_size_in_bytes: int = payload["chunk_size_in_bytes"]
        self.chunks_path: Path = Path(payload["chunks_path"])
        self.chunks_count: int = payload["chunks_count"]

    def is_chunk_filepath(self, file: str) -> bool:
        """True when *file* looks like a full path under the chunks directory."""
        prefix = str(self.chunks_path)
        return file.startswith(prefix)

    @staticmethod
    def get_chunk_filename(index: int) -> str:
        """Canonical on-disk filename for chunk number *index*."""
        return str(index) + "-chunk.pkl"

    @staticmethod
    def get_chunk_index_from_filename(filename: str) -> int:
        """Inverse of :meth:`get_chunk_filename`: extract the chunk index."""
        return int(filename.replace("-chunk.pkl", ""))

    def remove_unused_chunks(self) -> None:
        """Delete chunk files whose index lies beyond the recorded chunk count."""
        with self.lock:
            for name in os.listdir(self.chunks_path):
                if not name.endswith("-chunk.pkl"):
                    continue
                if ChunkedDictionaryManifest.get_chunk_index_from_filename(name) >= self.chunks_count:
                    os.remove(self.chunks_path / name)

    def erase_all_chunks_nonreversable(self) -> None:
        """Delete every pickle file in the chunks directory and reset the mapping."""
        with self.lock:
            for name in os.listdir(self.chunks_path):
                if name.endswith(".pkl"):
                    os.remove(self.chunks_path / name)
            self.chunks_map = {}
            self.chunks_count = 0

    def save(self) -> None:
        """Write the current manifest state back to :attr:`filepath` as JSON."""
        with self.lock:
            snapshot = {
                "chunk_size_in_bytes": self.chunk_size_in_bytes,
                "chunks_path": str(self.chunks_path),
                "chunks_count": self.chunks_count,
                "chunks_map": self.chunks_map,
            }
            with open(self.filepath, "w") as handle:
                json.dump(snapshot, handle, indent=2)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# ---------------------------------------------------------------------------
|
|
129
|
+
# ChunkedDictionary
|
|
130
|
+
# ---------------------------------------------------------------------------
|
|
131
|
+
|
|
132
|
+
class ChunkedDictionary:
    """A disk-backed dictionary whose entries are spread across pickle chunk files.

    The manifest maps each key to the chunk file that stores it; chunk files
    are loaded lazily and cached in :attr:`chunks`. Every write persists the
    affected chunk and the manifest back to disk.
    """

    def __init__(self, manifest_filepath: str) -> None:
        """Open an existing store from its manifest file.

        Chunk files on disk whose index exceeds the manifest's recorded count
        (e.g. leftovers from an interrupted rewrite) are deleted on open.
        """
        self.lock = threading.Lock()
        self.manifest = ChunkedDictionaryManifest(manifest_filepath)
        self.manifest.remove_unused_chunks()
        # Lazy in-memory cache: chunk filename -> loaded chunk dict.
        self.chunks: Dict[str, dict] = {}

    # ------------------------------------------------------------------
    # Class-level constructors
    # ------------------------------------------------------------------

    @classmethod
    def from_dict(
        cls,
        data: dict,
        directory: str | Path,
        chunk_size_in_bytes: int,
    ) -> "ChunkedDictionary":
        """Build a new ChunkedDictionary on disk from an in-memory dict.

        Args:
            data: Source mapping; values must be JSON-serialisable (the chunk
                size estimator serialises them to JSON).
            directory: Target directory (created if missing); the manifest is
                written as ``<directory>/chunks.manifest``.
            chunk_size_in_bytes: Soft upper bound for each chunk's estimated size.

        Returns:
            A ChunkedDictionary opened on the freshly written manifest.
        """
        directory = Path(directory)
        ensure_dirs_exist(str(directory))
        manifest_filepath = str(directory / "chunks.manifest")

        chunks_path = directory
        chunks = list(chunk_dictionary(data, chunk_size_in_bytes))

        # Write each chunk to disk and record which chunk owns each key.
        chunks_map: Dict[str, str] = {}
        for i, chunk in enumerate(chunks):
            chunk_filename = ChunkedDictionaryManifest.get_chunk_filename(i)
            chunk_filepath = chunks_path / chunk_filename
            PickleStore.save_object(chunk, str(chunk_filepath))
            for key in chunk:
                chunks_map[key] = chunk_filename

        manifest = {
            "chunk_size_in_bytes": chunk_size_in_bytes,
            "chunks_path": str(chunks_path),
            "chunks_count": len(chunks),
            "chunks_map": chunks_map,
        }
        with open(manifest_filepath, "w") as fp:
            json.dump(manifest, fp, indent=2)

        return cls(manifest_filepath)

    @classmethod
    def from_disk(cls, manifest_filepath: str | Path) -> "ChunkedDictionary":
        """Open an existing ChunkedDictionary from its manifest file."""
        return cls(str(manifest_filepath))

    @staticmethod
    def directory_contains_chunked_dictionary(directory: str | Path) -> bool:
        """Return True if *directory* contains a chunks.manifest file."""
        return (Path(directory) / "chunks.manifest").exists()

    # ------------------------------------------------------------------
    # Bulk access
    # ------------------------------------------------------------------

    def data(self) -> dict:
        """Materialise the whole store as a plain in-memory dict (loads all chunks)."""
        return {k: self[k] for k in self.keys()}

    def erase_everything(self) -> None:
        """Irreversibly delete all chunk files and clear the in-memory cache."""
        with self.lock:
            self.chunks = {}
            self.manifest.erase_all_chunks_nonreversable()
            self.manifest.save()

    # ------------------------------------------------------------------
    # dict-like interface
    # ------------------------------------------------------------------

    def __contains__(self, key: str) -> bool:
        # O(1) lookup in the manifest map instead of materialising all keys.
        return key in self.manifest.chunks_map

    def __len__(self) -> int:
        return len(self.manifest.chunks_map)

    def items(self) -> Iterator:
        """Return (key, value) pairs; loads every chunk from disk."""
        return self.data().items()

    def keys(self) -> List[str]:
        """Return a snapshot list of all keys currently in the store."""
        return list(self.manifest.chunks_map.keys())

    def get(self, key: str, default_value: Any = _UNSET) -> Any:
        """Return the value for *key*, or *default_value* when supplied and absent.

        Without a default, a missing key raises ``KeyError`` (via ``__getitem__``).
        """
        if default_value is not _UNSET and key not in self.manifest.chunks_map:
            return default_value
        return self[key]

    def __getitem__(self, key: str) -> Any:
        chunk_filename: str = self.manifest.chunks_map[key]
        chunk = self.get_chunk(chunk_filename)
        return chunk[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Insert or update *key*, persisting the affected chunk and the manifest.

        Existing keys are updated in place inside their current chunk. A new
        key goes into the last chunk when its estimated size still has room,
        otherwise a fresh chunk is created for it.
        """
        chunk_filename: Optional[str] = None

        if key in self.manifest.chunks_map:
            chunk_filename = self.manifest.chunks_map[key]
            chunk = self.get_chunk(chunk_filename)
            with self.lock:
                chunk[key] = value
        else:
            last_chunk_index = self.manifest.chunks_count - 1

            if last_chunk_index == -1:
                # Empty store: create the first chunk.
                last_chunk_filename = self.create_new_chunk()
            else:
                last_chunk_filename = ChunkedDictionaryManifest.get_chunk_filename(last_chunk_index)

            last_chunk = self.get_chunk(last_chunk_filename)
            last_chunk_size = get_size_of_dict(last_chunk)

            if last_chunk_size + get_size_of_dict({key: value}) < self.manifest.chunk_size_in_bytes:
                chunk_filename = last_chunk_filename
            else:
                chunk_filename = self.create_new_chunk()

            with self.lock:
                self.manifest.chunks_map[key] = chunk_filename
                self.chunks[chunk_filename][key] = value

        assert chunk_filename is not None
        self.save_chunk(chunk_filename)

    # ------------------------------------------------------------------
    # Chunk management
    # ------------------------------------------------------------------

    def create_new_chunk(self) -> str:
        """Create an empty chunk on disk and in the cache; return its filename."""
        with self.lock:
            index = self.manifest.chunks_count
            chunk_filename = ChunkedDictionaryManifest.get_chunk_filename(index)
            chunk_filepath = self.manifest.chunks_path / chunk_filename
            PickleStore.save_object({}, str(chunk_filepath))
            self.chunks[chunk_filename] = {}
            self.manifest.chunks_count += 1
            return chunk_filename

    def get_chunk(self, chunk_filename: str) -> dict:
        """Return the chunk dict for *chunk_filename*, loading it from disk once."""
        chunk_filename = str(chunk_filename)
        # Guard against being handed a full path instead of a bare filename.
        assert not self.manifest.is_chunk_filepath(chunk_filename)
        if chunk_filename not in self.chunks:
            chunk_filepath = str(self.manifest.chunks_path / chunk_filename)
            self.chunks[chunk_filename] = PickleStore.load_object(chunk_filepath)
        return self.chunks[chunk_filename]

    def save_chunk(self, chunk_filename: str) -> None:
        """Persist one cached chunk and the manifest to disk."""
        with self.lock:
            chunk = self.chunks[chunk_filename]
            chunk_filepath = self.manifest.chunks_path / chunk_filename
            PickleStore.save_object(chunk, str(chunk_filepath))
            self.manifest.save()

    def resize_data_chunks(self, chunk_size_in_bytes: int) -> None:
        """Rewrite the whole store on disk using a new target chunk size."""
        all_data = self.data()
        chunks_path = str(self.manifest.chunks_path)
        self.erase_everything()
        self.manifest.chunk_size_in_bytes = chunk_size_in_bytes
        new = ChunkedDictionary.from_dict(all_data, chunks_path, chunk_size_in_bytes)
        self.manifest = new.manifest
        self.chunks = new.chunks
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Factory: map a file extension to the appropriate storage backend class."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Dict, Type
|
|
5
|
+
|
|
6
|
+
from PyperCache.storage.base import StorageMechanism
|
|
7
|
+
from PyperCache.storage.backends import ChunkedStorage, JSONStorage, PickleStorage
|
|
8
|
+
from PyperCache.storage.sqlite_storage import SQLiteStorage
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Maps file extensions to their corresponding storage backend class.
# NOTE: lookups are performed on the lowercased suffix (see
# get_storage_mechanism below), so keys here must be lowercase.
_EXTENSION_TO_STORAGE: Dict[str, Type[StorageMechanism]] = {
    ".manifest": ChunkedStorage,
    ".json": JSONStorage,
    ".pkl": PickleStorage,
    ".db": SQLiteStorage,  # SQLite — zero-cost, stdlib, no server required
}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_storage_mechanism(filepath: str) -> Type[StorageMechanism]:
    """Resolve the storage backend class for *filepath*.

    The backend is chosen purely from the file extension (case-insensitive).

    Args:
        filepath: Path to the cache store file.

    Returns:
        The matching :class:`StorageMechanism` subclass (*not* an instance).

    Raises:
        ValueError: If no backend is registered for the file extension.
    """
    ext = Path(filepath).suffix.lower()
    backend = _EXTENSION_TO_STORAGE.get(ext)
    if backend is not None:
        return backend
    raise ValueError(
        f"No storage mechanism found for file extension: {ext!r}"
    )
|