kvgit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kvgit/__init__.py +23 -0
- kvgit/content_types.py +24 -0
- kvgit/errors.py +28 -0
- kvgit/gc.py +346 -0
- kvgit/kv/__init__.py +8 -0
- kvgit/kv/base.py +62 -0
- kvgit/kv/composite.py +130 -0
- kvgit/kv/disk.py +70 -0
- kvgit/kv/memory.py +77 -0
- kvgit/live.py +45 -0
- kvgit/namespaced.py +94 -0
- kvgit/staged.py +277 -0
- kvgit/store.py +66 -0
- kvgit/versioned.py +853 -0
- kvgit-0.1.0.dist-info/METADATA +111 -0
- kvgit-0.1.0.dist-info/RECORD +19 -0
- kvgit-0.1.0.dist-info/WHEEL +5 -0
- kvgit-0.1.0.dist-info/licenses/LICENSE +21 -0
- kvgit-0.1.0.dist-info/top_level.txt +1 -0
kvgit/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""kvgit: Versioned key-value store."""
|
|
2
|
+
|
|
3
|
+
from .content_types import MergeFn, counter, last_writer_wins
|
|
4
|
+
from .errors import ConcurrencyError, MergeConflict
|
|
5
|
+
from .live import Live
|
|
6
|
+
from .namespaced import Namespaced
|
|
7
|
+
from .staged import Staged
|
|
8
|
+
from .store import store
|
|
9
|
+
from .versioned import MergeResult, Versioned
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"ConcurrencyError",
|
|
13
|
+
"Live",
|
|
14
|
+
"MergeConflict",
|
|
15
|
+
"MergeFn",
|
|
16
|
+
"MergeResult",
|
|
17
|
+
"Namespaced",
|
|
18
|
+
"Staged",
|
|
19
|
+
"Versioned",
|
|
20
|
+
"counter",
|
|
21
|
+
"last_writer_wins",
|
|
22
|
+
"store",
|
|
23
|
+
]
|
kvgit/content_types.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Merge functions for typed values."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Callable
|
|
4
|
+
|
|
5
|
+
MergeFn = Callable[[Any | None, Any, Any], Any]
|
|
6
|
+
"""Merge function: (old_value | None, our_value, their_value) -> merged_value.
|
|
7
|
+
|
|
8
|
+
Any argument can be None (key absent or removed on that side).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def counter() -> MergeFn:
|
|
13
|
+
"""Counter merge: ours + theirs - old."""
|
|
14
|
+
|
|
15
|
+
def merge(old: int | None, ours: int, theirs: int) -> int:
|
|
16
|
+
base = old if old is not None else 0
|
|
17
|
+
return ours + theirs - base
|
|
18
|
+
|
|
19
|
+
return merge
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def last_writer_wins() -> MergeFn:
|
|
23
|
+
"""Last-writer-wins: always returns theirs."""
|
|
24
|
+
return lambda old, ours, theirs: theirs
|
kvgit/errors.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""kvgit error types."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ConcurrencyError(Exception):
    """Raised when a concurrent write conflict occurs during merge.

    Another process updated HEAD between when this branch started
    and when merge was attempted via CAS. The caller should reset
    to the new HEAD and retry the operation; the error carries no
    additional state beyond its message.
    """
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class MergeConflict(Exception):
|
|
14
|
+
"""Raised when a three-way merge encounters unresolvable conflicts.
|
|
15
|
+
|
|
16
|
+
Attributes:
|
|
17
|
+
conflicting_keys: The set of keys that could not be auto-merged.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
def __init__(
|
|
21
|
+
self,
|
|
22
|
+
conflicting_keys: set[str],
|
|
23
|
+
merge_errors: dict[str, Exception] | None = None,
|
|
24
|
+
) -> None:
|
|
25
|
+
self.conflicting_keys = conflicting_keys
|
|
26
|
+
self.merge_errors = merge_errors or {}
|
|
27
|
+
keys_str = ", ".join(sorted(conflicting_keys))
|
|
28
|
+
super().__init__(f"Merge conflict on keys: {keys_str}")
|
kvgit/gc.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
"""GCVersioned: Versioned state with automatic garbage collection."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Callable
|
|
7
|
+
|
|
8
|
+
from .errors import ConcurrencyError
|
|
9
|
+
from .kv.base import KVStore
|
|
10
|
+
from .versioned import (
|
|
11
|
+
BRANCH_HEAD,
|
|
12
|
+
COMMIT_KEYSET,
|
|
13
|
+
INFO_KEY,
|
|
14
|
+
META_KEY,
|
|
15
|
+
PARENT_COMMIT,
|
|
16
|
+
TOTAL_VAR_SIZE_KEY,
|
|
17
|
+
MergeResult,
|
|
18
|
+
MetaEntry,
|
|
19
|
+
Versioned,
|
|
20
|
+
_from_bytes,
|
|
21
|
+
_meta_from_bytes,
|
|
22
|
+
_meta_to_bytes,
|
|
23
|
+
_to_bytes,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _is_system_key(key: str) -> bool:
|
|
28
|
+
"""Check if a key is a system/protected key (starts with ``__``).
|
|
29
|
+
|
|
30
|
+
Handles both direct keys (``"__foo__"``) and namespaced keys
|
|
31
|
+
(``"ns/__foo__"``) by extracting the base key name.
|
|
32
|
+
|
|
33
|
+
This is the default ``is_protected`` policy for ``GCVersioned``.
|
|
34
|
+
"""
|
|
35
|
+
base_key = key.split("/")[-1] if "/" in key else key
|
|
36
|
+
return base_key.startswith("__")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass(frozen=True)
class RebaseResult:
    """Result of a rebase/GC operation."""

    # False when the high-water check decided no GC was needed.
    performed: bool
    # Hash of the fresh root commit, or None when performed is False.
    new_commit: str | None
    # User keys whose blobs were dropped by this rebase.
    dropped_keys: tuple[str, ...]
    # Keys retained in the new commit (protected + surviving user keys).
    kept_keys: tuple[str, ...]
    # Persisted user-var byte totals before/after the rebase.
    total_size_before: int
    total_size_after: int
    # Unreachable commits removed by the post-rebase orphan sweep.
    orphans_cleaned: int = 0
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class GCVersioned(Versioned):
    """Versioned state with built-in garbage collection via rebase.

    Rebase strategy (high/low water):
    - Track total persisted user-var size from commit metadata.
    - If total <= high_water_bytes: no-op.
    - If total > high_water_bytes: drop coldest user keys (oldest touch,
      then largest) until total <= low_water_bytes (default 80% of high).
    - Protected keys (as determined by ``is_protected``) are always retained.
    - Write a fresh root commit with only retained keys, then delete
      dropped blobs and orphaned commits.

    Every ``commit()`` auto-runs the high/low check.
    """

    def __init__(
        self,
        store: KVStore | None = None,
        *,
        commit_hash: str | None = None,
        branch: str = "main",
        high_water_bytes: int,
        low_water_bytes: int | None = None,
        is_protected: Callable[[str], bool] = _is_system_key,
    ) -> None:
        """Initialize a GC-enabled versioned store.

        Args:
            store: Backing KV store (passed through to ``Versioned``).
            commit_hash: Optional commit to start from.
            branch: Branch whose HEAD is tracked (default ``"main"``).
            high_water_bytes: GC trigger threshold in bytes; must be > 0.
            low_water_bytes: GC target size; defaults to 80% of high water.
            is_protected: Predicate marking keys that are never dropped.

        Raises:
            ValueError: If ``high_water_bytes`` is not positive.
        """
        super().__init__(store, commit_hash=commit_hash, branch=branch)
        if high_water_bytes <= 0:
            raise ValueError("high_water_bytes must be > 0")
        self.high_water = high_water_bytes
        self.low_water = (
            low_water_bytes
            if low_water_bytes is not None
            else int(high_water_bytes * 0.8)
        )
        # Nonsensical explicit values (<= 0 or above high water) silently
        # fall back to the 80% default rather than raising.
        if self.low_water <= 0 or self.low_water > self.high_water:
            self.low_water = int(high_water_bytes * 0.8)
        self._is_protected = is_protected
        # Result of the most recent auto-GC run triggered by commit();
        # None until the first commit performs the check.
        self.last_rebase_result: RebaseResult | None = None

    def commit(
        self,
        updates: dict[str, bytes] | None = None,
        removals: set[str] | None = None,
        *,
        on_conflict: str = "raise",
        merge_fns=None,
        default_merge=None,
        info: dict | None = None,
    ) -> "MergeResult":
        """Commit changes, then run GC if above high water mark."""

        result = super().commit(
            updates,
            removals,
            on_conflict=on_conflict,
            merge_fns=merge_fns,
            default_merge=default_merge,
            info=info,
        )
        # Only GC when the commit actually landed (result.merged —
        # presumably False when the commit was a no-op or rejected;
        # semantics defined in Versioned, confirm there).
        if result.merged:
            rebase_result = self.maybe_rebase()
            self.last_rebase_result = rebase_result
        return result

    def maybe_rebase(self) -> RebaseResult:
        """Run rebase only if total size exceeds high water mark."""
        total = self._load_total_size()
        if total <= self.high_water:
            # Under threshold: report a no-op with the current key set.
            return RebaseResult(
                performed=False,
                new_commit=None,
                dropped_keys=(),
                kept_keys=tuple(self._commit_keys.keys()),
                total_size_before=total,
                total_size_after=total,
            )
        return self.rebase()

    def rebase(
        self,
        keep_keys: set[str] | None = None,
        *,
        info: dict | None = None,
    ) -> RebaseResult:
        """Rebase: create a fresh root commit, dropping cold keys.

        Args:
            keep_keys: If provided, retain exactly these keys (plus protected
                keys). Otherwise, use the high/low water strategy.
            info: Optional metadata for the rebase commit.

        Raises:
            ConcurrencyError: If HEAD moved between commit load and the
                CAS that publishes the rebase commit.
        """
        # _meta maps key -> MetaEntry (size, last_touch, created_at) for the
        # current commit; maintained by Versioned.
        meta = self._meta
        # Fall back to summing per-key sizes if the total-size record
        # is missing or unreadable.
        total_before = self._load_total_size(
            default=sum(e.size or 0 for e in meta.values())
        )

        # Identify protected and user keys.
        # _commit_keys maps key -> versioned blob key for the current commit.
        protected_keys = {
            k: v for k, v in self._commit_keys.items() if self._is_protected(k)
        }
        user_meta = {k: v for k, v in meta.items() if not self._is_protected(k)}

        retained_keys = set(protected_keys.keys()) | set(user_meta.keys())
        # Running total tracks user (non-protected) bytes only.
        total = sum(e.size or 0 for e in user_meta.values())
        dropped: list[str] = []

        if keep_keys is not None:
            # Explicit keep set — drop everything not in it (except protected keys)
            for key in list(retained_keys):
                if self._is_protected(key):
                    continue
                if key not in keep_keys:
                    retained_keys.discard(key)
                    dropped.append(key)
                    # `and`/`or` chain: 0 when the key has no meta entry or
                    # a falsy size.
                    total -= (user_meta.get(key) and user_meta[key].size) or 0
        else:
            # High/low water strategy: drop coldest until under low water.
            # Sort by oldest touch first, then by largest size (note the
            # negated size in the key).
            candidates: list[tuple[str, MetaEntry]] = sorted(
                user_meta.items(),
                key=lambda kv: (kv[1].last_touch, -(kv[1].size or 0)),
            )
            for key, entry in candidates:
                if total <= self.low_water:
                    break
                retained_keys.discard(key)
                dropped.append(key)
                total -= entry.size or 0

        # Build new commit with retained keys
        from .versioned import _content_hash

        # Collect retained data
        new_commit_keys: dict[str, str] = {}
        new_meta: dict[str, MetaEntry] = {}
        retained_data: dict[str, bytes] = {}

        for key in retained_keys:
            versioned_key = self._commit_keys.get(key)
            if not versioned_key:
                continue
            value = self.store.get(versioned_key)
            if value is None:
                # Blob missing from the store: silently dropped from the
                # new commit.
                continue
            if not self._is_protected(key):
                retained_data[key] = value
            # NOTE(review): new_meta can include protected keys (any retained
            # key present in meta), so total_after below may count bytes that
            # the user-only running total above did not — confirm intended.
            if key in meta:
                new_meta[key] = meta[key]

        # Content-addressable hash for the rebase commit (parent=None, fresh root)
        preview_keys: dict[str, str] = {}
        for key in protected_keys:
            preview_keys[key] = protected_keys[key]
        for key in retained_data:
            # Placeholder values: the real versioned keys embed the hash
            # being computed, so they cannot appear in their own input.
            preview_keys[key] = f"<pending:{key}>"
        # _content_hash signature: (parents, keyset, data, info=...) —
        # defined in versioned.py; confirm there.
        new_hash = _content_hash((), preview_keys, retained_data, info=info)

        # Build the write batch
        diffs: dict[str, bytes] = {}

        # Protected keys — copy blobs with new versioned keys
        for key, old_vk in protected_keys.items():
            value = self.store.get(old_vk)
            if value is None:
                continue
            new_vk = f"{new_hash}:{key}"
            new_commit_keys[key] = new_vk
            diffs[new_vk] = value

        # Retained user keys
        for key, value in retained_data.items():
            new_vk = f"{new_hash}:{key}"
            new_commit_keys[key] = new_vk
            diffs[new_vk] = value

        # Commit metadata: keyset, empty parent list (fresh root), meta,
        # and the recomputed size total.
        diffs[COMMIT_KEYSET % new_hash] = _to_bytes(new_commit_keys)
        diffs[PARENT_COMMIT % new_hash] = _to_bytes([])
        diffs[META_KEY % new_hash] = _meta_to_bytes(new_meta)
        total_after = sum(e.size or 0 for e in new_meta.values())
        diffs[TOTAL_VAR_SIZE_KEY % new_hash] = _to_bytes(total_after)
        if info is not None:
            diffs[INFO_KEY % new_hash] = _to_bytes(info)

        # Write all blobs/metadata BEFORE publishing via CAS, so a failed
        # CAS leaves only unreachable garbage (cleaned by clean_orphans).
        self.store.set_many(**diffs)

        # CAS HEAD to the new rebase commit
        branch_key = BRANCH_HEAD % self._branch
        expected = _to_bytes(self._base_commit)
        if not self.store.cas(branch_key, _to_bytes(new_hash), expected=expected):
            raise ConcurrencyError("HEAD changed during rebase.")

        # Delete dropped blobs (only after HEAD has moved past them)
        to_delete = []
        for key in dropped:
            vk = self._commit_keys.get(key)
            if vk:
                to_delete.append(vk)
        if to_delete:
            self.store.remove_many(*to_delete)

        # Update in-memory state to point at the fresh root
        self._commit_keys = new_commit_keys
        self._current_commit = new_hash
        self._base_commit = new_hash
        self._meta = new_meta

        # Clean orphaned commits
        orphans_cleaned = self.clean_orphans()

        return RebaseResult(
            performed=True,
            new_commit=new_hash,
            dropped_keys=tuple(dropped),
            kept_keys=tuple(retained_keys),
            total_size_before=total_before,
            total_size_after=total_after,
            orphans_cleaned=orphans_cleaned,
        )

    def clean_orphans(self, min_age: float = 3600) -> int:
        """Remove orphaned commits unreachable from HEAD.

        Args:
            min_age: Only delete orphans older than this many seconds
                (default 1 hour).

        Returns:
            Number of orphaned commits cleaned.
        """
        # Mark phase: find all reachable commits across ALL branches
        reachable: set[str] = set()
        # BRANCH_HEAD is a "%s" template; stripping the placeholder yields
        # the key prefix shared by every branch-head record.
        prefix = BRANCH_HEAD.replace("%s", "")
        for key in self.store.keys():
            if isinstance(key, str) and key.startswith(prefix):
                head_bytes = self.store.get(key)
                if head_bytes is None:
                    continue
                branch_head = _from_bytes(head_bytes)
                # history() walks ancestry from the given commit; with
                # all_parents=True presumably it follows merge parents too
                # (defined in Versioned — confirm there).
                for commit in self.history(commit_hash=branch_head, all_parents=True):
                    reachable.add(commit)

        # Sweep phase: find orphaned commits by scanning for meta keys
        meta_prefix = META_KEY.replace("%s", "")
        cutoff_time = time.time() - min_age
        orphans: list[str] = []

        for key in self.store.keys():
            if not isinstance(key, str) or not key.startswith(meta_prefix):
                continue
            commit_hash = key[len(meta_prefix) :]
            if not commit_hash or commit_hash in reachable:
                continue
            # Check age: min_age guards commits written by a concurrent
            # process whose HEAD CAS has not landed yet.
            meta_bytes = self.store.get(key)
            if meta_bytes is None:
                continue
            try:
                meta = _meta_from_bytes(meta_bytes)
                if meta:
                    # Age is approximated from one arbitrary entry's
                    # created_at; commits with empty meta are never swept.
                    first_entry = next(iter(meta.values()), None)
                    if first_entry and first_entry.created_at < cutoff_time:
                        orphans.append(commit_hash)
            except (json.JSONDecodeError, TypeError, KeyError):
                # Unparseable meta: skip rather than risk deleting live data.
                continue

        # Delete orphaned commits and their data
        for orphan_hash in orphans:
            keyset_bytes = self.store.get(COMMIT_KEYSET % orphan_hash)
            if keyset_bytes:
                try:
                    keyset = _from_bytes(keyset_bytes)
                    blob_keys = list(keyset.values())
                    if blob_keys:
                        self.store.remove_many(*blob_keys)
                except Exception:
                    # Best-effort blob cleanup; the commit records below are
                    # still removed.
                    pass
            self.store.remove_many(
                META_KEY % orphan_hash,
                COMMIT_KEYSET % orphan_hash,
                PARENT_COMMIT % orphan_hash,
                TOTAL_VAR_SIZE_KEY % orphan_hash,
                INFO_KEY % orphan_hash,
            )

        return len(orphans)

    def _load_total_size(self, default: int = 0) -> int:
        """Load the total variable size for the current commit.

        Returns *default* when the record is missing or unreadable.
        """
        total_bytes = self.store.get(TOTAL_VAR_SIZE_KEY % self._current_commit)
        if total_bytes is None:
            return default
        try:
            return _from_bytes(total_bytes)
        except Exception:
            # Corrupt record: treat as absent rather than failing GC.
            return default
|
kvgit/kv/__init__.py
ADDED
kvgit/kv/base.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Abstract KV store interface."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Iterable, Mapping
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class KVStore(ABC):
    """Key-value store operating on bytes only.

    All values are stored and retrieved as bytes. Serialization is
    handled at higher layers (e.g., Versioned).
    """

    @abstractmethod
    def get(self, key: str) -> bytes | None:
        """Get bytes value for key, or None if not found."""

    @abstractmethod
    def set(self, key: str, value: bytes) -> None:
        """Set bytes value for key."""

    @abstractmethod
    def get_many(self, *args: str) -> Mapping[str, bytes]:
        """Get multiple keys, returning only keys that exist (missing keys
        are omitted, never mapped to None)."""

    @abstractmethod
    def set_many(self, **kwargs: bytes) -> None:
        """Set multiple key-value pairs."""

    @abstractmethod
    def items(self) -> Iterable[tuple[str, bytes]]:
        """Iterate over all key-value pairs."""

    @abstractmethod
    def keys(self) -> Iterable[str]:
        """Iterate over all keys."""

    @abstractmethod
    def __contains__(self, key: str) -> bool:
        """Check if key exists in store."""

    @abstractmethod
    def remove(self, key: str) -> None:
        """Remove a key if present (no error when absent)."""

    @abstractmethod
    def remove_many(self, *keys: str) -> None:
        """Remove multiple keys."""

    @abstractmethod
    def cas(self, key: str, value: bytes, expected: bytes | None) -> bool:
        """Atomic compare-and-swap.

        Set value only if current value equals expected.
        None means "key must not exist".

        Returns True if swap succeeded, False otherwise.
        """

    @abstractmethod
    def clear(self) -> None:
        """Remove all items from the store."""
|
kvgit/kv/composite.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""N-tier composite cache over multiple KV stores."""
|
|
2
|
+
|
|
3
|
+
from typing import Iterable, Mapping
|
|
4
|
+
|
|
5
|
+
from .base import KVStore
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Composite(KVStore):
    """N-tier cache composing any number of KV stores.

    Reads probe L1, L2, ..., Ln in order; a hit at tier *i* back-fills
    tiers L1..L(i-1) before returning. Writes, removals, and ``clear``
    hit the last (most durable) tier first, then are mirrored into the
    cache tiers on a best-effort basis. ``cas`` is delegated to the last
    tier, which is authoritative; caches are refreshed only on success.
    Enumeration (``keys``/``items``) always reads the authoritative tier.

    Args:
        stores: List of KV stores ordered fastest -> most durable.
    """

    def __init__(self, stores: list[KVStore]) -> None:
        if not stores:
            raise ValueError("Composite requires at least one store")
        self._stores = stores

    def _mirror(self, op) -> None:
        """Apply *op* to every cache tier, swallowing per-tier failures."""
        for cache in self._stores[:-1]:
            try:
                op(cache)
            except Exception:
                pass  # best-effort mirror into caches

    def get(self, key: str) -> bytes | None:
        for tier, store in enumerate(self._stores):
            try:
                value = store.get(key)
            except Exception:
                continue  # tier unavailable, try next
            if value is None:
                continue
            # Hit below L1: back-fill every faster tier before returning.
            for cache in self._stores[:tier]:
                try:
                    cache.set(key, value)
                except Exception:
                    pass  # best-effort cache population
            return value
        return None

    def get_many(self, *args: str) -> Mapping[str, bytes]:
        found: dict[str, bytes] = {}
        missing = set(args)
        for tier, store in enumerate(self._stores):
            if not missing:
                break
            try:
                hits: dict[str, bytes] = {}
                for key in missing:
                    value = store.get(key)
                    if value is not None:
                        hits[key] = value
                if hits and tier > 0:
                    # Back-fill faster tiers with this tier's hits.
                    for cache in self._stores[:tier]:
                        try:
                            cache.set_many(**hits)
                        except Exception:
                            pass
                found.update(hits)
                missing -= hits.keys()
            except Exception:
                continue  # tier unavailable, try next
        return found

    def __contains__(self, key: str) -> bool:
        return any(self._probe(store, key) for store in self._stores)

    @staticmethod
    def _probe(store: KVStore, key: str) -> bool:
        """Membership test that treats an unavailable tier as a miss."""
        try:
            return key in store
        except Exception:
            return False

    def keys(self) -> Iterable[str]:
        return self._stores[-1].keys()

    def items(self) -> Iterable[tuple[str, bytes]]:
        return self._stores[-1].items()

    def set(self, key: str, value: bytes) -> None:
        self._stores[-1].set(key, value)
        self._mirror(lambda cache: cache.set(key, value))

    def set_many(self, **kwargs: bytes) -> None:
        self._stores[-1].set_many(**kwargs)
        self._mirror(lambda cache: cache.set_many(**kwargs))

    def remove(self, key: str) -> None:
        self._stores[-1].remove(key)
        self._mirror(lambda cache: cache.remove(key))

    def remove_many(self, *keys: str) -> None:
        self._stores[-1].remove_many(*keys)
        self._mirror(lambda cache: cache.remove_many(*keys))

    def clear(self) -> None:
        self._stores[-1].clear()
        self._mirror(lambda cache: cache.clear())

    def cas(self, key: str, value: bytes, expected: bytes | None) -> bool:
        # Only the most durable tier arbitrates the swap; caches are
        # plain-set afterwards so they converge on the winning value.
        success = self._stores[-1].cas(key, value, expected)
        if success:
            self._mirror(lambda cache: cache.set(key, value))
        return success
|
kvgit/kv/disk.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Disk-backed KV store using diskcache."""
|
|
2
|
+
|
|
3
|
+
from typing import Iterable, Mapping, cast
|
|
4
|
+
|
|
5
|
+
from .base import KVStore
|
|
6
|
+
|
|
7
|
+
ONE_GB = 1024 * 1024 * 1024
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Disk(KVStore):
    """KV store backed by diskcache (SQLite + mmap).

    Only raw ``bytes`` values are accepted; anything else raises
    TypeError so pickled objects never end up in the cache.
    """

    def __init__(self, directory: str, size_limit: int = ONE_GB) -> None:
        # Imported lazily so diskcache is only required when Disk is used.
        from diskcache import Cache as DiskCache

        self.store = DiskCache(directory, size_limit=size_limit)

    def get(self, key: str) -> bytes | None:
        return cast(bytes | None, self.store.get(key))

    def set(self, key: str, value: bytes) -> None:
        if not isinstance(value, bytes):
            raise TypeError(f"Expected bytes, got {type(value).__name__}")
        self.store[key] = value

    def get_many(self, *args: str) -> Mapping[str, bytes]:
        found: dict[str, bytes] = {}
        for key in args:
            value = self.get(key)
            if value is not None:
                found[key] = value
        return found

    def set_many(self, **kwargs: bytes) -> None:
        # Validate every value up front so a bad one cannot leave a
        # half-written batch behind.
        for key, value in kwargs.items():
            if not isinstance(value, bytes):
                raise TypeError(f"Expected bytes for {key}, got {type(value).__name__}")
        with self.store.transact():
            for key, value in kwargs.items():
                self.set(key, value)

    def items(self) -> Iterable[tuple[str, bytes]]:
        for raw_key in self.store.iterkeys():
            yield str(raw_key), cast(bytes, self.store[raw_key])

    def keys(self) -> Iterable[str]:
        for raw_key in self.store.iterkeys():
            yield str(raw_key)

    def __contains__(self, key: str) -> bool:
        return key in self.store

    def remove(self, key: str) -> None:
        try:
            del self.store[key]
        except KeyError:
            pass  # absent key is not an error

    def remove_many(self, *keys: str) -> None:
        with self.store.transact():
            for key in keys:
                self.store.delete(key, retry=False)

    def cas(self, key: str, value: bytes, expected: bytes | None) -> bool:
        if not isinstance(value, bytes):
            raise TypeError(f"Expected bytes, got {type(value).__name__}")
        # transact() makes the read-compare-write atomic with respect to
        # other diskcache operations on this cache.
        with self.store.transact():
            current = cast(bytes | None, self.store.get(key))
            if current != expected:
                return False
            self.store[key] = value
            return True

    def clear(self) -> None:
        self.store.clear()
|