kvgit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kvgit-0.1.0/LICENSE +21 -0
- kvgit-0.1.0/PKG-INFO +111 -0
- kvgit-0.1.0/README.md +73 -0
- kvgit-0.1.0/kvgit/__init__.py +23 -0
- kvgit-0.1.0/kvgit/content_types.py +24 -0
- kvgit-0.1.0/kvgit/errors.py +28 -0
- kvgit-0.1.0/kvgit/gc.py +346 -0
- kvgit-0.1.0/kvgit/kv/__init__.py +8 -0
- kvgit-0.1.0/kvgit/kv/base.py +62 -0
- kvgit-0.1.0/kvgit/kv/composite.py +130 -0
- kvgit-0.1.0/kvgit/kv/disk.py +70 -0
- kvgit-0.1.0/kvgit/kv/memory.py +77 -0
- kvgit-0.1.0/kvgit/live.py +45 -0
- kvgit-0.1.0/kvgit/namespaced.py +94 -0
- kvgit-0.1.0/kvgit/staged.py +277 -0
- kvgit-0.1.0/kvgit/store.py +66 -0
- kvgit-0.1.0/kvgit/versioned.py +853 -0
- kvgit-0.1.0/kvgit.egg-info/PKG-INFO +111 -0
- kvgit-0.1.0/kvgit.egg-info/SOURCES.txt +32 -0
- kvgit-0.1.0/kvgit.egg-info/dependency_links.txt +1 -0
- kvgit-0.1.0/kvgit.egg-info/requires.txt +12 -0
- kvgit-0.1.0/kvgit.egg-info/top_level.txt +1 -0
- kvgit-0.1.0/pyproject.toml +43 -0
- kvgit-0.1.0/setup.cfg +4 -0
- kvgit-0.1.0/tests/test_content_types.py +88 -0
- kvgit-0.1.0/tests/test_gc.py +154 -0
- kvgit-0.1.0/tests/test_kv_composite.py +105 -0
- kvgit-0.1.0/tests/test_kv_disk.py +96 -0
- kvgit-0.1.0/tests/test_kv_memory.py +115 -0
- kvgit-0.1.0/tests/test_live.py +109 -0
- kvgit-0.1.0/tests/test_namespaced.py +237 -0
- kvgit-0.1.0/tests/test_staged.py +425 -0
- kvgit-0.1.0/tests/test_store_factory.py +53 -0
- kvgit-0.1.0/tests/test_versioned.py +812 -0
kvgit-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Adam Ashenfelter
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
kvgit-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kvgit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Versioned key-value store with git-like commit, branch, and merge semantics.
|
|
5
|
+
Author: ashenfad
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ashenfad/kvgit
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/ashenfad/kvgit/issues
|
|
9
|
+
Project-URL: Documentation, https://github.com/ashenfad/kvgit#readme
|
|
10
|
+
Project-URL: Source, https://github.com/ashenfad/kvgit
|
|
11
|
+
Keywords: versioning,git,key-value,branching,merge
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Topic :: Database
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
22
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
23
|
+
Classifier: Operating System :: OS Independent
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Provides-Extra: disk
|
|
29
|
+
Requires-Dist: diskcache; extra == "disk"
|
|
30
|
+
Provides-Extra: all
|
|
31
|
+
Requires-Dist: diskcache; extra == "all"
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-timeout; extra == "dev"
|
|
35
|
+
Requires-Dist: diskcache; extra == "dev"
|
|
36
|
+
Requires-Dist: ruff; extra == "dev"
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
|
|
39
|
+
# kvgit 🔀
|
|
40
|
+
|
|
41
|
+
Versioned key-value store with git-like commit, branch, and merge semantics.
|
|
42
|
+
|
|
43
|
+
Values are `Any` (serialized via pickle by default). Commits are content-addressable. Branches are cheap. Merges are three-way with pluggable per-key conflict resolution.
|
|
44
|
+
|
|
45
|
+
## Features
|
|
46
|
+
|
|
47
|
+
- **Commits** -- immutable, content-addressable snapshots
|
|
48
|
+
- **Branches** -- named branch heads with CAS-based concurrency
|
|
49
|
+
- **Three-way merge** -- auto-merges non-overlapping changes, pluggable merge functions for conflicts
|
|
50
|
+
- **Merge functions** -- counters, last-writer-wins, or custom per-key merge logic
|
|
51
|
+
- **Garbage collection** -- high/low water rebase drops cold keys automatically
|
|
52
|
+
- **Namespaces** -- key-prefixed views with full read/write support
|
|
53
|
+
- **Pluggable backends** -- in-memory, disk (via diskcache), or bring your own `KVStore`
|
|
54
|
+
|
|
55
|
+
## Install
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install kvgit # in-memory only
|
|
59
|
+
pip install kvgit[disk] # adds disk backend via diskcache
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Quick example
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
import kvgit
|
|
66
|
+
|
|
67
|
+
# Create a store -- values are Any (pickle-serialized by default)
|
|
68
|
+
s = kvgit.store()
|
|
69
|
+
|
|
70
|
+
s["user"] = "alice"
|
|
71
|
+
s["score"] = 0
|
|
72
|
+
s.commit()
|
|
73
|
+
|
|
74
|
+
first = s.current_commit
|
|
75
|
+
|
|
76
|
+
# Update and commit again
|
|
77
|
+
s["score"] = 100
|
|
78
|
+
s.commit()
|
|
79
|
+
print(s["score"]) # 100
|
|
80
|
+
|
|
81
|
+
# Roll back to the first commit
|
|
82
|
+
s.reset_to(first)
|
|
83
|
+
print(s["score"]) # 0
|
|
84
|
+
|
|
85
|
+
# Branching
|
|
86
|
+
s["score"] = 50
|
|
87
|
+
s.commit()
|
|
88
|
+
|
|
89
|
+
dev = s.create_branch("dev")
|
|
90
|
+
dev["score"] = 999
|
|
91
|
+
dev.commit()
|
|
92
|
+
|
|
93
|
+
print(s["score"]) # 50 (main unchanged)
|
|
94
|
+
print(dev["score"]) # 999 (dev branch)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Development
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
uv sync --extra dev
|
|
101
|
+
uv run pytest
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Documentation
|
|
105
|
+
|
|
106
|
+
See [`docs/`](docs/) for detailed API documentation:
|
|
107
|
+
|
|
108
|
+
- [Core API (Versioned)](docs/versioned.md) -- commits, reads, writes, merging, branching, history
|
|
109
|
+
- [Merge Functions](docs/content-types.md) -- per-key merge logic for conflict resolution
|
|
110
|
+
- [Garbage Collection](docs/gc.md) -- GCVersioned, rebase, orphan cleanup
|
|
111
|
+
- [Backends & Namespaces](docs/backends.md) -- KVStore interface, Memory, Disk, Store, Staged, Live, Namespaced
|
kvgit-0.1.0/README.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# kvgit 🔀
|
|
2
|
+
|
|
3
|
+
Versioned key-value store with git-like commit, branch, and merge semantics.
|
|
4
|
+
|
|
5
|
+
Values are `Any` (serialized via pickle by default). Commits are content-addressable. Branches are cheap. Merges are three-way with pluggable per-key conflict resolution.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Commits** -- immutable, content-addressable snapshots
|
|
10
|
+
- **Branches** -- named branch heads with CAS-based concurrency
|
|
11
|
+
- **Three-way merge** -- auto-merges non-overlapping changes, pluggable merge functions for conflicts
|
|
12
|
+
- **Merge functions** -- counters, last-writer-wins, or custom per-key merge logic
|
|
13
|
+
- **Garbage collection** -- high/low water rebase drops cold keys automatically
|
|
14
|
+
- **Namespaces** -- key-prefixed views with full read/write support
|
|
15
|
+
- **Pluggable backends** -- in-memory, disk (via diskcache), or bring your own `KVStore`
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install kvgit # in-memory only
|
|
21
|
+
pip install kvgit[disk] # adds disk backend via diskcache
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Quick example
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
import kvgit
|
|
28
|
+
|
|
29
|
+
# Create a store -- values are Any (pickle-serialized by default)
|
|
30
|
+
s = kvgit.store()
|
|
31
|
+
|
|
32
|
+
s["user"] = "alice"
|
|
33
|
+
s["score"] = 0
|
|
34
|
+
s.commit()
|
|
35
|
+
|
|
36
|
+
first = s.current_commit
|
|
37
|
+
|
|
38
|
+
# Update and commit again
|
|
39
|
+
s["score"] = 100
|
|
40
|
+
s.commit()
|
|
41
|
+
print(s["score"]) # 100
|
|
42
|
+
|
|
43
|
+
# Roll back to the first commit
|
|
44
|
+
s.reset_to(first)
|
|
45
|
+
print(s["score"]) # 0
|
|
46
|
+
|
|
47
|
+
# Branching
|
|
48
|
+
s["score"] = 50
|
|
49
|
+
s.commit()
|
|
50
|
+
|
|
51
|
+
dev = s.create_branch("dev")
|
|
52
|
+
dev["score"] = 999
|
|
53
|
+
dev.commit()
|
|
54
|
+
|
|
55
|
+
print(s["score"]) # 50 (main unchanged)
|
|
56
|
+
print(dev["score"]) # 999 (dev branch)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Development
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
uv sync --extra dev
|
|
63
|
+
uv run pytest
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Documentation
|
|
67
|
+
|
|
68
|
+
See [`docs/`](docs/) for detailed API documentation:
|
|
69
|
+
|
|
70
|
+
- [Core API (Versioned)](docs/versioned.md) -- commits, reads, writes, merging, branching, history
|
|
71
|
+
- [Merge Functions](docs/content-types.md) -- per-key merge logic for conflict resolution
|
|
72
|
+
- [Garbage Collection](docs/gc.md) -- GCVersioned, rebase, orphan cleanup
|
|
73
|
+
- [Backends & Namespaces](docs/backends.md) -- KVStore interface, Memory, Disk, Store, Staged, Live, Namespaced
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""kvgit: Versioned key-value store."""
|
|
2
|
+
|
|
3
|
+
from .content_types import MergeFn, counter, last_writer_wins
|
|
4
|
+
from .errors import ConcurrencyError, MergeConflict
|
|
5
|
+
from .live import Live
|
|
6
|
+
from .namespaced import Namespaced
|
|
7
|
+
from .staged import Staged
|
|
8
|
+
from .store import store
|
|
9
|
+
from .versioned import MergeResult, Versioned
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"ConcurrencyError",
|
|
13
|
+
"Live",
|
|
14
|
+
"MergeConflict",
|
|
15
|
+
"MergeFn",
|
|
16
|
+
"MergeResult",
|
|
17
|
+
"Namespaced",
|
|
18
|
+
"Staged",
|
|
19
|
+
"Versioned",
|
|
20
|
+
"counter",
|
|
21
|
+
"last_writer_wins",
|
|
22
|
+
"store",
|
|
23
|
+
]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Merge functions for typed values."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Callable
|
|
4
|
+
|
|
5
|
+
MergeFn = Callable[[Any | None, Any, Any], Any]
|
|
6
|
+
"""Merge function: (old_value | None, our_value, their_value) -> merged_value.
|
|
7
|
+
|
|
8
|
+
Any argument can be None (key absent or removed on that side).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def counter() -> MergeFn:
    """Build a merge function for additive counters.

    The returned callable applies both sides' changes on top of the common
    ancestor: ``ours + theirs - old``. A missing ancestor (``None``) is
    treated as ``0``.
    """

    def _combine(old: int | None, ours: int, theirs: int) -> int:
        ancestor = 0 if old is None else old
        return ours + theirs - ancestor

    return _combine
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def last_writer_wins() -> MergeFn:
    """Build a merge function that always resolves to ``theirs``."""

    def _take_theirs(old, ours, theirs):
        # The ancestor and our own value are deliberately ignored.
        return theirs

    return _take_theirs
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""kvgit error types."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ConcurrencyError(Exception):
    """Signal that a concurrent writer won the race for HEAD.

    HEAD was moved by another process after this branch was started and
    before the compare-and-swap that would have published the merge.
    Callers are expected to reset and retry.
    """
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class MergeConflict(Exception):
|
|
14
|
+
"""Raised when a three-way merge encounters unresolvable conflicts.
|
|
15
|
+
|
|
16
|
+
Attributes:
|
|
17
|
+
conflicting_keys: The set of keys that could not be auto-merged.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
def __init__(
|
|
21
|
+
self,
|
|
22
|
+
conflicting_keys: set[str],
|
|
23
|
+
merge_errors: dict[str, Exception] | None = None,
|
|
24
|
+
) -> None:
|
|
25
|
+
self.conflicting_keys = conflicting_keys
|
|
26
|
+
self.merge_errors = merge_errors or {}
|
|
27
|
+
keys_str = ", ".join(sorted(conflicting_keys))
|
|
28
|
+
super().__init__(f"Merge conflict on keys: {keys_str}")
|
kvgit-0.1.0/kvgit/gc.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
"""GCVersioned: Versioned state with automatic garbage collection."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Callable
|
|
7
|
+
|
|
8
|
+
from .errors import ConcurrencyError
|
|
9
|
+
from .kv.base import KVStore
|
|
10
|
+
from .versioned import (
|
|
11
|
+
BRANCH_HEAD,
|
|
12
|
+
COMMIT_KEYSET,
|
|
13
|
+
INFO_KEY,
|
|
14
|
+
META_KEY,
|
|
15
|
+
PARENT_COMMIT,
|
|
16
|
+
TOTAL_VAR_SIZE_KEY,
|
|
17
|
+
MergeResult,
|
|
18
|
+
MetaEntry,
|
|
19
|
+
Versioned,
|
|
20
|
+
_from_bytes,
|
|
21
|
+
_meta_from_bytes,
|
|
22
|
+
_meta_to_bytes,
|
|
23
|
+
_to_bytes,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _is_system_key(key: str) -> bool:
|
|
28
|
+
"""Check if a key is a system/protected key (starts with ``__``).
|
|
29
|
+
|
|
30
|
+
Handles both direct keys (``"__foo__"``) and namespaced keys
|
|
31
|
+
(``"ns/__foo__"``) by extracting the base key name.
|
|
32
|
+
|
|
33
|
+
This is the default ``is_protected`` policy for ``GCVersioned``.
|
|
34
|
+
"""
|
|
35
|
+
base_key = key.split("/")[-1] if "/" in key else key
|
|
36
|
+
return base_key.startswith("__")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass(frozen=True)
|
|
40
|
+
class RebaseResult:
|
|
41
|
+
"""Result of a rebase/GC operation."""
|
|
42
|
+
|
|
43
|
+
performed: bool
|
|
44
|
+
new_commit: str | None
|
|
45
|
+
dropped_keys: tuple[str, ...]
|
|
46
|
+
kept_keys: tuple[str, ...]
|
|
47
|
+
total_size_before: int
|
|
48
|
+
total_size_after: int
|
|
49
|
+
orphans_cleaned: int = 0
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class GCVersioned(Versioned):
    """Versioned state with built-in garbage collection via rebase.

    Rebase strategy (high/low water):
    - Track total persisted user-var size from commit metadata.
    - If total <= high_water_bytes: no-op.
    - If total > high_water_bytes: drop coldest user keys (oldest touch,
      then largest) until total <= low_water_bytes (default 80% of high).
    - Protected keys (as determined by ``is_protected``) are always retained.
    - Write a fresh root commit with only retained keys, then delete
      dropped blobs and orphaned commits.

    Blob deletion skips any blob that a commit reachable from some branch
    head still lists in its keyset.

    Every ``commit()`` auto-runs the high/low check.
    """

    def __init__(
        self,
        store: KVStore | None = None,
        *,
        commit_hash: str | None = None,
        branch: str = "main",
        high_water_bytes: int,
        low_water_bytes: int | None = None,
        is_protected: Callable[[str], bool] = _is_system_key,
    ) -> None:
        """Configure the GC thresholds on top of a ``Versioned`` instance.

        Args:
            store: Backing store, forwarded to ``Versioned``.
            commit_hash: Starting commit, forwarded to ``Versioned``.
            branch: Branch name, forwarded to ``Versioned``.
            high_water_bytes: Size above which a rebase is triggered.
            low_water_bytes: Size a rebase tries to get down to. ``None``, a
                non-positive value, or a value above ``high_water_bytes``
                falls back to 80% of ``high_water_bytes``.
            is_protected: Predicate naming keys that GC must never drop.

        Raises:
            ValueError: If ``high_water_bytes`` is not positive.
        """
        # Validate first so a bad configuration fails before any
        # parent-side setup runs.
        if high_water_bytes <= 0:
            raise ValueError("high_water_bytes must be > 0")
        super().__init__(store, commit_hash=commit_hash, branch=branch)

        default_low = int(high_water_bytes * 0.8)
        low = default_low if low_water_bytes is None else low_water_bytes
        if low <= 0 or low > high_water_bytes:
            low = default_low

        self.high_water = high_water_bytes
        self.low_water = low
        self._is_protected = is_protected
        self.last_rebase_result: RebaseResult | None = None

    def commit(
        self,
        updates: dict[str, bytes] | None = None,
        removals: set[str] | None = None,
        *,
        on_conflict: str = "raise",
        merge_fns=None,
        default_merge=None,
        info: dict | None = None,
    ) -> "MergeResult":
        """Commit changes, then run GC if above high water mark.

        All arguments are forwarded unchanged to ``Versioned.commit``. When
        the parent reports ``merged``, the outcome of the follow-up
        ``maybe_rebase()`` is stored in ``last_rebase_result``.

        Returns:
            The ``MergeResult`` produced by ``Versioned.commit``.
        """
        result = super().commit(
            updates,
            removals,
            on_conflict=on_conflict,
            merge_fns=merge_fns,
            default_merge=default_merge,
            info=info,
        )
        if result.merged:
            self.last_rebase_result = self.maybe_rebase()
        return result

    def maybe_rebase(self) -> RebaseResult:
        """Run rebase only if total size exceeds high water mark.

        Returns:
            The result of ``rebase()``, or a ``performed=False`` result
            listing the current keys when the total is within bounds.
        """
        # Same fallback as rebase(): when the size record is missing, use
        # the sum of recorded entry sizes instead of silently reporting 0.
        total = self._load_total_size(default=self._gc_meta_size(self._meta))
        if total <= self.high_water:
            return RebaseResult(
                performed=False,
                new_commit=None,
                dropped_keys=(),
                kept_keys=tuple(self._commit_keys),
                total_size_before=total,
                total_size_after=total,
            )
        return self.rebase()

    def rebase(
        self,
        keep_keys: set[str] | None = None,
        *,
        info: dict | None = None,
    ) -> RebaseResult:
        """Rebase: create a fresh root commit, dropping cold keys.

        Args:
            keep_keys: If provided, retain exactly these keys (plus protected
                keys). Otherwise, use the high/low water strategy.
            info: Optional metadata for the rebase commit.

        Returns:
            A ``RebaseResult`` with ``performed=True``. ``kept_keys`` lists
            the keys actually written to the new commit.

        Raises:
            ConcurrencyError: If the branch HEAD no longer matches the base
                commit when the new commit is published.
        """
        from .versioned import _content_hash

        meta = self._meta
        old_commit_keys = self._commit_keys
        total_before = self._load_total_size(default=self._gc_meta_size(meta))

        # Identify protected and user keys.
        protected_keys = {
            key: vk for key, vk in old_commit_keys.items() if self._is_protected(key)
        }
        user_meta = {
            key: entry for key, entry in meta.items() if not self._is_protected(key)
        }

        retained_keys = set(protected_keys) | set(user_meta)
        dropped: list[str] = []

        if keep_keys is not None:
            # Explicit keep set: drop every user key not in it. Protected
            # keys are not in user_meta, so they are never considered here.
            for key in user_meta:
                if key not in keep_keys:
                    retained_keys.discard(key)
                    dropped.append(key)
        else:
            # High/low water strategy: drop coldest until under low water.
            total = self._gc_meta_size(user_meta)
            candidates: list[tuple[str, MetaEntry]] = sorted(
                user_meta.items(),
                key=lambda kv: (kv[1].last_touch, -(kv[1].size or 0)),
            )
            for key, entry in candidates:
                if total <= self.low_water:
                    break
                retained_keys.discard(key)
                dropped.append(key)
                total -= entry.size or 0

        # Read every retained blob once; keys without a blob are skipped.
        new_meta: dict[str, MetaEntry] = {}
        retained_data: dict[str, bytes] = {}
        protected_data: dict[str, bytes] = {}
        for key in retained_keys:
            versioned_key = old_commit_keys.get(key)
            if not versioned_key:
                continue
            value = self.store.get(versioned_key)
            if value is None:
                continue
            if key in protected_keys:
                protected_data[key] = value
            else:
                retained_data[key] = value
            if key in meta:
                new_meta[key] = meta[key]

        # Content-addressable hash for the rebase commit (no parents, fresh
        # root). Protected keys contribute their existing blob key; user keys
        # contribute a placeholder alongside their data.
        preview_keys: dict[str, str] = dict(protected_keys)
        for key in retained_data:
            preview_keys[key] = f"<pending:{key}>"
        new_hash = _content_hash((), preview_keys, retained_data, info=info)

        # Build the write batch: blobs are copied under keys owned by the
        # new commit, protected keys first.
        new_commit_keys: dict[str, str] = {}
        diffs: dict[str, bytes] = {}
        for key in protected_keys:
            if key in protected_data:
                new_vk = f"{new_hash}:{key}"
                new_commit_keys[key] = new_vk
                diffs[new_vk] = protected_data[key]
        for key, value in retained_data.items():
            new_vk = f"{new_hash}:{key}"
            new_commit_keys[key] = new_vk
            diffs[new_vk] = value

        # Commit metadata.
        total_after = self._gc_meta_size(new_meta)
        diffs[COMMIT_KEYSET % new_hash] = _to_bytes(new_commit_keys)
        diffs[PARENT_COMMIT % new_hash] = _to_bytes([])
        diffs[META_KEY % new_hash] = _meta_to_bytes(new_meta)
        diffs[TOTAL_VAR_SIZE_KEY % new_hash] = _to_bytes(total_after)
        if info is not None:
            diffs[INFO_KEY % new_hash] = _to_bytes(info)

        self.store.set_many(**diffs)

        # CAS HEAD to the new rebase commit.
        branch_key = BRANCH_HEAD % self._branch
        expected = _to_bytes(self._base_commit)
        if not self.store.cas(branch_key, _to_bytes(new_hash), expected=expected):
            raise ConcurrencyError("HEAD changed during rebase.")

        # HEAD has moved: update in-memory state before any cleanup so a
        # failure while deleting cannot leave this object on the old commit.
        self._commit_keys = new_commit_keys
        self._current_commit = new_hash
        self._base_commit = new_hash
        self._meta = new_meta

        # Delete dropped blobs, except those a reachable commit still lists.
        # NOTE(review): assumes one blob key can appear in the keysets of
        # several commits (e.g. branches sharing history) -- confirm against
        # Versioned.commit.
        to_delete = [
            old_commit_keys[key] for key in dropped if old_commit_keys.get(key)
        ]
        if to_delete:
            live_blobs = self._gc_live_blobs(self._gc_reachable_commits())
            to_delete = [vk for vk in to_delete if vk not in live_blobs]
        if to_delete:
            self.store.remove_many(*to_delete)

        # Clean orphaned commits.
        orphans_cleaned = self.clean_orphans()

        return RebaseResult(
            performed=True,
            new_commit=new_hash,
            dropped_keys=tuple(dropped),
            kept_keys=tuple(new_commit_keys),
            total_size_before=total_before,
            total_size_after=total_after,
            orphans_cleaned=orphans_cleaned,
        )

    def clean_orphans(self, min_age: float = 3600) -> int:
        """Remove orphaned commits unreachable from any branch head.

        A commit is only removed when its metadata is non-empty and the
        ``created_at`` of its first metadata entry is older than the cutoff.
        Blobs that a reachable commit still lists are left in place.

        Args:
            min_age: Only delete orphans older than this many seconds
                (default 1 hour).

        Returns:
            Number of orphaned commits cleaned.
        """
        # Mark phase: find all reachable commits across ALL branches.
        reachable = self._gc_reachable_commits()

        # Sweep phase: find orphaned commits by scanning for meta keys.
        meta_prefix = META_KEY.replace("%s", "")
        cutoff_time = time.time() - min_age
        orphans: list[str] = []

        for key in self.store.keys():
            if not isinstance(key, str) or not key.startswith(meta_prefix):
                continue
            commit_hash = key[len(meta_prefix) :]
            if not commit_hash or commit_hash in reachable:
                continue
            meta_bytes = self.store.get(key)
            if meta_bytes is None:
                continue
            try:
                meta = _meta_from_bytes(meta_bytes)
                if meta:
                    # Age is judged from the first metadata entry only.
                    first_entry = next(iter(meta.values()), None)
                    if first_entry and first_entry.created_at < cutoff_time:
                        orphans.append(commit_hash)
            except (json.JSONDecodeError, TypeError, KeyError):
                continue

        if not orphans:
            return 0

        # Never delete a blob that a reachable commit still points at.
        live_blobs = self._gc_live_blobs(reachable)

        # Delete orphaned commits and their data.
        for orphan_hash in orphans:
            keyset_bytes = self.store.get(COMMIT_KEYSET % orphan_hash)
            if keyset_bytes:
                try:
                    keyset = _from_bytes(keyset_bytes)
                    blob_keys = [
                        vk for vk in keyset.values() if vk not in live_blobs
                    ]
                    if blob_keys:
                        self.store.remove_many(*blob_keys)
                except Exception:
                    # Best-effort: an unreadable keyset must not stop the
                    # commit records themselves from being removed.
                    pass
            self.store.remove_many(
                META_KEY % orphan_hash,
                COMMIT_KEYSET % orphan_hash,
                PARENT_COMMIT % orphan_hash,
                TOTAL_VAR_SIZE_KEY % orphan_hash,
                INFO_KEY % orphan_hash,
            )

        return len(orphans)

    def _gc_reachable_commits(self) -> set[str]:
        """Collect every commit reachable from any stored branch head."""
        reachable: set[str] = set()
        prefix = BRANCH_HEAD.replace("%s", "")
        for key in self.store.keys():
            if not isinstance(key, str) or not key.startswith(prefix):
                continue
            head_bytes = self.store.get(key)
            if head_bytes is None:
                continue
            branch_head = _from_bytes(head_bytes)
            reachable.update(self.history(commit_hash=branch_head, all_parents=True))
        return reachable

    def _gc_live_blobs(self, commits: set[str]) -> set[str]:
        """Collect the blob keys listed in the keysets of *commits*."""
        live: set[str] = set()
        for commit in commits:
            keyset_bytes = self.store.get(COMMIT_KEYSET % commit)
            if not keyset_bytes:
                continue
            try:
                live.update(_from_bytes(keyset_bytes).values())
            except Exception:
                # Best-effort, matching the sweep: an unreadable keyset
                # contributes nothing rather than aborting the pass.
                continue
        return live

    @staticmethod
    def _gc_meta_size(meta: dict[str, MetaEntry]) -> int:
        """Sum the recorded sizes in *meta*, counting a missing size as 0."""
        return sum(entry.size or 0 for entry in meta.values())

    def _load_total_size(self, default: int = 0) -> int:
        """Load the total variable size for the current commit.

        Args:
            default: Value returned when the size record is missing or
                cannot be decoded.
        """
        total_bytes = self.store.get(TOTAL_VAR_SIZE_KEY % self._current_commit)
        if total_bytes is None:
            return default
        try:
            return _from_bytes(total_bytes)
        except Exception:
            return default
|