kanta 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kanta/__init__.py ADDED
@@ -0,0 +1,26 @@
1
+ from .diff import compute_diff
2
+ from .diff import replay_jsonl as replay
3
+ from .exceptions import DatabaseError, DataIntegrityError, FileLockError, ReplayError
4
+ from .filelock import LockedFile
5
+ from .kanta import Kanta
6
+ from .logging import configure_logging, format_diff, log_change
7
+ from .serialization import JsonSerializer, MsgPackSerializer
8
+ from .structs import ChangeRecord, Snapshot
9
+
10
# Public API of the package. Every name listed here is imported at the top of
# this module; "replay" is the alias under which ``replay_jsonl`` from
# ``.diff`` is re-exported.
__all__ = [
    "ChangeRecord",
    "compute_diff",
    "configure_logging",
    "DataIntegrityError",
    "DatabaseError",
    "FileLockError",
    "format_diff",
    "JsonSerializer",
    "Kanta",
    "LockedFile",
    "log_change",
    "MsgPackSerializer",
    "ReplayError",
    "replay",
    "Snapshot",
]
kanta/diff.py ADDED
@@ -0,0 +1,81 @@
1
+ """Diff computation and replay utilities."""
2
+
3
+ import jsondiff
4
+
5
+ from kanta.kanta.structs import ChangeRecord
6
+ from kanta.serialization.base import ReplayResult, replay
7
+ from kanta.serialization.framing import LineFramer
8
+ from kanta.serialization.json import JsonSerializer
9
+
10
+
11
def compute_diff(previous: dict, current: dict) -> dict | None:
    """Return the jsondiff patch that turns *previous* into *current*.

    The patch is requested with ``marshal=True``. When jsondiff yields a
    falsy result (no difference), ``None`` is returned instead.
    """
    patch = jsondiff.diff(previous, current, marshal=True)
    if not patch:
        return None
    return patch
17
+
18
+
19
def _unescape(text):
    """Undo jsondiff's marshal escaping on a single key or leaf value.

    With ``marshal=True`` jsondiff prefixes every real string that starts
    with ``$`` with one extra ``$`` so it cannot be confused with a command
    such as ``$replace``. Only that doubled prefix is stripped here; anything
    else (including non-strings) is returned unchanged.
    """
    if isinstance(text, str) and text.startswith("$$"):
        return text[1:]
    return text


def _unmarshal(value):
    """Recursively unescape a literal payload (dicts, lists, tuples, leaves)."""
    if isinstance(value, dict):
        return {_unescape(k): _unmarshal(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        return type(value)(_unmarshal(item) for item in value)
    return _unescape(value)


def _apply_diff(state: dict, diff: dict) -> dict:
    """Apply a jsondiff patch manually, handling ``$replace`` and ``$delete``.

    jsondiff.patch does not handle nested ``$replace`` commands when the
    parent key is missing from the state. This function recursively applies
    diffs, treating ``$replace`` as full replacement and ``$delete`` as
    key removal.

    Keys and values that were escaped by ``marshal=True`` (``"$$name"`` for a
    real ``"$name"``) are unescaped while patching, so replayed state matches
    the state the diff was computed from.

    Args:
        state: Current value at this level. Anything that is not a dict is
            treated as an empty dict when *diff* is a dict.
        diff: Patch for this level. A non-dict diff is a literal replacement.

    Returns:
        The patched value. *state* itself is never mutated; dicts along the
        patched path are shallow-copied.

    NOTE(review): a dict-shaped diff aimed at a list value (e.g. jsondiff's
    positional ``$insert`` form) is not interpreted element-wise; the list is
    replaced by a dict built from the diff. Confirm lists never reach this
    path with partial diffs.
    """
    if not isinstance(diff, dict):
        return _unmarshal(diff)

    result = dict(state) if isinstance(state, dict) else {}

    for key, value in diff.items():
        if key == "$replace":
            # Full replacement wins over everything else at this level.
            return _unmarshal(value)
        if key == "$delete":
            doomed = value if isinstance(value, list) else [value]
            for name in doomed:
                result.pop(_unescape(name), None)
            continue

        target = _unescape(key)
        if isinstance(value, dict):
            old = result.get(target)
            result[target] = _apply_diff(old if isinstance(old, dict) else {}, value)
        else:
            result[target] = _unmarshal(value)

    return result


def patch_state(state: dict, diff: dict) -> dict:
    """Apply a jsondiff patch to a state dict.

    The diff was produced with ``marshal=True`` (string keys like
    ``"$replace"`` and ``"$delete"``) and decoded from JSON.

    Args:
        state: State to patch; left unmodified.
        diff: Marshalled jsondiff patch.

    Returns:
        The patched state.
    """
    return _apply_diff(state, diff)
61
+
62
+
63
# Backward-compatible JSONL replay using the default serializer.
# Both helpers are instantiated once at import time and reused by every
# replay_jsonl() call.
_default_serializer = JsonSerializer()
_default_framer = LineFramer()


def replay_jsonl(
    data: bytes,
    *,
    type: type[ChangeRecord] = ChangeRecord,
) -> ReplayResult:
    """Replay database state from JSONL file data.

    This is the legacy public API that hard-codes JSON/JSONL handling.

    Args:
        data: Raw bytes of the database file.
        type: Record type. NOTE(review): this argument is accepted but never
            forwarded to ``replay()``; confirm whether that is intentional.

    Returns:
        The result of ``replay()`` called with the module-level line framer
        and the default JSON serializer's ``decode``.
    """
    return replay(
        data,
        framer=_default_framer,
        decode=_default_serializer.decode,
    )
kanta/exceptions.py ADDED
@@ -0,0 +1,61 @@
1
+ """Custom exception types for Kanta."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+
9
class DatabaseError(ValueError):
    """Exception raised for database loading errors.

    Besides the message, the instance carries optional metadata, each
    defaulting to ``None``:

    Attributes:
        db_path: Path supplied by the raiser.
        line_number: Line number supplied by the raiser.
        byte_pos: Byte position supplied by the raiser.
        cause_type: Name of an underlying cause, as supplied by the raiser.
    """

    def __init__(
        self,
        message: str,
        *,
        db_path: Path | None = None,
        line_number: int | None = None,
        byte_pos: int | None = None,
        cause_type: str | None = None,
    ):
        super().__init__(message)
        # Metadata is stored verbatim; no validation is performed.
        self.db_path = db_path
        self.line_number = line_number
        self.byte_pos = byte_pos
        self.cause_type = cause_type
26
+
27
+
28
class ReplayError(DatabaseError):
    """Structured replay error with source location metadata.

    Accepts the same optional metadata as ``DatabaseError`` and adds
    ``record_type``. ``db_path`` and ``cause_type`` are keyword-only and
    default to ``None``, so existing call sites keep working unchanged.

    Attributes:
        record_type: Record type label supplied by the raiser, or ``None``.
    """

    def __init__(
        self,
        message: str,
        *,
        line_number: int | None = None,
        byte_pos: int | None = None,
        record_type: str | None = None,
        db_path: Path | None = None,
        cause_type: str | None = None,
    ):
        self.record_type = record_type
        # Forward all base-class metadata so a replay failure can also report
        # which file it came from and what caused it.
        super().__init__(
            message,
            db_path=db_path,
            line_number=line_number,
            byte_pos=byte_pos,
            cause_type=cause_type,
        )
41
+
42
+
43
class FileLockError(DatabaseError):
    """Raised when database file open/lock operations fail.

    Adds no behavior of its own: the constructor and metadata attributes are
    inherited from ``DatabaseError``.
    """
45
+
46
+
47
class DataIntegrityError(RuntimeError):
    """Raised when in-memory data integrity invariants are violated.

    Attributes:
        db_path: Path supplied by the raiser, or ``None``.
        action: Action label supplied by the raiser, or ``None``.
        diff: Diff dict supplied by the raiser, or ``None``.
    """

    def __init__(
        self,
        message: str,
        *,
        db_path: Path | None = None,
        action: str | None = None,
        diff: dict[str, Any] | None = None,
    ):
        super().__init__(message)
        # Context is kept as given so handlers can inspect it.
        self.db_path = db_path
        self.action = action
        self.diff = diff
kanta/filelock.py ADDED
@@ -0,0 +1,274 @@
1
+ """Cross-platform locked file for the database (no separate .lock files).
2
+
3
+ Unix: open() + fcntl.flock (advisory, cooperative among processes that flock).
4
+ Windows: CreateFileW with FILE_SHARE_READ (OS-enforced, allows readers, blocks writers).
5
+
6
+ A single file descriptor is opened once for both reading and writing.
7
+ The lock is acquired atomically (on Windows) or immediately after open (on Unix),
8
+ and the same descriptor is used for the lifetime of the process: first to read
9
+ the existing content, then to append new writes.
10
+ """
11
+
12
+ import logging
13
+ import os
14
+ import sys
15
+ from pathlib import Path
16
+
17
+ from kanta.exceptions import FileLockError
18
+
19
+ _logger = logging.getLogger(__name__)
20
+
21
+
22
def _fatal(msg: str, *, db_path: Path | None = None) -> None:
    """Log *msg* at CRITICAL level, then raise ``FileLockError``.

    This function never returns normally; callers rely on it to end the
    current code path.
    """
    _logger.critical(msg)
    error = FileLockError(msg, db_path=db_path)
    raise error
26
+
27
+
28
# Platform-specific setup: Windows talks to kernel32 through ctypes, every
# other platform only needs fcntl.
if sys.platform == "win32":
    import ctypes
    from ctypes import wintypes

    # use_last_error=True is what makes ctypes.get_last_error() usable for the
    # kernel32 calls made through this object.
    _kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)

    # Win32 constants passed to CreateFileW / SetFilePointer, or compared
    # against results of kernel32 calls, by LockedFile.
    _GENERIC_READ = 0x80000000
    _GENERIC_WRITE = 0x40000000
    _FILE_SHARE_READ = 0x00000001
    _OPEN_EXISTING = 3
    _OPEN_ALWAYS = 4
    _FILE_ATTRIBUTE_NORMAL = 0x80
    _FILE_BEGIN = 0
    _FILE_END = 2
    _ERROR_SHARING_VIOLATION = 32
    _INVALID_FILE_SIZE = 0xFFFFFFFF

    # Explicit return/argument types for every kernel32 function used below.
    _kernel32.CreateFileW.restype = wintypes.HANDLE
    _kernel32.CreateFileW.argtypes = [
        wintypes.LPCWSTR,
        wintypes.DWORD,
        wintypes.DWORD,
        ctypes.c_void_p,
        wintypes.DWORD,
        wintypes.DWORD,
        wintypes.HANDLE,
    ]
    _kernel32.ReadFile.restype = wintypes.BOOL
    _kernel32.ReadFile.argtypes = [
        wintypes.HANDLE,
        ctypes.c_void_p,
        wintypes.DWORD,
        ctypes.POINTER(wintypes.DWORD),
        ctypes.c_void_p,
    ]
    _kernel32.WriteFile.restype = wintypes.BOOL
    _kernel32.WriteFile.argtypes = [
        wintypes.HANDLE,
        ctypes.c_void_p,
        wintypes.DWORD,
        ctypes.POINTER(wintypes.DWORD),
        ctypes.c_void_p,
    ]
    # NOTE(review): GetFileSize/SetFilePointer are declared with 32-bit
    # DWORD/LONG values and are called with the high-part pointer set to None,
    # so this presumably only covers files below 4 GiB; confirm.
    _kernel32.GetFileSize.restype = wintypes.DWORD
    _kernel32.GetFileSize.argtypes = [
        wintypes.HANDLE,
        ctypes.POINTER(wintypes.DWORD),
    ]
    _kernel32.SetFilePointer.restype = wintypes.DWORD
    _kernel32.SetFilePointer.argtypes = [
        wintypes.HANDLE,
        wintypes.LONG,
        ctypes.POINTER(wintypes.LONG),
        wintypes.DWORD,
    ]
    _kernel32.CloseHandle.restype = wintypes.BOOL
    _kernel32.CloseHandle.argtypes = [wintypes.HANDLE]

    def _is_invalid_handle(handle) -> bool:
        """Return True when *handle* equals the pointer value of -1.

        Used on the result of CreateFileW to detect a failed open
        (presumably INVALID_HANDLE_VALUE).
        """
        return ctypes.c_void_p(handle).value == ctypes.c_void_p(-1).value

else:
    import fcntl
91
+
92
+
93
class LockedFile:
    """A file opened with an exclusive write lock.

    Usage::

        f = LockedFile()
        f.open(path)           # open + lock (read+write)
        content = f.read()     # read entire content
        f.write(data)          # append data (seeks to end first)
        f.close()              # release lock + close fd

    The instance can also be used as a context manager, which closes the
    file (and thereby releases the lock) on exit::

        with LockedFile() as f:
            f.open(path)
            ...

    Unix: fcntl.flock (advisory) — read-only callers that don't flock are unaffected.
    Windows: CreateFileW with FILE_SHARE_READ — OS blocks other writers.
    """

    def __init__(self) -> None:
        self._fd: int | None = None  # Unix fd or Windows HANDLE

    def __enter__(self) -> "LockedFile":
        """Return the instance itself; the file is not opened implicitly."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the file if it is open; exceptions are not suppressed."""
        self.close()

    def open(self, path: Path, *, create: bool = False) -> None:
        """Open *path* for read+write with an exclusive lock.

        Calling this on an already-open instance is a no-op, regardless of
        *path*.

        Args:
            path: File to open and lock.
            create: If True, create the file if it doesn't exist (bootstrap).

        Raises:
            FileLockError: If the file is locked by another process or not found.
        """
        if self._fd is not None:
            return  # Already open (idempotent)

        if sys.platform == "win32":
            self._open_win32(path, create)
        else:
            self._open_unix(path, create)

    def open_and_read(self, path: Path, create: bool = False) -> bytes:
        """Open *path* with exclusive lock and read all content.

        Combined operation for efficient use with asyncio.to_thread().
        """
        self.open(path, create=create)
        return self.read()

    def read(self) -> bytes:
        """Read the entire file content from the beginning.

        Raises:
            RuntimeError: If the file is not open.
        """
        if self._fd is None:
            raise RuntimeError("LockedFile.read() called on a closed file")

        if sys.platform == "win32":
            return self._read_win32()
        return self._read_unix()

    def write(self, data: bytes) -> None:
        """Append *data* to the end of the file.

        Raises:
            RuntimeError: If the file is not open.
            OSError: If the operating system reports a failed or short write.
        """
        if self._fd is None:
            raise RuntimeError("LockedFile.write() called on a closed file")

        if sys.platform == "win32":
            self._write_win32(data)
        else:
            self._write_unix(data)

    def size(self) -> int:
        """Return current file size in bytes.

        Raises:
            RuntimeError: If the file is not open.
            OSError: On Windows, if GetFileSize fails.
        """
        if self._fd is None:
            raise RuntimeError("LockedFile.size() called on a closed file")

        if sys.platform == "win32":
            size = _kernel32.GetFileSize(self._fd, None)
            if size == _INVALID_FILE_SIZE:
                raise OSError(
                    f"GetFileSize failed: Windows error {ctypes.get_last_error()}"
                )
            return int(size)

        # Measure by seeking to the end, then restore the previous offset.
        current = os.lseek(self._fd, 0, os.SEEK_CUR)
        end = os.lseek(self._fd, 0, os.SEEK_END)
        os.lseek(self._fd, current, os.SEEK_SET)
        return end

    def close(self) -> None:
        """Release the lock and close the file. Safe to call repeatedly."""
        if self._fd is None:
            return
        # Forget the descriptor before closing it: if the close call raises,
        # a later close() must not close a descriptor number that may have
        # been reused for something else.
        fd, self._fd = self._fd, None
        if sys.platform == "win32":
            _kernel32.CloseHandle(fd)
        else:
            os.close(fd)

    @property
    def is_open(self) -> bool:
        """True while a descriptor/handle is held."""
        return self._fd is not None

    # -- Unix ----------------------------------------------------------------

    def _open_unix(self, path: Path, create: bool) -> None:
        """Open *path* with os.open and take a non-blocking exclusive flock."""
        flags = os.O_RDWR | (os.O_CREAT if create else 0)
        try:
            fd = os.open(path, flags, 0o666)
        except FileNotFoundError:
            # _fatal always raises, so fd is bound whenever we get past here.
            _fatal(f"Database file not found: {path.resolve()}", db_path=path)
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            # Do not leak the descriptor when the lock cannot be taken.
            os.close(fd)
            _fatal(
                f"{path.resolve()}: database already locked by another instance",
                db_path=path,
            )
        self._fd = fd

    def _read_unix(self) -> bytes:
        """Rewind and read until EOF in 1 MiB chunks."""
        os.lseek(self._fd, 0, os.SEEK_SET)
        chunks = []
        while True:
            chunk = os.read(self._fd, 1 << 20)  # 1 MiB
            if not chunk:
                break
            chunks.append(chunk)
        return b"".join(chunks)

    def _write_unix(self, data: bytes) -> None:
        """Seek to the end and write all of *data*, retrying short writes."""
        os.lseek(self._fd, 0, os.SEEK_END)
        view = memoryview(data)
        total = len(view)
        offset = 0
        # os.write may accept fewer bytes than requested; keep going until
        # everything is written so no tail of a record is silently dropped.
        while offset < total:
            written = os.write(self._fd, view[offset:])
            if written <= 0:
                raise OSError(f"write made no progress at byte {offset} of {total}")
            offset += written

    # -- Windows -------------------------------------------------------------

    def _open_win32(self, path: Path, create: bool) -> None:
        """Open *path* via CreateFileW, sharing the file for reading only."""
        disposition = _OPEN_ALWAYS if create else _OPEN_EXISTING
        handle = _kernel32.CreateFileW(
            str(path),
            _GENERIC_READ | _GENERIC_WRITE,
            _FILE_SHARE_READ,
            None,
            disposition,
            _FILE_ATTRIBUTE_NORMAL,
            None,
        )
        if _is_invalid_handle(handle):
            err = ctypes.get_last_error()
            if err == _ERROR_SHARING_VIOLATION:
                _fatal(
                    f"{path.resolve()}: database already locked by another instance",
                    db_path=path,
                )
            _fatal(
                f"Failed to open database {path.resolve()}: Windows error {err}",
                db_path=path,
            )
        self._fd = handle

    def _read_win32(self) -> bytes:
        """Rewind and read the whole file with a single ReadFile call."""
        _kernel32.SetFilePointer(self._fd, 0, None, _FILE_BEGIN)
        size = _kernel32.GetFileSize(self._fd, None)
        if size == _INVALID_FILE_SIZE:
            raise OSError(
                f"GetFileSize failed: Windows error {ctypes.get_last_error()}"
            )
        if size == 0:
            return b""
        buf = ctypes.create_string_buffer(size)
        bytes_read = wintypes.DWORD()
        ok = _kernel32.ReadFile(self._fd, buf, size, ctypes.byref(bytes_read), None)
        if not ok:
            raise OSError(f"ReadFile failed: Windows error {ctypes.get_last_error()}")
        # Trust the reported count rather than the buffer size.
        return buf.raw[: bytes_read.value]

    def _write_win32(self, data: bytes) -> None:
        """Seek to the end and write *data* with a single WriteFile call."""
        _kernel32.SetFilePointer(self._fd, 0, None, _FILE_END)
        written = wintypes.DWORD()
        ok = _kernel32.WriteFile(
            self._fd,
            data,
            len(data),
            ctypes.byref(written),
            None,
        )
        if not ok:
            raise OSError(f"WriteFile failed: Windows error {ctypes.get_last_error()}")
        # A successful call that wrote less than requested would otherwise
        # lose data silently.
        if written.value != len(data):
            raise OSError(
                f"WriteFile wrote {written.value} of {len(data)} bytes"
            )
kanta/kanta.py ADDED
@@ -0,0 +1,206 @@
1
+ """JSONL persistence layer with background flush task."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Callable
6
+ from pathlib import Path
7
+ from types import ModuleType
8
+ from typing import Any, Generic, TypeVar
9
+
10
+ from kanta.exceptions import DatabaseError
11
+ from kanta.kanta.kantaimpl import KantaImpl
12
+ from kanta.serialization import JsonSerializer, Serializer
13
+ from kanta.transaction import transaction as _transaction
14
+
15
T = TypeVar("T")


class Kanta(Generic[T]):
    """JSONL persistence layer for a msgspec.Struct database state.

    The application describes its schema as a msgspec.Struct (for example
    ``Data`` or ``Project``) and the Kanta instance keeps the live state as
    an object of that type. Diffing, replay and serialization internally
    round-trip through plain dicts.

    Pending changes are flushed to disk periodically by a background task,
    which is started with ``await kanta.open()`` and stopped with
    ``await kanta.close()``.

    Transactions are synchronous and take effect on the in-memory
    ``kanta.data`` immediately; persistence follows asynchronously in the
    background, or on demand via ``await kanta.flush()``.

    Usage::

        class Data(msgspec.Struct):
            users: dict[str, User] = {}

        kanta = Kanta("data.db", Data())
        await kanta.open()

        with kanta.transaction(action="create_user") as data:
            data.users["alice"] = User(name="Alice")

        await kanta.close()
    """

    def __init__(
        self,
        filename: Path | str,
        data: T,
        *,
        type: type[T] | None = None,
        migrations: ModuleType | str | None = None,
        migration_ctx: Any | None = None,
        serializer: Serializer | None = None,
        fatal_error: Callable[[DatabaseError], None] | None = None,
        flush_interval: float = 0.1,
    ):
        """Initialize a Kanta persistence instance.

        Args:
            filename: Path to the database file.
            data: Caller-owned root msgspec.Struct state instance.
            type: Optional explicit root type. Defaults to the class of
                ``data``.
            migrations: Optional migrations module object or import path.
            migration_ctx: Optional context object passed to migration
                functions.
            serializer: Optional serializer implementation. A
                ``JsonSerializer`` is created when omitted.
            fatal_error: Optional callback invoked immediately when the
                background writer encounters a DatabaseError.
            flush_interval: Background flush interval in seconds.

        Raises:
            ImportError: If ``migrations`` is a string path that cannot be
                imported.
            ValueError: If migration definitions are invalid.
        """
        if serializer is None:
            serializer = JsonSerializer()
        # The ``type`` parameter shadows the builtin here, so the fallback
        # is taken from ``data.__class__``.
        root_type = data.__class__ if type is None else type

        self._impl = KantaImpl(
            filename=filename,
            data=data,
            type=root_type,
            migrations=migrations,
            migration_ctx=migration_ctx,
            serializer=serializer,
            fatal_error=fatal_error,
            flush_interval=flush_interval,
        )

    @property
    def data(self) -> T:
        """Current in-memory state object.

        Returns:
            The live state instance of the configured ``type``.

        Notes:
            Mutate it only through :meth:`transaction` so that changes are
            diffed and persisted.
        """
        return self._impl.data

    @data.setter
    def data(self, value: T) -> None:
        """Replace the in-memory state object.

        Args:
            value: New state object instance.

        Notes:
            Mutations should normally go through :meth:`transaction` so that
            changes are diffed and persisted.
        """
        self._impl.data = value

    @property
    def version(self) -> int:
        """Current schema/database version.

        Returns:
            Integer version derived from migrations/replay state.
        """
        return self._impl.version

    @property
    def filename(self) -> Path:
        """Database file path.

        Returns:
            Filesystem path used for persistence.
        """
        return self._impl.filename

    async def open(self) -> None:
        """Open the database file and start background persistence.

        Existing records are loaded, configured migrations are applied and
        the background flush task is started. An instance must not be opened
        more than once.

        Raises:
            kanta.exceptions.DatabaseError: If replay or decoding fails.
            kanta.exceptions.DataIntegrityError: If the instance is already
                open.
        """
        await self._impl.open()

    async def __aenter__(self) -> Kanta[T]:
        """Open the database on entering ``async with``.

        Returns:
            This ``Kanta`` instance.
        """
        await self.open()
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the database on leaving ``async with``.

        Args:
            exc_type: Exception type raised inside the context, if any.
            exc: Exception instance raised inside the context, if any.
            tb: Traceback for the exception, if any.
        """
        await self.close()

    async def flush(self) -> None:
        """Asynchronously flush pending change records to disk."""
        await self._impl.flush()

    def request_snapshot(self) -> None:
        """Request a snapshot to be written on the next background iteration."""
        self._impl.snapshot.request_force()

    async def close(self) -> None:
        """Stop background task, flush pending changes, and close file lock."""
        await self._impl.close()

    def transaction(
        self,
        action: str,
        *,
        user: str | None = None,
        user_display: str | None = None,
        resolver: Any = None,
    ):
        """Create a transactional mutation context manager.

        Args:
            action: Action label stored in the change record.
            user: Optional user identifier stored in metadata.
            user_display: Optional display name used for logging/resolution.
            resolver: Optional callable for resolving identifiers in logs.

        Returns:
            A context manager yielding the live state object for mutation.

        Notes:
            A diff is queued for persistence when the context exits
            successfully. If an exception is raised inside the context, the
            in-memory changes are rolled back.
        """
        return _transaction(
            self._impl,
            action,
            user=user,
            user_display=user_display,
            resolver=resolver,
        )