kanta 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kanta-0.1.0/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ # Python cache/artifacts
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.so
5
+
6
+ # Hidden things
7
+ .*
8
+ !.gitignore
9
+
10
+ # Build/distribution artifacts
11
+ build/
12
+ dist/
13
+ *.egg-info/
14
+ *.lock
kanta-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.4
2
+ Name: kanta
3
+ Version: 0.1.0
4
+ Summary: Kanta No-SQL database for async Python and frameworks such as FastAPI and Sanic
5
+ Project-URL: Repository, https://git.zi.fi/LeoVasanko/kanta
6
+ Author: Leo Vasanko
7
+ Keywords: kantadb
8
+ Requires-Python: >=3.11
9
+ Requires-Dist: blake3>=1.0.8
10
+ Requires-Dist: jsondiff>=2.2.1
11
+ Requires-Dist: msgspec>=0.20.0
12
+ Provides-Extra: bin
13
+ Requires-Dist: blake3>=1.0.8; extra == 'bin'
14
+ Description-Content-Type: text/markdown
15
+
16
+ # Kanta database
17
+
18
+ Kanta is a small embedded NoSQL store for async Python apps. It keeps live state in memory, writes transactional diffs to an append-only log, and supports versioned schema migrations.
19
+
20
+ ## Why Kanta
21
+
22
+ - Fast synchronous reads and modifications on native Python objects
23
+ - Durable writes with append-only file and periodic snapshots
24
+ - Transaction semantics with rollback on failure
25
+ - Explicit schema evolution via `migrate_vN` functions
26
+ - Line-based JSON or binary MessagePack, or bring your own serializer
27
+ - Even in JSON we can use non-string keys, bytes, datetimes, UUID and other types
28
+
29
+ This design is often preferable when you want low-latency local persistence without operating a separate database service. You get straightforward deployment, auditable history, and deterministic replay while keeping application state ergonomic to work with.
30
+
31
+ Queries and updates on native data are far faster than over an SQL server connection, and we can provide fully synchronous operation. The limitation is that you can only use the same database within a single process at a time, but this is well suited for async programming.
32
+
33
+ ## Quick Start
34
+
35
+ ```python
36
+ import asyncio
37
+ import msgspec
38
+ from uuid import UUID, uuid7
39
+
40
+ from kanta import Kanta
41
+
42
+
43
+ class User(msgspec.Struct):
44
+ name: str = ""
45
+ email: str | None = None
46
+
47
+
48
+ class Data(msgspec.Struct):
49
+ users: dict[UUID, User] = {}
50
+
51
+
52
+ async def main() -> None:
53
+ async with Kanta("data.kantadb", Data()) as kanta:
54
+ user_id = uuid7()
55
+ with kanta.transaction(action="create_user") as data:
56
+ data.users[user_id] = User(name="Alice")
57
+
58
+
59
+ asyncio.run(main())
60
+ ```
61
+
62
+ ## Core Concepts
63
+
64
+ 1. Define your schema as a `msgspec.Struct` root object.
65
+ 2. Mutate data inside `with kanta.transaction(...):`.
66
+ 3. Let Kanta flush queued changes to disk in the background.
67
+ 4. Use snapshots and replay for fast startup and full history.
68
+
69
+ ## Migrations
70
+
71
+ Adding or removing a field and other such simple operations are automatic, but when the time comes to really change your data model, implement a `migrate_v1` function that converts your old data to the new form. Migrations operate on plain built-in dicts and other built-in types, so you do not need to preserve old versions of your structs.
72
+
73
+ Pass a module (or import path) containing `migrate_vN` functions:
74
+
75
+ ```python
76
+ kanta = Kanta("data.kantadb", Data(), migrations="myapp.migrations")
77
+ await kanta.open()
78
+ ```
79
+
80
+ Kanta tracks migration version metadata automatically, and fast-forwards your database to the current version by running all the needed migrations while opening the database.
81
+
82
+ ## On-Disk Format
83
+
84
+ Kanta in JSON mode (default) stores newline-delimited records. Transaction history is viewable in any plain text editor, and rollbacks to a prior state are done by simply removing the final lines (one per transaction).
85
+
86
+ MsgPack mode uses binary records with length and checksum to detect data corruption.
87
+
88
+ - Change line: JSON object with metadata + `diff`
89
+ - Snapshot line: `SNAPSHOT { ... full state ... }`
90
+
91
+ See `docs/database.md` for format details and invariants.
kanta-0.1.0/README.md ADDED
@@ -0,0 +1,76 @@
1
+ # Kanta database
2
+
3
+ Kanta is a small embedded NoSQL store for async Python apps. It keeps live state in memory, writes transactional diffs to an append-only log, and supports versioned schema migrations.
4
+
5
+ ## Why Kanta
6
+
7
+ - Fast synchronous reads and modifications on native Python objects
8
+ - Durable writes with append-only file and periodic snapshots
9
+ - Transaction semantics with rollback on failure
10
+ - Explicit schema evolution via `migrate_vN` functions
11
+ - Line-based JSON or binary MessagePack, or bring your own serializer
12
+ - Even in JSON we can use non-string keys, bytes, datetimes, UUID and other types
13
+
14
+ This design is often preferable when you want low-latency local persistence without operating a separate database service. You get straightforward deployment, auditable history, and deterministic replay while keeping application state ergonomic to work with.
15
+
16
+ Queries and updates on native data are far faster than over an SQL server connection, and we can provide fully synchronous operation. The limitation is that you can only use the same database within a single process at a time, but this is well suited for async programming.
17
+
18
+ ## Quick Start
19
+
20
+ ```python
21
+ import asyncio
22
+ import msgspec
23
+ from uuid import UUID, uuid7
24
+
25
+ from kanta import Kanta
26
+
27
+
28
+ class User(msgspec.Struct):
29
+ name: str = ""
30
+ email: str | None = None
31
+
32
+
33
+ class Data(msgspec.Struct):
34
+ users: dict[UUID, User] = {}
35
+
36
+
37
+ async def main() -> None:
38
+ async with Kanta("data.kantadb", Data()) as kanta:
39
+ user_id = uuid7()
40
+ with kanta.transaction(action="create_user") as data:
41
+ data.users[user_id] = User(name="Alice")
42
+
43
+
44
+ asyncio.run(main())
45
+ ```
46
+
47
+ ## Core Concepts
48
+
49
+ 1. Define your schema as a `msgspec.Struct` root object.
50
+ 2. Mutate data inside `with kanta.transaction(...):`.
51
+ 3. Let Kanta flush queued changes to disk in the background.
52
+ 4. Use snapshots and replay for fast startup and full history.
53
+
54
+ ## Migrations
55
+
56
+ Adding or removing a field and other such simple operations are automatic, but when the time comes to really change your data model, implement a `migrate_v1` function that converts your old data to the new form. Migrations operate on plain built-in dicts and other built-in types, so you do not need to preserve old versions of your structs.
57
+
58
+ Pass a module (or import path) containing `migrate_vN` functions:
59
+
60
+ ```python
61
+ kanta = Kanta("data.kantadb", Data(), migrations="myapp.migrations")
62
+ await kanta.open()
63
+ ```
64
+
65
+ Kanta tracks migration version metadata automatically, and fast-forwards your database to the current version by running all the needed migrations while opening the database.
66
+
67
+ ## On-Disk Format
68
+
69
+ Kanta in JSON mode (default) stores newline-delimited records. Transaction history is viewable in any plain text editor, and rollbacks to a prior state are done by simply removing the final lines (one per transaction).
70
+
71
+ MsgPack mode uses binary records with length and checksum to detect data corruption.
72
+
73
+ - Change line: JSON object with metadata + `diff`
74
+ - Snapshot line: `SNAPSHOT { ... full state ... }`
75
+
76
+ See `docs/database.md` for format details and invariants.
@@ -0,0 +1,16 @@
1
+ """Project-root shim package for local development layout.
2
+
3
+ This forwards imports to the inner `kanta/` package directory so
4
+ `from kanta import ...` works when running tests from the workspace root.
5
+ """
6
+
7
+ import importlib
8
+ from pathlib import Path
9
+
10
+ _inner_pkg = Path(__file__).with_name("kanta")
11
+ if str(_inner_pkg) not in __path__:
12
+ __path__.append(str(_inner_pkg))
13
+
14
+ _pkg = importlib.import_module(".kanta", __name__)
15
+ __all__ = list(getattr(_pkg, "__all__", ()))
16
+ globals().update({name: getattr(_pkg, name) for name in __all__})
@@ -0,0 +1 @@
1
+ *
@@ -0,0 +1,116 @@
1
+ # Kanta Database Format and Design Principles
2
+
3
+ This document describes the on-disk format and design principles of Kanta.
4
+ It is intentionally focused on the current standalone package behavior.
5
+
6
+ ## Core Principles
7
+
8
+ 1. Append-only durability
9
+ - State changes are persisted as appended JSON lines.
10
+ - Existing lines are never edited in place.
11
+
12
+ 2. Differential persistence
13
+ - Kanta stores diffs (patches), not full state, for normal writes.
14
+ - This keeps write volume small and preserves a clear change history.
15
+
16
+ 3. Deterministic replay
17
+ - Current state is reconstructed by replaying log records in order.
18
+ - Snapshot records accelerate replay while preserving deterministic results.
19
+
20
+ 4. Transactional in-memory writes
21
+ - Application code mutates in-memory data inside `kanta.transaction(...)`.
22
+ - On success, Kanta computes and queues a diff record.
23
+ - On failure, in-memory data is rolled back.
24
+
25
+ 5. Explicit schema evolution
26
+ - Schema migration functions are versioned (`migrate_vN`).
27
+ - Migrations run at open time and advance the stored version.
28
+
29
+ ## On-Disk Record Types
30
+
31
+ Kanta uses a newline-delimited stream where each line is either a change
32
+ record or a snapshot record.
33
+
34
+ ### Change record
35
+
36
+ One JSON object per line:
37
+
38
+ ```json
39
+ {"ts":"2026-06-10T02:55:00Z","a":"update","v":5,"u":"user-id","m":"2026-06-10T02:55:00Z","diff":{"users":{"alice":{"age":31}}}}
40
+ ```
41
+
42
+ Fields:
43
+ - `ts`: UTC timestamp of the record.
44
+ - `a`: action name.
45
+ - `v`: schema version after this change.
46
+ - `u`: optional actor identifier.
47
+ - `m`: optional domain modification timestamp.
48
+ - `diff`: jsondiff patch payload.
49
+
50
+ ### Snapshot record
51
+
52
+ Snapshot lines are prefixed with `SNAPSHOT `, followed by JSON:
53
+
54
+ ```text
55
+ SNAPSHOT {"ts":"2026-06-10T00:00:00Z","v":5,"state":{"users":{}},"m":"2026-06-10T00:00:00Z"}
56
+ ```
57
+
58
+ Fields:
59
+ - `ts`: snapshot creation time.
60
+ - `v`: schema version represented by the snapshot.
61
+ - `state`: full state dictionary.
62
+ - `m`: optional domain modification timestamp.
63
+
64
+ ## Replay Model
65
+
66
+ 1. Find the last snapshot in the file, if present.
67
+ 2. Initialize replay state from snapshot state (or `{}` if none).
68
+ 3. Replay subsequent change records in order using patch application.
69
+ 4. The final replay state becomes in-memory `kanta.data`.
70
+
71
+ This model provides fast startup for large logs while retaining append-only
72
+ history.
73
+
74
+ ## Serialization Semantics
75
+
76
+ - In-memory data is defined by an application `msgspec.Struct` type.
77
+ - Kanta round-trips through plain builtins for persistence and diffing.
78
+ - Dict keys are serialized as strings (`str_keys=True`) for stable JSON form.
79
+ - Normalization changes introduced by struct decode/encode are logged as
80
+ `migrate:msgspec` when they produce a diff.
81
+
82
+ ## Transaction Semantics
83
+
84
+ - `kanta.transaction(action=...)` captures a pre-transaction snapshot dict.
85
+ - On success:
86
+ - compute diff between previous builtins and current builtins,
87
+ - queue a `ChangeRecord` if non-empty.
88
+ - On exception:
89
+ - restore in-memory data from snapshot,
90
+ - re-raise the exception.
91
+
92
+ Nested transactions are rejected.
93
+
94
+ ## Flush and Lifecycle
95
+
96
+ - Writes are queued in memory.
97
+ - `kanta.flush()` appends queued records to disk.
98
+ - A background async task can flush periodically.
99
+ - `kanta.close()` performs final flush and releases file resources.
100
+ - `async with Kanta(...)` guarantees open/close lifecycle management.
101
+
102
+ ## Migrations
103
+
104
+ - Migration source is configured on `Kanta(...)` via `migrations=`.
105
+ - Accepted values:
106
+ - imported module object,
107
+ - import path string.
108
+ - Migrations mutate replayed dict state in-place and return the new version.
109
+
110
+ ## Safety Invariants
111
+
112
+ - Any detected out-of-transaction mutation is treated as a fatal consistency
113
+ violation.
114
+ - Flush failures mark the instance as failed and trigger shutdown behavior.
115
+ - Object identity of `kanta.data` is preserved across rollback when possible,
116
+ minimizing stale-reference hazards for callers.
@@ -0,0 +1,26 @@
1
+ from .diff import compute_diff
2
+ from .diff import replay_jsonl as replay
3
+ from .exceptions import DatabaseError, DataIntegrityError, FileLockError, ReplayError
4
+ from .filelock import LockedFile
5
+ from .kanta import Kanta
6
+ from .logging import configure_logging, format_diff, log_change
7
+ from .serialization import JsonSerializer, MsgPackSerializer
8
+ from .structs import ChangeRecord, Snapshot
9
+
10
+ __all__ = [
11
+ "ChangeRecord",
12
+ "compute_diff",
13
+ "configure_logging",
14
+ "DataIntegrityError",
15
+ "DatabaseError",
16
+ "FileLockError",
17
+ "format_diff",
18
+ "JsonSerializer",
19
+ "Kanta",
20
+ "LockedFile",
21
+ "log_change",
22
+ "MsgPackSerializer",
23
+ "ReplayError",
24
+ "replay",
25
+ "Snapshot",
26
+ ]
@@ -0,0 +1,81 @@
1
+ """Diff computation and replay utilities."""
2
+
3
+ import jsondiff
4
+
5
+ from kanta.kanta.structs import ChangeRecord
6
+ from kanta.serialization.base import ReplayResult, replay
7
+ from kanta.serialization.framing import LineFramer
8
+ from kanta.serialization.json import JsonSerializer
9
+
10
+
11
+ def compute_diff(previous: dict, current: dict) -> dict | None:
12
+ """Compute a jsondiff patch between two dicts.
13
+
14
+ Returns None if there is no difference.
15
+ """
16
+ return jsondiff.diff(previous, current, marshal=True) or None
17
+
18
+
19
+ def _apply_diff(state: dict, diff: dict) -> dict:
20
+ """Apply a jsondiff patch manually, handling ``$replace`` and ``$delete``.
21
+
22
+ jsondiff.patch does not handle nested ``$replace`` commands when the
23
+ parent key is missing from the state. This function recursively applies
24
+ diffs, treating ``$replace`` as full replacement and ``$delete`` as
25
+ key removal.
26
+ """
27
+ if not isinstance(diff, dict):
28
+ return diff
29
+
30
+ result = dict(state) if isinstance(state, dict) else state
31
+ if not isinstance(result, dict):
32
+ result = {}
33
+
34
+ for key, value in diff.items():
35
+ if key == "$replace":
36
+ return value
37
+ elif key == "$delete":
38
+ if isinstance(value, list):
39
+ for k in value:
40
+ result.pop(k, None)
41
+ else:
42
+ result.pop(value, None)
43
+ elif isinstance(value, dict):
44
+ old = result.get(key, {})
45
+ if not isinstance(old, dict):
46
+ old = {}
47
+ result[key] = _apply_diff(old, value)
48
+ else:
49
+ result[key] = value
50
+
51
+ return result
52
+
53
+
54
+ def patch_state(state: dict, diff: dict) -> dict:
55
+ """Apply a jsondiff patch to a state dict.
56
+
57
+ The diff was produced with ``marshal=True`` (string keys like
58
+ ``"$replace"`` and ``"$delete"``) and decoded from JSON.
59
+ """
60
+ return _apply_diff(state, diff)
61
+
62
+
63
+ # Backward-compatible JSONL replay using the default serializer.
64
+ _default_serializer = JsonSerializer()
65
+ _default_framer = LineFramer()
66
+
67
+
68
+ def replay_jsonl(
69
+ data: bytes,
70
+ *,
71
+ type: type[ChangeRecord] = ChangeRecord,
72
+ ) -> ReplayResult:
73
+ """Replay database state from JSONL file data.
74
+
75
+ This is the legacy public API that hard-codes JSON/JSONL handling.
76
+ """
77
+ return replay(
78
+ data,
79
+ framer=_default_framer,
80
+ decode=_default_serializer.decode,
81
+ )
@@ -0,0 +1,61 @@
1
+ """Custom exception types for Kanta."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+
9
+ class DatabaseError(ValueError):
10
+ """Exception raised for database loading errors."""
11
+
12
+ def __init__(
13
+ self,
14
+ message: str,
15
+ *,
16
+ db_path: Path | None = None,
17
+ line_number: int | None = None,
18
+ byte_pos: int | None = None,
19
+ cause_type: str | None = None,
20
+ ):
21
+ self.db_path = db_path
22
+ self.line_number = line_number
23
+ self.byte_pos = byte_pos
24
+ self.cause_type = cause_type
25
+ super().__init__(message)
26
+
27
+
28
+ class ReplayError(DatabaseError):
29
+ """Structured replay error with source location metadata."""
30
+
31
+ def __init__(
32
+ self,
33
+ message: str,
34
+ *,
35
+ line_number: int | None = None,
36
+ byte_pos: int | None = None,
37
+ record_type: str | None = None,
38
+ ):
39
+ self.record_type = record_type
40
+ super().__init__(message, line_number=line_number, byte_pos=byte_pos)
41
+
42
+
43
+ class FileLockError(DatabaseError):
44
+ """Raised when database file open/lock operations fail."""
45
+
46
+
47
+ class DataIntegrityError(RuntimeError):
48
+ """Raised when in-memory data integrity invariants are violated."""
49
+
50
+ def __init__(
51
+ self,
52
+ message: str,
53
+ *,
54
+ db_path: Path | None = None,
55
+ action: str | None = None,
56
+ diff: dict[str, Any] | None = None,
57
+ ):
58
+ self.db_path = db_path
59
+ self.action = action
60
+ self.diff = diff
61
+ super().__init__(message)