PyPI - kanta - Versions diffs - 0.1.0__tar.gz - Mend

kanta 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

kanta-0.1.0/.gitignore +14 -0
kanta-0.1.0/PKG-INFO +91 -0
kanta-0.1.0/README.md +76 -0
kanta-0.1.0/__init__.py +16 -0
kanta-0.1.0/dist/.gitignore +1 -0
kanta-0.1.0/docs/database.md +116 -0
kanta-0.1.0/kanta/__init__.py +26 -0
kanta-0.1.0/kanta/diff.py +81 -0
kanta-0.1.0/kanta/exceptions.py +61 -0
kanta-0.1.0/kanta/filelock.py +274 -0
kanta-0.1.0/kanta/kanta.py +206 -0
kanta-0.1.0/kanta/kantaimpl.py +137 -0
kanta-0.1.0/kanta/logging.py +289 -0
kanta-0.1.0/kanta/migrate.py +117 -0
kanta-0.1.0/kanta/persistence.py +215 -0
kanta-0.1.0/kanta/serialization/__init__.py +59 -0
kanta-0.1.0/kanta/serialization/base.py +158 -0
kanta-0.1.0/kanta/serialization/framing.py +304 -0
kanta-0.1.0/kanta/serialization/json.py +23 -0
kanta-0.1.0/kanta/serialization/msgpack.py +23 -0
kanta-0.1.0/kanta/snapshot.py +65 -0
kanta-0.1.0/kanta/structs.py +35 -0
kanta-0.1.0/kanta/transaction.py +76 -0
kanta-0.1.0/main.py +6 -0
kanta-0.1.0/pyproject.toml +42 -0
kanta-0.1.0/tests/__init__.py +0 -0
kanta-0.1.0/tests/conftest.py +14 -0
kanta-0.1.0/tests/support.py +82 -0
kanta-0.1.0/tests/test_diff.py +16 -0
kanta-0.1.0/tests/test_format_diff.py +25 -0
kanta-0.1.0/tests/test_framing.py +58 -0
kanta-0.1.0/tests/test_kanta_integration.py +391 -0
kanta-0.1.0/tests/test_logging.py +17 -0
kanta-0.1.0/tests/test_migrations.py +53 -0
kanta-0.1.0/tests/test_replay.py +34 -0
kanta-0.1.0/tests/test_snapshot.py +34 -0

kanta-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,14 @@
+# Python cache/artifacts
+__pycache__/
+*.py[cod]
+*.so
+# Hidden things
+.*
+!.gitignore
+# Build/distribution artifacts
+build/
+dist/
+*.egg-info/
+*.lock

kanta-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,91 @@
+Metadata-Version: 2.4
+Name: kanta
+Version: 0.1.0
+Summary: Kanta No-SQL database for async Python and frameworks such as FastAPI and Sanic
+Project-URL: Repository, https://git.zi.fi/LeoVasanko/kanta
+Author: Leo Vasanko
+Keywords: kantadb
+Requires-Python: >=3.11
+Requires-Dist: blake3>=1.0.8
+Requires-Dist: jsondiff>=2.2.1
+Requires-Dist: msgspec>=0.20.0
+Provides-Extra: bin
+Requires-Dist: blake3>=1.0.8; extra == 'bin'
+Description-Content-Type: text/markdown
+# Kanta database
+Kanta is a small embedded NoSQL store for async Python apps. It keeps live state in memory, writes transactional diffs to an append-only log, and supports versioned schema migrations.
+## Why Kanta
+- Fast synchronous reads and modifications on native Python objects
+- Durable writes with append-only file and periodic snapshots
+- Transaction semantics with rollback on failure
+- Explicit schema evolution via `migrate_vN` functions
+- Line-based JSON or binary MessagePack, or bring your own serializer
+- Even in JSON we can use non-string keys, bytes, datetimes, UUID and other types
+This design is often preferable when you want low-latency local persistence without operating a separate database service. You get straightforward deployment, auditable history, and deterministic replay while keeping application state ergonomic to work with.
+Queries and updates on native data are far faster than over an SQL server connection, and we can provide fully synchronous operation. The limitation is that you can only use the same database within a single process at a time, but this is well suited for async programming.
+## Quick Start
+```python
+import asyncio
+import msgspec
+from uuid import UUID, uuid7
+from kanta import Kanta
+class User(msgspec.Struct):
+    name: str = ""
+    email: str | None = None
+class Data(msgspec.Struct):
+    users: dict[UUID, User] = {}
+async def main() -> None:
+    async with Kanta("data.kantadb", Data()) as kanta:
+        user_id = uuid7()
+        with kanta.transaction(action="create_user") as data:
+            data.users[user_id] = User(name="Alice")
+asyncio.run(main())
+```
+## Core Concepts
+1. Define your schema as a `msgspec.Struct` root object.
+2. Mutate data inside `with kanta.transaction(...):`.
+3. Let Kanta flush queued changes to disk in the background.
+4. Use snapshots and replay for fast startup and full history.
+## Migrations
+Adding or removing a field and other such simple operations are automatic, but when the time comes to really change your data model, implement a `migrate_v1` function that converts your old data to the new form. This works on plain built-in dict and other types, to avoid needing to preserve old versions of your structs.
+Pass a module (or import path) containing `migrate_vN` functions:
+```python
+kanta = Kanta("data.kantadb", Data(), migrations="myapp.migrations")
+await kanta.open()
+```
+Kanta tracks migration version metadata automatically, and fast-forwards your database to the current version by running all the migrations needed while opening the database.
+## On-Disk Format
+Kanta in JSON mode (default) stores newline-delimited records. Transaction history is viewable by any simple text editor, and rollbacks to prior state are done by simply removing final lines (one per transaction).
+MsgPack mode uses binary records with length and checksum to avoid data corruption.
+- Change line: JSON object with metadata + `diff`
+- Snapshot line: `SNAPSHOT { ... full state ... }`
+See `docs/database.md` for format details and invariants.

kanta-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,76 @@
+# Kanta database
+Kanta is a small embedded NoSQL store for async Python apps. It keeps live state in memory, writes transactional diffs to an append-only log, and supports versioned schema migrations.
+## Why Kanta
+- Fast synchronous reads and modifications on native Python objects
+- Durable writes with append-only file and periodic snapshots
+- Transaction semantics with rollback on failure
+- Explicit schema evolution via `migrate_vN` functions
+- Line-based JSON or binary MessagePack, or bring your own serializer
+- Even in JSON we can use non-string keys, bytes, datetimes, UUID and other types
+This design is often preferable when you want low-latency local persistence without operating a separate database service. You get straightforward deployment, auditable history, and deterministic replay while keeping application state ergonomic to work with.
+Queries and updates on native data are far faster than over an SQL server connection, and we can provide fully synchronous operation. The limitation is that you can only use the same database within a single process at a time, but this is well suited for async programming.
+## Quick Start
+```python
+import asyncio
+import msgspec
+from uuid import UUID, uuid7
+from kanta import Kanta
+class User(msgspec.Struct):
+    name: str = ""
+    email: str | None = None
+class Data(msgspec.Struct):
+    users: dict[UUID, User] = {}
+async def main() -> None:
+    async with Kanta("data.kantadb", Data()) as kanta:
+        user_id = uuid7()
+        with kanta.transaction(action="create_user") as data:
+            data.users[user_id] = User(name="Alice")
+asyncio.run(main())
+```
+## Core Concepts
+1. Define your schema as a `msgspec.Struct` root object.
+2. Mutate data inside `with kanta.transaction(...):`.
+3. Let Kanta flush queued changes to disk in the background.
+4. Use snapshots and replay for fast startup and full history.
+## Migrations
+Adding or removing a field and other such simple operations are automatic, but when the time comes to really change your data model, implement a `migrate_v1` function that converts your old data to the new form. This works on plain built-in dict and other types, to avoid needing to preserve old versions of your structs.
+Pass a module (or import path) containing `migrate_vN` functions:
+```python
+kanta = Kanta("data.kantadb", Data(), migrations="myapp.migrations")
+await kanta.open()
+```
+Kanta tracks migration version metadata automatically, and fast-forwards your database to the current version by running all the migrations needed while opening the database.
+## On-Disk Format
+Kanta in JSON mode (default) stores newline-delimited records. Transaction history is viewable by any simple text editor, and rollbacks to prior state are done by simply removing final lines (one per transaction).
+MsgPack mode uses binary records with length and checksum to avoid data corruption.
+- Change line: JSON object with metadata + `diff`
+- Snapshot line: `SNAPSHOT { ... full state ... }`
+See `docs/database.md` for format details and invariants.

kanta-0.1.0/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""Project-root shim package for local development layout.
+This forwards imports to the inner `kanta/` package directory so
+`from kanta import ...` works when running tests from the workspace root.
+"""
+import importlib
+from pathlib import Path
+# Path of the sibling entry named "kanta" next to this file (the inner package directory).
+_inner_pkg = Path(__file__).with_name("kanta")
+# Add it to this package's __path__ only once, so imports are forwarded to the inner directory.
+if str(_inner_pkg) not in __path__:
+    __path__.append(str(_inner_pkg))
+# Import the inner package relative to this shim, then re-export its public names.
+_pkg = importlib.import_module(".kanta", __name__)
+__all__ = list(getattr(_pkg, "__all__", ()))  # empty list when the inner package has no __all__
+globals().update({name: getattr(_pkg, name) for name in __all__})

kanta-0.1.0/dist/.gitignore ADDED Viewed

	@@ -0,0 +1 @@
1	+ *

kanta-0.1.0/docs/database.md ADDED Viewed

@@ -0,0 +1,116 @@
+# Kanta Database Format and Design Principles
+This document describes the on-disk format and design principles of Kanta.
+It is intentionally focused on the current standalone package behavior.
+## Core Principles
+1. Append-only durability
+- State changes are persisted as appended JSON lines.
+- Existing lines are never edited in place.
+2. Differential persistence
+- Kanta stores diffs (patches), not full state, for normal writes.
+- This keeps write volume small and preserves a clear change history.
+3. Deterministic replay
+- Current state is reconstructed by replaying log records in order.
+- Snapshot records accelerate replay while preserving deterministic results.
+4. Transactional in-memory writes
+- Application code mutates in-memory data inside `kanta.transaction(...)`.
+- On success, Kanta computes and queues a diff record.
+- On failure, in-memory data is rolled back.
+5. Explicit schema evolution
+- Schema migration functions are versioned (`migrate_vN`).
+- Migrations run at open time and advance the stored version.
+## On-Disk Record Types
+Kanta uses a newline-delimited stream where each line is either a change
+record or a snapshot record.
+### Change record
+One JSON object per line:
+```json
+{"ts":"2026-06-10T02:55:00Z","a":"update","v":5,"u":"user-id","m":"2026-06-10T02:55:00Z","diff":{"users":{"alice":{"age":31}}}}
+```
+Fields:
+- `ts`: UTC timestamp of the record.
+- `a`: action name.
+- `v`: schema version after this change.
+- `u`: optional actor identifier.
+- `m`: optional domain modification timestamp.
+- `diff`: jsondiff patch payload.
+### Snapshot record
+Snapshot lines are prefixed with `SNAPSHOT `, followed by JSON:
+```text
+SNAPSHOT {"ts":"2026-06-10T00:00:00Z","v":5,"state":{"users":{}},"m":"2026-06-10T00:00:00Z"}
+```
+Fields:
+- `ts`: snapshot creation time.
+- `v`: schema version represented by the snapshot.
+- `state`: full state dictionary.
+- `m`: optional domain modification timestamp.
+## Replay Model
+1. Find the last snapshot in the file, if present.
+2. Initialize replay state from snapshot state (or `{}` if none).
+3. Replay subsequent change records in order using patch application.
+4. The final replay state becomes in-memory `kanta.data`.
+This model provides fast startup for large logs while retaining append-only
+history.
+## Serialization Semantics
+- In-memory data is defined by an application `msgspec.Struct` type.
+- Kanta round-trips through plain builtins for persistence and diffing.
+- Dict keys are serialized as strings (`str_keys=True`) for stable JSON form.
+- Normalization changes introduced by struct decode/encode are logged as
+  `migrate:msgspec` when they produce a diff.
+## Transaction Semantics
+- `kanta.transaction(action=...)` captures a pre-transaction snapshot dict.
+- On success:
+  - compute diff between previous builtins and current builtins,
+  - queue a `ChangeRecord` if non-empty.
+- On exception:
+  - restore in-memory data from snapshot,
+  - re-raise the exception.
+Nested transactions are rejected.
+## Flush and Lifecycle
+- Writes are queued in memory.
+- `kanta.flush()` appends queued records to disk.
+- A background async task can flush periodically.
+- `kanta.close()` performs final flush and releases file resources.
+- `async with Kanta(...)` guarantees open/close lifecycle management.
+## Migrations
+- Migration source is configured on `Kanta(...)` via `migrations=`.
+- Accepted values:
+  - imported module object,
+  - import path string.
+- Migrations mutate replayed dict state in-place and return the new version.
+## Safety Invariants
+- Any detected out-of-transaction mutation is treated as a fatal consistency
+  violation.
+- Flush failures mark the instance as failed and trigger shutdown behavior.
+- Object identity of `kanta.data` is preserved across rollback when possible,
+  minimizing stale-reference hazards for callers.

kanta-0.1.0/kanta/__init__.py ADDED Viewed

@@ -0,0 +1,26 @@
+from .diff import compute_diff
+from .diff import replay_jsonl as replay
+from .exceptions import DatabaseError, DataIntegrityError, FileLockError, ReplayError
+from .filelock import LockedFile
+from .kanta import Kanta
+from .logging import configure_logging, format_diff, log_change
+from .serialization import JsonSerializer, MsgPackSerializer
+from .structs import ChangeRecord, Snapshot
+__all__ = [
+    "ChangeRecord",
+    "compute_diff",
+    "configure_logging",
+    "DataIntegrityError",
+    "DatabaseError",
+    "FileLockError",
+    "format_diff",
+    "JsonSerializer",
+    "Kanta",
+    "LockedFile",
+    "log_change",
+    "MsgPackSerializer",
+    "ReplayError",
+    "replay",
+    "Snapshot",
+]

kanta-0.1.0/kanta/diff.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""Diff computation and replay utilities."""
+import jsondiff
+from kanta.kanta.structs import ChangeRecord
+from kanta.serialization.base import ReplayResult, replay
+from kanta.serialization.framing import LineFramer
+from kanta.serialization.json import JsonSerializer
+def compute_diff(previous: dict, current: dict) -> dict | None:
+    """Compute a jsondiff patch between two dicts.
+    Returns None if there is no difference.
+    """
+    return jsondiff.diff(previous, current, marshal=True) or None
+def _apply_diff(state: dict, diff: dict) -> dict:
+    """Apply a jsondiff patch manually, handling ``$replace`` and ``$delete``.
+    jsondiff.patch does not handle nested ``$replace`` commands when the
+    parent key is missing from the state.  This function recursively applies
+    diffs, treating ``$replace`` as full replacement and ``$delete`` as
+    key removal.
+    """
+    if not isinstance(diff, dict):
+        return diff
+    result = dict(state) if isinstance(state, dict) else state
+    if not isinstance(result, dict):
+        result = {}
+    for key, value in diff.items():
+        if key == "$replace":
+            return value
+        elif key == "$delete":
+            if isinstance(value, list):
+                for k in value:
+                    result.pop(k, None)
+            else:
+                result.pop(value, None)
+        elif isinstance(value, dict):
+            old = result.get(key, {})
+            if not isinstance(old, dict):
+                old = {}
+            result[key] = _apply_diff(old, value)
+        else:
+            result[key] = value
+    return result
+def patch_state(state: dict, diff: dict) -> dict:
+    """Apply a jsondiff patch to a state dict.
+    The diff was produced with ``marshal=True`` (string keys like
+    ``"$replace"`` and ``"$delete"``) and decoded from JSON.
+    """
+    return _apply_diff(state, diff)
+# Backward-compatible JSONL replay using the default serializer.
+# Both objects are created once at import time and handed to ``replay`` by ``replay_jsonl``.
+_default_serializer = JsonSerializer()
+_default_framer = LineFramer()
+def replay_jsonl(
+    data: bytes,
+    *,
+    type: type[ChangeRecord] = ChangeRecord,
+) -> ReplayResult:
+    """Replay database state from JSONL file data.
+    This is the legacy public API that hard-codes JSON/JSONL handling.
+    """
+    return replay(
+        data,
+        framer=_default_framer,
+        decode=_default_serializer.decode,
+    )

kanta-0.1.0/kanta/exceptions.py ADDED Viewed

@@ -0,0 +1,61 @@
+"""Custom exception types for Kanta."""
+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+class DatabaseError(ValueError):
+    """Exception raised for database loading errors."""
+    def __init__(
+        self,
+        message: str,
+        *,
+        db_path: Path | None = None,
+        line_number: int | None = None,
+        byte_pos: int | None = None,
+        cause_type: str | None = None,
+    ):
+        self.db_path = db_path
+        self.line_number = line_number
+        self.byte_pos = byte_pos
+        self.cause_type = cause_type
+        super().__init__(message)
+class ReplayError(DatabaseError):
+    """Structured replay error with source location metadata."""
+    def __init__(
+        self,
+        message: str,
+        *,
+        line_number: int | None = None,
+        byte_pos: int | None = None,
+        record_type: str | None = None,
+    ):
+        self.record_type = record_type
+        super().__init__(message, line_number=line_number, byte_pos=byte_pos)
+class FileLockError(DatabaseError):
+    """Raised when database file open/lock operations fail.
+    Adds no attributes or behaviour of its own beyond ``DatabaseError``.
+    """
+class DataIntegrityError(RuntimeError):
+    """Raised when in-memory data integrity invariants are violated."""
+    def __init__(
+        self,
+        message: str,
+        *,
+        db_path: Path | None = None,
+        action: str | None = None,
+        diff: dict[str, Any] | None = None,
+    ):
+        self.db_path = db_path
+        self.action = action
+        self.diff = diff
+        super().__init__(message)