PyPI - brainlessdb - Versions diffs - 0.1.0__py3-none-any.whl - Mend

brainlessdb 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

brainless/__init__.py +57 -0
brainless/bucket.py +127 -0
brainless/client.py +150 -0
brainless/collection.py +628 -0
brainless/entity.py +572 -0
brainless/py.typed +0 -0
brainless/schema.py +164 -0
brainlessdb-0.1.0.dist-info/METADATA +454 -0
brainlessdb-0.1.0.dist-info/RECORD +11 -0
brainlessdb-0.1.0.dist-info/WHEEL +5 -0
brainlessdb-0.1.0.dist-info/top_level.txt +1 -0

brainless/collection.py ADDED Viewed

@@ -0,0 +1,628 @@
+"""Collection class for managing entities."""
+from __future__ import annotations
+import hashlib
+import logging
+import uuid as uuid_module
+from collections.abc import AsyncIterator, Iterator
+from dataclasses import asdict, is_dataclass
+from typing import TYPE_CHECKING, Any, Generic, TypeVar
+from brainless.entity import Entity, _make_tracked_instance
+from brainless.schema import Schema, infer_schema_from_dict
+if TYPE_CHECKING:
+    from brainless.bucket import Bucket
+    from brainless.client import Brainless
+_log = logging.getLogger(__name__)
+T = TypeVar("T")
+R = TypeVar("R")  # For typed() return
+def _invert(value: Any) -> Any:
+    """Invert value for descending sort."""
+    if isinstance(value, (int, float)):
+        return -value
+    if isinstance(value, str):
+        # Invert each character's ordinal
+        return [-ord(c) for c in value]
+    # Fallback - wrap in tuple that sorts reversed
+    return value
+class Collection(Generic[T]):
+    """Collection of entities backed by NATS KV bucket
+    Provides CRUD operations, dict-style access, iteration, and filtering.
+    Schema is inferred from the first add() call.
+    Can be typed for IDE support:
+        queue: Collection[QueueItem] = brainless.queue.typed(QueueItem)
+    """
+    def __init__(self, client: Brainless, name: str) -> None:
+        self._client = client
+        self._name = name
+        self._schema: Schema | None = None
+        self._entities: dict[str, Entity] = {}
+        self._dirty: set[str] = set()
+        self._deleted: set[str] = set()
+        self._bucket: Bucket | None = None
+        self._loaded = False
+        self._cast_type: type | None = None
+        # Lazy indexes: field -> {value -> {uuid, ...}}
+        self._indexes: dict[str, dict[Any, set[str]]] = {}
+    @property
+    def name(self) -> str:
+        """Collection name given at construction (read-only)."""
+        return self._name
+    @property
+    def schema(self) -> Schema | None:
+        """Current schema, or None while no schema has been inferred yet."""
+        return self._schema
+    def typed(self, cls: type[R]) -> Collection[R]:
+        """Set dataclass type for query results
+        When set, find/filter/all/order_by/iteration return typed instances.
+        The type is stored on this collection itself; no copy is made, so
+        the returned object is the same instance the method was called on.
+        @param cls: Dataclass type to convert results to
+        @return: Self for chaining (typed as Collection[R] for IDE support)
+        """
+        self._cast_type = cls
+        # Same object, re-labelled for the type checker only
+        return self  # type: ignore[return-value]
+    def _convert(self, entity: Entity) -> T:
+        """Convert entity to tracked cast type if set."""
+        if self._cast_type is None:
+            return entity  # type: ignore[return-value]
+        return _make_tracked_instance(self._cast_type, entity)  # type: ignore[return-value]
+    def _generate_uuid(self) -> str:
+        """Generate UUID1 with location-based node."""
+        location_hash = hashlib.sha256(self._client.location.encode()).digest()
+        node = int.from_bytes(location_hash[:6], "big")
+        return str(uuid_module.uuid1(node=node))
+    def _infer_schema(self, data: dict[str, Any]) -> None:
+        """Infer and lock schema from first entity data."""
+        self._schema = infer_schema_from_dict(self._name, data)
+        self._schema.lock()
+        _log.info(
+            "Inferred schema for '%s': %s",
+            self._name,
+            list(self._schema.fields.keys()),
+        )
+    def mark_dirty(self, entity: Entity) -> None:
+        """Mark entity for background flush
+        Records the entity's uuid in the dirty set that flush() drains.
+        @param entity: Entity whose data should be written on the next flush
+        """
+        self._dirty.add(entity.uuid)
+    def _build_index(self, field: str) -> dict[Any, set[str]]:
+        """Build index for a field from all entities."""
+        index: dict[Any, set[str]] = {}
+        for uuid, entity in self._entities.items():
+            if field in entity:
+                value = entity[field]
+                if value not in index:
+                    index[value] = set()
+                index[value].add(uuid)
+        self._indexes[field] = index
+        _log.debug("Built index for '%s.%s' (%d values)", self._name, field, len(index))
+        return index
+    def _get_index(self, field: str) -> dict[Any, set[str]]:
+        """Get or build index for field."""
+        if field not in self._indexes:
+            return self._build_index(field)
+        return self._indexes[field]
+    def _index_add(self, entity: Entity) -> None:
+        """Add entity to existing indexes."""
+        for field, index in self._indexes.items():
+            if field in entity:
+                value = entity[field]
+                if value not in index:
+                    index[value] = set()
+                index[value].add(entity.uuid)
+    def _index_remove(self, entity: Entity) -> None:
+        """Remove entity from all indexes."""
+        for field, index in self._indexes.items():
+            if field in entity:
+                value = entity[field]
+                if value in index:
+                    index[value].discard(entity.uuid)
+                    if not index[value]:
+                        del index[value]
+    def on_field_change(
+        self,
+        entity: Entity,
+        field: str,
+        old_value: Any,
+        new_value: Any,
+    ) -> None:
+        """Called by Entity when a field value changes."""
+        if field not in self._indexes:
+            return
+        index = self._indexes[field]
+        # Remove from old value
+        if old_value in index:
+            index[old_value].discard(entity.uuid)
+            if not index[old_value]:
+                del index[old_value]
+        # Add to new value
+        if new_value not in index:
+            index[new_value] = set()
+        index[new_value].add(entity.uuid)
+    async def _ensure_bucket(self) -> Bucket | None:
+        """Get or create bucket for this collection."""
+        if self._bucket is None and self._client.connected:
+            self._bucket = await self._client.get_bucket(self._name)
+        return self._bucket
+    async def _ensure_loaded(self) -> None:
+        """Ensure collection is loaded from NATS
+        Calls load() unless the collection is already flagged as loaded.
+        """
+        if not self._loaded:
+            await self.load()
+    async def load(self) -> int:
+        """Load all entities from NATS bucket
+        @return: Number of entities loaded
+        """
+        if self._loaded:
+            return 0
+        bucket = await self._ensure_bucket()
+        if bucket is None:
+            self._loaded = True
+            return 0
+        data = await bucket.all()
+        count = 0
+        for uuid, entity_data in data.items():
+            # Strip uuid from data if present (legacy data)
+            entity_data.pop("uuid", None)
+            # Infer schema from first record if not set
+            if self._schema is None and entity_data:
+                self._infer_schema(entity_data)
+            entity = Entity(self, uuid, entity_data)
+            self._entities[uuid] = entity
+            count += 1
+        self._loaded = True
+        if count > 0:
+            _log.info("Loaded %d entities into '%s'", count, self._name)
+        return count
+    async def flush(self) -> int:
+        """Flush dirty entities and deletions to NATS bucket
+        @return: Number of operations performed
+        """
+        bucket = await self._ensure_bucket()
+        ops = 0
+        # Flush dirty entities
+        if self._dirty:
+            to_flush = list(self._dirty)
+            for uuid in to_flush:
+                entity = self._entities.get(uuid)
+                if entity is None:
+                    self._dirty.discard(uuid)
+                    continue
+                if bucket is not None:
+                    await bucket.put(uuid, entity.data)
+                entity.mark_clean()
+                self._dirty.discard(uuid)
+                ops += 1
+        # Flush deletions
+        if self._deleted:
+            to_delete = list(self._deleted)
+            for uuid in to_delete:
+                if bucket is not None:
+                    await bucket.delete(uuid)
+                self._deleted.discard(uuid)
+                ops += 1
+        if ops > 0:
+            _log.debug("Flushed %d operations from '%s'", ops, self._name)
+        return ops
+    def add(
+        self,
+        data: dict[str, Any] | Any | None = None,
+        **kwargs: Any,
+    ) -> T:
+        """Add new entity to collection
+        Accepts dict, dataclass instance, or keyword arguments.
+        Schema is inferred from the first add() call.
+        If a dataclass with a uuid field is passed, the generated uuid
+        is set on the original object for convenience.
+        @param data: Dictionary or dataclass instance
+        @param kwargs: Field values as keyword arguments
+        @return: Created entity (typed if typed() set)
+        """
+        input_dataclass = None
+        # Normalize input to dict
+        if data is None:
+            entity_data = kwargs
+        elif is_dataclass(data) and not isinstance(data, type):
+            input_dataclass = data
+            entity_data = {**asdict(data), **kwargs}
+        elif isinstance(data, dict):
+            entity_data = {**data, **kwargs}
+        else:
+            raise TypeError(f"Expected dict or dataclass, got {type(data).__name__}")
+        if not entity_data:
+            raise ValueError("Cannot add empty entity")
+        # Generate uuid
+        entity_uuid = self._generate_uuid()
+        # Set uuid on input dataclass if it has a uuid field
+        if input_dataclass is not None and hasattr(input_dataclass, "uuid"):
+            input_dataclass.uuid = entity_uuid
+        # Remove uuid from data (stored as key, not in value)
+        entity_data.pop("uuid", None)
+        # Infer schema on first add (after uuid removed)
+        if self._schema is None:
+            self._infer_schema(entity_data)
+        # Validate and apply defaults
+        entity_data = self._schema.apply_defaults(entity_data)
+        self._schema.validate(entity_data)
+        # Create entity
+        entity = Entity(self, entity_uuid, entity_data)
+        self._entities[entity_uuid] = entity
+        # Maintain indexes
+        self._index_add(entity)
+        # Mark for persistence
+        entity.mark_dirty()
+        return self._convert(entity)
+    async def get(self, uuid: str) -> Entity | None:
+        """Get entity by UUID
+        Loads the collection first if it has not been loaded yet. The raw
+        Entity is returned; the type set via typed() is not applied here.
+        @param uuid: Entity UUID
+        @return: Entity or None if not found
+        """
+        await self._ensure_loaded()
+        return self._entities.get(uuid)
+    def delete(self, entity: str | Entity | Any) -> bool:
+        """Delete entity by UUID, Entity, or object with uuid attribute
+        Removal from NATS bucket happens on next flush.
+        @param entity: Entity UUID string, Entity instance, or object with uuid attr
+        @return: True if deleted, False if not found
+        """
+        if isinstance(entity, str):
+            uuid = entity
+        elif hasattr(entity, "uuid"):
+            uuid = entity.uuid
+        else:
+            raise TypeError(f"Cannot get uuid from {type(entity).__name__}")
+        existing = self._entities.get(uuid)
+        if existing is None:
+            return False
+        # Remove from indexes
+        self._index_remove(existing)
+        del self._entities[uuid]
+        self._dirty.discard(uuid)
+        self._deleted.add(uuid)
+        return True
+    def _get_nested_value(self, entity: Entity, key: str, expected: Any = None) -> tuple[bool, Any]:
+        """Get value from entity, supporting nested access via __
+        When a list or dict is encountered, checks if ANY item matches.
+        If expected is provided, returns True if any path equals expected.
+        @param entity: Entity to get value from
+        @param key: Field name, supports __ for nested access
+        @param expected: If provided, check if any path matches this value
+        @return: (found, value) tuple
+        """
+        parts = key.split("__")
+        return self._traverse_path(entity, parts, expected)
+    def _traverse_path(self, value: Any, parts: list[str], expected: Any = None) -> tuple[bool, Any]:
+        """Recursively traverse a path through nested structures.
+        Handles dicts, Entities, and lists/dicts (checks if ANY item matches).
+        If expected is provided, checks if any terminal value equals expected.
+        """
+        if not parts:
+            # Terminal - check expected if provided
+            if expected is not None:
+                return value == expected, value
+            return True, value
+        part = parts[0]
+        remaining = parts[1:]
+        if isinstance(value, Entity):
+            if part not in value:
+                return False, None
+            return self._traverse_path(value[part], remaining, expected)
+        elif isinstance(value, dict):
+            if part in value:
+                return self._traverse_path(value[part], remaining, expected)
+            # part not in value - try iterating values (for dict[K, Dataclass] pattern)
+            for item in value.values():
+                found, result = self._traverse_path(item, [part] + remaining, expected)
+                if found:
+                    return True, result
+            return False, None
+        elif isinstance(value, list):
+            # For lists, check if ANY item matches the remaining path
+            for item in value:
+                found, result = self._traverse_path(item, [part] + remaining, expected)
+                if found:
+                    return True, result
+            return False, None
+        return False, None
+    def _matches(self, entity: Entity, criteria: dict[str, Any]) -> bool:
+        """Check if entity matches all criteria."""
+        for key, expected in criteria.items():
+            found, _ = self._get_nested_value(entity, key, expected)
+            if not found:
+                return False
+        return True
+    async def filter(self, **criteria: Any) -> list[T]:
+        """Filter entities by field values
+        Supports nested access via double underscore:
+            filter(caller__city="Prague")
+        Uses indexes for O(1) lookup on top-level fields.
+        @param criteria: Field-value pairs to match
+        @return: List of matching entities (typed if typed() set)
+        """
+        await self._ensure_loaded()
+        if not criteria:
+            return [self._convert(e) for e in self._entities.values()]
+        # Try indexed lookup for first top-level field
+        candidates: set[str] | None = None
+        remaining_criteria: dict[str, Any] = {}
+        for key, value in criteria.items():
+            if "__" not in key:
+                # Top-level field - use index
+                index = self._get_index(key)
+                uuids = index.get(value, set())
+                if candidates is None:
+                    candidates = uuids.copy()
+                else:
+                    candidates &= uuids
+                # Early exit if no matches
+                if not candidates:
+                    return []
+            else:
+                remaining_criteria[key] = value
+        # If we have indexed candidates, filter those
+        if candidates is not None:
+            entities = [self._entities[u] for u in candidates if u in self._entities]
+            if remaining_criteria:
+                return [self._convert(e) for e in entities if self._matches(e, remaining_criteria)]
+            return [self._convert(e) for e in entities]
+        # No indexable fields, full scan
+        return [self._convert(e) for e in self._entities.values() if self._matches(e, criteria)]
+    async def find(self, **criteria: Any) -> T | None:
+        """Find first entity matching criteria
+        Supports nested access via double underscore:
+            find(caller__city="Prague")
+        Uses indexes for O(1) lookup on top-level fields.
+        @param criteria: Field-value pairs to match
+        @return: First matching entity (typed if typed() set) or None
+        """
+        await self._ensure_loaded()
+        if not criteria:
+            # Return first entity if any
+            for entity in self._entities.values():
+                return self._convert(entity)
+            return None
+        # Try indexed lookup for first top-level field
+        candidates: set[str] | None = None
+        remaining_criteria: dict[str, Any] = {}
+        for key, value in criteria.items():
+            if "__" not in key:
+                # Top-level field - use index
+                index = self._get_index(key)
+                uuids = index.get(value, set())
+                if candidates is None:
+                    candidates = uuids.copy()
+                else:
+                    candidates &= uuids
+                # Early exit if no matches
+                if not candidates:
+                    return None
+            else:
+                remaining_criteria[key] = value
+        # If we have indexed candidates, search those
+        if candidates is not None:
+            for uuid in candidates:
+                entity = self._entities.get(uuid)
+                if entity is None:
+                    continue
+                if remaining_criteria:
+                    if self._matches(entity, remaining_criteria):
+                        return self._convert(entity)
+                else:
+                    return self._convert(entity)
+            return None
+        # No indexable fields, full scan
+        for entity in self._entities.values():
+            if self._matches(entity, criteria):
+                return self._convert(entity)
+        return None
+    async def all(self) -> list[T]:
+        """Get all entities in collection
+        Loads the collection first if needed.
+        @return: List of every entity (typed if typed() set)
+        """
+        await self._ensure_loaded()
+        return [self._convert(e) for e in self._entities.values()]
+    async def order_by(self, *keys: str, **criteria: Any) -> list[T]:
+        """Get entities sorted by key(s), optionally filtered
+        Use minus prefix for descending order: order_by("-created_at")
+        Multiple keys are applied in order: order_by("priority", "-created_at")
+        @param keys: Field names to sort by (prefix with - for descending)
+        @param criteria: Filter criteria (same as filter())
+        @return: Sorted list of entities (typed if typed() set)
+        """
+        await self._ensure_loaded()
+        # Get raw entities for sorting (filter without conversion)
+        if criteria:
+            candidates: set[str] | None = None
+            remaining_criteria: dict[str, Any] = {}
+            for key, value in criteria.items():
+                if "__" not in key:
+                    index = self._get_index(key)
+                    uuids = index.get(value, set())
+                    if candidates is None:
+                        candidates = uuids.copy()
+                    else:
+                        candidates &= uuids
+                    if not candidates:
+                        return []
+                else:
+                    remaining_criteria[key] = value
+            if candidates is not None:
+                entities = [self._entities[u] for u in candidates if u in self._entities]
+                if remaining_criteria:
+                    entities = [e for e in entities if self._matches(e, remaining_criteria)]
+            else:
+                entities = [e for e in self._entities.values() if self._matches(e, criteria)]
+        else:
+            entities = list(self._entities.values())
+        if not keys:
+            return [self._convert(e) for e in entities]
+        def sort_key(entity: Entity) -> tuple:
+            result = []
+            for key in keys:
+                descending = key.startswith("-")
+                field = key[1:] if descending else key
+                found, value = self._get_nested_value(entity, field)
+                if not found:
+                    value = None
+                # None values sort last regardless of direction
+                if value is None:
+                    result.append((1, None))
+                elif descending:
+                    result.append((0, _invert(value)))
+                else:
+                    result.append((0, value))
+            return tuple(result)
+        return [self._convert(e) for e in sorted(entities, key=sort_key)]
+    def count(self) -> int:
+        """Return number of entities in collection
+        Counts only what is currently held in memory; does not trigger a load.
+        """
+        return len(self._entities)
+    def clear(self) -> None:
+        """Remove all entities from collection (in-memory only)
+        Drops the entities, the pending writes and all built indexes.
+        Nothing is queued for deletion, so the bucket is left untouched.
+        NOTE(review): previously queued deletions (_deleted) and the
+        loaded flag are not reset here - confirm this is intended.
+        """
+        self._entities.clear()
+        self._dirty.clear()
+        self._indexes.clear()
+    def __getitem__(self, uuid: str) -> Entity:
+        """Dict-style access by UUID."""
+        entity = self._entities.get(uuid)
+        if entity is None:
+            raise KeyError(uuid)
+        return entity
+    def __delitem__(self, entity: str | Entity | Any) -> None:
+        """Dict-style deletion."""
+        if not self.delete(entity):
+            if isinstance(entity, str):
+                uuid = entity
+            elif hasattr(entity, "uuid"):
+                uuid = entity.uuid
+            else:
+                uuid = str(entity)
+            raise KeyError(uuid)
+    def __contains__(self, entity: str | Entity | Any) -> bool:
+        """Check if entity exists in collection."""
+        if isinstance(entity, str):
+            uuid = entity
+        elif hasattr(entity, "uuid"):
+            uuid = entity.uuid
+        else:
+            return False
+        return uuid in self._entities
+    def __iter__(self) -> Iterator[T]:
+        """Iterate over all entities (converted if typed() set)."""
+        for entity in self._entities.values():
+            yield self._convert(entity)
+    async def __aiter__(self) -> AsyncIterator[T]:
+        """Async iterate over all entities (loads if needed)
+        Entities are yielded converted if typed() set.
+        """
+        await self._ensure_loaded()
+        for entity in self._entities.values():
+            yield self._convert(entity)
+    def __len__(self) -> int:
+        """Return number of entities currently held in memory."""
+        return len(self._entities)
+    def __repr__(self) -> str:
+        return f"<Collection '{self._name}' ({len(self._entities)} entities)>"