scruby 0.17.0__py3-none-any.whl → 0.27.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scruby/errors.py CHANGED
@@ -1,3 +1,7 @@
+# Scruby - Asynchronous library for building and managing a hybrid database based on a key-value scheme.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
 """Scruby Exceptions."""
 
 from __future__ import annotations
@@ -5,6 +9,8 @@ from __future__ import annotations
 __all__ = (
     "ScrubyException",
     "MetadataValueError",
+    "KeyAlreadyExistsError",
+    "KeyNotExistsError",
 )
 
 
@@ -26,7 +32,7 @@ class MetadataValueError(ScrubyException):
 class KeyAlreadyExistsError(ScrubyException):
     """Exception raised if the key already exists."""
 
-    def __init__(self) -> None:
+    def __init__(self) -> None:  # noqa: D107
         self.message = "The key already exists."
         super().__init__(self.message)
 
@@ -34,6 +40,6 @@ class KeyAlreadyExistsError(ScrubyException):
 class KeyNotExistsError(ScrubyException):
     """Exception raised if the key does not exist."""
 
-    def __init__(self) -> None:
+    def __init__(self) -> None:  # noqa: D107
         self.message = "The key does not exist."
         super().__init__(self.message)
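The two new exceptions give callers a typed way to distinguish duplicate-key writes from updates of missing keys. A minimal usage sketch, assuming a collection handle `users` and a document `user` obtained elsewhere (both hypothetical here, since this diff does not show the client entry point):

```python
from scruby.errors import KeyAlreadyExistsError, KeyNotExistsError

try:
    await users.add_doc(user)
except KeyAlreadyExistsError as err:
    print(err.message)  # "The key already exists."

try:
    await users.update_doc(user)
except KeyNotExistsError as err:
    print(err.message)  # "The key does not exist."
```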
scruby/mixins/__init__.py ADDED
@@ -0,0 +1,25 @@
+# Scruby - Asynchronous library for building and managing a hybrid database based on a key-value scheme.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
+"""Mixins."""
+
+from __future__ import annotations
+
+__all__ = (
+    "Collection",
+    "Count",
+    "CustomTask",
+    "Delete",
+    "Find",
+    "Keys",
+    "Update",
+)
+
+from scruby.mixins.collection import Collection
+from scruby.mixins.count import Count
+from scruby.mixins.custom_task import CustomTask
+from scruby.mixins.delete import Delete
+from scruby.mixins.docs import Keys
+from scruby.mixins.find import Find
+from scruby.mixins.update import Update
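The mixins are not standalone: every method references private attributes and helpers of the host class (`_class_model`, `_db_root`, `_hash_reduce_left`, `_max_branch_number`, `_max_workers`, `_get_leaf_path`, `_counter_documents`, `get_meta`). A minimal sketch of a compatible host, with assumed values; the package's real client class is not part of this diff:

```python
from typing import Any

from scruby.mixins import Collection, Count, CustomTask, Delete, Find, Keys, Update


class HypotheticalCollection(Collection, Count, CustomTask, Delete, Find, Keys, Update):
    """Sketch only - supplies the attributes the mixins rely on."""

    def __init__(self, class_model: Any, db_root: str = "ScrubyDB") -> None:
        self._class_model = class_model   # pydantic model bound to the collection
        self._db_root = db_root           # root directory of the database
        self._hash_reduce_left = 4        # assumed: hex chars trimmed from the branch hash
        self._max_branch_number = 16**4   # assumed: upper bound of the branch range
        self._max_workers = 256           # assumed: thread pool size for quantum-loop tasks
```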
scruby/mixins/collection.py ADDED
@@ -0,0 +1,50 @@
+# Scruby - Asynchronous library for building and managing a hybrid database based on a key-value scheme.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
+"""Methods for working with collections."""
+
+from __future__ import annotations
+
+__all__ = ("Collection",)
+
+from shutil import rmtree
+
+from anyio import Path, to_thread
+
+from scruby import settings
+
+
+class Collection:
+    """Methods for working with collections."""
+
+    def collection_name(self) -> str:
+        """Get the collection name.
+
+        Returns:
+            Collection name.
+        """
+        return self._class_model.__name__
+
+    @staticmethod
+    async def collection_list() -> list[str]:
+        """Get the list of collections."""
+        target_directory = Path(settings.DB_ROOT)
+        # Get all entries in the directory.
+        all_entries = target_directory.iterdir()
+        directory_names: list[str] = [entry.name async for entry in all_entries]
+        return directory_names
+
+    @staticmethod
+    async def delete_collection(name: str) -> None:
+        """Asynchronous method for deleting a collection by its name.
+
+        Args:
+            name (str): Collection name.
+
+        Returns:
+            None.
+        """
+        target_directory = f"{settings.DB_ROOT}/{name}"
+        await to_thread.run_sync(rmtree, target_directory)  # pyrefly: ignore[bad-argument-type]
+        return
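Usage sketch for the collection helpers: `collection_list` and `delete_collection` are static and operate directly on `settings.DB_ROOT`, while `collection_name` simply returns the bound model's class name. `users` is a hypothetical collection handle bound to a `User` model:

```python
name = users.collection_name()           # -> "User" (the model's class name)

# Every collection is a directory under settings.DB_ROOT.
names = await users.collection_list()    # e.g. ["User", "Order"]

# Removes the collection's directory tree via shutil.rmtree in a worker thread.
await users.delete_collection("Order")
```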
scruby/mixins/count.py ADDED
@@ -0,0 +1,62 @@
+# Scruby - Asynchronous library for building and managing a hybrid database based on a key-value scheme.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
+"""Methods for counting the number of documents."""
+
+from __future__ import annotations
+
+__all__ = ("Count",)
+
+import concurrent.futures
+from collections.abc import Callable
+from typing import Any
+
+
+class Count:
+    """Methods for counting the number of documents."""
+
+    async def estimated_document_count(self) -> int:
+        """Get an estimate of the number of documents in this collection using collection metadata.
+
+        Returns:
+            The number of documents.
+        """
+        meta = await self.get_meta()
+        return meta.counter_documents
+
+    async def count_documents(
+        self,
+        filter_fn: Callable,
+    ) -> int:
+        """Count the number of documents matching the filter in this collection.
+
+        The search is based on the effect of a quantum loop.
+        The search effectiveness depends on the number of processor threads.
+        Ideally, hundreds and even thousands of threads are required.
+
+        Args:
+            filter_fn: A function that implements the filtering conditions.
+
+        Returns:
+            The number of documents.
+        """
+        branch_numbers: range = range(1, self._max_branch_number)
+        search_task_fn: Callable = self._task_find
+        hash_reduce_left: int = self._hash_reduce_left
+        db_root: str = self._db_root
+        class_model: Any = self._class_model
+        counter: int = 0
+        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
+            for branch_number in branch_numbers:
+                future = executor.submit(
+                    search_task_fn,
+                    branch_number,
+                    filter_fn,
+                    hash_reduce_left,
+                    db_root,
+                    class_model,
+                )
+                if await future.result() is not None:
+                    counter += 1
+        return counter
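`count_documents` takes a plain callable that receives a validated model instance and returns a truthy value for matches, while `estimated_document_count` just reads the counter kept in the collection metadata. A sketch, assuming a hypothetical `User` model with an `age` field and a collection handle `users`:

```python
def is_adult(user) -> bool:
    # Receives a validated model instance; True marks a match.
    return user.age >= 18

exact = await users.count_documents(is_adult)    # walks the branch range
rough = await users.estimated_document_count()   # reads collection metadata
```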
scruby/mixins/custom_task.py ADDED
@@ -0,0 +1,75 @@
+# Scruby - Asynchronous library for building and managing a hybrid database based on a key-value scheme.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
+"""Quantum methods for running custom tasks."""
+
+from __future__ import annotations
+
+__all__ = ("CustomTask",)
+
+from collections.abc import Callable
+from typing import Any
+
+import orjson
+from anyio import Path
+
+
+class CustomTask:
+    """Quantum methods for running custom tasks."""
+
+    @staticmethod
+    async def _task_get_docs(
+        branch_number: int,
+        hash_reduce_left: int,
+        db_root: str,
+        class_model: Any,
+    ) -> list[Any]:
+        """Get documents for a custom task.
+
+        This method is for internal use.
+
+        Returns:
+            List of documents.
+        """
+        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
+        separated_hash: str = "/".join(list(branch_number_as_hash))
+        leaf_path: Path = Path(
+            *(
+                db_root,
+                class_model.__name__,
+                separated_hash,
+                "leaf.json",
+            ),
+        )
+        docs: list[Any] = []
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict[str, str] = orjson.loads(data_json) or {}
+            for _, val in data.items():
+                docs.append(class_model.model_validate_json(val))
+        return docs
+
+    async def run_custom_task(self, custom_task_fn: Callable, limit_docs: int = 1000) -> Any:
+        """Run a custom task.
+
+        This method runs a task built on the basis of a quantum loop.
+        The effectiveness of the task depends on the number of processor threads.
+        Ideally, hundreds and even thousands of threads are required.
+
+        Args:
+            custom_task_fn: A function that executes the custom task.
+            limit_docs: Limit on the number of documents. Defaults to 1000.
+
+        Returns:
+            The result of the custom task.
+        """
+        kwargs = {
+            "get_docs_fn": self._task_get_docs,
+            "branch_numbers": range(1, self._max_branch_number),
+            "hash_reduce_left": self._hash_reduce_left,
+            "db_root": self._db_root,
+            "class_model": self._class_model,
+            "limit_docs": limit_docs,
+        }
+        return await custom_task_fn(**kwargs)
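`run_custom_task` injects a fixed set of keyword arguments into the task function: `get_docs_fn`, `branch_numbers`, `hash_reduce_left`, `db_root`, `class_model`, and `limit_docs`. A custom task therefore iterates the branch range itself and pulls each branch's documents through `get_docs_fn`. A sketch that averages a hypothetical `age` field:

```python
async def average_age(get_docs_fn, branch_numbers, hash_reduce_left, db_root, class_model, limit_docs):
    total, seen = 0, 0
    for branch_number in branch_numbers:
        # _task_get_docs returns every document stored in this branch's leaf.
        docs = await get_docs_fn(branch_number, hash_reduce_left, db_root, class_model)
        for doc in docs:
            total += doc.age  # `age` is a hypothetical model field
            seen += 1
            if seen >= limit_docs:
                return total / seen
    return total / seen if seen else 0.0


result = await users.run_custom_task(average_age)  # `users` is a hypothetical handle
```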
scruby/mixins/delete.py ADDED
@@ -0,0 +1,96 @@
+# Scruby - Asynchronous library for building and managing a hybrid database based on a key-value scheme.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
+"""Methods for deleting documents."""
+
+from __future__ import annotations
+
+__all__ = ("Delete",)
+
+import concurrent.futures
+from collections.abc import Callable
+from typing import Any
+
+import orjson
+from anyio import Path
+
+
+class Delete:
+    """Methods for deleting documents."""
+
+    @staticmethod
+    async def _task_delete(
+        branch_number: int,
+        filter_fn: Callable,
+        hash_reduce_left: int,
+        db_root: str,
+        class_model: Any,
+    ) -> int:
+        """Task to find and delete documents.
+
+        This method is for internal use.
+
+        Returns:
+            The number of deleted documents.
+        """
+        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
+        separated_hash: str = "/".join(list(branch_number_as_hash))
+        leaf_path: Path = Path(
+            *(
+                db_root,
+                class_model.__name__,
+                separated_hash,
+                "leaf.json",
+            ),
+        )
+        counter: int = 0
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict[str, str] = orjson.loads(data_json) or {}
+            new_state: dict[str, str] = {}
+            for key, val in data.items():
+                doc = class_model.model_validate_json(val)
+                if filter_fn(doc):
+                    counter -= 1
+                else:
+                    new_state[key] = val
+            await leaf_path.write_bytes(orjson.dumps(new_state))
+        return counter
+
+    async def delete_many(
+        self,
+        filter_fn: Callable,
+    ) -> int:
+        """Delete one or more documents matching the filter.
+
+        The search is based on the effect of a quantum loop.
+        The search effectiveness depends on the number of processor threads.
+        Ideally, hundreds and even thousands of threads are required.
+
+        Args:
+            filter_fn: A function that implements the filtering conditions.
+
+        Returns:
+            The number of deleted documents.
+        """
+        branch_numbers: range = range(1, self._max_branch_number)
+        search_task_fn: Callable = self._task_delete
+        hash_reduce_left: int = self._hash_reduce_left
+        db_root: str = self._db_root
+        class_model: Any = self._class_model
+        counter: int = 0
+        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
+            for branch_number in branch_numbers:
+                future = executor.submit(
+                    search_task_fn,
+                    branch_number,
+                    filter_fn,
+                    hash_reduce_left,
+                    db_root,
+                    class_model,
+                )
+                counter += await future.result()
+        if counter < 0:
+            await self._counter_documents(counter)
+        return abs(counter)
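`delete_many` mirrors the threading scheme of `Find`: each branch task rewrites its `leaf.json` without the matching entries and reports a negative count, which `delete_many` applies to the collection's document counter before returning the absolute value. Sketch, with the same hypothetical `users` handle:

```python
# Delete every document matching the filter; the return value is the
# number of documents removed across all branches.
removed = await users.delete_many(lambda user: user.age < 18)
print(f"Removed {removed} documents")
```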
scruby/mixins/docs.py ADDED
@@ -0,0 +1,168 @@
+# Scruby - Asynchronous library for building and managing a hybrid database based on a key-value scheme.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
+"""Methods for working with keys."""
+
+from __future__ import annotations
+
+__all__ = ("Keys",)
+
+import logging
+from typing import Any
+
+import orjson
+
+from scruby.errors import (
+    KeyAlreadyExistsError,
+    KeyNotExistsError,
+)
+
+
+class Keys:
+    """Methods for working with keys."""
+
+    async def add_doc(self, doc: Any) -> None:
+        """Asynchronous method for adding a document to the collection.
+
+        Args:
+            doc: Value of the key. A type derived from `BaseModel`.
+
+        Returns:
+            None.
+        """
+        # Check that the model matches the collection.
+        if not isinstance(doc, self._class_model):
+            doc_class_name = doc.__class__.__name__
+            collection_name = self._class_model.__name__
+            msg = (
+                f"(add_doc) Parameter `doc` => Model `{doc_class_name}` does not match collection `{collection_name}`!"
+            )
+            logging.error(msg)
+            raise TypeError(msg)
+        # The path to the cell of the collection.
+        leaf_path, prepared_key = await self._get_leaf_path(doc.key)
+        doc_json: str = doc.model_dump_json()
+        # Write the key-value pair to the collection.
+        if await leaf_path.exists():
+            # Add a new key.
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict = orjson.loads(data_json) or {}
+            try:
+                data[prepared_key]
+            except KeyError:
+                data[prepared_key] = doc_json
+                await leaf_path.write_bytes(orjson.dumps(data))
+            else:
+                err = KeyAlreadyExistsError()
+                logging.error(err.message)
+                raise err
+        else:
+            # Add a new document to a blank leaf.
+            await leaf_path.write_bytes(orjson.dumps({prepared_key: doc_json}))
+        await self._counter_documents(1)
+
+    async def update_doc(self, doc: Any) -> None:
+        """Asynchronous method for updating a document in the collection.
+
+        Args:
+            doc: Value of the key. A type derived from `BaseModel`.
+
+        Returns:
+            None.
+        """
+        # Check that the model matches the collection.
+        if not isinstance(doc, self._class_model):
+            doc_class_name = doc.__class__.__name__
+            collection_name = self._class_model.__name__
+            msg = (
+                f"(update_doc) Parameter `doc` => Model `{doc_class_name}` "
+                f"does not match collection `{collection_name}`!"
+            )
+            logging.error(msg)
+            raise TypeError(msg)
+        # The path to the cell of the collection.
+        leaf_path, prepared_key = await self._get_leaf_path(doc.key)
+        doc_json: str = doc.model_dump_json()
+        # Update the existing key.
+        if await leaf_path.exists():
+            # Update the existing key.
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict = orjson.loads(data_json) or {}
+            try:
+                data[prepared_key]
+                data[prepared_key] = doc_json
+                await leaf_path.write_bytes(orjson.dumps(data))
+            except KeyError:
+                err = KeyNotExistsError()
+                logging.error(err.message)
+                raise err from None
+        else:
+            logging.error("The key does not exist.")
+            raise KeyError("The key does not exist.")
+
+    async def get_key(self, key: str) -> Any:
+        """Asynchronous method for getting the value of a key from the collection.
+
+        Args:
+            key: Key name.
+
+        Returns:
+            Value of the key, or raises KeyError.
+        """
+        # The path to the database cell.
+        leaf_path, prepared_key = await self._get_leaf_path(key)
+        # Get the value of the key.
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict = orjson.loads(data_json) or {}
+            obj: Any = self._class_model.model_validate_json(data[prepared_key])
+            return obj
+        msg: str = "`get_key` - Unacceptable key value."
+        logging.error(msg)
+        raise KeyError(msg)
+
+    async def has_key(self, key: str) -> bool:
+        """Asynchronous method for checking the presence of a key in the collection.
+
+        Args:
+            key: Key name.
+
+        Returns:
+            True, if the key is present.
+        """
+        # Get the path to the cell of the collection.
+        leaf_path, prepared_key = await self._get_leaf_path(key)
+        # Check whether the key exists.
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict = orjson.loads(data_json) or {}
+            try:
+                data[prepared_key]
+                return True
+            except KeyError:
+                return False
+        return False
+
+    async def delete_key(self, key: str) -> None:
+        """Asynchronous method for deleting a key from the collection.
+
+        Args:
+            key: Key name.
+
+        Returns:
+            None.
+        """
+        # The path to the database cell.
+        leaf_path, prepared_key = await self._get_leaf_path(key)
+        # Delete the key.
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict = orjson.loads(data_json) or {}
+            del data[prepared_key]
+            await leaf_path.write_bytes(orjson.dumps(data))
+            await self._counter_documents(-1)
+            return
+        msg: str = "`delete_key` - Unacceptable key value."
+        logging.error(msg)
+        raise KeyError(msg)
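The `Keys` methods expect documents to be pydantic v2 models exposing a `key` field: the code reads `doc.key` and round-trips through `model_dump_json` / `model_validate_json`. A CRUD sketch with a hypothetical `User` model and collection handle `users`:

```python
from pydantic import BaseModel


class User(BaseModel):
    key: str   # documents are routed to their leaf by this field
    name: str


user = User(key="maria", name="Maria")

await users.add_doc(user)              # raises KeyAlreadyExistsError on duplicates
assert await users.has_key("maria")

user.name = "Maria K."
await users.update_doc(user)           # raises KeyNotExistsError for unknown keys

stored = await users.get_key("maria")  # -> User(key="maria", name="Maria K.")
await users.delete_key("maria")        # also decrements the document counter
```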
scruby/mixins/find.py ADDED
@@ -0,0 +1,149 @@
+# Scruby - Asynchronous library for building and managing a hybrid database based on a key-value scheme.
+# Copyright (c) 2025 Gennady Kostyunin
+# SPDX-License-Identifier: MIT
+#
+"""Quantum methods for searching documents."""
+
+from __future__ import annotations
+
+__all__ = ("Find",)
+
+import concurrent.futures
+from collections.abc import Callable
+from typing import Any
+
+import orjson
+from anyio import Path
+
+
+class Find:
+    """Quantum methods for searching documents."""
+
+    @staticmethod
+    async def _task_find(
+        branch_number: int,
+        filter_fn: Callable,
+        hash_reduce_left: int,
+        db_root: str,
+        class_model: Any,
+        filter_is_checking: bool = True,
+    ) -> list[Any] | None:
+        """Task to find documents.
+
+        This method is for internal use.
+
+        Returns:
+            List of documents or None.
+        """
+        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
+        separated_hash: str = "/".join(list(branch_number_as_hash))
+        leaf_path: Path = Path(
+            *(
+                db_root,
+                class_model.__name__,
+                separated_hash,
+                "leaf.json",
+            ),
+        )
+        docs: list[Any] = []
+        if await leaf_path.exists():
+            data_json: bytes = await leaf_path.read_bytes()
+            data: dict[str, str] = orjson.loads(data_json) or {}
+            for _, val in data.items():
+                doc = class_model.model_validate_json(val)
+                if not filter_is_checking or filter_fn(doc):
+                    docs.append(doc)
+        return docs or None
+
+    async def find_one(
+        self,
+        filter_fn: Callable,
+    ) -> Any | None:
+        """Find a single document matching the filter.
+
+        The search is based on the effect of a quantum loop.
+        The search effectiveness depends on the number of processor threads.
+        Ideally, hundreds and even thousands of threads are required.
+
+        Args:
+            filter_fn (Callable): A function that implements the filtering conditions.
+
+        Returns:
+            Document or None.
+        """
+        branch_numbers: range = range(1, self._max_branch_number)
+        search_task_fn: Callable = self._task_find
+        hash_reduce_left: int = self._hash_reduce_left
+        db_root: str = self._db_root
+        class_model: Any = self._class_model
+        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
+            for branch_number in branch_numbers:
+                future = executor.submit(
+                    search_task_fn,
+                    branch_number,
+                    filter_fn,
+                    hash_reduce_left,
+                    db_root,
+                    class_model,
+                )
+                docs = await future.result()
+                if docs is not None:
+                    return docs[0]
+        return None
+
+    async def find_many(
+        self,
+        filter_fn: Callable = lambda _: True,
+        limit_docs: int = 1000,
+        page_number: int = 1,
+    ) -> list[Any] | None:
+        """Find one or more documents matching the filter.
+
+        The search is based on the effect of a quantum loop.
+        The search effectiveness depends on the number of processor threads.
+        Ideally, hundreds and even thousands of threads are required.
+
+        Args:
+            filter_fn (Callable): A function that implements the filtering conditions.
+                By default it matches all documents.
+            limit_docs (int): Limit on the number of documents. Defaults to 1000.
+            page_number (int): Page number for paginated output. Defaults to 1.
+                Number of documents per page = limit_docs.
+
+        Returns:
+            List of documents or None.
+        """
+        branch_numbers: range = range(1, self._max_branch_number)
+        search_task_fn: Callable = self._task_find
+        hash_reduce_left: int = self._hash_reduce_left
+        db_root: str = self._db_root
+        class_model: Any = self._class_model
+        counter: int = 0
+        number_docs_skipped: int = limit_docs * (page_number - 1) if page_number > 1 else 0
+        result: list[Any] = []
+        filter_is_checking: bool = False
+        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
+            for branch_number in branch_numbers:
+                if number_docs_skipped == 0 and counter >= limit_docs:
+                    return result[:limit_docs]
+                future = executor.submit(
+                    search_task_fn,
+                    branch_number,
+                    filter_fn,
+                    hash_reduce_left,
+                    db_root,
+                    class_model,
+                    filter_is_checking,
+                )
+                docs = await future.result()
+                if docs is not None:
+                    for doc in docs:
+                        if number_docs_skipped == 0:
+                            if counter >= limit_docs:
+                                return result[:limit_docs]
+                            if filter_fn(doc):
+                                result.append(doc)
+                                counter += 1
+                        else:
+                            number_docs_skipped -= 1
+        return result or None
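The leaf layout that `_task_find` (and the matching tasks in `Count`, `Delete`, and `CustomTask`) probes follows directly from the code: a branch number is formatted as an 8-digit hex string, the leftmost `hash_reduce_left` characters are trimmed, and the remaining characters become nested directories ending in `leaf.json`. A self-contained demonstration of the path math, with an assumed `hash_reduce_left` of 4 and db root `ScrubyDB` (both hypothetical values; the mixins read them from `self`):

```python
branch_number = 2748
hash_reduce_left = 4  # assumed setting

branch_hash = f"{branch_number:08x}"[hash_reduce_left:]  # "00000abc" -> "0abc"
separated = "/".join(branch_hash)                        # -> "0/a/b/c"
leaf = f"ScrubyDB/User/{separated}/leaf.json"

print(leaf)  # ScrubyDB/User/0/a/b/c/leaf.json
```

Pagination in `find_many` builds on the same walk: it skips `limit_docs * (page_number - 1)` documents before collecting up to `limit_docs` matches.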