scruby 0.9.3__py3-none-any.whl → 0.27.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scruby/__init__.py +37 -29
- scruby/aggregation.py +152 -0
- scruby/db.py +213 -301
- scruby/errors.py +45 -0
- scruby/mixins/__init__.py +25 -0
- scruby/mixins/collection.py +50 -0
- scruby/mixins/count.py +62 -0
- scruby/mixins/custom_task.py +75 -0
- scruby/mixins/delete.py +96 -0
- scruby/mixins/docs.py +168 -0
- scruby/mixins/find.py +149 -0
- scruby/mixins/update.py +99 -0
- scruby/settings.py +44 -0
- {scruby-0.9.3.dist-info → scruby-0.27.2.dist-info}/METADATA +138 -112
- scruby-0.27.2.dist-info/RECORD +18 -0
- {scruby-0.9.3.dist-info → scruby-0.27.2.dist-info}/WHEEL +1 -1
- {scruby-0.9.3.dist-info → scruby-0.27.2.dist-info}/licenses/LICENSE +21 -21
- scruby/constants.py +0 -31
- scruby-0.9.3.dist-info/RECORD +0 -8
scruby/mixins/count.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
|
|
2
|
+
# Copyright (c) 2025 Gennady Kostyunin
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
"""Methods for counting the number of documents."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
__all__ = ("Count",)
|
|
10
|
+
|
|
11
|
+
import concurrent.futures
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Count:
    """Methods for counting the number of documents."""

    async def estimated_document_count(self) -> int:
        """Get an estimate of the number of documents in this collection using collection metadata.

        The value is read from the collection's stored metadata counter,
        so no leaves are scanned.

        Returns:
            The number of documents.
        """
        meta = await self.get_meta()
        return meta.counter_documents

    async def count_documents(
        self,
        filter_fn: Callable,
    ) -> int:
        """Count the number of documents matching the filter in this collection.

        The search is based on the effect of a quantum loop.
        The search effectiveness depends on the number of processor threads.
        Ideally, hundreds and even thousands of threads are required.

        Args:
            filter_fn: A function that execute the conditions of filtering.

        Returns:
            The number of documents.
        """
        branch_numbers: range = range(1, self._max_branch_number)
        search_task_fn: Callable = self._task_find
        hash_reduce_left: int = self._hash_reduce_left
        db_root: str = self._db_root
        class_model: Any = self._class_model
        counter: int = 0
        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
            for branch_number in branch_numbers:
                future = executor.submit(
                    search_task_fn,
                    branch_number,
                    filter_fn,
                    hash_reduce_left,
                    db_root,
                    class_model,
                )
                # BUG FIX: `_task_find` returns the *list* of matching documents
                # in a leaf (or None). The old code added 1 per non-empty branch,
                # undercounting leaves that hold several matching documents.
                docs = await future.result()
                if docs is not None:
                    counter += len(docs)
        return counter
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
|
|
2
|
+
# Copyright (c) 2025 Gennady Kostyunin
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
"""Quantum methods for running custom tasks."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
__all__ = ("CustomTask",)
|
|
10
|
+
|
|
11
|
+
from collections.abc import Callable
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import orjson
|
|
15
|
+
from anyio import Path
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class CustomTask:
    """Quantum methods for running custom tasks."""

    @staticmethod
    async def _task_get_docs(
        branch_number: int,
        hash_reduce_left: int,
        db_root: str,
        class_model: Any,
    ) -> list[Any]:
        """Load every document stored in the leaf of one branch.

        This method is for internal use.

        Returns:
            List of documents.
        """
        # The branch number, rendered as 8 hex digits and reduced from the left,
        # names the leaf directory; each remaining digit is one path level.
        reduced_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
        path_to_leaf: Path = Path(
            db_root,
            class_model.__name__,
            *reduced_hash,
            "leaf.json",
        )
        if not await path_to_leaf.exists():
            return []
        raw_json: bytes = await path_to_leaf.read_bytes()
        leaf_data: dict[str, str] = orjson.loads(raw_json) or {}
        return [class_model.model_validate_json(item) for item in leaf_data.values()]

    async def run_custom_task(self, custom_task_fn: Callable, limit_docs: int = 1000) -> Any:
        """Running custom task.

        This method running a task created on the basis of a quantum loop.
        Effectiveness running task depends on the number of processor threads.
        Ideally, hundreds and even thousands of threads are required.

        Args:
            custom_task_fn: A function that execute the custom task.
            limit_docs: Limiting the number of documents. By default = 1000.

        Returns:
            The result of a custom task.
        """
        # Hand the task everything it needs to drive its own quantum loop.
        return await custom_task_fn(
            get_docs_fn=self._task_get_docs,
            branch_numbers=range(1, self._max_branch_number),
            hash_reduce_left=self._hash_reduce_left,
            db_root=self._db_root,
            class_model=self._class_model,
            limit_docs=limit_docs,
        )
|
scruby/mixins/delete.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
|
|
2
|
+
# Copyright (c) 2025 Gennady Kostyunin
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
"""Methods for deleting documents."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
__all__ = ("Delete",)
|
|
10
|
+
|
|
11
|
+
import concurrent.futures
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import orjson
|
|
16
|
+
from anyio import Path
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Delete:
    """Methods for deleting documents."""

    @staticmethod
    async def _task_delete(
        branch_number: int,
        filter_fn: Callable,
        hash_reduce_left: int,
        db_root: str,
        class_model: Any,
    ) -> int:
        """Task for find and delete documents.

        This method is for internal use.

        Returns:
            The number of deleted documents, as a non-positive number
            (one is subtracted per deleted document).
        """
        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
        separated_hash: str = "/".join(branch_number_as_hash)
        leaf_path: Path = Path(
            *(
                db_root,
                class_model.__name__,
                separated_hash,
                "leaf.json",
            ),
        )
        counter: int = 0
        if await leaf_path.exists():
            data_json: bytes = await leaf_path.read_bytes()
            data: dict[str, str] = orjson.loads(data_json) or {}
            new_state: dict[str, str] = {}
            for key, val in data.items():
                doc = class_model.model_validate_json(val)
                if filter_fn(doc):
                    counter -= 1
                else:
                    # Keep documents that do not match the filter.
                    new_state[key] = val
            # IMPROVEMENT: rewrite the leaf only when something was actually
            # deleted; the old code rewrote every existing leaf on every call.
            if counter < 0:
                await leaf_path.write_bytes(orjson.dumps(new_state))
        return counter

    async def delete_many(
        self,
        filter_fn: Callable,
    ) -> int:
        """Delete one or more documents matching the filter.

        The search is based on the effect of a quantum loop.
        The search effectiveness depends on the number of processor threads.
        Ideally, hundreds and even thousands of threads are required.

        Args:
            filter_fn: A function that execute the conditions of filtering.

        Returns:
            The number of deleted documents.
        """
        branch_numbers: range = range(1, self._max_branch_number)
        search_task_fn: Callable = self._task_delete
        hash_reduce_left: int = self._hash_reduce_left
        db_root: str = self._db_root
        class_model: Any = self._class_model
        counter: int = 0
        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
            for branch_number in branch_numbers:
                future = executor.submit(
                    search_task_fn,
                    branch_number,
                    filter_fn,
                    hash_reduce_left,
                    db_root,
                    class_model,
                )
                counter += await future.result()
        if counter < 0:
            # Keep the collection's stored document counter in sync.
            await self._counter_documents(counter)
        return abs(counter)
|
scruby/mixins/docs.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
|
|
2
|
+
# Copyright (c) 2025 Gennady Kostyunin
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
"""Methods for working with keys."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
__all__ = ("Keys",)
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import orjson
|
|
15
|
+
|
|
16
|
+
from scruby.errors import (
|
|
17
|
+
KeyAlreadyExistsError,
|
|
18
|
+
KeyNotExistsError,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Keys:
    """Methods for working with keys."""

    def _check_model(self, doc: Any, method_name: str) -> None:
        """Raise TypeError when `doc` is not an instance of this collection's model."""
        if not isinstance(doc, self._class_model):
            msg = (
                f"({method_name}) Parameter `doc` => Model `{doc.__class__.__name__}` "
                f"does not match collection `{self._class_model.__name__}`!"
            )
            logging.error(msg)
            raise TypeError(msg)

    async def add_doc(self, doc: Any) -> None:
        """Asynchronous method for adding document to collection.

        Args:
            doc: Value of key. Type, derived from `BaseModel`.

        Returns:
            None.
        """
        self._check_model(doc, "add_doc")
        # Locate the leaf (cell) of the collection that owns this key.
        leaf_path, prepared_key = await self._get_leaf_path(doc.key)
        doc_json: str = doc.model_dump_json()
        if await leaf_path.exists():
            leaf_data: dict = orjson.loads(await leaf_path.read_bytes()) or {}
            if prepared_key in leaf_data:
                err = KeyAlreadyExistsError()
                logging.error(err.message)
                raise err
            leaf_data[prepared_key] = doc_json
            await leaf_path.write_bytes(orjson.dumps(leaf_data))
        else:
            # First document of this leaf.
            await leaf_path.write_bytes(orjson.dumps({prepared_key: doc_json}))
        await self._counter_documents(1)

    async def update_doc(self, doc: Any) -> None:
        """Asynchronous method for updating key to collection.

        Args:
            doc: Value of key. Type `BaseModel`.

        Returns:
            None.
        """
        self._check_model(doc, "update_doc")
        # Locate the leaf (cell) of the collection that owns this key.
        leaf_path, prepared_key = await self._get_leaf_path(doc.key)
        doc_json: str = doc.model_dump_json()
        if not await leaf_path.exists():
            logging.error("The key not exists.")
            raise KeyError()
        leaf_data: dict = orjson.loads(await leaf_path.read_bytes()) or {}
        if prepared_key not in leaf_data:
            err = KeyNotExistsError()
            logging.error(err.message)
            raise err from None
        leaf_data[prepared_key] = doc_json
        await leaf_path.write_bytes(orjson.dumps(leaf_data))

    async def get_key(self, key: str) -> Any:
        """Asynchronous method for getting value of key from collection.

        Args:
            key: Key name.

        Returns:
            Value of key or KeyError.
        """
        leaf_path, prepared_key = await self._get_leaf_path(key)
        if await leaf_path.exists():
            leaf_data: dict = orjson.loads(await leaf_path.read_bytes()) or {}
            # A key absent from the leaf propagates KeyError to the caller.
            return self._class_model.model_validate_json(leaf_data[prepared_key])
        msg: str = "`get_key` - The unacceptable key value."
        logging.error(msg)
        raise KeyError()

    async def has_key(self, key: str) -> bool:
        """Asynchronous method for checking presence of key in collection.

        Args:
            key: Key name.

        Returns:
            True, if the key is present.
        """
        leaf_path, prepared_key = await self._get_leaf_path(key)
        if not await leaf_path.exists():
            return False
        leaf_data: dict = orjson.loads(await leaf_path.read_bytes()) or {}
        return prepared_key in leaf_data

    async def delete_key(self, key: str) -> None:
        """Asynchronous method for deleting key from collection.

        Args:
            key: Key name.

        Returns:
            None.
        """
        leaf_path, prepared_key = await self._get_leaf_path(key)
        if await leaf_path.exists():
            leaf_data: dict = orjson.loads(await leaf_path.read_bytes()) or {}
            del leaf_data[prepared_key]
            await leaf_path.write_bytes(orjson.dumps(leaf_data))
            await self._counter_documents(-1)
            return
        msg: str = "`delete_key` - The unacceptable key value."
        logging.error(msg)
        raise KeyError()
|
scruby/mixins/find.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
|
|
2
|
+
# Copyright (c) 2025 Gennady Kostyunin
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
"""Quantum methods for searching documents."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
__all__ = ("Find",)
|
|
10
|
+
|
|
11
|
+
import concurrent.futures
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import orjson
|
|
16
|
+
from anyio import Path
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Find:
    """Quantum methods for searching documents."""

    @staticmethod
    async def _task_find(
        branch_number: int,
        filter_fn: Callable,
        hash_reduce_left: int,
        db_root: str,
        class_model: Any,
        filter_is_checking: bool = True,
    ) -> list[Any] | None:
        """Task for find documents.

        This method is for internal use.

        Args:
            branch_number: Number of the branch (leaf directory) to scan.
            filter_fn: A function that execute the conditions of filtering.
            hash_reduce_left: The length of the hash reduction on the left side.
            db_root: Path to root directory of database.
            class_model: Model of the collection.
            filter_is_checking: When False, return all documents of the leaf
                and let the caller apply the filter (used for pagination).

        Returns:
            List of documents or None.
        """
        # FIX: `hash_reduce_left` was annotated `str`, but it is an int slice
        # offset (the sibling Count/Delete mixins annotate it `int`).
        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
        separated_hash: str = "/".join(branch_number_as_hash)
        leaf_path: Path = Path(
            *(
                db_root,
                class_model.__name__,
                separated_hash,
                "leaf.json",
            ),
        )
        docs: list[Any] = []
        if await leaf_path.exists():
            data_json: bytes = await leaf_path.read_bytes()
            data: dict[str, str] = orjson.loads(data_json) or {}
            for val in data.values():
                doc = class_model.model_validate_json(val)
                if not filter_is_checking or filter_fn(doc):
                    docs.append(doc)
        return docs or None

    async def find_one(
        self,
        filter_fn: Callable,
    ) -> Any | None:
        """Finds a single document matching the filter.

        The search is based on the effect of a quantum loop.
        The search effectiveness depends on the number of processor threads.
        Ideally, hundreds and even thousands of threads are required.

        Args:
            filter_fn (Callable): A function that execute the conditions of filtering.

        Returns:
            Document or None.
        """
        branch_numbers: range = range(1, self._max_branch_number)
        search_task_fn: Callable = self._task_find
        hash_reduce_left: int = self._hash_reduce_left
        db_root: str = self._db_root
        class_model: Any = self._class_model
        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
            for branch_number in branch_numbers:
                future = executor.submit(
                    search_task_fn,
                    branch_number,
                    filter_fn,
                    hash_reduce_left,
                    db_root,
                    class_model,
                )
                docs = await future.result()
                if docs is not None:
                    # First branch with a match wins.
                    return docs[0]
        return None

    async def find_many(
        self,
        filter_fn: Callable = lambda _: True,
        limit_docs: int = 1000,
        page_number: int = 1,
    ) -> list[Any] | None:
        """Finds one or more documents matching the filter.

        The search is based on the effect of a quantum loop.
        The search effectiveness depends on the number of processor threads.
        Ideally, hundreds and even thousands of threads are required.

        Args:
            filter_fn (Callable): A function that execute the conditions of filtering.
                By default it searches for all documents.
            limit_docs (int): Limiting the number of documents. By default = 1000.
            page_number (int): For pagination output. By default = 1.
                Number of documents per page = limit_docs.

        Returns:
            List of documents or None.
        """
        branch_numbers: range = range(1, self._max_branch_number)
        search_task_fn: Callable = self._task_find
        hash_reduce_left: int = self._hash_reduce_left
        db_root: str = self._db_root
        class_model: Any = self._class_model
        counter: int = 0
        # Documents to skip before the requested page starts.
        number_docs_skipped: int = limit_docs * (page_number - 1) if page_number > 1 else 0
        result: list[Any] = []
        # The filter is applied here (not in the task) so skipped documents
        # are counted consistently for pagination.
        filter_is_checking: bool = False
        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
            for branch_number in branch_numbers:
                if number_docs_skipped == 0 and counter >= limit_docs:
                    return result[:limit_docs]
                future = executor.submit(
                    search_task_fn,
                    branch_number,
                    filter_fn,
                    hash_reduce_left,
                    db_root,
                    class_model,
                    filter_is_checking,
                )
                docs = await future.result()
                if docs is not None:
                    for doc in docs:
                        if number_docs_skipped == 0:
                            if counter >= limit_docs:
                                return result[:limit_docs]
                            if filter_fn(doc):
                                result.append(doc)
                                counter += 1
                        else:
                            number_docs_skipped -= 1
        return result or None
|
scruby/mixins/update.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
|
|
2
|
+
# Copyright (c) 2025 Gennady Kostyunin
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
"""Methods for updating documents."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
__all__ = ("Update",)
|
|
10
|
+
|
|
11
|
+
import concurrent.futures
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import orjson
|
|
16
|
+
from anyio import Path
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Update:
    """Methods for updating documents."""

    @staticmethod
    async def _task_update(
        branch_number: int,
        filter_fn: Callable,
        hash_reduce_left: int,
        db_root: str,
        class_model: Any,
        new_data: dict[str, Any],
    ) -> int:
        """Task for find and update documents.

        This method is for internal use.

        Returns:
            The number of updated documents.
        """
        branch_number_as_hash: str = f"{branch_number:08x}"[hash_reduce_left:]
        separated_hash: str = "/".join(branch_number_as_hash)
        leaf_path: Path = Path(
            *(
                db_root,
                class_model.__name__,
                separated_hash,
                "leaf.json",
            ),
        )
        counter: int = 0
        if await leaf_path.exists():
            data_json: bytes = await leaf_path.read_bytes()
            data: dict[str, str] = orjson.loads(data_json) or {}
            new_state: dict[str, str] = {}
            for storage_key, val in data.items():
                doc = class_model.model_validate_json(val)
                if filter_fn(doc):
                    for field_name, field_value in new_data.items():
                        doc.__dict__[field_name] = field_value
                    # BUG FIX: store under the document's own storage key.
                    # The old code reused `key` from the inner `new_data` loop,
                    # collapsing every updated document into a single entry
                    # named after the last updated field.
                    new_state[storage_key] = doc.model_dump_json()
                    counter += 1
                else:
                    # BUG FIX: preserve documents that do not match the filter.
                    # The old code omitted them from `new_state`, so the leaf
                    # rewrite silently erased them (contrast Delete._task_delete,
                    # which keeps non-matching documents).
                    new_state[storage_key] = val
            # Rewrite the leaf only when at least one document changed.
            if counter:
                await leaf_path.write_bytes(orjson.dumps(new_state))
        return counter

    async def update_many(
        self,
        filter_fn: Callable,
        new_data: dict[str, Any],
    ) -> int:
        """Updates one or more documents matching the filter.

        The search is based on the effect of a quantum loop.
        The search effectiveness depends on the number of processor threads.
        Ideally, hundreds and even thousands of threads are required.

        Args:
            filter_fn: A function that execute the conditions of filtering.
            new_data: New data for the fields that need to be updated.

        Returns:
            The number of updated documents.
        """
        branch_numbers: range = range(1, self._max_branch_number)
        update_task_fn: Callable = self._task_update
        hash_reduce_left: int = self._hash_reduce_left
        db_root: str = self._db_root
        class_model: Any = self._class_model
        counter: int = 0
        with concurrent.futures.ThreadPoolExecutor(self._max_workers) as executor:
            for branch_number in branch_numbers:
                future = executor.submit(
                    update_task_fn,
                    branch_number,
                    filter_fn,
                    hash_reduce_left,
                    db_root,
                    class_model,
                    new_data,
                )
                counter += await future.result()
        return counter
|
scruby/settings.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Scruby - Asynchronous library for building and managing a hybrid database, by scheme of key-value.
|
|
2
|
+
# Copyright (c) 2025 Gennady Kostyunin
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
"""Database settings.
|
|
6
|
+
|
|
7
|
+
The module contains the following parameters:
|
|
8
|
+
|
|
9
|
+
- `DB_ROOT` - Path to root directory of database. `By default = "ScrubyDB" (in root of project)`.
|
|
10
|
+
- `HASH_REDUCE_LEFT` - The length of the hash reduction on the left side.
|
|
11
|
+
- `0` - 4294967296 branches in collection.
|
|
12
|
+
- `2` - 16777216 branches in collection.
|
|
13
|
+
- `4` - 65536 branches in collection.
|
|
14
|
+
- `6` - 256 branches in collection (by default).
|
|
15
|
+
- `MAX_WORKERS` - The maximum number of processes that can be used `By default = None`.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
__all__ = (
|
|
21
|
+
"DB_ROOT",
|
|
22
|
+
"HASH_REDUCE_LEFT",
|
|
23
|
+
"MAX_WORKERS",
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
from typing import Literal
|
|
27
|
+
|
|
28
|
+
# Path to root directory of database
|
|
29
|
+
# By default = "ScrubyDB" (in root of project).
|
|
30
|
+
DB_ROOT: str = "ScrubyDB"
|
|
31
|
+
|
|
32
|
+
# The length of the hash reduction on the left side.
|
|
33
|
+
# 0 = 4294967296 branches in collection.
|
|
34
|
+
# 2 = 16777216 branches in collection.
|
|
35
|
+
# 4 = 65536 branches in collection.
|
|
36
|
+
# 6 = 256 branches in collection (by default).
|
|
37
|
+
# Number of branches is number of requests to the hard disk during quantum operations.
|
|
38
|
+
# Quantum operations: find_one, find_many, count_documents, delete_many, run_custom_task.
|
|
39
|
+
HASH_REDUCE_LEFT: Literal[0, 2, 4, 6] = 6
|
|
40
|
+
|
|
41
|
+
# The maximum number of processes that can be used to execute the given calls.
|
|
42
|
+
# If None, then as many worker processes will be
|
|
43
|
+
# created as the machine has processors.
|
|
44
|
+
MAX_WORKERS: int | None = None
|