PyPI - aixtools - Versions diffs - 0.0.0__py3-none-any.whl - Mend

aixtools 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aixtools might be problematic. Click here for more details.

Files changed (88) hide show

aixtools/.chainlit/config.toml +113 -0
aixtools/.chainlit/translations/bn.json +214 -0
aixtools/.chainlit/translations/en-US.json +214 -0
aixtools/.chainlit/translations/gu.json +214 -0
aixtools/.chainlit/translations/he-IL.json +214 -0
aixtools/.chainlit/translations/hi.json +214 -0
aixtools/.chainlit/translations/ja.json +214 -0
aixtools/.chainlit/translations/kn.json +214 -0
aixtools/.chainlit/translations/ml.json +214 -0
aixtools/.chainlit/translations/mr.json +214 -0
aixtools/.chainlit/translations/nl.json +214 -0
aixtools/.chainlit/translations/ta.json +214 -0
aixtools/.chainlit/translations/te.json +214 -0
aixtools/.chainlit/translations/zh-CN.json +214 -0
aixtools/__init__.py +11 -0
aixtools/_version.py +34 -0
aixtools/a2a/app.py +126 -0
aixtools/a2a/google_sdk/__init__.py +0 -0
aixtools/a2a/google_sdk/card.py +27 -0
aixtools/a2a/google_sdk/pydantic_ai_adapter/agent_executor.py +199 -0
aixtools/a2a/google_sdk/pydantic_ai_adapter/storage.py +26 -0
aixtools/a2a/google_sdk/remote_agent_connection.py +88 -0
aixtools/a2a/google_sdk/utils.py +59 -0
aixtools/a2a/utils.py +115 -0
aixtools/agents/__init__.py +12 -0
aixtools/agents/agent.py +164 -0
aixtools/agents/agent_batch.py +71 -0
aixtools/agents/prompt.py +97 -0
aixtools/app.py +143 -0
aixtools/chainlit.md +14 -0
aixtools/compliance/__init__.py +9 -0
aixtools/compliance/private_data.py +138 -0
aixtools/context.py +17 -0
aixtools/db/__init__.py +17 -0
aixtools/db/database.py +110 -0
aixtools/db/vector_db.py +115 -0
aixtools/google/client.py +25 -0
aixtools/log_view/__init__.py +17 -0
aixtools/log_view/app.py +195 -0
aixtools/log_view/display.py +285 -0
aixtools/log_view/export.py +51 -0
aixtools/log_view/filters.py +41 -0
aixtools/log_view/log_utils.py +26 -0
aixtools/log_view/node_summary.py +229 -0
aixtools/logfilters/__init__.py +7 -0
aixtools/logfilters/context_filter.py +67 -0
aixtools/logging/__init__.py +30 -0
aixtools/logging/log_objects.py +227 -0
aixtools/logging/logging_config.py +161 -0
aixtools/logging/mcp_log_models.py +102 -0
aixtools/logging/mcp_logger.py +172 -0
aixtools/logging/model_patch_logging.py +87 -0
aixtools/logging/open_telemetry.py +36 -0
aixtools/mcp/__init__.py +9 -0
aixtools/mcp/client.py +375 -0
aixtools/mcp/example_client.py +30 -0
aixtools/mcp/example_server.py +22 -0
aixtools/mcp/fast_mcp_log.py +31 -0
aixtools/mcp/faulty_mcp.py +319 -0
aixtools/model_patch/model_patch.py +63 -0
aixtools/server/__init__.py +29 -0
aixtools/server/app_mounter.py +90 -0
aixtools/server/path.py +72 -0
aixtools/server/utils.py +70 -0
aixtools/server/workspace_privacy.py +65 -0
aixtools/testing/__init__.py +9 -0
aixtools/testing/aix_test_model.py +149 -0
aixtools/testing/mock_tool.py +66 -0
aixtools/testing/model_patch_cache.py +279 -0
aixtools/tools/doctor/__init__.py +3 -0
aixtools/tools/doctor/tool_doctor.py +61 -0
aixtools/tools/doctor/tool_recommendation.py +44 -0
aixtools/utils/__init__.py +35 -0
aixtools/utils/chainlit/cl_agent_show.py +82 -0
aixtools/utils/chainlit/cl_utils.py +168 -0
aixtools/utils/config.py +131 -0
aixtools/utils/config_util.py +69 -0
aixtools/utils/enum_with_description.py +37 -0
aixtools/utils/files.py +17 -0
aixtools/utils/persisted_dict.py +99 -0
aixtools/utils/utils.py +167 -0
aixtools/vault/__init__.py +7 -0
aixtools/vault/vault.py +137 -0
aixtools-0.0.0.dist-info/METADATA +669 -0
aixtools-0.0.0.dist-info/RECORD +88 -0
aixtools-0.0.0.dist-info/WHEEL +5 -0
aixtools-0.0.0.dist-info/entry_points.txt +2 -0
aixtools-0.0.0.dist-info/top_level.txt +1 -0

aixtools/utils/__init__.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""
+Utils package initialization.
+"""
+from aixtools.logging.logging_config import get_logger  # pylint: disable=import-error
+from aixtools.utils import config
+from aixtools.utils.enum_with_description import EnumWithDescription
+from aixtools.utils.persisted_dict import PersistedDict
+from aixtools.utils.utils import (
+    escape_backticks,
+    escape_newline,
+    find_file,
+    prepend_all_lines,
+    remove_quotes,
+    tabit,
+    to_str,
+    tripple_quote_strip,
+    truncate,
+)
+# Names re-exported as the public API of the utils package
+# (each one is imported at the top of this module).
+__all__ = [
+    "config",
+    "PersistedDict",
+    "EnumWithDescription",
+    "escape_newline",
+    "escape_backticks",
+    "find_file",
+    "get_logger",
+    "prepend_all_lines",
+    "remove_quotes",
+    "tabit",
+    "to_str",
+    "truncate",
+    "tripple_quote_strip",
+]

aixtools/utils/chainlit/cl_agent_show.py ADDED Viewed

@@ -0,0 +1,82 @@
+import chainlit as cl
+import rich
+from pydantic_ai import Agent
+from pydantic_ai.messages import (
+    FinalResultEvent,
+    FunctionToolCallEvent,
+    FunctionToolResultEvent,
+    PartDeltaEvent,
+    PartStartEvent,
+    TextPartDelta,
+    ToolCallPartDelta,
+)
+from aixtools.logging.log_objects import ObjectLogger
+def _show_debug_info(debug, *args):
+    if debug:
+        rich.print(*args)
+async def show_run(agent: Agent, prompt, msg: cl.Message, debug=False, verbose=True):  # noqa: PLR0912
+    """Run an agent with a prompt and send the results to a message."""
+    nodes = []
+    async with agent.iter(prompt) as run:
+        with ObjectLogger(debug=debug, verbose=verbose) as agent_logger:
+            async for node in run:
+                nodes.append(node)
+                agent_logger.log(node)
+                if Agent.is_user_prompt_node(node):
+                    # A user prompt node => The user has provided input
+                    _show_debug_info(debug, "=== UserPromptNode: ", node)
+                elif Agent.is_model_request_node(node):
+                    # A model request node => We can stream tokens from the model's request
+                    _show_debug_info(debug, "=== ModelRequestNode: streaming partial request tokens ===")
+                    async with node.stream(run.ctx) as request_stream:
+                        async for event in request_stream:
+                            if isinstance(event, PartStartEvent):
+                                _show_debug_info(debug, f"[Request] Starting part {event.index}: ", event.part)
+                            elif isinstance(event, PartDeltaEvent):
+                                if isinstance(event.delta, TextPartDelta):
+                                    _show_debug_info(
+                                        debug,
+                                        (
+                                            "[ModelRequestNone / PartDeltaEvent / TextPartDelta] "
+                                            f"Part {event.index}: {event.delta.content_delta}"
+                                        ),
+                                    )
+                                    await msg.stream_token(event.delta.content_delta)
+                                elif isinstance(event.delta, ToolCallPartDelta):
+                                    _show_debug_info(
+                                        debug,
+                                        f"[ModelRequestNone / PartDeltaEvent / ToolCallPartDelta] Part {event.index}, ",
+                                        event.delta,
+                                    )
+                            elif isinstance(event, FinalResultEvent):
+                                _show_debug_info(
+                                    debug, f"[Result] The model produced a final result (tool_name={event.tool_name})"
+                                )
+                elif Agent.is_call_tools_node(node):
+                    # A handle-response node => The model returned some data, potentially calls a tool
+                    _show_debug_info(debug, "=== CallToolsNode: streaming partial response & tool usage ===")
+                    async with node.stream(run.ctx) as handle_stream:
+                        async for event in handle_stream:
+                            if isinstance(event, FunctionToolCallEvent):
+                                _show_debug_info(
+                                    debug,
+                                    (
+                                        f"[Tools] The LLM calls tool={event.part.tool_name!r} "
+                                        f"with args={event.part.args} (tool_call_id={event.part.tool_call_id!r})"
+                                    ),
+                                )
+                            elif isinstance(event, FunctionToolResultEvent):
+                                _show_debug_info(
+                                    debug,
+                                    f"[Tools] Tool call {event.tool_call_id!r} returned => {event.result.content}",
+                                )
+                elif Agent.is_end_node(node):
+                    assert run.result.output == node.data.output
+                    # Once an End node is reached, the agent run is complete
+                    _show_debug_info(debug, f"=== Final Agent Output: {run.result.output} ===")
+    return run.result.output

aixtools/utils/chainlit/cl_utils.py ADDED Viewed

@@ -0,0 +1,168 @@
+"""
+Utilities for Chainlit
+"""
+import inspect
+from copy import deepcopy
+from functools import wraps
+from typing import Callable, List, Optional, Union
+import pandas as pd
+from chainlit import Step
+from chainlit.context import get_context
+from literalai.observability.step import TrueStepType
+from aixtools.logging.logging_config import get_logger
+from aixtools.utils.utils import truncate
+# Module-level logger for this file
+logger = get_logger(__name__)
+# Argument names that flatten_args_kwargs() leaves out by default
+DEFAULT_SKIP_ARGS = ("self", "cls")
+# Maximum length handed to truncate() by limit_size() for string data
+MAX_SIZE_STR = 10 * 1024
+# Maximum number of DataFrame rows kept by limit_size()
+MAX_SIZE_DF_ROWS = 100
+def is_chainlit() -> bool:
+    """Are we running in chainlit?"""
+    try:
+        get_context()
+        return True
+    except Exception:
+        return False
+def flatten_args_kwargs(func, args, kwargs, skip_args=DEFAULT_SKIP_ARGS):
+    signature = inspect.signature(func)
+    bound_arguments = signature.bind(*args, **kwargs)
+    bound_arguments.apply_defaults()
+    return {k: deepcopy(v) for k, v in bound_arguments.arguments.items() if k not in skip_args}
+def _step_name(func, args, kwargs):
+    """
+    Create a step name: class.method
+    It detects the class name from the first method's argument.
+    """
+    if len(args) == 0:
+        return func.__name__
+    signature = inspect.signature(func)
+    bound_arguments = signature.bind(*args, **kwargs)
+    arguments = [(k, v) for k, v in bound_arguments.arguments.items()]
+    arg0_name, arg0_value = arguments[0]
+    if arg0_name == "self":
+        return f"{arg0_value.__class__.__name__}.{func.__name__}"
+    if arg0_name == "cls":
+        return f"{arg0_value.__name__}.{func.__name__}"
+    return func.__name__
+def limit_size(data):
+    """ """
+    if isinstance(data, str):
+        return truncate(data, max_len=MAX_SIZE_STR)
+    if isinstance(data, pd.DataFrame):
+        if len(data) > MAX_SIZE_DF_ROWS:
+            return data.head(MAX_SIZE_DF_ROWS)
+    return data
+def cl_step(  # noqa: PLR0913
+    original_function: Optional[Callable] = None,
+    *,
+    name: Optional[str] = "",
+    type: TrueStepType = "undefined",
+    id: Optional[str] = None,
+    parent_id: Optional[str] = None,
+    tags: Optional[List[str]] = None,
+    language: Optional[str] = None,
+    show_input: Union[bool, str] = "json",
+    default_open: bool = False,
+):
+    """
+    Step decorator for async and sync functions and methods (they ignore the self argument).
+    It deactivates if not within a Chainlit context.
+    """
+    def wrapper(func: Callable):
+        # Handle async decorator
+        if inspect.iscoroutinefunction(func):
+            @wraps(func)
+            async def async_wrapper(*args, **kwargs):
+                nonlocal name
+                if not name:
+                    name = _step_name(func, args, kwargs)
+                if is_chainlit():
+                    async with Step(
+                        type=type,
+                        name=name,
+                        id=id,
+                        parent_id=parent_id,
+                        tags=tags,
+                        language=language,
+                        show_input=show_input,
+                        default_open=default_open,
+                    ) as step:
+                        try:
+                            step.input = flatten_args_kwargs(func, args, kwargs)
+                        except Exception as e:
+                            logger.exception(e)
+                        result = await func(*args, **kwargs)
+                        try:
+                            if result and not step.output:
+                                step.output = limit_size(result)
+                        except Exception as e:
+                            step.is_error = True
+                            step.output = str(e)
+                        return result
+                else:
+                    # If not in Chainlit, just call the function
+                    result = await func(*args, **kwargs)
+                    print(f"Function '{func.__name__}' called with args: {args}, kwargs: {kwargs}, result: {result}")
+                    return result
+            return async_wrapper
+        else:
+            # Handle sync decorator
+            @wraps(func)
+            def sync_wrapper(*args, **kwargs):
+                nonlocal name
+                if not name:
+                    name = _step_name(func, args, kwargs)
+                if is_chainlit():
+                    with Step(
+                        type=type,
+                        name=name,
+                        id=id,
+                        parent_id=parent_id,
+                        tags=tags,
+                        language=language,
+                        show_input=show_input,
+                        default_open=default_open,
+                    ) as step:
+                        try:
+                            step.input = flatten_args_kwargs(func, args, kwargs)
+                        except Exception as e:
+                            logger.exception(e)
+                        result = func(*args, **kwargs)
+                        try:
+                            if result and not step.output:
+                                step.output = limit_size(result)
+                        except Exception as e:
+                            step.is_error = True
+                            step.output = str(e)
+                        return result
+                else:
+                    # If not in Chainlit, just call the function
+                    result = func(*args, **kwargs)
+                    print(f"Function '{func.__name__}' called with args: {args}, kwargs: {kwargs}, result: {result}")
+                    return result
+            return sync_wrapper
+    func = original_function
+    if not func:
+        return wrapper
+    else:
+        return wrapper(func)

aixtools/utils/config.py ADDED Viewed

@@ -0,0 +1,131 @@
+"""
+Configuration settings and environment variables for the application.
+"""
+import logging
+import sys
+from pathlib import Path
+from dotenv import dotenv_values, load_dotenv
+from aixtools.utils.config_util import find_env_file, get_project_root, get_variable_env
+from aixtools.utils.utils import str2bool
+# Debug mode
+LOG_LEVEL = logging.DEBUG
+# Set up some environment variables (these are usually set up by 'config.sh')
+# This file's path
+FILE_PATH = Path(__file__).resolve()
+# Three directory levels above this file.
+# NOTE(review): for a file at aixtools/utils/config.py that is the directory containing the
+# `aixtools` package (e.g. `site-packages` when installed), not the package dir itself - confirm intent
+PROJECT_DIR = FILE_PATH.parent.parent.parent.resolve()
+# Get the main project directory (the one project that is using this package)
+PROJECT_ROOT = get_project_root()
+# From the environment variables
+# Iterate over all parents of FILE_PATH to find .env files
+def all_parents(path: Path):
+    """Yield all parent directories of a given path."""
+    while path.parent != path:
+        yield path
+        path = path.parent
+# Set up environment search path
+# Start with the most specific (current directory) and expand outward
+env_dirs = [Path.cwd(), PROJECT_ROOT, FILE_PATH.parent]
+env_file = find_env_file(env_dirs)
+if env_file:
+    logging.info("Using .env file at '%s'", env_file)
+    # Load the environment variables from the found .env file
+    load_dotenv(env_file)
+    # Assign project dir based on the .env file
+    MAIN_PROJECT_DIR = Path(env_file).parent
+    logging.info("Using MAIN_PROJECT_DIR='%s'", MAIN_PROJECT_DIR)
+    # Assign variables in '.env' global python environment
+    env_vars = dotenv_values(env_file)
+    # NOTE(review): every key of the .env file becomes a module-level name here, so a key
+    # such as `logging`, `Path` or `MAIN_PROJECT_DIR` would overwrite the object defined above
+    globals().update(env_vars)
+else:
+    # NOTE(review): this runs at import time, so importing this module without a
+    # discoverable '.env' file terminates the whole process with exit status 1
+    logging.error("No '.env' file found in any of the search paths, or their parents: %s", env_dirs)
+    sys.exit(1)
+# ---
+# Directories
+# ---
+SCRIPTS_DIR = MAIN_PROJECT_DIR / "scripts"
+# DATA_DIR / DATA_DB_DIR may be overridden through the environment
+DATA_DIR = Path(get_variable_env("DATA_DIR") or MAIN_PROJECT_DIR / "data")
+DATA_DB_DIR = Path(get_variable_env("DATA_DB_DIR", default=DATA_DIR / "db"))
+LOGS_DIR = MAIN_PROJECT_DIR / "logs"
+logging.warning("Using         DATA_DIR='%s'", DATA_DIR)
+# Vector database
+VDB_CHROMA_PATH = DATA_DB_DIR / "chroma.db"
+VDB_DEFAULT_SIMILARITY_THRESHOLD = 0.85
+# ---
+# Variables in '.env' file
+# Explicitly load specific variables
+# ---
+# NOTE(review): int() raises ValueError at import time if MODEL_TIMEOUT is set to a non-integer
+MODEL_TIMEOUT = int(get_variable_env("MODEL_TIMEOUT", default="120"))  # type: ignore
+MODEL_FAMILY = get_variable_env("MODEL_FAMILY")
+# Azure models
+AZURE_MODEL_NAME = get_variable_env("AZURE_MODEL_NAME")
+AZURE_OPENAI_ENDPOINT = get_variable_env("AZURE_OPENAI_ENDPOINT")
+AZURE_OPENAI_API_KEY = get_variable_env("AZURE_OPENAI_API_KEY")
+AZURE_OPENAI_API_VERSION = get_variable_env("AZURE_OPENAI_API_VERSION")
+# OpenAI models
+OPENAI_API_KEY = get_variable_env("OPENAI_API_KEY")
+OPENAI_MODEL_NAME = get_variable_env("OPENAI_MODEL_NAME")
+# Ollama models
+OLLAMA_URL = get_variable_env("OLLAMA_URL")
+OLLAMA_MODEL_NAME = get_variable_env("OLLAMA_MODEL_NAME")
+# OpenRouter models
+OPENROUTER_API_KEY = get_variable_env("OPENROUTER_API_KEY")
+OPENROUTER_API_URL = get_variable_env("OPENROUTER_API_URL", default="https://openrouter.ai/api/v1")
+OPENROUTER_MODEL_NAME = get_variable_env("OPENROUTER_MODEL_NAME")
+# Embeddings
+VDB_EMBEDDINGS_MODEL_FAMILY = get_variable_env("VDB_EMBEDDINGS_MODEL_FAMILY")
+OPENAI_VDB_EMBEDDINGS_MODEL_NAME = get_variable_env("OPENAI_VDB_EMBEDDINGS_MODEL_NAME")
+AZURE_VDB_EMBEDDINGS_MODEL_NAME = get_variable_env("AZURE_VDB_EMBEDDINGS_MODEL_NAME")
+OLLAMA_VDB_EMBEDDINGS_MODEL_NAME = get_variable_env("OLLAMA_VDB_EMBEDDINGS_MODEL_NAME")
+# Bedrock models
+AWS_ACCESS_KEY_ID = get_variable_env("AWS_ACCESS_KEY_ID", allow_empty=True)
+AWS_SECRET_ACCESS_KEY = get_variable_env("AWS_SECRET_ACCESS_KEY", allow_empty=True)
+AWS_SESSION_TOKEN = get_variable_env("AWS_SESSION_TOKEN", allow_empty=True)
+AWS_REGION = get_variable_env("AWS_REGION", allow_empty=True, default="us-east-1")
+AWS_PROFILE = get_variable_env("AWS_PROFILE", allow_empty=True)
+BEDROCK_MODEL_NAME = get_variable_env("BEDROCK_MODEL_NAME", allow_empty=True)
+# LogFire
+# (positional arguments below are `allow_empty`, then `default`)
+LOGFIRE_TOKEN = get_variable_env("LOGFIRE_TOKEN", True, "")
+LOGFIRE_TRACES_ENDPOINT = get_variable_env("LOGFIRE_TRACES_ENDPOINT", True, "")
+# Google Vertex AI
+# NOTE(review): the default handed to str2bool is the bool True, not a string - confirm str2bool accepts it
+GOOGLE_GENAI_USE_VERTEXAI = str2bool(get_variable_env("GOOGLE_GENAI_USE_VERTEXAI", True, True))
+GOOGLE_CLOUD_PROJECT = get_variable_env("GOOGLE_CLOUD_PROJECT", True)
+GOOGLE_CLOUD_LOCATION = get_variable_env("GOOGLE_CLOUD_LOCATION", True)
+# vault parameters.
+VAULT_ADDRESS = get_variable_env("VAULT_ADDRESS", default="http://localhost:8200")
+VAULT_TOKEN = get_variable_env("VAULT_TOKEN", default="vault-token")
+# NOTE(review): read from the generic "ENV" variable, unlike the other VAULT_* settings - confirm
+VAULT_ENV = get_variable_env("ENV", default="dev")
+VAULT_MOUNT_POINT = get_variable_env("VAULT_MOUNT_POINT", default="secret")
+VAULT_PATH_PREFIX = get_variable_env("VAULT_PATH_PREFIX", default="path")

aixtools/utils/config_util.py ADDED Viewed

@@ -0,0 +1,69 @@
+"""
+Utility functions for configuration management and environment variables.
+"""
+import logging
+import os
+import sys
+from pathlib import Path
+from dotenv import find_dotenv
+def get_project_root() -> Path:
+    """
+    Return the directory where the main script lives.
+    Falls back to the current working directory if run interactively.
+    """
+    main_mod = sys.modules.get("__main__")
+    main_file = getattr(main_mod, "__file__", None)
+    if main_file:
+        return Path(main_file).resolve().parent
+    # no __file__ (e.g. interactive shell); assume cwd is the project root
+    return Path.cwd()
+def all_parents(path: Path):
+    """Yield all parent directories of a given path."""
+    while path.parent != path:
+        yield path
+        path = path.parent
+def find_env_file(env_search_dirs: list[Path]):
+    """Find the first .env file in the given list of paths and their parents."""
+    env_file = find_dotenv()
+    logging.warning("Looking for '.env' file in default directory")
+    if env_file:
+        return env_file
+    # Find all parents of the paths
+    for search_dir in env_search_dirs:
+        # '.env' file in this directory?
+        logging.warning("Looking for '.env' file at '%s'", search_dir)
+        env_file = find_dotenv(str(search_dir / ".env"))
+        if env_file:
+            return env_file
+        # Try all parents of this dir
+        for parent_dir in all_parents(search_dir):
+            logging.warning("Looking for '.env' file at '%s'", parent_dir)
+            env_file = find_dotenv(str(parent_dir / ".env"))
+            if env_file:
+                return env_file
+    return None
+def get_variable_env(name: str, allow_empty=True, default=None) -> str | None:
+    """Retrieve environment variable with optional validation and default value."""
+    val = os.environ.get(name, default)
+    if not allow_empty and ((val is None) or (val == "")):
+        raise ValueError(f"Environment variable {name} is not set")
+    return val
+def set_variable_env(name: str, val: str) -> str:
+    """Set environment variable and validate it's not None."""
+    os.environ[name] = val
+    if val is None:
+        raise ValueError(f"Environment variable {name} is set to None")
+    return val

aixtools/utils/enum_with_description.py ADDED Viewed

@@ -0,0 +1,37 @@
+"""
+Enhanced Enum implementation that supports descriptions for enum values.
+"""
+from enum import Enum
+class EnumWithDescription(str, Enum):
+    """
+    An enum with string values and descriptions.
+    Each enum value has a string representation and a description.
+    Example:
+        class MyEnum(EnumWithDescription):
+            VALUE1 = "value1", "This is a description for VALUE1"
+            VALUE2 = "value2", "This is a description for VALUE2"
+            VALUE3 = "value3", "This is a description for VALUE3"
+        print(MyEnum.describe())
+        # Output:
+        # VALUE1: This is a description for VALUE1
+        # VALUE2: This is a description for VALUE2
+        # VALUE3: This is a description for VALUE3
+    """
+    @classmethod
+    def describe(cls) -> str:
+        """
+        Get the description of a decision's enum values
+        """
+        return "\n".join([f"{field.name}: {field.__doc__}" for field in cls])
+    def __new__(cls, value, doc):
+        obj = str.__new__(cls, value)
+        obj._value_ = value
+        obj.__doc__ = doc
+        return obj

aixtools/utils/files.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""File utilities"""
+def is_text_content(data: bytes, mime_type: str) -> bool:
+    """Check if content is text based on mime type and content analysis."""
+    # Check mime type first
+    if mime_type and (
+        mime_type.startswith("text/") or mime_type in ["application/json", "application/xml", "application/javascript"]
+    ):
+        return True
+    # Try to decode as UTF-8 to check if it's text
+    try:
+        data.decode("utf-8")
+        return True
+    except UnicodeDecodeError:
+        return False

aixtools/utils/persisted_dict.py ADDED Viewed

@@ -0,0 +1,99 @@
+"""
+Dictionary implementation that automatically persists its contents to disk.
+"""
+import json
+import pickle
+from pathlib import Path
+from aixtools.logging.logging_config import get_logger
+logger = get_logger(__name__)
+DATA_KEY = "__dictionary_data__"
+class PersistedDict(dict):
+    """
+    A dictionary that persists to a file on disk as JSON.
+    Keys are always converted to strings.
+    """
+    def __init__(self, file_path: Path):
+        self.file_path = file_path if isinstance(file_path, Path) else Path(file_path)
+        self.use_pickle = None
+        if file_path.suffix == ".json":
+            self.use_pickle = False
+        elif file_path.suffix == ".pkl":
+            self.use_pickle = True
+        else:
+            raise ValueError(f"Unsupported file extension '{file_path.suffix}' for file '{file_path}'")
+        self.load()
+    def __contains__(self, key):
+        return super().__contains__(str(key))
+    def __delitem__(self, key):
+        super().__delitem__(str(key))
+        self.save()
+    def get(self, key, default=None):
+        return super().get(str(key), default)
+    def __getitem__(self, key):
+        return super().__getitem__(str(key))
+    def load(self):
+        """Load dictionary data from disk using either pickle or JSON format."""
+        if self.use_pickle:
+            self._load_pickle()
+        else:
+            self._load_json()
+    def _load_json(self):
+        try:
+            with open(self.file_path, "r", encoding="utf-8") as f:
+                self.update(json.load(f))
+            logger.debug("Persistent dictionary: Loaded %d items from JSON file '%s'", len(self), self.file_path)
+        except FileNotFoundError:
+            pass
+    def _load_pickle(self):
+        try:
+            with open(self.file_path, "rb") as f:
+                object_data = pickle.load(f)
+            for k, v in object_data[DATA_KEY].items():
+                super().__setitem__(str(k), v)
+            for k, v in object_data.items():
+                if k != DATA_KEY:
+                    self.__dict__[k] = v
+            logger.debug("Persistent dictionary: Loaded %d items from pickle file '%s'", len(self), self.file_path)
+        except FileNotFoundError:
+            pass
+    def save(self):
+        """Save dictionary data to disk using either pickle or JSON format."""
+        if self.use_pickle:
+            self._save_pickle()
+        else:
+            self._save_json()
+    def _save_json(self):
+        self.file_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(self.file_path, "w", encoding="utf-8") as f:
+            json.dump(self, f, indent=2)
+    def _save_pickle(self):
+        self.file_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(self.file_path, "wb") as f:
+            object_data = dict(self.__dict__)
+            object_data[DATA_KEY] = dict(self)
+            pickle.dump(object_data, f)
+    def __setitem__(self, key, value):
+        super().__setitem__(str(key), value)
+        self.save()
+    def update(self, *args, **kwargs):
+        super().update(*args, **kwargs)
+        self.save()