PyPI - agenta - Versions diffs - 0.65.0__py3-none-any.whl → 0.70.1__py3-none-any.whl - Mend

agenta 0.65.0__py3-none-any.whl → 0.70.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

agenta/__init__.py +66 -36
agenta/client/backend/types/testset_output_response.py +1 -0
agenta/sdk/agenta_init.py +90 -12
agenta/sdk/assets.py +16 -10
agenta/sdk/engines/tracing/tracing.py +2 -2
agenta/sdk/evaluations/metrics.py +3 -3
agenta/sdk/litellm/litellm.py +38 -30
agenta/sdk/middleware/auth.py +19 -4
agenta/sdk/middleware/otel.py +0 -3
agenta/sdk/middleware/vault.py +20 -5
agenta/sdk/middlewares/running/vault.py +1 -1
agenta/sdk/models/evaluations.py +11 -3
agenta/sdk/models/shared.py +1 -1
agenta/sdk/tracing/exporters.py +1 -0
agenta/sdk/tracing/processors.py +40 -37
agenta/sdk/tracing/tracing.py +91 -2
agenta/sdk/workflows/runners/__init__.py +3 -0
agenta/sdk/workflows/runners/base.py +30 -0
agenta/sdk/workflows/runners/daytona.py +268 -0
agenta/sdk/workflows/runners/local.py +108 -0
agenta/sdk/workflows/runners/registry.py +48 -0
agenta/sdk/workflows/sandbox.py +18 -81
{agenta-0.65.0.dist-info → agenta-0.70.1.dist-info}/METADATA +4 -2
{agenta-0.65.0.dist-info → agenta-0.70.1.dist-info}/RECORD +25 -20
{agenta-0.65.0.dist-info → agenta-0.70.1.dist-info}/WHEEL +0 -0

agenta/sdk/tracing/processors.py CHANGED Viewed

@@ -1,7 +1,5 @@
 from typing import Optional, Dict, List
 from threading import Lock
-from json import dumps
-from uuid import UUID
 from opentelemetry.baggage import get_all as get_baggage
 from opentelemetry.context import Context
@@ -14,8 +12,7 @@ from opentelemetry.sdk.trace.export import (
 from opentelemetry.trace import SpanContext
 from agenta.sdk.utils.logging import get_module_logger
-from agenta.sdk.tracing.conventions import Reference
+from agenta.sdk.models.tracing import BaseModel
 from agenta.sdk.contexts.tracing import TracingContext
 log = get_module_logger(__name__)
@@ -65,15 +62,36 @@ class TraceProcessor(SpanProcessor):
         # )
         for key in self.references.keys():
-            span.set_attribute(f"ag.refs.{key}", self.references[key])
+            ref = self.references[key]
+            if isinstance(ref, BaseModel):
+                try:
+                    ref = ref.model_dump(mode="json", exclude_none=True)
+                except Exception:  # pylint: disable=bare-except
+                    pass
+            if isinstance(ref, dict):
+                for field, value in ref.items():
+                    span.set_attribute(f"ag.refs.{key}.{field}", str(value))
         baggage = get_baggage(parent_context)
         for key in baggage.keys():
-            if key.startswith("ag.refs."):
-                _key = key.replace("ag.refs.", "")
-                if _key in [_.value for _ in Reference.__members__.values()]:
-                    span.set_attribute(key, baggage[key])
+            if key.startswith("ag."):
+                value = baggage[key]
+                if key.startswith("ag.refs."):
+                    ref = value
+                    if isinstance(value, BaseModel):
+                        try:
+                            ref = value.model_dump(mode="json", exclude_none=True)  # type: ignore
+                        except Exception:  # pylint: disable=bare-except
+                            pass
+                    if isinstance(ref, dict):
+                        for field, val in ref.items():
+                            span.set_attribute(f"{key}.{field}", str(val))
+                else:
+                    # Not a reference - only set if it's a valid attribute type
+                    if isinstance(value, (str, bool, int, float, bytes)):
+                        span.set_attribute(key, value)
         context = TracingContext.get()
@@ -105,10 +123,11 @@ class TraceProcessor(SpanProcessor):
         if not self.inline:
             if context.links:
                 for key, link in context.links.items():
-                    try:
-                        link = link.model_dump(mode="json", exclude_none=True)
-                    except:  # pylint: disable=bare-except
-                        pass
+                    if isinstance(link, BaseModel):
+                        try:
+                            link = link.model_dump(mode="json", exclude_none=True)
+                        except Exception:
+                            pass
                     if not isinstance(link, dict):
                         continue
                     if not link.get("trace_id") or not link.get("span_id"):
@@ -127,30 +146,14 @@ class TraceProcessor(SpanProcessor):
         if context.references:
             for key, ref in context.references.items():
-                try:
-                    ref = ref.model_dump(mode="json", exclude_none=True)
-                except:  # pylint: disable=bare-except
-                    pass
-                if not isinstance(ref, dict):
-                    continue
-                if not ref.get("id") and not ref.get("slug") and not ref.get("version"):
-                    continue
-                if ref.get("id"):
-                    span.set_attribute(
-                        f"ag.refs.{key}.id",
-                        str(ref.get("id")),
-                    )
-                if ref.get("slug"):
-                    span.set_attribute(
-                        f"ag.refs.{key}.slug",
-                        str(ref.get("slug")),
-                    )
-                if ref.get("version"):
-                    span.set_attribute(
-                        f"ag.refs.{key}.version",
-                        str(ref.get("version")),
-                    )
+                if isinstance(ref, BaseModel):
+                    try:
+                        ref = ref.model_dump(mode="json", exclude_none=True)
+                    except Exception:
+                        pass
+                if isinstance(ref, dict):
+                    for field, value in ref.items():
+                        span.set_attribute(f"ag.refs.{key}.{field}", str(value))
         trace_id = span.context.trace_id
         span_id = span.context.span_id

agenta/sdk/tracing/tracing.py CHANGED Viewed

@@ -31,6 +31,7 @@ from agenta.sdk.tracing.conventions import Reference, is_valid_attribute_key
 from agenta.sdk.tracing.propagation import extract, inject
 from agenta.sdk.utils.cache import TTLLRUCache
+import agenta as ag
 log = get_module_logger(__name__)
@@ -101,7 +102,7 @@ class Tracing(metaclass=Singleton):
         # TRACE PROCESSORS -- OTLP
         try:
-            log.info("Agenta - OLTP URL: %s", self.otlp_url)
+            log.info("Agenta -    OTLP URL: %s", self.otlp_url)
             _otlp = TraceProcessor(
                 OTLPExporter(
@@ -114,7 +115,7 @@ class Tracing(metaclass=Singleton):
             self.tracer_provider.add_span_processor(_otlp)
         except:  # pylint: disable=bare-except
-            log.warning("Agenta - OLTP unreachable, skipping exports.")
+            log.warning("Agenta - OTLP unreachable, skipping exports.")
         # --- INLINE
         if inline:
@@ -215,6 +216,42 @@ class Tracing(metaclass=Singleton):
                         namespace="metrics",
                     )
+    def store_session(
+        self,
+        session_id: Optional[str] = None,
+        span: Optional[Span] = None,
+    ):
+        """Tag a span with the session it belongs to.
+        The identifier is written as the ``id`` attribute in the ``session``
+        namespace. Nothing is written when ``session_id`` is empty or None.
+        The whole operation runs inside the ``suppress()`` context manager.
+        Args:
+            session_id: Unique identifier for the session
+            span: Span to tag; when omitted, the current span is looked up
+        """
+        with suppress():
+            # Resolve the target span first, exactly as before, even if there
+            # turns out to be nothing to write.
+            target_span = self.get_current_span() if span is None else span
+            if session_id:
+                target_span.set_attribute("id", session_id, namespace="session")
+    def store_user(
+        self,
+        user_id: Optional[str] = None,
+        span: Optional[Span] = None,
+    ):
+        """Tag a span with the user it belongs to.
+        The identifier is written as the ``id`` attribute in the ``user``
+        namespace. Nothing is written when ``user_id`` is empty or None.
+        The whole operation runs inside the ``suppress()`` context manager.
+        Args:
+            user_id: Unique identifier for the user
+            span: Span to tag; when omitted, the current span is looked up
+        """
+        with suppress():
+            # Resolve the target span first, exactly as before, even if there
+            # turns out to be nothing to write.
+            target_span = self.get_current_span() if span is None else span
+            if user_id:
+                target_span.set_attribute("id", user_id, namespace="user")
     def is_inline_trace_ready(
         self,
         trace_id: Optional[int] = None,
@@ -314,6 +351,58 @@ class Tracing(metaclass=Singleton):
         return None
+    def get_trace_url(
+        self,
+        trace_id: Optional[str] = None,
+    ) -> str:
+        """
+        Build a URL to view a trace in the Agenta UI.
+        Automatically extracts the trace ID from the current tracing context
+        if not explicitly provided.
+        Args:
+            trace_id: Optional trace ID (hex string format). If not provided,
+                      it will be automatically extracted from the current trace context.
+        Returns:
+            The full URL to view the trace in the observability dashboard
+        Raises:
+            RuntimeError: If the SDK is not initialized, no active trace context exists,
+                          or the web URL / workspace / project cannot be determined
+        """
+        if trace_id is None:
+            span_ctx = self.get_span_context()
+            if span_ctx is None or not span_ctx.is_valid:
+                raise RuntimeError(
+                    "No active trace context found. "
+                    "Make sure you call this within an instrumented function or span."
+                )
+            # Render the integer trace id as a zero-padded 32-character hex string.
+            trace_id = f"{span_ctx.trace_id:032x}"
+        if not ag or not ag.DEFAULT_AGENTA_SINGLETON_INSTANCE:
+            raise RuntimeError(
+                "Agenta SDK is not initialized. Please call ag.init() first."
+            )
+        instance = ag.DEFAULT_AGENTA_SINGLETON_INSTANCE
+        api_url = instance.api_url
+        web_url = None
+        if api_url:
+            # Derive the web URL by dropping "/api" from the part AFTER the
+            # scheme separator only. A plain api_url.replace("/api", "") also
+            # matches the "/api" inside "://api.example.com" and would turn
+            # "https://api.example.com/api" into "https:/.example.com".
+            scheme, separator, remainder = api_url.partition("://")
+            if separator:
+                web_url = scheme + separator + remainder.replace("/api", "")
+            else:
+                web_url = api_url.replace("/api", "")
+        # The organization id is returned by resolve_scopes() but not needed here.
+        (_organization_id, workspace_id, project_id) = instance.resolve_scopes()
+        if not web_url or not workspace_id or not project_id:
+            raise RuntimeError(
+                "Could not determine workspace/project context. Please call ag.init() first."
+            )
+        return (
+            f"{web_url}/w/{workspace_id}/p/{project_id}/observability?trace={trace_id}"
+        )
 def get_tracer(
     tracing: Tracing,

agenta/sdk/workflows/runners/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from agenta.sdk.workflows.runners.registry import get_runner
+__all__ = ["get_runner"]

agenta/sdk/workflows/runners/base.py ADDED Viewed

@@ -0,0 +1,30 @@
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Union
+class CodeRunner(ABC):
+    """Common interface for evaluator code runners (local and remote execution).
+    Callers depend only on run(); each concrete runner decides where and how
+    the supplied code is executed.
+    """
+    @abstractmethod
+    def run(
+        self,
+        code: str,
+        app_params: Dict[str, Any],
+        inputs: Dict[str, Any],
+        output: Union[dict, str],
+        correct_answer: Any,
+    ) -> Union[float, None]:
+        """
+        Run evaluator code and produce its score.
+        Args:
+            code: Python source to execute
+            app_params: Parameters of the application
+            inputs: Input data handed to the code
+            output: What the application variant produced
+            correct_answer: Expected answer the output is compared against
+        Returns:
+            Float score between 0 and 1, or None if execution fails
+        """
+        return None

agenta/sdk/workflows/runners/daytona.py ADDED Viewed

@@ -0,0 +1,268 @@
+import os
+import json
+from typing import Any, Dict, Union, Optional
+from daytona import Daytona, DaytonaConfig, Sandbox
+from agenta.sdk.workflows.runners.base import CodeRunner
+from agenta.sdk.utils.logging import get_module_logger
+log = get_module_logger(__name__)
+# Template for wrapping user code with evaluation context.
+# It is filled in with str.format() using two fields:
+#   {params_json!r} - replaced by the repr() of the JSON payload, so it lands
+#                     in the generated script as a quoted string literal that
+#                     json.loads() can parse;
+#   {user_code}     - replaced by the evaluator source, which must define
+#                     evaluate(app_params, inputs, output, correct_answer).
+# The doubled braces {{ }} are format escapes that yield a literal dict in the
+# generated script. The script's final statement prints a single JSON object
+# of the form {"result": ...} to stdout.
+EVALUATION_CODE_TEMPLATE = """
+import json
+# Parse all parameters from a single dict
+params = json.loads({params_json!r})
+app_params = params['app_params']
+inputs = params['inputs']
+output = params['output']
+correct_answer = params['correct_answer']
+# User-provided evaluation code
+{user_code}
+# Execute and capture result
+result = evaluate(app_params, inputs, output, correct_answer)
+# Ensure result is a float
+if isinstance(result, (float, int, str)):
+    try:
+        result = float(result)
+    except (ValueError, TypeError):
+        result = None
+# Print result for capture
+print(json.dumps({{"result": result}}))
+"""
+class DaytonaRunner(CodeRunner):
+    """Remote code runner using Daytona sandbox for execution.
+    The class is a singleton so the Daytona client is created once and reused.
+    Every run() call creates its own ephemeral sandbox and deletes it when the
+    call finishes, whether the evaluation succeeded or failed.
+    """
+    _instance: Optional["DaytonaRunner"] = None
+    def __new__(cls):
+        """Singleton pattern: return the shared instance, creating it on first use."""
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+    def __init__(self):
+        """Initialize Daytona runner with config from environment variables.
+        Raises:
+            ValueError: If DAYTONA_API_KEY is not set.
+        """
+        if self._initialized:
+            return
+        self.daytona: Optional[Daytona] = None
+        self._validate_config()
+        # Flag the instance as initialized only once validation has passed.
+        # Setting it earlier would let every construction after a failed one
+        # return early and silently skip validation.
+        self._initialized = True
+    def _validate_config(self) -> None:
+        """Validate required environment variables for Daytona.
+        Raises:
+            ValueError: If DAYTONA_API_KEY is not set.
+        """
+        # Only DAYTONA_API_KEY is strictly required
+        # DAYTONA_API_URL defaults to https://app.daytona.io/api
+        # DAYTONA_TARGET defaults to AGENTA_REGION or 'eu'
+        if not os.getenv("DAYTONA_API_KEY"):
+            raise ValueError(
+                "Missing required environment variable: DAYTONA_API_KEY. "
+                "Set AGENTA_SERVICES_SANDBOX_RUNNER=local to use local execution instead."
+            )
+    def _initialize_client(self) -> None:
+        """Lazily initialize Daytona client on first use.
+        Raises:
+            RuntimeError: If the client cannot be constructed.
+        """
+        if self.daytona is not None:
+            return
+        try:
+            # Get configuration with fallbacks
+            api_url = os.getenv("DAYTONA_API_URL") or "https://app.daytona.io/api"
+            api_key = os.getenv("DAYTONA_API_KEY")
+            target = os.getenv("DAYTONA_TARGET") or os.getenv("AGENTA_REGION") or "eu"
+            config = DaytonaConfig(
+                api_url=api_url,
+                api_key=api_key,
+                target=target,
+            )
+            self.daytona = Daytona(config)
+        except Exception as e:
+            raise RuntimeError(f"Failed to initialize Daytona client: {e}") from e
+    def _create_sandbox(self) -> Any:
+        """Create a new ephemeral sandbox for this run from snapshot.
+        Raises:
+            RuntimeError: If the client is not initialized, the snapshot
+                environment variable is missing, or creation fails.
+        """
+        try:
+            if self.daytona is None:
+                raise RuntimeError("Daytona client not initialized")
+            snapshot_id = os.getenv("AGENTA_SERVICES_SANDBOX_SNAPSHOT_PYTHON")
+            if not snapshot_id:
+                raise RuntimeError(
+                    "AGENTA_SERVICES_SANDBOX_SNAPSHOT_PYTHON environment variable is required. "
+                    "Set it to the Daytona sandbox ID or snapshot name you want to use."
+                )
+            from daytona import CreateSandboxFromSnapshotParams
+            return self.daytona.create(
+                CreateSandboxFromSnapshotParams(
+                    snapshot=snapshot_id,
+                    ephemeral=True,
+                )
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed to create sandbox from snapshot: {e}") from e
+    @staticmethod
+    def _delete_sandbox(sandbox: Any) -> None:
+        """Delete a sandbox; a failed deletion is logged instead of raised."""
+        try:
+            sandbox.delete()
+        except Exception:
+            # Best-effort cleanup: do not let a failed delete mask the result
+            # (or the original error) of the evaluation itself.
+            log.error("Failed to delete Daytona sandbox", exc_info=True)
+    @staticmethod
+    def _parse_result(stdout: str) -> Union[float, None]:
+        """Extract the score from the last parseable JSON result line of stdout.
+        Raises:
+            ValueError: If no line holds a {"result": <number or null>} object.
+        """
+        # Scan from the end: the wrapper prints its result last, after anything
+        # the user code itself may have printed.
+        for line in reversed(stdout.strip().split("\n")):
+            if not line.strip():
+                continue
+            try:
+                payload = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            if isinstance(payload, dict) and "result" in payload:
+                result = payload["result"]
+                if result is None:
+                    return None
+                if isinstance(result, (float, int)):
+                    return float(result)
+        raise ValueError("Could not parse evaluation result from Daytona output")
+    def run(
+        self,
+        code: str,
+        app_params: Dict[str, Any],
+        inputs: Dict[str, Any],
+        output: Union[dict, str],
+        correct_answer: Any,
+    ) -> Union[float, None]:
+        """
+        Execute provided Python code in Daytona sandbox.
+        The code must define an `evaluate()` function that takes
+        (app_params, inputs, output, correct_answer) and returns a float (0-1).
+        Args:
+            code: The Python code to be executed
+            app_params: The parameters of the app variant
+            inputs: Inputs to be used during code execution
+            output: The output of the app variant after being called
+            correct_answer: The correct answer (or target) for comparison
+        Returns:
+            The score as a float, or None if the evaluator produced no
+            numeric result
+        Raises:
+            RuntimeError: If the client or sandbox cannot be set up, the
+                sandbox reports an error, or the result cannot be parsed
+        """
+        self._initialize_client()
+        sandbox: Sandbox = self._create_sandbox()
+        try:
+            # Ship all parameters as one JSON document embedded in the script
+            params_json = json.dumps(
+                {
+                    "app_params": app_params,
+                    "inputs": inputs,
+                    "output": output,
+                    "correct_answer": correct_answer,
+                }
+            )
+            wrapped_code = EVALUATION_CODE_TEMPLATE.format(
+                params_json=params_json,
+                user_code=code,
+            )
+            # stderr is collected only so it can be logged if execution fails
+            stderr_lines = []
+            def on_stdout(line: str) -> None:
+                """Ignore streamed stdout; the response's stdout field is parsed instead."""
+            def on_stderr(line: str) -> None:
+                """Capture stderr output."""
+                stderr_lines.append(line)
+            def on_error(error: Exception) -> None:
+                """Log errors reported by the sandbox."""
+                log.error("[ERROR] %s: %s", type(error).__name__, error)
+            response = sandbox.code_interpreter.run_code(
+                wrapped_code,
+                on_stdout=on_stdout,
+                on_stderr=on_stderr,
+                on_error=on_error,
+            )
+            response_error = getattr(response, "error", None)
+            if response_error:
+                log.error(
+                    "Sandbox execution error: %s (stderr: %s)",
+                    response_error,
+                    stderr_lines,
+                )
+                raise RuntimeError(f"Sandbox execution failed: {response_error}")
+            return self._parse_result(getattr(response, "stdout", "") or "")
+        except Exception as e:
+            log.error("Error during Daytona code execution: %s", e, exc_info=True)
+            raise RuntimeError(f"Error during Daytona code execution: {e}") from e
+        finally:
+            # Always release the sandbox, including when run_code() or result
+            # parsing raised; otherwise failed runs leak sandboxes.
+            self._delete_sandbox(sandbox)
+    def cleanup(self) -> None:
+        """Clean up Daytona client resources by dropping the client reference."""
+        self.daytona = None
+    def __del__(self):
+        """Ensure cleanup on deletion."""
+        try:
+            self.cleanup()
+        except Exception:
+            # Never raise from a finalizer
+            pass

agenta/sdk/workflows/runners/local.py ADDED Viewed

@@ -0,0 +1,108 @@
+from typing import Any, Dict, Union, Text
+from RestrictedPython import safe_builtins, compile_restricted, utility_builtins
+from RestrictedPython.Eval import (
+    default_guarded_getiter,
+    default_guarded_getitem,
+)
+from RestrictedPython.Guards import (
+    guarded_iter_unpack_sequence,
+    full_write_guard,
+)
+from agenta.sdk.workflows.runners.base import CodeRunner
+class LocalRunner(CodeRunner):
+    """Local code runner using RestrictedPython for safe execution."""
+    def run(
+        self,
+        code: str,
+        app_params: Dict[str, Any],
+        inputs: Dict[str, Any],
+        output: Union[dict, str],
+        correct_answer: Any,
+    ) -> Union[float, None]:
+        """
+        Execute provided Python code in-process using RestrictedPython.
+        The code must define an `evaluate()` function that takes
+        (app_params, inputs, output, correct_answer).
+        Args:
+            code: The Python code to be executed
+            app_params: The parameters of the app variant
+            inputs: Inputs to be used during code execution
+            output: The output of the app variant after being called
+            correct_answer: The correct answer (or target) for comparison
+        Returns:
+            The evaluator's result converted to float (the 0-1 range is
+            expected by convention but is not enforced here)
+        Raises:
+            KeyError: If a KeyError occurs during execution, e.g. the code
+                does not define `evaluate`
+            SyntaxError: If the code fails restricted compilation
+            RuntimeError: For any other failure, including a result that
+                cannot be converted to float
+        """
+        # Define the available built-ins
+        local_builtins = safe_builtins.copy()
+        # NOTE(review): exposing the real __import__ lets the evaluated code
+        # import ANY installed module, not only the ones listed below, so the
+        # allow-list is not actually enforced. Left unchanged to avoid
+        # breaking existing evaluators; consider a guarded import instead.
+        local_builtins["__import__"] = __import__
+        # Supported packages, pre-imported and exposed by name
+        allowed_imports = (
+            "math",
+            "random",
+            "datetime",
+            "json",
+            "requests",
+            "typing",
+        )
+        local_builtins.update(
+            {package_name: __import__(package_name) for package_name in allowed_imports}
+        )
+        local_builtins.update(utility_builtins)
+        # Define the environment for the code execution
+        environment = {
+            "_getiter_": default_guarded_getiter,
+            "_getitem_": default_guarded_getitem,
+            "_iter_unpack_sequence_": guarded_iter_unpack_sequence,
+            "_write_": full_write_guard,
+            "__builtins__": local_builtins,
+        }
+        try:
+            # Compile inside the try so that compilation errors go through the
+            # same SyntaxError handler as errors raised while executing.
+            byte_code = compile_restricted(code, filename="<inline>", mode="exec")
+            exec(byte_code, environment)
+            # Call the evaluation function, extract the result
+            result = environment["evaluate"](app_params, inputs, output, correct_answer)
+            # Attempt to convert result to float
+            if isinstance(result, (float, int, str)):
+                try:
+                    result = float(result)
+                except ValueError as e:
+                    raise ValueError(
+                        f"Result cannot be converted to float: {e}"
+                    ) from e
+            if not isinstance(result, float):
+                raise TypeError(
+                    f"Result is not a float after conversion: {type(result)}"
+                )
+            return result
+        except KeyError as e:
+            raise KeyError(f"Missing expected key in environment: {e}") from e
+        except SyntaxError as e:
+            raise SyntaxError(f"Syntax error in provided code: {e}") from e
+        except Exception as e:
+            raise RuntimeError(f"Error during code execution: {e}") from e

agenta 0.65.0__py3-none-any.whl → 0.70.1__py3-none-any.whl

agenta 0.65.0__py3-none-any.whl → 0.70.1__py3-none-any.whl