PyPI - agenta - Versions diffs - 0.63.2__py3-none-any.whl → 0.68.0__py3-none-any.whl - Mend

agenta-0.63.2-py3-none-any.whl → agenta-0.68.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

agenta/client/backend/types/projects_response.py +1 -0
agenta/client/backend/types/testset_output_response.py +1 -0
agenta/sdk/agenta_init.py +2 -2
agenta/sdk/engines/tracing/tracing.py +2 -2
agenta/sdk/evaluations/metrics.py +3 -3
agenta/sdk/evaluations/runs.py +5 -6
agenta/sdk/models/evaluations.py +11 -3
agenta/sdk/tracing/tracing.py +2 -2
agenta/sdk/workflows/runners/__init__.py +3 -0
agenta/sdk/workflows/runners/base.py +30 -0
agenta/sdk/workflows/runners/daytona.py +274 -0
agenta/sdk/workflows/runners/local.py +108 -0
agenta/sdk/workflows/runners/registry.py +31 -0
agenta/sdk/workflows/sandbox.py +18 -81
{agenta-0.63.2.dist-info → agenta-0.68.0.dist-info}/METADATA +18 -23
{agenta-0.63.2.dist-info → agenta-0.68.0.dist-info}/RECORD +17 -12
{agenta-0.63.2.dist-info → agenta-0.68.0.dist-info}/WHEEL +0 -0

agenta/client/backend/types/projects_response.py CHANGED Viewed

@@ -13,6 +13,7 @@ class ProjectsResponse(UniversalBaseModel):
     workspace_name: typing.Optional[str] = None
     project_id: str
     project_name: str
+    is_default_project: bool = False
     user_role: typing.Optional[str] = None
     is_demo: typing.Optional[bool] = None

agenta/client/backend/types/testset_output_response.py CHANGED Viewed

@@ -13,6 +13,7 @@ class TestsetOutputResponse(UniversalBaseModel):
     name: str
     created_at: str
     updated_at: str
+    columns: typing.List[str]
     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(

agenta/sdk/agenta_init.py CHANGED Viewed

@@ -70,7 +70,7 @@ class AgentaSingleton:
         """
-        log.info("Agenta -  SDK ver: %s", version("agenta"))
+        log.info("Agenta -     SDK ver: %s", version("agenta"))
         config = {}
         if config_fname:
@@ -118,7 +118,7 @@ class AgentaSingleton:
             or None  # NO FALLBACK
         )
-        log.info("Agenta -  API URL: %s", self.api_url)
+        log.info("Agenta -     API URL: %s", self.api_url)
         self.scope_type = (
             scope_type

agenta/sdk/engines/tracing/tracing.py CHANGED Viewed

@@ -114,7 +114,7 @@ class Tracing(metaclass=Singleton):
         # TRACE PROCESSORS -- OTLP
         try:
-            log.info("Agenta - OLTP URL: %s", self.otlp_url)
+            log.info("Agenta -    OTLP URL: %s", self.otlp_url)
             _otlp = TraceProcessor(
                 OTLPExporter(
@@ -127,7 +127,7 @@ class Tracing(metaclass=Singleton):
             self.tracer_provider.add_span_processor(_otlp)
         except:  # pylint: disable=bare-except
-            log.warning("Agenta - OLTP unreachable, skipping exports.")
+            log.warning("Agenta - OTLP unreachable, skipping exports.")
         # GLOBAL TRACER PROVIDER -- INSTRUMENTATION LIBRARIES
         set_tracer_provider(self.tracer_provider)

agenta/sdk/evaluations/metrics.py CHANGED Viewed

@@ -13,15 +13,15 @@ async def arefresh(
     # timestamp: Optional[str] = None,
     # interval: Optional[float] = None,
 ) -> EvaluationMetrics:
-    payload = dict(
+    metrics = dict(
         run_id=str(run_id),
         scenario_id=str(scenario_id) if scenario_id else None,
     )
     response = authed_api()(
         method="POST",
-        endpoint=f"/preview/evaluations/metrics/refresh",
-        params=payload,
+        endpoint="/preview/evaluations/metrics/refresh",
+        json=dict(metrics=metrics),
     )
     try:

agenta/sdk/evaluations/runs.py CHANGED Viewed

@@ -124,11 +124,10 @@ async def aclose(
 async def aurl(
     *,
     run_id: UUID,
-) -> str:
+) -> Optional[str]:
     response = authed_api()(
         method="GET",
-        endpoint=f"/projects",
-        params={"scope": "project"},
+        endpoint=f"/projects/current",
     )
     try:
@@ -137,10 +136,10 @@ async def aurl(
         print(response.text)
         raise
-    if len(response.json()) != 1:
-        return None
+    project_info = response.json()
-    project_info = response.json()[0]
+    if not project_info:
+        return None
     workspace_id = project_info.get("workspace_id")
     project_id = project_info.get("project_id")

agenta/sdk/models/evaluations.py CHANGED Viewed

@@ -53,9 +53,17 @@ class EvaluationStatus(str, Enum):
 class EvaluationRunFlags(BaseModel):
-    is_closed: Optional[bool] = None  # Indicates if the run is immutable
-    is_live: Optional[bool] = None  # Indicates if the run is updated periodically
-    is_active: Optional[bool] = None  # Indicates if the run is currently active
+    is_live: bool = False  # Indicates if the run has live queries
+    is_active: bool = False  # Indicates if the run is currently active
+    is_closed: bool = False  # Indicates if the run is modifiable
+    #
+    has_queries: bool = False  # Indicates if the run has queries
+    has_testsets: bool = False  # Indicates if the run has testsets
+    has_evaluators: bool = False  # Indicates if the run has evaluators
+    #
+    has_custom: bool = False  # Indicates if the run has custom evaluators
+    has_human: bool = False  # Indicates if the run has human evaluators
+    has_auto: bool = False  # Indicates if the run has auto evaluators
 class SimpleEvaluationFlags(EvaluationRunFlags):

agenta/sdk/tracing/tracing.py CHANGED Viewed

@@ -101,7 +101,7 @@ class Tracing(metaclass=Singleton):
         # TRACE PROCESSORS -- OTLP
         try:
-            log.info("Agenta - OLTP URL: %s", self.otlp_url)
+            log.info("Agenta -    OTLP URL: %s", self.otlp_url)
             _otlp = TraceProcessor(
                 OTLPExporter(
@@ -114,7 +114,7 @@ class Tracing(metaclass=Singleton):
             self.tracer_provider.add_span_processor(_otlp)
         except:  # pylint: disable=bare-except
-            log.warning("Agenta - OLTP unreachable, skipping exports.")
+            log.warning("Agenta - OTLP unreachable, skipping exports.")
         # --- INLINE
         if inline:

agenta/sdk/workflows/runners/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from agenta.sdk.workflows.runners.registry import get_runner
+__all__ = ["get_runner"]

agenta/sdk/workflows/runners/base.py ADDED Viewed

@@ -0,0 +1,30 @@
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Union
+class CodeRunner(ABC):
+    """Abstract base class for code runners (local and remote execution)."""
+    @abstractmethod
+    def run(
+        self,
+        code: str,
+        app_params: Dict[str, Any],
+        inputs: Dict[str, Any],
+        output: Union[dict, str],
+        correct_answer: Any,
+    ) -> Union[float, None]:
+        """
+        Execute code and return a float score between 0 and 1.
+        Args:
+            code: Python code to execute
+            app_params: Application parameters
+            inputs: Input data for the code
+            output: Output from the application variant
+            correct_answer: Expected/correct answer for comparison
+        Returns:
+            Float score between 0 and 1, or None if execution fails
+        """
+        pass

agenta/sdk/workflows/runners/daytona.py ADDED Viewed

@@ -0,0 +1,274 @@
+import os
+import json
+from typing import Any, Dict, Union, Optional
+from daytona import Daytona, DaytonaConfig, Sandbox
+from agenta.sdk.workflows.runners.base import CodeRunner
+from agenta.sdk.utils.logging import get_module_logger
+log = get_module_logger(__name__)
+# Template for wrapping user code with evaluation context
+EVALUATION_CODE_TEMPLATE = """
+import json
+# Parse all parameters from a single dict
+params = json.loads({params_json!r})
+app_params = params['app_params']
+inputs = params['inputs']
+output = params['output']
+correct_answer = params['correct_answer']
+# User-provided evaluation code
+{user_code}
+# Execute and capture result
+result = evaluate(app_params, inputs, output, correct_answer)
+# Ensure result is a float
+if isinstance(result, (float, int, str)):
+    try:
+        result = float(result)
+    except (ValueError, TypeError):
+        result = None
+# Print result for capture
+print(json.dumps({{"result": result}}))
+"""
+class DaytonaRunner(CodeRunner):
+    """Remote code runner using Daytona sandbox for execution."""
+    _instance: Optional["DaytonaRunner"] = None
+    def __new__(cls):
+        """Singleton pattern to reuse Daytona client and sandbox."""
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+    def __init__(self):
+        """Initialize Daytona runner with config from environment variables."""
+        if self._initialized:
+            return
+        self._initialized = True
+        self.daytona: Optional[Daytona] = None
+        self._validate_config()
+    def _validate_config(self) -> None:
+        """Validate required environment variables for Daytona."""
+        # Only DAYTONA_API_KEY is strictly required
+        # DAYTONA_API_URL defaults to https://app.daytona.io/api
+        # DAYTONA_TARGET defaults to AGENTA_REGION or 'eu'
+        if not os.getenv("DAYTONA_API_KEY"):
+            raise ValueError(
+                "Missing required environment variable: DAYTONA_API_KEY. "
+                "Set AGENTA_SERVICES_SANDBOX_RUNNER=local to use local execution instead."
+            )
+    def _initialize_client(self) -> None:
+        """Lazily initialize Daytona client on first use."""
+        if self.daytona is not None:
+            return
+        try:
+            # Get configuration with fallbacks
+            api_url = os.getenv("DAYTONA_API_URL") or "https://app.daytona.io/api"
+            api_key = os.getenv("DAYTONA_API_KEY")
+            target = os.getenv("DAYTONA_TARGET") or os.getenv("AGENTA_REGION") or "eu"
+            config = DaytonaConfig(
+                api_url=api_url,
+                api_key=api_key,
+                target=target,
+            )
+            self.daytona = Daytona(config)
+            # log.debug("Daytona client initialized")
+        except Exception as e:
+            raise RuntimeError(f"Failed to initialize Daytona client: {e}")
+    def _create_sandbox(self) -> Any:
+        """Create a new sandbox for this run from snapshot."""
+        try:
+            if self.daytona is None:
+                raise RuntimeError("Daytona client not initialized")
+            snapshot_id = os.getenv("AGENTA_SERVICES_SANDBOX_SNAPSHOT_PYTHON")
+            if not snapshot_id:
+                raise RuntimeError(
+                    "AGENTA_SERVICES_SANDBOX_SNAPSHOT_PYTHON environment variable is required. "
+                    "Set it to the Daytona sandbox ID or snapshot name you want to use."
+                )
+            # log.debug(f"Creating sandbox from snapshot: {snapshot_id}")
+            from daytona import CreateSandboxFromSnapshotParams
+            sandbox = self.daytona.create(
+                CreateSandboxFromSnapshotParams(
+                    snapshot=snapshot_id,
+                    ephemeral=True,
+                )
+            )
+            # log.debug(
+            #     f"Sandbox created: {sandbox.id if hasattr(sandbox, 'id') else sandbox}"
+            # )
+            return sandbox
+        except Exception as e:
+            raise RuntimeError(f"Failed to create sandbox from snapshot: {e}")
+    def run(
+        self,
+        code: str,
+        app_params: Dict[str, Any],
+        inputs: Dict[str, Any],
+        output: Union[dict, str],
+        correct_answer: Any,
+    ) -> Union[float, None]:
+        """
+        Execute provided Python code in Daytona sandbox.
+        The code must define an `evaluate()` function that takes
+        (app_params, inputs, output, correct_answer) and returns a float (0-1).
+        Args:
+            code: The Python code to be executed
+            app_params: The parameters of the app variant
+            inputs: Inputs to be used during code execution
+            output: The output of the app variant after being called
+            correct_answer: The correct answer (or target) for comparison
+        Returns:
+            Float score between 0 and 1, or None if execution fails
+        """
+        self._initialize_client()
+        sandbox: Sandbox = self._create_sandbox()
+        try:
+            # Prepare all parameters as a single dict
+            params = {
+                "app_params": app_params,
+                "inputs": inputs,
+                "output": output,
+                "correct_answer": correct_answer,
+            }
+            params_json = json.dumps(params)
+            # Wrap the user code with the necessary context and evaluation
+            wrapped_code = EVALUATION_CODE_TEMPLATE.format(
+                params_json=params_json,
+                user_code=code,
+            )
+            # Log the input parameters for debugging
+            # log.debug("Input parameters to evaluation:")
+            # print("\n" + "=" * 80)
+            # print("INPUT PARAMETERS:")
+            # print("=" * 80)
+            # print(f"app_params: {app_params}")
+            # print(f"inputs: {inputs}")
+            # print(f"output: {output}")
+            # print(f"correct_answer: {correct_answer}")
+            # print("=" * 80 + "\n")
+            # Log the generated code for debugging
+            # log.debug("Generated code to send to Daytona:")
+            # print("=" * 80)
+            # print("GENERATED CODE TO SEND TO DAYTONA:")
+            # print("=" * 80)
+            # code_lines = wrapped_code.split("\n")
+            # for i, line in enumerate(code_lines, 1):
+            #     log.debug(f"  {i:3d}: {line}")
+            #     print(f"  {i:3d}: {line}")
+            # print("=" * 80)
+            # print(f"Total lines: {len(code_lines)}")
+            # print("=" * 80 + "\n")
+            # Callback functions to capture output and errors
+            stdout_lines = []
+            stderr_lines = []
+            def on_stdout(line: str) -> None:
+                """Capture stdout output."""
+                # log.debug(f"[STDOUT] {line}")
+                # print(f"[STDOUT] {line}")
+                stdout_lines.append(line)
+            def on_stderr(line: str) -> None:
+                """Capture stderr output."""
+                # log.warning(f"[STDERR] {line}")
+                # print(f"[STDERR] {line}")
+                stderr_lines.append(line)
+            def on_error(error: Exception) -> None:
+                """Capture errors."""
+                log.error(f"[ERROR] {type(error).__name__}: {error}")
+                # print(f"[ERROR] {type(error).__name__}: {error}")
+            # Execute the code in the Daytona sandbox
+            # log.debug("Executing code in Daytona sandbox")
+            response = sandbox.code_interpreter.run_code(
+                wrapped_code,
+                on_stdout=on_stdout,
+                on_stderr=on_stderr,
+                on_error=on_error,
+            )
+            # log.debug(f"Raw response: {response}")
+            # print(f"Raw response: {response}")
+            # Parse the result from the response object
+            # Response has stdout, stderr, and error fields
+            response_stdout = response.stdout if hasattr(response, "stdout") else ""
+            response_error = response.error if hasattr(response, "error") else None
+            sandbox.delete()
+            if response_error:
+                log.error(f"Sandbox execution error: {response_error}")
+                raise RuntimeError(f"Sandbox execution failed: {response_error}")
+            # Parse the result from stdout
+            output_lines = response_stdout.strip().split("\n")
+            for line in reversed(output_lines):
+                if not line.strip():
+                    continue
+                try:
+                    result_obj = json.loads(line)
+                    if isinstance(result_obj, dict) and "result" in result_obj:
+                        result = result_obj["result"]
+                        if isinstance(result, (float, int, type(None))):
+                            return float(result) if result is not None else None
+                except json.JSONDecodeError:
+                    continue
+            raise ValueError("Could not parse evaluation result from Daytona output")
+        except Exception as e:
+            log.error(f"Error during Daytona code execution: {e}", exc_info=True)
+            # print(f"Exception details: {type(e).__name__}: {e}")
+            raise RuntimeError(f"Error during Daytona code execution: {e}")
+    def cleanup(self) -> None:
+        """Clean up Daytona client resources."""
+        try:
+            self.daytona = None
+        except Exception as e:
+            # Log but don't raise on cleanup failures
+            log.error(f"Warning: Failed to cleanup Daytona resources", exc_info=True)
+    def __del__(self):
+        """Ensure cleanup on deletion."""
+        try:
+            self.cleanup()
+        except Exception:
+            pass

agenta/sdk/workflows/runners/local.py ADDED Viewed

@@ -0,0 +1,108 @@
+from typing import Any, Dict, Union, Text
+from RestrictedPython import safe_builtins, compile_restricted, utility_builtins
+from RestrictedPython.Eval import (
+    default_guarded_getiter,
+    default_guarded_getitem,
+)
+from RestrictedPython.Guards import (
+    guarded_iter_unpack_sequence,
+    full_write_guard,
+)
+from agenta.sdk.workflows.runners.base import CodeRunner
+class LocalRunner(CodeRunner):
+    """Local code runner using RestrictedPython for safe execution."""
+    def run(
+        self,
+        code: str,
+        app_params: Dict[str, Any],
+        inputs: Dict[str, Any],
+        output: Union[dict, str],
+        correct_answer: Any,
+    ) -> Union[float, None]:
+        """
+        Execute provided Python code safely using RestrictedPython.
+        Args:
+            code: The Python code to be executed
+            app_params: The parameters of the app variant
+            inputs: Inputs to be used during code execution
+            output: The output of the app variant after being called
+            correct_answer: The correct answer (or target) for comparison
+            code: The Python code to be executed
+        Returns:
+            Float score between 0 and 1, or None if execution fails
+        """
+        # Define the available built-ins
+        local_builtins = safe_builtins.copy()
+        # Add the __import__ built-in function to the local builtins
+        local_builtins["__import__"] = __import__
+        # Define supported packages
+        allowed_imports = [
+            "math",
+            "random",
+            "datetime",
+            "json",
+            "requests",
+            "typing",
+        ]
+        # Create a dictionary to simulate allowed imports
+        allowed_modules = {}
+        for package_name in allowed_imports:
+            allowed_modules[package_name] = __import__(package_name)
+        # Add the allowed modules to the local built-ins
+        local_builtins.update(allowed_modules)
+        local_builtins.update(utility_builtins)
+        # Define the environment for the code execution
+        environment = {
+            "_getiter_": default_guarded_getiter,
+            "_getitem_": default_guarded_getitem,
+            "_iter_unpack_sequence_": guarded_iter_unpack_sequence,
+            "_write_": full_write_guard,
+            "__builtins__": local_builtins,
+        }
+        # Compile the code in a restricted environment
+        byte_code = compile_restricted(code, filename="<inline>", mode="exec")
+        # Call the evaluation function, extract the result if it exists
+        # and is a float between 0 and 1
+        try:
+            # Execute the code
+            exec(byte_code, environment)
+            # Call the evaluation function, extract the result
+            result = environment["evaluate"](app_params, inputs, output, correct_answer)
+            # Attempt to convert result to float
+            if isinstance(result, (float, int, str)):
+                try:
+                    result = float(result)
+                except ValueError as e:
+                    raise ValueError(f"Result cannot be converted to float: {e}")
+            if not isinstance(result, float):
+                raise TypeError(
+                    f"Result is not a float after conversion: {type(result)}"
+                )
+            return result
+        except KeyError as e:
+            raise KeyError(f"Missing expected key in environment: {e}")
+        except SyntaxError as e:
+            raise SyntaxError(f"Syntax error in provided code: {e}")
+        except Exception as e:
+            raise RuntimeError(f"Error during code execution: {e}")

agenta/sdk/workflows/runners/registry.py ADDED Viewed

@@ -0,0 +1,31 @@
+import os
+from agenta.sdk.workflows.runners.base import CodeRunner
+from agenta.sdk.workflows.runners.local import LocalRunner
+from agenta.sdk.workflows.runners.daytona import DaytonaRunner
+def get_runner() -> CodeRunner:
+    """
+    Registry to get the appropriate code runner based on environment configuration.
+    Uses AGENTA_SERVICES_SANDBOX_RUNNER environment variable:
+    - "local" (default): Uses RestrictedPython for local execution
+    - "daytona": Uses Daytona remote sandbox
+    Returns:
+        CodeRunner: An instance of LocalRunner or DaytonaRunner
+    Raises:
+        ValueError: If Daytona runner is selected but required environment variables are missing
+    """
+    runner_type = os.getenv("AGENTA_SERVICES_SANDBOX_RUNNER", "local").lower()
+    if runner_type == "daytona":
+        return DaytonaRunner()
+    elif runner_type == "local":
+        return LocalRunner()
+    else:
+        raise ValueError(
+            f"Unknown AGENTA_SERVICES_SANDBOX_RUNNER value: {runner_type}. "
+            f"Supported values: 'local', 'daytona'"
+        )

agenta/sdk/workflows/sandbox.py CHANGED Viewed

@@ -1,14 +1,9 @@
 from typing import Union, Text, Dict, Any
-from RestrictedPython import safe_builtins, compile_restricted, utility_builtins
-from RestrictedPython.Eval import (
-    default_guarded_getiter,
-    default_guarded_getitem,
-)
-from RestrictedPython.Guards import (
-    guarded_iter_unpack_sequence,
-    full_write_guard,
-)
+from agenta.sdk.workflows.runners import get_runner
+# Cache for the runner instance
+_runner = None
 def is_import_safe(python_code: Text) -> bool:
@@ -36,83 +31,25 @@ def execute_code_safely(
     code: Text,
 ) -> Union[float, None]:
     """
-    Execute the provided Python code safely using RestrictedPython.
+    Execute the provided Python code safely.
+    Uses the configured runner (local RestrictedPython or remote Daytona)
+    based on the AGENTA_SERVICES_SANDBOX_RUNNER environment variable.
     Args:
-        - app_params (Dict[str, str]): The parameters of the app variant.
-        - inputs (dict): Inputs to be used during code execution.
-        - output (str): The output of the app variant after being called.
-        - correct_answer (str): The correct answer (or target) of the app variant.
+        - app_params (Dict[str, Any]): The parameters of the app variant.
+        - inputs (Dict[str, Any]): Inputs to be used during code execution.
+        - output (Union[dict, str]): The output of the app variant after being called.
+        - correct_answer (Any): The correct answer (or target) of the app variant.
         - code (Text): The Python code to be executed.
-        - datapoint (Dict[str, str]): The test datapoint.
     Returns:
-    - (float): Result of the execution if successful. Should be between 0 and 1.
-    - None if execution fails or result is not a float between 0 and 1.
+        - (float): Result of the execution if successful. Should be between 0 and 1.
+        - None if execution fails or result is not a float between 0 and 1.
     """
-    # Define the available built-ins
-    local_builtins = safe_builtins.copy()
-    # Add the __import__ built-in function to the local builtins
-    local_builtins["__import__"] = __import__
-    # Define supported packages
-    allowed_imports = [
-        "math",
-        "random",
-        "datetime",
-        "json",
-        "requests",
-        "typing",
-    ]
-    # Create a dictionary to simulate allowed imports
-    allowed_modules = {}
-    for package_name in allowed_imports:
-        allowed_modules[package_name] = __import__(package_name)
-    # Add the allowed modules to the local built-ins
-    local_builtins.update(allowed_modules)
-    local_builtins.update(utility_builtins)
-    # Define the environment for the code execution
-    environment = {
-        "_getiter_": default_guarded_getiter,
-        "_getitem_": default_guarded_getitem,
-        "_iter_unpack_sequence_": guarded_iter_unpack_sequence,
-        "_write_": full_write_guard,
-        "__builtins__": local_builtins,
-    }
-    # Compile the code in a restricted environment
-    byte_code = compile_restricted(code, filename="<inline>", mode="exec")
-    # Call the evaluation function, extract the result if it exists
-    # and is a float between 0 and 1
-    try:
-        # Execute the code
-        exec(byte_code, environment)
-        # Call the evaluation function, extract the result
-        result = environment["evaluate"](app_params, inputs, output, correct_answer)
-        # Attempt to convert result to float
-        if isinstance(result, (float, int, str)):
-            try:
-                result = float(result)
-            except ValueError as e:
-                raise ValueError(f"Result cannot be converted to float: {e}")
-        if not isinstance(result, float):
-            raise TypeError(f"Result is not a float after conversion: {type(result)}")
-        return result
-    except KeyError as e:
-        raise KeyError(f"Missing expected key in environment: {e}")
+    global _runner
-    except SyntaxError as e:
-        raise SyntaxError(f"Syntax error in provided code: {e}")
+    if _runner is None:
+        _runner = get_runner()
-    except Exception as e:
-        raise RuntimeError(f"Error during code execution: {e}")
+    return _runner.run(code, app_params, inputs, output, correct_answer)

{agenta-0.63.2.dist-info → agenta-0.68.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agenta
-Version: 0.63.2
+Version: 0.68.0
 Summary: The SDK for agenta is an open-source LLMOps platform.
 Keywords: LLMOps,LLM,evaluation,prompt engineering
 Author: Mahmoud Mabrouk
@@ -15,29 +15,24 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Topic :: Software Development :: Libraries
-Requires-Dist: decorator (>=5.2.1,<6.0.0)
-Requires-Dist: fastapi (>=0.116.0,<0.117.0)
-Requires-Dist: google-auth (>=2.23,<3)
-Requires-Dist: h11 (>=0.16.0,<0.17.0)
-Requires-Dist: httpx (>=0.28.0,<0.29.0)
-Requires-Dist: huggingface-hub (<0.31.0)
-Requires-Dist: importlib-metadata (>=8.0.0,<9.0)
-Requires-Dist: jinja2 (>=3.1.6,<4.0.0)
-Requires-Dist: litellm (==1.78.7)
-Requires-Dist: openai (>=1.106.0)
-Requires-Dist: opentelemetry-api (>=1.27.0,<2.0.0)
-Requires-Dist: opentelemetry-exporter-otlp-proto-http (>=1.27.0,<2.0.0)
-Requires-Dist: opentelemetry-instrumentation (>=0.56b0)
-Requires-Dist: opentelemetry-sdk (>=1.27.0,<2.0.0)
+Requires-Dist: daytona (>=0.121.0,<0.122.0)
+Requires-Dist: fastapi (>=0.122,<0.123)
+Requires-Dist: httpx (>=0.28,<0.29)
+Requires-Dist: importlib-metadata (>=8,<9)
+Requires-Dist: jinja2 (>=3,<4)
+Requires-Dist: litellm (>=1,<2)
+Requires-Dist: openai (>=2,<3)
+Requires-Dist: opentelemetry-api (>=1,<2)
+Requires-Dist: opentelemetry-exporter-otlp-proto-http (>=1,<2)
+Requires-Dist: opentelemetry-instrumentation (>=0.59b0,<0.60)
+Requires-Dist: opentelemetry-sdk (>=1,<2)
 Requires-Dist: pydantic (>=2,<3)
-Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
-Requires-Dist: python-jsonpath (>=2.0.0,<3.0.0)
-Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
-Requires-Dist: restrictedpython (>=8.0,<9.0) ; python_version >= "3.11" and python_version < "3.14"
-Requires-Dist: starlette (>=0.47.0,<0.48.0)
-Requires-Dist: structlog (>=25.2.0,<26.0.0)
-Requires-Dist: tiktoken (==0.11.0)
-Requires-Dist: toml (>=0.10.2,<0.11.0)
+Requires-Dist: python-dotenv (>=1,<2)
+Requires-Dist: python-jsonpath (>=2,<3)
+Requires-Dist: pyyaml (>=6,<7)
+Requires-Dist: restrictedpython (>=8,<9) ; python_version >= "3.11" and python_version < "3.14"
+Requires-Dist: structlog (>=25,<26)
+Requires-Dist: toml (>=0.10,<0.11)
 Project-URL: Documentation, https://agenta.ai/docs/
 Project-URL: Homepage, https://agenta.ai
 Project-URL: Repository, https://github.com/agenta-ai/agenta

{agenta-0.63.2.dist-info → agenta-0.68.0.dist-info}/RECORD RENAMED Viewed

@@ -224,7 +224,7 @@ agenta/client/backend/types/plan.py,sha256=_285lMKz5ehwET6j8JM11WlrC_J1HaMSTNwLy
 agenta/client/backend/types/project_membership_request.py,sha256=iNu9ahVXguMOD4d-FGBy7Nk27PlGBLtqFwPlja6WAe4,679
 agenta/client/backend/types/project_request.py,sha256=8AHeu6Cta61Xtj8bnAdmmRN9OpZHssPNHTdwgPblyPk,679
 agenta/client/backend/types/project_scope.py,sha256=i8bwPAsuFMPcAalpN_GBnoAdvMwb5n5IosD8FjOk6no,729
-agenta/client/backend/types/projects_response.py,sha256=Ek-oRlJCfn6o3A_uWPXivpml0H_mQXjQ6BXNVWVGQTk,852
+agenta/client/backend/types/projects_response.py,sha256=jjG6_i_Zcx0ioUhvKT0tflxy6ZSDykHalCERJ4NpAmw,889
 agenta/client/backend/types/recursive_types.py,sha256=vhXwrFzfA5qsalENaVy5dLUEUS3dv8UIet88WNwBGbE,910
 agenta/client/backend/types/reference.py,sha256=fU39dioX8RdqbGK-Y-4sb0zOcgf5Hv03wadOLkwEWTA,923
 agenta/client/backend/types/reference_dto.py,sha256=P-qyS63Sn1Ih5cz_ayzJe36UqkKEp6gRNX96kWh0EG8,690
@@ -248,7 +248,7 @@ agenta/client/backend/types/status_dto.py,sha256=uB5qmKQATBO4dbsYv90gm4a49EDXhpd
 agenta/client/backend/types/tags_request.py,sha256=2Zu42tyEoa2OfEQ_cArZ9EzRYQHfEf_F9QCmksKKvCI,770
 agenta/client/backend/types/testcase_response.py,sha256=xfCMXlt1FGFKZNOYp1IHRwGcI_35I_a7y8gUUDXIkQE,838
 agenta/client/backend/types/testset.py,sha256=wLYSXJyjvG5X-o-eRP4p6TdgPZTYDXBe7H7xtyeqczE,1399
-agenta/client/backend/types/testset_output_response.py,sha256=QRMX6ypP_LuwhCz-HEOBqE22CC-cEa69AJ65iSRzduA,731
+agenta/client/backend/types/testset_output_response.py,sha256=eeNYr5cvEoYD1Grhr6xmEKRSfVAp17dqFJxTiqr8V0w,761
 agenta/client/backend/types/testset_request.py,sha256=-jxEdDlmtxM4Z07ruo0nyJWBKu88ElM3XcPOS4cHp7I,579
 agenta/client/backend/types/testset_response.py,sha256=KES03ufUqhK5xJhzpCK1o0N-v5lpR-BQF3cov9bas2g,619
 agenta/client/backend/types/testset_simple_response.py,sha256=qQnuFDPhqFeRzKcBNxRyXHe5KktG5NZOs6WoE7PKSCg,582
@@ -306,7 +306,7 @@ agenta/client/types.py,sha256=wBGDVktTL2EblEKW23Y-VrFp7V_JHLPMHltt2jEkF0Q,129
 agenta/config.py,sha256=0VrTqduB4g8Mt_Ll7ffFcEjKF5qjTUIxmUtTPW2ygWw,653
 agenta/config.toml,sha256=sIORbhnyct2R9lJrquxhNL4pHul3O0R7iaipCoja5MY,193
 agenta/sdk/__init__.py,sha256=7QUpZ409HcLB22A80qaZydzhs6afPnCvG0Tfq6PE4fk,3011
-agenta/sdk/agenta_init.py,sha256=hBFb0weC54fIReu95779ueUYlBZDqK446nUi8gTdZNE,7280
+agenta/sdk/agenta_init.py,sha256=Vm14_nzyObAmwQFQvAANLsOjqy5AwYIFa6MrdnJ0aqY,7286
 agenta/sdk/assets.py,sha256=51uSUp-qlFLB-nLSrDDTDXOQhM-2yGIuODgALYt1i9Y,8699
 agenta/sdk/context/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 agenta/sdk/context/running.py,sha256=3gEuUdQrJwcuN93MlXFZ6aHXNxUW6dUk_EudgaxOkCU,907
@@ -333,14 +333,14 @@ agenta/sdk/engines/tracing/inline.py,sha256=y2S_MGGqmXgyUgbkNNyrb8_X-QtGuDy8Jwxl
 agenta/sdk/engines/tracing/processors.py,sha256=lRhT-ifu1LEPMOoqMzeX_qtWQ0cHbodUpSjlBGZcDZA,5149
 agenta/sdk/engines/tracing/propagation.py,sha256=Zu_z5In8eOhy0tkYzQOI09T4OwdjGMP74nhzvElvyFE,2593
 agenta/sdk/engines/tracing/spans.py,sha256=luZ6lB1mBqrilm2hXZx2ELx6sBQmZM9wThdr8G-yeyM,3715
-agenta/sdk/engines/tracing/tracing.py,sha256=NF3Vl_FzzH_rxRRu2puTUAbX34KHcz8olhqGbf7RARE,9281
+agenta/sdk/engines/tracing/tracing.py,sha256=pt40BWz_GA6ycogPrqNhddpvrufB-vAVClIBGI9ON_s,9284
 agenta/sdk/evaluations/__init__.py,sha256=hFb_O8aNkDS0LuZxJYydLxvYLIBPNuab7JIYL87OXPc,105
-agenta/sdk/evaluations/metrics.py,sha256=AQGQau5njqc6dfPZHbry5tVep3cmuzi4woRM26cqP60,830
+agenta/sdk/evaluations/metrics.py,sha256=rBXxPpI9T1QLI1AB6JXEFeCEGCCAjrzG9KBTOeLm04o,841
 agenta/sdk/evaluations/preview/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 agenta/sdk/evaluations/preview/evaluate.py,sha256=fUVVsGlSwpdE97e2iCnGJKyHi-DeZzZwDedTEmgSyY8,27450
 agenta/sdk/evaluations/preview/utils.py,sha256=o-9GvQDhLgnbQ81r8A9zj49BDtd9Pe5_oJlO9ay_qTg,32049
 agenta/sdk/evaluations/results.py,sha256=3pe2c0oI1Wc1wFCKFeQnbe_iwtNn9U0MO_c4U3HtrAs,1590
-agenta/sdk/evaluations/runs.py,sha256=Hp6uxPI9HDu35ZtOoL-_xt53CvCjtbaxe4c3CGyJH-A,3429
+agenta/sdk/evaluations/runs.py,sha256=8euS2Zfzcw9a7SDOnhK4DymuYNIjIXQ1iWlRuuf0q78,3398
 agenta/sdk/evaluations/scenarios.py,sha256=XlsVa_M8FmnSvVniF_FEZUhZDLYIotx4V0SRmPEzWS8,1024
 agenta/sdk/litellm/__init__.py,sha256=Bpz1gfHQc0MN1yolWcjifLWznv6GjHggvRGQSpxpihM,37
 agenta/sdk/litellm/litellm.py,sha256=E7omr9kz0yn8CUK5O0g0QUlDA4bD5fllYtHK9RL2bXE,10646
@@ -377,7 +377,7 @@ agenta/sdk/middlewares/running/resolver.py,sha256=I4nX7jpsq7oKSwQnlsoLm9kh94G2qx
 agenta/sdk/middlewares/running/vault.py,sha256=DqeWyViDDRJycXtRKEO1S7ihlBfCnPFtXSfj_6trW98,3845
 agenta/sdk/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 agenta/sdk/models/blobs.py,sha256=g8zV6V3UcuskEV6tK_tvt8GDWqx1okfE3Oy4n59mgU0,576
-agenta/sdk/models/evaluations.py,sha256=HQfV2Wyt7FINgaNlup9NAWQSgRVu6eaEDB6D2mqbQlI,2599
+agenta/sdk/models/evaluations.py,sha256=twHFZQlAT2xYSqefJdpwtDLSu5GpsDmswV_o04ACyFs,3003
 agenta/sdk/models/git.py,sha256=ol5H3lu6s3Lx2l7K3glbCqcvAnPXsDsYa-OsSR0CupM,2415
 agenta/sdk/models/shared.py,sha256=ynHFXsOgkpvzHJuBcmPp2lMj7ia6x0e9LvNAiKVpPSo,4020
 agenta/sdk/models/testsets.py,sha256=cHCZEWqnDMGqKKH9yVuM5FroG10YFKJ4nF5xZniE-Ds,3415
@@ -392,7 +392,7 @@ agenta/sdk/tracing/inline.py,sha256=UKt10JGKdS6gVDIpExng3UC8vegAcuA2KxlzyvSdUZ0,
 agenta/sdk/tracing/processors.py,sha256=A7rsaicpFq9xZgyhU3hV5ZQoz6X33gB81G9IhB-x3Xg,8597
 agenta/sdk/tracing/propagation.py,sha256=Zu_z5In8eOhy0tkYzQOI09T4OwdjGMP74nhzvElvyFE,2593
 agenta/sdk/tracing/spans.py,sha256=r-R68d12BjvilHgbqN-1xp26qxdVRzxRcFUO-IB_u94,3780
-agenta/sdk/tracing/tracing.py,sha256=5M_cyptJFR9wnMcRktSB5atjYSTZ8CsdwYtAbFXhRpI,9233
+agenta/sdk/tracing/tracing.py,sha256=mogsWlTwz-pYvzpst4xb4kjuGRtywzrU9GO9T9stvZw,9236
 agenta/sdk/types.py,sha256=41yIQagl5L_7WFInjiACHwuNfCQqDrrDOusD17kJGWs,28469
 agenta/sdk/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 agenta/sdk/utils/cache.py,sha256=Er1Hvu1QVLGl99HkUHZ2lKBg3f6PnpkD1uZRvK9r3u4,1429
@@ -414,8 +414,13 @@ agenta/sdk/workflows/configurations.py,sha256=k1YGP3y49WwiFMaQ1rKpCHqCYWWe6nQ7EP
 agenta/sdk/workflows/errors.py,sha256=x582njGfNTMfu4v8bhHdU_Wf_oa8_mHXc3CEE9F2ZBk,8350
 agenta/sdk/workflows/handlers.py,sha256=z_DtfgiejsToO4kqXvXBnO36hd0rk97Y2J2hYlIfZmo,59496
 agenta/sdk/workflows/interfaces.py,sha256=I5Bfil0awdL1TAb_vHqW5n5BHxSBOTuDMhOi4RnUt8A,36315
-agenta/sdk/workflows/sandbox.py,sha256=pzy5mdNDjBAQu1qFwMAxHirWiKX20mq2i7lrwA-ABjc,3816
+agenta/sdk/workflows/runners/__init__.py,sha256=HoYaKf9G03WEUbY7B1uX4O_6xE5dfliNCG1nEuWp1ks,87
+agenta/sdk/workflows/runners/base.py,sha256=WgX0OgbLL5PHeGqLNAvrV7NC3FHDWVfU7v9EBj8MIW0,857
+agenta/sdk/workflows/runners/daytona.py,sha256=g09014ocerh7X-sLOJ01i5ko6YysdXCPfnrOfn9MWQQ,9791
+agenta/sdk/workflows/runners/local.py,sha256=SJ1msO35mQ4XzlqZi9fE25QJu-PDnYru8a66pMo-5vs,3636
+agenta/sdk/workflows/runners/registry.py,sha256=bHM7hTiFawdOM30RwCHeAwFN8C0zGF0at1zsI-57tlw,1067
+agenta/sdk/workflows/sandbox.py,sha256=O1Opeg4hc9jygAzyF5cCsStmMjYgrahA_aF0JdGbBO0,1734
 agenta/sdk/workflows/utils.py,sha256=UDG5or8qqiSCpqi0Fphjxkkhu4MdbiCkHn_yIQcTd0c,11664
-agenta-0.63.2.dist-info/METADATA,sha256=6xWqYfDrqOgIQ1XrtvnDYMjWkgWfL9aFqWAw_uUKDbM,31855
-agenta-0.63.2.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-agenta-0.63.2.dist-info/RECORD,,
+agenta-0.68.0.dist-info/METADATA,sha256=B8evMuWFY51eFJX03VPfmeyJ-ns-4wFnBSxVtcmWj1A,31572
+agenta-0.68.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+agenta-0.68.0.dist-info/RECORD,,

{agenta-0.63.2.dist-info → agenta-0.68.0.dist-info}/WHEEL RENAMED Viewed

File without changes

agenta 0.63.2__py3-none-any.whl → 0.68.0__py3-none-any.whl

agenta 0.63.2py3-none-any.whl → 0.68.0py3-none-any.whl