agenta 0.63.2__py3-none-any.whl → 0.68.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,7 @@ class ProjectsResponse(UniversalBaseModel):
13
13
  workspace_name: typing.Optional[str] = None
14
14
  project_id: str
15
15
  project_name: str
16
+ is_default_project: bool = False
16
17
  user_role: typing.Optional[str] = None
17
18
  is_demo: typing.Optional[bool] = None
18
19
 
@@ -13,6 +13,7 @@ class TestsetOutputResponse(UniversalBaseModel):
13
13
  name: str
14
14
  created_at: str
15
15
  updated_at: str
16
+ columns: typing.List[str]
16
17
 
17
18
  if IS_PYDANTIC_V2:
18
19
  model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
agenta/sdk/agenta_init.py CHANGED
@@ -70,7 +70,7 @@ class AgentaSingleton:
70
70
 
71
71
  """
72
72
 
73
- log.info("Agenta - SDK ver: %s", version("agenta"))
73
+ log.info("Agenta - SDK ver: %s", version("agenta"))
74
74
 
75
75
  config = {}
76
76
  if config_fname:
@@ -118,7 +118,7 @@ class AgentaSingleton:
118
118
  or None # NO FALLBACK
119
119
  )
120
120
 
121
- log.info("Agenta - API URL: %s", self.api_url)
121
+ log.info("Agenta - API URL: %s", self.api_url)
122
122
 
123
123
  self.scope_type = (
124
124
  scope_type
@@ -114,7 +114,7 @@ class Tracing(metaclass=Singleton):
114
114
 
115
115
  # TRACE PROCESSORS -- OTLP
116
116
  try:
117
- log.info("Agenta - OLTP URL: %s", self.otlp_url)
117
+ log.info("Agenta - OTLP URL: %s", self.otlp_url)
118
118
 
119
119
  _otlp = TraceProcessor(
120
120
  OTLPExporter(
@@ -127,7 +127,7 @@ class Tracing(metaclass=Singleton):
127
127
 
128
128
  self.tracer_provider.add_span_processor(_otlp)
129
129
  except: # pylint: disable=bare-except
130
- log.warning("Agenta - OLTP unreachable, skipping exports.")
130
+ log.warning("Agenta - OTLP unreachable, skipping exports.")
131
131
 
132
132
  # GLOBAL TRACER PROVIDER -- INSTRUMENTATION LIBRARIES
133
133
  set_tracer_provider(self.tracer_provider)
@@ -13,15 +13,15 @@ async def arefresh(
13
13
  # timestamp: Optional[str] = None,
14
14
  # interval: Optional[float] = None,
15
15
  ) -> EvaluationMetrics:
16
- payload = dict(
16
+ metrics = dict(
17
17
  run_id=str(run_id),
18
18
  scenario_id=str(scenario_id) if scenario_id else None,
19
19
  )
20
20
 
21
21
  response = authed_api()(
22
22
  method="POST",
23
- endpoint=f"/preview/evaluations/metrics/refresh",
24
- params=payload,
23
+ endpoint="/preview/evaluations/metrics/refresh",
24
+ json=dict(metrics=metrics),
25
25
  )
26
26
 
27
27
  try:
@@ -124,11 +124,10 @@ async def aclose(
124
124
  async def aurl(
125
125
  *,
126
126
  run_id: UUID,
127
- ) -> str:
127
+ ) -> Optional[str]:
128
128
  response = authed_api()(
129
129
  method="GET",
130
- endpoint=f"/projects",
131
- params={"scope": "project"},
130
+ endpoint=f"/projects/current",
132
131
  )
133
132
 
134
133
  try:
@@ -137,10 +136,10 @@ async def aurl(
137
136
  print(response.text)
138
137
  raise
139
138
 
140
- if len(response.json()) != 1:
141
- return None
139
+ project_info = response.json()
142
140
 
143
- project_info = response.json()[0]
141
+ if not project_info:
142
+ return None
144
143
 
145
144
  workspace_id = project_info.get("workspace_id")
146
145
  project_id = project_info.get("project_id")
@@ -53,9 +53,17 @@ class EvaluationStatus(str, Enum):
53
53
 
54
54
 
55
55
  class EvaluationRunFlags(BaseModel):
56
- is_closed: Optional[bool] = None # Indicates if the run is immutable
57
- is_live: Optional[bool] = None # Indicates if the run is updated periodically
58
- is_active: Optional[bool] = None # Indicates if the run is currently active
56
+ is_live: bool = False # Indicates if the run has live queries
57
+ is_active: bool = False # Indicates if the run is currently active
58
+ is_closed: bool = False # Indicates if the run is closed (immutable, no longer modifiable)
59
+ #
60
+ has_queries: bool = False # Indicates if the run has queries
61
+ has_testsets: bool = False # Indicates if the run has testsets
62
+ has_evaluators: bool = False # Indicates if the run has evaluators
63
+ #
64
+ has_custom: bool = False # Indicates if the run has custom evaluators
65
+ has_human: bool = False # Indicates if the run has human evaluators
66
+ has_auto: bool = False # Indicates if the run has auto evaluators
59
67
 
60
68
 
61
69
  class SimpleEvaluationFlags(EvaluationRunFlags):
@@ -101,7 +101,7 @@ class Tracing(metaclass=Singleton):
101
101
 
102
102
  # TRACE PROCESSORS -- OTLP
103
103
  try:
104
- log.info("Agenta - OLTP URL: %s", self.otlp_url)
104
+ log.info("Agenta - OTLP URL: %s", self.otlp_url)
105
105
 
106
106
  _otlp = TraceProcessor(
107
107
  OTLPExporter(
@@ -114,7 +114,7 @@ class Tracing(metaclass=Singleton):
114
114
 
115
115
  self.tracer_provider.add_span_processor(_otlp)
116
116
  except: # pylint: disable=bare-except
117
- log.warning("Agenta - OLTP unreachable, skipping exports.")
117
+ log.warning("Agenta - OTLP unreachable, skipping exports.")
118
118
 
119
119
  # --- INLINE
120
120
  if inline:
@@ -0,0 +1,3 @@
1
+ from agenta.sdk.workflows.runners.registry import get_runner
2
+
3
+ __all__ = ["get_runner"]
@@ -0,0 +1,30 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, Dict, Union
3
+
4
+
5
class CodeRunner(ABC):
    """Common interface for evaluation-code runners (local or remote)."""

    @abstractmethod
    def run(
        self,
        code: str,
        app_params: Dict[str, Any],
        inputs: Dict[str, Any],
        output: Union[dict, str],
        correct_answer: Any,
    ) -> Union[float, None]:
        """
        Run the given evaluation code and report the score it produces.

        Subclasses must override this method; the base class cannot be
        instantiated directly.

        Args:
            code: Python source to execute
            app_params: Parameters of the application
            inputs: Input data made available to the code
            output: What the application variant produced
            correct_answer: Expected answer to compare the output against

        Returns:
            A float score in the range 0 to 1, or None when no score is
            available
        """
@@ -0,0 +1,274 @@
1
+ import os
2
+ import json
3
+ from typing import Any, Dict, Union, Optional
4
+
5
+ from daytona import Daytona, DaytonaConfig, Sandbox
6
+
7
+ from agenta.sdk.workflows.runners.base import CodeRunner
8
+
9
+ from agenta.sdk.utils.logging import get_module_logger
10
+
11
+ log = get_module_logger(__name__)
12
+
13
# Template for wrapping user code with evaluation context.
#
# The text is rendered with ``str.format``: ``{params_json!r}`` receives the
# JSON-encoded parameters (embedded as a Python string literal via ``!r``) and
# ``{user_code}`` receives the user's source, which must define ``evaluate``.
# Literal braces are therefore doubled (``{{`` / ``}}``).
# The rendered script prints one JSON object of the form {"result": ...} as
# its last action, so the caller can recover the score from stdout.
EVALUATION_CODE_TEMPLATE = """
import json

# Parse all parameters from a single dict
params = json.loads({params_json!r})
app_params = params['app_params']
inputs = params['inputs']
output = params['output']
correct_answer = params['correct_answer']

# User-provided evaluation code
{user_code}

# Execute and capture result
result = evaluate(app_params, inputs, output, correct_answer)

# Ensure result is a float
if isinstance(result, (float, int, str)):
    try:
        result = float(result)
    except (ValueError, TypeError):
        result = None

# Print result for capture
print(json.dumps({{"result": result}}))
"""
40
+
41
+
42
class DaytonaRunner(CodeRunner):
    """Remote code runner using Daytona sandbox for execution.

    The class is a singleton so the Daytona client is created once and
    reused; a new sandbox is still created for every ``run`` call.
    """

    _instance: Optional["DaytonaRunner"] = None

    def __new__(cls):
        """Singleton pattern to reuse the Daytona client."""
        if cls._instance is None:
            instance = super().__new__(cls)
            instance._initialized = False
            cls._instance = instance
        return cls._instance

    def __init__(self):
        """Initialize Daytona runner with config from environment variables.

        Raises:
            ValueError: If ``DAYTONA_API_KEY`` is not set.
        """
        if self._initialized:
            return

        self.daytona: Optional[Daytona] = None
        self._validate_config()

        # Mark the singleton as ready only after validation succeeded;
        # otherwise a later DaytonaRunner() call would return the cached
        # instance and silently skip the configuration check.
        self._initialized = True

    def _validate_config(self) -> None:
        """Validate required environment variables for Daytona.

        Raises:
            ValueError: If ``DAYTONA_API_KEY`` is missing or empty.
        """
        # Only DAYTONA_API_KEY is strictly required
        # DAYTONA_API_URL defaults to https://app.daytona.io/api
        # DAYTONA_TARGET defaults to AGENTA_REGION or 'eu'
        if not os.getenv("DAYTONA_API_KEY"):
            raise ValueError(
                "Missing required environment variable: DAYTONA_API_KEY. "
                "Set AGENTA_SERVICES_SANDBOX_RUNNER=local to use local execution instead."
            )

    def _initialize_client(self) -> None:
        """Lazily initialize Daytona client on first use.

        Raises:
            RuntimeError: If the client cannot be constructed.
        """
        if self.daytona is not None:
            return

        try:
            # Get configuration with fallbacks
            api_url = os.getenv("DAYTONA_API_URL") or "https://app.daytona.io/api"
            api_key = os.getenv("DAYTONA_API_KEY")
            target = os.getenv("DAYTONA_TARGET") or os.getenv("AGENTA_REGION") or "eu"

            config = DaytonaConfig(
                api_url=api_url,
                api_key=api_key,
                target=target,
            )
            self.daytona = Daytona(config)

        except Exception as e:
            raise RuntimeError(f"Failed to initialize Daytona client: {e}") from e

    def _create_sandbox(self) -> Any:
        """Create a new sandbox for this run from snapshot.

        The snapshot is read from ``AGENTA_SERVICES_SANDBOX_SNAPSHOT_PYTHON``.

        Raises:
            RuntimeError: If the client is not initialized, the snapshot
                variable is missing, or sandbox creation fails.
        """
        try:
            if self.daytona is None:
                raise RuntimeError("Daytona client not initialized")

            snapshot_id = os.getenv("AGENTA_SERVICES_SANDBOX_SNAPSHOT_PYTHON")

            if not snapshot_id:
                raise RuntimeError(
                    "AGENTA_SERVICES_SANDBOX_SNAPSHOT_PYTHON environment variable is required. "
                    "Set it to the Daytona sandbox ID or snapshot name you want to use."
                )

            from daytona import CreateSandboxFromSnapshotParams

            return self.daytona.create(
                CreateSandboxFromSnapshotParams(
                    snapshot=snapshot_id,
                    ephemeral=True,
                )
            )

        except Exception as e:
            raise RuntimeError(f"Failed to create sandbox from snapshot: {e}") from e

    @staticmethod
    def _parse_result(stdout: str) -> Union[float, None]:
        """Extract the score from the sandbox stdout.

        Scans the lines from last to first and returns the value of the first
        JSON object that has a ``"result"`` key holding a number or ``None``.

        Raises:
            ValueError: If no such line is found.
        """
        for line in reversed(stdout.strip().split("\n")):
            if not line.strip():
                continue
            try:
                result_obj = json.loads(line)
            except json.JSONDecodeError:
                # User code may print arbitrary text; keep looking.
                continue
            if isinstance(result_obj, dict) and "result" in result_obj:
                result = result_obj["result"]
                if result is None:
                    return None
                if isinstance(result, (float, int)):
                    return float(result)

        raise ValueError("Could not parse evaluation result from Daytona output")

    @staticmethod
    def _delete_sandbox(sandbox: Any) -> None:
        """Delete the sandbox, logging (not raising) if deletion fails."""
        try:
            sandbox.delete()
        except Exception as e:
            # Best effort: a cleanup failure must not hide the run's outcome.
            log.warning(f"Failed to delete Daytona sandbox: {e}")

    def run(
        self,
        code: str,
        app_params: Dict[str, Any],
        inputs: Dict[str, Any],
        output: Union[dict, str],
        correct_answer: Any,
    ) -> Union[float, None]:
        """
        Execute provided Python code in Daytona sandbox.

        The code must define an `evaluate()` function that takes
        (app_params, inputs, output, correct_answer) and returns a float (0-1).

        Args:
            code: The Python code to be executed
            app_params: The parameters of the app variant
            inputs: Inputs to be used during code execution
            output: The output of the app variant after being called
            correct_answer: The correct answer (or target) for comparison

        Returns:
            The score as a float, or None if the evaluation produced no
            numeric result

        Raises:
            RuntimeError: If the client or sandbox cannot be created, the
                sandbox reports an error, or the result cannot be parsed.
        """
        self._initialize_client()
        sandbox: Sandbox = self._create_sandbox()

        try:
            # Prepare all parameters as a single dict
            params_json = json.dumps(
                {
                    "app_params": app_params,
                    "inputs": inputs,
                    "output": output,
                    "correct_answer": correct_answer,
                }
            )

            # Wrap the user code with the necessary context and evaluation
            wrapped_code = EVALUATION_CODE_TEMPLATE.format(
                params_json=params_json,
                user_code=code,
            )

            # Streamed output is collected by the callbacks below; the score
            # itself is parsed from the response object after the run.
            stdout_lines = []
            stderr_lines = []

            def on_stdout(line: str) -> None:
                """Capture stdout output."""
                stdout_lines.append(line)

            def on_stderr(line: str) -> None:
                """Capture stderr output."""
                stderr_lines.append(line)

            def on_error(error: Exception) -> None:
                """Capture errors."""
                log.error(f"[ERROR] {type(error).__name__}: {error}")

            # Execute the code in the Daytona sandbox
            response = sandbox.code_interpreter.run_code(
                wrapped_code,
                on_stdout=on_stdout,
                on_stderr=on_stderr,
                on_error=on_error,
            )

            # Response has stdout, stderr, and error fields; guard against a
            # missing or None stdout so parsing never crashes on .strip().
            response_stdout = getattr(response, "stdout", None) or ""
            response_error = getattr(response, "error", None)

            if response_error:
                log.error(f"Sandbox execution error: {response_error}")
                raise RuntimeError(f"Sandbox execution failed: {response_error}")

            return self._parse_result(response_stdout)

        except Exception as e:
            log.error(f"Error during Daytona code execution: {e}", exc_info=True)
            raise RuntimeError(f"Error during Daytona code execution: {e}") from e

        finally:
            # Always release the sandbox, including when execution failed.
            self._delete_sandbox(sandbox)

    def cleanup(self) -> None:
        """Clean up Daytona client resources by dropping the client reference."""
        self.daytona = None

    def __del__(self):
        """Ensure cleanup on deletion."""
        try:
            self.cleanup()
        except Exception:
            # Deliberately silent: errors raised from __del__ cannot be handled.
            pass
@@ -0,0 +1,108 @@
1
+ from typing import Any, Dict, Union, Text
2
+
3
+ from RestrictedPython import safe_builtins, compile_restricted, utility_builtins
4
+ from RestrictedPython.Eval import (
5
+ default_guarded_getiter,
6
+ default_guarded_getitem,
7
+ )
8
+ from RestrictedPython.Guards import (
9
+ guarded_iter_unpack_sequence,
10
+ full_write_guard,
11
+ )
12
+
13
+ from agenta.sdk.workflows.runners.base import CodeRunner
14
+
15
+
16
class LocalRunner(CodeRunner):
    """Local code runner using RestrictedPython for safe execution."""

    def run(
        self,
        code: str,
        app_params: Dict[str, Any],
        inputs: Dict[str, Any],
        output: Union[dict, str],
        correct_answer: Any,
    ) -> Union[float, None]:
        """
        Execute provided Python code safely using RestrictedPython.

        The code must define an `evaluate()` function that takes
        (app_params, inputs, output, correct_answer).

        Args:
            code: The Python code to be executed
            app_params: The parameters of the app variant
            inputs: Inputs to be used during code execution
            output: The output of the app variant after being called
            correct_answer: The correct answer (or target) for comparison

        Returns:
            The value returned by `evaluate()`, converted to float

        Raises:
            KeyError: If a key lookup fails, e.g. the code does not define
                `evaluate`
            SyntaxError: If the code cannot be compiled or executed due to a
                syntax error
            RuntimeError: For any other failure, including a result that
                cannot be converted to float
        """
        # Define the available built-ins
        local_builtins = safe_builtins.copy()

        # Add the __import__ built-in function to the local builtins
        # NOTE(review): this is the unrestricted built-in, so the
        # allowed_imports list below does not by itself limit what an
        # `import` statement in user code can load - confirm that callers
        # screen imports before the code reaches this runner.
        local_builtins["__import__"] = __import__

        # Define supported packages
        allowed_imports = [
            "math",
            "random",
            "datetime",
            "json",
            "requests",
            "typing",
        ]

        # Pre-import the supported packages and expose them as built-ins
        allowed_modules = {
            package_name: __import__(package_name)
            for package_name in allowed_imports
        }
        local_builtins.update(allowed_modules)
        local_builtins.update(utility_builtins)

        # Define the environment for the code execution
        environment = {
            "_getiter_": default_guarded_getiter,
            "_getitem_": default_guarded_getitem,
            "_iter_unpack_sequence_": guarded_iter_unpack_sequence,
            "_write_": full_write_guard,
            "__builtins__": local_builtins,
        }

        try:
            # Compile inside the try block so that compile-time syntax errors
            # are reported through the same SyntaxError handler as below.
            byte_code = compile_restricted(code, filename="<inline>", mode="exec")

            # Execute the code
            exec(byte_code, environment)

            # Call the evaluation function, extract the result
            result = environment["evaluate"](app_params, inputs, output, correct_answer)

            # Attempt to convert result to float
            if isinstance(result, (float, int, str)):
                try:
                    result = float(result)
                except ValueError as e:
                    raise ValueError(
                        f"Result cannot be converted to float: {e}"
                    ) from e

            if not isinstance(result, float):
                raise TypeError(
                    f"Result is not a float after conversion: {type(result)}"
                )

            return result

        except KeyError as e:
            raise KeyError(f"Missing expected key in environment: {e}") from e

        except SyntaxError as e:
            raise SyntaxError(f"Syntax error in provided code: {e}") from e

        except Exception as e:
            # Also wraps the ValueError/TypeError raised above for bad results.
            raise RuntimeError(f"Error during code execution: {e}") from e
@@ -0,0 +1,31 @@
1
+ import os
2
+ from agenta.sdk.workflows.runners.base import CodeRunner
3
+ from agenta.sdk.workflows.runners.local import LocalRunner
4
+ from agenta.sdk.workflows.runners.daytona import DaytonaRunner
5
+
6
+
7
def get_runner() -> CodeRunner:
    """
    Registry to get the appropriate code runner based on environment configuration.

    Uses AGENTA_SERVICES_SANDBOX_RUNNER environment variable (case-insensitive,
    surrounding whitespace ignored):
    - "local" (default, also used when the variable is unset or empty):
      Uses RestrictedPython for local execution
    - "daytona": Uses Daytona remote sandbox

    Returns:
        CodeRunner: An instance of LocalRunner or DaytonaRunner

    Raises:
        ValueError: If the variable holds an unsupported value, or if Daytona
            runner is selected but required environment variables are missing
    """
    # An empty value (e.g. `VAR=` in an env file) falls back to the default
    # instead of being rejected as an unknown runner.
    runner_type = (
        os.getenv("AGENTA_SERVICES_SANDBOX_RUNNER", "").strip().lower() or "local"
    )

    if runner_type == "daytona":
        return DaytonaRunner()

    if runner_type == "local":
        return LocalRunner()

    raise ValueError(
        f"Unknown AGENTA_SERVICES_SANDBOX_RUNNER value: {runner_type}. "
        f"Supported values: 'local', 'daytona'"
    )
@@ -1,14 +1,9 @@
1
1
  from typing import Union, Text, Dict, Any
2
2
 
3
- from RestrictedPython import safe_builtins, compile_restricted, utility_builtins
4
- from RestrictedPython.Eval import (
5
- default_guarded_getiter,
6
- default_guarded_getitem,
7
- )
8
- from RestrictedPython.Guards import (
9
- guarded_iter_unpack_sequence,
10
- full_write_guard,
11
- )
3
+ from agenta.sdk.workflows.runners import get_runner
4
+
5
+ # Cache for the runner instance
6
+ _runner = None
12
7
 
13
8
 
14
9
  def is_import_safe(python_code: Text) -> bool:
@@ -36,83 +31,25 @@ def execute_code_safely(
36
31
  code: Text,
37
32
  ) -> Union[float, None]:
38
33
  """
39
- Execute the provided Python code safely using RestrictedPython.
34
+ Execute the provided Python code safely.
35
+
36
+ Uses the configured runner (local RestrictedPython or remote Daytona)
37
+ based on the AGENTA_SERVICES_SANDBOX_RUNNER environment variable.
40
38
 
41
39
  Args:
42
- - app_params (Dict[str, str]): The parameters of the app variant.
43
- - inputs (dict): Inputs to be used during code execution.
44
- - output (str): The output of the app variant after being called.
45
- - correct_answer (str): The correct answer (or target) of the app variant.
40
+ - app_params (Dict[str, Any]): The parameters of the app variant.
41
+ - inputs (Dict[str, Any]): Inputs to be used during code execution.
42
+ - output (Union[dict, str]): The output of the app variant after being called.
43
+ - correct_answer (Any): The correct answer (or target) of the app variant.
46
44
  - code (Text): The Python code to be executed.
47
- - datapoint (Dict[str, str]): The test datapoint.
48
45
 
49
46
  Returns:
50
- - (float): Result of the execution if successful. Should be between 0 and 1.
51
- - None if execution fails or result is not a float between 0 and 1.
47
+ - (float): Result of the execution if successful. Should be between 0 and 1.
48
+ - None if execution fails or result is not a float between 0 and 1.
52
49
  """
53
- # Define the available built-ins
54
- local_builtins = safe_builtins.copy()
55
-
56
- # Add the __import__ built-in function to the local builtins
57
- local_builtins["__import__"] = __import__
58
-
59
- # Define supported packages
60
- allowed_imports = [
61
- "math",
62
- "random",
63
- "datetime",
64
- "json",
65
- "requests",
66
- "typing",
67
- ]
68
-
69
- # Create a dictionary to simulate allowed imports
70
- allowed_modules = {}
71
- for package_name in allowed_imports:
72
- allowed_modules[package_name] = __import__(package_name)
73
-
74
- # Add the allowed modules to the local built-ins
75
- local_builtins.update(allowed_modules)
76
- local_builtins.update(utility_builtins)
77
-
78
- # Define the environment for the code execution
79
- environment = {
80
- "_getiter_": default_guarded_getiter,
81
- "_getitem_": default_guarded_getitem,
82
- "_iter_unpack_sequence_": guarded_iter_unpack_sequence,
83
- "_write_": full_write_guard,
84
- "__builtins__": local_builtins,
85
- }
86
-
87
- # Compile the code in a restricted environment
88
- byte_code = compile_restricted(code, filename="<inline>", mode="exec")
89
-
90
- # Call the evaluation function, extract the result if it exists
91
- # and is a float between 0 and 1
92
- try:
93
- # Execute the code
94
- exec(byte_code, environment)
95
-
96
- # Call the evaluation function, extract the result
97
- result = environment["evaluate"](app_params, inputs, output, correct_answer)
98
-
99
- # Attempt to convert result to float
100
- if isinstance(result, (float, int, str)):
101
- try:
102
- result = float(result)
103
- except ValueError as e:
104
- raise ValueError(f"Result cannot be converted to float: {e}")
105
-
106
- if not isinstance(result, float):
107
- raise TypeError(f"Result is not a float after conversion: {type(result)}")
108
-
109
- return result
110
-
111
- except KeyError as e:
112
- raise KeyError(f"Missing expected key in environment: {e}")
50
+ global _runner
113
51
 
114
- except SyntaxError as e:
115
- raise SyntaxError(f"Syntax error in provided code: {e}")
52
+ if _runner is None:
53
+ _runner = get_runner()
116
54
 
117
- except Exception as e:
118
- raise RuntimeError(f"Error during code execution: {e}")
55
+ return _runner.run(code, app_params, inputs, output, correct_answer)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agenta
3
- Version: 0.63.2
3
+ Version: 0.68.0
4
4
  Summary: The SDK for agenta is an open-source LLMOps platform.
5
5
  Keywords: LLMOps,LLM,evaluation,prompt engineering
6
6
  Author: Mahmoud Mabrouk
@@ -15,29 +15,24 @@ Classifier: Programming Language :: Python :: 3.13
15
15
  Classifier: Programming Language :: Python :: 3.14
16
16
  Classifier: Programming Language :: Python :: 3.9
17
17
  Classifier: Topic :: Software Development :: Libraries
18
- Requires-Dist: decorator (>=5.2.1,<6.0.0)
19
- Requires-Dist: fastapi (>=0.116.0,<0.117.0)
20
- Requires-Dist: google-auth (>=2.23,<3)
21
- Requires-Dist: h11 (>=0.16.0,<0.17.0)
22
- Requires-Dist: httpx (>=0.28.0,<0.29.0)
23
- Requires-Dist: huggingface-hub (<0.31.0)
24
- Requires-Dist: importlib-metadata (>=8.0.0,<9.0)
25
- Requires-Dist: jinja2 (>=3.1.6,<4.0.0)
26
- Requires-Dist: litellm (==1.78.7)
27
- Requires-Dist: openai (>=1.106.0)
28
- Requires-Dist: opentelemetry-api (>=1.27.0,<2.0.0)
29
- Requires-Dist: opentelemetry-exporter-otlp-proto-http (>=1.27.0,<2.0.0)
30
- Requires-Dist: opentelemetry-instrumentation (>=0.56b0)
31
- Requires-Dist: opentelemetry-sdk (>=1.27.0,<2.0.0)
18
+ Requires-Dist: daytona (>=0.121.0,<0.122.0)
19
+ Requires-Dist: fastapi (>=0.122,<0.123)
20
+ Requires-Dist: httpx (>=0.28,<0.29)
21
+ Requires-Dist: importlib-metadata (>=8,<9)
22
+ Requires-Dist: jinja2 (>=3,<4)
23
+ Requires-Dist: litellm (>=1,<2)
24
+ Requires-Dist: openai (>=2,<3)
25
+ Requires-Dist: opentelemetry-api (>=1,<2)
26
+ Requires-Dist: opentelemetry-exporter-otlp-proto-http (>=1,<2)
27
+ Requires-Dist: opentelemetry-instrumentation (>=0.59b0,<0.60)
28
+ Requires-Dist: opentelemetry-sdk (>=1,<2)
32
29
  Requires-Dist: pydantic (>=2,<3)
33
- Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
34
- Requires-Dist: python-jsonpath (>=2.0.0,<3.0.0)
35
- Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
36
- Requires-Dist: restrictedpython (>=8.0,<9.0) ; python_version >= "3.11" and python_version < "3.14"
37
- Requires-Dist: starlette (>=0.47.0,<0.48.0)
38
- Requires-Dist: structlog (>=25.2.0,<26.0.0)
39
- Requires-Dist: tiktoken (==0.11.0)
40
- Requires-Dist: toml (>=0.10.2,<0.11.0)
30
+ Requires-Dist: python-dotenv (>=1,<2)
31
+ Requires-Dist: python-jsonpath (>=2,<3)
32
+ Requires-Dist: pyyaml (>=6,<7)
33
+ Requires-Dist: restrictedpython (>=8,<9) ; python_version >= "3.11" and python_version < "3.14"
34
+ Requires-Dist: structlog (>=25,<26)
35
+ Requires-Dist: toml (>=0.10,<0.11)
41
36
  Project-URL: Documentation, https://agenta.ai/docs/
42
37
  Project-URL: Homepage, https://agenta.ai
43
38
  Project-URL: Repository, https://github.com/agenta-ai/agenta
@@ -224,7 +224,7 @@ agenta/client/backend/types/plan.py,sha256=_285lMKz5ehwET6j8JM11WlrC_J1HaMSTNwLy
224
224
  agenta/client/backend/types/project_membership_request.py,sha256=iNu9ahVXguMOD4d-FGBy7Nk27PlGBLtqFwPlja6WAe4,679
225
225
  agenta/client/backend/types/project_request.py,sha256=8AHeu6Cta61Xtj8bnAdmmRN9OpZHssPNHTdwgPblyPk,679
226
226
  agenta/client/backend/types/project_scope.py,sha256=i8bwPAsuFMPcAalpN_GBnoAdvMwb5n5IosD8FjOk6no,729
227
- agenta/client/backend/types/projects_response.py,sha256=Ek-oRlJCfn6o3A_uWPXivpml0H_mQXjQ6BXNVWVGQTk,852
227
+ agenta/client/backend/types/projects_response.py,sha256=jjG6_i_Zcx0ioUhvKT0tflxy6ZSDykHalCERJ4NpAmw,889
228
228
  agenta/client/backend/types/recursive_types.py,sha256=vhXwrFzfA5qsalENaVy5dLUEUS3dv8UIet88WNwBGbE,910
229
229
  agenta/client/backend/types/reference.py,sha256=fU39dioX8RdqbGK-Y-4sb0zOcgf5Hv03wadOLkwEWTA,923
230
230
  agenta/client/backend/types/reference_dto.py,sha256=P-qyS63Sn1Ih5cz_ayzJe36UqkKEp6gRNX96kWh0EG8,690
@@ -248,7 +248,7 @@ agenta/client/backend/types/status_dto.py,sha256=uB5qmKQATBO4dbsYv90gm4a49EDXhpd
248
248
  agenta/client/backend/types/tags_request.py,sha256=2Zu42tyEoa2OfEQ_cArZ9EzRYQHfEf_F9QCmksKKvCI,770
249
249
  agenta/client/backend/types/testcase_response.py,sha256=xfCMXlt1FGFKZNOYp1IHRwGcI_35I_a7y8gUUDXIkQE,838
250
250
  agenta/client/backend/types/testset.py,sha256=wLYSXJyjvG5X-o-eRP4p6TdgPZTYDXBe7H7xtyeqczE,1399
251
- agenta/client/backend/types/testset_output_response.py,sha256=QRMX6ypP_LuwhCz-HEOBqE22CC-cEa69AJ65iSRzduA,731
251
+ agenta/client/backend/types/testset_output_response.py,sha256=eeNYr5cvEoYD1Grhr6xmEKRSfVAp17dqFJxTiqr8V0w,761
252
252
  agenta/client/backend/types/testset_request.py,sha256=-jxEdDlmtxM4Z07ruo0nyJWBKu88ElM3XcPOS4cHp7I,579
253
253
  agenta/client/backend/types/testset_response.py,sha256=KES03ufUqhK5xJhzpCK1o0N-v5lpR-BQF3cov9bas2g,619
254
254
  agenta/client/backend/types/testset_simple_response.py,sha256=qQnuFDPhqFeRzKcBNxRyXHe5KktG5NZOs6WoE7PKSCg,582
@@ -306,7 +306,7 @@ agenta/client/types.py,sha256=wBGDVktTL2EblEKW23Y-VrFp7V_JHLPMHltt2jEkF0Q,129
306
306
  agenta/config.py,sha256=0VrTqduB4g8Mt_Ll7ffFcEjKF5qjTUIxmUtTPW2ygWw,653
307
307
  agenta/config.toml,sha256=sIORbhnyct2R9lJrquxhNL4pHul3O0R7iaipCoja5MY,193
308
308
  agenta/sdk/__init__.py,sha256=7QUpZ409HcLB22A80qaZydzhs6afPnCvG0Tfq6PE4fk,3011
309
- agenta/sdk/agenta_init.py,sha256=hBFb0weC54fIReu95779ueUYlBZDqK446nUi8gTdZNE,7280
309
+ agenta/sdk/agenta_init.py,sha256=Vm14_nzyObAmwQFQvAANLsOjqy5AwYIFa6MrdnJ0aqY,7286
310
310
  agenta/sdk/assets.py,sha256=51uSUp-qlFLB-nLSrDDTDXOQhM-2yGIuODgALYt1i9Y,8699
311
311
  agenta/sdk/context/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
312
312
  agenta/sdk/context/running.py,sha256=3gEuUdQrJwcuN93MlXFZ6aHXNxUW6dUk_EudgaxOkCU,907
@@ -333,14 +333,14 @@ agenta/sdk/engines/tracing/inline.py,sha256=y2S_MGGqmXgyUgbkNNyrb8_X-QtGuDy8Jwxl
333
333
  agenta/sdk/engines/tracing/processors.py,sha256=lRhT-ifu1LEPMOoqMzeX_qtWQ0cHbodUpSjlBGZcDZA,5149
334
334
  agenta/sdk/engines/tracing/propagation.py,sha256=Zu_z5In8eOhy0tkYzQOI09T4OwdjGMP74nhzvElvyFE,2593
335
335
  agenta/sdk/engines/tracing/spans.py,sha256=luZ6lB1mBqrilm2hXZx2ELx6sBQmZM9wThdr8G-yeyM,3715
336
- agenta/sdk/engines/tracing/tracing.py,sha256=NF3Vl_FzzH_rxRRu2puTUAbX34KHcz8olhqGbf7RARE,9281
336
+ agenta/sdk/engines/tracing/tracing.py,sha256=pt40BWz_GA6ycogPrqNhddpvrufB-vAVClIBGI9ON_s,9284
337
337
  agenta/sdk/evaluations/__init__.py,sha256=hFb_O8aNkDS0LuZxJYydLxvYLIBPNuab7JIYL87OXPc,105
338
- agenta/sdk/evaluations/metrics.py,sha256=AQGQau5njqc6dfPZHbry5tVep3cmuzi4woRM26cqP60,830
338
+ agenta/sdk/evaluations/metrics.py,sha256=rBXxPpI9T1QLI1AB6JXEFeCEGCCAjrzG9KBTOeLm04o,841
339
339
  agenta/sdk/evaluations/preview/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
340
340
  agenta/sdk/evaluations/preview/evaluate.py,sha256=fUVVsGlSwpdE97e2iCnGJKyHi-DeZzZwDedTEmgSyY8,27450
341
341
  agenta/sdk/evaluations/preview/utils.py,sha256=o-9GvQDhLgnbQ81r8A9zj49BDtd9Pe5_oJlO9ay_qTg,32049
342
342
  agenta/sdk/evaluations/results.py,sha256=3pe2c0oI1Wc1wFCKFeQnbe_iwtNn9U0MO_c4U3HtrAs,1590
343
- agenta/sdk/evaluations/runs.py,sha256=Hp6uxPI9HDu35ZtOoL-_xt53CvCjtbaxe4c3CGyJH-A,3429
343
+ agenta/sdk/evaluations/runs.py,sha256=8euS2Zfzcw9a7SDOnhK4DymuYNIjIXQ1iWlRuuf0q78,3398
344
344
  agenta/sdk/evaluations/scenarios.py,sha256=XlsVa_M8FmnSvVniF_FEZUhZDLYIotx4V0SRmPEzWS8,1024
345
345
  agenta/sdk/litellm/__init__.py,sha256=Bpz1gfHQc0MN1yolWcjifLWznv6GjHggvRGQSpxpihM,37
346
346
  agenta/sdk/litellm/litellm.py,sha256=E7omr9kz0yn8CUK5O0g0QUlDA4bD5fllYtHK9RL2bXE,10646
@@ -377,7 +377,7 @@ agenta/sdk/middlewares/running/resolver.py,sha256=I4nX7jpsq7oKSwQnlsoLm9kh94G2qx
377
377
  agenta/sdk/middlewares/running/vault.py,sha256=DqeWyViDDRJycXtRKEO1S7ihlBfCnPFtXSfj_6trW98,3845
378
378
  agenta/sdk/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
379
379
  agenta/sdk/models/blobs.py,sha256=g8zV6V3UcuskEV6tK_tvt8GDWqx1okfE3Oy4n59mgU0,576
380
- agenta/sdk/models/evaluations.py,sha256=HQfV2Wyt7FINgaNlup9NAWQSgRVu6eaEDB6D2mqbQlI,2599
380
+ agenta/sdk/models/evaluations.py,sha256=twHFZQlAT2xYSqefJdpwtDLSu5GpsDmswV_o04ACyFs,3003
381
381
  agenta/sdk/models/git.py,sha256=ol5H3lu6s3Lx2l7K3glbCqcvAnPXsDsYa-OsSR0CupM,2415
382
382
  agenta/sdk/models/shared.py,sha256=ynHFXsOgkpvzHJuBcmPp2lMj7ia6x0e9LvNAiKVpPSo,4020
383
383
  agenta/sdk/models/testsets.py,sha256=cHCZEWqnDMGqKKH9yVuM5FroG10YFKJ4nF5xZniE-Ds,3415
@@ -392,7 +392,7 @@ agenta/sdk/tracing/inline.py,sha256=UKt10JGKdS6gVDIpExng3UC8vegAcuA2KxlzyvSdUZ0,
392
392
  agenta/sdk/tracing/processors.py,sha256=A7rsaicpFq9xZgyhU3hV5ZQoz6X33gB81G9IhB-x3Xg,8597
393
393
  agenta/sdk/tracing/propagation.py,sha256=Zu_z5In8eOhy0tkYzQOI09T4OwdjGMP74nhzvElvyFE,2593
394
394
  agenta/sdk/tracing/spans.py,sha256=r-R68d12BjvilHgbqN-1xp26qxdVRzxRcFUO-IB_u94,3780
395
- agenta/sdk/tracing/tracing.py,sha256=5M_cyptJFR9wnMcRktSB5atjYSTZ8CsdwYtAbFXhRpI,9233
395
+ agenta/sdk/tracing/tracing.py,sha256=mogsWlTwz-pYvzpst4xb4kjuGRtywzrU9GO9T9stvZw,9236
396
396
  agenta/sdk/types.py,sha256=41yIQagl5L_7WFInjiACHwuNfCQqDrrDOusD17kJGWs,28469
397
397
  agenta/sdk/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
398
398
  agenta/sdk/utils/cache.py,sha256=Er1Hvu1QVLGl99HkUHZ2lKBg3f6PnpkD1uZRvK9r3u4,1429
@@ -414,8 +414,13 @@ agenta/sdk/workflows/configurations.py,sha256=k1YGP3y49WwiFMaQ1rKpCHqCYWWe6nQ7EP
414
414
  agenta/sdk/workflows/errors.py,sha256=x582njGfNTMfu4v8bhHdU_Wf_oa8_mHXc3CEE9F2ZBk,8350
415
415
  agenta/sdk/workflows/handlers.py,sha256=z_DtfgiejsToO4kqXvXBnO36hd0rk97Y2J2hYlIfZmo,59496
416
416
  agenta/sdk/workflows/interfaces.py,sha256=I5Bfil0awdL1TAb_vHqW5n5BHxSBOTuDMhOi4RnUt8A,36315
417
- agenta/sdk/workflows/sandbox.py,sha256=pzy5mdNDjBAQu1qFwMAxHirWiKX20mq2i7lrwA-ABjc,3816
417
+ agenta/sdk/workflows/runners/__init__.py,sha256=HoYaKf9G03WEUbY7B1uX4O_6xE5dfliNCG1nEuWp1ks,87
418
+ agenta/sdk/workflows/runners/base.py,sha256=WgX0OgbLL5PHeGqLNAvrV7NC3FHDWVfU7v9EBj8MIW0,857
419
+ agenta/sdk/workflows/runners/daytona.py,sha256=g09014ocerh7X-sLOJ01i5ko6YysdXCPfnrOfn9MWQQ,9791
420
+ agenta/sdk/workflows/runners/local.py,sha256=SJ1msO35mQ4XzlqZi9fE25QJu-PDnYru8a66pMo-5vs,3636
421
+ agenta/sdk/workflows/runners/registry.py,sha256=bHM7hTiFawdOM30RwCHeAwFN8C0zGF0at1zsI-57tlw,1067
422
+ agenta/sdk/workflows/sandbox.py,sha256=O1Opeg4hc9jygAzyF5cCsStmMjYgrahA_aF0JdGbBO0,1734
418
423
  agenta/sdk/workflows/utils.py,sha256=UDG5or8qqiSCpqi0Fphjxkkhu4MdbiCkHn_yIQcTd0c,11664
419
- agenta-0.63.2.dist-info/METADATA,sha256=6xWqYfDrqOgIQ1XrtvnDYMjWkgWfL9aFqWAw_uUKDbM,31855
420
- agenta-0.63.2.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
421
- agenta-0.63.2.dist-info/RECORD,,
424
+ agenta-0.68.0.dist-info/METADATA,sha256=B8evMuWFY51eFJX03VPfmeyJ-ns-4wFnBSxVtcmWj1A,31572
425
+ agenta-0.68.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
426
+ agenta-0.68.0.dist-info/RECORD,,