agenta 0.70.1__py3-none-any.whl → 0.75.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. agenta/__init__.py +9 -3
  2. agenta/sdk/__init__.py +2 -4
  3. agenta/sdk/agenta_init.py +22 -75
  4. agenta/sdk/assets.py +57 -0
  5. agenta/sdk/context/serving.py +2 -0
  6. agenta/sdk/contexts/routing.py +2 -0
  7. agenta/sdk/contexts/running.py +3 -2
  8. agenta/sdk/decorators/running.py +8 -4
  9. agenta/sdk/decorators/serving.py +65 -26
  10. agenta/sdk/decorators/tracing.py +51 -30
  11. agenta/sdk/engines/tracing/inline.py +8 -1
  12. agenta/sdk/engines/tracing/processors.py +23 -12
  13. agenta/sdk/evaluations/preview/evaluate.py +36 -8
  14. agenta/sdk/evaluations/runs.py +2 -1
  15. agenta/sdk/litellm/mockllm.py +2 -2
  16. agenta/sdk/managers/config.py +3 -1
  17. agenta/sdk/managers/secrets.py +25 -8
  18. agenta/sdk/managers/testsets.py +143 -227
  19. agenta/sdk/middleware/config.py +3 -1
  20. agenta/sdk/middleware/otel.py +3 -1
  21. agenta/sdk/middleware/vault.py +33 -18
  22. agenta/sdk/middlewares/routing/otel.py +1 -1
  23. agenta/sdk/middlewares/running/vault.py +33 -17
  24. agenta/sdk/router.py +30 -5
  25. agenta/sdk/tracing/inline.py +8 -1
  26. agenta/sdk/tracing/processors.py +8 -3
  27. agenta/sdk/tracing/propagation.py +9 -12
  28. agenta/sdk/types.py +19 -21
  29. agenta/sdk/utils/client.py +10 -9
  30. agenta/sdk/utils/lazy.py +253 -0
  31. agenta/sdk/workflows/builtin.py +2 -0
  32. agenta/sdk/workflows/configurations.py +1 -0
  33. agenta/sdk/workflows/handlers.py +236 -81
  34. agenta/sdk/workflows/interfaces.py +47 -0
  35. agenta/sdk/workflows/runners/base.py +6 -2
  36. agenta/sdk/workflows/runners/daytona.py +250 -131
  37. agenta/sdk/workflows/runners/local.py +22 -56
  38. agenta/sdk/workflows/runners/registry.py +1 -1
  39. agenta/sdk/workflows/sandbox.py +17 -5
  40. agenta/sdk/workflows/templates.py +81 -0
  41. agenta/sdk/workflows/utils.py +6 -0
  42. {agenta-0.70.1.dist-info → agenta-0.75.0.dist-info}/METADATA +4 -8
  43. {agenta-0.70.1.dist-info → agenta-0.75.0.dist-info}/RECORD +44 -44
  44. agenta/config.py +0 -25
  45. agenta/config.toml +0 -4
  46. {agenta-0.70.1.dist-info → agenta-0.75.0.dist-info}/WHEEL +0 -0
@@ -1,5 +1,5 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Any, Dict, Union
2
+ from typing import Any, Dict, Union, Optional
3
3
 
4
4
 
5
5
  class CodeRunner(ABC):
@@ -13,16 +13,20 @@ class CodeRunner(ABC):
13
13
  inputs: Dict[str, Any],
14
14
  output: Union[dict, str],
15
15
  correct_answer: Any,
16
+ runtime: Optional[str] = None,
17
+ templates: Optional[Dict[str, str]] = None,
16
18
  ) -> Union[float, None]:
17
19
  """
18
20
  Execute code and return a float score between 0 and 1.
19
21
 
20
22
  Args:
21
- code: Python code to execute
23
+ code: Code to execute
22
24
  app_params: Application parameters
23
25
  inputs: Input data for the code
24
26
  output: Output from the application variant
25
27
  correct_answer: Expected/correct answer for comparison
28
+ runtime: Runtime environment (python, javascript, typescript), None = python
29
+ templates: Wrapper templates keyed by runtime.
26
30
 
27
31
  Returns:
28
32
  Float score between 0 and 1, or None if execution fails
@@ -1,42 +1,55 @@
1
1
  import os
2
2
  import json
3
- from typing import Any, Dict, Union, Optional
4
-
5
- from daytona import Daytona, DaytonaConfig, Sandbox
3
+ from contextlib import contextmanager
4
+ from typing import Any, Dict, Generator, Union, Optional, TYPE_CHECKING
6
5
 
6
+ import agenta as ag
7
7
  from agenta.sdk.workflows.runners.base import CodeRunner
8
+ from agenta.sdk.contexts.running import RunningContext
9
+ from agenta.sdk.utils.lazy import _load_daytona
8
10
 
9
11
  from agenta.sdk.utils.logging import get_module_logger
10
12
 
13
+ if TYPE_CHECKING:
14
+ from daytona import Sandbox
15
+
11
16
  log = get_module_logger(__name__)
12
17
 
13
- # Template for wrapping user code with evaluation context
14
- EVALUATION_CODE_TEMPLATE = """
15
- import json
16
18
 
17
- # Parse all parameters from a single dict
18
- params = json.loads({params_json!r})
19
- app_params = params['app_params']
20
- inputs = params['inputs']
21
- output = params['output']
22
- correct_answer = params['correct_answer']
19
+ def _extract_error_message(error_text: str) -> str:
20
+ """Extract a clean error message from a Python traceback.
21
+
22
+ Given a full traceback string, extracts just the final error line
23
+ (e.g., "NameError: name 'foo' is not defined") instead of the full
24
+ noisy traceback with base64-encoded code.
25
+
26
+ Args:
27
+ error_text: Full error/traceback string
28
+
29
+ Returns:
30
+ Clean error message, or original text if extraction fails
31
+ """
32
+ if not error_text:
33
+ return "Unknown error"
23
34
 
24
- # User-provided evaluation code
25
- {user_code}
35
+ lines = error_text.strip().split("\n")
26
36
 
27
- # Execute and capture result
28
- result = evaluate(app_params, inputs, output, correct_answer)
37
+ # Look for common Python error patterns from the end
38
+ for line in reversed(lines):
39
+ line = line.strip()
40
+ # Match patterns like "NameError: ...", "ValueError: ...", etc.
41
+ if ": " in line and not line.startswith("File "):
42
+ # Check if it looks like an error line (ErrorType: message)
43
+ parts = line.split(": ", 1)
44
+ if parts[0].replace(".", "").replace("_", "").isalnum():
45
+ return line
29
46
 
30
- # Ensure result is a float
31
- if isinstance(result, (float, int, str)):
32
- try:
33
- result = float(result)
34
- except (ValueError, TypeError):
35
- result = None
47
+ # Fallback: return last non-empty line
48
+ for line in reversed(lines):
49
+ if line.strip():
50
+ return line.strip()
36
51
 
37
- # Print result for capture
38
- print(json.dumps({{"result": result}}))
39
- """
52
+ return error_text[:200] if len(error_text) > 200 else error_text
40
53
 
41
54
 
42
55
  class DaytonaRunner(CodeRunner):
@@ -57,7 +70,7 @@ class DaytonaRunner(CodeRunner):
57
70
  return
58
71
 
59
72
  self._initialized = True
60
- self.daytona: Optional[Daytona] = None
73
+ self.daytona = None
61
74
  self._validate_config()
62
75
 
63
76
  def _validate_config(self) -> None:
@@ -77,6 +90,8 @@ class DaytonaRunner(CodeRunner):
77
90
  return
78
91
 
79
92
  try:
93
+ Daytona, DaytonaConfig, _, _ = _load_daytona()
94
+
80
95
  # Get configuration with fallbacks
81
96
  api_url = os.getenv("DAYTONA_API_URL") or "https://app.daytona.io/api"
82
97
  api_key = os.getenv("DAYTONA_API_KEY")
@@ -92,26 +107,114 @@ class DaytonaRunner(CodeRunner):
92
107
  except Exception as e:
93
108
  raise RuntimeError(f"Failed to initialize Daytona client: {e}")
94
109
 
95
- def _create_sandbox(self) -> Any:
96
- """Create a new sandbox for this run from snapshot."""
110
+ def _get_provider_env_vars(self) -> Dict[str, str]:
111
+ """
112
+ Fetch user secrets and extract standard provider keys as environment variables.
113
+
114
+ Returns:
115
+ Dictionary of environment variables for standard providers
116
+ """
117
+ env_vars = {}
118
+
119
+ # Get secrets from context (set by vault middleware)
120
+ ctx = RunningContext.get()
121
+ secrets = getattr(ctx, "vault_secrets", [])
122
+
123
+ # Standard provider keys mapping
124
+ provider_env_mapping = {
125
+ "openai": "OPENAI_API_KEY",
126
+ "cohere": "COHERE_API_KEY",
127
+ "anyscale": "ANYSCALE_API_KEY",
128
+ "deepinfra": "DEEPINFRA_API_KEY",
129
+ "alephalpha": "ALEPHALPHA_API_KEY",
130
+ "groq": "GROQ_API_KEY",
131
+ "mistralai": "MISTRALAI_API_KEY",
132
+ "anthropic": "ANTHROPIC_API_KEY",
133
+ "perplexityai": "PERPLEXITYAI_API_KEY",
134
+ "togetherai": "TOGETHERAI_API_KEY",
135
+ "openrouter": "OPENROUTER_API_KEY",
136
+ "gemini": "GEMINI_API_KEY",
137
+ }
138
+
139
+ # Extract provider keys from secrets
140
+ for secret in secrets:
141
+ if secret.get("kind") == "provider_key":
142
+ secret_data = secret.get("data", {})
143
+ provider_kind = secret_data.get("kind")
144
+
145
+ if provider_kind in provider_env_mapping:
146
+ provider_settings = secret_data.get("provider", {})
147
+ api_key = provider_settings.get("key")
148
+
149
+ if api_key:
150
+ env_var_name = provider_env_mapping[provider_kind]
151
+ env_vars[env_var_name] = api_key
152
+
153
+ return env_vars
154
+
155
+ def _create_sandbox(self, runtime: Optional[str] = None) -> Any:
156
+ """Create a new sandbox for this run from snapshot.
157
+
158
+ Args:
159
+ runtime: Runtime environment (python, javascript, typescript), None = python
160
+ """
97
161
  try:
98
162
  if self.daytona is None:
99
163
  raise RuntimeError("Daytona client not initialized")
100
164
 
101
- snapshot_id = os.getenv("AGENTA_SERVICES_SANDBOX_SNAPSHOT_PYTHON")
165
+ # Normalize runtime: None means python
166
+ runtime = runtime or "python"
167
+
168
+ # Select general snapshot
169
+ snapshot_id = os.getenv("DAYTONA_SNAPSHOT")
102
170
 
103
171
  if not snapshot_id:
104
172
  raise RuntimeError(
105
- "AGENTA_SERVICES_SANDBOX_SNAPSHOT_PYTHON environment variable is required. "
106
- "Set it to the Daytona sandbox ID or snapshot name you want to use."
173
+ f"No Daytona snapshot configured for runtime '{runtime}'. "
174
+ f"Set DAYTONA_SNAPSHOT environment variable."
107
175
  )
108
176
 
109
- from daytona import CreateSandboxFromSnapshotParams
177
+ _, _, _, CreateSandboxFromSnapshotParams = _load_daytona()
178
+
179
+ agenta_host = (
180
+ ag.DEFAULT_AGENTA_SINGLETON_INSTANCE.host
181
+ #
182
+ or ""
183
+ )
184
+ agenta_api_url = (
185
+ ag.DEFAULT_AGENTA_SINGLETON_INSTANCE.api_url
186
+ #
187
+ or ""
188
+ )
189
+ agenta_credentials = (
190
+ RunningContext.get().credentials
191
+ #
192
+ or ""
193
+ )
194
+ agenta_api_key = (
195
+ agenta_credentials[7:]
196
+ if agenta_credentials.startswith("ApiKey ")
197
+ else ""
198
+ )
199
+
200
+ # Get provider API keys from user secrets
201
+ provider_env_vars = self._get_provider_env_vars()
202
+
203
+ # Combine base env vars with provider keys
204
+ env_vars = {
205
+ "AGENTA_HOST": agenta_host,
206
+ "AGENTA_API_URL": agenta_api_url,
207
+ "AGENTA_API_KEY": agenta_api_key,
208
+ "AGENTA_CREDENTIALS": agenta_credentials,
209
+ **provider_env_vars, # Add provider API keys
210
+ }
110
211
 
111
212
  sandbox = self.daytona.create(
112
213
  CreateSandboxFromSnapshotParams(
113
214
  snapshot=snapshot_id,
114
215
  ephemeral=True,
216
+ env_vars=env_vars,
217
+ language=runtime,
115
218
  )
116
219
  )
117
220
 
@@ -120,6 +223,29 @@ class DaytonaRunner(CodeRunner):
120
223
  except Exception as e:
121
224
  raise RuntimeError(f"Failed to create sandbox from snapshot: {e}")
122
225
 
226
+ @contextmanager
227
+ def _sandbox_context(
228
+ self, runtime: Optional[str] = None
229
+ ) -> Generator["Sandbox", None, None]:
230
+ """Context manager for sandbox lifecycle.
231
+
232
+ Ensures sandbox is deleted even if an error occurs during execution.
233
+
234
+ Args:
235
+ runtime: Runtime environment (python, javascript, typescript), None = python
236
+
237
+ Yields:
238
+ Sandbox instance
239
+ """
240
+ sandbox = self._create_sandbox(runtime=runtime)
241
+ try:
242
+ yield sandbox
243
+ finally:
244
+ try:
245
+ sandbox.delete()
246
+ except Exception as e:
247
+ log.error("Failed to delete sandbox: %s", e)
248
+
123
249
  def run(
124
250
  self,
125
251
  code: str,
@@ -127,130 +253,123 @@ class DaytonaRunner(CodeRunner):
127
253
  inputs: Dict[str, Any],
128
254
  output: Union[dict, str],
129
255
  correct_answer: Any,
256
+ runtime: Optional[str] = None,
257
+ templates: Optional[Dict[str, str]] = None,
130
258
  ) -> Union[float, None]:
131
259
  """
132
- Execute provided Python code in Daytona sandbox.
260
+ Execute provided code in Daytona sandbox.
133
261
 
134
262
  The code must define an `evaluate()` function that takes
135
263
  (app_params, inputs, output, correct_answer) and returns a float (0-1).
136
264
 
137
265
  Args:
138
- code: The Python code to be executed
266
+ code: The code to be executed
139
267
  app_params: The parameters of the app variant
140
268
  inputs: Inputs to be used during code execution
141
269
  output: The output of the app variant after being called
142
270
  correct_answer: The correct answer (or target) for comparison
271
+ runtime: Runtime environment (python, javascript, typescript), None = python
272
+ templates: Wrapper templates keyed by runtime.
143
273
 
144
274
  Returns:
145
275
  Float score between 0 and 1, or None if execution fails
146
276
  """
147
- self._initialize_client()
148
- sandbox: Sandbox = self._create_sandbox()
149
-
150
- try:
151
- # Prepare all parameters as a single dict
152
- params = {
153
- "app_params": app_params,
154
- "inputs": inputs,
155
- "output": output,
156
- "correct_answer": correct_answer,
157
- }
158
- params_json = json.dumps(params)
159
-
160
- # Wrap the user code with the necessary context and evaluation
161
- wrapped_code = EVALUATION_CODE_TEMPLATE.format(
162
- params_json=params_json,
163
- user_code=code,
164
- )
165
-
166
- # Log the input parameters for debugging
167
- # log.debug("Input parameters to evaluation:")
168
- # print("\n" + "=" * 80)
169
- # print("INPUT PARAMETERS:")
170
- # print("=" * 80)
171
- # print(f"app_params: {app_params}")
172
- # print(f"inputs: {inputs}")
173
- # print(f"output: {output}")
174
- # print(f"correct_answer: {correct_answer}")
175
- # print("=" * 80 + "\n")
176
-
177
- # Log the generated code for debugging
178
- # log.debug("Generated code to send to Daytona:")
179
- # print("=" * 80)
180
- # print("GENERATED CODE TO SEND TO DAYTONA:")
181
- # print("=" * 80)
182
- # code_lines = wrapped_code.split("\n")
183
- # for i, line in enumerate(code_lines, 1):
184
- # log.debug(f" {i:3d}: {line}")
185
- # print(f" {i:3d}: {line}")
186
- # print("=" * 80)
187
- # print(f"Total lines: {len(code_lines)}")
188
- # print("=" * 80 + "\n")
189
-
190
- # Callback functions to capture output and errors
191
- stdout_lines = []
192
- stderr_lines = []
193
-
194
- def on_stdout(line: str) -> None:
195
- """Capture stdout output."""
196
- # log.debug(f"[STDOUT] {line}")
197
- # print(f"[STDOUT] {line}")
198
- stdout_lines.append(line)
199
-
200
- def on_stderr(line: str) -> None:
201
- """Capture stderr output."""
202
- # log.warning(f"[STDERR] {line}")
203
- # print(f"[STDERR] {line}")
204
- stderr_lines.append(line)
205
-
206
- def on_error(error: Exception) -> None:
207
- """Capture errors."""
208
- log.error(f"[ERROR] {type(error).__name__}: {error}")
209
- # print(f"[ERROR] {type(error).__name__}: {error}")
210
-
211
- # Execute the code in the Daytona sandbox
212
- # log.debug("Executing code in Daytona sandbox")
213
- response = sandbox.code_interpreter.run_code(
214
- wrapped_code,
215
- on_stdout=on_stdout,
216
- on_stderr=on_stderr,
217
- on_error=on_error,
218
- )
219
-
220
- # log.debug(f"Raw response: {response}")
221
- # print(f"Raw response: {response}")
277
+ # Normalize runtime: None means python
278
+ runtime = runtime or "python"
222
279
 
223
- # Parse the result from the response object
224
- # Response has stdout, stderr, and error fields
225
- response_stdout = response.stdout if hasattr(response, "stdout") else ""
226
- response_error = response.error if hasattr(response, "error") else None
280
+ self._initialize_client()
227
281
 
228
- sandbox.delete()
282
+ with self._sandbox_context(runtime=runtime) as sandbox:
283
+ try:
284
+ # Prepare all parameters as a single dict
285
+ params = {
286
+ "app_params": app_params,
287
+ "inputs": inputs,
288
+ "output": output,
289
+ "correct_answer": correct_answer,
290
+ }
291
+ params_json = json.dumps(params)
292
+
293
+ if not templates:
294
+ raise RuntimeError(
295
+ "Missing evaluator templates for Daytona execution"
296
+ )
297
+
298
+ template = templates.get(runtime)
299
+ if template is None:
300
+ raise RuntimeError(
301
+ f"Missing evaluator template for runtime '{runtime}'"
302
+ )
303
+
304
+ # Wrap the user code with the necessary context and evaluation
305
+ wrapped_code = template.format(
306
+ params_json=params_json,
307
+ user_code=code,
308
+ )
229
309
 
230
- if response_error:
231
- log.error(f"Sandbox execution error: {response_error}")
232
- raise RuntimeError(f"Sandbox execution failed: {response_error}")
310
+ # Execute the code in the Daytona sandbox
311
+ response = sandbox.process.code_run(wrapped_code)
312
+ response_stdout = response.result if hasattr(response, "result") else ""
313
+ response_exit_code = getattr(response, "exit_code", 0)
314
+ response_error = getattr(response, "error", None) or getattr(
315
+ response, "stderr", None
316
+ )
233
317
 
234
- # Parse the result from stdout
235
- output_lines = response_stdout.strip().split("\n")
236
- for line in reversed(output_lines):
237
- if not line.strip():
238
- continue
239
- try:
240
- result_obj = json.loads(line)
318
+ if response_exit_code and response_exit_code != 0:
319
+ raw_error = response_error or response_stdout or "Unknown error"
320
+ # Log full error for debugging
321
+ # log.warning(
322
+ # "Sandbox execution error (exit_code=%s): %s",
323
+ # response_exit_code,
324
+ # raw_error,
325
+ # )
326
+ # Extract clean error message for user display
327
+ clean_error = _extract_error_message(raw_error)
328
+ raise RuntimeError(clean_error)
329
+
330
+ # Parse the result from stdout
331
+ output_lines = response_stdout.strip().split("\n")
332
+ for line in reversed(output_lines):
333
+ if not line.strip():
334
+ continue
335
+ try:
336
+ result_obj = json.loads(line)
337
+ if isinstance(result_obj, dict) and "result" in result_obj:
338
+ result = result_obj["result"]
339
+ if isinstance(result, (float, int, type(None))):
340
+ return float(result) if result is not None else None
341
+ except json.JSONDecodeError:
342
+ continue
343
+
344
+ # Fallback: attempt to extract a JSON object containing "result"
345
+ for line in reversed(output_lines):
346
+ if "result" not in line:
347
+ continue
348
+ start = line.find("{")
349
+ end = line.rfind("}")
350
+ if start == -1 or end == -1 or end <= start:
351
+ continue
352
+ try:
353
+ result_obj = json.loads(line[start : end + 1])
354
+ except json.JSONDecodeError:
355
+ continue
241
356
  if isinstance(result_obj, dict) and "result" in result_obj:
242
357
  result = result_obj["result"]
243
358
  if isinstance(result, (float, int, type(None))):
244
359
  return float(result) if result is not None else None
245
- except json.JSONDecodeError:
246
- continue
247
360
 
248
- raise ValueError("Could not parse evaluation result from Daytona output")
361
+ # log.warning(
362
+ # "Evaluation output did not include JSON result: %s", response_stdout
363
+ # )
364
+ raise ValueError(
365
+ "Could not parse evaluation result from Daytona output"
366
+ )
249
367
 
250
- except Exception as e:
251
- log.error(f"Error during Daytona code execution: {e}", exc_info=True)
252
- # print(f"Exception details: {type(e).__name__}: {e}")
253
- raise RuntimeError(f"Error during Daytona code execution: {e}")
368
+ except Exception as e:
369
+ # log.warning(
370
+ # f"Error during Daytona code execution:\n {e}", exc_info=True
371
+ # )
372
+ raise RuntimeError(e)
254
373
 
255
374
  def cleanup(self) -> None:
256
375
  """Clean up Daytona client resources."""
@@ -1,20 +1,10 @@
1
- from typing import Any, Dict, Union, Text
2
-
3
- from RestrictedPython import safe_builtins, compile_restricted, utility_builtins
4
- from RestrictedPython.Eval import (
5
- default_guarded_getiter,
6
- default_guarded_getitem,
7
- )
8
- from RestrictedPython.Guards import (
9
- guarded_iter_unpack_sequence,
10
- full_write_guard,
11
- )
1
+ from typing import Any, Dict, Union, Optional
12
2
 
13
3
  from agenta.sdk.workflows.runners.base import CodeRunner
14
4
 
15
5
 
16
6
  class LocalRunner(CodeRunner):
17
- """Local code runner using RestrictedPython for safe execution."""
7
+ """Local code runner using direct Python execution."""
18
8
 
19
9
  def run(
20
10
  self,
@@ -23,9 +13,11 @@ class LocalRunner(CodeRunner):
23
13
  inputs: Dict[str, Any],
24
14
  output: Union[dict, str],
25
15
  correct_answer: Any,
16
+ runtime: Optional[str] = None,
17
+ templates: Optional[Dict[str, str]] = None,
26
18
  ) -> Union[float, None]:
27
19
  """
28
- Execute provided Python code safely using RestrictedPython.
20
+ Execute provided Python code directly.
29
21
 
30
22
  Args:
31
23
  code: The Python code to be executed
@@ -33,55 +25,29 @@ class LocalRunner(CodeRunner):
33
25
  inputs: Inputs to be used during code execution
34
26
  output: The output of the app variant after being called
35
27
  correct_answer: The correct answer (or target) for comparison
36
- code: The Python code to be executed
28
+ runtime: Runtime environment (only "python" is supported for local runner)
29
+ templates: Wrapper templates keyed by runtime (unused for local runner).
37
30
 
38
31
  Returns:
39
32
  Float score between 0 and 1, or None if execution fails
40
33
  """
41
- # Define the available built-ins
42
- local_builtins = safe_builtins.copy()
43
-
44
- # Add the __import__ built-in function to the local builtins
45
- local_builtins["__import__"] = __import__
46
-
47
- # Define supported packages
48
- allowed_imports = [
49
- "math",
50
- "random",
51
- "datetime",
52
- "json",
53
- "requests",
54
- "typing",
55
- ]
56
-
57
- # Create a dictionary to simulate allowed imports
58
- allowed_modules = {}
59
- for package_name in allowed_imports:
60
- allowed_modules[package_name] = __import__(package_name)
61
-
62
- # Add the allowed modules to the local built-ins
63
- local_builtins.update(allowed_modules)
64
- local_builtins.update(utility_builtins)
65
-
66
- # Define the environment for the code execution
67
- environment = {
68
- "_getiter_": default_guarded_getiter,
69
- "_getitem_": default_guarded_getitem,
70
- "_iter_unpack_sequence_": guarded_iter_unpack_sequence,
71
- "_write_": full_write_guard,
72
- "__builtins__": local_builtins,
73
- }
74
-
75
- # Compile the code in a restricted environment
76
- byte_code = compile_restricted(code, filename="<inline>", mode="exec")
77
-
78
- # Call the evaluation function, extract the result if it exists
79
- # and is a float between 0 and 1
34
+ # Normalize runtime: None means python
35
+ runtime = runtime or "python"
36
+
37
+ # Local runner only supports Python
38
+ if runtime != "python":
39
+ raise ValueError(
40
+ f"LocalRunner only supports 'python' runtime, got: {runtime}"
41
+ )
42
+
43
+ # Define the environment for code execution
44
+ environment: dict[str, Any] = dict()
45
+
46
+ # Execute the code directly
80
47
  try:
81
- # Execute the code
82
- exec(byte_code, environment)
48
+ exec(code, environment)
83
49
 
84
- # Call the evaluation function, extract the result
50
+ # Call the evaluation function
85
51
  result = environment["evaluate"](app_params, inputs, output, correct_answer)
86
52
 
87
53
  # Attempt to convert result to float
@@ -19,7 +19,7 @@ def get_runner() -> CodeRunner:
19
19
  Registry to get the appropriate code runner based on environment configuration.
20
20
 
21
21
  Uses AGENTA_SERVICES_SANDBOX_RUNNER environment variable:
22
- - "local" (default): Uses RestrictedPython for local execution
22
+ - "local" (default): Uses current container for local execution
23
23
  - "daytona": Uses Daytona remote sandbox
24
24
 
25
25
  Returns:
@@ -1,4 +1,4 @@
1
- from typing import Union, Text, Dict, Any
1
+ from typing import Union, Text, Dict, Any, Optional
2
2
 
3
3
  from agenta.sdk.workflows.runners import get_runner
4
4
 
@@ -29,11 +29,13 @@ def execute_code_safely(
29
29
  output: Union[dict, str],
30
30
  correct_answer: Any, # for backward compatibility reasons
31
31
  code: Text,
32
+ runtime: Optional[str] = None,
33
+ templates: Optional[Dict[str, str]] = None,
32
34
  ) -> Union[float, None]:
33
35
  """
34
- Execute the provided Python code safely.
36
+ Execute the provided code safely.
35
37
 
36
- Uses the configured runner (local RestrictedPython or remote Daytona)
38
+ Uses the configured runner (local or remote Daytona)
37
39
  based on the AGENTA_SERVICES_SANDBOX_RUNNER environment variable.
38
40
 
39
41
  Args:
@@ -41,7 +43,9 @@ def execute_code_safely(
41
43
  - inputs (Dict[str, Any]): Inputs to be used during code execution.
42
44
  - output (Union[dict, str]): The output of the app variant after being called.
43
45
  - correct_answer (Any): The correct answer (or target) of the app variant.
44
- - code (Text): The Python code to be executed.
46
+ - code (Text): The code to be executed.
47
+ - runtime (Optional[str]): Runtime environment (python, javascript, typescript). None = python.
48
+ - templates (Optional[Dict[str, str]]): Wrapper templates keyed by runtime.
45
49
 
46
50
  Returns:
47
51
  - (float): Result of the execution if successful. Should be between 0 and 1.
@@ -52,4 +56,12 @@ def execute_code_safely(
52
56
  if _runner is None:
53
57
  _runner = get_runner()
54
58
 
55
- return _runner.run(code, app_params, inputs, output, correct_answer)
59
+ return _runner.run(
60
+ code,
61
+ app_params,
62
+ inputs,
63
+ output,
64
+ correct_answer,
65
+ runtime,
66
+ templates,
67
+ )