agenta 0.65.0__py3-none-any.whl → 0.70.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agenta/__init__.py +66 -36
- agenta/client/backend/types/testset_output_response.py +1 -0
- agenta/sdk/agenta_init.py +90 -12
- agenta/sdk/assets.py +16 -10
- agenta/sdk/engines/tracing/tracing.py +2 -2
- agenta/sdk/evaluations/metrics.py +3 -3
- agenta/sdk/litellm/litellm.py +38 -30
- agenta/sdk/middleware/auth.py +19 -4
- agenta/sdk/middleware/otel.py +0 -3
- agenta/sdk/middleware/vault.py +20 -5
- agenta/sdk/middlewares/running/vault.py +1 -1
- agenta/sdk/models/evaluations.py +11 -3
- agenta/sdk/models/shared.py +1 -1
- agenta/sdk/tracing/exporters.py +1 -0
- agenta/sdk/tracing/processors.py +40 -37
- agenta/sdk/tracing/tracing.py +91 -2
- agenta/sdk/workflows/runners/__init__.py +3 -0
- agenta/sdk/workflows/runners/base.py +30 -0
- agenta/sdk/workflows/runners/daytona.py +268 -0
- agenta/sdk/workflows/runners/local.py +108 -0
- agenta/sdk/workflows/runners/registry.py +48 -0
- agenta/sdk/workflows/sandbox.py +18 -81
- {agenta-0.65.0.dist-info → agenta-0.70.1.dist-info}/METADATA +4 -2
- {agenta-0.65.0.dist-info → agenta-0.70.1.dist-info}/RECORD +25 -20
- {agenta-0.65.0.dist-info → agenta-0.70.1.dist-info}/WHEEL +0 -0
agenta/sdk/tracing/processors.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
from typing import Optional, Dict, List
|
|
2
2
|
from threading import Lock
|
|
3
|
-
from json import dumps
|
|
4
|
-
from uuid import UUID
|
|
5
3
|
|
|
6
4
|
from opentelemetry.baggage import get_all as get_baggage
|
|
7
5
|
from opentelemetry.context import Context
|
|
@@ -14,8 +12,7 @@ from opentelemetry.sdk.trace.export import (
|
|
|
14
12
|
from opentelemetry.trace import SpanContext
|
|
15
13
|
|
|
16
14
|
from agenta.sdk.utils.logging import get_module_logger
|
|
17
|
-
from agenta.sdk.tracing
|
|
18
|
-
|
|
15
|
+
from agenta.sdk.models.tracing import BaseModel
|
|
19
16
|
from agenta.sdk.contexts.tracing import TracingContext
|
|
20
17
|
|
|
21
18
|
log = get_module_logger(__name__)
|
|
@@ -65,15 +62,36 @@ class TraceProcessor(SpanProcessor):
|
|
|
65
62
|
# )
|
|
66
63
|
|
|
67
64
|
for key in self.references.keys():
|
|
68
|
-
|
|
65
|
+
ref = self.references[key]
|
|
66
|
+
if isinstance(ref, BaseModel):
|
|
67
|
+
try:
|
|
68
|
+
ref = ref.model_dump(mode="json", exclude_none=True)
|
|
69
|
+
except Exception: # pylint: disable=bare-except
|
|
70
|
+
pass
|
|
71
|
+
if isinstance(ref, dict):
|
|
72
|
+
for field, value in ref.items():
|
|
73
|
+
span.set_attribute(f"ag.refs.{key}.{field}", str(value))
|
|
69
74
|
|
|
70
75
|
baggage = get_baggage(parent_context)
|
|
71
76
|
|
|
72
77
|
for key in baggage.keys():
|
|
73
|
-
if key.startswith("ag.
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
78
|
+
if key.startswith("ag."):
|
|
79
|
+
value = baggage[key]
|
|
80
|
+
|
|
81
|
+
if key.startswith("ag.refs."):
|
|
82
|
+
ref = value
|
|
83
|
+
if isinstance(value, BaseModel):
|
|
84
|
+
try:
|
|
85
|
+
ref = value.model_dump(mode="json", exclude_none=True) # type: ignore
|
|
86
|
+
except Exception: # pylint: disable=bare-except
|
|
87
|
+
pass
|
|
88
|
+
if isinstance(ref, dict):
|
|
89
|
+
for field, val in ref.items():
|
|
90
|
+
span.set_attribute(f"{key}.{field}", str(val))
|
|
91
|
+
else:
|
|
92
|
+
# Not a reference - only set if it's a valid attribute type
|
|
93
|
+
if isinstance(value, (str, bool, int, float, bytes)):
|
|
94
|
+
span.set_attribute(key, value)
|
|
77
95
|
|
|
78
96
|
context = TracingContext.get()
|
|
79
97
|
|
|
@@ -105,10 +123,11 @@ class TraceProcessor(SpanProcessor):
|
|
|
105
123
|
if not self.inline:
|
|
106
124
|
if context.links:
|
|
107
125
|
for key, link in context.links.items():
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
126
|
+
if isinstance(link, BaseModel):
|
|
127
|
+
try:
|
|
128
|
+
link = link.model_dump(mode="json", exclude_none=True)
|
|
129
|
+
except Exception:
|
|
130
|
+
pass
|
|
112
131
|
if not isinstance(link, dict):
|
|
113
132
|
continue
|
|
114
133
|
if not link.get("trace_id") or not link.get("span_id"):
|
|
@@ -127,30 +146,14 @@ class TraceProcessor(SpanProcessor):
|
|
|
127
146
|
|
|
128
147
|
if context.references:
|
|
129
148
|
for key, ref in context.references.items():
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
if ref.get("id"):
|
|
140
|
-
span.set_attribute(
|
|
141
|
-
f"ag.refs.{key}.id",
|
|
142
|
-
str(ref.get("id")),
|
|
143
|
-
)
|
|
144
|
-
if ref.get("slug"):
|
|
145
|
-
span.set_attribute(
|
|
146
|
-
f"ag.refs.{key}.slug",
|
|
147
|
-
str(ref.get("slug")),
|
|
148
|
-
)
|
|
149
|
-
if ref.get("version"):
|
|
150
|
-
span.set_attribute(
|
|
151
|
-
f"ag.refs.{key}.version",
|
|
152
|
-
str(ref.get("version")),
|
|
153
|
-
)
|
|
149
|
+
if isinstance(ref, BaseModel):
|
|
150
|
+
try:
|
|
151
|
+
ref = ref.model_dump(mode="json", exclude_none=True)
|
|
152
|
+
except Exception:
|
|
153
|
+
pass
|
|
154
|
+
if isinstance(ref, dict):
|
|
155
|
+
for field, value in ref.items():
|
|
156
|
+
span.set_attribute(f"ag.refs.{key}.{field}", str(value))
|
|
154
157
|
|
|
155
158
|
trace_id = span.context.trace_id
|
|
156
159
|
span_id = span.context.span_id
|
agenta/sdk/tracing/tracing.py
CHANGED
|
@@ -31,6 +31,7 @@ from agenta.sdk.tracing.conventions import Reference, is_valid_attribute_key
|
|
|
31
31
|
from agenta.sdk.tracing.propagation import extract, inject
|
|
32
32
|
from agenta.sdk.utils.cache import TTLLRUCache
|
|
33
33
|
|
|
34
|
+
import agenta as ag
|
|
34
35
|
|
|
35
36
|
log = get_module_logger(__name__)
|
|
36
37
|
|
|
@@ -101,7 +102,7 @@ class Tracing(metaclass=Singleton):
|
|
|
101
102
|
|
|
102
103
|
# TRACE PROCESSORS -- OTLP
|
|
103
104
|
try:
|
|
104
|
-
log.info("Agenta -
|
|
105
|
+
log.info("Agenta - OTLP URL: %s", self.otlp_url)
|
|
105
106
|
|
|
106
107
|
_otlp = TraceProcessor(
|
|
107
108
|
OTLPExporter(
|
|
@@ -114,7 +115,7 @@ class Tracing(metaclass=Singleton):
|
|
|
114
115
|
|
|
115
116
|
self.tracer_provider.add_span_processor(_otlp)
|
|
116
117
|
except: # pylint: disable=bare-except
|
|
117
|
-
log.warning("Agenta -
|
|
118
|
+
log.warning("Agenta - OTLP unreachable, skipping exports.")
|
|
118
119
|
|
|
119
120
|
# --- INLINE
|
|
120
121
|
if inline:
|
|
@@ -215,6 +216,42 @@ class Tracing(metaclass=Singleton):
|
|
|
215
216
|
namespace="metrics",
|
|
216
217
|
)
|
|
217
218
|
|
|
219
|
+
def store_session(
    self,
    session_id: Optional[str] = None,
    span: Optional[Span] = None,
):
    """Tag a span with the session it belongs to.

    Writes ``session_id`` under the ``id`` key of the ``session`` namespace.
    Nothing is written when ``session_id`` is empty or ``None``.

    The whole operation runs inside the module's ``suppress()`` context
    manager (presumably so that tracing failures never reach the caller --
    confirm against its definition).

    Args:
        session_id: Identifier of the session to record.
        span: Span to annotate. When omitted, the span returned by
            ``self.get_current_span()`` is used.
    """
    with suppress():
        # Resolve the target first, exactly as before, even if there turns
        # out to be nothing to write.
        target = self.get_current_span() if span is None else span

        if not session_id:
            return

        target.set_attribute("id", session_id, namespace="session")
|
|
236
|
+
|
|
237
|
+
def store_user(
    self,
    user_id: Optional[str] = None,
    span: Optional[Span] = None,
):
    """Tag a span with the user it belongs to.

    Writes ``user_id`` under the ``id`` key of the ``user`` namespace.
    Nothing is written when ``user_id`` is empty or ``None``.

    The whole operation runs inside the module's ``suppress()`` context
    manager (presumably so that tracing failures never reach the caller --
    confirm against its definition).

    Args:
        user_id: Identifier of the user to record.
        span: Span to annotate. When omitted, the span returned by
            ``self.get_current_span()`` is used.
    """
    with suppress():
        # Resolve the target first, exactly as before, even if there turns
        # out to be nothing to write.
        target = self.get_current_span() if span is None else span

        if not user_id:
            return

        target.set_attribute("id", user_id, namespace="user")
|
|
254
|
+
|
|
218
255
|
def is_inline_trace_ready(
|
|
219
256
|
self,
|
|
220
257
|
trace_id: Optional[int] = None,
|
|
@@ -314,6 +351,58 @@ class Tracing(metaclass=Singleton):
|
|
|
314
351
|
|
|
315
352
|
return None
|
|
316
353
|
|
|
354
|
+
def get_trace_url(
    self,
    trace_id: Optional[str] = None,
) -> str:
    """
    Build a URL to view a trace in the Agenta UI.

    Automatically extracts the trace ID from the current tracing context
    if not explicitly provided.

    The web URL is derived from the SDK's API URL by dropping a trailing
    ``/api`` path segment (and any trailing slashes). Only the suffix is
    removed, so hosts that merely contain ``/api`` -- for example
    ``https://api.example.com/api`` -- keep their host name intact.

    Args:
        trace_id: Optional trace ID (hex string format). If not provided,
            it will be automatically extracted from the current trace context.

    Returns:
        The full URL to view the trace in the observability dashboard

    Raises:
        RuntimeError: If the SDK is not initialized, no active trace context exists,
            or scope info cannot be fetched
    """
    if trace_id is None:
        span_ctx = self.get_span_context()
        if span_ctx is None or not span_ctx.is_valid:
            raise RuntimeError(
                "No active trace context found. "
                "Make sure you call this within an instrumented function or span."
            )

        # Render the integer trace id as zero-padded, 32-digit lowercase hex.
        trace_id = f"{span_ctx.trace_id:032x}"

    if not ag or not ag.DEFAULT_AGENTA_SINGLETON_INSTANCE:
        raise RuntimeError(
            "Agenta SDK is not initialized. Please call ag.init() first."
        )

    instance = ag.DEFAULT_AGENTA_SINGLETON_INSTANCE

    web_url = None
    if instance.api_url:
        # Strip only the trailing "/api" segment; a blanket
        # str.replace("/api", "") would also eat "/api" inside the host
        # ("https://api.example.com") or inside longer path segments.
        web_url = instance.api_url.rstrip("/")
        if web_url.endswith("/api"):
            web_url = web_url[: -len("/api")]

    # The organization id is resolved alongside the others but is not part
    # of the URL.
    (_organization_id, workspace_id, project_id) = instance.resolve_scopes()

    if not web_url or not workspace_id or not project_id:
        raise RuntimeError(
            "Could not determine workspace/project context. Please call ag.init() first."
        )

    return (
        f"{web_url}/w/{workspace_id}/p/{project_id}/observability?trace={trace_id}"
    )
|
|
405
|
+
|
|
317
406
|
|
|
318
407
|
def get_tracer(
|
|
319
408
|
tracing: Tracing,
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Any, Dict, Union
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class CodeRunner(ABC):
    """Common interface for evaluation-code runners.

    Concrete subclasses decide where the code executes (for example in the
    local process or in a remote sandbox); callers only rely on ``run()``.
    """

    @abstractmethod
    def run(
        self,
        code: str,
        app_params: Dict[str, Any],
        inputs: Dict[str, Any],
        output: Union[dict, str],
        correct_answer: Any,
    ) -> Union[float, None]:
        """Run evaluation code and report its score.

        Args:
            code: Python source to execute.
            app_params: Parameters of the application being evaluated.
            inputs: Input data made available to the code.
            output: Output produced by the application variant.
            correct_answer: Expected answer to compare the output against.

        Returns:
            A float score between 0 and 1, or None if execution fails.
        """
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
from typing import Any, Dict, Union, Optional
|
|
4
|
+
|
|
5
|
+
from daytona import Daytona, DaytonaConfig, Sandbox
|
|
6
|
+
|
|
7
|
+
from agenta.sdk.workflows.runners.base import CodeRunner
|
|
8
|
+
|
|
9
|
+
from agenta.sdk.utils.logging import get_module_logger
|
|
10
|
+
|
|
11
|
+
log = get_module_logger(__name__)

# Template for wrapping user code with evaluation context.
#
# The template is filled in with str.format():
#   - {params_json!r} is replaced by the repr() of a JSON string, i.e. a valid
#     Python string literal that the generated script parses with json.loads.
#   - {user_code} is replaced by the evaluator source, which must define
#     evaluate(app_params, inputs, output, correct_answer).
#   - Doubled braces ({{ ... }}) are str.format escapes for literal braces.
# The generated script prints one JSON object of the form {"result": ...}
# as its final line of output.
EVALUATION_CODE_TEMPLATE = """
import json

# Parse all parameters from a single dict
params = json.loads({params_json!r})
app_params = params['app_params']
inputs = params['inputs']
output = params['output']
correct_answer = params['correct_answer']

# User-provided evaluation code
{user_code}

# Execute and capture result
result = evaluate(app_params, inputs, output, correct_answer)

# Ensure result is a float
if isinstance(result, (float, int, str)):
    try:
        result = float(result)
    except (ValueError, TypeError):
        result = None

# Print result for capture
print(json.dumps({{"result": result}}))
"""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class DaytonaRunner(CodeRunner):
    """Remote code runner using Daytona sandbox for execution.

    The class is a singleton: every ``DaytonaRunner()`` call returns the same
    instance, so the Daytona client is created once (lazily, on the first
    ``run()``) and reused. A new sandbox is created for every ``run()`` call
    and deleted again when that call finishes, whether it succeeded or not.
    """

    _instance: Optional["DaytonaRunner"] = None

    def __new__(cls):
        """Singleton pattern to reuse Daytona client and sandbox."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        """Initialize Daytona runner with config from environment variables.

        Raises:
            ValueError: If ``DAYTONA_API_KEY`` is not set.
        """
        if self._initialized:
            return

        self.daytona: Optional[Daytona] = None

        # Validate *before* marking the singleton as initialized. Otherwise a
        # failed first construction would leave ``_initialized`` set and every
        # later ``DaytonaRunner()`` would silently skip validation.
        self._validate_config()
        self._initialized = True

    def _validate_config(self) -> None:
        """Validate required environment variables for Daytona.

        Raises:
            ValueError: If ``DAYTONA_API_KEY`` is missing or empty.
        """
        # Only DAYTONA_API_KEY is strictly required
        # DAYTONA_API_URL defaults to https://app.daytona.io/api
        # DAYTONA_TARGET defaults to AGENTA_REGION or 'eu'
        if not os.getenv("DAYTONA_API_KEY"):
            raise ValueError(
                "Missing required environment variable: DAYTONA_API_KEY. "
                "Set AGENTA_SERVICES_SANDBOX_RUNNER=local to use local execution instead."
            )

    def _initialize_client(self) -> None:
        """Lazily initialize Daytona client on first use.

        Raises:
            RuntimeError: If the client cannot be constructed.
        """
        if self.daytona is not None:
            return

        try:
            # Get configuration with fallbacks
            api_url = os.getenv("DAYTONA_API_URL") or "https://app.daytona.io/api"
            api_key = os.getenv("DAYTONA_API_KEY")
            target = os.getenv("DAYTONA_TARGET") or os.getenv("AGENTA_REGION") or "eu"

            config = DaytonaConfig(
                api_url=api_url,
                api_key=api_key,
                target=target,
            )
            self.daytona = Daytona(config)

        except Exception as e:
            raise RuntimeError(f"Failed to initialize Daytona client: {e}") from e

    def _create_sandbox(self) -> Any:
        """Create a new sandbox for this run from snapshot.

        The snapshot is taken from the
        ``AGENTA_SERVICES_SANDBOX_SNAPSHOT_PYTHON`` environment variable.

        Returns:
            The sandbox object returned by the Daytona client.

        Raises:
            RuntimeError: If the client is not initialized, the snapshot
                variable is missing, or sandbox creation fails.
        """
        try:
            if self.daytona is None:
                raise RuntimeError("Daytona client not initialized")

            snapshot_id = os.getenv("AGENTA_SERVICES_SANDBOX_SNAPSHOT_PYTHON")

            if not snapshot_id:
                raise RuntimeError(
                    "AGENTA_SERVICES_SANDBOX_SNAPSHOT_PYTHON environment variable is required. "
                    "Set it to the Daytona sandbox ID or snapshot name you want to use."
                )

            from daytona import CreateSandboxFromSnapshotParams

            sandbox = self.daytona.create(
                CreateSandboxFromSnapshotParams(
                    snapshot=snapshot_id,
                    ephemeral=True,
                )
            )

            return sandbox

        except Exception as e:
            raise RuntimeError(f"Failed to create sandbox from snapshot: {e}") from e

    @staticmethod
    def _delete_sandbox(sandbox: Any) -> None:
        """Delete a sandbox, logging (not raising) if deletion fails."""
        try:
            sandbox.delete()
        except Exception:
            # A failed deletion must not hide the evaluation result or the
            # original execution error.
            log.error("Failed to delete Daytona sandbox", exc_info=True)

    @staticmethod
    def _parse_result(stdout: str) -> Union[float, None]:
        """Extract the score from the wrapped script's stdout.

        Scans the output from the last line upwards for a JSON object with a
        ``"result"`` key whose value is a number or ``null``.

        Raises:
            ValueError: If no such line is found.
        """
        for line in reversed(stdout.strip().split("\n")):
            if not line.strip():
                continue

            try:
                result_obj = json.loads(line)
            except json.JSONDecodeError:
                continue

            if isinstance(result_obj, dict) and "result" in result_obj:
                result = result_obj["result"]
                if result is None:
                    return None
                if isinstance(result, (float, int)):
                    return float(result)

        raise ValueError("Could not parse evaluation result from Daytona output")

    def run(
        self,
        code: str,
        app_params: Dict[str, Any],
        inputs: Dict[str, Any],
        output: Union[dict, str],
        correct_answer: Any,
    ) -> Union[float, None]:
        """
        Execute provided Python code in Daytona sandbox.

        The code must define an `evaluate()` function that takes
        (app_params, inputs, output, correct_answer) and returns a float (0-1).

        Args:
            code: The Python code to be executed
            app_params: The parameters of the app variant
            inputs: Inputs to be used during code execution
            output: The output of the app variant after being called
            correct_answer: The correct answer (or target) for comparison

        Returns:
            The score as a float, or None when the evaluation produced a
            result that could not be converted to a float.

        Raises:
            RuntimeError: If the client or sandbox cannot be set up, the
                sandbox reports an execution error, or the result cannot be
                parsed from the sandbox output.
        """
        self._initialize_client()
        sandbox: Sandbox = self._create_sandbox()

        try:
            # Prepare all parameters as a single dict
            params_json = json.dumps(
                {
                    "app_params": app_params,
                    "inputs": inputs,
                    "output": output,
                    "correct_answer": correct_answer,
                }
            )

            # Wrap the user code with the necessary context and evaluation
            wrapped_code = EVALUATION_CODE_TEMPLATE.format(
                params_json=params_json,
                user_code=code,
            )

            # Streamed output is collected by the callbacks below; the score
            # itself is parsed from the response object's stdout.
            stdout_lines = []
            stderr_lines = []

            def on_stdout(line: str) -> None:
                """Capture stdout output."""
                stdout_lines.append(line)

            def on_stderr(line: str) -> None:
                """Capture stderr output."""
                stderr_lines.append(line)

            def on_error(error: Exception) -> None:
                """Capture errors."""
                log.error(f"[ERROR] {type(error).__name__}: {error}")

            # Execute the code in the Daytona sandbox
            response = sandbox.code_interpreter.run_code(
                wrapped_code,
                on_stdout=on_stdout,
                on_stderr=on_stderr,
                on_error=on_error,
            )

            # Response has stdout, stderr, and error fields; tolerate a
            # missing or None stdout instead of failing on ``.strip()``.
            response_stdout = getattr(response, "stdout", "") or ""
            response_error = getattr(response, "error", None)

            if response_error:
                log.error(f"Sandbox execution error: {response_error}")
                raise RuntimeError(f"Sandbox execution failed: {response_error}")

            return self._parse_result(response_stdout)

        except Exception as e:
            log.error(f"Error during Daytona code execution: {e}", exc_info=True)
            raise RuntimeError(f"Error during Daytona code execution: {e}") from e

        finally:
            # Always release the sandbox, including when execution raised.
            self._delete_sandbox(sandbox)

    def cleanup(self) -> None:
        """Clean up Daytona client resources by dropping the client reference."""
        self.daytona = None

    def __del__(self):
        """Ensure cleanup on deletion."""
        try:
            self.cleanup()
        except Exception:
            # Never let finalization errors propagate.
            pass
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from typing import Any, Dict, Union, Text
|
|
2
|
+
|
|
3
|
+
from RestrictedPython import safe_builtins, compile_restricted, utility_builtins
|
|
4
|
+
from RestrictedPython.Eval import (
|
|
5
|
+
default_guarded_getiter,
|
|
6
|
+
default_guarded_getitem,
|
|
7
|
+
)
|
|
8
|
+
from RestrictedPython.Guards import (
|
|
9
|
+
guarded_iter_unpack_sequence,
|
|
10
|
+
full_write_guard,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from agenta.sdk.workflows.runners.base import CodeRunner
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class LocalRunner(CodeRunner):
    """Local code runner using RestrictedPython for safe execution."""

    def run(
        self,
        code: str,
        app_params: Dict[str, Any],
        inputs: Dict[str, Any],
        output: Union[dict, str],
        correct_answer: Any,
    ) -> Union[float, None]:
        """
        Execute provided Python code safely using RestrictedPython.

        The code must define an `evaluate()` function that is called with
        (app_params, inputs, output, correct_answer). Its return value is
        converted to a float when it is a float, int or str.

        Args:
            code: The Python code to be executed
            app_params: The parameters of the app variant
            inputs: Inputs to be used during code execution
            output: The output of the app variant after being called
            correct_answer: The correct answer (or target) for comparison

        Returns:
            The evaluation result as a float.

        Raises:
            KeyError: If a key lookup fails during execution, e.g. the code
                does not define `evaluate`.
            SyntaxError: If the provided code cannot be compiled.
            RuntimeError: For any other failure, including a result that is
                not (or cannot be converted to) a float.
        """
        # Define the available built-ins
        local_builtins = safe_builtins.copy()

        # NOTE(security): this exposes the *unrestricted* built-in __import__,
        # so executed code can import any installed module, not only the
        # packages listed in `allowed_imports` below. Left unchanged because
        # existing evaluators may depend on it; consider replacing it with an
        # import hook that enforces the allow-list.
        local_builtins["__import__"] = __import__

        # Define supported packages
        allowed_imports = [
            "math",
            "random",
            "datetime",
            "json",
            "requests",
            "typing",
        ]

        # Pre-import the supported packages and expose them as built-in names
        allowed_modules = {
            package_name: __import__(package_name)
            for package_name in allowed_imports
        }
        local_builtins.update(allowed_modules)
        local_builtins.update(utility_builtins)

        # Define the environment for the code execution
        environment = {
            "_getiter_": default_guarded_getiter,
            "_getitem_": default_guarded_getitem,
            "_iter_unpack_sequence_": guarded_iter_unpack_sequence,
            "_write_": full_write_guard,
            "__builtins__": local_builtins,
        }

        try:
            # Compile inside the try block so that compile-time syntax errors
            # are reported through the SyntaxError handler below.
            byte_code = compile_restricted(code, filename="<inline>", mode="exec")

            # NOTE(security): executes caller-supplied code in this process;
            # isolation relies entirely on the RestrictedPython guards above.
            exec(byte_code, environment)

            # Call the evaluation function, extract the result
            result = environment["evaluate"](app_params, inputs, output, correct_answer)

            # Attempt to convert result to float
            if isinstance(result, (float, int, str)):
                try:
                    result = float(result)
                except ValueError as e:
                    raise ValueError(
                        f"Result cannot be converted to float: {e}"
                    ) from e

            if not isinstance(result, float):
                raise TypeError(
                    f"Result is not a float after conversion: {type(result)}"
                )

            return result

        except KeyError as e:
            raise KeyError(f"Missing expected key in environment: {e}") from e

        except SyntaxError as e:
            raise SyntaxError(f"Syntax error in provided code: {e}") from e

        except Exception as e:
            # Also reached by the ValueError/TypeError raised above.
            raise RuntimeError(f"Error during code execution: {e}") from e
|