deepeval 3.5.2__py3-none-any.whl → 3.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +94 -2
- deepeval/config/utils.py +54 -1
- deepeval/constants.py +27 -0
- deepeval/integrations/pydantic_ai/__init__.py +3 -1
- deepeval/integrations/pydantic_ai/agent.py +339 -0
- deepeval/integrations/pydantic_ai/patcher.py +479 -406
- deepeval/integrations/pydantic_ai/utils.py +239 -2
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
- deepeval/metrics/non_advice/non_advice.py +2 -2
- deepeval/metrics/pii_leakage/pii_leakage.py +2 -2
- deepeval/models/embedding_models/azure_embedding_model.py +40 -9
- deepeval/models/embedding_models/local_embedding_model.py +52 -9
- deepeval/models/embedding_models/ollama_embedding_model.py +25 -7
- deepeval/models/embedding_models/openai_embedding_model.py +47 -5
- deepeval/models/llms/amazon_bedrock_model.py +31 -4
- deepeval/models/llms/anthropic_model.py +39 -13
- deepeval/models/llms/azure_model.py +37 -38
- deepeval/models/llms/deepseek_model.py +36 -7
- deepeval/models/llms/gemini_model.py +10 -0
- deepeval/models/llms/grok_model.py +50 -3
- deepeval/models/llms/kimi_model.py +37 -7
- deepeval/models/llms/local_model.py +38 -12
- deepeval/models/llms/ollama_model.py +15 -3
- deepeval/models/llms/openai_model.py +37 -44
- deepeval/models/mlllms/gemini_model.py +21 -3
- deepeval/models/mlllms/ollama_model.py +38 -13
- deepeval/models/mlllms/openai_model.py +18 -42
- deepeval/models/retry_policy.py +548 -64
- deepeval/tracing/tracing.py +87 -0
- {deepeval-3.5.2.dist-info → deepeval-3.5.4.dist-info}/METADATA +1 -1
- {deepeval-3.5.2.dist-info → deepeval-3.5.4.dist-info}/RECORD +35 -34
- {deepeval-3.5.2.dist-info → deepeval-3.5.4.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.2.dist-info → deepeval-3.5.4.dist-info}/WHEEL +0 -0
- {deepeval-3.5.2.dist-info → deepeval-3.5.4.dist-info}/entry_points.txt +0 -0
deepeval/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__: str = "3.5.
|
|
1
|
+
__version__: str = "3.5.4"
|
deepeval/config/settings.py
CHANGED
|
@@ -9,6 +9,7 @@ Central config for DeepEval.
|
|
|
9
9
|
type coercion.
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
+
import logging
|
|
12
13
|
import os
|
|
13
14
|
import re
|
|
14
15
|
|
|
@@ -16,11 +17,17 @@ from dotenv import dotenv_values
|
|
|
16
17
|
from pathlib import Path
|
|
17
18
|
from pydantic import AnyUrl, SecretStr, field_validator, confloat
|
|
18
19
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
19
|
-
from typing import Any, Dict, Optional, NamedTuple
|
|
20
|
+
from typing import Any, Dict, List, Optional, NamedTuple
|
|
20
21
|
|
|
21
|
-
from deepeval.config.utils import
|
|
22
|
+
from deepeval.config.utils import (
|
|
23
|
+
parse_bool,
|
|
24
|
+
coerce_to_list,
|
|
25
|
+
dedupe_preserve_order,
|
|
26
|
+
)
|
|
27
|
+
from deepeval.constants import SUPPORTED_PROVIDER_SLUGS, slugify
|
|
22
28
|
|
|
23
29
|
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
24
31
|
_SAVE_RE = re.compile(r"^(?P<scheme>dotenv)(?::(?P<path>.+))?$")
|
|
25
32
|
|
|
26
33
|
|
|
@@ -264,6 +271,13 @@ class Settings(BaseSettings):
|
|
|
264
271
|
LOCAL_EMBEDDING_MODEL_NAME: Optional[str] = None
|
|
265
272
|
LOCAL_EMBEDDING_BASE_URL: Optional[AnyUrl] = None
|
|
266
273
|
|
|
274
|
+
#
|
|
275
|
+
# Retry Policy
|
|
276
|
+
#
|
|
277
|
+
DEEPEVAL_SDK_RETRY_PROVIDERS: Optional[List[str]] = None
|
|
278
|
+
DEEPEVAL_RETRY_BEFORE_LOG_LEVEL: Optional[int] = None # default -> INFO
|
|
279
|
+
DEEPEVAL_RETRY_AFTER_LOG_LEVEL: Optional[int] = None # default -> ERROR
|
|
280
|
+
|
|
267
281
|
#
|
|
268
282
|
# Telemetry and Debug
|
|
269
283
|
#
|
|
@@ -283,6 +297,12 @@ class Settings(BaseSettings):
|
|
|
283
297
|
CONFIDENT_SAMPLE_RATE: Optional[float] = 1.0
|
|
284
298
|
OTEL_EXPORTER_OTLP_ENDPOINT: Optional[AnyUrl] = None
|
|
285
299
|
|
|
300
|
+
#
|
|
301
|
+
# Network
|
|
302
|
+
#
|
|
303
|
+
MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS: float = 3.05
|
|
304
|
+
MEDIA_IMAGE_READ_TIMEOUT_SECONDS: float = 10.0
|
|
305
|
+
|
|
286
306
|
##############
|
|
287
307
|
# Validators #
|
|
288
308
|
##############
|
|
@@ -401,6 +421,78 @@ class Settings(BaseSettings):
|
|
|
401
421
|
return None
|
|
402
422
|
return s.upper()
|
|
403
423
|
|
|
424
|
+
@field_validator("DEEPEVAL_SDK_RETRY_PROVIDERS", mode="before")
|
|
425
|
+
@classmethod
|
|
426
|
+
def _coerce_to_list(cls, v):
|
|
427
|
+
# works with JSON list, comma/space/semicolon separated, or real lists
|
|
428
|
+
return coerce_to_list(v, lower=True)
|
|
429
|
+
|
|
430
|
+
@field_validator("DEEPEVAL_SDK_RETRY_PROVIDERS", mode="after")
|
|
431
|
+
@classmethod
|
|
432
|
+
def _validate_sdk_provider_list(cls, v):
|
|
433
|
+
if v is None:
|
|
434
|
+
return None
|
|
435
|
+
|
|
436
|
+
normalized: list[str] = []
|
|
437
|
+
star = False
|
|
438
|
+
|
|
439
|
+
for item in v:
|
|
440
|
+
s = str(item).strip()
|
|
441
|
+
if not s:
|
|
442
|
+
continue
|
|
443
|
+
if s == "*":
|
|
444
|
+
star = True
|
|
445
|
+
continue
|
|
446
|
+
s = slugify(s)
|
|
447
|
+
if s in SUPPORTED_PROVIDER_SLUGS:
|
|
448
|
+
normalized.append(s)
|
|
449
|
+
else:
|
|
450
|
+
if cls.DEEPEVAL_VERBOSE_MODE:
|
|
451
|
+
logger.warning("Unknown provider slug %r dropped", item)
|
|
452
|
+
|
|
453
|
+
if star:
|
|
454
|
+
return ["*"]
|
|
455
|
+
|
|
456
|
+
# It is important to dedup after normalization to catch variants
|
|
457
|
+
normalized = dedupe_preserve_order(normalized)
|
|
458
|
+
return normalized or None
|
|
459
|
+
|
|
460
|
+
@field_validator(
|
|
461
|
+
"DEEPEVAL_RETRY_BEFORE_LOG_LEVEL",
|
|
462
|
+
"DEEPEVAL_RETRY_AFTER_LOG_LEVEL",
|
|
463
|
+
mode="before",
|
|
464
|
+
)
|
|
465
|
+
@classmethod
|
|
466
|
+
def _coerce_log_level(cls, v):
|
|
467
|
+
if v is None:
|
|
468
|
+
return None
|
|
469
|
+
if isinstance(v, (int, float)):
|
|
470
|
+
return int(v)
|
|
471
|
+
|
|
472
|
+
s = str(v).strip().upper()
|
|
473
|
+
if not s:
|
|
474
|
+
return None
|
|
475
|
+
|
|
476
|
+
import logging
|
|
477
|
+
|
|
478
|
+
# Accept standard names or numeric strings
|
|
479
|
+
name_to_level = {
|
|
480
|
+
"CRITICAL": logging.CRITICAL,
|
|
481
|
+
"ERROR": logging.ERROR,
|
|
482
|
+
"WARNING": logging.WARNING,
|
|
483
|
+
"INFO": logging.INFO,
|
|
484
|
+
"DEBUG": logging.DEBUG,
|
|
485
|
+
"NOTSET": logging.NOTSET,
|
|
486
|
+
}
|
|
487
|
+
if s.isdigit() or (s.startswith("-") and s[1:].isdigit()):
|
|
488
|
+
return int(s)
|
|
489
|
+
if s in name_to_level:
|
|
490
|
+
return name_to_level[s]
|
|
491
|
+
raise ValueError(
|
|
492
|
+
"Retry log level must be one of DEBUG, INFO, WARNING, ERROR, "
|
|
493
|
+
"CRITICAL, NOTSET, or a numeric logging level."
|
|
494
|
+
)
|
|
495
|
+
|
|
404
496
|
#######################
|
|
405
497
|
# Persistence support #
|
|
406
498
|
#######################
|
deepeval/config/utils.py
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import os
|
|
2
|
-
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from typing import Any, Iterable, List, Optional
|
|
6
|
+
|
|
3
7
|
|
|
4
8
|
_TRUTHY = frozenset({"1", "true", "t", "yes", "y", "on", "enable", "enabled"})
|
|
5
9
|
_FALSY = frozenset({"0", "false", "f", "no", "n", "off", "disable", "disabled"})
|
|
10
|
+
_LIST_SEP_RE = re.compile(r"[,\s;]+")
|
|
6
11
|
|
|
7
12
|
|
|
8
13
|
def parse_bool(value: Any, default: bool = False) -> bool:
|
|
@@ -84,3 +89,51 @@ def set_env_bool(key: str, value: Optional[bool] = False) -> None:
|
|
|
84
89
|
- Use `get_env_bool` to read back and parse the value safely.
|
|
85
90
|
"""
|
|
86
91
|
os.environ[key] = bool_to_env_str(bool(value))
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def coerce_to_list(
|
|
95
|
+
v,
|
|
96
|
+
*,
|
|
97
|
+
lower: bool = False,
|
|
98
|
+
allow_json: bool = True,
|
|
99
|
+
sep_re: re.Pattern = _LIST_SEP_RE,
|
|
100
|
+
) -> Optional[List[str]]:
|
|
101
|
+
"""
|
|
102
|
+
Coerce None / str / list / tuple / set into a clean List[str].
|
|
103
|
+
- Accepts JSON arrays ("[...]") or delimited strings (comma/space/semicolon).
|
|
104
|
+
- Strips whitespace, drops empties, optionally lowercases.
|
|
105
|
+
"""
|
|
106
|
+
if v is None:
|
|
107
|
+
return None
|
|
108
|
+
if isinstance(v, (list, tuple, set)):
|
|
109
|
+
items = list(v)
|
|
110
|
+
else:
|
|
111
|
+
s = str(v).strip()
|
|
112
|
+
if not s:
|
|
113
|
+
return None
|
|
114
|
+
if allow_json and s.startswith("[") and s.endswith("]"):
|
|
115
|
+
try:
|
|
116
|
+
parsed = json.loads(s)
|
|
117
|
+
items = parsed if isinstance(parsed, list) else [s]
|
|
118
|
+
except Exception:
|
|
119
|
+
items = sep_re.split(s)
|
|
120
|
+
else:
|
|
121
|
+
items = sep_re.split(s)
|
|
122
|
+
|
|
123
|
+
out: List[str] = []
|
|
124
|
+
for item in items:
|
|
125
|
+
s = str(item).strip()
|
|
126
|
+
if not s:
|
|
127
|
+
continue
|
|
128
|
+
out.append(s.lower() if lower else s)
|
|
129
|
+
return out or None
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def dedupe_preserve_order(items: Iterable[str]) -> List[str]:
|
|
133
|
+
seen = set()
|
|
134
|
+
out: List[str] = []
|
|
135
|
+
for x in items:
|
|
136
|
+
if x not in seen:
|
|
137
|
+
seen.add(x)
|
|
138
|
+
out.append(x)
|
|
139
|
+
return out
|
deepeval/constants.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
1
3
|
KEY_FILE: str = ".deepeval"
|
|
2
4
|
HIDDEN_DIR: str = ".deepeval"
|
|
3
5
|
PYTEST_RUN_TEST_NAME: str = "CONFIDENT_AI_RUN_TEST_NAME"
|
|
@@ -11,3 +13,28 @@ CONFIDENT_TRACE_ENVIRONMENT = "CONFIDENT_TRACE_ENVIRONMENT"
|
|
|
11
13
|
CONFIDENT_TRACING_ENABLED = "CONFIDENT_TRACING_ENABLED"
|
|
12
14
|
CONFIDENT_OPEN_BROWSER = "CONFIDENT_OPEN_BROWSER"
|
|
13
15
|
CONFIDENT_TEST_CASE_BATCH_SIZE = "CONFIDENT_TEST_CASE_BATCH_SIZE"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ProviderSlug(str, Enum):
|
|
19
|
+
OPENAI = "openai"
|
|
20
|
+
AZURE = "azure"
|
|
21
|
+
ANTHROPIC = "anthropic"
|
|
22
|
+
BEDROCK = "bedrock"
|
|
23
|
+
DEEPSEEK = "deepseek"
|
|
24
|
+
GOOGLE = "google"
|
|
25
|
+
GROK = "grok"
|
|
26
|
+
KIMI = "kimi"
|
|
27
|
+
LITELLM = "litellm"
|
|
28
|
+
LOCAL = "local"
|
|
29
|
+
OLLAMA = "ollama"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def slugify(value: str | ProviderSlug) -> str:
|
|
33
|
+
return (
|
|
34
|
+
value.value
|
|
35
|
+
if isinstance(value, ProviderSlug)
|
|
36
|
+
else str(value).strip().lower()
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
SUPPORTED_PROVIDER_SLUGS = frozenset(s.value for s in ProviderSlug)
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from .agent import DeepEvalPydanticAIAgent as Agent
|
|
1
2
|
from .patcher import instrument as instrument_pydantic_ai
|
|
3
|
+
from .otel import instrument_pydantic_ai as otel_instrument_pydantic_ai
|
|
2
4
|
|
|
3
|
-
__all__ = ["instrument_pydantic_ai"]
|
|
5
|
+
__all__ = ["instrument_pydantic_ai", "Agent", "otel_instrument_pydantic_ai"]
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
from typing import Optional, List, Generic, TypeVar
|
|
3
|
+
from contextvars import ContextVar
|
|
4
|
+
from contextlib import asynccontextmanager
|
|
5
|
+
|
|
6
|
+
from deepeval.prompt import Prompt
|
|
7
|
+
from deepeval.tracing.types import AgentSpan
|
|
8
|
+
from deepeval.tracing.tracing import Observer
|
|
9
|
+
from deepeval.metrics.base_metric import BaseMetric
|
|
10
|
+
from deepeval.tracing.context import current_span_context
|
|
11
|
+
from deepeval.integrations.pydantic_ai.utils import extract_tools_called
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
from pydantic_ai.agent import Agent
|
|
15
|
+
from pydantic_ai.tools import AgentDepsT
|
|
16
|
+
from pydantic_ai.output import OutputDataT
|
|
17
|
+
from deepeval.integrations.pydantic_ai.utils import (
|
|
18
|
+
create_patched_tool,
|
|
19
|
+
update_trace_context,
|
|
20
|
+
patch_llm_model,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
is_pydantic_ai_installed = True
|
|
24
|
+
except:
|
|
25
|
+
is_pydantic_ai_installed = False
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def pydantic_ai_installed():
|
|
29
|
+
if not is_pydantic_ai_installed:
|
|
30
|
+
raise ImportError(
|
|
31
|
+
"Pydantic AI is not installed. Please install it with `pip install pydantic-ai`."
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
_IS_RUN_SYNC = ContextVar("deepeval_is_run_sync", default=False)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class DeepEvalPydanticAIAgent(
|
|
39
|
+
Agent[AgentDepsT, OutputDataT], Generic[AgentDepsT, OutputDataT]
|
|
40
|
+
):
|
|
41
|
+
|
|
42
|
+
trace_name: Optional[str] = None
|
|
43
|
+
trace_tags: Optional[List[str]] = None
|
|
44
|
+
trace_metadata: Optional[dict] = None
|
|
45
|
+
trace_thread_id: Optional[str] = None
|
|
46
|
+
trace_user_id: Optional[str] = None
|
|
47
|
+
trace_metric_collection: Optional[str] = None
|
|
48
|
+
trace_metrics: Optional[List[BaseMetric]] = None
|
|
49
|
+
|
|
50
|
+
llm_prompt: Optional[Prompt] = None
|
|
51
|
+
llm_metrics: Optional[List[BaseMetric]] = None
|
|
52
|
+
llm_metric_collection: Optional[str] = None
|
|
53
|
+
|
|
54
|
+
agent_metrics: Optional[List[BaseMetric]] = None
|
|
55
|
+
agent_metric_collection: Optional[str] = None
|
|
56
|
+
|
|
57
|
+
def __init__(
|
|
58
|
+
self,
|
|
59
|
+
*args,
|
|
60
|
+
trace_name: Optional[str] = None,
|
|
61
|
+
trace_tags: Optional[List[str]] = None,
|
|
62
|
+
trace_metadata: Optional[dict] = None,
|
|
63
|
+
trace_thread_id: Optional[str] = None,
|
|
64
|
+
trace_user_id: Optional[str] = None,
|
|
65
|
+
trace_metric_collection: Optional[str] = None,
|
|
66
|
+
trace_metrics: Optional[List[BaseMetric]] = None,
|
|
67
|
+
llm_metric_collection: Optional[str] = None,
|
|
68
|
+
llm_metrics: Optional[List[BaseMetric]] = None,
|
|
69
|
+
llm_prompt: Optional[Prompt] = None,
|
|
70
|
+
agent_metric_collection: Optional[str] = None,
|
|
71
|
+
agent_metrics: Optional[List[BaseMetric]] = None,
|
|
72
|
+
**kwargs
|
|
73
|
+
):
|
|
74
|
+
pydantic_ai_installed()
|
|
75
|
+
|
|
76
|
+
self.trace_name = trace_name
|
|
77
|
+
self.trace_tags = trace_tags
|
|
78
|
+
self.trace_metadata = trace_metadata
|
|
79
|
+
self.trace_thread_id = trace_thread_id
|
|
80
|
+
self.trace_user_id = trace_user_id
|
|
81
|
+
self.trace_metric_collection = trace_metric_collection
|
|
82
|
+
self.trace_metrics = trace_metrics
|
|
83
|
+
|
|
84
|
+
self.llm_metric_collection = llm_metric_collection
|
|
85
|
+
self.llm_metrics = llm_metrics
|
|
86
|
+
self.llm_prompt = llm_prompt
|
|
87
|
+
|
|
88
|
+
self.agent_metric_collection = agent_metric_collection
|
|
89
|
+
self.agent_metrics = agent_metrics
|
|
90
|
+
|
|
91
|
+
super().__init__(*args, **kwargs)
|
|
92
|
+
|
|
93
|
+
patch_llm_model(
|
|
94
|
+
self._model, llm_metric_collection, llm_metrics, llm_prompt
|
|
95
|
+
) # TODO: Add dual patch guards
|
|
96
|
+
|
|
97
|
+
async def run(
|
|
98
|
+
self,
|
|
99
|
+
*args,
|
|
100
|
+
name: Optional[str] = None,
|
|
101
|
+
tags: Optional[List[str]] = None,
|
|
102
|
+
user_id: Optional[str] = None,
|
|
103
|
+
metadata: Optional[dict] = None,
|
|
104
|
+
thread_id: Optional[str] = None,
|
|
105
|
+
metrics: Optional[List[BaseMetric]] = None,
|
|
106
|
+
metric_collection: Optional[str] = None,
|
|
107
|
+
**kwargs
|
|
108
|
+
):
|
|
109
|
+
sig = inspect.signature(super().run)
|
|
110
|
+
bound = sig.bind_partial(*args, **kwargs)
|
|
111
|
+
bound.apply_defaults()
|
|
112
|
+
input = bound.arguments.get("user_prompt", None)
|
|
113
|
+
|
|
114
|
+
agent_name = super().name if super().name is not None else "Agent"
|
|
115
|
+
|
|
116
|
+
with Observer(
|
|
117
|
+
span_type="agent" if not _IS_RUN_SYNC.get() else "custom",
|
|
118
|
+
func_name=agent_name if not _IS_RUN_SYNC.get() else "run",
|
|
119
|
+
function_kwargs={"input": input},
|
|
120
|
+
metrics=self.agent_metrics if not _IS_RUN_SYNC.get() else None,
|
|
121
|
+
metric_collection=(
|
|
122
|
+
self.agent_metric_collection if not _IS_RUN_SYNC.get() else None
|
|
123
|
+
),
|
|
124
|
+
) as observer:
|
|
125
|
+
result = await super().run(*args, **kwargs)
|
|
126
|
+
observer.result = result.output
|
|
127
|
+
update_trace_context(
|
|
128
|
+
trace_name=name if name is not None else self.trace_name,
|
|
129
|
+
trace_tags=tags if tags is not None else self.trace_tags,
|
|
130
|
+
trace_metadata=(
|
|
131
|
+
metadata if metadata is not None else self.trace_metadata
|
|
132
|
+
),
|
|
133
|
+
trace_thread_id=(
|
|
134
|
+
thread_id if thread_id is not None else self.trace_thread_id
|
|
135
|
+
),
|
|
136
|
+
trace_user_id=(
|
|
137
|
+
user_id if user_id is not None else self.trace_user_id
|
|
138
|
+
),
|
|
139
|
+
trace_metric_collection=(
|
|
140
|
+
metric_collection
|
|
141
|
+
if metric_collection is not None
|
|
142
|
+
else self.trace_metric_collection
|
|
143
|
+
),
|
|
144
|
+
trace_metrics=(
|
|
145
|
+
metrics if metrics is not None else self.trace_metrics
|
|
146
|
+
),
|
|
147
|
+
trace_input=input,
|
|
148
|
+
trace_output=result.output,
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
agent_span: AgentSpan = current_span_context.get()
|
|
152
|
+
try:
|
|
153
|
+
agent_span.tools_called = extract_tools_called(result)
|
|
154
|
+
except:
|
|
155
|
+
pass
|
|
156
|
+
# TODO: available tools
|
|
157
|
+
# TODO: agent handoffs
|
|
158
|
+
|
|
159
|
+
return result
|
|
160
|
+
|
|
161
|
+
def run_sync(
|
|
162
|
+
self,
|
|
163
|
+
*args,
|
|
164
|
+
name: Optional[str] = None,
|
|
165
|
+
tags: Optional[List[str]] = None,
|
|
166
|
+
metadata: Optional[dict] = None,
|
|
167
|
+
thread_id: Optional[str] = None,
|
|
168
|
+
user_id: Optional[str] = None,
|
|
169
|
+
metric_collection: Optional[str] = None,
|
|
170
|
+
metrics: Optional[List[BaseMetric]] = None,
|
|
171
|
+
**kwargs
|
|
172
|
+
):
|
|
173
|
+
sig = inspect.signature(super().run_sync)
|
|
174
|
+
bound = sig.bind_partial(*args, **kwargs)
|
|
175
|
+
bound.apply_defaults()
|
|
176
|
+
input = bound.arguments.get("user_prompt", None)
|
|
177
|
+
|
|
178
|
+
token = _IS_RUN_SYNC.set(True)
|
|
179
|
+
|
|
180
|
+
agent_name = super().name if super().name is not None else "Agent"
|
|
181
|
+
|
|
182
|
+
with Observer(
|
|
183
|
+
span_type="agent",
|
|
184
|
+
func_name=agent_name,
|
|
185
|
+
function_kwargs={"input": input},
|
|
186
|
+
metrics=self.agent_metrics,
|
|
187
|
+
metric_collection=self.agent_metric_collection,
|
|
188
|
+
) as observer:
|
|
189
|
+
try:
|
|
190
|
+
result = super().run_sync(*args, **kwargs)
|
|
191
|
+
finally:
|
|
192
|
+
_IS_RUN_SYNC.reset(token)
|
|
193
|
+
|
|
194
|
+
observer.result = result.output
|
|
195
|
+
update_trace_context(
|
|
196
|
+
trace_name=name if name is not None else self.trace_name,
|
|
197
|
+
trace_tags=tags if tags is not None else self.trace_tags,
|
|
198
|
+
trace_metadata=(
|
|
199
|
+
metadata if metadata is not None else self.trace_metadata
|
|
200
|
+
),
|
|
201
|
+
trace_thread_id=(
|
|
202
|
+
thread_id if thread_id is not None else self.trace_thread_id
|
|
203
|
+
),
|
|
204
|
+
trace_user_id=(
|
|
205
|
+
user_id if user_id is not None else self.trace_user_id
|
|
206
|
+
),
|
|
207
|
+
trace_metric_collection=(
|
|
208
|
+
metric_collection
|
|
209
|
+
if metric_collection is not None
|
|
210
|
+
else self.trace_metric_collection
|
|
211
|
+
),
|
|
212
|
+
trace_metrics=(
|
|
213
|
+
metrics if metrics is not None else self.trace_metrics
|
|
214
|
+
),
|
|
215
|
+
trace_input=input,
|
|
216
|
+
trace_output=result.output,
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
agent_span: AgentSpan = current_span_context.get()
|
|
220
|
+
try:
|
|
221
|
+
agent_span.tools_called = extract_tools_called(result)
|
|
222
|
+
except:
|
|
223
|
+
pass
|
|
224
|
+
|
|
225
|
+
# TODO: available tools
|
|
226
|
+
# TODO: agent handoffs
|
|
227
|
+
|
|
228
|
+
return result
|
|
229
|
+
|
|
230
|
+
@asynccontextmanager
|
|
231
|
+
async def run_stream(
|
|
232
|
+
self,
|
|
233
|
+
*args,
|
|
234
|
+
name: Optional[str] = None,
|
|
235
|
+
tags: Optional[List[str]] = None,
|
|
236
|
+
metadata: Optional[dict] = None,
|
|
237
|
+
thread_id: Optional[str] = None,
|
|
238
|
+
user_id: Optional[str] = None,
|
|
239
|
+
metric_collection: Optional[str] = None,
|
|
240
|
+
metrics: Optional[List[BaseMetric]] = None,
|
|
241
|
+
**kwargs
|
|
242
|
+
):
|
|
243
|
+
sig = inspect.signature(super().run_stream)
|
|
244
|
+
super_params = sig.parameters
|
|
245
|
+
super_kwargs = {k: v for k, v in kwargs.items() if k in super_params}
|
|
246
|
+
bound = sig.bind_partial(*args, **super_kwargs)
|
|
247
|
+
bound.apply_defaults()
|
|
248
|
+
input = bound.arguments.get("user_prompt", None)
|
|
249
|
+
|
|
250
|
+
agent_name = super().name if super().name is not None else "Agent"
|
|
251
|
+
|
|
252
|
+
with Observer(
|
|
253
|
+
span_type="agent",
|
|
254
|
+
func_name=agent_name,
|
|
255
|
+
function_kwargs={"input": input},
|
|
256
|
+
metrics=self.agent_metrics,
|
|
257
|
+
metric_collection=self.agent_metric_collection,
|
|
258
|
+
) as observer:
|
|
259
|
+
final_result = None
|
|
260
|
+
async with super().run_stream(*args, **super_kwargs) as result:
|
|
261
|
+
try:
|
|
262
|
+
yield result
|
|
263
|
+
finally:
|
|
264
|
+
try:
|
|
265
|
+
final_result = await result.get_output()
|
|
266
|
+
observer.result = final_result
|
|
267
|
+
except Exception:
|
|
268
|
+
pass
|
|
269
|
+
|
|
270
|
+
update_trace_context(
|
|
271
|
+
trace_name=(
|
|
272
|
+
name if name is not None else self.trace_name
|
|
273
|
+
),
|
|
274
|
+
trace_tags=(
|
|
275
|
+
tags if tags is not None else self.trace_tags
|
|
276
|
+
),
|
|
277
|
+
trace_metadata=(
|
|
278
|
+
metadata
|
|
279
|
+
if metadata is not None
|
|
280
|
+
else self.trace_metadata
|
|
281
|
+
),
|
|
282
|
+
trace_thread_id=(
|
|
283
|
+
thread_id
|
|
284
|
+
if thread_id is not None
|
|
285
|
+
else self.trace_thread_id
|
|
286
|
+
),
|
|
287
|
+
trace_user_id=(
|
|
288
|
+
user_id
|
|
289
|
+
if user_id is not None
|
|
290
|
+
else self.trace_user_id
|
|
291
|
+
),
|
|
292
|
+
trace_metric_collection=(
|
|
293
|
+
metric_collection
|
|
294
|
+
if metric_collection is not None
|
|
295
|
+
else self.trace_metric_collection
|
|
296
|
+
),
|
|
297
|
+
trace_metrics=(
|
|
298
|
+
metrics
|
|
299
|
+
if metrics is not None
|
|
300
|
+
else self.trace_metrics
|
|
301
|
+
),
|
|
302
|
+
trace_input=input,
|
|
303
|
+
trace_output=(
|
|
304
|
+
final_result if final_result is not None else None
|
|
305
|
+
),
|
|
306
|
+
)
|
|
307
|
+
agent_span: AgentSpan = current_span_context.get()
|
|
308
|
+
try:
|
|
309
|
+
if final_result is not None:
|
|
310
|
+
agent_span.tools_called = extract_tools_called(
|
|
311
|
+
final_result
|
|
312
|
+
)
|
|
313
|
+
except:
|
|
314
|
+
pass
|
|
315
|
+
|
|
316
|
+
def tool(
|
|
317
|
+
self,
|
|
318
|
+
*args,
|
|
319
|
+
metrics: Optional[List[BaseMetric]] = None,
|
|
320
|
+
metric_collection: Optional[str] = None,
|
|
321
|
+
**kwargs
|
|
322
|
+
):
|
|
323
|
+
# Direct decoration: @agent.tool
|
|
324
|
+
if args and callable(args[0]):
|
|
325
|
+
patched_func = create_patched_tool(
|
|
326
|
+
args[0], metrics, metric_collection
|
|
327
|
+
)
|
|
328
|
+
new_args = (patched_func,) + args[1:]
|
|
329
|
+
return super(DeepEvalPydanticAIAgent, self).tool(
|
|
330
|
+
*new_args, **kwargs
|
|
331
|
+
)
|
|
332
|
+
# Decoration with args: @agent.tool(...)
|
|
333
|
+
super_tool = super(DeepEvalPydanticAIAgent, self).tool
|
|
334
|
+
|
|
335
|
+
def decorator(func):
|
|
336
|
+
patched_func = create_patched_tool(func, metrics, metric_collection)
|
|
337
|
+
return super_tool(*args, **kwargs)(patched_func)
|
|
338
|
+
|
|
339
|
+
return decorator
|