agentevals-cli 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. agentevals/__init__.py +16 -0
  2. agentevals/_protocol.py +83 -0
  3. agentevals/api/__init__.py +0 -0
  4. agentevals/api/app.py +137 -0
  5. agentevals/api/debug_routes.py +268 -0
  6. agentevals/api/models.py +204 -0
  7. agentevals/api/otlp_app.py +25 -0
  8. agentevals/api/otlp_routes.py +383 -0
  9. agentevals/api/routes.py +554 -0
  10. agentevals/api/streaming_routes.py +373 -0
  11. agentevals/builtin_metrics.py +234 -0
  12. agentevals/cli.py +643 -0
  13. agentevals/config.py +108 -0
  14. agentevals/converter.py +328 -0
  15. agentevals/custom_evaluators.py +468 -0
  16. agentevals/eval_config_loader.py +147 -0
  17. agentevals/evaluator/__init__.py +24 -0
  18. agentevals/evaluator/resolver.py +70 -0
  19. agentevals/evaluator/sources.py +293 -0
  20. agentevals/evaluator/templates.py +224 -0
  21. agentevals/extraction.py +444 -0
  22. agentevals/genai_converter.py +538 -0
  23. agentevals/loader/__init__.py +7 -0
  24. agentevals/loader/base.py +53 -0
  25. agentevals/loader/jaeger.py +112 -0
  26. agentevals/loader/otlp.py +193 -0
  27. agentevals/mcp_server.py +236 -0
  28. agentevals/output.py +204 -0
  29. agentevals/runner.py +310 -0
  30. agentevals/sdk.py +433 -0
  31. agentevals/streaming/__init__.py +120 -0
  32. agentevals/streaming/incremental_processor.py +337 -0
  33. agentevals/streaming/processor.py +285 -0
  34. agentevals/streaming/session.py +36 -0
  35. agentevals/streaming/ws_server.py +806 -0
  36. agentevals/trace_attrs.py +32 -0
  37. agentevals/trace_metrics.py +126 -0
  38. agentevals/utils/__init__.py +0 -0
  39. agentevals/utils/genai_messages.py +142 -0
  40. agentevals/utils/log_buffer.py +43 -0
  41. agentevals/utils/log_enrichment.py +187 -0
  42. agentevals_cli-0.5.2.dist-info/METADATA +22 -0
  43. agentevals_cli-0.5.2.dist-info/RECORD +46 -0
  44. agentevals_cli-0.5.2.dist-info/WHEEL +4 -0
  45. agentevals_cli-0.5.2.dist-info/entry_points.txt +2 -0
  46. agentevals_cli-0.5.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,32 @@
1
+ """Centralized OTel span attribute key constants.
2
+
3
+ Single source of truth for all attribute names used across the converter,
4
+ extraction, streaming, and runner modules.
5
+ """
6
+
7
+ # OTel scope
8
+ OTEL_SCOPE = "otel.scope.name"
9
+ OTEL_SCOPE_VERSION = "otel.scope.version"
10
+
11
+ # Google ADK scope value
12
+ ADK_SCOPE_VALUE = "gcp.vertex.agent"
13
+
14
+ # Standard OTel GenAI semantic conventions (gen_ai.*)
15
+ OTEL_GENAI_OP = "gen_ai.operation.name"
16
+ OTEL_GENAI_AGENT_NAME = "gen_ai.agent.name"
17
+ OTEL_GENAI_REQUEST_MODEL = "gen_ai.request.model"
18
+ OTEL_GENAI_INPUT_MESSAGES = "gen_ai.input.messages"
19
+ OTEL_GENAI_OUTPUT_MESSAGES = "gen_ai.output.messages"
20
+ OTEL_GENAI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
21
+ OTEL_GENAI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
22
+ OTEL_GENAI_TOOL_NAME = "gen_ai.tool.name"
23
+ OTEL_GENAI_TOOL_CALL_ID = "gen_ai.tool.call.id"
24
+ OTEL_GENAI_TOOL_CALL_ARGUMENTS = "gen_ai.tool.call.arguments"
25
+ OTEL_GENAI_TOOL_CALL_RESULT = "gen_ai.tool.call.result"
26
+
27
+ # ADK-specific custom attributes (gcp.vertex.agent.*)
28
+ ADK_LLM_REQUEST = "gcp.vertex.agent.llm_request"
29
+ ADK_LLM_RESPONSE = "gcp.vertex.agent.llm_response"
30
+ ADK_TOOL_CALL_ARGS = "gcp.vertex.agent.tool_call_args"
31
+ ADK_TOOL_RESPONSE = "gcp.vertex.agent.tool_response"
32
+ ADK_INVOCATION_ID = "gcp.vertex.agent.invocation_id"
@@ -0,0 +1,126 @@
1
+ """Extract performance and metadata from trace spans."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from .extraction import (
8
+ extract_agent_response_from_attrs,
9
+ extract_token_usage_from_attrs,
10
+ extract_user_text_from_attrs,
11
+ get_extractor,
12
+ )
13
+ from .trace_attrs import OTEL_GENAI_AGENT_NAME, OTEL_GENAI_REQUEST_MODEL
14
+
15
+
16
def _truncate(text: str, max_length: int = 200) -> str:
    """Return *text* shortened for preview display.

    Text of at most *max_length* characters is returned unchanged; longer
    text is cut to its first *max_length* characters and suffixed with
    ``"..."``.
    """
    needs_cut = len(text) > max_length
    return f"{text[:max_length]}..." if needs_cut else text
20
+
21
+
22
def _calc_percentiles(values: list[float]) -> dict[str, float]:
    """Summarise *values* as ``p50`` / ``p95`` / ``p99``.

    An empty input yields zeros for all three keys. ``p50`` is the
    statistical median; ``p95`` and ``p99`` are read from the sorted sample
    at index ``int(n * fraction)``, and a single-element sample reports that
    element for every percentile.
    """
    if not values:
        return {"p50": 0.0, "p95": 0.0, "p99": 0.0}
    import statistics

    ordered = sorted(values)
    count = len(ordered)

    def _pick(fraction: float) -> float:
        # With one sample there is nothing to rank; report that sample.
        position = int(count * fraction) if count > 1 else 0
        return ordered[position]

    summary = {"p50": statistics.median(ordered)}
    summary["p95"] = _pick(0.95)
    summary["p99"] = _pick(0.99)
    return summary
34
+
35
+
36
def extract_performance_metrics(trace, extractor=None) -> dict[str, Any]:
    """Summarise latency and token usage for the spans of *trace*.

    Args:
        trace: Trace object exposing ``root_spans`` and ``all_spans``.
        extractor: Span extractor to use; when ``None`` one is obtained via
            ``get_extractor(trace)``.

    Returns:
        A dict with a ``"latency"`` section (percentiles for ``overall``,
        ``llm_calls`` and ``tool_executions``) and a ``"tokens"`` section
        (``total_prompt``, ``total_output``, ``total`` and ``per_llm_call``
        percentiles).
    """
    if extractor is None:
        extractor = get_extractor(trace)
    invocation_spans = extractor.find_invocation_spans(trace)

    # NOTE(review): durations are divided by 1000 and treated as milliseconds,
    # so span.duration is presumably in microseconds — confirm against loader.
    agent_latencies = []
    if not invocation_spans and trace.root_spans:
        # No invocation spans were found: use the root spans for overall latency.
        agent_latencies.extend(root.duration / 1000.0 for root in trace.root_spans)
    agent_latencies.extend(inv.duration / 1000.0 for inv in invocation_spans)

    llm_latencies = []
    tool_latencies = []
    prompt_tokens = []
    output_tokens = []
    total_tokens = []

    for span in trace.all_spans:
        duration_ms = span.duration / 1000.0
        role = extractor.classify_span(span)

        if role == "tool":
            tool_latencies.append(duration_ms)
            continue
        if role != "llm":
            continue

        llm_latencies.append(duration_ms)
        in_toks, out_toks, _ = extract_token_usage_from_attrs(span.tags)
        if not (in_toks or out_toks):
            # LLM spans without any token counts do not contribute to token stats.
            continue
        prompt_tokens.append(in_toks)
        output_tokens.append(out_toks)
        total_tokens.append(in_toks + out_toks)

    latency_section = {
        "overall": _calc_percentiles(agent_latencies),
        "llm_calls": _calc_percentiles(llm_latencies),
        "tool_executions": _calc_percentiles(tool_latencies),
    }
    # sum() of an empty list is 0 and _calc_percentiles([]) is all zeros, so
    # the empty case needs no special handling here.
    token_section = {
        "total_prompt": sum(prompt_tokens),
        "total_output": sum(output_tokens),
        "total": sum(total_tokens),
        "per_llm_call": _calc_percentiles(total_tokens),
    }
    return {"latency": latency_section, "tokens": token_section}
83
+
84
+
85
def extract_trace_metadata(trace, extractor=None) -> dict[str, Any]:
    """Collect display metadata (agent, model, start time, previews) for *trace*.

    Args:
        trace: Trace object exposing ``root_spans`` and ``all_spans``.
        extractor: Span extractor to use; when ``None`` one is obtained via
            ``get_extractor(trace)``.

    Returns:
        A dict with keys ``agent_name``, ``model``, ``start_time``,
        ``user_input_preview`` and ``final_output_preview``; any value that
        could not be determined is ``None`` (or the falsy tag value found).
    """
    agent_name = None
    model = None
    start_time = None
    user_preview = None
    output_preview = None

    if extractor is None:
        extractor = get_extractor(trace)
    invocations = extractor.find_invocation_spans(trace)

    if invocations:
        # Only the first invocation span is used for the summary.
        entry = invocations[0]
        agent_name = entry.get_tag(OTEL_GENAI_AGENT_NAME)
        start_time = entry.start_time

        llm_spans = extractor.find_llm_spans_in(entry)
        if llm_spans:
            opening_llm = llm_spans[0]
            model = opening_llm.get_tag(OTEL_GENAI_REQUEST_MODEL)

            # The user input is read from the first LLM span ...
            user_text = extract_user_text_from_attrs(opening_llm.tags)
            if user_text:
                user_preview = _truncate(user_text)

            # ... and the final output from the last one.
            agent_text = extract_agent_response_from_attrs(llm_spans[-1].tags)
            if agent_text:
                output_preview = _truncate(agent_text)

    if not agent_name and trace.root_spans:
        # Fall back to the first root span's operation name.
        agent_name = trace.root_spans[0].operation_name

    if not model:
        # Fall back to the first span anywhere in the trace carrying a model tag.
        tagged = (span.get_tag(OTEL_GENAI_REQUEST_MODEL) for span in trace.all_spans)
        model = next((value for value in tagged if value), model)

    return {
        "agent_name": agent_name,
        "model": model,
        "start_time": start_time,
        "user_input_preview": user_preview,
        "final_output_preview": output_preview,
    }
File without changes
@@ -0,0 +1,142 @@
1
+ """Utilities for parsing OTel GenAI semantic convention message formats.
2
+
3
+ Supports two message formats:
4
+ - Content-based (e.g. opentelemetry-instrumentation-openai-v2):
5
+ {"role": "user", "content": "Hello"}
6
+ {"role": "assistant", "content": "...", "tool_calls": [{"type": "function", ...}]}
7
+
8
+ - Parts-based (OTel GenAI semconv v1.36.0+):
9
+ {"role": "user", "parts": [{"type": "text", "content": "Hello"}]}
10
+ {"role": "assistant", "parts": [{"type": "tool_call", "name": "...", "arguments": {...}}]}
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import logging
17
+ from typing import Any
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ USER_ROLES = ("user", "human")
22
+ ASSISTANT_ROLES = ("assistant", "model", "ai")
23
+
24
+
25
+ def parse_json_attr(raw: str | dict | list | Any, tag_name: str = "") -> dict | list | Any:
26
+ """Parse a JSON string from an OTel span attribute value.
27
+
28
+ If *raw* is already a dict or list it is returned as-is.
29
+ Returns ``{}`` on parse failure.
30
+ """
31
+ if isinstance(raw, (dict, list)):
32
+ return raw
33
+ if isinstance(raw, str):
34
+ try:
35
+ return json.loads(raw)
36
+ except json.JSONDecodeError:
37
+ logger.warning("Failed to parse JSON in %s: %s", tag_name, raw[:200])
38
+ return {}
39
+ return {}
40
+
41
+
42
def extract_text_from_message(msg: dict) -> str:
    """Extract text content from a GenAI message in any supported format.

    Lookup order:
      1. ``msg["content"]`` when it is a non-empty string.
      2. ``msg["content"]`` when it is a list: the ``"text"`` values of its
         dict items, joined with a single space.
      3. ``msg["parts"]``: for each part of type ``"text"``, its ``"content"``
         (or ``"text"`` when ``"content"`` is missing or empty), joined with
         a single space.

    Non-string text values (``None``, nested objects, numbers) are skipped
    rather than passed to ``str.join``, which would raise ``TypeError``.

    Returns:
        The extracted text, or ``""`` when the message carries none.
    """
    content = msg.get("content")
    if isinstance(content, str) and content:
        return content
    if isinstance(content, list):
        texts = [
            item["text"]
            for item in content
            if isinstance(item, dict) and isinstance(item.get("text"), str)
        ]
        if texts:
            return " ".join(texts)

    parts = msg.get("parts")
    if isinstance(parts, list):
        text_parts = []
        for part in parts:
            if not isinstance(part, dict) or part.get("type") != "text":
                continue
            # Prefer "content", fall back to "text"; accept only non-empty strings.
            for key in ("content", "text"):
                candidate = part.get(key)
                if isinstance(candidate, str) and candidate:
                    text_parts.append(candidate)
                    break
        if text_parts:
            return " ".join(text_parts)

    return ""
65
+
66
+
67
def extract_tool_calls_from_message(msg: dict) -> list[dict[str, Any]]:
    """Extract tool calls from a GenAI message in any supported format.

    The content-based ``tool_calls`` list is read first (entries of type
    ``"function"`` carrying a ``"function"`` object). Only when that yields
    nothing are parts-based entries (``parts`` items of type ``"tool_call"``)
    considered. Malformed entries, i.e. non-dict items or a ``"function"``
    value that is not a dict, are skipped instead of raising.

    Returns a normalized list of:
        {"name": str, "id": str | None, "arguments": dict}
    """
    result = []

    tool_calls = msg.get("tool_calls")
    if isinstance(tool_calls, list):
        for tc in tool_calls:
            if not isinstance(tc, dict) or tc.get("type") != "function":
                continue
            func = tc.get("function")
            if not isinstance(func, dict):
                # Missing or malformed function payload: nothing to extract.
                continue
            result.append(
                {
                    "name": func.get("name", ""),
                    "id": tc.get("id"),
                    "arguments": _parse_args(func.get("arguments", {})),
                }
            )

    if not result:
        parts = msg.get("parts")
        if isinstance(parts, list):
            for part in parts:
                if not isinstance(part, dict) or part.get("type") != "tool_call":
                    continue
                result.append(
                    {
                        "name": part.get("name", ""),
                        "id": part.get("id"),
                        "arguments": _parse_args(part.get("arguments", {})),
                    }
                )

    return result
107
+
108
+
109
def extract_tool_call_args_from_messages(
    messages_raw: str | list | Any,
    tool_name: str,
) -> tuple[dict, str | None]:
    """Fallback lookup of a tool call's arguments and ID inside a messages attribute.

    Used when a tool span lacks ``gen_ai.tool.call.arguments`` directly
    (e.g. Strands embeds the triggering tool_call in ``gen_ai.input.messages``).
    The messages are scanned in order and the first tool call named
    *tool_name* that has non-empty arguments wins.

    Returns ``(args_dict, tool_call_id_or_None)``; ``({}, None)`` when the
    attribute is not a list of messages or no matching call is found.
    """
    not_found: tuple[dict, str | None] = ({}, None)

    messages = parse_json_attr(messages_raw, "gen_ai.input.messages")
    if not isinstance(messages, list):
        return not_found

    dict_messages = (entry for entry in messages if isinstance(entry, dict))
    for message in dict_messages:
        for call in extract_tool_calls_from_message(message):
            if call["name"] != tool_name or not call["arguments"]:
                # Wrong tool, or a call without usable arguments: keep looking.
                continue
            return call["arguments"], call.get("id")
    return not_found
130
+
131
+
132
def _parse_args(args: Any) -> dict:
    """Normalise tool-call arguments to a dict.

    A dict is returned as-is. A string is decoded as JSON and returned only
    if it decodes to a dict. Everything else, including undecodable strings
    (which are logged as a warning), yields ``{}``.
    """
    if isinstance(args, dict):
        return args
    if not isinstance(args, str):
        return {}
    try:
        decoded = json.loads(args)
    except json.JSONDecodeError:
        # Log only a prefix of the offending payload.
        logger.warning("Failed to parse tool call arguments JSON: %s", args[:200])
        return {}
    return decoded if isinstance(decoded, dict) else {}
@@ -0,0 +1,43 @@
1
+ import logging
2
+ from collections import deque
3
+ from dataclasses import dataclass
4
+ from datetime import UTC
5
+
6
+
7
+ @dataclass
8
+ class BufferedLogRecord:
9
+ timestamp: str
10
+ level: str
11
+ logger_name: str
12
+ message: str
13
+ exc_text: str | None = None
14
+
15
+
16
+ class RingBufferLogHandler(logging.Handler):
17
+ def __init__(self, capacity: int = 1000):
18
+ super().__init__()
19
+ self._buffer: deque[BufferedLogRecord] = deque(maxlen=capacity)
20
+
21
+ def emit(self, record: logging.LogRecord) -> None:
22
+ from datetime import datetime
23
+
24
+ self._buffer.append(
25
+ BufferedLogRecord(
26
+ timestamp=datetime.fromtimestamp(record.created, tz=UTC).isoformat(),
27
+ level=record.levelname,
28
+ logger_name=record.name,
29
+ message=self.format(record),
30
+ exc_text=record.exc_text,
31
+ )
32
+ )
33
+
34
+ def get_text(self) -> str:
35
+ lines = []
36
+ for r in self._buffer:
37
+ lines.append(f"[{r.timestamp}] {r.level} {r.logger_name}: {r.message}")
38
+ if r.exc_text:
39
+ lines.append(r.exc_text)
40
+ return "\n".join(lines)
41
+
42
+
43
+ log_buffer = RingBufferLogHandler(capacity=1000)
@@ -0,0 +1,187 @@
1
+ """Utilities for enriching OTel spans with GenAI log message content."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from collections import defaultdict
8
+
9
+ from ..trace_attrs import (
10
+ OTEL_GENAI_AGENT_NAME,
11
+ OTEL_GENAI_INPUT_MESSAGES,
12
+ OTEL_GENAI_OUTPUT_MESSAGES,
13
+ )
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ def enrich_spans_with_logs(spans: list[dict], logs: list[dict], session_id: str | None = None) -> list[dict]:
19
+ """Enrich spans with message content from GenAI logs.
20
+
21
+ This reconstructs gen_ai.input.messages and gen_ai.output.messages attributes
22
+ from log events so the converter can extract message content.
23
+
24
+ When logs carry a ``span_id`` (OTLP path), each span is enriched only with
25
+ its own logs. When logs lack ``span_id`` (WebSocket SDK path), all messages
26
+ are injected into every span (legacy behavior).
27
+
28
+ Args:
29
+ spans: List of OTLP span dictionaries
30
+ logs: List of GenAI log event dictionaries
31
+ session_id: Optional session ID to add as agent.name attribute
32
+
33
+ Returns:
34
+ List of enriched span dictionaries with message attributes added
35
+ """
36
+ if not logs:
37
+ return spans
38
+
39
+ logger.debug("Enriching %d spans with %d logs", len(spans), len(logs))
40
+
41
+ has_span_ids = any(log.get("span_id") for log in logs)
42
+
43
+ if has_span_ids:
44
+ return _enrich_per_span(spans, logs, session_id)
45
+ return _enrich_broadcast(spans, logs, session_id)
46
+
47
+
48
def _extract_messages_from_logs(
    logs: list[dict],
) -> tuple[list[dict], list[dict]]:
    """Extract deduplicated input/output messages from a list of log events.

    ``gen_ai.user.message`` events become ``{"role": "user", ...}`` input
    messages. ``gen_ai.assistant.message`` and ``gen_ai.choice`` events become
    ``{"role": "assistant", ...}`` output messages; for ``gen_ai.choice`` the
    content and tool calls may be nested under ``body["message"]``. Events
    whose body is not a dict are ignored, and first-seen order is preserved.

    User content that is not a string (e.g. a list of multimodal parts) is
    deduplicated via its JSON serialisation, because lists and dicts are
    unhashable and cannot be stored in a set directly.

    Returns:
        ``(input_messages, output_messages)``.
    """
    input_messages = []
    output_messages = []
    seen_user = set()
    seen_assistant = set()

    for log in logs:
        event_name = log.get("event_name", "")
        body = log.get("body", {})

        if not isinstance(body, dict):
            continue

        if event_name == "gen_ai.user.message":
            user_content = body.get("content", "")
            if not user_content:
                continue
            if isinstance(user_content, str):
                user_key = user_content
            else:
                # The tuple keeps a serialised value from colliding with a
                # plain string that happens to look like JSON.
                user_key = ("json", json.dumps(user_content, sort_keys=True, default=str))
            if user_key not in seen_user:
                input_messages.append({"role": "user", "content": user_content})
                seen_user.add(user_key)

        elif event_name in ("gen_ai.assistant.message", "gen_ai.choice"):
            if event_name == "gen_ai.choice":
                nested = body.get("message", {}) if isinstance(body.get("message"), dict) else {}
                assistant_content = body.get("content") or nested.get("content") or ""
                tool_calls = nested.get("tool_calls", [])
            else:
                assistant_content = body.get("content") or ""
                tool_calls = body.get("tool_calls", [])

            # Content plus serialised tool calls identifies a message.
            message_key = f"{assistant_content}:{json.dumps(tool_calls) if tool_calls else ''}"

            if (assistant_content or tool_calls) and message_key not in seen_assistant:
                assistant_msg = {"role": "assistant", "content": assistant_content}
                if tool_calls:
                    assistant_msg["tool_calls"] = tool_calls
                output_messages.append(assistant_msg)
                seen_assistant.add(message_key)

    return input_messages, output_messages
89
+
90
+
91
+ def _inject_messages(
92
+ span: dict,
93
+ input_messages: list[dict],
94
+ output_messages: list[dict],
95
+ session_id: str | None,
96
+ ) -> dict:
97
+ """Create a copy of *span* with message attributes injected."""
98
+ span_copy = span.copy()
99
+ attrs = list(span_copy.get("attributes", []))
100
+ span_copy["attributes"] = attrs
101
+
102
+ if input_messages:
103
+ attrs.append(
104
+ {
105
+ "key": OTEL_GENAI_INPUT_MESSAGES,
106
+ "value": {"stringValue": json.dumps(input_messages)},
107
+ }
108
+ )
109
+ if output_messages:
110
+ attrs.append(
111
+ {
112
+ "key": OTEL_GENAI_OUTPUT_MESSAGES,
113
+ "value": {"stringValue": json.dumps(output_messages)},
114
+ }
115
+ )
116
+ if session_id:
117
+ attrs.append(
118
+ {
119
+ "key": OTEL_GENAI_AGENT_NAME,
120
+ "value": {"stringValue": session_id},
121
+ }
122
+ )
123
+
124
+ return span_copy
125
+
126
+
127
def _enrich_per_span(
    spans: list[dict],
    logs: list[dict],
    session_id: str | None,
) -> list[dict]:
    """Enrich each span with only the logs emitted within that span's context.

    Logs are grouped by their ``span_id`` (logs without one are ignored) and
    matched against each span's ``spanId``. Spans with matching logs get the
    extracted messages injected; spans without any are copied and, when
    *session_id* is set, only receive the agent-name attribute.

    Returns:
        A new list of span copies, in the same order as *spans*.
    """
    logs_by_span: dict[str, list[dict]] = defaultdict(list)
    for log in logs:
        sid = log.get("span_id", "")
        if sid:
            logs_by_span[sid].append(log)

    enriched = []
    # Log-group IDs that matched at least one span, collected during the main
    # pass so the debug statistic needs no extra scan over all spans.
    matched_ids = set()
    for span in spans:
        span_id = span.get("spanId", "")
        span_logs = logs_by_span.get(span_id, [])

        if span_logs:
            matched_ids.add(span_id)
            input_msgs, output_msgs = _extract_messages_from_logs(span_logs)
            enriched.append(_inject_messages(span, input_msgs, output_msgs, session_id))
        else:
            span_copy = span.copy()
            if session_id:
                attrs = list(span_copy.get("attributes", []))
                attrs.append(
                    {
                        "key": OTEL_GENAI_AGENT_NAME,
                        "value": {"stringValue": session_id},
                    }
                )
                span_copy["attributes"] = attrs
            enriched.append(span_copy)

    logger.debug(
        "Per-span enrichment: %d log groups, %d matched to spans",
        len(logs_by_span),
        len(matched_ids),
    )
    return enriched
167
+
168
+
169
def _enrich_broadcast(
    spans: list[dict],
    logs: list[dict],
    session_id: str | None,
) -> list[dict]:
    """Legacy enrichment: copy every message from *logs* onto every span.

    When the logs yield no messages at all, a warning is logged and the
    original *spans* list is returned untouched.
    """
    input_messages, output_messages = _extract_messages_from_logs(logs)

    if input_messages or output_messages:
        logger.debug(
            "Broadcast enrichment: %d user, %d assistant messages",
            len(input_messages),
            len(output_messages),
        )
        enriched = []
        for span in spans:
            enriched.append(_inject_messages(span, input_messages, output_messages, session_id))
        return enriched

    logger.warning("No messages extracted from logs")
    return spans
@@ -0,0 +1,22 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentevals-cli
3
+ Version: 0.5.2
4
+ Summary: Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: click>=8.0
8
+ Requires-Dist: fastapi>=0.115.0
9
+ Requires-Dist: google-adk[eval]>=1.25.0
10
+ Requires-Dist: httpx>=0.27.0
11
+ Requires-Dist: opentelemetry-proto>=1.36.0
12
+ Requires-Dist: python-dotenv>=1.0.0
13
+ Requires-Dist: python-multipart>=0.0.12
14
+ Requires-Dist: pyyaml>=6.0
15
+ Requires-Dist: tabulate>=0.9.0
16
+ Requires-Dist: uvicorn[standard]>=0.32.0
17
+ Provides-Extra: live
18
+ Requires-Dist: httpx>=0.27.0; extra == 'live'
19
+ Requires-Dist: mcp>=1.26.0; extra == 'live'
20
+ Provides-Extra: streaming
21
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'streaming'
22
+ Requires-Dist: websockets>=12.0; extra == 'streaming'
@@ -0,0 +1,46 @@
1
+ agentevals/__init__.py,sha256=NU0vD23WlyU5pexvoOMAsCuFvbJ-OJ9rfoJcnRl72dc,448
2
+ agentevals/_protocol.py,sha256=IHqAdPBp76boK_PQU_oFIvRDjZ0hehOyv75tyHIceAI,2578
3
+ agentevals/builtin_metrics.py,sha256=UQF8Gwthbvg-43YrVlULhGOb98qM11SnAezW7nD1-_Q,7944
4
+ agentevals/cli.py,sha256=8asqKXD4BN9Tf97ounqaOcHLEwc85tzKy7kvcjUiYM0,20231
5
+ agentevals/config.py,sha256=G1x2xIjkS3LGHLbDtCHXihiTP2ORYnFczLX4R-X0MwI,3383
6
+ agentevals/converter.py,sha256=TMF8zHTwgULjtq-F7CQt78uHB28YZPOxQQp2jpmcpWg,11377
7
+ agentevals/custom_evaluators.py,sha256=cBTPnxVVP7YHNdBXuoN3uGtNWFky46Cf2MbAf2tc3cM,15875
8
+ agentevals/eval_config_loader.py,sha256=cQOxu4VoUEmrCGe4srV9zWqF0-wrHmLUr-Jo8eF9znc,4908
9
+ agentevals/extraction.py,sha256=whCTbP0mzxx1aIBYKCNU99-z2_tJzGHhYUMjOKs35UU,16735
10
+ agentevals/genai_converter.py,sha256=d3n5LpAr84eMpdve5tDNXDl-THN66I6Y8YCDM3vfQ5E,20399
11
+ agentevals/mcp_server.py,sha256=bZVfrGqyYdjuxoF-bN22CS4R6J0Vj_ct3gBYc_cbxe8,9395
12
+ agentevals/output.py,sha256=KvmNpIPIMKp1VwuBv31nwNxaXRGR679_nSNJpCn5ehs,7263
13
+ agentevals/runner.py,sha256=5BI9sDNgmsHhR6welq6cS9yaT2LG6p3ULXNS4cNTZQI,11050
14
+ agentevals/sdk.py,sha256=5sXtmiIGonMyHeTdO04GKdFPZB5yeSG1jCarKTQ86wM,15796
15
+ agentevals/trace_attrs.py,sha256=fJ5PjRNjhbD0r-SPHKhOwX_oEFyjUViVLjus1HOsEnw,1263
16
+ agentevals/trace_metrics.py,sha256=WpLgDB1Z_b6LmpkwknK0rxxcc-wmbZfocBtN97_g_uk,4266
17
+ agentevals/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ agentevals/api/app.py,sha256=LBFUVcMrqwFLtpimqQLp1GCkVxibxLKEZ1I3SQB8Hzg,4088
19
+ agentevals/api/debug_routes.py,sha256=vrSjqUAShsO6WQh0OqX2ZtcJeVAV5DPuaK-k5_sYULg,9093
20
+ agentevals/api/models.py,sha256=caYsK7vj7Cpb6Juhr4UbJU1nkvqG_5CbGw4NOVbjJ5s,4451
21
+ agentevals/api/otlp_app.py,sha256=N6qtNY0_vX5N5WLrQ7fXi8EspM6CcOgS32nFoVmT6uU,605
22
+ agentevals/api/otlp_routes.py,sha256=QJRZW_jt-HZ47aUax94pxUhphMa25qPGza5bJfwIiSo,14165
23
+ agentevals/api/routes.py,sha256=nE3bEt1Tzm37UyMoMhGl1oRyq5lR2C_7xmNWrUz6Yx0,20987
24
+ agentevals/api/streaming_routes.py,sha256=Uj88WXzvFbOq4xA0FYa7t2NsyWpnCORvXQiqQtCYiWI,13167
25
+ agentevals/evaluator/__init__.py,sha256=_4n0uxBA6LcEGOuIfn6nTbFGD32O2llfxStOxTU2Lis,574
26
+ agentevals/evaluator/resolver.py,sha256=AXo6kBpvLfIq2ZLXfckNI58AixrMv7KYJaffb-yyBgg,2529
27
+ agentevals/evaluator/sources.py,sha256=SJ2MIAwrtmgtdnGscqC-9MjUp76jKBQUwnX-1rWUkBk,9591
28
+ agentevals/evaluator/templates.py,sha256=1kqcdhdmNsTotJlMydbZsvM8Tmgm4JXXa1B6B7MspwY,4914
29
+ agentevals/loader/__init__.py,sha256=EpnM_rHm7v17hDDzBUT3JzfuAqEZdHT5jJ8_z3jnayc,202
30
+ agentevals/loader/base.py,sha256=OTeoEyHbvUEbwzsYzuRHeK8Sx7fuLTafd9T4UI7cGSk,1503
31
+ agentevals/loader/jaeger.py,sha256=DgN7CJdN2w4OGONye90HIeUtYTv-vu9gFNesCAI9ULo,3568
32
+ agentevals/loader/otlp.py,sha256=alShML5ukryFKgt85VC1Nc6U69hK3zH7tcL36IcwP6w,7194
33
+ agentevals/streaming/__init__.py,sha256=WnAeBETNIK9KcA47vAQnL7s8WPFgknQLdy4laknGm9E,3939
34
+ agentevals/streaming/incremental_processor.py,sha256=b2DEf1SgyHT8EHcFeAhhiNYwpE039d8GYcw49guAsP4,14550
35
+ agentevals/streaming/processor.py,sha256=gXwwKzEzR6vWokKR2AIhnaQYYYmcE6Jk_fcBYMLonks,10886
36
+ agentevals/streaming/session.py,sha256=vUWzqq1CQigpkiDEqJqDM9LTyLw0Q39DilYy1Yw0XZE,1210
37
+ agentevals/streaming/ws_server.py,sha256=kkdavfBrcCoo5vnVz_FHAWZ2lARU6g9mSy41y5XChrc,32872
38
+ agentevals/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
+ agentevals/utils/genai_messages.py,sha256=BURKS3eoUQzmiwMuYPznv9IDwUKyzsiCI4klUlBTPVM,4704
40
+ agentevals/utils/log_buffer.py,sha256=bpDCPvfwBC2ZzOXXh0bmcbq3bSc1JixTSJZf9XqvG_M,1188
41
+ agentevals/utils/log_enrichment.py,sha256=SmhQknMoz3Pkbg0Rz1J83B1if74Qfh2rthrP6O21bD4,6117
42
+ agentevals_cli-0.5.2.dist-info/METADATA,sha256=SxxDY7HJtYtn7wqxUKM01RN4memFgtQcAAy6BsL5nq0,802
43
+ agentevals_cli-0.5.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
44
+ agentevals_cli-0.5.2.dist-info/entry_points.txt,sha256=lLyvQGvs92BySjju70F7byMFSAb9LTexKmSkwF4jkks,51
45
+ agentevals_cli-0.5.2.dist-info/licenses/LICENSE,sha256=Ox7lseFP2kBRXBjsLweW1jLmWiCyrKjwF8ZUvCbKd70,11310
46
+ agentevals_cli-0.5.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ agentevals = agentevals.cli:main