agentops-accelerator 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentops/__init__.py +10 -0
- agentops/__main__.py +6 -0
- agentops/agent/__init__.py +12 -0
- agentops/agent/_legacy_ids.py +92 -0
- agentops/agent/analyzer.py +207 -0
- agentops/agent/checks/__init__.py +1 -0
- agentops/agent/checks/catalog.py +880 -0
- agentops/agent/checks/errors.py +279 -0
- agentops/agent/checks/foundry_config.py +75 -0
- agentops/agent/checks/latency.py +84 -0
- agentops/agent/checks/opex.py +157 -0
- agentops/agent/checks/opex_workspace.py +874 -0
- agentops/agent/checks/posture.py +36 -0
- agentops/agent/checks/posture_rules/__init__.py +53 -0
- agentops/agent/checks/posture_rules/content_filter.py +59 -0
- agentops/agent/checks/posture_rules/diagnostics.py +74 -0
- agentops/agent/checks/posture_rules/local_auth.py +55 -0
- agentops/agent/checks/posture_rules/managed_identity.py +59 -0
- agentops/agent/checks/posture_rules/network.py +68 -0
- agentops/agent/checks/regression.py +78 -0
- agentops/agent/checks/release_readiness.py +182 -0
- agentops/agent/checks/safety.py +247 -0
- agentops/agent/checks/spec_conformance.py +375 -0
- agentops/agent/cockpit.py +5159 -0
- agentops/agent/config.py +240 -0
- agentops/agent/findings.py +113 -0
- agentops/agent/history.py +142 -0
- agentops/agent/knowledge/__init__.py +182 -0
- agentops/agent/knowledge/waf-checklist.csv +39 -0
- agentops/agent/llm_assist/__init__.py +16 -0
- agentops/agent/llm_assist/_base.py +124 -0
- agentops/agent/llm_assist/_bundle_rule.py +154 -0
- agentops/agent/llm_assist/_client.py +347 -0
- agentops/agent/llm_assist/_dataset_rules.py +191 -0
- agentops/agent/llm_assist/_engine.py +106 -0
- agentops/agent/llm_assist/_prompt_rules.py +291 -0
- agentops/agent/llm_assist/_spec_rules.py +235 -0
- agentops/agent/production_telemetry.py +430 -0
- agentops/agent/report.py +207 -0
- agentops/agent/server/__init__.py +1 -0
- agentops/agent/server/app.py +84 -0
- agentops/agent/server/auth.py +94 -0
- agentops/agent/server/chat.py +44 -0
- agentops/agent/server/protocol.py +72 -0
- agentops/agent/sources/__init__.py +1 -0
- agentops/agent/sources/azure_monitor.py +523 -0
- agentops/agent/sources/azure_resources.py +602 -0
- agentops/agent/sources/foundry_control.py +174 -0
- agentops/agent/sources/results_history.py +494 -0
- agentops/agent/sources/spec_detectors/__init__.py +42 -0
- agentops/agent/sources/spec_detectors/_base.py +58 -0
- agentops/agent/sources/spec_detectors/agents_md.py +75 -0
- agentops/agent/sources/spec_detectors/spec_kit.py +172 -0
- agentops/agent/time_range.py +117 -0
- agentops/cli/__init__.py +1 -0
- agentops/cli/app.py +4823 -0
- agentops/core/__init__.py +1 -0
- agentops/core/agentops_config.py +592 -0
- agentops/core/config_loader.py +22 -0
- agentops/core/evaluators.py +480 -0
- agentops/core/release_evidence.py +56 -0
- agentops/core/results.py +117 -0
- agentops/mcp/__init__.py +10 -0
- agentops/mcp/server.py +232 -0
- agentops/pipeline/__init__.py +8 -0
- agentops/pipeline/cloud_results.py +189 -0
- agentops/pipeline/cloud_runner.py +901 -0
- agentops/pipeline/comparison.py +108 -0
- agentops/pipeline/diagnostics.py +51 -0
- agentops/pipeline/invocations.py +535 -0
- agentops/pipeline/official_eval.py +414 -0
- agentops/pipeline/orchestrator.py +775 -0
- agentops/pipeline/prompt_deploy.py +377 -0
- agentops/pipeline/publisher.py +121 -0
- agentops/pipeline/reporter.py +202 -0
- agentops/pipeline/runtime.py +409 -0
- agentops/pipeline/thresholds.py +84 -0
- agentops/services/__init__.py +1 -0
- agentops/services/cicd.py +720 -0
- agentops/services/eval_analysis.py +848 -0
- agentops/services/evidence_pack.py +757 -0
- agentops/services/initializer.py +86 -0
- agentops/services/preflight.py +470 -0
- agentops/services/setup_wizard.py +709 -0
- agentops/services/skills.py +643 -0
- agentops/services/trace_promotion.py +300 -0
- agentops/services/workflow_analysis.py +1129 -0
- agentops/templates/.gitignore +15 -0
- agentops/templates/__init__.py +1 -0
- agentops/templates/agent-server/Dockerfile +23 -0
- agentops/templates/agent-server/README.md +61 -0
- agentops/templates/agent-server/main.bicep +94 -0
- agentops/templates/agent.yaml +87 -0
- agentops/templates/agentops.yaml +58 -0
- agentops/templates/foundry.svg +71 -0
- agentops/templates/icon.png +0 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-dev-azd.yml +118 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-dev.yml +73 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-prod-azd.yml +141 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-prod.yml +94 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-prompt-agent.yml +167 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-qa-azd.yml +118 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-qa.yml +68 -0
- agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml +210 -0
- agentops/templates/pipelines/azuredevops/agentops-pr.yml +155 -0
- agentops/templates/pipelines/azuredevops/agentops-watchdog.yml +106 -0
- agentops/templates/project.gitignore +36 -0
- agentops/templates/sample-traces.jsonl +3 -0
- agentops/templates/skills/agentops-agent/SKILL.md +137 -0
- agentops/templates/skills/agentops-config/SKILL.md +113 -0
- agentops/templates/skills/agentops-dataset/SKILL.md +84 -0
- agentops/templates/skills/agentops-eval/SKILL.md +189 -0
- agentops/templates/skills/agentops-report/SKILL.md +71 -0
- agentops/templates/skills/agentops-workflow/SKILL.md +471 -0
- agentops/templates/smoke.jsonl +3 -0
- agentops/templates/waf-checklist.README.md +84 -0
- agentops/templates/waf-checklist.csv +22 -0
- agentops/templates/workflows/agentops-deploy-dev-azd.yml +166 -0
- agentops/templates/workflows/agentops-deploy-dev.yml +187 -0
- agentops/templates/workflows/agentops-deploy-prod-azd.yml +183 -0
- agentops/templates/workflows/agentops-deploy-prod.yml +171 -0
- agentops/templates/workflows/agentops-deploy-prompt-agent.yml +197 -0
- agentops/templates/workflows/agentops-deploy-qa-azd.yml +156 -0
- agentops/templates/workflows/agentops-deploy-qa.yml +145 -0
- agentops/templates/workflows/agentops-pr-prompt-agent.yml +210 -0
- agentops/templates/workflows/agentops-pr.yml +148 -0
- agentops/templates/workflows/agentops-watchdog.yml +122 -0
- agentops/utils/__init__.py +1 -0
- agentops/utils/azd_env.py +435 -0
- agentops/utils/azure_endpoints.py +62 -0
- agentops/utils/colors.py +47 -0
- agentops/utils/dotenv_loader.py +105 -0
- agentops/utils/foundry_discovery.py +229 -0
- agentops/utils/logging.py +59 -0
- agentops/utils/telemetry.py +554 -0
- agentops/utils/yaml.py +36 -0
- agentops_accelerator-0.3.0.dist-info/METADATA +278 -0
- agentops_accelerator-0.3.0.dist-info/RECORD +142 -0
- agentops_accelerator-0.3.0.dist-info/WHEEL +5 -0
- agentops_accelerator-0.3.0.dist-info/entry_points.txt +2 -0
- agentops_accelerator-0.3.0.dist-info/licenses/LICENSE +21 -0
- agentops_accelerator-0.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,554 @@
|
|
|
1
|
+
"""Optional OpenTelemetry instrumentation for AgentOps evaluation runs.
|
|
2
|
+
|
|
3
|
+
All OpenTelemetry imports are **lazy** - they only happen when tracing is
|
|
4
|
+
enabled via ``APPLICATIONINSIGHTS_CONNECTION_STRING`` (Azure Monitor) or
|
|
5
|
+
the ``AGENTOPS_OTLP_ENDPOINT`` environment variable. When neither variable
|
|
6
|
+
is set and Foundry auto-discovery yields no connection string, every public function in this module is a no-op.
|
|
7
|
+
|
|
8
|
+
Schema design follows three OTel semantic convention layers:
|
|
9
|
+
https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-agent-spans/
|
|
10
|
+
|
|
11
|
+
* **CICD** (``cicd.pipeline.*``) - the eval run as a pipeline
|
|
12
|
+
* **GenAI** (``gen_ai.*``) - the agent/model invocation
|
|
13
|
+
* **AgentOps** (``agentops.eval.*``) - evaluation-specific (score, threshold)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
from contextlib import contextmanager
|
|
20
|
+
from typing import Any, Generator, Optional
|
|
21
|
+
from urllib.parse import urlparse
|
|
22
|
+
from uuid import UUID
|
|
23
|
+
|
|
24
|
+
from agentops.utils.logging import get_logger
|
|
25
|
+
|
|
26
|
+
log = get_logger(__name__)
|
|
27
|
+
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
# Lazy globals - initialised on first call to ``init_tracing()``
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
_tracer: Any = None
|
|
32
|
+
_tracing_enabled: bool = False
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def is_enabled() -> bool:
    """Report whether ``init_tracing()`` has switched tracing on.

    Returns:
        The current value of the module-level ``_tracing_enabled`` flag.
    """
    return _tracing_enabled
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def init_tracing() -> None:
    """Initialise tracing when Azure Monitor or OTLP export is configured.

    Exporter resolution order:

    1. ``APPLICATIONINSIGHTS_CONNECTION_STRING`` (or the AgentOps-prefixed
       variant) - explicit user configuration always wins. A value that
       fails ``is_appinsights_connection_string`` (for example an
       unexpanded CI placeholder) is treated as absent.
    2. ``AGENTOPS_OTLP_ENDPOINT`` - use a generic OTLP/HTTP exporter. The
       value is stripped of surrounding whitespace and trailing slashes,
       and ``/v1/traces`` is appended unless it is already present.
    3. **Auto-discovery**: when neither of the above is set but
       ``AZURE_AI_FOUNDRY_PROJECT_ENDPOINT`` is, ask the Foundry project
       (via the ``azure-ai-projects`` SDK) for the connection string of
       the Application Insights resource attached to it. This lets
       eval runs and watchdog analyses emit traces into the same App
       Insights the Foundry project already uses, without any extra
       configuration.

    If Azure Monitor cannot be configured (exporter missing or
    ``configure_azure_monitor`` raises), the OTLP endpoint is used as a
    fallback when one is set.

    Once tracing has been enabled, further calls return immediately.
    The function never raises for missing optional dependencies or
    exporter configuration errors; tracing simply stays disabled.
    """
    global _tracer, _tracing_enabled  # noqa: PLW0603

    if _tracing_enabled:
        return

    appinsights_connection_string = os.getenv(
        "APPLICATIONINSIGHTS_CONNECTION_STRING"
    ) or os.getenv("AGENTOPS_APPLICATIONINSIGHTS_CONNECTION_STRING")
    if appinsights_connection_string and not is_appinsights_connection_string(
        appinsights_connection_string
    ):
        appinsights_connection_string = None

    # Normalise once: surrounding whitespace or a trailing "/" would
    # otherwise produce a malformed ".../ /v1/traces" or "...//v1/traces" URL.
    otlp_endpoint = (os.getenv("AGENTOPS_OTLP_ENDPOINT") or "").strip().rstrip("/")

    if not appinsights_connection_string and not otlp_endpoint:
        # Fallback: ask the Foundry project for the App Insights it owns.
        try:
            from agentops.utils.foundry_discovery import (
                resolve_appinsights_connection_from_env,
            )

            appinsights_connection_string = resolve_appinsights_connection_from_env()
        except Exception:  # noqa: BLE001
            # Discovery is best-effort - never raise into init_tracing.
            appinsights_connection_string = None

    if not appinsights_connection_string and not otlp_endpoint:
        return

    # Opt into Azure's "experimental" GenAI tracing flag by default. This
    # tells the OTel instrumentation to capture prompt + response content
    # as span attributes (not just metadata), which is exactly what an
    # eval / watchdog workflow needs to inspect a failing row in the
    # Foundry portal. The flag is "experimental" only in the sense that
    # Azure may change the underlying schema - not that it is unsafe.
    # Users who want to opt out can set the env var to "false" explicitly.
    os.environ.setdefault("AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING", "true")
    os.environ.setdefault("OTEL_SERVICE_NAME", "agentops")

    try:
        from opentelemetry import trace
    except ImportError:
        # opentelemetry not installed - tracing stays disabled
        return

    if appinsights_connection_string:
        try:
            from azure.monitor.opentelemetry import configure_azure_monitor

            kwargs: dict[str, Any] = {
                "connection_string": appinsights_connection_string,
            }
            resource = _agentops_resource()
            if resource is not None:
                kwargs["resource"] = resource
            configure_azure_monitor(**kwargs)
            _tracer = trace.get_tracer("agentops")
            _tracing_enabled = True
            return
        except ImportError:
            # Azure Monitor exporter not installed - try OTLP below if configured.
            pass
        except Exception as exc:  # noqa: BLE001
            log.warning("Azure Monitor tracing disabled: %s", exc)

    if not otlp_endpoint:
        return

    try:
        from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
            OTLPSpanExporter,
        )
        from opentelemetry.sdk.resources import Resource
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.trace.export import BatchSpanProcessor

        import agentops

        resource = Resource(
            attributes={
                "service.name": "agentops",
                "service.version": getattr(agentops, "__version__", "0.0.0"),
            }
        )

        # Accept both a collector base URL and a full traces URL so the
        # signal path is never appended twice.
        if otlp_endpoint.endswith("/v1/traces"):
            traces_url = otlp_endpoint
        else:
            traces_url = otlp_endpoint + "/v1/traces"

        provider = TracerProvider(resource=resource)
        exporter = OTLPSpanExporter(endpoint=traces_url)
        provider.add_span_processor(BatchSpanProcessor(exporter))
        trace.set_tracer_provider(provider)

        _tracer = trace.get_tracer("agentops")
        _tracing_enabled = True
    except ImportError:
        # OTLP exporter not installed - tracing stays disabled
        pass
    except Exception as exc:  # noqa: BLE001
        log.warning("OTLP tracing disabled: %s", exc)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def is_appinsights_connection_string(value: str) -> bool:
    """Decide whether *value* looks like a usable App Insights connection string.

    CI systems can leave undefined variables as literal placeholders such
    as ``$(APPLICATIONINSIGHTS_CONNECTION_STRING)``. Those are rejected
    here so the caller can treat them as absent and fall back to Foundry
    auto-discovery.

    Returns:
        ``True`` when the string carries an ``InstrumentationKey`` that
        parses as a UUID and, if an ``IngestionEndpoint`` is present, that
        endpoint is an ``http``/``https`` URL with a host part.
    """
    fields = _appinsights_connection_string_parts(value)

    ikey = fields.get("instrumentationkey")
    if not ikey:
        return False
    try:
        UUID(ikey)
    except ValueError:
        return False

    endpoint = fields.get("ingestionendpoint")
    if not endpoint:
        # The endpoint is optional; the key alone is enough.
        return True

    url = urlparse(endpoint)
    return url.scheme in {"http", "https"} and bool(url.netloc)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _is_appinsights_connection_string(value: str) -> bool:
    """Underscore-prefixed alias that delegates to ``is_appinsights_connection_string``."""
    return is_appinsights_connection_string(value)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _appinsights_connection_string_parts(value: str) -> dict[str, str]:
|
|
183
|
+
value = value.strip()
|
|
184
|
+
if not value or "$(" in value or "${{" in value:
|
|
185
|
+
return {}
|
|
186
|
+
parts: dict[str, str] = {}
|
|
187
|
+
for segment in value.split(";"):
|
|
188
|
+
segment = segment.strip()
|
|
189
|
+
if not segment:
|
|
190
|
+
continue
|
|
191
|
+
if "=" not in segment:
|
|
192
|
+
return {}
|
|
193
|
+
key, part_value = segment.split("=", 1)
|
|
194
|
+
key = key.strip().lower()
|
|
195
|
+
if not key:
|
|
196
|
+
return {}
|
|
197
|
+
parts[key] = part_value.strip()
|
|
198
|
+
return parts
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _agentops_resource() -> Optional[Any]:
|
|
202
|
+
try:
|
|
203
|
+
from opentelemetry.sdk.resources import Resource
|
|
204
|
+
import agentops
|
|
205
|
+
except Exception: # noqa: BLE001
|
|
206
|
+
return None
|
|
207
|
+
return Resource.create(
|
|
208
|
+
{
|
|
209
|
+
"service.name": "agentops",
|
|
210
|
+
"service.version": getattr(agentops, "__version__", "0.0.0"),
|
|
211
|
+
}
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def shutdown() -> None:
    """Shut down the global tracer provider if tracing was enabled.

    Does nothing when tracing is disabled. Any error raised while
    importing OpenTelemetry or shutting the provider down is swallowed.
    """
    if not _tracing_enabled:
        return
    try:
        from opentelemetry import trace

        active_provider = trace.get_tracer_provider()
        # Only call shutdown() when the active provider exposes it.
        if hasattr(active_provider, "shutdown"):
            active_provider.shutdown()
    except Exception:  # noqa: BLE001
        pass
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
# ---------------------------------------------------------------------------
|
|
230
|
+
# Span context managers
|
|
231
|
+
# ---------------------------------------------------------------------------
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
@contextmanager
def eval_run_span(
    *,
    bundle_name: str,
    dataset_name: str,
    backend_type: str,
    target: str,
    model: Optional[str] = None,
    agent_id: Optional[str] = None,
) -> Generator[Optional[Any], None, None]:
    """Open the root span for one evaluation run (CICD pipeline run).

    Yields ``None`` when tracing is disabled. Otherwise yields a
    ``SERVER`` span named ``RUN <bundle_name>`` carrying the CICD and
    AgentOps run attributes; ``model`` and ``agent_id`` are attached only
    when truthy. An exception raised inside the ``with`` body marks the
    span as ERROR, is recorded on it, and is re-raised.
    """
    if _tracer is None or not _tracing_enabled:
        yield None
        return

    from opentelemetry.trace import SpanKind, StatusCode

    run_attributes = {
        # CICD semconv
        "cicd.pipeline.name": bundle_name,
        "cicd.pipeline.action.name": "RUN",
        # AgentOps evaluation attributes
        "agentops.eval.dataset": dataset_name,
        "agentops.eval.backend": backend_type,
        "agentops.eval.target": target,
    }
    if model:
        run_attributes["agentops.eval.model"] = model
    if agent_id:
        run_attributes["agentops.eval.agent_id"] = agent_id

    with _tracer.start_as_current_span(
        f"RUN {bundle_name}",
        kind=SpanKind.SERVER,
    ) as run_span:
        for attr_key, attr_value in run_attributes.items():
            run_span.set_attribute(attr_key, attr_value)

        try:
            yield run_span
        except Exception as error:
            run_span.set_status(StatusCode.ERROR, str(error))
            run_span.record_exception(error)
            raise
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def set_eval_run_result(
    span: Any,
    *,
    passed: bool,
    items_total: int,
    items_passed: int,
) -> None:
    """Record the final outcome on the root eval run span.

    A ``None`` span (tracing disabled) is ignored. The pass rate is only
    written when ``items_total`` is positive. The span status is OK when
    ``passed`` is true, otherwise ERROR with the description
    ``Threshold failure``.
    """
    if span is None:
        return

    from opentelemetry.trace import StatusCode

    outcome = "success" if passed else "failure"
    span.set_attribute("cicd.pipeline.result", outcome)
    span.set_attribute("agentops.eval.items_total", items_total)
    span.set_attribute("agentops.eval.items_passed", items_passed)
    if items_total > 0:
        span.set_attribute("agentops.eval.pass_rate", items_passed / items_total)

    if not passed:
        span.set_status(StatusCode.ERROR, "Threshold failure")
        return
    span.set_status(StatusCode.OK)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
@contextmanager
def eval_item_span(
    *,
    row_index: int,
    input_text: Optional[str] = None,
    expected_text: Optional[str] = None,
) -> Generator[Optional[Any], None, None]:
    """Open a span for a single evaluation item (CICD task run).

    Yields ``None`` when tracing is disabled. The span name is
    ``eval_item <row_index>``, followed by a preview of ``input_text``
    (first 60 characters, newlines replaced by spaces, ellipsis when
    truncated) when input is provided. An exception raised inside the
    ``with`` body marks the item as failed, sets the span status to
    ERROR, is recorded on the span, and is re-raised.
    """
    if _tracer is None or not _tracing_enabled:
        yield None
        return

    from opentelemetry.trace import SpanKind, StatusCode

    span_name = f"eval_item {row_index}"
    if input_text:
        preview = input_text[:60].replace("\n", " ")
        if len(input_text) > 60:
            preview += "\u2026"
        span_name = f"{span_name} - '{preview}'"

    with _tracer.start_as_current_span(
        span_name,
        kind=SpanKind.SERVER,
    ) as item_span:
        # CICD task attributes
        item_span.set_attribute("cicd.pipeline.task.name", "eval_item")
        item_span.set_attribute("cicd.pipeline.task.run.id", str(row_index))

        # AgentOps item attributes
        item_span.set_attribute("agentops.eval.item.index", row_index)
        if input_text:
            item_span.set_attribute("agentops.eval.item.input", input_text)
        if expected_text:
            item_span.set_attribute("agentops.eval.item.expected", expected_text)

        try:
            yield item_span
        except Exception as error:
            item_span.set_attribute("cicd.pipeline.task.run.result", "failure")
            item_span.set_attribute("agentops.eval.item.passed", False)
            item_span.set_status(StatusCode.ERROR, str(error))
            item_span.record_exception(error)
            raise
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def set_eval_item_result(span: Any, *, passed: bool) -> None:
    """Record the pass/fail outcome on an eval item span.

    A ``None`` span (tracing disabled) is ignored. Otherwise the CICD
    task result and ``agentops.eval.item.passed`` attributes are written
    and the span status is set to OK or ERROR accordingly.
    """
    if span is None:
        return

    from opentelemetry.trace import StatusCode

    task_result = "success" if passed else "failure"
    span.set_attribute("cicd.pipeline.task.run.result", task_result)
    span.set_attribute("agentops.eval.item.passed", passed)

    final_status = StatusCode.OK if passed else StatusCode.ERROR
    span.set_status(final_status)
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
@contextmanager
def agent_invoke_span(
    *,
    target: str,
    model: Optional[str] = None,
    agent_id: Optional[str] = None,
    agent_name: Optional[str] = None,
    agent_version: Optional[str] = None,
    provider: str = "azure.ai.inference",
) -> Generator[Optional[Any], None, None]:
    """Open a ``CLIENT`` span for an agent/model invocation (GenAI semconv).

    Yields ``None`` when tracing is disabled. The operation is
    ``invoke_agent`` when ``target == "agent"`` and ``chat`` otherwise;
    the span is named ``<operation> <agent_name or model or 'unknown'>``.
    Optional identifiers are attached only when truthy.
    """
    if _tracer is None or not _tracing_enabled:
        yield None
        return

    from opentelemetry.trace import SpanKind

    if target == "agent":
        operation = "invoke_agent"
    else:
        operation = "chat"
    subject = agent_name or model or "unknown"

    # GenAI semconv; optional entries are skipped when falsy.
    optional_attributes = (
        ("gen_ai.request.model", model),
        ("gen_ai.agent.id", agent_id),
        ("gen_ai.agent.name", agent_name),
        ("gen_ai.agent.version", agent_version),
    )

    with _tracer.start_as_current_span(
        f"{operation} {subject}",
        kind=SpanKind.CLIENT,
    ) as invoke_span:
        invoke_span.set_attribute("gen_ai.operation.name", operation)
        invoke_span.set_attribute("gen_ai.provider.name", provider)
        for attr_key, attr_value in optional_attributes:
            if attr_value:
                invoke_span.set_attribute(attr_key, attr_value)

        yield invoke_span
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def set_agent_invoke_result(
    span: Any,
    *,
    response_model: Optional[str] = None,
    input_tokens: Optional[int] = None,
    output_tokens: Optional[int] = None,
) -> None:
    """Attach GenAI response attributes to an agent invoke span.

    A ``None`` span (tracing disabled) is ignored. ``response_model`` is
    written only when truthy; token counts are written whenever they are
    not ``None``, so a count of ``0`` is still recorded.
    """
    if span is None:
        return

    if response_model:
        span.set_attribute("gen_ai.response.model", response_model)

    token_usage = (
        ("gen_ai.usage.input_tokens", input_tokens),
        ("gen_ai.usage.output_tokens", output_tokens),
    )
    for usage_key, token_count in token_usage:
        if token_count is not None:
            span.set_attribute(usage_key, token_count)
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def record_evaluator_span(
    *,
    evaluator_name: str,
    builtin_name: str,
    source: str,
    score: float,
    threshold: Optional[float] = None,
    criteria: Optional[str] = None,
    passed: Optional[bool] = None,
) -> None:
    """Emit an ``INTERNAL`` span describing one evaluator result.

    Does nothing when tracing is disabled. The span is named
    ``evaluator <builtin_name>`` and is opened and closed within this
    call. ``threshold``, ``criteria`` and ``passed`` are attached only
    when they are not ``None``.
    """
    if _tracer is None or not _tracing_enabled:
        return

    from opentelemetry.trace import SpanKind

    required_attributes = (
        ("agentops.eval.evaluator.name", evaluator_name),
        ("agentops.eval.evaluator.builtin", builtin_name),
        ("agentops.eval.evaluator.source", source),
        ("agentops.eval.evaluator.score", score),
    )
    optional_attributes = (
        ("agentops.eval.evaluator.threshold", threshold),
        ("agentops.eval.evaluator.criteria", criteria),
        ("agentops.eval.evaluator.passed", passed),
    )

    with _tracer.start_as_current_span(
        f"evaluator {builtin_name}",
        kind=SpanKind.INTERNAL,
    ) as evaluator_span:
        for attr_key, attr_value in required_attributes:
            evaluator_span.set_attribute(attr_key, attr_value)
        for attr_key, attr_value in optional_attributes:
            if attr_value is not None:
                evaluator_span.set_attribute(attr_key, attr_value)
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
# ---------------------------------------------------------------------------
|
|
450
|
+
# Doctor finding spans
|
|
451
|
+
# ---------------------------------------------------------------------------
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def record_agent_finding_span(finding: Any) -> None:
    """Emit a queryable ``INTERNAL`` span for one ``agentops doctor`` finding.

    Does nothing when tracing is disabled. Fields are read from
    *finding* with ``getattr`` and coerced to strings; missing or falsy
    fields become ``""`` (``"unknown"`` for the id). For ``severity`` and
    ``category`` the object's ``.value`` is used when it has one. The
    span status is always set to OK.
    """
    if _tracer is None or not _tracing_enabled:
        return

    from opentelemetry.trace import SpanKind, StatusCode

    def _as_text(attr_name: str) -> str:
        # Missing or falsy fields collapse to an empty string.
        return str(getattr(finding, attr_name, "") or "")

    finding_id = str(getattr(finding, "id", "") or "unknown")
    severity_obj = getattr(finding, "severity", None)
    category_obj = getattr(finding, "category", None)
    # Enum-like objects expose the raw label via ``.value``.
    severity_text = str(getattr(severity_obj, "value", severity_obj) or "")
    category_text = str(getattr(category_obj, "value", category_obj) or "")

    with _tracer.start_as_current_span(
        f"doctor finding {finding_id}",
        kind=SpanKind.INTERNAL,
    ) as finding_span:
        finding_span.set_attribute("agentops.agent.finding.id", finding_id)
        finding_span.set_attribute("agentops.agent.finding.severity", severity_text)
        finding_span.set_attribute("agentops.agent.finding.category", category_text)
        text_fields = (
            ("agentops.agent.finding.title", "title"),
            ("agentops.agent.finding.summary", "summary"),
            ("agentops.agent.finding.recommendation", "recommendation"),
            ("agentops.agent.finding.source", "source"),
        )
        for attr_key, field_name in text_fields:
            finding_span.set_attribute(attr_key, _as_text(field_name))
        finding_span.set_status(StatusCode.OK)
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
# ---------------------------------------------------------------------------
|
|
485
|
+
# Watchdog agent spans
|
|
486
|
+
# ---------------------------------------------------------------------------
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
@contextmanager
def agent_analyze_span(
    *,
    workspace: str,
    lookback_days: Optional[int] = None,
) -> Generator[Optional[Any], None, None]:
    """Open the root span for a watchdog ``agentops doctor`` run.

    Counterpart of :func:`eval_run_span` for the watchdog. Yields
    ``None`` when tracing is disabled; otherwise yields a ``SERVER`` span
    named ``ANALYZE watchdog`` carrying the pipeline name, action and
    workspace, plus ``lookback_days`` when it is not ``None``. An
    exception raised inside the ``with`` body marks the span as ERROR, is
    recorded on it, and is re-raised.
    """
    if _tracer is None or not _tracing_enabled:
        yield None
        return

    from opentelemetry.trace import SpanKind, StatusCode

    analyze_attributes = {
        "cicd.pipeline.name": "agentops.agent.analyze",
        "cicd.pipeline.action.name": "ANALYZE",
        "agentops.agent.workspace": workspace,
    }
    if lookback_days is not None:
        analyze_attributes["agentops.agent.lookback_days"] = lookback_days

    with _tracer.start_as_current_span(
        "ANALYZE watchdog",
        kind=SpanKind.SERVER,
    ) as analyze_span:
        for attr_key, attr_value in analyze_attributes.items():
            analyze_span.set_attribute(attr_key, attr_value)

        try:
            yield analyze_span
        except Exception as error:
            analyze_span.set_status(StatusCode.ERROR, str(error))
            analyze_span.record_exception(error)
            raise
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def set_agent_analyze_result(
    span: Any,
    *,
    findings_total: int,
    by_severity: dict,
    by_category: dict,
    max_severity: Optional[str],
    sources_enabled: list,
) -> None:
    """Write the final summary attributes on a watchdog analyze span.

    A ``None`` span (tracing disabled) is ignored. One attribute is
    written per entry of ``by_severity`` and ``by_category``;
    ``sources_enabled`` is stored as a sorted, comma-joined string.
    """
    if span is None:
        return

    from opentelemetry.trace import StatusCode

    span.set_attribute("agentops.agent.findings_total", findings_total)
    for severity_name, severity_count in by_severity.items():
        span.set_attribute(
            f"agentops.agent.findings.severity.{severity_name}", severity_count
        )
    for category_name, category_count in by_category.items():
        span.set_attribute(
            f"agentops.agent.findings.category.{category_name}", category_count
        )
    if max_severity is not None:
        span.set_attribute("agentops.agent.max_severity", max_severity)

    joined_sources = ",".join(sorted(sources_enabled))
    span.set_attribute("agentops.agent.sources_enabled", joined_sources)

    # The watchdog itself completes successfully even when findings exist -
    # finding severity is observability, not pipeline failure.
    span.set_status(StatusCode.OK)
|
agentops/utils/yaml.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""YAML load/save helpers using ruamel.yaml."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict
|
|
7
|
+
|
|
8
|
+
from ruamel.yaml import YAML
|
|
9
|
+
from ruamel.yaml.error import YAMLError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def load_yaml(path: Path) -> Dict[str, Any]:
    """Load a YAML file whose root is a mapping, using the safe loader.

    Args:
        path: Location of the YAML file, read as UTF-8.

    Returns:
        The parsed mapping, or an empty dict when the document is empty.

    Raises:
        FileNotFoundError: If *path* does not exist.
        ValueError: If the file is not valid YAML or its root is not a
            mapping.
    """
    if not path.exists():
        raise FileNotFoundError(f"YAML file not found: {path}")

    parser = YAML(typ="safe")
    try:
        with path.open("r", encoding="utf-8") as stream:
            document = parser.load(stream)
    except YAMLError as exc:
        raise ValueError(f"Invalid YAML in {path}") from exc

    if document is None:
        return {}
    if isinstance(document, dict):
        return document
    raise ValueError(f"YAML root must be a mapping: {path}")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def save_yaml(path: Path, data: Dict[str, Any]) -> None:
    """Write *data* to *path* as block-style YAML, encoded as UTF-8.

    Missing parent directories are created first; the target file is
    opened in write mode, so existing content is replaced.
    """
    writer = YAML()
    writer.default_flow_style = False

    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as stream:
        writer.dump(data, stream)
|