agentops-accelerator 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. agentops/__init__.py +10 -0
  2. agentops/__main__.py +6 -0
  3. agentops/agent/__init__.py +12 -0
  4. agentops/agent/_legacy_ids.py +92 -0
  5. agentops/agent/analyzer.py +207 -0
  6. agentops/agent/checks/__init__.py +1 -0
  7. agentops/agent/checks/catalog.py +880 -0
  8. agentops/agent/checks/errors.py +279 -0
  9. agentops/agent/checks/foundry_config.py +75 -0
  10. agentops/agent/checks/latency.py +84 -0
  11. agentops/agent/checks/opex.py +157 -0
  12. agentops/agent/checks/opex_workspace.py +874 -0
  13. agentops/agent/checks/posture.py +36 -0
  14. agentops/agent/checks/posture_rules/__init__.py +53 -0
  15. agentops/agent/checks/posture_rules/content_filter.py +59 -0
  16. agentops/agent/checks/posture_rules/diagnostics.py +74 -0
  17. agentops/agent/checks/posture_rules/local_auth.py +55 -0
  18. agentops/agent/checks/posture_rules/managed_identity.py +59 -0
  19. agentops/agent/checks/posture_rules/network.py +68 -0
  20. agentops/agent/checks/regression.py +78 -0
  21. agentops/agent/checks/release_readiness.py +182 -0
  22. agentops/agent/checks/safety.py +247 -0
  23. agentops/agent/checks/spec_conformance.py +375 -0
  24. agentops/agent/cockpit.py +5159 -0
  25. agentops/agent/config.py +240 -0
  26. agentops/agent/findings.py +113 -0
  27. agentops/agent/history.py +142 -0
  28. agentops/agent/knowledge/__init__.py +182 -0
  29. agentops/agent/knowledge/waf-checklist.csv +39 -0
  30. agentops/agent/llm_assist/__init__.py +16 -0
  31. agentops/agent/llm_assist/_base.py +124 -0
  32. agentops/agent/llm_assist/_bundle_rule.py +154 -0
  33. agentops/agent/llm_assist/_client.py +347 -0
  34. agentops/agent/llm_assist/_dataset_rules.py +191 -0
  35. agentops/agent/llm_assist/_engine.py +106 -0
  36. agentops/agent/llm_assist/_prompt_rules.py +291 -0
  37. agentops/agent/llm_assist/_spec_rules.py +235 -0
  38. agentops/agent/production_telemetry.py +430 -0
  39. agentops/agent/report.py +207 -0
  40. agentops/agent/server/__init__.py +1 -0
  41. agentops/agent/server/app.py +84 -0
  42. agentops/agent/server/auth.py +94 -0
  43. agentops/agent/server/chat.py +44 -0
  44. agentops/agent/server/protocol.py +72 -0
  45. agentops/agent/sources/__init__.py +1 -0
  46. agentops/agent/sources/azure_monitor.py +523 -0
  47. agentops/agent/sources/azure_resources.py +602 -0
  48. agentops/agent/sources/foundry_control.py +174 -0
  49. agentops/agent/sources/results_history.py +494 -0
  50. agentops/agent/sources/spec_detectors/__init__.py +42 -0
  51. agentops/agent/sources/spec_detectors/_base.py +58 -0
  52. agentops/agent/sources/spec_detectors/agents_md.py +75 -0
  53. agentops/agent/sources/spec_detectors/spec_kit.py +172 -0
  54. agentops/agent/time_range.py +117 -0
  55. agentops/cli/__init__.py +1 -0
  56. agentops/cli/app.py +4823 -0
  57. agentops/core/__init__.py +1 -0
  58. agentops/core/agentops_config.py +592 -0
  59. agentops/core/config_loader.py +22 -0
  60. agentops/core/evaluators.py +480 -0
  61. agentops/core/release_evidence.py +56 -0
  62. agentops/core/results.py +117 -0
  63. agentops/mcp/__init__.py +10 -0
  64. agentops/mcp/server.py +232 -0
  65. agentops/pipeline/__init__.py +8 -0
  66. agentops/pipeline/cloud_results.py +189 -0
  67. agentops/pipeline/cloud_runner.py +901 -0
  68. agentops/pipeline/comparison.py +108 -0
  69. agentops/pipeline/diagnostics.py +51 -0
  70. agentops/pipeline/invocations.py +535 -0
  71. agentops/pipeline/official_eval.py +414 -0
  72. agentops/pipeline/orchestrator.py +775 -0
  73. agentops/pipeline/prompt_deploy.py +377 -0
  74. agentops/pipeline/publisher.py +121 -0
  75. agentops/pipeline/reporter.py +202 -0
  76. agentops/pipeline/runtime.py +409 -0
  77. agentops/pipeline/thresholds.py +84 -0
  78. agentops/services/__init__.py +1 -0
  79. agentops/services/cicd.py +720 -0
  80. agentops/services/eval_analysis.py +848 -0
  81. agentops/services/evidence_pack.py +757 -0
  82. agentops/services/initializer.py +86 -0
  83. agentops/services/preflight.py +470 -0
  84. agentops/services/setup_wizard.py +709 -0
  85. agentops/services/skills.py +643 -0
  86. agentops/services/trace_promotion.py +300 -0
  87. agentops/services/workflow_analysis.py +1129 -0
  88. agentops/templates/.gitignore +15 -0
  89. agentops/templates/__init__.py +1 -0
  90. agentops/templates/agent-server/Dockerfile +23 -0
  91. agentops/templates/agent-server/README.md +61 -0
  92. agentops/templates/agent-server/main.bicep +94 -0
  93. agentops/templates/agent.yaml +87 -0
  94. agentops/templates/agentops.yaml +58 -0
  95. agentops/templates/foundry.svg +71 -0
  96. agentops/templates/icon.png +0 -0
  97. agentops/templates/pipelines/azuredevops/agentops-deploy-dev-azd.yml +118 -0
  98. agentops/templates/pipelines/azuredevops/agentops-deploy-dev.yml +73 -0
  99. agentops/templates/pipelines/azuredevops/agentops-deploy-prod-azd.yml +141 -0
  100. agentops/templates/pipelines/azuredevops/agentops-deploy-prod.yml +94 -0
  101. agentops/templates/pipelines/azuredevops/agentops-deploy-prompt-agent.yml +167 -0
  102. agentops/templates/pipelines/azuredevops/agentops-deploy-qa-azd.yml +118 -0
  103. agentops/templates/pipelines/azuredevops/agentops-deploy-qa.yml +68 -0
  104. agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml +210 -0
  105. agentops/templates/pipelines/azuredevops/agentops-pr.yml +155 -0
  106. agentops/templates/pipelines/azuredevops/agentops-watchdog.yml +106 -0
  107. agentops/templates/project.gitignore +36 -0
  108. agentops/templates/sample-traces.jsonl +3 -0
  109. agentops/templates/skills/agentops-agent/SKILL.md +137 -0
  110. agentops/templates/skills/agentops-config/SKILL.md +113 -0
  111. agentops/templates/skills/agentops-dataset/SKILL.md +84 -0
  112. agentops/templates/skills/agentops-eval/SKILL.md +189 -0
  113. agentops/templates/skills/agentops-report/SKILL.md +71 -0
  114. agentops/templates/skills/agentops-workflow/SKILL.md +471 -0
  115. agentops/templates/smoke.jsonl +3 -0
  116. agentops/templates/waf-checklist.README.md +84 -0
  117. agentops/templates/waf-checklist.csv +22 -0
  118. agentops/templates/workflows/agentops-deploy-dev-azd.yml +166 -0
  119. agentops/templates/workflows/agentops-deploy-dev.yml +187 -0
  120. agentops/templates/workflows/agentops-deploy-prod-azd.yml +183 -0
  121. agentops/templates/workflows/agentops-deploy-prod.yml +171 -0
  122. agentops/templates/workflows/agentops-deploy-prompt-agent.yml +197 -0
  123. agentops/templates/workflows/agentops-deploy-qa-azd.yml +156 -0
  124. agentops/templates/workflows/agentops-deploy-qa.yml +145 -0
  125. agentops/templates/workflows/agentops-pr-prompt-agent.yml +210 -0
  126. agentops/templates/workflows/agentops-pr.yml +148 -0
  127. agentops/templates/workflows/agentops-watchdog.yml +122 -0
  128. agentops/utils/__init__.py +1 -0
  129. agentops/utils/azd_env.py +435 -0
  130. agentops/utils/azure_endpoints.py +62 -0
  131. agentops/utils/colors.py +47 -0
  132. agentops/utils/dotenv_loader.py +105 -0
  133. agentops/utils/foundry_discovery.py +229 -0
  134. agentops/utils/logging.py +59 -0
  135. agentops/utils/telemetry.py +554 -0
  136. agentops/utils/yaml.py +36 -0
  137. agentops_accelerator-0.3.0.dist-info/METADATA +278 -0
  138. agentops_accelerator-0.3.0.dist-info/RECORD +142 -0
  139. agentops_accelerator-0.3.0.dist-info/WHEEL +5 -0
  140. agentops_accelerator-0.3.0.dist-info/entry_points.txt +2 -0
  141. agentops_accelerator-0.3.0.dist-info/licenses/LICENSE +21 -0
  142. agentops_accelerator-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,757 @@
1
+ """Build production-readiness evidence for a release candidate."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from dataclasses import dataclass
8
+ from datetime import datetime, timezone
9
+ from pathlib import Path
10
+ from typing import Any, Optional, cast
11
+
12
+ from agentops.agent.analyzer import AnalysisResult
13
+ from agentops.agent.findings import Severity
14
+ from agentops.core.release_evidence import (
15
+ ReleaseEvidence,
16
+ ReleaseEvidenceCheck,
17
+ ReleaseEvidenceLink,
18
+ )
19
+ from agentops.pipeline.official_eval import OFFICIAL_EVAL_RUNNER
20
+ from agentops.utils.yaml import load_yaml
21
+
22
+
23
@dataclass(frozen=True)
class EvidenceWriteResult:
    """Immutable record returned by ``write_release_evidence``."""

    # Evidence model that was serialized to disk.
    evidence: ReleaseEvidence
    # Folder that received both output files.
    directory: Path
    # Location of the JSON rendering.
    json_path: Path
    # Location of the Markdown rendering.
    markdown_path: Path
31
+
32
+
33
# Ordered ``(compiled pattern, replacement)`` pairs for scrubbing credentials.
# Each pattern captures the identifying prefix in a group so the replacement
# can keep that prefix and swap only the secret value for ``<redacted>``.
_SECRET_PATTERNS = (
    (
        # ``InstrumentationKey=<value>``: the value runs up to the next
        # ``;``, ``,`` or whitespace character.
        re.compile(r"(InstrumentationKey=)[^;,\s]+", re.IGNORECASE),
        r"\1<redacted>",
    ),
    (
        # HTTP bearer credentials. The character class follows the RFC 6750
        # ``b64token`` grammar (ALPHA / DIGIT / "-" / "." / "_" / "~" / "+" /
        # "/" followed by optional "=" padding) so that tokens containing
        # ``+``, ``/``, ``~`` or trailing ``=`` are redacted in full instead
        # of leaking everything after the first such character.
        re.compile(
            r"(Authorization:\s*Bearer\s+)[A-Za-z0-9._~+/\-]+=*",
            re.IGNORECASE,
        ),
        r"\1<redacted>",
    ),
    (
        # ``api_key`` / ``client_secret`` / ``connection_string`` style
        # assignments (``_`` or ``-`` separator optional), with ``:`` or ``=``
        # and optional quotes. The second group preserves the original
        # separator and opening quote.
        re.compile(
            r"(api[_-]?key|client[_-]?secret|connection[_-]?string)(['\"]?\s*[:=]\s*['\"]?)[^'\"\s,}]+",
            re.IGNORECASE,
        ),
        r"\1\2<redacted>",
    ),
)
50
+
51
+
52
def build_release_evidence(
    workspace: Path,
    *,
    analysis: Optional[AnalysisResult] = None,
) -> ReleaseEvidence:
    """Assemble the release-evidence model for ``workspace``.

    Status dictionaries are gathered from the resolved workspace and from the
    optional ``analysis``. Each ``_add_*`` helper then appends to the shared
    ``checks`` / ``blockers`` / ``warnings`` / ``ready`` lists. The overall
    status is ``blocked`` if any blocker was recorded, ``ready_with_warnings``
    if only warnings were, and ``ready`` otherwise. The returned model is
    re-validated from a dump that has been passed through ``_redact_obj``.
    """

    root = workspace.resolve()

    # Raw signals; the official-eval status is computed once and shared.
    official_eval = _official_eval_status(root)
    latest_eval = _latest_eval(root, official_eval=official_eval)
    workflows = _workflow_status(root)
    doctor = _doctor_status(analysis)
    foundry = _foundry_status(analysis)
    monitoring = _monitoring_status(analysis)
    trace_dataset = _trace_dataset_status(root)
    ailz = _ailz_status(analysis)

    # Accumulators mutated in place by the check helpers below; the call
    # order fixes the order of entries in the final report.
    checks: list[ReleaseEvidenceCheck] = []
    blockers: list[str] = []
    warnings: list[str] = []
    ready: list[str] = []

    _add_eval_check(checks, blockers, warnings, ready, latest_eval)
    _add_threshold_check(checks, warnings, ready, root, latest_eval)
    _add_baseline_check(checks, warnings, ready, root, latest_eval)
    _add_workflow_checks(checks, warnings, ready, workflows)
    _add_doctor_check(checks, blockers, warnings, ready, doctor)
    _add_foundry_check(checks, warnings, ready, foundry)
    _add_monitoring_check(checks, warnings, ready, monitoring)
    _add_trace_dataset_check(checks, warnings, ready, trace_dataset)
    _add_ailz_check(checks, warnings, ready, ailz)

    if blockers:
        status = "blocked"
    elif warnings:
        status = "ready_with_warnings"
    else:
        status = "ready"

    links = _links(latest_eval)
    target = latest_eval.get("target")
    generated_at = datetime.now(timezone.utc).isoformat()

    evidence = ReleaseEvidence(
        generated_at=generated_at,
        workspace=str(root),
        status=status,
        target=str(target) if target else None,
        blockers=blockers,
        warnings=warnings,
        ready=ready,
        checks=checks,
        links=links,
        latest_eval=latest_eval,
        official_eval=official_eval,
        doctor=doctor,
        workflows=workflows,
        foundry=foundry,
        monitoring=monitoring,
        trace_dataset=trace_dataset,
        ailz=ailz,
    )

    # Round-trip through a plain dump so redaction is applied to every field
    # before the model is handed back.
    redacted_dump = _redact_obj(evidence.model_dump())
    return ReleaseEvidence.model_validate(redacted_dump)
109
+
110
+
111
def write_release_evidence(
    workspace: Path,
    *,
    analysis: Optional[AnalysisResult] = None,
    evidence: Optional[ReleaseEvidence] = None,
    out_dir: Optional[Path] = None,
) -> EvidenceWriteResult:
    """Persist release evidence as ``evidence.json`` and ``evidence.md``.

    When ``evidence`` is not supplied it is built from the workspace and
    ``analysis``. Output goes to ``out_dir`` (a relative ``out_dir`` is taken
    relative to the resolved workspace) or, by default, to
    ``.agentops/release/latest`` inside the workspace. The directory is
    created if needed. Returns the payload together with the written paths.
    """

    root = workspace.resolve()
    payload = evidence if evidence else build_release_evidence(root, analysis=analysis)

    if out_dir:
        target_dir = out_dir if out_dir.is_absolute() else root / out_dir
    else:
        target_dir = root / ".agentops" / "release" / "latest"
    target_dir.mkdir(parents=True, exist_ok=True)

    json_path = target_dir / "evidence.json"
    markdown_path = target_dir / "evidence.md"

    # The serialized JSON text gets one more redaction pass before it is written.
    serialized = json.dumps(payload.model_dump(), indent=2, default=str)
    json_path.write_text(_redact_text(serialized) + "\n", encoding="utf-8")

    markdown = render_release_evidence_markdown(payload)
    markdown_path.write_text(markdown, encoding="utf-8")

    return EvidenceWriteResult(payload, target_dir, json_path, markdown_path)
133
+
134
+
135
def render_release_evidence_markdown(evidence: ReleaseEvidence) -> str:
    """Produce the Markdown report for ``evidence``.

    Sections appear in this order: header, blocking items, warnings, ready
    signals, doctor finding summary, readiness-check table, links. The three
    bullet sections and the links section are omitted when empty. The text is
    passed through ``_redact_text`` and ends with exactly one newline. An
    overall or per-check status outside the known icon tables raises
    ``KeyError``.
    """

    readiness_icons = {"ready": "✅", "ready_with_warnings": "⚠️", "blocked": "❌"}
    check_icons = {"ready": "✅", "warning": "⚠️", "blocked": "❌", "unknown": "❔"}

    icon = readiness_icons[evidence.status]
    lines = [
        "# AgentOps Release Evidence",
        "",
        f"**Production readiness:** {icon} `{evidence.status}`",
        "",
        f"- **Generated:** {evidence.generated_at}",
        f"- **Workspace:** `{evidence.workspace}`",
    ]
    if evidence.target:
        lines.append(f"- **Target:** `{evidence.target}`")
    lines.append("")

    # The three bullet sections share one layout: heading, blank line,
    # one marked bullet per item, blank line.
    bullet_sections = (
        ("## Blocking items", "❌", evidence.blockers),
        ("## Warnings", "⚠️", evidence.warnings),
        ("## Ready signals", "✅", evidence.ready),
    )
    for heading, marker, items in bullet_sections:
        if not items:
            continue
        lines.extend([heading, ""])
        lines.extend(f"- {marker} {item}" for item in items)
        lines.append("")

    _append_doctor_finding_summary(lines, evidence.doctor)

    lines.extend(
        [
            "## Readiness checks",
            "",
            "| Check | Status | Summary |",
            "|---|---|---|",
        ]
    )
    for check in evidence.checks:
        status_icon = check_icons[check.status]
        lines.append(f"| {check.name} | {status_icon} `{check.status}` | {_cell(check.summary)} |")
    lines.append("")

    if evidence.links:
        lines.extend(["## Links", ""])
        lines.extend(f"- [{link.label}]({link.url})" for link in evidence.links)
        lines.append("")

    document = "\n".join(lines).rstrip() + "\n"
    return _redact_text(document)
191
+
192
+
193
def _append_doctor_finding_summary(lines: list[str], doctor: dict[str, Any]) -> None:
    """Append a "Doctor finding summary" section to ``lines`` in place.

    Nothing is appended unless ``doctor["status"]`` is ``"ok"`` and there is at
    least one entry in ``top_findings``. The section holds a totals line
    (with per-severity counts when non-zero), one numbered line per dict-shaped
    finding, and a pointer to the full report when the total exceeds the
    number of findings listed.
    """

    if doctor.get("status") != "ok":
        return
    top_findings = _as_list(doctor.get("top_findings"))
    if not top_findings:
        return

    counts = doctor.get("counts")
    if not isinstance(counts, dict):
        counts = {}

    # Fall back to the number of listed findings when the recorded total is
    # absent or not convertible to an int.
    try:
        total_count = int(doctor.get("findings_total"))  # type: ignore[arg-type]
    except (TypeError, ValueError):
        total_count = len(top_findings)

    count_parts = [
        f"{count} {severity}"
        for severity in ("critical", "warning", "info")
        if (count := counts.get(severity))
    ]
    count_text = f" ({' · '.join(count_parts)})" if count_parts else ""

    lines.extend(
        [
            "## Doctor finding summary",
            "",
            f"**Findings:** {total_count}{count_text}",
            "",
        ]
    )

    # The index is taken from the position in ``top_findings``, so entries
    # that are skipped for not being dicts still consume a number.
    for index, raw_finding in enumerate(top_findings, start=1):
        if not isinstance(raw_finding, dict):
            continue
        severity = _cell(str(raw_finding.get("severity") or "info"))
        category = _cell(str(raw_finding.get("category") or "uncategorized")).replace("_", " ")
        finding_id = _cell(str(raw_finding.get("id") or "unknown"))
        title = _cell(str(raw_finding.get("title") or ""))
        lines.append(f"{index}. **{severity}** [{category}] `{finding_id}` - {title}")

    shown = len(top_findings)
    if total_count > shown:
        lines.append(f"{shown} shown; see `.agentops/agent/report.md` for all findings.")
    lines.append("")
234
+
235
+
236
def _latest_eval(root: Path, *, official_eval: Optional[dict[str, Any]] = None) -> dict[str, Any]:
    """Pick the evaluation status that the release checks should use.

    The AgentOps result is preferred when ``_is_eval_available`` accepts it,
    then the official-evaluation status. When neither is available the
    AgentOps status is returned so callers still see why it is unusable.
    ``official_eval`` lets the caller pass an already-computed official status.
    """

    local_eval = _agentops_eval_status(root)
    official = _official_eval_status(root) if official_eval is None else official_eval

    for candidate in (local_eval, official):
        if _is_eval_available(candidate):
            return candidate
    return local_eval
244
+
245
+
246
def _agentops_eval_status(root: Path) -> dict[str, Any]:
    """Summarize ``.agentops/results/latest/results.json`` under ``root``.

    Returns ``{"status": "missing", ...}`` when the file does not exist and
    ``{"status": "invalid", ...}`` (with an ``error`` string) when it cannot
    be read, decoded, or parsed, or when its top level is not a JSON object.
    Otherwise returns a ``"status": "ok"`` dictionary with the fields the
    release checks consume.

    ``passed`` is taken from ``summary.overall_passed``, then
    ``summary.run_pass``, then a numeric ``run_pass`` metric; it stays
    ``None`` when none of these yields a value.
    """

    path = root / ".agentops" / "results" / "latest" / "results.json"
    if not path.exists():
        return {"status": "missing", "path": str(path)}
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised by read_text for non-UTF-8 bytes, so a
        # corrupt file is reported as invalid instead of aborting the build.
        return {"status": "invalid", "path": str(path), "error": str(exc)}
    if not isinstance(payload, dict):
        return {"status": "invalid", "path": str(path), "error": "expected JSON object"}

    def _dict_field(container: dict[str, Any], key: str) -> dict[str, Any]:
        # Treat anything that is not a JSON object as an empty object.
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    summary = _dict_field(payload, "summary")
    target = _dict_field(payload, "target")
    config = _dict_field(payload, "config")
    cloud = _dict_field(config, "cloud_evaluation")

    # First non-empty metrics container wins; non-dict values are discarded.
    raw_metrics = (
        payload.get("aggregate_metrics")
        or payload.get("metrics")
        or payload.get("run_metrics")
        or {}
    )
    metrics: dict[str, Any] = raw_metrics if isinstance(raw_metrics, dict) else {}

    thresholds_raw = payload.get("thresholds")
    thresholds: list[Any] = thresholds_raw if isinstance(thresholds_raw, list) else []

    passed = summary.get("overall_passed")
    if passed is None:
        passed = summary.get("run_pass")
    if passed is None and "run_pass" in metrics:
        try:
            passed = bool(float(metrics["run_pass"]))
        except (TypeError, ValueError):
            passed = None

    return {
        "status": "ok",
        # A non-empty ``config.cloud_evaluation`` object marks a cloud run.
        "runner": "agentops-cloud" if cloud else "agentops-local",
        "path": str(path),
        "passed": passed,
        "target": target.get("raw") or config.get("agent"),
        "target_kind": target.get("kind"),
        "started_at": payload.get("started_at") or payload.get("timestamp"),
        "items_total": summary.get("items_total"),
        "items_passed_all": summary.get("items_passed_all"),
        "metrics": metrics,
        "threshold_count": len(thresholds),
        "has_comparison": isinstance(payload.get("comparison"), dict),
        "foundry_report_url": cloud.get("report_url"),
        "cloud_evaluation": cloud,
        "machine_readable_thresholds": True,
    }
303
+
304
+
305
def _official_eval_status(root: Path) -> dict[str, Any]:
    """Summarize the artifacts in ``.agentops/official-eval`` under ``root``.

    ``metadata.json`` is required: without it the status is ``"missing"``, and
    an unreadable, undecodable, unparsable, or non-object file gives
    ``"invalid"``. ``result.json`` is optional; problems with it do not change
    the status but are reported through ``result_error`` and
    ``result_recorded``. The pass/fail value comes from
    ``_official_eval_passed`` applied to the loaded result (an empty dict when
    no usable result was loaded).
    """

    directory = root / ".agentops" / "official-eval"
    metadata_path = directory / "metadata.json"
    result_path = directory / "result.json"

    if not metadata_path.exists():
        return {"status": "missing", "metadata_path": str(metadata_path)}
    try:
        metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # that read_text raises for bytes that are not valid UTF-8.
        return {"status": "invalid", "metadata_path": str(metadata_path), "error": str(exc)}
    if not isinstance(metadata, dict):
        return {"status": "invalid", "metadata_path": str(metadata_path), "error": "expected JSON object"}

    result: dict[str, Any] = {}
    result_error: str | None = None
    # Probe once so every field below describes the same filesystem snapshot.
    result_exists = result_path.exists()
    if result_exists:
        try:
            loaded = json.loads(result_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            result_error = str(exc)
        else:
            if isinstance(loaded, dict):
                result = loaded
            else:
                result_error = "expected JSON object"

    def _list_field(key: str) -> list[Any]:
        # Metadata list fields fall back to an empty list when absent or mistyped.
        value = metadata.get(key)
        return value if isinstance(value, list) else []

    payload: dict[str, Any] = {
        "status": "ok",
        "runner": OFFICIAL_EVAL_RUNNER,
        "path": str(metadata_path),
        "metadata_path": str(metadata_path),
        "result_path": str(result_path) if result_exists else None,
        "result_recorded": result_exists and result_error is None,
        "passed": _official_eval_passed(result),
        "target": metadata.get("agent_ids"),
        "target_kind": "foundry_prompt",
        "deployment_name": metadata.get("deployment_name"),
        "data_path": metadata.get("data_path"),
        "items_total": metadata.get("items_total"),
        "official_evaluators": _list_field("official_evaluators"),
        "skipped_agentops_evaluators": _list_field("skipped_agentops_evaluators"),
        "warnings": _list_field("warnings"),
        "machine_readable_thresholds": False,
        "has_comparison": False,
        "action": metadata.get("action"),
        "azure_devops_task": metadata.get("azure_devops_task"),
        "result": result,
    }
    if result_error:
        payload["result_error"] = result_error
    return payload
356
+
357
+
358
def _workflow_status(root: Path) -> dict[str, Any]:
    """Report which AgentOps CI/CD files exist under ``root``.

    Looks in ``.github/workflows`` and ``.azuredevops/pipelines`` for the PR
    gate (``agentops-pr.yml``), deploy files (``agentops-deploy-*.yml``), and
    a scheduled doctor file (``agentops-doctor.yml`` or
    ``agentops-watchdog.yml``). Deploy paths are returned relative to ``root``.
    """

    github = root / ".github" / "workflows"
    ado = root / ".azuredevops" / "pipelines"

    def _present(filename: str) -> bool:
        # A file counts if either CI system's folder contains it.
        return (github / filename).exists() or (ado / filename).exists()

    # glob() yields entries in filesystem order; sort each folder's matches so
    # the recorded list is reproducible. GitHub entries stay ahead of Azure
    # DevOps entries, as before.
    deploy_files = sorted(github.glob("agentops-deploy-*.yml")) + sorted(
        ado.glob("agentops-deploy-*.yml")
    )
    scheduled_doctor = _present("agentops-doctor.yml") or _present("agentops-watchdog.yml")

    return {
        "pr_gate": _present("agentops-pr.yml"),
        "deploy_workflows": [str(p.relative_to(root)) for p in deploy_files],
        "deploy_count": len(deploy_files),
        "scheduled_doctor": scheduled_doctor,
        # Legacy key kept so existing evidence consumers do not break.
        "watchdog": scheduled_doctor,
        "github_workflows": github.is_dir(),
        "azure_devops_pipelines": ado.is_dir(),
    }
379
+
380
+
381
def _doctor_status(analysis: Optional[AnalysisResult]) -> dict[str, Any]:
    """Condense doctor findings into counts plus a short listing.

    Returns ``{"status": "not_run"}`` when no analysis was supplied. Otherwise
    reports the total number of findings, the maximum severity value (or
    ``None``), per-severity counts, and the first ten findings reduced to
    id / severity / title / category.
    """

    if analysis is None:
        return {"status": "not_run"}

    findings = analysis.findings

    # Only these three severity values are tallied; any other value raises
    # KeyError, exactly as a direct dictionary update would.
    counts = dict.fromkeys(("critical", "warning", "info"), 0)
    for finding in findings:
        counts[finding.severity.value] += 1

    worst = analysis.max_severity
    top_findings = []
    for item in findings[:10]:
        top_findings.append(
            {
                "id": item.id,
                "severity": item.severity.value,
                "title": item.title,
                "category": item.category.value,
            }
        )

    return {
        "status": "ok",
        "findings_total": len(findings),
        "max_severity": worst.value if worst else None,
        "counts": counts,
        "top_findings": top_findings,
    }
402
+
403
+
404
def _foundry_status(analysis: Optional[AnalysisResult]) -> dict[str, Any]:
    """Summarize the Foundry portion of ``analysis``.

    Returns ``{"status": "not_run"}`` when there is no analysis or it carries
    no Foundry data. Otherwise the status is read from the Foundry
    diagnostics (``"unknown"`` when absent), alongside agent and
    evaluation-rule counts. A rule counts as enabled unless its ``enabled``
    attribute is exactly ``False``.
    """

    foundry = None if analysis is None else analysis.foundry
    if foundry is None:
        return {"status": "not_run"}

    diagnostics = dict(foundry.diagnostics or {})
    rules = foundry.evaluation_rules
    enabled_total = len([rule for rule in rules if rule.enabled is not False])

    summary: dict[str, Any] = {"status": diagnostics.get("status", "unknown")}
    summary["agents_count"] = len(foundry.agents)
    summary["evaluation_rules_count"] = len(rules)
    summary["enabled_evaluation_rules"] = enabled_total
    summary["diagnostics"] = diagnostics
    return summary
417
+
418
+
419
def _monitoring_status(analysis: Optional[AnalysisResult]) -> dict[str, Any]:
    """Copy the monitoring figures from ``analysis`` into a plain dict.

    Returns ``{"status": "not_run"}`` when there is no analysis or it carries
    no monitor data. Otherwise the status comes from the monitor diagnostics
    (``"unknown"`` when absent) and the numeric attributes are copied as-is.
    """

    monitor = None if analysis is None else analysis.monitor
    if monitor is None:
        return {"status": "not_run"}

    diagnostics = dict(monitor.diagnostics or {})
    summary: dict[str, Any] = {"status": diagnostics.get("status", "unknown")}

    # Attribute names double as output keys; the order here is the output order.
    for field_name in (
        "request_count",
        "error_rate",
        "p95_duration_seconds",
        "input_token_count",
        "output_token_count",
        "rate_limit_429_count",
    ):
        summary[field_name] = getattr(monitor, field_name)

    summary["diagnostics"] = diagnostics
    return summary
433
+
434
+
435
def _trace_dataset_status(root: Path) -> dict[str, Any]:
    """Load ``.agentops/data/trace-regression-manifest.json`` under ``root``.

    Returns ``"missing"`` when the file does not exist and ``"invalid"`` (with
    an ``error`` string) when it cannot be read, decoded, or parsed, or when
    its top level is not a JSON object. Otherwise the manifest's own keys are
    merged on top of ``{"status": "ok", "manifest": <path>}``.
    """

    manifest = root / ".agentops" / "data" / "trace-regression-manifest.json"
    if not manifest.exists():
        return {"status": "missing", "manifest": str(manifest)}
    try:
        payload = json.loads(manifest.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # that read_text raises for bytes that are not valid UTF-8.
        return {"status": "invalid", "manifest": str(manifest), "error": str(exc)}
    if not isinstance(payload, dict):
        return {"status": "invalid", "manifest": str(manifest), "error": "expected JSON object"}
    # NOTE(review): manifest keys are spread last, so a manifest that defines
    # its own "status" or "manifest" key overrides the values set here.
    # Confirm that is intended.
    return {"status": "ok", "manifest": str(manifest), **payload}
446
+
447
+
448
def _ailz_status(analysis: Optional[AnalysisResult]) -> dict[str, Any]:
    """Derive a status from the ``opex.ailz_*`` findings in ``analysis``.

    Returns ``"not_run"`` without an analysis and ``"not_detected"`` when
    neither ``opex.ailz_readiness`` nor ``opex.ailz_gaps`` findings exist.
    Otherwise the status is ``"gaps"`` if any gap finding is present and
    ``"ready"`` if not, with the readiness summaries and the flattened
    ``gaps`` evidence of every gap finding.
    """

    if analysis is None:
        return {"status": "not_run"}

    readiness = []
    gaps = []
    for finding in analysis.findings:
        if finding.id == "opex.ailz_readiness":
            readiness.append(finding)
        elif finding.id == "opex.ailz_gaps":
            gaps.append(finding)

    if not (readiness or gaps):
        return {"status": "not_detected"}

    summaries = [finding.summary for finding in readiness]
    gap_items = []
    for finding in gaps:
        gap_items.extend(_as_list(finding.evidence.get("gaps")))

    return {
        "status": "gaps" if gaps else "ready",
        "readiness": summaries,
        "gaps": gap_items,
    }
460
+
461
+
462
def _add_eval_check(
    checks: list[ReleaseEvidenceCheck],
    blockers: list[str],
    warnings: list[str],
    ready: list[str],
    latest_eval: dict[str, Any],
) -> None:
    """Record the "Latest eval gate" check from ``latest_eval``.

    Exactly one message is appended to one of ``blockers`` / ``warnings`` /
    ``ready``, and one matching check (carrying ``latest_eval`` as evidence)
    is appended to ``checks``. An evaluation whose status is not ``"ok"``
    blocks. Every outcome of the official runner blocks, because it has no
    AgentOps-normalized threshold evidence. For other runners a failed run
    blocks, a passed run is ready, and an undetermined result is a warning.
    """

    if latest_eval.get("status") != "ok":
        outcome = "blocked"
        message = "No latest evaluation result was found; run `agentops eval run` or the generated eval workflow before treating this agent as production-ready."
    elif latest_eval.get("runner") == OFFICIAL_EVAL_RUNNER:
        outcome = "blocked"
        passed = latest_eval.get("passed")
        if passed is False:
            message = "Official AI Agent Evaluation did not complete successfully; review the CI job before promotion."
        elif passed is True:
            message = (
                "Official AI Agent Evaluation completed, but it does not emit "
                "AgentOps-normalized threshold evidence; run `agentops eval run` "
                "so release evidence can enforce quality thresholds."
            )
        else:
            message = (
                "Official AI Agent Evaluation input is present, but no "
                "AgentOps-normalized pass/fail result was recorded."
            )
    else:
        passed = latest_eval.get("passed")
        if passed is False:
            outcome = "blocked"
            message = "Latest evaluation failed one or more thresholds."
        elif passed is True:
            outcome = "ready"
            message = "Latest evaluation passed configured thresholds."
        else:
            outcome = "warning"
            message = "Latest evaluation exists, but pass/fail status could not be determined."

    # Route the message to the list that matches the check status.
    if outcome == "blocked":
        blockers.append(message)
    elif outcome == "ready":
        ready.append(message)
    else:
        warnings.append(message)
    checks.append(
        ReleaseEvidenceCheck(
            name="Latest eval gate",
            status=outcome,
            summary=message,
            evidence=latest_eval,
        )
    )
510
+
511
+
512
def _add_threshold_check(
    checks: list[ReleaseEvidenceCheck],
    warnings: list[str],
    ready: list[str],
    root: Path,
    latest_eval: dict[str, Any],
) -> None:
    """Record the "Threshold policy" check.

    A non-empty ``thresholds`` mapping in the config loaded by
    ``_agentops_config`` is ready, unless the latest evaluation came from the
    official runner, in which case it is a warning. Missing or empty
    thresholds are also a warning. When thresholds exist, their keys are
    attached to the check as evidence.
    """

    config = _agentops_config(root)
    thresholds = config.get("thresholds") if isinstance(config, dict) else None

    if not (isinstance(thresholds, dict) and thresholds):
        message = "No explicit thresholds are declared; defaults are useful for exploration but weak for production gates."
        warnings.append(message)
        checks.append(ReleaseEvidenceCheck(name="Threshold policy", status="warning", summary=message))
        return

    if latest_eval.get("runner") == OFFICIAL_EVAL_RUNNER:
        status = "warning"
        bucket = warnings
        message = (
            "agentops.yaml declares thresholds, but the official AI Agent Evaluation "
            "runner does not emit AgentOps-normalized threshold evidence yet."
        )
    else:
        status = "ready"
        bucket = ready
        message = "Explicit production thresholds are declared in agentops.yaml."

    bucket.append(message)
    checks.append(
        ReleaseEvidenceCheck(
            name="Threshold policy",
            status=status,
            summary=message,
            # Only the threshold names are recorded, not their values.
            evidence={"thresholds": list(thresholds)},
        )
    )
537
+
538
+
539
def _add_baseline_check(
    checks: list[ReleaseEvidenceCheck],
    warnings: list[str],
    ready: list[str],
    root: Path,
    latest_eval: dict[str, Any],
) -> None:
    """Record the "Regression baseline" check.

    Ready when ``.agentops/baseline/results.json`` exists under ``root`` or
    the latest evaluation reports a comparison; otherwise a warning.
    """

    baseline_file = root / ".agentops" / "baseline" / "results.json"
    if baseline_file.exists() or latest_eval.get("has_comparison"):
        status = "ready"
        bucket = ready
        message = "A baseline or comparison is available for regression decisions."
    else:
        status = "warning"
        bucket = warnings
        message = "No baseline comparison was found; capture a known-good results.json before promoting production releases."

    bucket.append(message)
    checks.append(ReleaseEvidenceCheck(name="Regression baseline", status=status, summary=message))
555
+
556
+
557
def _add_workflow_checks(
    checks: list[ReleaseEvidenceCheck],
    warnings: list[str],
    ready: list[str],
    workflows: dict[str, Any],
) -> None:
    """Record the "PR gate" and "Deploy workflows" checks.

    Each check is ready when the corresponding workflow is present and a
    warning otherwise, with ``workflows`` attached as evidence. A scheduled
    doctor workflow adds a ready message only; it has no check entry.
    """

    def _record(name: str, present: bool, ready_message: str, warning_message: str) -> None:
        # Append one message and one check for a present/absent workflow.
        if present:
            status, bucket, message = "ready", ready, ready_message
        else:
            status, bucket, message = "warning", warnings, warning_message
        bucket.append(message)
        checks.append(ReleaseEvidenceCheck(name=name, status=status, summary=message, evidence=workflows))

    _record(
        "PR gate",
        bool(workflows.get("pr_gate")),
        "AgentOps PR gate workflow is present.",
        "No AgentOps PR gate workflow was found.",
    )
    _record(
        "Deploy workflows",
        int(workflows.get("deploy_count") or 0) > 0,
        "Environment deploy workflows are present.",
        "No AgentOps deploy workflow was found for dev/qa/prod promotion.",
    )

    if workflows.get("scheduled_doctor") or workflows.get("watchdog"):
        ready.append("Optional scheduled AgentOps Doctor workflow is present.")
583
+
584
+
585
def _add_doctor_check(
    checks: list[ReleaseEvidenceCheck],
    blockers: list[str],
    warnings: list[str],
    ready: list[str],
    doctor: dict[str, Any],
) -> None:
    """Record the "Doctor readiness" check from the doctor status dict.

    A doctor status other than ``"ok"`` is a warning with no evidence
    attached. Otherwise the maximum severity decides: critical blocks,
    warning warns, and anything else is ready, with ``doctor`` attached as
    evidence.
    """

    check_name = "Doctor readiness"

    if doctor.get("status") != "ok":
        message = "Doctor was not run for this evidence pack."
        warnings.append(message)
        checks.append(ReleaseEvidenceCheck(name=check_name, status="warning", summary=message))
        return

    max_severity = doctor.get("max_severity")
    if max_severity == Severity.CRITICAL.value:
        status = "blocked"
        bucket = blockers
        message = "Doctor reported critical findings."
    elif max_severity == Severity.WARNING.value:
        status = "warning"
        bucket = warnings
        message = "Doctor reported warnings that should be reviewed before production."
    else:
        status = "ready"
        bucket = ready
        message = "Doctor reported no blocking or warning findings."

    bucket.append(message)
    checks.append(ReleaseEvidenceCheck(name=check_name, status=status, summary=message, evidence=doctor))
610
+
611
+
612
def _add_foundry_check(
    checks: list[ReleaseEvidenceCheck],
    warnings: list[str],
    ready: list[str],
    foundry: dict[str, Any],
) -> None:
    """Append the Foundry control-plane / continuous-evaluation check.

    If ``foundry["status"]`` is not ``"ok"``, a "Foundry control plane"
    warning is recorded. Otherwise a reachability line is added to ``ready``
    and a "Foundry continuous evaluation" check is recorded: ready when
    ``foundry["enabled_evaluation_rules"]`` is greater than zero, a warning
    when it is not. ``foundry`` is attached as evidence to every check.
    """
    if foundry.get("status") != "ok":
        summary = "Foundry control-plane readiness is unknown; configure `AZURE_AI_FOUNDRY_PROJECT_ENDPOINT` for production evidence."
        warnings.append(summary)
        checks.append(
            ReleaseEvidenceCheck(
                name="Foundry control plane",
                status="warning",
                summary=summary,
                evidence=foundry,
            )
        )
        return

    ready.append("Foundry control-plane source is reachable.")
    enabled_rules = int(foundry.get("enabled_evaluation_rules") or 0)
    if enabled_rules > 0:
        status = "ready"
        summary = "Foundry continuous evaluation rules are enabled."
        ready.append(summary)
    else:
        status = "warning"
        summary = "Foundry control-plane is reachable, but no enabled continuous evaluation rule was detected."
        warnings.append(summary)
    checks.append(
        ReleaseEvidenceCheck(
            name="Foundry continuous evaluation",
            status=status,
            summary=summary,
            evidence=foundry,
        )
    )
631
+
632
+
633
def _add_monitoring_check(
    checks: list[ReleaseEvidenceCheck],
    warnings: list[str],
    ready: list[str],
    monitoring: dict[str, Any],
) -> None:
    """Append the "Runtime monitoring" check.

    The check is ready when ``monitoring["status"]`` equals ``"ok"`` and a
    warning otherwise. The summary is appended to ``ready`` or ``warnings``
    accordingly, and ``monitoring`` is attached as evidence in both cases.
    """
    reachable = monitoring.get("status") == "ok"
    if reachable:
        summary = "Application Insights / Azure Monitor source is reachable."
        ready.append(summary)
    else:
        summary = "Application Insights / Azure Monitor readiness is unknown; production traces and runtime metrics may not be available."
        warnings.append(summary)
    checks.append(
        ReleaseEvidenceCheck(
            name="Runtime monitoring",
            status="ready" if reachable else "warning",
            summary=summary,
            evidence=monitoring,
        )
    )
646
+
647
+
648
def _add_trace_dataset_check(
    checks: list[ReleaseEvidenceCheck],
    warnings: list[str],
    ready: list[str],
    trace_dataset: dict[str, Any],
) -> None:
    """Append the "Trace-to-dataset flywheel" check.

    When ``trace_dataset["status"]`` equals ``"ok"`` the check is ready and
    its summary quotes ``trace_dataset["rows"]`` verbatim; otherwise the check
    is a warning. The summary is appended to ``ready`` or ``warnings`` to
    match, and ``trace_dataset`` is attached as evidence in both cases.
    """
    available = trace_dataset.get("status") == "ok"
    if available:
        # ``rows`` is only looked up on the success path.
        row_count = trace_dataset.get("rows")
        summary = f"Production trace regression dataset is available ({row_count} row(s))."
        ready.append(summary)
    else:
        summary = "No production trace regression dataset was found yet; harvest reviewed traces to turn production issues into regression tests."
        warnings.append(summary)
    checks.append(
        ReleaseEvidenceCheck(
            name="Trace-to-dataset flywheel",
            status="ready" if available else "warning",
            summary=summary,
            evidence=trace_dataset,
        )
    )
663
+
664
+
665
def _add_ailz_check(
    checks: list[ReleaseEvidenceCheck],
    warnings: list[str],
    ready: list[str],
    ailz: dict[str, Any],
) -> None:
    """Append the "AI Landing Zone readiness" check.

    ``ailz["status"]`` selects the outcome:

    * ``"ready"`` gives a ready check, with the summary also added to ``ready``;
    * ``"gaps"`` gives a warning check, with the summary also added to ``warnings``;
    * ``"not_detected"`` gives an unknown check (no signals detected);
    * anything else gives an unknown check (readiness not evaluated).

    The two unknown outcomes touch neither ``ready`` nor ``warnings``.
    ``ailz`` is attached as evidence in every case.
    """
    state = ailz.get("status")
    bucket = None  # list that should also receive the summary, if any
    if state == "ready":
        verdict = "ready"
        summary = "AI Landing Zone readiness signals are wired."
        bucket = ready
    elif state == "gaps":
        verdict = "warning"
        summary = "AI Landing Zone signals were detected, but gaps remain."
        bucket = warnings
    elif state == "not_detected":
        verdict = "unknown"
        summary = "No AI Landing Zone signals were detected for this workspace."
    else:
        verdict = "unknown"
        summary = "AI Landing Zone readiness was not evaluated."

    if bucket is not None:
        bucket.append(summary)
    checks.append(
        ReleaseEvidenceCheck(
            name="AI Landing Zone readiness",
            status=verdict,
            summary=summary,
            evidence=ailz,
        )
    )
684
+
685
+
686
+ def _links(latest_eval: dict[str, Any]) -> list[ReleaseEvidenceLink]:
687
+ links: list[ReleaseEvidenceLink] = []
688
+ report_url = latest_eval.get("foundry_report_url")
689
+ if report_url:
690
+ links.append(ReleaseEvidenceLink(label="Foundry evaluation report", url=str(report_url)))
691
+ return links
692
+
693
+
694
+ def _official_eval_passed(result: dict[str, Any]) -> Optional[bool]:
695
+ for key in ("status", "outcome", "conclusion", "job_status"):
696
+ value = result.get(key)
697
+ if value is None:
698
+ continue
699
+ normalized = str(value).strip().lower().replace("_", "").replace("-", "")
700
+ if normalized in {"success", "succeeded"}:
701
+ return True
702
+ if normalized in {"failure", "failed", "cancelled", "canceled", "timedout"}:
703
+ return False
704
+ return None
705
+
706
+
707
+ def _is_eval_available(eval_status: dict[str, Any]) -> bool:
708
+ return eval_status.get("status") == "ok"
709
+
710
+
711
+ def _evidence_mtime(eval_status: dict[str, Any]) -> float:
712
+ for key in ("result_path", "metadata_path", "path"):
713
+ value = eval_status.get(key)
714
+ if not value:
715
+ continue
716
+ path = Path(str(value))
717
+ try:
718
+ return path.stat().st_mtime
719
+ except OSError:
720
+ continue
721
+ return 0.0
722
+
723
+
724
+ def _agentops_config(root: Path) -> dict[str, Any]:
725
+ path = root / "agentops.yaml"
726
+ if not path.exists():
727
+ return {}
728
+ try:
729
+ data = load_yaml(path)
730
+ except Exception:
731
+ return {}
732
+ return data if isinstance(data, dict) else {}
733
+
734
+
735
+ def _as_list(value: Any) -> list[Any]:
736
+ return value if isinstance(value, list) else []
737
+
738
+
739
+ def _cell(text: str) -> str:
740
+ return str(text).replace("|", "\\|").replace("\n", " ")
741
+
742
+
743
def _redact_text(text: str) -> str:
    """Apply every ``_SECRET_PATTERNS`` substitution to ``text`` in order.

    Each entry of ``_SECRET_PATTERNS`` is unpacked as a ``(pattern,
    replacement)`` pair and ``pattern.sub(replacement, ...)`` is applied to the
    output of the previous substitution. The fully substituted string is
    returned.
    """
    redacted = text
    for secret_pattern, placeholder in _SECRET_PATTERNS:
        redacted = secret_pattern.sub(placeholder, redacted)
    return redacted
748
+
749
+
750
+ def _redact_obj(value: Any) -> Any:
751
+ if isinstance(value, str):
752
+ return _redact_text(value)
753
+ if isinstance(value, list):
754
+ return [_redact_obj(item) for item in value]
755
+ if isinstance(value, dict):
756
+ return {key: _redact_obj(item) for key, item in value.items()}
757
+ return value