agentops-accelerator 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. agentops/__init__.py +10 -0
  2. agentops/__main__.py +6 -0
  3. agentops/agent/__init__.py +12 -0
  4. agentops/agent/_legacy_ids.py +92 -0
  5. agentops/agent/analyzer.py +207 -0
  6. agentops/agent/checks/__init__.py +1 -0
  7. agentops/agent/checks/catalog.py +880 -0
  8. agentops/agent/checks/errors.py +279 -0
  9. agentops/agent/checks/foundry_config.py +75 -0
  10. agentops/agent/checks/latency.py +84 -0
  11. agentops/agent/checks/opex.py +157 -0
  12. agentops/agent/checks/opex_workspace.py +874 -0
  13. agentops/agent/checks/posture.py +36 -0
  14. agentops/agent/checks/posture_rules/__init__.py +53 -0
  15. agentops/agent/checks/posture_rules/content_filter.py +59 -0
  16. agentops/agent/checks/posture_rules/diagnostics.py +74 -0
  17. agentops/agent/checks/posture_rules/local_auth.py +55 -0
  18. agentops/agent/checks/posture_rules/managed_identity.py +59 -0
  19. agentops/agent/checks/posture_rules/network.py +68 -0
  20. agentops/agent/checks/regression.py +78 -0
  21. agentops/agent/checks/release_readiness.py +182 -0
  22. agentops/agent/checks/safety.py +247 -0
  23. agentops/agent/checks/spec_conformance.py +375 -0
  24. agentops/agent/cockpit.py +5159 -0
  25. agentops/agent/config.py +240 -0
  26. agentops/agent/findings.py +113 -0
  27. agentops/agent/history.py +142 -0
  28. agentops/agent/knowledge/__init__.py +182 -0
  29. agentops/agent/knowledge/waf-checklist.csv +39 -0
  30. agentops/agent/llm_assist/__init__.py +16 -0
  31. agentops/agent/llm_assist/_base.py +124 -0
  32. agentops/agent/llm_assist/_bundle_rule.py +154 -0
  33. agentops/agent/llm_assist/_client.py +347 -0
  34. agentops/agent/llm_assist/_dataset_rules.py +191 -0
  35. agentops/agent/llm_assist/_engine.py +106 -0
  36. agentops/agent/llm_assist/_prompt_rules.py +291 -0
  37. agentops/agent/llm_assist/_spec_rules.py +235 -0
  38. agentops/agent/production_telemetry.py +430 -0
  39. agentops/agent/report.py +207 -0
  40. agentops/agent/server/__init__.py +1 -0
  41. agentops/agent/server/app.py +84 -0
  42. agentops/agent/server/auth.py +94 -0
  43. agentops/agent/server/chat.py +44 -0
  44. agentops/agent/server/protocol.py +72 -0
  45. agentops/agent/sources/__init__.py +1 -0
  46. agentops/agent/sources/azure_monitor.py +523 -0
  47. agentops/agent/sources/azure_resources.py +602 -0
  48. agentops/agent/sources/foundry_control.py +174 -0
  49. agentops/agent/sources/results_history.py +494 -0
  50. agentops/agent/sources/spec_detectors/__init__.py +42 -0
  51. agentops/agent/sources/spec_detectors/_base.py +58 -0
  52. agentops/agent/sources/spec_detectors/agents_md.py +75 -0
  53. agentops/agent/sources/spec_detectors/spec_kit.py +172 -0
  54. agentops/agent/time_range.py +117 -0
  55. agentops/cli/__init__.py +1 -0
  56. agentops/cli/app.py +4823 -0
  57. agentops/core/__init__.py +1 -0
  58. agentops/core/agentops_config.py +592 -0
  59. agentops/core/config_loader.py +22 -0
  60. agentops/core/evaluators.py +480 -0
  61. agentops/core/release_evidence.py +56 -0
  62. agentops/core/results.py +117 -0
  63. agentops/mcp/__init__.py +10 -0
  64. agentops/mcp/server.py +232 -0
  65. agentops/pipeline/__init__.py +8 -0
  66. agentops/pipeline/cloud_results.py +189 -0
  67. agentops/pipeline/cloud_runner.py +901 -0
  68. agentops/pipeline/comparison.py +108 -0
  69. agentops/pipeline/diagnostics.py +51 -0
  70. agentops/pipeline/invocations.py +535 -0
  71. agentops/pipeline/official_eval.py +414 -0
  72. agentops/pipeline/orchestrator.py +775 -0
  73. agentops/pipeline/prompt_deploy.py +377 -0
  74. agentops/pipeline/publisher.py +121 -0
  75. agentops/pipeline/reporter.py +202 -0
  76. agentops/pipeline/runtime.py +409 -0
  77. agentops/pipeline/thresholds.py +84 -0
  78. agentops/services/__init__.py +1 -0
  79. agentops/services/cicd.py +720 -0
  80. agentops/services/eval_analysis.py +848 -0
  81. agentops/services/evidence_pack.py +757 -0
  82. agentops/services/initializer.py +86 -0
  83. agentops/services/preflight.py +470 -0
  84. agentops/services/setup_wizard.py +709 -0
  85. agentops/services/skills.py +643 -0
  86. agentops/services/trace_promotion.py +300 -0
  87. agentops/services/workflow_analysis.py +1129 -0
  88. agentops/templates/.gitignore +15 -0
  89. agentops/templates/__init__.py +1 -0
  90. agentops/templates/agent-server/Dockerfile +23 -0
  91. agentops/templates/agent-server/README.md +61 -0
  92. agentops/templates/agent-server/main.bicep +94 -0
  93. agentops/templates/agent.yaml +87 -0
  94. agentops/templates/agentops.yaml +58 -0
  95. agentops/templates/foundry.svg +71 -0
  96. agentops/templates/icon.png +0 -0
  97. agentops/templates/pipelines/azuredevops/agentops-deploy-dev-azd.yml +118 -0
  98. agentops/templates/pipelines/azuredevops/agentops-deploy-dev.yml +73 -0
  99. agentops/templates/pipelines/azuredevops/agentops-deploy-prod-azd.yml +141 -0
  100. agentops/templates/pipelines/azuredevops/agentops-deploy-prod.yml +94 -0
  101. agentops/templates/pipelines/azuredevops/agentops-deploy-prompt-agent.yml +167 -0
  102. agentops/templates/pipelines/azuredevops/agentops-deploy-qa-azd.yml +118 -0
  103. agentops/templates/pipelines/azuredevops/agentops-deploy-qa.yml +68 -0
  104. agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml +210 -0
  105. agentops/templates/pipelines/azuredevops/agentops-pr.yml +155 -0
  106. agentops/templates/pipelines/azuredevops/agentops-watchdog.yml +106 -0
  107. agentops/templates/project.gitignore +36 -0
  108. agentops/templates/sample-traces.jsonl +3 -0
  109. agentops/templates/skills/agentops-agent/SKILL.md +137 -0
  110. agentops/templates/skills/agentops-config/SKILL.md +113 -0
  111. agentops/templates/skills/agentops-dataset/SKILL.md +84 -0
  112. agentops/templates/skills/agentops-eval/SKILL.md +189 -0
  113. agentops/templates/skills/agentops-report/SKILL.md +71 -0
  114. agentops/templates/skills/agentops-workflow/SKILL.md +471 -0
  115. agentops/templates/smoke.jsonl +3 -0
  116. agentops/templates/waf-checklist.README.md +84 -0
  117. agentops/templates/waf-checklist.csv +22 -0
  118. agentops/templates/workflows/agentops-deploy-dev-azd.yml +166 -0
  119. agentops/templates/workflows/agentops-deploy-dev.yml +187 -0
  120. agentops/templates/workflows/agentops-deploy-prod-azd.yml +183 -0
  121. agentops/templates/workflows/agentops-deploy-prod.yml +171 -0
  122. agentops/templates/workflows/agentops-deploy-prompt-agent.yml +197 -0
  123. agentops/templates/workflows/agentops-deploy-qa-azd.yml +156 -0
  124. agentops/templates/workflows/agentops-deploy-qa.yml +145 -0
  125. agentops/templates/workflows/agentops-pr-prompt-agent.yml +210 -0
  126. agentops/templates/workflows/agentops-pr.yml +148 -0
  127. agentops/templates/workflows/agentops-watchdog.yml +122 -0
  128. agentops/utils/__init__.py +1 -0
  129. agentops/utils/azd_env.py +435 -0
  130. agentops/utils/azure_endpoints.py +62 -0
  131. agentops/utils/colors.py +47 -0
  132. agentops/utils/dotenv_loader.py +105 -0
  133. agentops/utils/foundry_discovery.py +229 -0
  134. agentops/utils/logging.py +59 -0
  135. agentops/utils/telemetry.py +554 -0
  136. agentops/utils/yaml.py +36 -0
  137. agentops_accelerator-0.3.0.dist-info/METADATA +278 -0
  138. agentops_accelerator-0.3.0.dist-info/RECORD +142 -0
  139. agentops_accelerator-0.3.0.dist-info/WHEEL +5 -0
  140. agentops_accelerator-0.3.0.dist-info/entry_points.txt +2 -0
  141. agentops_accelerator-0.3.0.dist-info/licenses/LICENSE +21 -0
  142. agentops_accelerator-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,874 @@
1
+ """Workspace hygiene check for the Operational Excellence pillar.
2
+
3
+ These rules read the eval workspace (``agentops.yaml`` + ``.agentops/``
4
+ + ``.github/workflows/``) and flag operational-excellence gaps that
5
+ aren't covered by Foundry's Operate -> Compliance surface. Examples:
6
+
7
+ * Agent string isn't pinned to a version (``my-agent`` instead of
8
+ ``my-agent:3``).
9
+ * ``agentops.yaml`` ships with no ``thresholds:`` block - the gate is
10
+ loose and depends entirely on auto-defaults.
11
+ * Repo has no ``agentops-pr.yml`` CI gate.
12
+
13
+ Findings live under :class:`Category.OPERATIONAL_EXCELLENCE` with the
14
+ ``opex.*`` id prefix and default to ``warning`` severity unless
15
+ explicitly elevated. The companion time-based rules
16
+ (``opex.stale_evaluation``, ``opex.flaky_metric``) live in
17
+ :mod:`agentops.agent.checks.opex`.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import re
24
+ from pathlib import Path
25
+ from typing import Any, Dict, List, Optional
26
+
27
+ import yaml
28
+
29
+ from agentops.agent.findings import Category, Finding, Severity
30
+
31
+ SOURCE_NAME = "opex_workspace"
32
+
33
+
34
def run_opex_workspace_check(workspace: Path) -> List[Finding]:
    """Evaluate every workspace hygiene rule and collect the findings.

    The rules run in a fixed order. The first two inspect the parsed
    ``agentops.yaml`` (``None`` when the file is missing or cannot be
    parsed into a mapping); the remaining rules receive the workspace
    path and look at the files they need themselves.
    """
    parsed_config = _safe_load_yaml(workspace / "agentops.yaml")

    # Rules that only need the parsed project config.
    config_rules = (
        _check_agent_pinning,
        _check_thresholds_block,
    )
    # Rules that walk the workspace directory tree.
    workspace_rules = (
        _check_pr_gate_workflow,
        _check_deploy_gate_workflow,
        _check_results_gitignored,
        _check_dataset_versioning,
        _check_bundle_versioning,
        _check_results_dir_bloat,
        _check_workflow_concurrency,
        _check_workflow_sha_pinning,
        _check_max_tokens_limit,
        _check_ailz_readiness,
    )

    collected: List[Finding] = []
    for rule in config_rules:
        collected.extend(rule(parsed_config))
    for rule in workspace_rules:
        collected.extend(rule(workspace))
    return collected
59
+
60
+
61
+ def _check_agent_pinning(config: Optional[dict]) -> List[Finding]:
62
+ """Warn when `agent:` is not pinned to a `:version` (foundry agent)
63
+ or to an explicit URL/model identifier."""
64
+ if not isinstance(config, dict):
65
+ return []
66
+ agent = config.get("agent")
67
+ if not isinstance(agent, str) or not agent.strip():
68
+ return []
69
+
70
+ # URL targets and model: targets are inherently pinned.
71
+ if agent.startswith("http://") or agent.startswith("https://"):
72
+ return []
73
+ if agent.lower().startswith("model:"):
74
+ return []
75
+
76
+ # For "name:version" - verify the part after ':' is non-empty and
77
+ # not the literal "latest" alias.
78
+ if ":" in agent:
79
+ _, _, version = agent.partition(":")
80
+ version = version.strip().lower()
81
+ if version and version != "latest":
82
+ return []
83
+
84
+ return [
85
+ Finding(
86
+ id="opex.unpinned_agent",
87
+ severity=Severity.WARNING,
88
+ category=Category.OPERATIONAL_EXCELLENCE,
89
+ title="Agent target is not pinned to a version",
90
+ summary=(
91
+ f"`agent: {agent}` has no explicit version. CI runs will "
92
+ "track whatever 'latest' resolves to, so a Foundry edit "
93
+ "to the agent can change eval results without a code "
94
+ "change in this repo."
95
+ ),
96
+ recommendation=(
97
+ "Pin the agent to a published version (for example "
98
+ "`agent: my-agent:3`). Bump the suffix deliberately when "
99
+ "you publish a new version in Foundry."
100
+ ),
101
+ source=SOURCE_NAME,
102
+ evidence={"agent": agent},
103
+ )
104
+ ]
105
+
106
+
107
+ def _check_thresholds_block(config: Optional[dict]) -> List[Finding]:
108
+ """Warn when `thresholds:` is absent or empty - auto-defaults are
109
+ fine for exploration but loose for prod gates."""
110
+ if not isinstance(config, dict):
111
+ return []
112
+ thresholds = config.get("thresholds")
113
+ if isinstance(thresholds, dict) and thresholds:
114
+ return []
115
+ return [
116
+ Finding(
117
+ id="opex.no_thresholds",
118
+ severity=Severity.WARNING,
119
+ category=Category.OPERATIONAL_EXCELLENCE,
120
+ title="agentops.yaml has no explicit thresholds",
121
+ summary=(
122
+ "Without a `thresholds:` block, AgentOps relies entirely "
123
+ "on auto-defaults to decide whether a run passes or "
124
+ "fails. That is fine for exploration but too loose for a "
125
+ "merge gate."
126
+ ),
127
+ recommendation=(
128
+ "Add a `thresholds:` map to `agentops.yaml` listing the "
129
+ "specific metric floors/ceilings your team agrees on "
130
+ "(e.g. `coherence: \">=3\"`, `avg_latency_seconds: "
131
+ "\"<=30\"`)."
132
+ ),
133
+ source=SOURCE_NAME,
134
+ )
135
+ ]
136
+
137
+
138
+ def _check_pr_gate_workflow(workspace: Path) -> List[Finding]:
139
+ """Warn when the repo has no `agentops-pr.yml` CI gate."""
140
+ candidate = workspace / ".github" / "workflows" / "agentops-pr.yml"
141
+ if candidate.exists():
142
+ return []
143
+ # If there's no .github/workflows directory at all, the repo may not
144
+ # be a CI-driven project - only warn when there *is* a workflows dir
145
+ # so we don't pester e.g. local-only sandboxes.
146
+ if not (workspace / ".github" / "workflows").is_dir():
147
+ return []
148
+ return [
149
+ Finding(
150
+ id="opex.no_pr_gate",
151
+ severity=Severity.WARNING,
152
+ category=Category.OPERATIONAL_EXCELLENCE,
153
+ title="Repository has no AgentOps PR gate",
154
+ summary=(
155
+ "There is a `.github/workflows/` directory but no "
156
+ "`agentops-pr.yml`. PRs can merge without running an "
157
+ "AgentOps evaluation, so quality regressions slip "
158
+ "through unchecked."
159
+ ),
160
+ recommendation=(
161
+ "Run `agentops workflow generate` to scaffold the PR "
162
+ "gate + deploy templates, commit the result, and require "
163
+ "the AgentOps PR check on your default branch under "
164
+ "Settings -> Branches."
165
+ ),
166
+ source=SOURCE_NAME,
167
+ )
168
+ ]
169
+
170
+
171
+ def _check_deploy_gate_workflow(workspace: Path) -> List[Finding]:
172
+ """Warn when the repo has a PR gate but no `agentops-deploy-*.yml`.
173
+
174
+ The PR gate alone protects merges; deploy workflows are what
175
+ actually run an eval against the promoted environment (dev / qa /
176
+ prod). Their absence means CI exercises evals on the PR branch but
177
+ never re-verifies after deployment.
178
+ """
179
+ workflows_dir = workspace / ".github" / "workflows"
180
+ if not workflows_dir.is_dir():
181
+ return []
182
+
183
+ deploy_files = list(workflows_dir.glob("agentops-deploy-*.yml")) + list(
184
+ workflows_dir.glob("agentops-deploy-*.yaml")
185
+ )
186
+ if deploy_files:
187
+ return []
188
+
189
+ # Only complain when there's a PR gate - otherwise the repo isn't
190
+ # opted into AgentOps CI at all and `no_pr_gate` already covers it.
191
+ if not (workflows_dir / "agentops-pr.yml").exists():
192
+ return []
193
+
194
+ return [
195
+ Finding(
196
+ id="opex.no_deploy_workflow",
197
+ severity=Severity.WARNING,
198
+ category=Category.OPERATIONAL_EXCELLENCE,
199
+ title="Repository has a PR gate but no deploy workflow",
200
+ summary=(
201
+ "`.github/workflows/` ships `agentops-pr.yml` but no "
202
+ "`agentops-deploy-*.yml`. CI runs evals on PR branches "
203
+ "but never re-validates the agent against its real "
204
+ "(dev / qa / prod) endpoint after deployment."
205
+ ),
206
+ recommendation=(
207
+ "Run `agentops workflow generate` (it scaffolds deploy "
208
+ "workflows for dev, qa, and prod), commit the result, "
209
+ "and wire the matching OIDC federated credentials in "
210
+ "Azure."
211
+ ),
212
+ source=SOURCE_NAME,
213
+ )
214
+ ]
215
+
216
+
217
+ def _check_results_gitignored(workspace: Path) -> List[Finding]:
218
+ """Warn when `.agentops/results/` is not in any reachable .gitignore.
219
+
220
+ Committing run artefacts is a real footgun: results.json and
221
+ backend_metrics.json can carry verbatim prompts and model outputs,
222
+ which is fine for short-lived evidence but can leak PII when pushed
223
+ to a shared remote. `agentops init` ships a `.agentops/.gitignore`
224
+ pre-populated with `results/`; this rule flags when that has been
225
+ removed or never existed.
226
+ """
227
+ candidates = [
228
+ workspace / ".gitignore",
229
+ workspace / ".agentops" / ".gitignore",
230
+ ]
231
+ for path in candidates:
232
+ if not path.exists():
233
+ continue
234
+ try:
235
+ text = path.read_text(encoding="utf-8")
236
+ except OSError:
237
+ continue
238
+ for line in text.splitlines():
239
+ stripped = line.strip()
240
+ if not stripped or stripped.startswith("#"):
241
+ continue
242
+ # Accept either the workspace-relative or repo-relative
243
+ # spelling. Trailing `/` is optional.
244
+ normalized = stripped.rstrip("/")
245
+ if normalized in {
246
+ ".agentops/results",
247
+ "results",
248
+ "/results",
249
+ ".agentops/results/*",
250
+ }:
251
+ return []
252
+
253
+ # Only warn when there *is* a results directory to protect - empty
254
+ # repos don't need the noise.
255
+ if not (workspace / ".agentops" / "results").is_dir():
256
+ return []
257
+
258
+ return [
259
+ Finding(
260
+ id="opex.results_not_gitignored",
261
+ severity=Severity.WARNING,
262
+ category=Category.OPERATIONAL_EXCELLENCE,
263
+ title="Eval results are not gitignored",
264
+ summary=(
265
+ "`.agentops/results/` exists but no reachable "
266
+ "`.gitignore` excludes it. Committing run artefacts "
267
+ "(prompts, model outputs, evidence) to git risks "
268
+ "leaking PII the next time the repo is pushed."
269
+ ),
270
+ recommendation=(
271
+ "Add `results/` to `.agentops/.gitignore` (or "
272
+ "`.agentops/results/` to the repo `.gitignore`). "
273
+ "`agentops init` scaffolds this for you on new "
274
+ "workspaces."
275
+ ),
276
+ source=SOURCE_NAME,
277
+ )
278
+ ]
279
+
280
+
281
+ def _check_dataset_versioning(workspace: Path) -> List[Finding]:
282
+ """Warn when dataset YAML files lack a top-level ``version`` field.
283
+
284
+ Without a version, edits to a dataset silently change the meaning
285
+ of historical eval runs - a quality regression report can be
286
+ invalidated by an out-of-band dataset edit no one noticed.
287
+ """
288
+ datasets_dir = workspace / ".agentops" / "datasets"
289
+ if not datasets_dir.is_dir():
290
+ return []
291
+
292
+ unversioned: List[str] = []
293
+ for path in sorted(datasets_dir.glob("*.yaml")):
294
+ data = _safe_load_yaml(path)
295
+ if data is None:
296
+ # Unreadable / non-dict YAML: treat as unversioned so the
297
+ # finding nudges the user to clean it up.
298
+ unversioned.append(path.name)
299
+ continue
300
+ if "version" not in data:
301
+ unversioned.append(path.name)
302
+
303
+ if not unversioned:
304
+ return []
305
+
306
+ return [
307
+ Finding(
308
+ id="opex.unversioned_dataset",
309
+ severity=Severity.WARNING,
310
+ category=Category.OPERATIONAL_EXCELLENCE,
311
+ title="Dataset YAML files are missing a `version` field",
312
+ summary=(
313
+ f"{len(unversioned)} dataset YAML file(s) in "
314
+ "`.agentops/datasets/` lack a top-level `version:` "
315
+ "field. Edits to the dataset will silently change the "
316
+ "meaning of historical eval runs."
317
+ ),
318
+ recommendation=(
319
+ "Add `version: 1` (or a higher integer when you change "
320
+ "the dataset) to each dataset YAML. Bump the version "
321
+ "whenever you edit the underlying JSONL rows so "
322
+ "regression comparisons remain meaningful."
323
+ ),
324
+ source=SOURCE_NAME,
325
+ evidence={"files": unversioned},
326
+ )
327
+ ]
328
+
329
+
330
+ def _check_bundle_versioning(workspace: Path) -> List[Finding]:
331
+ """Warn when bundle YAML files lack a top-level ``version`` field.
332
+
333
+ Bundles encode evaluator + threshold policy. Editing the policy
334
+ without bumping a version invalidates comparisons across historical
335
+ runs in exactly the same way an un-versioned dataset edit does.
336
+ """
337
+ bundles_dir = workspace / ".agentops" / "bundles"
338
+ if not bundles_dir.is_dir():
339
+ return []
340
+
341
+ unversioned: List[str] = []
342
+ for path in sorted(bundles_dir.glob("*.yaml")):
343
+ data = _safe_load_yaml(path)
344
+ if data is None:
345
+ unversioned.append(path.name)
346
+ continue
347
+ if "version" not in data:
348
+ unversioned.append(path.name)
349
+
350
+ if not unversioned:
351
+ return []
352
+
353
+ return [
354
+ Finding(
355
+ id="opex.unversioned_bundle",
356
+ severity=Severity.WARNING,
357
+ category=Category.OPERATIONAL_EXCELLENCE,
358
+ title="Bundle YAML files are missing a `version` field",
359
+ summary=(
360
+ f"{len(unversioned)} bundle YAML file(s) in "
361
+ "`.agentops/bundles/` lack a top-level `version:` "
362
+ "field. Edits to evaluators or thresholds will "
363
+ "silently change the meaning of historical eval runs."
364
+ ),
365
+ recommendation=(
366
+ "Add `version: 1` (or higher) to each bundle YAML. "
367
+ "Bump the version whenever you change evaluators or "
368
+ "thresholds so regression comparisons remain "
369
+ "meaningful."
370
+ ),
371
+ source=SOURCE_NAME,
372
+ evidence={"files": unversioned},
373
+ )
374
+ ]
375
+
376
+
377
+ def _check_results_dir_bloat(workspace: Path) -> List[Finding]:
378
+ """Warn when ``.agentops/results/`` has grown past a healthy size.
379
+
380
+ Results directories grow unboundedly by design - every CI run
381
+ writes a new timestamped folder. Past ~50 runs that's just clutter,
382
+ inflates clone times, and makes the cockpit slow. The fix is
383
+ either archival (move old runs to blob storage) or a rotation
384
+ policy in CI.
385
+ """
386
+ results_dir = workspace / ".agentops" / "results"
387
+ if not results_dir.is_dir():
388
+ return []
389
+
390
+ run_dirs = [
391
+ d
392
+ for d in results_dir.iterdir()
393
+ if d.is_dir() and d.name != "latest"
394
+ ]
395
+ threshold = 50
396
+ if len(run_dirs) <= threshold:
397
+ return []
398
+
399
+ severity = (
400
+ Severity.CRITICAL if len(run_dirs) >= threshold * 4 else Severity.WARNING
401
+ )
402
+ return [
403
+ Finding(
404
+ id="opex.results_dir_bloat",
405
+ severity=severity,
406
+ category=Category.OPERATIONAL_EXCELLENCE,
407
+ title="Eval results directory is bloated",
408
+ summary=(
409
+ f"`.agentops/results/` holds {len(run_dirs)} run "
410
+ f"folders (threshold: {threshold}). Past this point "
411
+ "the directory mostly clutters clones, slows the "
412
+ "cockpit, and obscures the runs that actually "
413
+ "matter."
414
+ ),
415
+ recommendation=(
416
+ "Archive old runs (e.g. upload to blob storage) or "
417
+ "add a retention step to CI that prunes runs older "
418
+ "than your chosen window. The `latest/` pointer is "
419
+ "always preserved and does not count toward the "
420
+ "threshold."
421
+ ),
422
+ source=SOURCE_NAME,
423
+ evidence={
424
+ "run_count": len(run_dirs),
425
+ "threshold": threshold,
426
+ },
427
+ )
428
+ ]
429
+
430
+
431
+ def _check_workflow_concurrency(workspace: Path) -> List[Finding]:
432
+ """Warn when AgentOps workflows lack a top-level ``concurrency:`` block.
433
+
434
+ Without one, two pushes on the same PR run in parallel and
435
+ double-bill Azure model quota - WAF AI Cost & Quota Management
436
+ asks for the opposite.
437
+ """
438
+ workflows_dir = workspace / ".github" / "workflows"
439
+ if not workflows_dir.is_dir():
440
+ return []
441
+
442
+ candidates = list(workflows_dir.glob("agentops-pr.yml")) + list(
443
+ workflows_dir.glob("agentops-deploy-*.yml")
444
+ ) + list(workflows_dir.glob("agentops-deploy-*.yaml"))
445
+
446
+ offenders: List[str] = []
447
+ for path in candidates:
448
+ data = _safe_load_yaml(path)
449
+ if not isinstance(data, dict):
450
+ continue
451
+ if "concurrency" not in data:
452
+ offenders.append(path.name)
453
+
454
+ if not offenders:
455
+ return []
456
+
457
+ return [
458
+ Finding(
459
+ id="opex.workflow_concurrency_lock",
460
+ severity=Severity.WARNING,
461
+ category=Category.OPERATIONAL_EXCELLENCE,
462
+ title="AgentOps workflows are missing a `concurrency:` block",
463
+ summary=(
464
+ f"{len(offenders)} AgentOps workflow(s) under "
465
+ "`.github/workflows/` have no top-level "
466
+ "`concurrency:` block. Two pushes on the same PR "
467
+ "(or two pipeline runs against the same environment) "
468
+ "will execute in parallel and double-bill Azure "
469
+ "model quota."
470
+ ),
471
+ recommendation=(
472
+ "Add a `concurrency:` block, for example:\n"
473
+ "```yaml\n"
474
+ "concurrency:\n"
475
+ " group: ${{ github.workflow }}-${{ github.ref }}\n"
476
+ " cancel-in-progress: true\n"
477
+ "```"
478
+ ),
479
+ source=SOURCE_NAME,
480
+ evidence={"files": offenders},
481
+ )
482
+ ]
483
+
484
+
485
+ _SHA40 = re.compile(r"^[0-9a-f]{40}$")
486
+ _USES = re.compile(r'^\s*-?\s*uses\s*:\s*([^\s#]+)\s*(?:#.*)?$', re.IGNORECASE)
487
+
488
+
489
+ def _check_workflow_sha_pinning(workspace: Path) -> List[Finding]:
490
+ """Warn when AgentOps workflows pin actions by tag instead of SHA.
491
+
492
+ WAF AI Reproducible Workflows asks for dependency immutability.
493
+ Tags can move; commit SHAs cannot.
494
+ """
495
+ workflows_dir = workspace / ".github" / "workflows"
496
+ if not workflows_dir.is_dir():
497
+ return []
498
+
499
+ offenders: List[Dict[str, Any]] = []
500
+ for path in sorted(workflows_dir.glob("agentops-*.yml")) + sorted(
501
+ workflows_dir.glob("agentops-*.yaml")
502
+ ):
503
+ try:
504
+ text = path.read_text(encoding="utf-8")
505
+ except OSError:
506
+ continue
507
+ for line_no, line in enumerate(text.splitlines(), start=1):
508
+ match = _USES.match(line)
509
+ if not match:
510
+ continue
511
+ ref = match.group(1)
512
+ # Skip local actions and docker:// refs.
513
+ if ref.startswith("./") or ref.startswith("docker://"):
514
+ continue
515
+ if "@" not in ref:
516
+ continue
517
+ _, _, suffix = ref.rpartition("@")
518
+ if _SHA40.match(suffix):
519
+ continue
520
+ offenders.append({"file": path.name, "line": line_no, "ref": ref})
521
+
522
+ if not offenders:
523
+ return []
524
+
525
+ return [
526
+ Finding(
527
+ id="opex.workflow_action_sha_pinning",
528
+ severity=Severity.WARNING,
529
+ category=Category.OPERATIONAL_EXCELLENCE,
530
+ title="AgentOps workflows pin actions by tag, not by commit SHA",
531
+ summary=(
532
+ f"{len(offenders)} `uses:` line(s) across AgentOps "
533
+ "workflows pin a GitHub Action to a tag (e.g. `@v4`) "
534
+ "rather than a 40-character commit SHA. Tags are "
535
+ "mutable; CI runs are not reproducible if the tag "
536
+ "moves."
537
+ ),
538
+ recommendation=(
539
+ "Replace each `uses: <owner>/<repo>@<tag>` with "
540
+ "`uses: <owner>/<repo>@<40-char-sha>`. The Dependabot "
541
+ "`github-actions` ecosystem can keep these pinned "
542
+ "SHAs current automatically."
543
+ ),
544
+ source=SOURCE_NAME,
545
+ evidence={"offenders": offenders},
546
+ )
547
+ ]
548
+
549
+
550
+ def _safe_load_yaml(path: Path) -> Optional[dict]:
551
+ if not path.exists():
552
+ return None
553
+ try:
554
+ with path.open("r", encoding="utf-8") as handle:
555
+ data = yaml.safe_load(handle)
556
+ except (OSError, yaml.YAMLError):
557
+ return None
558
+ return data if isinstance(data, dict) else None
559
+
560
+
561
+ def _check_max_tokens_limit(workspace: Path) -> List[Finding]:
562
+ """AI.26 — every model deployment / call should set a ``max_tokens`` limit.
563
+
564
+ Without an upper bound, a runaway prompt or a malicious user can
565
+ drive the bill arbitrarily high. We look in two places:
566
+
567
+ * ``agentops.yaml`` at the project root.
568
+ * Every ``*.yaml`` under ``.agentops/bundles/`` (evaluator bundles
569
+ that drive eval-time model calls).
570
+
571
+ The check is permissive: it fires only when at least one file
572
+ explicitly looks like it configures a model (has ``model:``,
573
+ ``deployment:``, or an ``evaluators:`` list) **and** none of the
574
+ candidate files declares ``max_tokens``. That avoids false
575
+ positives on bare workspaces / agent-only configs.
576
+ """
577
+ candidates: List[Path] = []
578
+ root = workspace / "agentops.yaml"
579
+ if root.exists():
580
+ candidates.append(root)
581
+ bundles_dir = workspace / ".agentops" / "bundles"
582
+ if bundles_dir.is_dir():
583
+ candidates.extend(sorted(bundles_dir.glob("*.y*ml")))
584
+ if not candidates:
585
+ return []
586
+
587
+ looks_model_shaped = False
588
+ files_with_max_tokens: List[str] = []
589
+ files_without_max_tokens: List[str] = []
590
+
591
+ for path in candidates:
592
+ try:
593
+ text = path.read_text(encoding="utf-8")
594
+ except OSError:
595
+ continue
596
+ # Cheap, format-agnostic detection: matches `max_tokens: <n>`
597
+ # at any nesting level in any of the candidate YAMLs.
598
+ if re.search(r"(?m)^\s*max_tokens\s*:", text):
599
+ files_with_max_tokens.append(str(path.relative_to(workspace)).replace("\\", "/"))
600
+ looks_model_shaped = True
601
+ continue
602
+ # Only count files that actually look like they configure a model.
603
+ if re.search(
604
+ r"(?m)^\s*(model|deployment|evaluators)\s*:",
605
+ text,
606
+ ):
607
+ looks_model_shaped = True
608
+ files_without_max_tokens.append(
609
+ str(path.relative_to(workspace)).replace("\\", "/")
610
+ )
611
+
612
+ if not looks_model_shaped:
613
+ return []
614
+ if files_with_max_tokens and not files_without_max_tokens:
615
+ return []
616
+ if not files_without_max_tokens:
617
+ return []
618
+
619
+ return [
620
+ Finding(
621
+ id="opex.max_tokens_undefined",
622
+ severity=Severity.WARNING,
623
+ category=Category.OPERATIONAL_EXCELLENCE,
624
+ title="`max_tokens` is not set on model / evaluator configuration",
625
+ summary=(
626
+ "Found model / evaluator YAML files that do not declare "
627
+ "a `max_tokens:` ceiling. Without an upper bound a single "
628
+ "runaway completion or a malicious prompt can drive token "
629
+ "spend arbitrarily high."
630
+ ),
631
+ recommendation=(
632
+ "Add a `max_tokens:` field next to each `model:` / "
633
+ "`deployment:` block (and inside `model_config:` for "
634
+ "AI-assisted evaluators). Pick a value just above your "
635
+ "longest legitimate response so legitimate traffic isn't "
636
+ "truncated."
637
+ ),
638
+ source=SOURCE_NAME,
639
+ evidence={
640
+ "files_without_max_tokens": files_without_max_tokens[:10],
641
+ "files_with_max_tokens": files_with_max_tokens[:10],
642
+ },
643
+ )
644
+ ]
645
+
646
+
647
def _check_ailz_readiness(workspace: Path) -> List[Finding]:
    """Report AI Landing Zone deployment readiness using local files only.

    The check never shells out to az/azd and never contacts Azure or Foundry.
    It is a no-op (empty list) unless ``_ailz_signals`` reports at least one
    AI Landing Zone signal, so generic azd projects do not receive
    landing-zone-specific findings.

    Returns:
        An INFO finding carrying the readiness dimensions as evidence,
        followed by a WARNING finding listing the gaps when any were found.
    """
    detected_signals = _ailz_signals(workspace)
    if not detected_signals:
        return []

    script_present = (workspace / "scripts" / "Invoke-PreflightChecks.ps1").exists()
    config_present = (workspace / "agentops.yaml").exists()

    # All AgentOps deploy workflows are concatenated and scanned as one text.
    workflow_text = "\n".join(
        [_safe_read_text(wf) for wf in _agentops_deploy_workflow_files(workspace)]
    )
    workflow_lower = workflow_text.lower()
    azd_in_workflow = "azd provision" in workflow_lower
    preflight_in_workflow = "invoke-preflightchecks.ps1" in workflow_lower

    isolated = _ailz_network_isolation_detected(workspace)
    # A runner path only needs declaring when network isolation is in play;
    # the README is consulted only if the workflows do not declare one.
    if not isolated:
        runner_declared = True
    elif _private_runner_path_declared(workflow_text):
        runner_declared = True
    else:
        runner_declared = _private_runner_path_declared(
            _safe_read_text(workspace / "README.md")
        )

    mode = _ailz_deployment_mode(workspace)

    readiness = {
        "canonical_signals": detected_signals,
        "deployment_mode": mode,
        "ailz_preflight_script": script_present,
        "agentops_eval_config": config_present,
        "azd_deploy_workflow": azd_in_workflow,
        "ailz_preflight_in_workflow": preflight_in_workflow,
        "network_isolation_detected": isolated,
        "private_runner_path_declared": runner_declared,
    }

    readiness_finding = Finding(
        id="opex.ailz_readiness",
        severity=Severity.INFO,
        category=Category.OPERATIONAL_EXCELLENCE,
        title="AI Landing Zone deployment readiness detected",
        summary=(
            "This workspace has AI Landing Zone deployment signals. "
            "AgentOps can help turn those signals into an actionable "
            "path: landing-zone preflight, azd/Bicep provisioning, "
            "Doctor checks, eval gates, and post-deploy evidence."
        ),
        recommendation=(
            "Run `agentops workflow analyze` to review the deployment "
            "path, then generate azd-based workflows once the readiness "
            "dimensions in the evidence are intentionally set."
        ),
        source=SOURCE_NAME,
        evidence=readiness,
    )

    # (applies, message) pairs, kept in the order the gaps are reported.
    gap_checks = (
        (
            not script_present,
            "AILZ preflight script is missing from scripts/Invoke-PreflightChecks.ps1",
        ),
        (
            not config_present,
            "agentops.yaml is missing, so post-deploy eval validation is not configured",
        ),
        (
            not azd_in_workflow,
            "no AgentOps azd deploy workflow was found",
        ),
        (
            # Only meaningful when a workflow exists and the script is there to run.
            azd_in_workflow and script_present and not preflight_in_workflow,
            "AgentOps azd deploy workflow does not run the AILZ preflight script",
        ),
        (
            isolated and not runner_declared,
            "network isolation is detected but the CI runner/private-network execution path is not declared",
        ),
    )
    gaps: List[str] = [message for applies, message in gap_checks if applies]

    if not gaps:
        return [readiness_finding]

    gaps_finding = Finding(
        id="opex.ailz_gaps",
        severity=Severity.WARNING,
        category=Category.OPERATIONAL_EXCELLENCE,
        title="AI Landing Zone deployment readiness needs attention",
        summary=(
            "The project has AI Landing Zone signals, but one or "
            "more readiness dimensions are still missing before the "
            "pipeline can confidently provision and validate the "
            "workload."
        ),
        recommendation=(
            "Address the gaps listed in evidence. A common path is: "
            "`agentops init`, `agentops eval analyze`, "
            "`agentops workflow analyze`, then "
            "`agentops workflow generate --deploy-mode azd --force`. "
            "For hub-spoke or private-network rollout, use "
            "`/agentops-workflow` to adapt runner access explicitly."
        ),
        source=SOURCE_NAME,
        evidence={"gaps": gaps, "readiness": readiness},
    )
    return [readiness_finding, gaps_finding]
748
+
749
+
750
def _ailz_signals(workspace: Path) -> List[str]:
    """Collect the AI Landing Zone markers found in ``workspace``.

    Looks at ``manifest.json`` (``ailz_tag`` / ``ailz_version`` keys),
    ``azure.yaml`` (template or project name ``azure-ai-lz``, and hooks that
    reference the preflight script) and the presence of
    ``scripts/Invoke-PreflightChecks.ps1``.

    Returns:
        The distinct signal descriptions in sorted order; empty when nothing
        matched.
    """
    found: set = set()

    manifest = _safe_load_json(workspace / "manifest.json")
    if isinstance(manifest, dict) and (
        "ailz_tag" in manifest or "ailz_version" in manifest
    ):
        found.add("manifest.json pins AI Landing Zone version")

    azure_yaml = _safe_load_yaml(workspace / "azure.yaml")
    if isinstance(azure_yaml, dict):
        metadata = azure_yaml.get("metadata")
        # Missing or non-mapping metadata is treated as "no template declared".
        template = (
            str(metadata.get("template") or "") if isinstance(metadata, dict) else ""
        )
        name = str(azure_yaml.get("name") or "")
        if "azure-ai-lz" in (template, name):
            found.add("azure.yaml identifies the azure-ai-lz template")
        if _azure_yaml_invokes_ailz_preflight(azure_yaml):
            found.add("azure.yaml wires the AI Landing Zone preprovision hook")

    preflight_script = workspace / "scripts" / "Invoke-PreflightChecks.ps1"
    if preflight_script.exists():
        found.add("scripts/Invoke-PreflightChecks.ps1 is present")

    return sorted(found)
775
+
776
+
777
def _safe_load_json(path: Path) -> Optional[dict]:
    """Best-effort load of a JSON object from ``path``.

    Args:
        path: File to read via ``_safe_read_text``.

    Returns:
        The parsed document when it is a JSON object, otherwise ``None``
        (empty/unreadable file, invalid JSON, or a non-object top level).
    """
    text = _safe_read_text(path)
    if not text:
        return None
    try:
        # json.loads rejects a leading byte-order mark, which files saved by
        # Windows tooling often carry; drop it so they still parse.
        data = json.loads(text.lstrip("\ufeff"))
    except (ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError; RecursionError is raised
        # for pathologically nested documents and must not escape a
        # best-effort loader.
        return None
    return data if isinstance(data, dict) else None
786
+
787
+
788
+ def _safe_read_text(path: Path) -> str:
789
+ try:
790
+ if not path.exists() or path.stat().st_size > 200_000:
791
+ return ""
792
+ return path.read_text(encoding="utf-8", errors="ignore")
793
+ except OSError:
794
+ return ""
795
+
796
+
797
+ def _azure_yaml_invokes_ailz_preflight(data: dict) -> bool:
798
+ hooks = data.get("hooks")
799
+ if not isinstance(hooks, dict):
800
+ return False
801
+ return "invoke-preflightchecks.ps1" in str(hooks).lower()
802
+
803
+
804
def _ailz_deployment_mode(workspace: Path) -> str:
    """Determine the landing-zone deployment mode from local files.

    ``main.parameters.json`` is consulted first (workspace root, then
    ``infra/`` if the root file did not load) for
    ``parameters.deploymentMode.value``. When no value is declared there, the
    README is scanned for keywords.

    Returns:
        The declared ``deploymentMode`` value as a string, else
        ``"ailz-integrated"`` or ``"standalone"`` inferred from the README,
        else ``"unknown"``.
    """
    params = _safe_load_json(workspace / "main.parameters.json")
    if not isinstance(params, dict):
        params = _safe_load_json(workspace / "infra" / "main.parameters.json")
    if isinstance(params, dict):
        declared = params.get("parameters")
        # A malformed file may carry a list or string here; treat anything
        # that is not a mapping as "nothing declared" instead of crashing.
        raw = declared.get("deploymentMode") if isinstance(declared, dict) else None
        if isinstance(raw, dict) and raw.get("value"):
            return str(raw["value"])
    text = _safe_read_text(workspace / "README.md").lower()
    if "ailz-integrated" in text or "hub-and-spoke" in text:
        return "ailz-integrated"
    if "standalone" in text and "ai landing zone" in text:
        return "standalone"
    return "unknown"
818
+
819
+
820
def _ailz_network_isolation_detected(workspace: Path) -> bool:
    """Heuristically detect network isolation from parameter, Bicep and README text.

    The root and ``infra/`` copies of ``main.parameters.json`` and
    ``main.bicep`` plus ``README.md`` are read, joined and lowercased;
    the result is ``True`` as soon as any isolation-related term occurs.
    """
    isolation_terms = (
        "network_isolation",
        "networkisolation",
        "privateendpoint",
        "private endpoint",
        "azurefirewall",
        "azure firewall",
        "bastion",
        "jumpbox",
        "acr_task_agent_pool",
        "acr tasks",
        "hubintegration",
    )
    scanned = [
        workspace / "main.parameters.json",
        workspace / "infra" / "main.parameters.json",
        workspace / "main.bicep",
        workspace / "infra" / "main.bicep",
        workspace / "README.md",
    ]
    haystack = "\n".join(map(_safe_read_text, scanned)).lower()
    for term in isolation_terms:
        if term in haystack:
            return True
    return False
845
+
846
+
847
+ def _agentops_deploy_workflow_files(workspace: Path) -> List[Path]:
848
+ candidates: List[Path] = []
849
+ for root in (
850
+ workspace / ".github" / "workflows",
851
+ workspace / ".azuredevops" / "pipelines",
852
+ ):
853
+ if not root.is_dir():
854
+ continue
855
+ candidates.extend(sorted(root.glob("agentops-deploy-*.yml")))
856
+ candidates.extend(sorted(root.glob("agentops-deploy-*.yaml")))
857
+ return candidates
858
+
859
+
860
+ def _private_runner_path_declared(text: str) -> bool:
861
+ lowered = text.lower()
862
+ return any(
863
+ marker in lowered
864
+ for marker in (
865
+ "self-hosted",
866
+ "private runner",
867
+ "private agent",
868
+ "jumpbox",
869
+ "acr tasks",
870
+ "acr_task",
871
+ "agent pool",
872
+ "agent-pool",
873
+ )
874
+ )