agentops-accelerator 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. agentops/__init__.py +10 -0
  2. agentops/__main__.py +6 -0
  3. agentops/agent/__init__.py +12 -0
  4. agentops/agent/_legacy_ids.py +92 -0
  5. agentops/agent/analyzer.py +207 -0
  6. agentops/agent/checks/__init__.py +1 -0
  7. agentops/agent/checks/catalog.py +880 -0
  8. agentops/agent/checks/errors.py +279 -0
  9. agentops/agent/checks/foundry_config.py +75 -0
  10. agentops/agent/checks/latency.py +84 -0
  11. agentops/agent/checks/opex.py +157 -0
  12. agentops/agent/checks/opex_workspace.py +874 -0
  13. agentops/agent/checks/posture.py +36 -0
  14. agentops/agent/checks/posture_rules/__init__.py +53 -0
  15. agentops/agent/checks/posture_rules/content_filter.py +59 -0
  16. agentops/agent/checks/posture_rules/diagnostics.py +74 -0
  17. agentops/agent/checks/posture_rules/local_auth.py +55 -0
  18. agentops/agent/checks/posture_rules/managed_identity.py +59 -0
  19. agentops/agent/checks/posture_rules/network.py +68 -0
  20. agentops/agent/checks/regression.py +78 -0
  21. agentops/agent/checks/release_readiness.py +182 -0
  22. agentops/agent/checks/safety.py +247 -0
  23. agentops/agent/checks/spec_conformance.py +375 -0
  24. agentops/agent/cockpit.py +5159 -0
  25. agentops/agent/config.py +240 -0
  26. agentops/agent/findings.py +113 -0
  27. agentops/agent/history.py +142 -0
  28. agentops/agent/knowledge/__init__.py +182 -0
  29. agentops/agent/knowledge/waf-checklist.csv +39 -0
  30. agentops/agent/llm_assist/__init__.py +16 -0
  31. agentops/agent/llm_assist/_base.py +124 -0
  32. agentops/agent/llm_assist/_bundle_rule.py +154 -0
  33. agentops/agent/llm_assist/_client.py +347 -0
  34. agentops/agent/llm_assist/_dataset_rules.py +191 -0
  35. agentops/agent/llm_assist/_engine.py +106 -0
  36. agentops/agent/llm_assist/_prompt_rules.py +291 -0
  37. agentops/agent/llm_assist/_spec_rules.py +235 -0
  38. agentops/agent/production_telemetry.py +430 -0
  39. agentops/agent/report.py +207 -0
  40. agentops/agent/server/__init__.py +1 -0
  41. agentops/agent/server/app.py +84 -0
  42. agentops/agent/server/auth.py +94 -0
  43. agentops/agent/server/chat.py +44 -0
  44. agentops/agent/server/protocol.py +72 -0
  45. agentops/agent/sources/__init__.py +1 -0
  46. agentops/agent/sources/azure_monitor.py +523 -0
  47. agentops/agent/sources/azure_resources.py +602 -0
  48. agentops/agent/sources/foundry_control.py +174 -0
  49. agentops/agent/sources/results_history.py +494 -0
  50. agentops/agent/sources/spec_detectors/__init__.py +42 -0
  51. agentops/agent/sources/spec_detectors/_base.py +58 -0
  52. agentops/agent/sources/spec_detectors/agents_md.py +75 -0
  53. agentops/agent/sources/spec_detectors/spec_kit.py +172 -0
  54. agentops/agent/time_range.py +117 -0
  55. agentops/cli/__init__.py +1 -0
  56. agentops/cli/app.py +4823 -0
  57. agentops/core/__init__.py +1 -0
  58. agentops/core/agentops_config.py +592 -0
  59. agentops/core/config_loader.py +22 -0
  60. agentops/core/evaluators.py +480 -0
  61. agentops/core/release_evidence.py +56 -0
  62. agentops/core/results.py +117 -0
  63. agentops/mcp/__init__.py +10 -0
  64. agentops/mcp/server.py +232 -0
  65. agentops/pipeline/__init__.py +8 -0
  66. agentops/pipeline/cloud_results.py +189 -0
  67. agentops/pipeline/cloud_runner.py +901 -0
  68. agentops/pipeline/comparison.py +108 -0
  69. agentops/pipeline/diagnostics.py +51 -0
  70. agentops/pipeline/invocations.py +535 -0
  71. agentops/pipeline/official_eval.py +414 -0
  72. agentops/pipeline/orchestrator.py +775 -0
  73. agentops/pipeline/prompt_deploy.py +377 -0
  74. agentops/pipeline/publisher.py +121 -0
  75. agentops/pipeline/reporter.py +202 -0
  76. agentops/pipeline/runtime.py +409 -0
  77. agentops/pipeline/thresholds.py +84 -0
  78. agentops/services/__init__.py +1 -0
  79. agentops/services/cicd.py +720 -0
  80. agentops/services/eval_analysis.py +848 -0
  81. agentops/services/evidence_pack.py +757 -0
  82. agentops/services/initializer.py +86 -0
  83. agentops/services/preflight.py +470 -0
  84. agentops/services/setup_wizard.py +709 -0
  85. agentops/services/skills.py +643 -0
  86. agentops/services/trace_promotion.py +300 -0
  87. agentops/services/workflow_analysis.py +1129 -0
  88. agentops/templates/.gitignore +15 -0
  89. agentops/templates/__init__.py +1 -0
  90. agentops/templates/agent-server/Dockerfile +23 -0
  91. agentops/templates/agent-server/README.md +61 -0
  92. agentops/templates/agent-server/main.bicep +94 -0
  93. agentops/templates/agent.yaml +87 -0
  94. agentops/templates/agentops.yaml +58 -0
  95. agentops/templates/foundry.svg +71 -0
  96. agentops/templates/icon.png +0 -0
  97. agentops/templates/pipelines/azuredevops/agentops-deploy-dev-azd.yml +118 -0
  98. agentops/templates/pipelines/azuredevops/agentops-deploy-dev.yml +73 -0
  99. agentops/templates/pipelines/azuredevops/agentops-deploy-prod-azd.yml +141 -0
  100. agentops/templates/pipelines/azuredevops/agentops-deploy-prod.yml +94 -0
  101. agentops/templates/pipelines/azuredevops/agentops-deploy-prompt-agent.yml +167 -0
  102. agentops/templates/pipelines/azuredevops/agentops-deploy-qa-azd.yml +118 -0
  103. agentops/templates/pipelines/azuredevops/agentops-deploy-qa.yml +68 -0
  104. agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml +210 -0
  105. agentops/templates/pipelines/azuredevops/agentops-pr.yml +155 -0
  106. agentops/templates/pipelines/azuredevops/agentops-watchdog.yml +106 -0
  107. agentops/templates/project.gitignore +36 -0
  108. agentops/templates/sample-traces.jsonl +3 -0
  109. agentops/templates/skills/agentops-agent/SKILL.md +137 -0
  110. agentops/templates/skills/agentops-config/SKILL.md +113 -0
  111. agentops/templates/skills/agentops-dataset/SKILL.md +84 -0
  112. agentops/templates/skills/agentops-eval/SKILL.md +189 -0
  113. agentops/templates/skills/agentops-report/SKILL.md +71 -0
  114. agentops/templates/skills/agentops-workflow/SKILL.md +471 -0
  115. agentops/templates/smoke.jsonl +3 -0
  116. agentops/templates/waf-checklist.README.md +84 -0
  117. agentops/templates/waf-checklist.csv +22 -0
  118. agentops/templates/workflows/agentops-deploy-dev-azd.yml +166 -0
  119. agentops/templates/workflows/agentops-deploy-dev.yml +187 -0
  120. agentops/templates/workflows/agentops-deploy-prod-azd.yml +183 -0
  121. agentops/templates/workflows/agentops-deploy-prod.yml +171 -0
  122. agentops/templates/workflows/agentops-deploy-prompt-agent.yml +197 -0
  123. agentops/templates/workflows/agentops-deploy-qa-azd.yml +156 -0
  124. agentops/templates/workflows/agentops-deploy-qa.yml +145 -0
  125. agentops/templates/workflows/agentops-pr-prompt-agent.yml +210 -0
  126. agentops/templates/workflows/agentops-pr.yml +148 -0
  127. agentops/templates/workflows/agentops-watchdog.yml +122 -0
  128. agentops/utils/__init__.py +1 -0
  129. agentops/utils/azd_env.py +435 -0
  130. agentops/utils/azure_endpoints.py +62 -0
  131. agentops/utils/colors.py +47 -0
  132. agentops/utils/dotenv_loader.py +105 -0
  133. agentops/utils/foundry_discovery.py +229 -0
  134. agentops/utils/logging.py +59 -0
  135. agentops/utils/telemetry.py +554 -0
  136. agentops/utils/yaml.py +36 -0
  137. agentops_accelerator-0.3.0.dist-info/METADATA +278 -0
  138. agentops_accelerator-0.3.0.dist-info/RECORD +142 -0
  139. agentops_accelerator-0.3.0.dist-info/WHEEL +5 -0
  140. agentops_accelerator-0.3.0.dist-info/entry_points.txt +2 -0
  141. agentops_accelerator-0.3.0.dist-info/licenses/LICENSE +21 -0
  142. agentops_accelerator-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,471 @@
1
+ ---
2
+ name: agentops-workflow
3
+ description: "Set up AgentOps release-readiness workflows: PR eval gates, Doctor/evidence artifacts, and safe deploy handoffs to azd or Foundry prompt-agent tooling. Trigger on CI, CD, pipeline, workflow, GitHub Actions, Azure DevOps, ADO, PR gate, deploy, environments, GitFlow, release branch, promote to prod, DevOps, can we ship."
4
+ ---
5
+
6
+ # AgentOps Workflow
7
+
8
+ Help the user wire AgentOps into the release path so every candidate has a
9
+ clear gate and proof pack. The default starting point is a PR eval gate. Full
10
+ DEV/QA/PROD workflows are useful only after Azure auth, environments, and a real
11
+ deployment owner are configured.
12
+
13
+ **Pick the platform up front.** AgentOps supports two:
14
+
15
+ - `--platform github` (default) - writes `.github/workflows/*.yml` using
16
+ GitHub Actions. Auth via OIDC + GitHub Environments.
17
+ - `--platform azure-devops` - writes `.azuredevops/pipelines/*.yml` using
18
+ Azure DevOps Pipelines. Auth via a Service Connection + a variable
19
+ group named `agentops`.
20
+
21
+ The conceptual workflows are identical: one PR gate and optional deploy stages
22
+ (dev/qa/prod). The PR and production templates already run
23
+ `agentops doctor --evidence-pack` so reviewers get `evidence.json` and
24
+ `evidence.md` in artifacts and, for GitHub Actions, in the run summary with the
25
+ Doctor finding summary. A separate scheduled Doctor workflow is optional for
26
+ periodic health checks, not the default release path.
27
+
28
+ For a new repository or tutorial, start with the PR gate only:
29
+ `agentops workflow generate --kinds pr`. Generate DEV/QA/PROD deploy
30
+ workflows only after environments, Azure auth, and real build/deploy
31
+ commands are configured.
32
+
33
+ For copied accelerators or unfamiliar repos (for example GPT-RAG, Live Voice
34
+ Practice, AI Landing Zone/Bicep-based apps), run `agentops workflow analyze`
35
+ first and use the findings as the implementation plan before generating or
36
+ editing workflows.
37
+
38
+ AgentOps reuses **azd** for app/infrastructure deployment when the repo already
39
+ has an azd project, and stays **Foundry-native** for prompt-agent candidate
40
+ workflows. Do not invent a parallel deployment system. AgentOps should gate
41
+ quality and record proof; `azd provision`, `azd deploy`, azd hooks, Foundry
42
+ Toolkit, the `microsoft-foundry` skill, and project tooling own lifecycle
43
+ actions.
44
+
45
+ For Foundry prompt-agent configs (`agent: name:version`), the generated eval gate
46
+ should use **AgentOps cloud eval in Foundry**: a temporary cloud config plus
47
+ `agentops eval run`, not the legacy official Action/task. Foundry still executes
48
+ the managed eval; AgentOps enforces thresholds, writes `results.json` /
49
+ `report.md`, and makes PR failures explainable in the summary.
50
+
51
+ ## Fast path - generated GitHub setup
52
+
53
+ Use this path when the user already generated GitHub workflows or asks to get
54
+ the PR gate running. Stay local-first and deterministic; do not start
55
+ by discovering the whole Azure subscription.
56
+
57
+ 1. Inspect the repo before cloud discovery:
58
+ - `agentops init show --dir .` without `--reveal-secrets`.
59
+ - `agentops.yaml`.
60
+ - `.agentops/.env`, plus `.azure/config.json` and active `.azure/<env>/.env`
61
+ when the repo uses azd.
62
+ - `azd env get-values` when `azure.yaml` exists and azd is available.
63
+ - `.github/workflows/agentops-*.yml`.
64
+ 2. Read the generated workflows to determine exactly which GitHub environments
65
+ and variables are needed. For the prompt-agent quickstart, `pr` normally
66
+ means only `environment: dev`.
67
+ 3. Treat `dev` here as a GitHub Actions environment for OIDC and variables. It
68
+ normally points at the Foundry project already configured by `agentops init`;
69
+ it does not require creating a new Foundry project.
70
+ 4. Proceed only when these values are known or deliberately chosen:
71
+ - GitHub `owner/repo`.
72
+ - workflow environment names from `jobs.*.environment`.
73
+ - `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID`.
74
+ - `AZURE_AI_FOUNDRY_PROJECT_ENDPOINT`.
75
+ - `AZURE_OPENAI_DEPLOYMENT`.
76
+ - optional `APPLICATIONINSIGHTS_CONNECTION_STRING`.
77
+ 5. Prefer existing values and exact checks:
78
+ - `git remote get-url origin` and `gh repo view --json nameWithOwner`.
79
+ - `gh variable list --env <env>` and `gh secret list --env <env>`.
80
+ - `agentops init show`, local `.agentops/.env` or `.azure/<env>/.env`, and
81
+ `azd env get-values` values before `az account show`.
82
+ - `az account show` only as a proposal for tenant/subscription; confirm
83
+ before writing it to GitHub variables.
84
+ 6. Copy CI variables from local AgentOps/azd configuration into the GitHub
85
+ environment used by the workflow. Reuse local values for
86
+ `AZURE_AI_FOUNDRY_PROJECT_ENDPOINT`, `AZURE_OPENAI_ENDPOINT`,
87
+ `AZURE_OPENAI_DEPLOYMENT`, and optional
88
+ `APPLICATIONINSIGHTS_CONNECTION_STRING` instead of asking the user to type
89
+ them again. Explain `AZURE_OPENAI_DEPLOYMENT` only if it is missing: it is
90
+ the Azure OpenAI deployment used as the evaluator/judge model, not the
91
+ user's agent.
92
+ 7. Do not enumerate subscriptions, Foundry projects, Azure OpenAI resources, or
93
+ model deployments to guess missing values. If `AZURE_SUBSCRIPTION_ID`,
94
+ `AZURE_TENANT_ID`, `AZURE_AI_FOUNDRY_PROJECT_ENDPOINT`, or
95
+ `AZURE_OPENAI_DEPLOYMENT` is absent from AgentOps/azd/local env, ask the user
96
+ to choose or provide it. Only run a scoped Azure query after the user confirms
97
+ the subscription and the exact missing value.
98
+ 8. For GitHub OIDC, derive the federated credential subject from the generated
99
+ workflow. If the job has `environment: dev`, the subject is normally
100
+ `repo:<owner>/<repo>:environment:dev`. Do not assume branch or
101
+ `pull_request` subjects without reading the workflow.
102
+ 9. Before triggering a Foundry prompt-agent workflow, make sure the OIDC app /
103
+ service principal has Foundry data-plane access. It needs **Foundry User**
104
+ (role id `53ca6127-db72-4b80-b1b0-d745d6d5456d`, formerly Azure AI User) at
105
+ the Foundry project scope, or at the Foundry resource scope if that is the
106
+ team's standard. Azure **Reader** is not enough; without this role the eval
107
+ step fails on
108
+ `Microsoft.CognitiveServices/accounts/AIServices/agents/read`.
109
+ 10. If the Foundry RBAC assignment is missing, do not run the workflow yet.
110
+ Show the exact GitHub OIDC client ID / service principal, desired role, and
111
+ target Foundry scope, then ask the user to approve the role assignment or
112
+ get an Azure/Foundry admin to grant it. After assignment, read it back or ask
113
+ the user to confirm before dispatching the workflow.
114
+ When the user approves and you know the Foundry scope, use the role id to
115
+ avoid rename drift:
116
+ - `az ad sp show --id <AZURE_CLIENT_ID> --query id -o tsv`
117
+ - `az role assignment list --assignee <sp-object-id> --scope <foundry-scope> --include-inherited`
118
+ - `az role assignment create --assignee-object-id <sp-object-id> --assignee-principal-type ServicePrincipal --role 53ca6127-db72-4b80-b1b0-d745d6d5456d --scope <foundry-scope>`
119
+ 11. Ask before creating or updating GitHub repos, GitHub environments,
120
+ variables/secrets, Entra app registrations/service principals, federated
121
+ credentials, managed identities, or Azure RBAC assignments.
122
+ 12. When creating federated credentials from PowerShell, avoid fragile
123
+ interpolation. Do **not** write `"repo:$repo:environment:$envName"` because
124
+ `$repo:` can be parsed as a scoped variable. Use
125
+ `"repo:${repo}:environment:${envName}"` or
126
+ `("repo:{0}:environment:{1}" -f $repo, $envName)`, then build JSON from a
127
+ PowerShell object with `ConvertTo-Json`.
128
+ 13. After creating or updating a federated credential, read it back and verify
129
+ before triggering a workflow:
130
+ - `subject` exactly matches the generated workflow subject.
131
+ - `issuer` is `https://token.actions.githubusercontent.com`.
132
+ - `audiences` includes `api://AzureADTokenExchange`.
133
+ If any value differs, fix the credential before running GitHub Actions.
134
+ 14. Do not dispatch `gh workflow run` as a surprise validation step. First show
135
+ that the GitHub environment, variables/secrets, federated credential, and
136
+ Foundry RBAC are ready, then ask the user before triggering workflows.
137
+ 15. Avoid broad discovery unless local config is missing. Do **not** run broad
138
+ `az resource list`, `az graph query`, SDK inspection, or web search to find
139
+ the Foundry project when `agentops init show`, `.agentops/.env`, or
140
+ `.azure/<env>/.env` already has `AZURE_AI_FOUNDRY_PROJECT_ENDPOINT`. If the
141
+ endpoint is missing, say exactly what is missing and ask the user before
142
+ scanning the subscription.
143
+
144
+ ## Branch model assumed
145
+
146
+ ```
147
+ feature/* ── PR ──▶ develop [agentops-pr] gate
148
+
149
+ └── merge ─▶ develop [agentops-deploy-dev] build + eval + deploy DEV
150
+ release/* ── push [agentops-deploy-qa] build + eval + deploy QA
151
+ release/* ── PR ──▶ main [agentops-pr] gate
152
+
153
+ └── merge ─▶ main [agentops-deploy-prod] safety eval + build + deploy PROD
154
+ ```
155
+
156
+ If the user is on trunk-based development, omit `qa` and `release/**`
157
+ and have them generate `--kinds pr,dev,prod`.
158
+
159
+ The PR workflow uses the eval step as the hard merge gate. Doctor also
160
+ runs there and writes release evidence; by default the Doctor step blocks
161
+ the PR on critical findings such as regression detection (`--severity-fail
162
+ critical`, the default behavior of `agentops workflow generate
163
+ --doctor-gate critical`). This catches metric drops (for example
164
+ groundedness going from 5.0 to 4.0) that would still pass the configured
165
+ eval thresholds. To restore the earlier advisory-only behavior — Doctor writes
166
+ release evidence but does not block the PR — generate with `--doctor-gate
167
+ none`. DEV/QA/PROD deploy workflows always keep Doctor as a critical
168
+ release gate; the `--doctor-gate` flag only controls the PR template.
169
+
170
+ ## Step 0 - Prerequisites
171
+
172
+ 1. `pip install "agentops-accelerator @ git+https://github.com/Azure/agentops.git@main"` if `agentops` is missing.
173
+ 2. `agentops eval analyze` has been reviewed, `agentops.yaml` exists at the
174
+ project root, and `agentops eval run` works locally.
175
+ 3. The user's repo follows GitFlow (or is willing to). If not, ask which
176
+ branches map to dev/qa/prod and adjust the triggers after
177
+ generation.
178
+
179
+ ## Step 1 - Generate the workflows
180
+
181
+ First analyze the repo shape:
182
+
183
+ ```bash
184
+ agentops workflow analyze
185
+ agentops workflow analyze --format markdown --out agentops-workflow-plan.md
186
+ ```
187
+
188
+ Use the analysis to decide whether `--deploy-mode auto` is enough or whether
189
+ you need to adapt placeholders/project-specific deployment. The analyzer is
190
+ local-only and looks for `azure.yaml`, Bicep, AgentOps prompt-agent config,
191
+ landing-zone manifests, private-network signals, Docker/Container Apps signals,
192
+ and existing CI folders. Treat README matches as hints only; structural files
193
+ drive the recommendation.
194
+
195
+ **GitHub Actions (default):**
196
+
197
+ ```bash
198
+ agentops workflow generate --kinds pr
199
+ # or full scaffold:
200
+ agentops workflow generate --kinds pr,dev,qa,prod --force
201
+ ```
202
+
203
+ **Azure DevOps Pipelines:**
204
+
205
+ ```bash
206
+ agentops workflow generate --platform azure-devops --kinds pr
207
+ # or full scaffold:
208
+ agentops workflow generate --platform azure-devops --kinds pr,dev,qa,prod --force
209
+ ```
210
+
211
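+ The full scaffold writes the `pr`, `dev`, `qa`, and `prod` files below (the `doctor` row is generated only with `--kinds doctor`):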
212
+
213
+ | Kind | GitHub Actions path | Azure DevOps path | Trigger | Environment |
214
+ |---|---|---|---|---|
215
+ | `pr` | `.github/workflows/agentops-pr.yml` | `.azuredevops/pipelines/agentops-pr.yml` | PRs to `develop`, `release/**`, `main` | `dev` |
216
+ | `dev` | `.github/workflows/agentops-deploy-dev.yml` | `.azuredevops/pipelines/agentops-deploy-dev.yml` | push to `develop` | `dev` |
217
+ | `qa` | `.github/workflows/agentops-deploy-qa.yml` | `.azuredevops/pipelines/agentops-deploy-qa.yml` | push to `release/**` | `qa` |
218
+ | `prod` | `.github/workflows/agentops-deploy-prod.yml` | `.azuredevops/pipelines/agentops-deploy-prod.yml` | push to `main` | `production` |
219
+ | `doctor` | `.github/workflows/agentops-doctor.yml` | `.azuredevops/pipelines/agentops-doctor.yml` | daily cron (06:00 UTC) | `dev` |
220
+
221
+ PR and PROD workflows upload release evidence. Explain that this is a
222
+ projection of existing eval/Doctor/Foundry/monitoring signals, not a separate
223
+ exit-code contract. Generate the optional scheduled Doctor workflow only when
224
+ the team explicitly wants periodic health-check artifacts outside PR/release
225
+ events.
226
+
227
+ Useful flags:
228
+
229
+ - `--platform github | azure-devops` - pick the CI/CD platform.
230
+ - `--force` - overwrite existing workflow files.
231
+ - `--kinds pr,dev,qa,prod` - generate a subset. Prefer `--kinds pr`
232
+ until deploy environments are configured.
233
+ - `--kinds doctor` - optional scheduled Doctor-only workflow for periodic
234
+ checks. Do not use it as a substitute for the PR gate.
235
+ - `--deploy-mode auto|placeholder|azd|prompt-agent` - `auto` uses azd
236
+ templates when `azure.yaml` exists, otherwise uses prompt-agent templates
237
+ when `agentops.yaml` targets a Foundry prompt agent; `azd` forces
238
+ `azd provision` / `azd deploy`; `prompt-agent` stages/evaluates a Foundry
239
+ prompt candidate; `placeholder` keeps the generic stack-agnostic scaffold.
240
+ - `--dir <path>` - non-default repo root.
241
+
242
+ ## Step 2 - Configure environments and Azure auth
243
+
244
+ ### GitHub Actions
245
+
246
+ Read the generated workflow files and create only the GitHub Environments used
247
+ by `jobs.*.environment`. For `pr`, that is usually only **`dev`**. For the full
248
+ scaffold, create **`dev`**, **`qa`**, and **`production`**.
249
+
250
+ - **`dev`** - no extra protection. Store the OIDC variables here when the
251
+ generated jobs use `environment: dev`.
252
+ - **`qa`** - usually no required reviewers, but isolated variables for QA.
253
+ - **`production`** - set required reviewers, optional wait timer, optional
254
+ deployment branch restriction to `main`, and production-specific variables.
255
+
256
+ Tell the user that environment-level variables override repository-level ones
257
+ inside jobs that declare that environment.
258
+
259
+ ### Azure DevOps
260
+
261
+ In **Pipelines → Environments**, create three environments: `dev`, `qa`,
262
+ `production`. On `production`, add a manual approval check (Approvals
263
+ and checks → New check → Approvals).
264
+
265
+ In **Pipelines → Library**, create a variable group named `agentops`
266
+ with these variables (mark sensitive ones as secret if needed):
267
+
268
+ - `AZURE_AI_FOUNDRY_PROJECT_ENDPOINT`
269
+ - `AZURE_OPENAI_ENDPOINT`
270
+ - `AZURE_OPENAI_DEPLOYMENT`
271
+ - `APPLICATIONINSIGHTS_CONNECTION_STRING` - optional fallback if the
272
+ Foundry project's App Insights connection cannot be auto-discovered.
273
+
274
+ In **Project settings → Service connections**, create an Azure Resource
275
+ Manager service connection named `agentops-azure` scoped to the
276
+ subscription that hosts your Foundry project.
277
+
278
+ Grant the build service "Contribute to pull requests" permission on the
279
+ repository (Project settings → Repositories → Security → `Build Service`)
280
+ so the PR-comment step can post.
281
+
282
+ ## Step 3 - Configure Azure auth
283
+
284
+ ### GitHub Actions (OIDC)
285
+
286
+ At the GitHub Environment level when the workflow declares an environment
287
+ (preferred for the quickstart), or at repository level when intentionally shared
288
+ across environments, set:
289
+
290
+ - `AZURE_CLIENT_ID` - App registration / managed identity used for OIDC.
291
+ - `AZURE_TENANT_ID`
292
+ - `AZURE_SUBSCRIPTION_ID`
293
+ - `AZURE_AI_FOUNDRY_PROJECT_ENDPOINT` - Foundry project URL used by the
294
+ eval step.
295
+ - `AZURE_OPENAI_DEPLOYMENT` - existing Azure OpenAI deployment used as the
296
+ evaluator/judge model. Reuse the local AgentOps/azd value when available.
297
+ - `APPLICATIONINSIGHTS_CONNECTION_STRING` - optional fallback as a
298
+ variable or secret. Generated workflows first try to auto-discover App
299
+ Insights from the Foundry project endpoint; this value makes eval and
300
+ Doctor telemetry explicit.
301
+
302
+ Then configure Workload Identity Federation on the Azure side
303
+ (`federated-credentials` on the app registration) for **each branch /
304
+ environment** the workflows will run from. See
305
+ `docs/ci-github-actions.md` for the exact `az` commands.
306
+
307
+ Also grant the same app registration / service principal **Foundry User** on the
308
+ Foundry project or Foundry resource before the first workflow run. The PR gate
309
+ uses Foundry data-plane APIs to read prompt agents; Azure `Reader` only proves
310
+ ARM access and will still fail the eval step with
311
+ `Microsoft.CognitiveServices/accounts/AIServices/agents/read`.
312
+
313
+ Tell the user that CI evals emit `agentops.eval.*` telemetry and scheduled
314
+ Doctor runs emit `agentops.agent.finding.*` telemetry when App Insights is
315
+ configured or auto-discovered. The Cockpit uses those signals for Azure
316
+ Monitor deep links.
317
+
318
+ ### Azure DevOps (Service Connection)
319
+
320
+ Already done in Step 2 - the `agentops-azure` service connection
321
+ handles auth. Make sure the underlying service principal or managed
322
+ identity has the **Foundry User** role on the Foundry project or resource.
323
+
324
+ ## Step 4 - Use azd for deployment
325
+
326
+ If the repo already has `azure.yaml`, generate azd-backed deployment
327
+ workflows:
328
+
329
+ ```bash
330
+ agentops workflow generate --kinds pr,dev,qa,prod --deploy-mode azd --force
331
+ ```
332
+
333
+ The deploy workflows will:
334
+
335
+ 1. run `azd env new ... || azd env select ...` in CI;
336
+ 2. run `azd provision --no-prompt` for DEV by default;
337
+ 3. run `azd provision --no-prompt` for QA/PROD only when manually
338
+ requested (`provision=true` in GitHub Actions or
339
+ `RUN_AZD_PROVISION=true` in Azure DevOps);
340
+ 4. run `agentops eval run` as the quality/safety gate;
341
+ 5. run `azd env refresh` on the deploy runner so a fresh CI workspace can
342
+ recover outputs from the previous infrastructure provision;
343
+ 6. run `azd deploy --no-prompt`.
344
+
345
+ Set `AZURE_ENV_NAME` per GitHub Environment / Azure DevOps variable
346
+ group if the user's azd env names are not exactly `dev`, `qa`, and
347
+ `production`. Set `AZURE_LOCATION` when the azd template needs an
348
+ explicit region.
349
+
350
+ ### If the user asks for "zero-trust deployment"
351
+
352
+ Do **not** replicate azd. Do this instead:
353
+
354
+ 1. Inspect the app and ask only for missing critical choices (region,
355
+ target host, private networking yes/no if not obvious).
356
+ 2. Prefer an existing azd template or AVM-backed template that already
357
+ implements managed identity, RBAC-only data access, private endpoints
358
+ where required, and no secrets in source.
359
+ 3. Create or adapt `azure.yaml`, `infra/`, and azd-native hooks declared
360
+ in `azure.yaml` (`preprovision`, `postprovision`, `predeploy`,
361
+ `postdeploy`) as needed.
362
+ 4. Run `azd provision` to validate the infrastructure path.
363
+ 5. Re-run `agentops workflow generate --deploy-mode azd --force` so CI
364
+ delegates provision/deploy to azd.
365
+
366
+ Never call ad-hoc hook scripts from the workflow (for example
367
+ `./agentops/deploy.sh` or `./.azd/hooks/*`). If custom behavior is
368
+ needed, put it behind azd's native hook mechanism in `azure.yaml`.
369
+
370
+ ### Copied accelerators / AI Landing Zone apps
371
+
372
+ For Azure AI accelerators copied from templates, use AgentOps to make the
373
+ landing-zone path actionable:
374
+
375
+ 1. AgentOps owns eval gates, Doctor, reports, Cockpit readiness, and the
376
+ workflow guardrails around deployment.
377
+ 2. Foundry owns hosted agents, prompt-agent versions, evaluations, traces,
378
+ monitoring, datasets, and operations.
379
+ 3. azd/Bicep/AILZ owns app and infrastructure deploy when `azure.yaml` or
380
+ `infra/*.bicep` exists.
381
+ 4. Project-specific steps such as indexing, data seeding, model deployment,
382
+ container build/push, App Config updates, or private-network post-provision
383
+ work stay in azd hooks or existing project tooling.
384
+
385
+ If `scripts/Invoke-PreflightChecks.ps1` exists, keep it in the deployment path:
386
+ AgentOps-generated azd workflows run it with `-Strict` before `azd provision`.
387
+ Doctor surfaces the same path as `AI Landing Zone deployment readiness`, with
388
+ evidence for preflight, `agentops.yaml`, azd workflow coverage, network
389
+ isolation, and the private runner path.
390
+
391
+ If `agentops workflow analyze` reports network isolation, private endpoints,
392
+ jumpbox/Bastion, Azure Firewall, or ACR Tasks, do not assume GitHub-hosted
393
+ runners can deploy everything. Plan self-hosted runner, jumpbox handoff, or ACR
394
+ Tasks agent-pool execution before enabling DEV/QA/PROD deploy stages.
395
+
396
+ If `azure.yaml` is missing and the user is not asking to create the
397
+ deployment assets yet, check whether this is a Foundry prompt agent. If
398
+ `agentops.yaml` has `agent: "name:version"`, prefer prompt-agent mode:
399
+
400
+ ```bash
401
+ agentops workflow generate --kinds pr,dev,qa,prod --deploy-mode prompt-agent --force
402
+ ```
403
+
404
+ Prompt-agent workflows:
405
+
406
+ 1. read `prompt_file` from `agentops.yaml` or
407
+ `AGENTOPS_AGENT_PROMPT_FILE`;
408
+ 2. create or reuse a candidate Foundry prompt-agent version from that file;
409
+ 3. generate `.agentops/deployments/agentops.candidate.yaml`;
410
+ 4. run `agentops eval run` against the candidate version;
411
+ 5. record `.agentops/deployments/foundry-agent.json` as a deployment
412
+ artifact only when the gate passes.
413
+
414
+ This avoids the bad pattern of evaluating one agent version and deploying a
415
+ different prompt. The invariant is: **evaluated version == deployed version**.
416
+ Foundry manages agent versions; AgentOps owns the repo-side gate and
417
+ deployment record.
418
+
419
+ If this is not a Foundry prompt agent and azd is not ready, generate
420
+ `--kinds pr` only or use `--deploy-mode placeholder`. Do not ship
421
+ DEV/QA/PROD workflows that pretend deployment is wired.
422
+
423
+ ## Step 5 - Branch protection
424
+
425
+ On GitHub, in the repository's **Settings → Branches**, add a branch protection rule for each of `develop` and `main`:
426
+
427
+ - Require a pull request before merging.
428
+ - Require status checks to pass: select **`AgentOps PR / Eval (PR gate)`**
429
+ (the job name from `agentops-pr.yml`).
430
+ - Optional: require linear history.
431
+
432
+ This makes the eval gate a hard merge requirement.
433
+
434
+ ## Step 6 - Iterate
435
+
436
+ Common follow-ups:
437
+
438
+ - **Tighten thresholds for QA/PROD** - copy `agentops.yaml` to
439
+ `agentops-qa.yaml` / `agentops-prod.yaml` and tighten the
440
+ `thresholds:` block. Point each workflow at its own config via the
441
+ `inputs.config` default.
442
+ - **Scheduled runs** - add a `schedule:` entry in `agentops-pr.yml` (or a
443
+ new `agentops-nightly.yml`) to evaluate against `main` nightly.
444
+ - **Matrix per scenario** - if the user has multiple AgentOps config files,
445
+ extend the eval job with `strategy.matrix.config:` and reference
446
+ `${{ matrix.config }}`.
447
+ - **Regression baseline** - wire the deploy templates to download the
448
+ previous run's `results.json` artifact and call
449
+ `agentops eval run --baseline <results.json>`.
450
+
451
+ ## Guardrails
452
+
453
+ - Do **not** invent CLI flags. The supported `workflow analyze` flags are
454
+ `--dir`, `--format`, and `--out`. The supported `workflow generate` flags are
455
+ `--force`, `--dir`, `--kinds`, `--platform`, `--deploy-mode`, and
456
+ `--doctor-gate`.
457
+ - Do **not** push DEV/QA/PROD deploy workflows with placeholder
458
+ Build/Deploy steps or missing OIDC variables; generate PR-only first.
459
+ - Do **not** create parallel workflow files. Prefer editing the
460
+ generated ones.
461
+ - Do **not** auto-fill app/infrastructure deployment with raw Azure CLI
462
+ steps that bypass azd. AgentOps gates; azd provisions and deploys. For
463
+ Foundry prompt agents, use `--deploy-mode prompt-agent` so the workflow
464
+ calls the Foundry SDK and evaluates the candidate version before marking
465
+ it deployed.
466
+ - Do **not** use AgentOps workflows to create or deploy Foundry Hosted Agents.
467
+ Use Foundry Toolkit / the `microsoft-foundry` skill / the app's azd path,
468
+ then point AgentOps at the deployed URL for gates and evidence.
469
+ - The four workflow names (`agentops-pr`, `agentops-deploy-dev`,
470
+ `agentops-deploy-qa`, `agentops-deploy-prod`) are fixed - don't rename
471
+ them or branch-protection wiring will break.
@@ -0,0 +1,3 @@
1
+ {"input": "Answer with exactly this sentence: Paris is the capital of France and one of Europe's major cultural centers.", "expected": "Paris is the capital of France and one of Europe's major cultural centers."}
2
+ {"input": "Answer with exactly this sentence: Mars is known as the Red Planet because iron-rich dust gives its surface a reddish color.", "expected": "Mars is known as the Red Planet because iron-rich dust gives its surface a reddish color."}
3
+ {"input": "Answer with exactly this sentence: Water has the chemical formula H2O because each molecule contains two hydrogen atoms and one oxygen atom.", "expected": "Water has the chemical formula H2O because each molecule contains two hydrogen atoms and one oxygen atom."}
@@ -0,0 +1,84 @@
1
+ # `waf-checklist.csv` — workspace WAF checklist
2
+
3
+ This CSV is a **workspace override** for the AgentOps Doctor's WAF
4
+ knowledge base. The Doctor reads `.agentops/waf-checklist.csv` on
5
+ every run; rows here either:
6
+
7
+ * **Override** rows shipped in the packaged checklist
8
+ (`agentops/agent/knowledge/waf-checklist.csv`) by matching the
9
+ `doctor_check_id` column, **or**
10
+ * **Extend** the checklist with new rows that map your own custom
11
+ finding ids onto WAF pillars.
12
+
13
+ The seed file copied into your workspace by `agentops init` ships a
14
+ **curated shortlist** drawn from the public **Microsoft Azure AI
15
+ Landing Zones Checklist** (177 items), filtered to *only* items the
16
+ Doctor verifies automatically today.
17
+
18
+ ## Curation policy
19
+
20
+ * **Automation-only.** The Doctor is an *agent*; its purpose is to
21
+ remove manual toil. The seed checklist therefore ships only items
22
+ where the Doctor already has a working, deterministic rule (or an
23
+ opt-in LLM-judged rule) that surfaces the signal without human
24
+ intervention.
25
+ * No `manual` rows, no `planned` rows. New items earn their spot
26
+ here when their rule actually runs — not as a "we will do this
27
+ later" promise that is easy to forget.
28
+ * Sourced from the public AI Landing Zones Checklist
29
+ (https://learn.microsoft.com/azure/cloud-adoption-framework/scenarios/ai/),
30
+ prioritized by severity / leverage.
31
+
32
+ ## Column reference
33
+
34
+ | Column | Meaning |
35
+ |---|---|
36
+ | `pillar` | WAF pillar — `Security`, `Reliability`, `Performance`, `OperationalExcellence`, or `Cost`. |
37
+ | `area` | Free-text sub-area (e.g. `Identity`, `CI-CD`, `Telemetry`). |
38
+ | `item_id` | Stable short id. Items sourced from the AI Landing Zones Checklist use `ai_lz.AI.<n>`. |
39
+ | `title` | User-facing short label. Includes the original `[AI.<n>]` reference for traceability. |
40
+ | `detection_source` | Doctor source(s) that produce this signal — `results_history`, `azure_monitor`, `foundry_control`, `azure_resources`, `workspace_files`. |
41
+ | `detection_signal` | Short description of how the rule checks the signal (programmatic predicate or LLM judge). |
42
+ | `doctor_check_id` | The finding id Doctor emits. **A row with this column empty is ignored by the loader.** |
43
+ | `status` | Always `implemented` in this file — the policy is to ship only working checks. |
44
+ | `reference_url` | Public Microsoft Learn URL for the WAF pillar. |
45
+
46
+ ## How to extend
47
+
48
+ Add a new row at the bottom of the CSV when you implement a new
49
+ deterministic check or LLM-judged rule. Example:
50
+
51
+ ```csv
52
+ OperationalExcellence,Custom,my.team.review_cadence,Monthly review cadence enforced,workspace_files,review-log file modified within last 35 days,opex.review_cadence,implemented,https://your-wiki/policies/review
53
+ ```
54
+
55
+ Two practical rules to keep in mind:
56
+
57
+ 1. **Stay strict on `doctor_check_id`** — the loader skips rows
58
+ whose `doctor_check_id` is empty. The id you put here must match
59
+ the id the rule actually emits at runtime, otherwise the WAF
60
+ citation won't show up next to the finding.
61
+ 2. **No comment lines.** CSV has no portable comment syntax; this
62
+ `README.md` is the canonical place for documentation. The Doctor's
63
+ loader currently tolerates `#`-prefixed lines (they're skipped at
64
+ the `_row_to_item` filter), but Excel / pandas / any third-party
65
+ parser will treat them as data and clutter the file.
66
+
67
+ ## Seed shortlist
68
+
69
+ 21 items, all with a working rule. Per-pillar coverage is uneven by
70
+ design — we ship what verifies automatically, not a forced 10-per-
71
+ pillar quota.
72
+
73
+ | Pillar | Implemented |
74
+ |---|---:|
75
+ | Security | 6 |
76
+ | OperationalExcellence | 7 |
77
+ | Reliability | 4 |
78
+ | Performance | 3 |
79
+ | Cost | 1 |
80
+ | **Total** | **21** |
81
+
82
+ The Cost pillar has a single automated check today: `max_tokens`
83
+ enforcement (`opex.max_tokens_undefined`), matched in `agentops.yaml` and `.agentops/bundles/*.yaml`.
84
+ When further cost rules land (e.g., a budget-alert audit via the Cost Management API), they will be added here.
@@ -0,0 +1,22 @@
1
+ pillar,area,item_id,title,detection_source,detection_signal,doctor_check_id,status,reference_url
2
+ Security,Identity,ai_lz.AI.102,Disable local (key-based) auth on the Azure OpenAI account [AI.102],azure_resources,account.disable_local_auth == true,waf.security.local_auth_disabled,implemented,https://learn.microsoft.com/azure/well-architected/ai/security
3
+ Security,Identity,ai_lz.AI.62,Use Microsoft Entra Auth with Managed Identity instead of API keys [AI.62],azure_resources,"account.identity.type in {SystemAssigned, UserAssigned}",waf.security.managed_identity,implemented,https://learn.microsoft.com/azure/well-architected/ai/security
4
+ Security,Network,ai_lz.AI.103,Restrict access to selected virtual networks or use private endpoints [AI.103],azure_resources,account.publicNetworkAccess == Disabled or private endpoint present,waf.security.public_network_access,implemented,https://learn.microsoft.com/azure/well-architected/ai/security
5
+ Security,Telemetry,ai_lz.AI.86,Diagnostic logs enabled on Azure AI service resources [AI.86],azure_resources,account has at least one diagnostic setting with a workspace_id,waf.security.diagnostic_settings,implemented,https://learn.microsoft.com/azure/well-architected/ai/security
6
+ Security,ContentSafety,ai_lz.AI.52,Content-Safety baseline content filter attached to approved models [AI.52],azure_resources + azure_monitor,no content-filter triggers AND RAI policy attached to deployments,safety.runtime.content_filter,implemented,https://learn.microsoft.com/azure/well-architected/ai/security
7
+ Security,ContentSafety,ai_lz.AI.93,Jailbreak / prompt-injection risk detection on the agent prompt [AI.93],foundry_control,LLM scans system prompt for override-phrasing / unbounded role-play,responsible_ai.llm.prompt_jailbreak_surface,implemented,https://learn.microsoft.com/azure/well-architected/ai/security
8
+ OperationalExcellence,Foundry,ai_lz.AI.130,Enable monitoring for Azure OpenAI / Foundry instances [AI.130],foundry_control,foundry_control.diagnostics.status == ok,opex.no_foundry_control_configured,implemented,https://learn.microsoft.com/azure/well-architected/ai/operations
9
+ OperationalExcellence,CI-CD,ai_lz.AI.135,Repository has an AgentOps PR gate (IaC + eval CI) [AI.135],workspace_files,.github/workflows/agentops-pr.yml exists,opex.no_pr_gate,implemented,https://learn.microsoft.com/azure/well-architected/ai/operations
10
+ OperationalExcellence,CI-CD,ai_lz.AI.2,Repository has deploy workflows so evals re-run after deployment [AI.2],workspace_files,at least one .github/workflows/agentops-deploy-*.yml exists,opex.no_deploy_workflow,implemented,https://learn.microsoft.com/azure/well-architected/ai/operations
11
+ OperationalExcellence,Versioning,ai_lz.AI.128,Agent target pinned to a version (detect model / prompt drift) [AI.128],workspace_files,agentops.yaml agent: <name>:<version>,opex.unpinned_agent,implemented,https://learn.microsoft.com/azure/well-architected/ai/operations
12
+ OperationalExcellence,Evaluation,ai_lz.AI.172,Evaluate system with a golden dataset (regression detection) [AI.172],results_history,regression check: latest_metric - baseline_metric > threshold_drop,regression,implemented,https://learn.microsoft.com/azure/well-architected/ai/operations
13
+ OperationalExcellence,Evaluation,ai_lz.AI.147,Evaluator bundle covers groundedness / relevance / coherence for the agent [AI.147],workspace_files,LLM judges bundle YAML against agent description,opex.llm.bundle_coverage,implemented,https://learn.microsoft.com/azure/well-architected/ai/operations
14
+ Reliability,Telemetry,ai_lz.AI.115,Production telemetry is wired and emitting traces [AI.115],azure_monitor,monitor.status == ok AND request_count > 0 over lookback,errors.no_runtime_telemetry,implemented,https://learn.microsoft.com/azure/well-architected/ai/reliability
15
+ Reliability,ErrorRate,ai_lz.AI.9,Production error rate under threshold (retry + healthchecks effective) [AI.9],azure_monitor,errors / requests > rate_threshold over the lookback window,errors.production_rate,implemented,https://learn.microsoft.com/azure/well-architected/ai/reliability
16
+ Reliability,FoundryRuns,ai_lz.AI.13,Foundry agent runs not failing [AI.13],foundry_control,foundry.failure_rate > rate_threshold,errors.foundry_runs,implemented,https://learn.microsoft.com/azure/well-architected/ai/reliability
17
+ Performance,Latency,ai_lz.AI.113,p95 latency under threshold (App Insights + eval history) [AI.113],azure_monitor + results_history,p95_seconds > p95_threshold_seconds,latency,implemented,https://learn.microsoft.com/azure/well-architected/ai/performance-efficiency
18
+ Performance,Stability,ai_lz.AI.143,Eval metrics are stable across runs (no flaky judge) [AI.143],results_history,coefficient of variation across last N runs > flaky_cv_threshold,opex.flaky_metric,implemented,https://learn.microsoft.com/azure/well-architected/ai/performance-efficiency
19
+ Performance,Freshness,ai_lz.AI.114,Evaluations run regularly enough to surface drift [AI.114],results_history,latest run timestamp older than stale_after_days,opex.stale_evaluation,implemented,https://learn.microsoft.com/azure/well-architected/ai/performance-efficiency
20
+ OperationalExcellence,Monitoring,ai_lz.AI.132,Runtime emits token-usage telemetry (gen_ai.usage.*) [AI.132],azure_monitor,KQL on gen_ai.usage.input_tokens + gen_ai.usage.output_tokens; warns when request_count > 0 but token counts are zero,opex.no_token_telemetry,implemented,https://learn.microsoft.com/azure/well-architected/ai/operations
21
+ Reliability,RateLimits,ai_lz.AI.154,Azure OpenAI HTTP 429 (rate-limit) responses under threshold [AI.154],azure_monitor,KQL counts dependency calls with resultCode == 429 over the lookback window,errors.rate_limit_pressure,implemented,https://learn.microsoft.com/azure/well-architected/ai/reliability
22
+ Cost,TokenLimits,ai_lz.AI.26,max_tokens limit set on every model / evaluator configuration [AI.26],workspace_files,regex match `max_tokens:` in agentops.yaml and .agentops/bundles/*.yaml,opex.max_tokens_undefined,implemented,https://learn.microsoft.com/azure/well-architected/ai/cost-optimization