agentops-accelerator 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentops/__init__.py +10 -0
- agentops/__main__.py +6 -0
- agentops/agent/__init__.py +12 -0
- agentops/agent/_legacy_ids.py +92 -0
- agentops/agent/analyzer.py +207 -0
- agentops/agent/checks/__init__.py +1 -0
- agentops/agent/checks/catalog.py +880 -0
- agentops/agent/checks/errors.py +279 -0
- agentops/agent/checks/foundry_config.py +75 -0
- agentops/agent/checks/latency.py +84 -0
- agentops/agent/checks/opex.py +157 -0
- agentops/agent/checks/opex_workspace.py +874 -0
- agentops/agent/checks/posture.py +36 -0
- agentops/agent/checks/posture_rules/__init__.py +53 -0
- agentops/agent/checks/posture_rules/content_filter.py +59 -0
- agentops/agent/checks/posture_rules/diagnostics.py +74 -0
- agentops/agent/checks/posture_rules/local_auth.py +55 -0
- agentops/agent/checks/posture_rules/managed_identity.py +59 -0
- agentops/agent/checks/posture_rules/network.py +68 -0
- agentops/agent/checks/regression.py +78 -0
- agentops/agent/checks/release_readiness.py +182 -0
- agentops/agent/checks/safety.py +247 -0
- agentops/agent/checks/spec_conformance.py +375 -0
- agentops/agent/cockpit.py +5159 -0
- agentops/agent/config.py +240 -0
- agentops/agent/findings.py +113 -0
- agentops/agent/history.py +142 -0
- agentops/agent/knowledge/__init__.py +182 -0
- agentops/agent/knowledge/waf-checklist.csv +39 -0
- agentops/agent/llm_assist/__init__.py +16 -0
- agentops/agent/llm_assist/_base.py +124 -0
- agentops/agent/llm_assist/_bundle_rule.py +154 -0
- agentops/agent/llm_assist/_client.py +347 -0
- agentops/agent/llm_assist/_dataset_rules.py +191 -0
- agentops/agent/llm_assist/_engine.py +106 -0
- agentops/agent/llm_assist/_prompt_rules.py +291 -0
- agentops/agent/llm_assist/_spec_rules.py +235 -0
- agentops/agent/production_telemetry.py +430 -0
- agentops/agent/report.py +207 -0
- agentops/agent/server/__init__.py +1 -0
- agentops/agent/server/app.py +84 -0
- agentops/agent/server/auth.py +94 -0
- agentops/agent/server/chat.py +44 -0
- agentops/agent/server/protocol.py +72 -0
- agentops/agent/sources/__init__.py +1 -0
- agentops/agent/sources/azure_monitor.py +523 -0
- agentops/agent/sources/azure_resources.py +602 -0
- agentops/agent/sources/foundry_control.py +174 -0
- agentops/agent/sources/results_history.py +494 -0
- agentops/agent/sources/spec_detectors/__init__.py +42 -0
- agentops/agent/sources/spec_detectors/_base.py +58 -0
- agentops/agent/sources/spec_detectors/agents_md.py +75 -0
- agentops/agent/sources/spec_detectors/spec_kit.py +172 -0
- agentops/agent/time_range.py +117 -0
- agentops/cli/__init__.py +1 -0
- agentops/cli/app.py +4823 -0
- agentops/core/__init__.py +1 -0
- agentops/core/agentops_config.py +592 -0
- agentops/core/config_loader.py +22 -0
- agentops/core/evaluators.py +480 -0
- agentops/core/release_evidence.py +56 -0
- agentops/core/results.py +117 -0
- agentops/mcp/__init__.py +10 -0
- agentops/mcp/server.py +232 -0
- agentops/pipeline/__init__.py +8 -0
- agentops/pipeline/cloud_results.py +189 -0
- agentops/pipeline/cloud_runner.py +901 -0
- agentops/pipeline/comparison.py +108 -0
- agentops/pipeline/diagnostics.py +51 -0
- agentops/pipeline/invocations.py +535 -0
- agentops/pipeline/official_eval.py +414 -0
- agentops/pipeline/orchestrator.py +775 -0
- agentops/pipeline/prompt_deploy.py +377 -0
- agentops/pipeline/publisher.py +121 -0
- agentops/pipeline/reporter.py +202 -0
- agentops/pipeline/runtime.py +409 -0
- agentops/pipeline/thresholds.py +84 -0
- agentops/services/__init__.py +1 -0
- agentops/services/cicd.py +720 -0
- agentops/services/eval_analysis.py +848 -0
- agentops/services/evidence_pack.py +757 -0
- agentops/services/initializer.py +86 -0
- agentops/services/preflight.py +470 -0
- agentops/services/setup_wizard.py +709 -0
- agentops/services/skills.py +643 -0
- agentops/services/trace_promotion.py +300 -0
- agentops/services/workflow_analysis.py +1129 -0
- agentops/templates/.gitignore +15 -0
- agentops/templates/__init__.py +1 -0
- agentops/templates/agent-server/Dockerfile +23 -0
- agentops/templates/agent-server/README.md +61 -0
- agentops/templates/agent-server/main.bicep +94 -0
- agentops/templates/agent.yaml +87 -0
- agentops/templates/agentops.yaml +58 -0
- agentops/templates/foundry.svg +71 -0
- agentops/templates/icon.png +0 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-dev-azd.yml +118 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-dev.yml +73 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-prod-azd.yml +141 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-prod.yml +94 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-prompt-agent.yml +167 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-qa-azd.yml +118 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-qa.yml +68 -0
- agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml +210 -0
- agentops/templates/pipelines/azuredevops/agentops-pr.yml +155 -0
- agentops/templates/pipelines/azuredevops/agentops-watchdog.yml +106 -0
- agentops/templates/project.gitignore +36 -0
- agentops/templates/sample-traces.jsonl +3 -0
- agentops/templates/skills/agentops-agent/SKILL.md +137 -0
- agentops/templates/skills/agentops-config/SKILL.md +113 -0
- agentops/templates/skills/agentops-dataset/SKILL.md +84 -0
- agentops/templates/skills/agentops-eval/SKILL.md +189 -0
- agentops/templates/skills/agentops-report/SKILL.md +71 -0
- agentops/templates/skills/agentops-workflow/SKILL.md +471 -0
- agentops/templates/smoke.jsonl +3 -0
- agentops/templates/waf-checklist.README.md +84 -0
- agentops/templates/waf-checklist.csv +22 -0
- agentops/templates/workflows/agentops-deploy-dev-azd.yml +166 -0
- agentops/templates/workflows/agentops-deploy-dev.yml +187 -0
- agentops/templates/workflows/agentops-deploy-prod-azd.yml +183 -0
- agentops/templates/workflows/agentops-deploy-prod.yml +171 -0
- agentops/templates/workflows/agentops-deploy-prompt-agent.yml +197 -0
- agentops/templates/workflows/agentops-deploy-qa-azd.yml +156 -0
- agentops/templates/workflows/agentops-deploy-qa.yml +145 -0
- agentops/templates/workflows/agentops-pr-prompt-agent.yml +210 -0
- agentops/templates/workflows/agentops-pr.yml +148 -0
- agentops/templates/workflows/agentops-watchdog.yml +122 -0
- agentops/utils/__init__.py +1 -0
- agentops/utils/azd_env.py +435 -0
- agentops/utils/azure_endpoints.py +62 -0
- agentops/utils/colors.py +47 -0
- agentops/utils/dotenv_loader.py +105 -0
- agentops/utils/foundry_discovery.py +229 -0
- agentops/utils/logging.py +59 -0
- agentops/utils/telemetry.py +554 -0
- agentops/utils/yaml.py +36 -0
- agentops_accelerator-0.3.0.dist-info/METADATA +278 -0
- agentops_accelerator-0.3.0.dist-info/RECORD +142 -0
- agentops_accelerator-0.3.0.dist-info/WHEEL +5 -0
- agentops_accelerator-0.3.0.dist-info/entry_points.txt +2 -0
- agentops_accelerator-0.3.0.dist-info/licenses/LICENSE +21 -0
- agentops_accelerator-0.3.0.dist-info/top_level.txt +1 -0
agentops/agent/config.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""Pydantic configuration model for the watchdog agent."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import List, Optional
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ResultsHistorySourceConfig(BaseModel):
    # Settings for the results-history source. ``extra="forbid"`` turns any
    # unknown key into a validation error instead of silently ignoring it.
    model_config = ConfigDict(extra="forbid")

    # The source is switched on unless explicitly disabled.
    enabled: bool = True
    # Results location; defaults to ``.agentops/results``.
    path: str = ".agentops/results"
    # Defaults to 10; values below 2 are rejected by validation.
    lookback_runs: int = Field(default=10, ge=2)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AzureMonitorSourceConfig(BaseModel):
    # Settings for the Azure Monitor source; unknown keys fail validation.
    model_config = ConfigDict(extra="forbid")

    # The source is switched on unless explicitly disabled.
    enabled: bool = True

    # Both identifiers are optional and unset by default.
    app_insights_resource_id: Optional[str] = None
    log_analytics_workspace_id: Optional[str] = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class FoundryControlSourceConfig(BaseModel):
    # Settings for the Foundry control source; unknown keys fail validation.
    model_config = ConfigDict(extra="forbid")

    # The source is switched on unless explicitly disabled.
    enabled: bool = True

    # Explicit endpoint value (unset by default) ...
    project_endpoint: Optional[str] = None
    # ... and the name of an environment variable; presumably consulted when
    # the explicit value is absent -- confirm against the source implementation.
    project_endpoint_env: str = "AZURE_AI_FOUNDRY_PROJECT_ENDPOINT"

    # Starts as a fresh empty list for every model instance.
    agent_ids: List[str] = Field(default_factory=list)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class AzureResourcesSourceConfig(BaseModel):
    """Read-only management-plane source for Azure resource posture audits.

    Requires ``Reader`` (or stronger) RBAC on the resource group, and the
    ``[agent]`` extra (which pulls in ``azure-mgmt-cognitiveservices`` and
    ``azure-mgmt-monitor``).
    """

    # Unknown keys are a validation error.
    model_config = ConfigDict(extra="forbid")

    # The source is switched on unless explicitly disabled.
    enabled: bool = True

    # Explicit subscription id (unset by default) plus the name of an
    # environment variable; presumably the fallback -- confirm in the source.
    subscription_id: Optional[str] = None
    subscription_id_env: str = "AZURE_SUBSCRIPTION_ID"

    # Optional scoping values, both unset by default.
    resource_group: Optional[str] = None
    cognitive_services_account: Optional[str] = None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class SourcesConfig(BaseModel):
    # Groups the per-source settings. Each section falls back to that
    # section's own defaults when omitted; unknown keys fail validation.
    model_config = ConfigDict(extra="forbid")

    results_history: ResultsHistorySourceConfig = Field(
        default_factory=ResultsHistorySourceConfig,
    )
    azure_monitor: AzureMonitorSourceConfig = Field(
        default_factory=AzureMonitorSourceConfig,
    )
    foundry_control: FoundryControlSourceConfig = Field(
        default_factory=FoundryControlSourceConfig,
    )
    azure_resources: AzureResourcesSourceConfig = Field(
        default_factory=AzureResourcesSourceConfig,
    )
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class RegressionCheckConfig(BaseModel):
    # Settings for the regression check; unknown keys fail validation.
    model_config = ConfigDict(extra="forbid")

    # Metric names covered by default. The factory builds a new list per
    # instance so the default is never shared between models.
    metrics: List[str] = Field(
        default_factory=lambda: [
            "coherence",
            "fluency",
            "similarity",
            "f1_score",
            "groundedness",
            "tool_call_accuracy",
        ],
    )
    # Defaults to 0.10; must lie within [0.0, 1.0].
    threshold_drop: float = Field(default=0.10, ge=0.0, le=1.0)
    # Defaults to 3; values below 2 are rejected.
    min_runs: int = Field(default=3, ge=2)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class LatencyCheckConfig(BaseModel):
    # Settings for the latency check; unknown keys fail validation.
    model_config = ConfigDict(extra="forbid")

    # Defaults to 5.0 seconds; must be strictly positive.
    p95_threshold_seconds: float = Field(default=5.0, gt=0)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class ErrorsCheckConfig(BaseModel):
    # Settings for the errors check; unknown keys fail validation.
    model_config = ConfigDict(extra="forbid")

    # Defaults to 0.05; must lie within [0.0, 1.0].
    rate_threshold: float = Field(default=0.05, ge=0.0, le=1.0)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class SafetyCheckConfig(BaseModel):
    # Settings for the safety check; unknown keys fail validation.
    model_config = ConfigDict(extra="forbid")

    # Expected values: Low | Medium | High. The field is a plain ``str``, so
    # this model itself does not enforce that set.
    severity_floor: str = "Medium"
    # Defaults to 1; must be at least 1.
    min_runtime_hits: int = Field(default=1, ge=1)
    # Defaults to 10; must be at least 1.
    runtime_critical_hits: int = Field(default=10, ge=1)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class PostureCheckConfig(BaseModel):
    """WAF-AI posture audit configuration.

    The MVP rule set targets the **Security** pillar of the
    Microsoft Well-Architected Framework for AI workloads.

    The check is enabled by default. If the Azure resources source cannot
    be discovered or read, it returns no findings and records an
    actionable source diagnostic instead of failing the whole Doctor run.
    """

    # Unknown keys are a validation error.
    model_config = ConfigDict(extra="forbid")

    enabled: bool = True
    # Free-form string; defaults to the security pillar.
    pillar: str = "security"
    # Starts as a fresh empty list for every model instance.
    exclude_rules: List[str] = Field(default_factory=list)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class OpexCheckConfig(BaseModel):
    """Operational-excellence (time-based) check configuration."""

    # Unknown keys are a validation error.
    model_config = ConfigDict(extra="forbid")

    enabled: bool = True
    # Defaults to 14; must be at least 1.
    stale_after_days: int = Field(default=14, ge=1)
    # Defaults to 5; values below 3 are rejected.
    min_runs_for_flaky: int = Field(default=5, ge=3)
    # Defaults to 0.30; must be greater than 0.0 and at most 1.0.
    flaky_cv_threshold: float = Field(default=0.30, gt=0.0, le=1.0)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class LLMAssistCheckConfig(BaseModel):
    """LLM-judged advisory checks.

    Enabled by default - the Doctor auto-discovers a judge model from
    the Foundry project on first use and reuses it on subsequent runs.
    Set ``enabled: false`` to skip the suite entirely (e.g. in
    ephemeral CI sandboxes that have no Foundry access).

    The judge model is invoked via the Foundry project's OpenAI client.
    No new credential flow.
    """

    # Unknown keys are a validation error.
    model_config = ConfigDict(extra="forbid")

    enabled: bool = True

    # Explicit deployment name (unset by default) and the environment
    # variable name that goes with it.
    deployment_name: Optional[str] = None
    deployment_name_env: str = "AZURE_AI_MODEL_DEPLOYMENT_NAME"

    # Environment variable name and explicit value for the project endpoint.
    project_endpoint_env: str = "AZURE_AI_FOUNDRY_PROJECT_ENDPOINT"
    project_endpoint: Optional[str] = None

    # Starts as a fresh empty list for every model instance.
    rules: List[str] = Field(default_factory=list)
    # Defaults to 50; must lie within [1, 500].
    max_dataset_rows: int = Field(default=50, ge=1, le=500)
    # Defaults to 0.6; must lie within [0.0, 1.0].
    min_confidence: float = Field(default=0.6, ge=0.0, le=1.0)
    # Defaults to 30; zero is accepted, negatives are rejected.
    cache_ttl_days: int = Field(default=30, ge=0)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class LLMSpecConformanceConfig(BaseModel):
    """LLM gap-analysis sub-config for spec-conformance."""

    # Unknown keys are a validation error.
    model_config = ConfigDict(extra="forbid")

    # Unlike the sibling check configs, this one is off by default.
    enabled: bool = False
    # Defaults to 0.6; must lie within [0.0, 1.0].
    severity_floor: float = Field(default=0.6, ge=0.0, le=1.0)
    # Defaults to 30,000; must lie within [1,000, 200,000].
    max_input_chars: int = Field(default=30_000, ge=1_000, le=200_000)
    # Defaults to 200; must lie within [10, 2,000].
    max_workspace_paths: int = Field(default=200, ge=10, le=2_000)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class SpecConformanceCheckConfig(BaseModel):
    """Spec-conformance sub-check under Operational Excellence.

    The check inspects the workspace for spec-driven-development
    artifacts (spec-kit ``.specify/``, ``AGENTS.md``, Copilot
    instructions) and flags drift between the spec and the
    implementation.
    """

    # Unknown keys are a validation error.
    model_config = ConfigDict(extra="forbid")

    enabled: bool = True
    # Detector names enabled by default; the factory yields a new list per
    # instance.
    detectors: List[str] = Field(
        default_factory=lambda: ["spec-kit", "agents-md"],
    )
    # Defaults to 30; must be at least 1.
    stale_after_days: int = Field(default=30, ge=1)
    # Starts as a fresh empty list for every model instance.
    skip: List[str] = Field(default_factory=list)
    # Nested LLM settings; defaults to that model's own defaults.
    llm_assist: LLMSpecConformanceConfig = Field(
        default_factory=LLMSpecConformanceConfig,
    )
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class OperationalExcellenceCheckConfig(BaseModel):
    """Container for Operational Excellence sub-checks."""

    # Unknown keys are a validation error.
    model_config = ConfigDict(extra="forbid")

    # Falls back to the sub-check's own defaults when the section is omitted.
    spec_conformance: SpecConformanceCheckConfig = Field(
        default_factory=SpecConformanceCheckConfig,
    )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class ChecksConfig(BaseModel):
    # Groups the per-check settings. Every section is optional and falls
    # back to that check's own defaults; unknown keys fail validation.
    model_config = ConfigDict(extra="forbid")

    regression: RegressionCheckConfig = Field(
        default_factory=RegressionCheckConfig,
    )
    latency: LatencyCheckConfig = Field(default_factory=LatencyCheckConfig)
    errors: ErrorsCheckConfig = Field(default_factory=ErrorsCheckConfig)
    safety: SafetyCheckConfig = Field(default_factory=SafetyCheckConfig)
    posture: PostureCheckConfig = Field(default_factory=PostureCheckConfig)
    opex: OpexCheckConfig = Field(default_factory=OpexCheckConfig)
    operational_excellence: OperationalExcellenceCheckConfig = Field(
        default_factory=OperationalExcellenceCheckConfig,
    )
    llm_assist: LLMAssistCheckConfig = Field(
        default_factory=LLMAssistCheckConfig,
    )
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class ServerConfig(BaseModel):
    # Server settings; unknown keys fail validation.
    model_config = ConfigDict(extra="forbid")

    # Optional and unset by default.
    github_app_client_id: Optional[str] = None
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
class AgentConfig(BaseModel):
    """Root config for ``.agentops/agent.yaml``."""

    # Unknown top-level keys are a validation error.
    model_config = ConfigDict(extra="forbid")

    version: int = 1

    # Each section is optional and falls back to its own model defaults.
    sources: SourcesConfig = Field(default_factory=SourcesConfig)
    checks: ChecksConfig = Field(default_factory=ChecksConfig)
    server: ServerConfig = Field(default_factory=ServerConfig)

    # Defaults to 7; must be at least 1.
    lookback_days: int = Field(default=7, ge=1)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def load_agent_config(path: Optional[Path]) -> AgentConfig:
    """Build an :class:`AgentConfig` from a YAML file, or from defaults.

    When ``path`` is ``None`` or does not exist, an all-defaults config is
    returned without reading anything. Otherwise the file is parsed and
    validated against the model.

    Before validation, a list found at ``checks.llm_assist.rules`` is passed
    through ``canonicalize_id_list`` (see ``_legacy_ids.py``), which rewrites
    legacy ``genaiops.*`` rule ids to their canonical ``opex.*`` equivalents
    with a one-shot deprecation warning.
    """
    if path is None or not path.exists():
        return AgentConfig()

    # Imported lazily so the defaults-only path above needs neither module.
    from agentops.utils.yaml import load_yaml

    from agentops.agent._legacy_ids import canonicalize_id_list

    document = load_yaml(path)

    # Descend checks -> llm_assist one level at a time; any level that is not
    # a mapping ends the walk and the document is validated untouched.
    checks_section = document.get("checks") if isinstance(document, dict) else None
    llm_section = (
        checks_section.get("llm_assist") if isinstance(checks_section, dict) else None
    )
    if isinstance(llm_section, dict) and isinstance(llm_section.get("rules"), list):
        llm_section["rules"] = canonicalize_id_list(llm_section["rules"])

    return AgentConfig.model_validate(document)
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Severity-ranked findings produced by the watchdog agent."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Any, Dict
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Category(str, Enum):
    """User-facing bucket a finding is filed under.

    A category says *what area* a finding concerns and is unrelated to how
    severe it is: a `quality` finding can be `critical`, `warning`, or
    `info`. The buckets are stable and are used to filter and group the
    watchdog report.

    They mirror the pillars of the **Microsoft Well-Architected Framework
    for AI** and act as the single source of truth shared by doctor
    findings, ``waf-checklist.csv`` (``pillar`` column), the cockpit rows,
    and config skip-lists.

    * ``quality`` - eval-driven signals (regression, content-safety)
    * ``performance`` - latency / throughput signals
    * ``reliability`` - error / failure signals
    * ``operational_excellence`` - workspace + CI hygiene, Foundry config
      audit, and spec-conformance signals (pinning, gates, drift,
      versioning, spec-vs-implementation alignment)
    * ``security`` - repo & identity surface beyond Foundry Compliance
    * ``responsible_ai`` - prompt + eval-bundle heuristics for RAI practices
    """

    QUALITY = "quality"
    PERFORMANCE = "performance"
    RELIABILITY = "reliability"
    OPERATIONAL_EXCELLENCE = "operational_excellence"
    SECURITY = "security"
    RESPONSIBLE_AI = "responsible_ai"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class Severity(str, Enum):
    """How serious a finding is: ``info`` < ``warning`` < ``critical``.

    The ordering operators compare :attr:`rank`, not the underlying string
    value. Comparing against anything that is not a ``Severity`` returns
    ``NotImplemented`` so Python can try the other operand.
    """

    INFO = "info"
    WARNING = "warning"
    CRITICAL = "critical"

    @property
    def rank(self) -> int:
        # Resolved at call time: the table is defined after the class body.
        return _SEVERITY_RANK[self]

    def __lt__(self, other: object) -> bool:  # type: ignore[override]
        return self.rank < other.rank if isinstance(other, Severity) else NotImplemented

    def __le__(self, other: object) -> bool:  # type: ignore[override]
        return self.rank <= other.rank if isinstance(other, Severity) else NotImplemented

    def __gt__(self, other: object) -> bool:  # type: ignore[override]
        return self.rank > other.rank if isinstance(other, Severity) else NotImplemented

    def __ge__(self, other: object) -> bool:  # type: ignore[override]
        return self.rank >= other.rank if isinstance(other, Severity) else NotImplemented


# Numeric ordering behind ``Severity.rank``: the position in this sequence is
# the rank, lowest severity first.
_SEVERITY_RANK = {
    level: position
    for position, level in enumerate(
        (Severity.INFO, Severity.WARNING, Severity.CRITICAL)
    )
}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# Glyph shown next to each severity level.
_SEVERITY_EMOJI = {
    Severity.INFO: "ℹ️",
    Severity.WARNING: "⚠️",
    Severity.CRITICAL: "🚨",
}


def severity_emoji(severity: Severity) -> str:
    """Return the glyph registered for ``severity``.

    Raises:
        KeyError: if ``severity`` is not a key of the glyph table.
    """
    glyph = _SEVERITY_EMOJI[severity]
    return glyph
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class Finding:
    """One observation reported by the watchdog agent.

    ``category`` defaults to :attr:`Category.QUALITY` and ``evidence`` to a
    fresh empty dict per instance.
    """

    id: str
    severity: Severity
    title: str
    summary: str
    recommendation: str
    source: str
    category: Category = Category.QUALITY
    evidence: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of this finding.

        ``severity`` and ``category`` are emitted as their string values.
        ``evidence`` is the same dict object held by the instance, not a
        copy.
        """
        payload: Dict[str, Any] = {"id": self.id}
        # Key order is part of the output shape: enums first, then the text.
        payload["severity"] = self.severity.value
        payload["category"] = self.category.value
        payload["title"] = self.title
        payload["summary"] = self.summary
        payload["recommendation"] = self.recommendation
        payload["source"] = self.source
        payload["evidence"] = self.evidence
        return payload
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Append-only analysis history for the watchdog agent.
|
|
2
|
+
|
|
3
|
+
Each ``agentops doctor`` invocation appends one JSON record to
|
|
4
|
+
``.agentops/agent/history.jsonl``. The file is the canonical local
|
|
5
|
+
storage for the cockpit (``agentops cockpit``) and for any future
|
|
6
|
+
trend-based checks. No Azure resource required.
|
|
7
|
+
|
|
8
|
+
When OpenTelemetry tracing is configured, the same record is also
|
|
9
|
+
emitted as a span (see :func:`agentops.utils.telemetry.agent_analyze_span`);
|
|
10
|
+
the local JSONL remains authoritative because it works even when
|
|
11
|
+
tracing is disabled.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
from collections import Counter
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from datetime import datetime, timezone
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any, Dict, List, Optional
|
|
22
|
+
|
|
23
|
+
from agentops.agent.findings import Category, Finding, Severity
|
|
24
|
+
|
|
25
|
+
# Location of the history file, relative to the workspace root it is joined
# onto by ``history_path``.
_HISTORY_REL_PATH = ".agentops/agent/history.jsonl"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class AnalysisRecord:
    """A single watchdog analysis, captured for the cockpit and trend checks.

    Instances round-trip through :meth:`to_dict` / :meth:`from_dict` as
    plain JSON-compatible dictionaries.
    """

    timestamp: str
    findings_total: int
    findings_by_severity: Dict[str, int]
    findings_by_category: Dict[str, int]
    max_severity: Optional[str]
    sources_enabled: List[str]
    lookback_days: Optional[int]
    duration_seconds: Optional[float]
    findings: List[Dict[str, Any]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a dict with one key per field, in field order.

        Container values are the instance's own objects, not copies.
        """
        return {
            "timestamp": self.timestamp,
            "findings_total": self.findings_total,
            "findings_by_severity": self.findings_by_severity,
            "findings_by_category": self.findings_by_category,
            "max_severity": self.max_severity,
            "sources_enabled": self.sources_enabled,
            "lookback_days": self.lookback_days,
            "duration_seconds": self.duration_seconds,
            "findings": self.findings,
        }

    @classmethod
    def from_dict(cls, payload: Dict[str, Any]) -> "AnalysisRecord":
        """Rebuild a record from a dict such as one produced by :meth:`to_dict`.

        Missing keys and explicit ``None`` values are treated alike: the
        timestamp becomes ``""``, the total ``0``, and the container fields
        become empty. ``max_severity``, ``lookback_days`` and
        ``duration_seconds`` are passed through unchanged.

        Raises:
            TypeError, ValueError: if a present value cannot be coerced
                (e.g. a non-numeric ``findings_total``).
        """
        # An explicit null must behave like an absent key. Without this,
        # str(None) would yield the text "None" and int(None) would raise,
        # while the container fields below already tolerate null.
        raw_timestamp = payload.get("timestamp")
        raw_total = payload.get("findings_total")
        return cls(
            timestamp="" if raw_timestamp is None else str(raw_timestamp),
            findings_total=0 if raw_total is None else int(raw_total),
            findings_by_severity=dict(payload.get("findings_by_severity") or {}),
            findings_by_category=dict(payload.get("findings_by_category") or {}),
            max_severity=payload.get("max_severity"),
            sources_enabled=list(payload.get("sources_enabled") or []),
            lookback_days=payload.get("lookback_days"),
            duration_seconds=payload.get("duration_seconds"),
            findings=list(payload.get("findings") or []),
        )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def history_path(workspace: Path) -> Path:
    """Return the analysis history file location under ``workspace``.

    The result is ``workspace`` joined with the fixed relative history path,
    so it is absolute only when ``workspace`` itself is absolute.
    """
    location = workspace / _HISTORY_REL_PATH
    return location
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def build_record(
    findings: List[Finding],
    *,
    sources_enabled: List[str],
    lookback_days: Optional[int],
    duration_seconds: Optional[float],
    timestamp: Optional[datetime] = None,
) -> AnalysisRecord:
    """Summarise ``findings`` into a serialisable :class:`AnalysisRecord`.

    Args:
        findings: Findings to summarise; may be empty.
        sources_enabled: Copied into the record as a new list.
        lookback_days: Stored on the record as given.
        duration_seconds: Stored on the record as given.
        timestamp: Moment to stamp the record with; the current UTC time is
            used when omitted.

    Returns:
        A record whose per-severity and per-category counts contain an entry
        for every enum member (zero when nothing matched) and whose
        ``max_severity`` is ``None`` when ``findings`` is empty.
    """
    captured_at = timestamp or datetime.now(timezone.utc)
    by_severity = Counter(item.severity.value for item in findings)
    by_category = Counter(item.category.value for item in findings)

    if findings:
        worst = max(findings, key=lambda item: item.severity.rank)
        top_severity = worst.severity.value
    else:
        top_severity = None

    return AnalysisRecord(
        timestamp=captured_at.isoformat(),
        findings_total=len(findings),
        # A Counter reports 0 for absent keys, so every member gets an entry.
        findings_by_severity={level.value: by_severity[level.value] for level in Severity},
        findings_by_category={bucket.value: by_category[bucket.value] for bucket in Category},
        max_severity=top_severity,
        sources_enabled=list(sources_enabled),
        lookback_days=lookback_days,
        duration_seconds=duration_seconds,
        findings=[item.to_dict() for item in findings],
    )
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def append_analysis(workspace: Path, record: AnalysisRecord) -> Path:
    """Append ``record`` as one JSON line to the workspace history file.

    Parent directories are created when missing. Non-ASCII characters are
    written as-is (UTF-8) rather than escaped.

    Returns:
        The path of the history file that was written to.
    """
    target = history_path(workspace)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("a", encoding="utf-8") as stream:
        serialised = json.dumps(record.to_dict(), ensure_ascii=False)
        # One write per record keeps the line and its terminator together.
        stream.write(serialised + "\n")
    return target
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def load_analysis_history(
    workspace: Path,
    *,
    limit: Optional[int] = None,
) -> List[AnalysisRecord]:
    """Load all records (or the most recent ``limit``) from history.jsonl.

    Returns an empty list when the file does not exist, so callers can
    treat history as "best effort" without special-casing first runs.

    Malformed lines are skipped silently rather than crashing the cockpit
    or trend checks. That covers blank lines, text that is not valid JSON,
    JSON values that are not objects, and objects whose fields cannot be
    coerced into an :class:`AnalysisRecord`.

    Args:
        workspace: Workspace root containing the history file.
        limit: When a positive integer, only the last ``limit`` records are
            returned. ``None`` (and non-positive values) return everything.
    """
    path = history_path(workspace)
    if not path.exists():
        return []

    records: List[AnalysisRecord] = []
    # errors="replace": a truncated or corrupted multi-byte sequence (e.g.
    # from an interrupted append) must not abort the whole load with a
    # UnicodeDecodeError; the affected line is decoded with U+FFFD instead.
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        for line in handle:
            text = line.strip()
            if not text:
                continue
            try:
                payload = json.loads(text)
            except ValueError:
                continue
            if not isinstance(payload, dict):
                continue
            try:
                record = AnalysisRecord.from_dict(payload)
            except (TypeError, ValueError, OverflowError):
                # Valid JSON but the wrong shape (e.g. a non-numeric
                # "findings_total" or a list where a mapping is expected).
                # Treated like any other malformed line.
                continue
            records.append(record)

    if limit is not None and limit > 0:
        records = records[-limit:]
    return records
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""WAF AI Landing Zones knowledge base for the Doctor agent.
|
|
2
|
+
|
|
3
|
+
This package ships a CSV (`waf-checklist.csv`) that maps every Doctor
|
|
4
|
+
finding id to the Microsoft Well-Architected Framework (WAF) for AI
|
|
5
|
+
workloads pillar/area it belongs to.
|
|
6
|
+
|
|
7
|
+
The shipped CSV is the **packaged baseline**. Users can override or
|
|
8
|
+
extend it on a per-workspace basis by dropping a file at
|
|
9
|
+
``.agentops/waf-checklist.csv`` (created by ``agentops init``). The
|
|
10
|
+
loader merges the two: rows in the workspace file override packaged
|
|
11
|
+
rows when ``doctor_check_id`` matches, and add new rows when the id
|
|
12
|
+
is new. This keeps the toolkit's baseline updateable while letting
|
|
13
|
+
teams version their own rules alongside the project.
|
|
14
|
+
|
|
15
|
+
Strict scope rule: a row only exists if Doctor itself can produce
|
|
16
|
+
that finding. Items that are exclusively visible through Foundry
|
|
17
|
+
Operate -> Compliance (public network, private endpoints,
|
|
18
|
+
content-filter attachment, custom subdomain) are intentionally *not*
|
|
19
|
+
in the packaged CSV - they belong to Foundry's surface, not the
|
|
20
|
+
Doctor's.
|
|
21
|
+
|
|
22
|
+
The CSV's ``doctor_check_id`` column may carry either a fully
|
|
23
|
+
qualified finding id (``opex.no_pr_gate``) or a prefix
|
|
24
|
+
(``regression`` covers ``regression.coherence``,
|
|
25
|
+
``regression.fluency``, ...). Lookups in :func:`find_waf_item` walk
|
|
26
|
+
the dot segments from longest to shortest so the most specific row
|
|
27
|
+
wins.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import csv
|
|
33
|
+
import logging
|
|
34
|
+
from dataclasses import dataclass
|
|
35
|
+
from functools import lru_cache
|
|
36
|
+
from importlib import resources
|
|
37
|
+
from pathlib import Path
|
|
38
|
+
from typing import Dict, List, Optional
|
|
39
|
+
|
|
40
|
+
log = logging.getLogger(__name__)

# File name of the packaged checklist.
_CSV_NAME = "waf-checklist.csv"

# Where a workspace may place its own checklist, relative to the workspace
# root.
WORKSPACE_OVERRIDE_PATH = Path(".agentops") / "waf-checklist.csv"

# Header names a checklist CSV must provide; a file lacking any of them is
# rejected by the parser.
_EXPECTED_COLUMNS = (
    "pillar",
    "area",
    "item_id",
    "title",
    "detection_source",
    "detection_signal",
    "doctor_check_id",
    "status",
    "reference_url",
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True)
class WAFItem:
    """Immutable record holding one row of the WAF checklist.

    Every attribute is a string and corresponds to the checklist CSV column
    of the same name.
    """

    pillar: str
    area: str
    item_id: str
    title: str
    detection_source: str
    detection_signal: str
    # Finding id, or a dotted prefix of one, that this row applies to.
    doctor_check_id: str
    status: str
    reference_url: str
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _row_to_item(row: Dict[str, str]) -> Optional[WAFItem]:
    """Convert one parsed CSV row into a :class:`WAFItem`.

    Each value is stripped of surrounding whitespace; a missing or empty
    cell becomes ``""``. Rows whose ``doctor_check_id`` is blank yield
    ``None``.
    """

    def cell(column: str) -> str:
        # ``or ""`` covers both an absent key and a None value.
        return (row.get(column) or "").strip()

    check_id = cell("doctor_check_id")
    if not check_id:
        return None

    return WAFItem(
        pillar=cell("pillar"),
        area=cell("area"),
        item_id=cell("item_id"),
        title=cell("title"),
        detection_source=cell("detection_source"),
        detection_signal=cell("detection_signal"),
        doctor_check_id=check_id,
        status=cell("status"),
        reference_url=cell("reference_url"),
    )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _parse_csv_text(label: str, text: str) -> List[WAFItem]:
    """Parse checklist CSV ``text`` into a list of :class:`WAFItem`.

    Args:
        label: Human-readable origin of ``text``; only used in the warning.
        text: Full CSV content, header row included.

    Returns:
        One item per data row that carries a ``doctor_check_id``. An empty
        list (after logging a warning) when the header lacks any expected
        column.
    """
    # Local import so this function stays self-contained.
    import io

    # Hand the csv module a file-like object opened with newline="" rather
    # than pre-split lines. Pre-splitting discards the newline characters,
    # which loses line breaks inside quoted fields, and also breaks rows on
    # Unicode line boundaries that CSV does not treat as record separators.
    reader = csv.DictReader(io.StringIO(text, newline=""))

    header = reader.fieldnames or []
    missing = [column for column in _EXPECTED_COLUMNS if column not in header]
    if missing:
        log.warning("WAF checklist %s is missing columns: %s", label, missing)
        return []

    items: List[WAFItem] = []
    for row in reader:
        item = _row_to_item(row)
        if item is not None:
            items.append(item)
    return items
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@lru_cache(maxsize=1)
def _packaged_items() -> List[WAFItem]:
    """Return the checklist rows bundled with this package.

    The result is memoised, so the file is read and parsed at most once per
    process and every caller receives the same list object. If the file is
    missing or unreadable a warning is logged and an empty list is returned.
    """
    try:
        bundled = resources.files(__name__).joinpath(_CSV_NAME)
        text = bundled.read_text(encoding="utf-8")
    except FileNotFoundError:
        log.warning("Packaged WAF checklist not found")
        return []
    except OSError as exc:
        log.warning("Packaged WAF checklist could not be read: %s", exc)
        return []
    return _parse_csv_text("packaged", text)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _workspace_items(workspace: Path) -> List[WAFItem]:
    """Load the workspace override file. NOT cached - users edit it in place.

    Lines whose first non-blank character is ``#`` are treated as comments
    and removed before the remainder is parsed as CSV.

    Returns:
        The parsed rows, or an empty list when the override file does not
        exist or cannot be read or decoded (a warning is logged for the
        latter).
    """
    override = workspace / WORKSPACE_OVERRIDE_PATH
    if not override.is_file():
        return []
    try:
        # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order
        # mark, which spreadsheet tools commonly add. A BOM left in place
        # would attach to the first header or comment line, so the column
        # check or the comment filter would fail.
        text = override.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is not an OSError. A file saved in another
        # encoding should be reported like any other unreadable file rather
        # than propagate as an unhandled exception.
        log.warning("Workspace WAF checklist at %s unreadable: %s", override, exc)
        return []
    # Strip comment lines (#-prefixed) before parsing as CSV.
    cleaned = "\n".join(
        line for line in text.splitlines() if not line.lstrip().startswith("#")
    )
    return _parse_csv_text(f"workspace ({override})", cleaned)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def load_waf_checklist(workspace: Optional[Path] = None) -> List[WAFItem]:
    """Return the packaged checklist merged with any workspace override.

    Rows are keyed by ``doctor_check_id``. A workspace row replaces the
    packaged row with the same id and is appended when the id is new. With
    ``workspace`` set to ``None``, or when the workspace has no override
    file, only the packaged baseline is returned.
    """
    merged: Dict[str, WAFItem] = {}
    for baseline in _packaged_items():
        merged[baseline.doctor_check_id] = baseline

    if workspace is not None:
        # Applied second so workspace rows win on id collisions.
        merged.update(
            (custom.doctor_check_id, custom) for custom in _workspace_items(workspace)
        )

    return list(merged.values())
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def waf_index_by_check_id(
    workspace: Optional[Path] = None,
) -> Dict[str, WAFItem]:
    """Map each ``doctor_check_id`` to its row in the merged checklist."""
    index: Dict[str, WAFItem] = {}
    for entry in load_waf_checklist(workspace):
        index[entry.doctor_check_id] = entry
    return index
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def find_waf_item(
    finding_id: str, workspace: Optional[Path] = None
) -> Optional[WAFItem]:
    """Look up the checklist row for ``finding_id``; the longest prefix wins.

    The id is tried in full first, then with trailing dot-separated
    segments removed one at a time: ``safety.runtime.content_filter``, then
    ``safety.runtime``, then ``safety``. The first hit is returned, or
    ``None`` when nothing matches or ``finding_id`` is empty.

    When ``workspace`` is provided, workspace-level overrides take
    precedence (see :func:`load_waf_checklist`).
    """
    if not finding_id:
        return None

    lookup = waf_index_by_check_id(workspace)
    segments = finding_id.split(".")
    # depth counts how many leading segments to keep, most specific first.
    for depth in range(len(segments), 0, -1):
        match = lookup.get(".".join(segments[:depth]))
        if match is not None:
            return match
    return None
|