agentops-accelerator 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. agentops/__init__.py +10 -0
  2. agentops/__main__.py +6 -0
  3. agentops/agent/__init__.py +12 -0
  4. agentops/agent/_legacy_ids.py +92 -0
  5. agentops/agent/analyzer.py +207 -0
  6. agentops/agent/checks/__init__.py +1 -0
  7. agentops/agent/checks/catalog.py +880 -0
  8. agentops/agent/checks/errors.py +279 -0
  9. agentops/agent/checks/foundry_config.py +75 -0
  10. agentops/agent/checks/latency.py +84 -0
  11. agentops/agent/checks/opex.py +157 -0
  12. agentops/agent/checks/opex_workspace.py +874 -0
  13. agentops/agent/checks/posture.py +36 -0
  14. agentops/agent/checks/posture_rules/__init__.py +53 -0
  15. agentops/agent/checks/posture_rules/content_filter.py +59 -0
  16. agentops/agent/checks/posture_rules/diagnostics.py +74 -0
  17. agentops/agent/checks/posture_rules/local_auth.py +55 -0
  18. agentops/agent/checks/posture_rules/managed_identity.py +59 -0
  19. agentops/agent/checks/posture_rules/network.py +68 -0
  20. agentops/agent/checks/regression.py +78 -0
  21. agentops/agent/checks/release_readiness.py +182 -0
  22. agentops/agent/checks/safety.py +247 -0
  23. agentops/agent/checks/spec_conformance.py +375 -0
  24. agentops/agent/cockpit.py +5159 -0
  25. agentops/agent/config.py +240 -0
  26. agentops/agent/findings.py +113 -0
  27. agentops/agent/history.py +142 -0
  28. agentops/agent/knowledge/__init__.py +182 -0
  29. agentops/agent/knowledge/waf-checklist.csv +39 -0
  30. agentops/agent/llm_assist/__init__.py +16 -0
  31. agentops/agent/llm_assist/_base.py +124 -0
  32. agentops/agent/llm_assist/_bundle_rule.py +154 -0
  33. agentops/agent/llm_assist/_client.py +347 -0
  34. agentops/agent/llm_assist/_dataset_rules.py +191 -0
  35. agentops/agent/llm_assist/_engine.py +106 -0
  36. agentops/agent/llm_assist/_prompt_rules.py +291 -0
  37. agentops/agent/llm_assist/_spec_rules.py +235 -0
  38. agentops/agent/production_telemetry.py +430 -0
  39. agentops/agent/report.py +207 -0
  40. agentops/agent/server/__init__.py +1 -0
  41. agentops/agent/server/app.py +84 -0
  42. agentops/agent/server/auth.py +94 -0
  43. agentops/agent/server/chat.py +44 -0
  44. agentops/agent/server/protocol.py +72 -0
  45. agentops/agent/sources/__init__.py +1 -0
  46. agentops/agent/sources/azure_monitor.py +523 -0
  47. agentops/agent/sources/azure_resources.py +602 -0
  48. agentops/agent/sources/foundry_control.py +174 -0
  49. agentops/agent/sources/results_history.py +494 -0
  50. agentops/agent/sources/spec_detectors/__init__.py +42 -0
  51. agentops/agent/sources/spec_detectors/_base.py +58 -0
  52. agentops/agent/sources/spec_detectors/agents_md.py +75 -0
  53. agentops/agent/sources/spec_detectors/spec_kit.py +172 -0
  54. agentops/agent/time_range.py +117 -0
  55. agentops/cli/__init__.py +1 -0
  56. agentops/cli/app.py +4823 -0
  57. agentops/core/__init__.py +1 -0
  58. agentops/core/agentops_config.py +592 -0
  59. agentops/core/config_loader.py +22 -0
  60. agentops/core/evaluators.py +480 -0
  61. agentops/core/release_evidence.py +56 -0
  62. agentops/core/results.py +117 -0
  63. agentops/mcp/__init__.py +10 -0
  64. agentops/mcp/server.py +232 -0
  65. agentops/pipeline/__init__.py +8 -0
  66. agentops/pipeline/cloud_results.py +189 -0
  67. agentops/pipeline/cloud_runner.py +901 -0
  68. agentops/pipeline/comparison.py +108 -0
  69. agentops/pipeline/diagnostics.py +51 -0
  70. agentops/pipeline/invocations.py +535 -0
  71. agentops/pipeline/official_eval.py +414 -0
  72. agentops/pipeline/orchestrator.py +775 -0
  73. agentops/pipeline/prompt_deploy.py +377 -0
  74. agentops/pipeline/publisher.py +121 -0
  75. agentops/pipeline/reporter.py +202 -0
  76. agentops/pipeline/runtime.py +409 -0
  77. agentops/pipeline/thresholds.py +84 -0
  78. agentops/services/__init__.py +1 -0
  79. agentops/services/cicd.py +720 -0
  80. agentops/services/eval_analysis.py +848 -0
  81. agentops/services/evidence_pack.py +757 -0
  82. agentops/services/initializer.py +86 -0
  83. agentops/services/preflight.py +470 -0
  84. agentops/services/setup_wizard.py +709 -0
  85. agentops/services/skills.py +643 -0
  86. agentops/services/trace_promotion.py +300 -0
  87. agentops/services/workflow_analysis.py +1129 -0
  88. agentops/templates/.gitignore +15 -0
  89. agentops/templates/__init__.py +1 -0
  90. agentops/templates/agent-server/Dockerfile +23 -0
  91. agentops/templates/agent-server/README.md +61 -0
  92. agentops/templates/agent-server/main.bicep +94 -0
  93. agentops/templates/agent.yaml +87 -0
  94. agentops/templates/agentops.yaml +58 -0
  95. agentops/templates/foundry.svg +71 -0
  96. agentops/templates/icon.png +0 -0
  97. agentops/templates/pipelines/azuredevops/agentops-deploy-dev-azd.yml +118 -0
  98. agentops/templates/pipelines/azuredevops/agentops-deploy-dev.yml +73 -0
  99. agentops/templates/pipelines/azuredevops/agentops-deploy-prod-azd.yml +141 -0
  100. agentops/templates/pipelines/azuredevops/agentops-deploy-prod.yml +94 -0
  101. agentops/templates/pipelines/azuredevops/agentops-deploy-prompt-agent.yml +167 -0
  102. agentops/templates/pipelines/azuredevops/agentops-deploy-qa-azd.yml +118 -0
  103. agentops/templates/pipelines/azuredevops/agentops-deploy-qa.yml +68 -0
  104. agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml +210 -0
  105. agentops/templates/pipelines/azuredevops/agentops-pr.yml +155 -0
  106. agentops/templates/pipelines/azuredevops/agentops-watchdog.yml +106 -0
  107. agentops/templates/project.gitignore +36 -0
  108. agentops/templates/sample-traces.jsonl +3 -0
  109. agentops/templates/skills/agentops-agent/SKILL.md +137 -0
  110. agentops/templates/skills/agentops-config/SKILL.md +113 -0
  111. agentops/templates/skills/agentops-dataset/SKILL.md +84 -0
  112. agentops/templates/skills/agentops-eval/SKILL.md +189 -0
  113. agentops/templates/skills/agentops-report/SKILL.md +71 -0
  114. agentops/templates/skills/agentops-workflow/SKILL.md +471 -0
  115. agentops/templates/smoke.jsonl +3 -0
  116. agentops/templates/waf-checklist.README.md +84 -0
  117. agentops/templates/waf-checklist.csv +22 -0
  118. agentops/templates/workflows/agentops-deploy-dev-azd.yml +166 -0
  119. agentops/templates/workflows/agentops-deploy-dev.yml +187 -0
  120. agentops/templates/workflows/agentops-deploy-prod-azd.yml +183 -0
  121. agentops/templates/workflows/agentops-deploy-prod.yml +171 -0
  122. agentops/templates/workflows/agentops-deploy-prompt-agent.yml +197 -0
  123. agentops/templates/workflows/agentops-deploy-qa-azd.yml +156 -0
  124. agentops/templates/workflows/agentops-deploy-qa.yml +145 -0
  125. agentops/templates/workflows/agentops-pr-prompt-agent.yml +210 -0
  126. agentops/templates/workflows/agentops-pr.yml +148 -0
  127. agentops/templates/workflows/agentops-watchdog.yml +122 -0
  128. agentops/utils/__init__.py +1 -0
  129. agentops/utils/azd_env.py +435 -0
  130. agentops/utils/azure_endpoints.py +62 -0
  131. agentops/utils/colors.py +47 -0
  132. agentops/utils/dotenv_loader.py +105 -0
  133. agentops/utils/foundry_discovery.py +229 -0
  134. agentops/utils/logging.py +59 -0
  135. agentops/utils/telemetry.py +554 -0
  136. agentops/utils/yaml.py +36 -0
  137. agentops_accelerator-0.3.0.dist-info/METADATA +278 -0
  138. agentops_accelerator-0.3.0.dist-info/RECORD +142 -0
  139. agentops_accelerator-0.3.0.dist-info/WHEEL +5 -0
  140. agentops_accelerator-0.3.0.dist-info/entry_points.txt +2 -0
  141. agentops_accelerator-0.3.0.dist-info/licenses/LICENSE +21 -0
  142. agentops_accelerator-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,240 @@
1
+ """Pydantic configuration model for the watchdog agent."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import List, Optional
7
+
8
+ from pydantic import BaseModel, ConfigDict, Field
9
+
10
+
11
class ResultsHistorySourceConfig(BaseModel):
    """Settings for the results-history source.

    Unknown keys are rejected because the model forbids extra fields.
    """

    model_config = ConfigDict(extra="forbid")

    # Source toggle; on unless explicitly disabled.
    enabled: bool = True
    # Where stored results live (presumably workspace-relative - TODO confirm).
    path: str = ".agentops/results"
    # Validation requires at least two runs.
    lookback_runs: int = Field(default=10, ge=2)
16
+
17
+
18
class AzureMonitorSourceConfig(BaseModel):
    """Settings for the Azure Monitor source.

    Both resource identifiers are optional and default to ``None``;
    unknown keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    # Source toggle; on unless explicitly disabled.
    enabled: bool = True
    # Optional Application Insights resource id.
    app_insights_resource_id: Optional[str] = None
    # Optional Log Analytics workspace id.
    log_analytics_workspace_id: Optional[str] = None
23
+
24
+
25
class FoundryControlSourceConfig(BaseModel):
    """Settings for the Foundry control source.

    Unknown keys are rejected because the model forbids extra fields.
    """

    model_config = ConfigDict(extra="forbid")

    # Source toggle; on unless explicitly disabled.
    enabled: bool = True
    # Explicit project endpoint, when provided.
    project_endpoint: Optional[str] = None
    # Name of an environment variable; presumably consulted when
    # ``project_endpoint`` is unset - verify against the source implementation.
    project_endpoint_env: str = "AZURE_AI_FOUNDRY_PROJECT_ENDPOINT"
    # Agent ids of interest; empty by default.
    agent_ids: List[str] = Field(default_factory=list)
31
+
32
+
33
class AzureResourcesSourceConfig(BaseModel):
    """Management-plane (read-only) source used for Azure posture audits.

    Needs ``Reader`` (or stronger) RBAC on the resource group, plus the
    ``[agent]`` extra, which pulls in ``azure-mgmt-cognitiveservices``
    and ``azure-mgmt-monitor``.
    """

    model_config = ConfigDict(extra="forbid")

    # Source toggle; on unless explicitly disabled.
    enabled: bool = True
    # Explicit subscription id, when provided.
    subscription_id: Optional[str] = None
    # Name of an environment variable; presumably the fallback for
    # ``subscription_id`` - verify against the source implementation.
    subscription_id_env: str = "AZURE_SUBSCRIPTION_ID"
    # Optional resource group name.
    resource_group: Optional[str] = None
    # Optional Cognitive Services account name.
    cognitive_services_account: Optional[str] = None
47
+
48
+
49
class SourcesConfig(BaseModel):
    """Container for the per-source settings.

    Each source gets its own default-constructed sub-config when the key
    is omitted; unknown keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    results_history: ResultsHistorySourceConfig = Field(
        default_factory=ResultsHistorySourceConfig,
    )
    azure_monitor: AzureMonitorSourceConfig = Field(
        default_factory=AzureMonitorSourceConfig,
    )
    foundry_control: FoundryControlSourceConfig = Field(
        default_factory=FoundryControlSourceConfig,
    )
    azure_resources: AzureResourcesSourceConfig = Field(
        default_factory=AzureResourcesSourceConfig,
    )
63
+
64
+
65
class RegressionCheckConfig(BaseModel):
    """Settings for the regression check.

    Unknown keys are rejected because the model forbids extra fields.
    """

    model_config = ConfigDict(extra="forbid")

    # Metric names covered by the check; a fresh list is built per instance.
    metrics: List[str] = Field(
        default_factory=lambda: [
            "coherence",
            "fluency",
            "similarity",
            "f1_score",
            "groundedness",
            "tool_call_accuracy",
        ],
    )
    # Constrained to the closed interval [0.0, 1.0].
    threshold_drop: float = Field(default=0.10, ge=0.0, le=1.0)
    # Validation requires at least two runs.
    min_runs: int = Field(default=3, ge=2)
79
+
80
+
81
class LatencyCheckConfig(BaseModel):
    """Settings for the latency check; unknown keys are rejected."""

    model_config = ConfigDict(extra="forbid")

    # p95 latency threshold in seconds; must be strictly positive.
    p95_threshold_seconds: float = Field(default=5.0, gt=0)
84
+
85
+
86
class ErrorsCheckConfig(BaseModel):
    """Settings for the errors check; unknown keys are rejected."""

    model_config = ConfigDict(extra="forbid")

    # Error-rate threshold, constrained to the closed interval [0.0, 1.0].
    rate_threshold: float = Field(default=0.05, ge=0.0, le=1.0)
89
+
90
+
91
class SafetyCheckConfig(BaseModel):
    """Settings for the safety check; unknown keys are rejected."""

    model_config = ConfigDict(extra="forbid")

    # Expected values: Low | Medium | High. The field is a plain string,
    # so the model itself does not enforce that set.
    severity_floor: str = "Medium"  # Low | Medium | High
    # Validation requires a value of at least 1.
    min_runtime_hits: int = Field(default=1, ge=1)
    # Validation requires a value of at least 1.
    runtime_critical_hits: int = Field(default=10, ge=1)
96
+
97
+
98
class PostureCheckConfig(BaseModel):
    """Configuration for the WAF-AI posture audit.

    The MVP rule set covers the **Security** pillar of the Microsoft
    Well-Architected Framework for AI workloads.

    The audit is on by default. When the Azure resources source cannot
    be discovered or read, the check yields no findings and records an
    actionable source diagnostic rather than failing the whole Doctor run.
    """

    model_config = ConfigDict(extra="forbid")

    # Check toggle; on unless explicitly disabled.
    enabled: bool = True
    # Pillar selector; defaults to the security pillar.
    pillar: str = "security"
    # Rule identifiers to exclude; empty by default.
    exclude_rules: List[str] = Field(default_factory=list)
113
+
114
+
115
class OpexCheckConfig(BaseModel):
    """Configuration for the time-based operational-excellence check."""

    model_config = ConfigDict(extra="forbid")

    # Check toggle; on unless explicitly disabled.
    enabled: bool = True
    # Staleness window in days; at least 1.
    stale_after_days: int = Field(default=14, ge=1)
    # Validation requires at least three runs.
    min_runs_for_flaky: int = Field(default=5, ge=3)
    # Constrained to the half-open interval (0.0, 1.0].
    flaky_cv_threshold: float = Field(default=0.30, gt=0.0, le=1.0)
123
+
124
+
125
class LLMAssistCheckConfig(BaseModel):
    """Configuration for the LLM-judged advisory checks.

    On by default: the Doctor auto-discovers a judge model from the
    Foundry project on first use and reuses it afterwards. Use
    ``enabled: false`` to skip the whole suite, e.g. in ephemeral CI
    sandboxes without Foundry access.

    The judge model is called through the Foundry project's OpenAI
    client, so no additional credential flow is introduced.
    """

    model_config = ConfigDict(extra="forbid")

    # Suite toggle; on unless explicitly disabled.
    enabled: bool = True
    # Explicit judge deployment name, when provided.
    deployment_name: Optional[str] = None
    # Name of an environment variable; presumably the fallback for
    # ``deployment_name`` - verify against the client implementation.
    deployment_name_env: str = "AZURE_AI_MODEL_DEPLOYMENT_NAME"
    # Name of an environment variable; presumably the fallback for
    # ``project_endpoint`` - verify against the client implementation.
    project_endpoint_env: str = "AZURE_AI_FOUNDRY_PROJECT_ENDPOINT"
    # Explicit project endpoint, when provided.
    project_endpoint: Optional[str] = None
    # Rule ids; empty by default.
    rules: List[str] = Field(default_factory=list)
    # Constrained to 1..500.
    max_dataset_rows: int = Field(default=50, ge=1, le=500)
    # Constrained to the closed interval [0.0, 1.0].
    min_confidence: float = Field(default=0.6, ge=0.0, le=1.0)
    # Non-negative number of days.
    cache_ttl_days: int = Field(default=30, ge=0)
147
+
148
+
149
class LLMSpecConformanceConfig(BaseModel):
    """Sub-config for the LLM gap analysis used by spec-conformance.

    Unlike the surrounding checks, this one is off by default.
    """

    model_config = ConfigDict(extra="forbid")

    # Opt-in toggle; off unless explicitly enabled.
    enabled: bool = False
    # Constrained to the closed interval [0.0, 1.0].
    severity_floor: float = Field(default=0.6, ge=0.0, le=1.0)
    # Constrained to 1_000..200_000 characters.
    max_input_chars: int = Field(default=30_000, ge=1_000, le=200_000)
    # Constrained to 10..2_000 paths.
    max_workspace_paths: int = Field(default=200, ge=10, le=2_000)
157
+
158
+
159
class SpecConformanceCheckConfig(BaseModel):
    """Spec-conformance sub-check, part of Operational Excellence.

    Looks through the workspace for spec-driven-development artifacts
    (spec-kit ``.specify/``, ``AGENTS.md``, Copilot instructions) and
    flags drift between the spec and the implementation.
    """

    model_config = ConfigDict(extra="forbid")

    # Check toggle; on unless explicitly disabled.
    enabled: bool = True
    # Detector names to run; a fresh list is built per instance.
    detectors: List[str] = Field(
        default_factory=lambda: ["spec-kit", "agents-md"],
    )
    # Staleness window in days; at least 1.
    stale_after_days: int = Field(default=30, ge=1)
    # Entries to skip; empty by default.
    skip: List[str] = Field(default_factory=list)
    # Nested LLM gap-analysis settings.
    llm_assist: LLMSpecConformanceConfig = Field(
        default_factory=LLMSpecConformanceConfig,
    )
178
+
179
+
180
class OperationalExcellenceCheckConfig(BaseModel):
    """Groups the sub-checks that belong to Operational Excellence."""

    model_config = ConfigDict(extra="forbid")

    # Default-constructed when the key is omitted.
    spec_conformance: SpecConformanceCheckConfig = Field(
        default_factory=SpecConformanceCheckConfig,
    )
187
+
188
+
189
class ChecksConfig(BaseModel):
    """Container for the per-check settings.

    Every check falls back to its own default-constructed sub-config
    when the key is omitted; unknown keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    regression: RegressionCheckConfig = Field(
        default_factory=RegressionCheckConfig,
    )
    latency: LatencyCheckConfig = Field(default_factory=LatencyCheckConfig)
    errors: ErrorsCheckConfig = Field(default_factory=ErrorsCheckConfig)
    safety: SafetyCheckConfig = Field(default_factory=SafetyCheckConfig)
    posture: PostureCheckConfig = Field(default_factory=PostureCheckConfig)
    opex: OpexCheckConfig = Field(default_factory=OpexCheckConfig)
    operational_excellence: OperationalExcellenceCheckConfig = Field(
        default_factory=OperationalExcellenceCheckConfig,
    )
    llm_assist: LLMAssistCheckConfig = Field(
        default_factory=LLMAssistCheckConfig,
    )
201
+
202
+
203
class ServerConfig(BaseModel):
    """Server-related settings; unknown keys are rejected."""

    model_config = ConfigDict(extra="forbid")

    # Optional GitHub App client id; unset by default.
    github_app_client_id: Optional[str] = None
206
+
207
+
208
class AgentConfig(BaseModel):
    """Top-level model describing ``.agentops/agent.yaml``.

    Every section is optional and falls back to its default-constructed
    sub-config; unknown keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    # Config schema version.
    version: int = 1
    sources: SourcesConfig = Field(default_factory=SourcesConfig)
    checks: ChecksConfig = Field(default_factory=ChecksConfig)
    server: ServerConfig = Field(default_factory=ServerConfig)
    # Lookback window in days; at least 1.
    lookback_days: int = Field(default=7, ge=1)
217
+
218
+
219
def load_agent_config(path: Optional[Path]) -> AgentConfig:
    """Build an :class:`AgentConfig` from a YAML file, or fall back to defaults.

    A default-constructed config is returned when ``path`` is ``None`` or
    does not exist. Otherwise the file is parsed and validated.

    Before validation, a list found at ``checks.llm_assist.rules`` is run
    through ``canonicalize_id_list``: legacy ``genaiops.*`` rule ids are
    rewritten to their canonical ``opex.*`` equivalents with a one-shot
    deprecation warning. See ``_legacy_ids.py`` for details.
    """
    if path is None or not path.exists():
        return AgentConfig()

    # Imported lazily, only once a config file actually has to be read.
    from agentops.utils.yaml import load_yaml

    from agentops.agent._legacy_ids import canonicalize_id_list

    raw = load_yaml(path)

    # Walk down to checks.llm_assist, tolerating any non-mapping level;
    # shape errors are left for model validation to report.
    checks = raw.get("checks") if isinstance(raw, dict) else None
    llm = checks.get("llm_assist") if isinstance(checks, dict) else None
    if isinstance(llm, dict):
        rules = llm.get("rules")
        if isinstance(rules, list):
            llm["rules"] = canonicalize_id_list(rules)

    return AgentConfig.model_validate(raw)
@@ -0,0 +1,113 @@
1
+ """Severity-ranked findings produced by the watchdog agent."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from enum import Enum
7
+ from typing import Any, Dict
8
+
9
+
10
class Category(str, Enum):
    """Stable, user-facing bucket that a finding belongs to.

    Buckets drive filtering and the grouping of the watchdog report.
    They are orthogonal to severity: a `quality` finding may be
    `critical`, `warning`, or `info`.

    The buckets mirror the pillars of the **Microsoft Well-Architected
    Framework for AI** and act as the single source of truth shared by
    doctor findings, ``waf-checklist.csv`` (``pillar`` column), the
    cockpit rows, and config skip-lists.

    * ``quality`` - eval-driven signals (regression, content-safety)
    * ``performance`` - latency / throughput signals
    * ``reliability`` - error / failure signals
    * ``operational_excellence`` - workspace + CI hygiene, Foundry config
      audit, and spec-conformance signals (pinning, gates, drift,
      versioning, spec-vs-implementation alignment)
    * ``security`` - repo & identity surface beyond Foundry Compliance
    * ``responsible_ai`` - prompt + eval-bundle heuristics for RAI practices
    """

    QUALITY = "quality"
    PERFORMANCE = "performance"
    RELIABILITY = "reliability"
    OPERATIONAL_EXCELLENCE = "operational_excellence"
    SECURITY = "security"
    RESPONSIBLE_AI = "responsible_ai"
38
+
39
+
40
class Severity(str, Enum):
    """How serious a finding is.

    Members order by ``rank`` (info < warning < critical), not by their
    string values. Comparing against anything that is not a ``Severity``
    yields ``NotImplemented`` so Python can try the other operand.
    """

    INFO = "info"
    WARNING = "warning"
    CRITICAL = "critical"

    @property
    def rank(self) -> int:
        # The lookup table is defined right after the class body.
        return _SEVERITY_RANK[self]

    def __lt__(self, other: object) -> bool:  # type: ignore[override]
        return self.rank < other.rank if isinstance(other, Severity) else NotImplemented

    def __le__(self, other: object) -> bool:  # type: ignore[override]
        return self.rank <= other.rank if isinstance(other, Severity) else NotImplemented

    def __gt__(self, other: object) -> bool:  # type: ignore[override]
        return self.rank > other.rank if isinstance(other, Severity) else NotImplemented

    def __ge__(self, other: object) -> bool:  # type: ignore[override]
        return self.rank >= other.rank if isinstance(other, Severity) else NotImplemented


# Numeric ordering behind ``Severity.rank``: least to most severe.
_SEVERITY_RANK = {
    level: position
    for position, level in enumerate(
        (Severity.INFO, Severity.WARNING, Severity.CRITICAL)
    )
}
77
+
78
+
79
# Emoji shown for each severity level.
_SEVERITY_EMOJI = {
    Severity.INFO: "ℹ️",
    Severity.WARNING: "⚠️",
    Severity.CRITICAL: "🚨",
}


def severity_emoji(severity: Severity) -> str:
    """Return the emoji registered for ``severity``.

    Raises ``KeyError`` for a value that has no entry in the table.
    """
    emoji = _SEVERITY_EMOJI[severity]
    return emoji
88
+
89
+
90
@dataclass
class Finding:
    """One observation surfaced by the watchdog agent."""

    id: str
    severity: Severity
    title: str
    summary: str
    recommendation: str
    source: str
    category: Category = Category.QUALITY
    evidence: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view with enum fields reduced to their values.

        The ``evidence`` mapping is included as-is (not copied).
        """
        payload: Dict[str, Any] = {
            "id": self.id,
            "severity": self.severity.value,
            "category": self.category.value,
        }
        # Remaining fields are emitted unchanged, in this fixed key order.
        for name in ("title", "summary", "recommendation", "source", "evidence"):
            payload[name] = getattr(self, name)
        return payload
+ }
@@ -0,0 +1,142 @@
1
+ """Append-only analysis history for the watchdog agent.
2
+
3
+ Each ``agentops doctor`` invocation appends one JSON record to
4
+ ``.agentops/agent/history.jsonl``. The file is the canonical local
5
+ storage for the cockpit (``agentops cockpit``) and for any future
6
+ trend-based checks. No Azure resource required.
7
+
8
+ When OpenTelemetry tracing is configured, the same record is also
9
+ emitted as a span (see :func:`agentops.utils.telemetry.agent_analyze_span`);
10
+ the local JSONL remains authoritative because it works even when
11
+ tracing is disabled.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ from collections import Counter
18
+ from dataclasses import dataclass, field
19
+ from datetime import datetime, timezone
20
+ from pathlib import Path
21
+ from typing import Any, Dict, List, Optional
22
+
23
+ from agentops.agent.findings import Category, Finding, Severity
24
+
25
# Workspace-relative location of the append-only analysis history file.
_HISTORY_REL_PATH = ".agentops/agent/history.jsonl"
26
+
27
+
28
@dataclass
class AnalysisRecord:
    """A single watchdog analysis, captured for the cockpit and trend checks."""

    timestamp: str
    findings_total: int
    findings_by_severity: Dict[str, int]
    findings_by_category: Dict[str, int]
    max_severity: Optional[str]
    sources_enabled: List[str]
    lookback_days: Optional[int]
    duration_seconds: Optional[float]
    findings: List[Dict[str, Any]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a plain dict, one key per field."""
        return {
            "timestamp": self.timestamp,
            "findings_total": self.findings_total,
            "findings_by_severity": self.findings_by_severity,
            "findings_by_category": self.findings_by_category,
            "max_severity": self.max_severity,
            "sources_enabled": self.sources_enabled,
            "lookback_days": self.lookback_days,
            "duration_seconds": self.duration_seconds,
            "findings": self.findings,
        }

    @classmethod
    def from_dict(cls, payload: Dict[str, Any]) -> "AnalysisRecord":
        """Rebuild a record from a dict such as one produced by :meth:`to_dict`.

        Missing keys and explicit ``null`` values both fall back to an
        empty default (``""``, ``0``, ``{}``, ``[]`` or ``None``).

        Raises:
            TypeError, ValueError: when a present value cannot be coerced,
                e.g. a non-numeric ``findings_total``.
        """
        return cls(
            # ``or`` treats an explicit null like a missing key, matching the
            # container fields below; previously a null timestamp became the
            # string "None" and a null total raised TypeError.
            timestamp=str(payload.get("timestamp") or ""),
            findings_total=int(payload.get("findings_total") or 0),
            findings_by_severity=dict(payload.get("findings_by_severity") or {}),
            findings_by_category=dict(payload.get("findings_by_category") or {}),
            max_severity=payload.get("max_severity"),
            sources_enabled=list(payload.get("sources_enabled") or []),
            lookback_days=payload.get("lookback_days"),
            duration_seconds=payload.get("duration_seconds"),
            findings=list(payload.get("findings") or []),
        )
68
+
69
+
70
def history_path(workspace: Path) -> Path:
    """Locate the analysis history file underneath ``workspace``.

    The result is ``workspace`` joined with the history file's relative
    path, so it is absolute whenever ``workspace`` is.
    """
    return workspace.joinpath(_HISTORY_REL_PATH)
73
+
74
+
75
def build_record(
    findings: List[Finding],
    *,
    sources_enabled: List[str],
    lookback_days: Optional[int],
    duration_seconds: Optional[float],
    timestamp: Optional[datetime] = None,
) -> AnalysisRecord:
    """Summarise a list of findings as a serialisable :class:`AnalysisRecord`.

    The record's timestamp is ``timestamp`` when given, otherwise the
    current UTC time. Severity and category tallies list every enum
    member, using zero for members with no findings. ``max_severity`` is
    ``None`` for an empty finding list.
    """
    moment = datetime.now(timezone.utc) if timestamp is None else timestamp

    by_severity = Counter(item.severity.value for item in findings)
    by_category = Counter(item.category.value for item in findings)

    if findings:
        worst = max(findings, key=lambda item: item.severity.rank)
        max_severity = worst.severity.value
    else:
        max_severity = None

    return AnalysisRecord(
        timestamp=moment.isoformat(),
        findings_total=len(findings),
        # A Counter yields 0 for keys it has never seen.
        findings_by_severity={level.value: by_severity[level.value] for level in Severity},
        findings_by_category={bucket.value: by_category[bucket.value] for bucket in Category},
        max_severity=max_severity,
        sources_enabled=list(sources_enabled),
        lookback_days=lookback_days,
        duration_seconds=duration_seconds,
        findings=[item.to_dict() for item in findings],
    )
100
+
101
+
102
def append_analysis(workspace: Path, record: AnalysisRecord) -> Path:
    """Write ``record`` as one JSON line at the end of the history file.

    Parent directories are created on demand. Returns the path of the
    history file that was written.
    """
    target = history_path(workspace)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("a", encoding="utf-8") as sink:
        # One record per line; non-ASCII text is written verbatim.
        serialised = json.dumps(record.to_dict(), ensure_ascii=False)
        sink.write(serialised + "\n")
    return target
109
+
110
+
111
def load_analysis_history(
    workspace: Path,
    *,
    limit: Optional[int] = None,
) -> List[AnalysisRecord]:
    """Load all records (or the most recent ``limit``) from history.jsonl.

    Returns an empty list when the file does not exist, so callers can
    treat history as "best effort" without special-casing first runs.
    Malformed lines are skipped silently rather than crashing the
    cockpit or trend checks. That covers lines that are not valid JSON,
    JSON values that are not objects, and objects whose fields cannot be
    coerced into an :class:`AnalysisRecord`.

    A ``limit`` of ``None`` or any non-positive number returns every
    record.
    """
    path = history_path(workspace)
    if not path.exists():
        return []

    records: List[AnalysisRecord] = []
    with path.open("r", encoding="utf-8") as handle:
        for line in handle:
            text = line.strip()
            if not text:
                continue
            try:
                payload = json.loads(text)
            except ValueError:
                continue
            if not isinstance(payload, dict):
                continue
            try:
                record = AnalysisRecord.from_dict(payload)
            except (TypeError, ValueError):
                # Valid JSON with the wrong field shapes (e.g. a
                # non-numeric ``findings_total``) is malformed too.
                continue
            records.append(record)

    if limit is not None and limit > 0:
        records = records[-limit:]
    return records
@@ -0,0 +1,182 @@
1
+ """WAF AI Landing Zones knowledge base for the Doctor agent.
2
+
3
+ This package ships a CSV (`waf-checklist.csv`) that maps every Doctor
4
+ finding id to the Microsoft Well-Architected Framework (WAF) for AI
5
+ workloads pillar/area it belongs to.
6
+
7
+ The shipped CSV is the **packaged baseline**. Users can override or
8
+ extend it on a per-workspace basis by dropping a file at
9
+ ``.agentops/waf-checklist.csv`` (created by ``agentops init``). The
10
+ loader merges the two: rows in the workspace file override packaged
11
+ rows when ``doctor_check_id`` matches, and add new rows when the id
12
+ is new. This keeps the toolkit's baseline updateable while letting
13
+ teams version their own rules alongside the project.
14
+
15
+ Strict scope rule: a row only exists if Doctor itself can produce
16
+ that finding. Items that are exclusively visible through Foundry
17
+ Operate -> Compliance (public network, private endpoints,
18
+ content-filter attachment, custom subdomain) are intentionally *not*
19
+ in the packaged CSV - they belong to Foundry's surface, not the
20
+ Doctor's.
21
+
22
+ The CSV's ``doctor_check_id`` column may carry either a fully
23
+ qualified finding id (``opex.no_pr_gate``) or a prefix
24
+ (``regression`` covers ``regression.coherence``,
25
+ ``regression.fluency``, ...). Lookups in :func:`find_waf_item` walk
26
+ the dot segments from longest to shortest so the most specific row
27
+ wins.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import csv
33
+ import logging
34
+ from dataclasses import dataclass
35
+ from functools import lru_cache
36
+ from importlib import resources
37
+ from pathlib import Path
38
+ from typing import Dict, List, Optional
39
+
40
# Module-level logger named after this package.
log = logging.getLogger(__name__)

# File name of the checklist CSV shipped inside this package.
_CSV_NAME = "waf-checklist.csv"
# Workspace-relative location of the optional override checklist.
WORKSPACE_OVERRIDE_PATH = Path(".agentops") / "waf-checklist.csv"
# Header columns every checklist must provide; a file missing any of them
# is rejected as a whole by the CSV parser in this module.
_EXPECTED_COLUMNS = (
    "pillar",
    "area",
    "item_id",
    "title",
    "detection_source",
    "detection_signal",
    "doctor_check_id",
    "status",
    "reference_url",
)
55
+
56
+
57
@dataclass(frozen=True)
class WAFItem:
    """Immutable representation of a single WAF checklist row.

    Each attribute carries the value of the CSV column with the same name.
    """

    pillar: str
    area: str
    item_id: str
    title: str
    detection_source: str
    detection_signal: str
    # Full finding id or a dotted prefix of one; used as the lookup key.
    doctor_check_id: str
    status: str
    reference_url: str
70
+
71
+
72
def _row_to_item(row: Dict[str, str]) -> Optional[WAFItem]:
    """Convert one parsed CSV row into a :class:`WAFItem`.

    Every cell is whitespace-trimmed, and missing or empty cells become
    ``""``. Rows whose ``doctor_check_id`` is blank yield ``None``.
    """

    def _cell(column: str) -> str:
        # ``or ""`` also covers cells the CSV reader filled with None.
        return (row.get(column) or "").strip()

    check_id = _cell("doctor_check_id")
    if check_id == "":
        return None

    return WAFItem(
        pillar=_cell("pillar"),
        area=_cell("area"),
        item_id=_cell("item_id"),
        title=_cell("title"),
        detection_source=_cell("detection_source"),
        detection_signal=_cell("detection_signal"),
        doctor_check_id=check_id,
        status=_cell("status"),
        reference_url=_cell("reference_url"),
    )
87
+
88
+
89
def _parse_csv_text(label: str, text: str) -> List[WAFItem]:
    """Parse checklist CSV text into :class:`WAFItem` objects.

    ``label`` only identifies the input in log output. If the header
    lacks any expected column, a warning is logged and an empty list is
    returned. Rows that do not convert to an item are dropped.
    """
    reader = csv.DictReader(text.splitlines())
    header = reader.fieldnames or []
    missing = [column for column in _EXPECTED_COLUMNS if column not in header]
    if missing:
        log.warning("WAF checklist %s is missing columns: %s", label, missing)
        return []
    return [
        parsed
        for parsed in map(_row_to_item, reader)
        if parsed is not None
    ]
101
+
102
+
103
@lru_cache(maxsize=1)
def _packaged_items() -> List[WAFItem]:
    """Return the baseline checklist shipped in the wheel (cached).

    A missing or unreadable packaged file is logged as a warning and
    results in an empty list.
    """
    packaged_csv = resources.files(__name__).joinpath(_CSV_NAME)
    try:
        text = packaged_csv.read_text(encoding="utf-8")
    except OSError as exc:
        # FileNotFoundError is an OSError subclass; it keeps its own message.
        if isinstance(exc, FileNotFoundError):
            log.warning("Packaged WAF checklist not found")
        else:
            log.warning("Packaged WAF checklist could not be read: %s", exc)
        return []
    return _parse_csv_text("packaged", text)
117
+
118
+
119
def _workspace_items(workspace: Path) -> List[WAFItem]:
    """Load the workspace override file. NOT cached - users edit it in place.

    Returns an empty list when the override file is absent or cannot be
    read (the latter is logged as a warning). Lines whose first
    non-blank character is ``#`` are treated as comments and removed
    before the text is parsed as CSV.
    """
    override = workspace / WORKSPACE_OVERRIDE_PATH
    if not override.is_file():
        return []
    try:
        # ``utf-8-sig`` drops a leading byte-order mark when present and
        # otherwise decodes exactly like ``utf-8``. Without it, a BOM left
        # by a spreadsheet export sticks to the first header name, which
        # makes the column check fail and the whole override get ignored.
        text = override.read_text(encoding="utf-8-sig")
    except OSError as exc:
        log.warning("Workspace WAF checklist at %s unreadable: %s", override, exc)
        return []
    # Strip comment lines (#-prefixed) before parsing as CSV.
    cleaned = "\n".join(
        line for line in text.splitlines() if not line.lstrip().startswith("#")
    )
    return _parse_csv_text(f"workspace ({override})", cleaned)
134
+
135
+
136
def load_waf_checklist(workspace: Optional[Path] = None) -> List[WAFItem]:
    """Return the packaged checklist merged with any workspace override.

    Rows are keyed by ``doctor_check_id``: a workspace row replaces the
    packaged row with the same id, and rows with new ids are added. With
    ``workspace`` set to ``None``, or with no override file present, the
    result is the packaged baseline alone.
    """
    merged: Dict[str, WAFItem] = {}
    for entry in _packaged_items():
        merged[entry.doctor_check_id] = entry

    if workspace is not None:
        merged.update(
            (entry.doctor_check_id, entry) for entry in _workspace_items(workspace)
        )

    return list(merged.values())
151
+
152
+
153
def waf_index_by_check_id(
    workspace: Optional[Path] = None,
) -> Dict[str, WAFItem]:
    """Build a `{doctor_check_id: WAFItem}` lookup from the merged checklist."""
    index: Dict[str, WAFItem] = {}
    for entry in load_waf_checklist(workspace):
        index[entry.doctor_check_id] = entry
    return index
158
+
159
+
160
def find_waf_item(
    finding_id: str, workspace: Optional[Path] = None
) -> Optional[WAFItem]:
    """Look up the WAF row for a finding id, preferring the most specific match.

    Candidates are tried from the full id down to its first dot segment:
    for ``safety.runtime.content_filter`` that is the full id, then
    ``safety.runtime``, then ``safety``. The first hit is returned;
    ``None`` is returned for an empty id or when nothing matches.

    With ``workspace`` given, workspace-level overrides take precedence
    (see :func:`load_waf_checklist`).
    """
    if not finding_id:
        return None

    index = waf_index_by_check_id(workspace)
    segments = finding_id.split(".")
    # Shrink the candidate one trailing segment at a time.
    for end in range(len(segments), 0, -1):
        hit = index.get(".".join(segments[:end]))
        if hit is not None:
            return hit
    return None