agentops-accelerator 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentops/__init__.py +10 -0
- agentops/__main__.py +6 -0
- agentops/agent/__init__.py +12 -0
- agentops/agent/_legacy_ids.py +92 -0
- agentops/agent/analyzer.py +207 -0
- agentops/agent/checks/__init__.py +1 -0
- agentops/agent/checks/catalog.py +880 -0
- agentops/agent/checks/errors.py +279 -0
- agentops/agent/checks/foundry_config.py +75 -0
- agentops/agent/checks/latency.py +84 -0
- agentops/agent/checks/opex.py +157 -0
- agentops/agent/checks/opex_workspace.py +874 -0
- agentops/agent/checks/posture.py +36 -0
- agentops/agent/checks/posture_rules/__init__.py +53 -0
- agentops/agent/checks/posture_rules/content_filter.py +59 -0
- agentops/agent/checks/posture_rules/diagnostics.py +74 -0
- agentops/agent/checks/posture_rules/local_auth.py +55 -0
- agentops/agent/checks/posture_rules/managed_identity.py +59 -0
- agentops/agent/checks/posture_rules/network.py +68 -0
- agentops/agent/checks/regression.py +78 -0
- agentops/agent/checks/release_readiness.py +182 -0
- agentops/agent/checks/safety.py +247 -0
- agentops/agent/checks/spec_conformance.py +375 -0
- agentops/agent/cockpit.py +5159 -0
- agentops/agent/config.py +240 -0
- agentops/agent/findings.py +113 -0
- agentops/agent/history.py +142 -0
- agentops/agent/knowledge/__init__.py +182 -0
- agentops/agent/knowledge/waf-checklist.csv +39 -0
- agentops/agent/llm_assist/__init__.py +16 -0
- agentops/agent/llm_assist/_base.py +124 -0
- agentops/agent/llm_assist/_bundle_rule.py +154 -0
- agentops/agent/llm_assist/_client.py +347 -0
- agentops/agent/llm_assist/_dataset_rules.py +191 -0
- agentops/agent/llm_assist/_engine.py +106 -0
- agentops/agent/llm_assist/_prompt_rules.py +291 -0
- agentops/agent/llm_assist/_spec_rules.py +235 -0
- agentops/agent/production_telemetry.py +430 -0
- agentops/agent/report.py +207 -0
- agentops/agent/server/__init__.py +1 -0
- agentops/agent/server/app.py +84 -0
- agentops/agent/server/auth.py +94 -0
- agentops/agent/server/chat.py +44 -0
- agentops/agent/server/protocol.py +72 -0
- agentops/agent/sources/__init__.py +1 -0
- agentops/agent/sources/azure_monitor.py +523 -0
- agentops/agent/sources/azure_resources.py +602 -0
- agentops/agent/sources/foundry_control.py +174 -0
- agentops/agent/sources/results_history.py +494 -0
- agentops/agent/sources/spec_detectors/__init__.py +42 -0
- agentops/agent/sources/spec_detectors/_base.py +58 -0
- agentops/agent/sources/spec_detectors/agents_md.py +75 -0
- agentops/agent/sources/spec_detectors/spec_kit.py +172 -0
- agentops/agent/time_range.py +117 -0
- agentops/cli/__init__.py +1 -0
- agentops/cli/app.py +4823 -0
- agentops/core/__init__.py +1 -0
- agentops/core/agentops_config.py +592 -0
- agentops/core/config_loader.py +22 -0
- agentops/core/evaluators.py +480 -0
- agentops/core/release_evidence.py +56 -0
- agentops/core/results.py +117 -0
- agentops/mcp/__init__.py +10 -0
- agentops/mcp/server.py +232 -0
- agentops/pipeline/__init__.py +8 -0
- agentops/pipeline/cloud_results.py +189 -0
- agentops/pipeline/cloud_runner.py +901 -0
- agentops/pipeline/comparison.py +108 -0
- agentops/pipeline/diagnostics.py +51 -0
- agentops/pipeline/invocations.py +535 -0
- agentops/pipeline/official_eval.py +414 -0
- agentops/pipeline/orchestrator.py +775 -0
- agentops/pipeline/prompt_deploy.py +377 -0
- agentops/pipeline/publisher.py +121 -0
- agentops/pipeline/reporter.py +202 -0
- agentops/pipeline/runtime.py +409 -0
- agentops/pipeline/thresholds.py +84 -0
- agentops/services/__init__.py +1 -0
- agentops/services/cicd.py +720 -0
- agentops/services/eval_analysis.py +848 -0
- agentops/services/evidence_pack.py +757 -0
- agentops/services/initializer.py +86 -0
- agentops/services/preflight.py +470 -0
- agentops/services/setup_wizard.py +709 -0
- agentops/services/skills.py +643 -0
- agentops/services/trace_promotion.py +300 -0
- agentops/services/workflow_analysis.py +1129 -0
- agentops/templates/.gitignore +15 -0
- agentops/templates/__init__.py +1 -0
- agentops/templates/agent-server/Dockerfile +23 -0
- agentops/templates/agent-server/README.md +61 -0
- agentops/templates/agent-server/main.bicep +94 -0
- agentops/templates/agent.yaml +87 -0
- agentops/templates/agentops.yaml +58 -0
- agentops/templates/foundry.svg +71 -0
- agentops/templates/icon.png +0 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-dev-azd.yml +118 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-dev.yml +73 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-prod-azd.yml +141 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-prod.yml +94 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-prompt-agent.yml +167 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-qa-azd.yml +118 -0
- agentops/templates/pipelines/azuredevops/agentops-deploy-qa.yml +68 -0
- agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml +210 -0
- agentops/templates/pipelines/azuredevops/agentops-pr.yml +155 -0
- agentops/templates/pipelines/azuredevops/agentops-watchdog.yml +106 -0
- agentops/templates/project.gitignore +36 -0
- agentops/templates/sample-traces.jsonl +3 -0
- agentops/templates/skills/agentops-agent/SKILL.md +137 -0
- agentops/templates/skills/agentops-config/SKILL.md +113 -0
- agentops/templates/skills/agentops-dataset/SKILL.md +84 -0
- agentops/templates/skills/agentops-eval/SKILL.md +189 -0
- agentops/templates/skills/agentops-report/SKILL.md +71 -0
- agentops/templates/skills/agentops-workflow/SKILL.md +471 -0
- agentops/templates/smoke.jsonl +3 -0
- agentops/templates/waf-checklist.README.md +84 -0
- agentops/templates/waf-checklist.csv +22 -0
- agentops/templates/workflows/agentops-deploy-dev-azd.yml +166 -0
- agentops/templates/workflows/agentops-deploy-dev.yml +187 -0
- agentops/templates/workflows/agentops-deploy-prod-azd.yml +183 -0
- agentops/templates/workflows/agentops-deploy-prod.yml +171 -0
- agentops/templates/workflows/agentops-deploy-prompt-agent.yml +197 -0
- agentops/templates/workflows/agentops-deploy-qa-azd.yml +156 -0
- agentops/templates/workflows/agentops-deploy-qa.yml +145 -0
- agentops/templates/workflows/agentops-pr-prompt-agent.yml +210 -0
- agentops/templates/workflows/agentops-pr.yml +148 -0
- agentops/templates/workflows/agentops-watchdog.yml +122 -0
- agentops/utils/__init__.py +1 -0
- agentops/utils/azd_env.py +435 -0
- agentops/utils/azure_endpoints.py +62 -0
- agentops/utils/colors.py +47 -0
- agentops/utils/dotenv_loader.py +105 -0
- agentops/utils/foundry_discovery.py +229 -0
- agentops/utils/logging.py +59 -0
- agentops/utils/telemetry.py +554 -0
- agentops/utils/yaml.py +36 -0
- agentops_accelerator-0.3.0.dist-info/METADATA +278 -0
- agentops_accelerator-0.3.0.dist-info/RECORD +142 -0
- agentops_accelerator-0.3.0.dist-info/WHEEL +5 -0
- agentops_accelerator-0.3.0.dist-info/entry_points.txt +2 -0
- agentops_accelerator-0.3.0.dist-info/licenses/LICENSE +21 -0
- agentops_accelerator-0.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Posture check - runs the WAF-AI rule registry against the resource snapshot."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import List
|
|
6
|
+
|
|
7
|
+
from agentops.agent.checks.posture_rules import RULE_REGISTRY
|
|
8
|
+
from agentops.agent.config import PostureCheckConfig
|
|
9
|
+
from agentops.agent.findings import Finding
|
|
10
|
+
from agentops.agent.sources.azure_resources import AzureResourcesPayload
|
|
11
|
+
|
|
12
|
+
# Source label handed to every posture rule as its ``source_name`` argument.
SOURCE_NAME = "azure_resources"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def run_posture_check(
    resources: AzureResourcesPayload,
    config: PostureCheckConfig,
) -> List[Finding]:
    """Run every registered WAF-AI posture rule against the resource snapshot.

    Args:
        resources: Snapshot of the Azure account. No rule is evaluated unless
            its ``diagnostics`` mapping reports ``status == "ok"`` and
            ``account`` is set.
        config: Posture check settings. ``enabled`` switches the whole check
            off; ``exclude_rules`` lists rule ids to skip.

    Returns:
        The findings of all executed rules, in registry order. Empty when the
        check is disabled or the snapshot is unusable.
    """
    import logging

    if not config.enabled:
        return []

    diag = resources.diagnostics or {}
    if diag.get("status") != "ok" or resources.account is None:
        return []

    # Tolerate a missing exclude list and ignore blank / whitespace-only ids.
    excluded = {
        rid.strip() for rid in (config.exclude_rules or ()) if rid and rid.strip()
    }

    findings: List[Finding] = []
    for rule_id, rule_fn in RULE_REGISTRY.items():
        if rule_id in excluded:
            continue
        try:
            findings.extend(rule_fn(resources, SOURCE_NAME))
        except Exception:  # pragma: no cover - rules must be defensive
            # Best effort: one broken rule must not hide the others, but a
            # silently skipped security rule deserves a log line.
            logging.getLogger(__name__).warning(
                "Posture rule %s raised and was skipped", rule_id, exc_info=True
            )
            continue
    return findings
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Rule registry for the WAF-AI posture check.
|
|
2
|
+
|
|
3
|
+
Each rule is a small callable that receives the
|
|
4
|
+
:class:`AzureResourcesPayload` and the source name, and returns a list
|
|
5
|
+
of :class:`Finding`s (zero, one, or many). Rules are independent and
|
|
6
|
+
pure.
|
|
7
|
+
|
|
8
|
+
The ``posture`` check (see :mod:`agentops.agent.checks.posture`)
|
|
9
|
+
iterates the rules registered here and aggregates the findings.
|
|
10
|
+
|
|
11
|
+
To add a new rule:
|
|
12
|
+
|
|
13
|
+
* Add a module under this package.
|
|
14
|
+
* Implement ``def evaluate(payload, source_name) -> list[Finding]``.
|
|
15
|
+
* Register it in :data:`RULE_REGISTRY` below.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from typing import Callable, Dict, List
|
|
21
|
+
|
|
22
|
+
from agentops.agent.findings import Finding
|
|
23
|
+
from agentops.agent.sources.azure_resources import AzureResourcesPayload
|
|
24
|
+
|
|
25
|
+
# Signature shared by every posture rule: (payload, source_name) -> findings.
RuleFn = Callable[[AzureResourcesPayload, str], List[Finding]]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _build_registry() -> Dict[str, RuleFn]:
    """Assemble the mapping from rule id to rule callable.

    The rule modules are imported inside this function rather than at the top
    of the package module.
    """
    # NOTE: the ``content_filter`` and ``network`` rules are deliberately not
    # registered. Foundry's Operate -> Compliance surface now covers them
    # natively (Guardrails tab and Security posture tab respectively), so the
    # watchdog concentrates on the **complementary** half - runtime telemetry,
    # identity scope, pipeline hygiene. Their modules stay on disk so custom
    # posture extensions that import them keep working.
    from agentops.agent.checks.posture_rules.diagnostics import (
        evaluate as check_diagnostics,
    )
    from agentops.agent.checks.posture_rules.local_auth import (
        evaluate as check_local_auth,
    )
    from agentops.agent.checks.posture_rules.managed_identity import (
        evaluate as check_managed_identity,
    )

    # Insertion order is the order in which the posture check runs the rules.
    registry: Dict[str, RuleFn] = {}
    registry["waf.security.local_auth_disabled"] = check_local_auth
    registry["waf.security.managed_identity"] = check_managed_identity
    registry["waf.security.diagnostic_settings"] = check_diagnostics
    return registry
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Built once at import time; maps each rule id to the callable that evaluates it.
RULE_REGISTRY: Dict[str, RuleFn] = _build_registry()
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""WAF-AI Security: every model deployment needs a content filter (RAI policy).
|
|
2
|
+
|
|
3
|
+
The WAF-AI Security pillar (Responsible AI subsection) requires that
|
|
4
|
+
each Azure OpenAI / AI Foundry model deployment have a content filter
|
|
5
|
+
applied. The default ``Microsoft.Default`` policy is acceptable; a
|
|
6
|
+
deployment with no policy at all is not.
|
|
7
|
+
|
|
8
|
+
This rule emits one finding that lists **every** deployment that has no
|
|
9
|
+
``rai_policy_name``.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import List
|
|
15
|
+
|
|
16
|
+
from agentops.agent.findings import Category, Finding, Severity
|
|
17
|
+
from agentops.agent.sources.azure_resources import AzureResourcesPayload
|
|
18
|
+
|
|
19
|
+
# Identifier stamped on the finding emitted by this rule.
RULE_ID = "waf.security.content_filter"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def evaluate(payload: AzureResourcesPayload, source_name: str) -> List[Finding]:
    """Flag deployments that have no RAI / content-filter policy.

    Args:
        payload: Resource snapshot; ``account`` and ``deployments`` are read.
        source_name: Value recorded as the finding's ``source``.

    Returns:
        A single CRITICAL finding listing every deployment whose
        ``rai_policy_name`` is falsy, or an empty list when the account is
        missing, there are no deployments, or every deployment has a policy.
    """
    account = payload.account
    if account is None:
        return []

    deployments = payload.deployments
    if not deployments:
        return []

    unfiltered = []
    for deployment in deployments:
        if not deployment.rai_policy_name:
            unfiltered.append(deployment)
    if not unfiltered:
        return []

    total = len(deployments)
    summary = (
        f"{len(unfiltered)} of {total} "
        f"deployment(s) on account `{account.name}` have no "
        "RAI / content-filter policy. The WAF-AI Security "
        "pillar requires Responsible AI policies on every "
        "model deployment."
    )
    recommendation = (
        "Apply a content-filter policy (start with "
        "`Microsoft.Default`, then tune severity thresholds "
        "for your workload) to every deployment listed below. "
        "See https://learn.microsoft.com/azure/ai-services/openai/concepts/content-filter"
    )
    evidence = {
        "account": account.name,
        "deployments_missing_filter": [
            {"name": item.name, "model": item.model} for item in unfiltered
        ],
        "deployments_total": total,
    }

    finding = Finding(
        id=RULE_ID,
        severity=Severity.CRITICAL,
        category=Category.SECURITY,
        title="One or more deployments have no content filter applied",
        summary=summary,
        recommendation=recommendation,
        source=source_name,
        evidence=evidence,
    )
    return [finding]
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""WAF-AI Security: diagnostic settings must be configured.
|
|
2
|
+
|
|
3
|
+
Without diagnostic settings, audit logs and request traces from the
|
|
4
|
+
Cognitive Services account never reach a Log Analytics workspace,
|
|
5
|
+
storage account, or event hub - making incident investigation and
|
|
6
|
+
content-safety auditing effectively impossible.
|
|
7
|
+
|
|
8
|
+
The WAF-AI Security pillar recommends streaming diagnostic logs to
|
|
9
|
+
Log Analytics for every AI account in production.
|
|
10
|
+
|
|
11
|
+
This rule fires when **none** of the diagnostic settings on the
|
|
12
|
+
account ship logs to a destination (workspace / storage / event hub).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from typing import List
|
|
18
|
+
|
|
19
|
+
from agentops.agent.findings import Category, Finding, Severity
|
|
20
|
+
from agentops.agent.sources.azure_resources import AzureResourcesPayload
|
|
21
|
+
|
|
22
|
+
# Identifier stamped on the finding emitted by this rule.
RULE_ID = "waf.security.diagnostic_settings"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def evaluate(payload: AzureResourcesPayload, source_name: str) -> List[Finding]:
    """Flag accounts whose diagnostic settings do not actually ship logs.

    A setting counts only when it has BOTH a destination (Log Analytics
    workspace, storage account, or event hub) and at least one enabled log
    category. The two conditions are checked on the same setting: a
    destination on one setting and categories on another route nothing.

    Args:
        payload: Resource snapshot; ``account`` and ``diagnostic_settings``
            are read. A missing (``None``) settings list is treated as empty.
        source_name: Value recorded as the finding's ``source``.

    Returns:
        A single WARNING finding when no setting ships logs, otherwise an
        empty list. Also empty when the account is missing.
    """
    account = payload.account
    if account is None:
        return []

    settings = payload.diagnostic_settings or []

    ships_logs = any(
        (s.workspace_id or s.storage_account_id or s.event_hub_authorization_rule_id)
        and s.enabled_log_categories
        for s in settings
    )
    if ships_logs:
        return []

    return [
        Finding(
            id=RULE_ID,
            severity=Severity.WARNING,
            category=Category.SECURITY,
            title="Diagnostic settings are missing or incomplete",
            summary=(
                f"Cognitive Services account `{account.name}` has "
                f"{len(settings)} diagnostic "
                "setting(s), but none route enabled log categories to "
                "a Log Analytics workspace, storage account, or event "
                "hub. Audit and content-safety logs are not being "
                "captured."
            ),
            recommendation=(
                "Create a diagnostic setting that ships the "
                "`Audit`, `RequestResponse`, and `Trace` log categories "
                "to a Log Analytics workspace. See "
                "https://learn.microsoft.com/azure/ai-services/diagnostic-logging"
            ),
            source=source_name,
            evidence={
                "account": account.name,
                "diagnostic_settings": [
                    {
                        "name": s.name,
                        "workspace_id": s.workspace_id,
                        "storage_account_id": s.storage_account_id,
                        "event_hub_authorization_rule_id": s.event_hub_authorization_rule_id,
                        "enabled_log_categories": s.enabled_log_categories,
                    }
                    for s in settings
                ],
            },
        )
    ]
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""WAF-AI Security: local (key-based) authentication must be disabled.
|
|
2
|
+
|
|
3
|
+
Cognitive Services / Azure OpenAI accounts ship with key-based auth
|
|
4
|
+
enabled by default. Microsoft Entra ID is the recommended path for
|
|
5
|
+
production AI workloads - keys can be exfiltrated, are hard to rotate, and
|
|
6
|
+
bypass conditional access policies.
|
|
7
|
+
|
|
8
|
+
WAF-AI Security pillar reference:
|
|
9
|
+
https://learn.microsoft.com/azure/ai-services/openai/how-to/managed-identity
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import List
|
|
15
|
+
|
|
16
|
+
from agentops.agent.findings import Category, Finding, Severity
|
|
17
|
+
from agentops.agent.sources.azure_resources import AzureResourcesPayload
|
|
18
|
+
|
|
19
|
+
# Identifier stamped on the finding emitted by this rule.
RULE_ID = "waf.security.local_auth_disabled"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def evaluate(payload: AzureResourcesPayload, source_name: str) -> List[Finding]:
    """Flag accounts on which key-based (local) authentication is still on.

    Args:
        payload: Resource snapshot; only ``account`` is read.
        source_name: Value recorded as the finding's ``source``.

    Returns:
        An empty list when the account is missing or its
        ``disable_local_auth`` is exactly ``True``; otherwise a single
        CRITICAL finding (any other value, including ``None``, fires).
    """
    account = payload.account
    if account is None or account.disable_local_auth is True:
        return []

    local_auth_flag = account.disable_local_auth
    summary = (
        f"Cognitive Services account `{account.name}` has "
        f"`disableLocalAuth={local_auth_flag}`. "
        "Key-based authentication is enabled, which contradicts "
        "the WAF-AI Security pillar guidance to use Microsoft "
        "Entra ID exclusively."
    )
    recommendation = (
        "Set `disableLocalAuth: true` on the account, grant the "
        "agent runtime the `Cognitive Services OpenAI User` "
        "role via managed identity, and rotate any keys that "
        "may have leaked. See "
        "https://learn.microsoft.com/azure/ai-services/openai/how-to/managed-identity"
    )

    finding = Finding(
        id=RULE_ID,
        severity=Severity.CRITICAL,
        category=Category.SECURITY,
        title="Local (API key) authentication is enabled",
        summary=summary,
        recommendation=recommendation,
        source=source_name,
        evidence={
            "account": account.name,
            "disable_local_auth": local_auth_flag,
        },
    )
    return [finding]
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""WAF-AI Security: account must have a managed identity assigned.
|
|
2
|
+
|
|
3
|
+
Cognitive Services / Azure OpenAI accounts call downstream Azure
|
|
4
|
+
resources (Storage for fine-tuning data, Key Vault for customer keys,
|
|
5
|
+
etc.). The WAF-AI Security pillar recommends using a managed identity
|
|
6
|
+
for those calls instead of connection strings or keys.
|
|
7
|
+
|
|
8
|
+
This rule fires when the account ``identity.type`` is missing or
|
|
9
|
+
``None`` - i.e. neither system-assigned nor user-assigned managed
|
|
10
|
+
identity is configured.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
from agentops.agent.findings import Category, Finding, Severity
|
|
18
|
+
from agentops.agent.sources.azure_resources import AzureResourcesPayload
|
|
19
|
+
|
|
20
|
+
# Identifier stamped on the finding emitted by this rule.
RULE_ID = "waf.security.managed_identity"
|
|
21
|
+
|
|
22
|
+
# Normalized (stripped, lower-cased) ``identity_type`` values that mean "no identity".
_NO_IDENTITY_VALUES = {"", "none", "null"}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def evaluate(payload: AzureResourcesPayload, source_name: str) -> List[Finding]:
    """Flag accounts that have no managed identity configured.

    Args:
        payload: Resource snapshot; only ``account`` is read.
        source_name: Value recorded as the finding's ``source``.

    Returns:
        An empty list when the account is missing or its ``identity_type``
        normalizes to something other than the "no identity" markers;
        otherwise a single WARNING finding.
    """
    account = payload.account
    if account is None:
        return []

    # Compare case-insensitively and ignore surrounding whitespace.
    normalized = account.identity_type or ""
    normalized = normalized.strip().lower()
    has_identity = bool(normalized) and normalized not in _NO_IDENTITY_VALUES
    if has_identity:
        return []

    summary = (
        f"Cognitive Services account `{account.name}` has no "
        "managed identity. The WAF-AI Security pillar "
        "recommends assigning a system- or user-assigned MI so "
        "downstream calls (Storage, Key Vault, Search) avoid "
        "connection strings."
    )
    recommendation = (
        "Enable a system-assigned managed identity (or attach "
        "a user-assigned one) on the account, and grant it the "
        "minimum role it needs on each downstream resource. "
        "See https://learn.microsoft.com/azure/ai-services/authentication"
    )

    finding = Finding(
        id=RULE_ID,
        severity=Severity.WARNING,
        category=Category.SECURITY,
        title="Account has no managed identity assigned",
        summary=summary,
        recommendation=recommendation,
        source=source_name,
        evidence={
            "account": account.name,
            "identity_type": account.identity_type,
            "user_assigned_identities": account.user_assigned_identities,
        },
    )
    return [finding]
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""WAF-AI Security: restrict public network access to the AI account.
|
|
2
|
+
|
|
3
|
+
Cognitive Services / Azure OpenAI accounts default to ``Enabled``
|
|
4
|
+
public network access for convenience. For production AI workloads the
|
|
5
|
+
WAF-AI Security pillar recommends restricting network access via
|
|
6
|
+
private endpoints or a strict network ACL.
|
|
7
|
+
|
|
8
|
+
This rule fires unless ONE of the following is true:
|
|
9
|
+
|
|
10
|
+
* ``publicNetworkAccess == 'Disabled'``
|
|
11
|
+
* At least one private endpoint connection is attached
|
|
12
|
+
* Network ACLs default action is ``Deny``
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from typing import List
|
|
18
|
+
|
|
19
|
+
from agentops.agent.findings import Category, Finding, Severity
|
|
20
|
+
from agentops.agent.sources.azure_resources import AzureResourcesPayload
|
|
21
|
+
|
|
22
|
+
# Identifier stamped on the finding emitted by this rule.
RULE_ID = "waf.security.public_network_access"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def evaluate(payload: AzureResourcesPayload, source_name: str) -> List[Finding]:
    """Flag accounts whose public network access is open and unrestricted.

    The rule stays silent when ANY of these holds: public network access is
    ``Disabled``, at least one private endpoint is attached, or the network
    ACL default action is ``Deny`` (string comparisons are case-insensitive).

    Args:
        payload: Resource snapshot; only ``account`` is read.
        source_name: Value recorded as the finding's ``source``.

    Returns:
        A single WARNING finding when none of the restrictions applies,
        otherwise an empty list. Also empty when the account is missing.
    """
    account = payload.account
    if account is None:
        return []

    pna = (account.public_network_access or "").lower()
    # Guard against a missing count the same way the string fields are guarded.
    has_private_endpoint = (account.private_endpoint_count or 0) > 0
    acl_default = (account.network_acls_default_action or "").lower()

    if pna == "disabled" or has_private_endpoint or acl_default == "deny":
        return []

    return [
        Finding(
            id=RULE_ID,
            severity=Severity.WARNING,
            category=Category.SECURITY,
            title="Public network access is open and unrestricted",
            summary=(
                f"Cognitive Services account `{account.name}` allows "
                "public network access without a deny-by-default ACL or "
                "a private endpoint. The WAF-AI Security pillar "
                "recommends restricting network access for production "
                "AI workloads."
            ),
            recommendation=(
                "Either set `publicNetworkAccess: Disabled` and attach "
                "a private endpoint, or configure network ACLs with "
                "`defaultAction: Deny` and an explicit allow list. See "
                "https://learn.microsoft.com/azure/ai-services/cognitive-services-virtual-networks"
            ),
            source=source_name,
            evidence={
                "account": account.name,
                "public_network_access": account.public_network_access,
                "private_endpoint_count": account.private_endpoint_count,
                "network_acls_default_action": account.network_acls_default_action,
            },
        )
    ]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Regression check: detect metric drops vs a rolling baseline."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from statistics import mean
|
|
6
|
+
from typing import List
|
|
7
|
+
|
|
8
|
+
from agentops.agent.config import RegressionCheckConfig
|
|
9
|
+
from agentops.agent.findings import Category, Finding, Severity
|
|
10
|
+
from agentops.agent.sources.results_history import ResultsHistory
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def run_regression_check(
    history: ResultsHistory, config: RegressionCheckConfig
) -> List[Finding]:
    """Detect metric drops in the latest run versus the mean of earlier runs.

    For each configured metric the baseline is the arithmetic mean of that
    metric over every run except the latest. The relative drop is
    ``(baseline - current) / baseline``, so a lower current value counts as a
    regression. A finding is emitted when the drop reaches
    ``config.threshold_drop``; it is CRITICAL when the drop also reaches
    ``max(2 * threshold_drop, 0.20)`` and WARNING otherwise.

    Metrics are skipped when the latest run lacks them, when no baseline run
    has a usable value, or when the baseline mean is not positive. Values
    that are not finite real numbers (``None``, strings, NaN, infinity) are
    ignored instead of crashing the check or yielding a bogus finding.

    Args:
        history: Results history; ``runs`` is read, oldest first and latest
            last.
        config: Supplies ``min_runs``, ``metrics`` and ``threshold_drop``.

    Returns:
        One finding per regressed metric, in ``config.metrics`` order. Empty
        when there are fewer than ``min_runs`` runs or no earlier run.
    """
    import math
    from numbers import Real

    def _usable(value: object) -> bool:
        # Only finite real numbers can take part in the comparison.
        return isinstance(value, Real) and math.isfinite(value)

    runs = history.runs
    # ``not runs`` protects ``runs[-1]`` when ``min_runs`` is zero or negative.
    if not runs or len(runs) < config.min_runs:
        return []

    latest = runs[-1]
    baseline_runs = runs[:-1]
    if not baseline_runs:
        return []

    findings: List[Finding] = []
    for metric in config.metrics:
        if metric not in latest.metrics:
            continue
        current = latest.metrics[metric]
        if not _usable(current):
            continue

        baseline_values = [
            r.metrics[metric]
            for r in baseline_runs
            if metric in r.metrics and _usable(r.metrics[metric])
        ]
        if not baseline_values:
            continue

        baseline = mean(baseline_values)
        if baseline <= 0:
            # A relative drop is undefined for a zero or negative baseline.
            continue

        drop = (baseline - current) / baseline
        if drop < config.threshold_drop:
            continue

        severity = (
            Severity.CRITICAL
            if drop >= max(config.threshold_drop * 2, 0.20)
            else Severity.WARNING
        )

        findings.append(
            Finding(
                id=f"regression.{metric}",
                severity=severity,
                category=Category.QUALITY,
                title=f"Regression detected on `{metric}`",
                summary=(
                    f"`{metric}` dropped {drop * 100:.1f}% in run "
                    f"`{latest.run_id}` (current={current:.4f}, "
                    f"baseline={baseline:.4f} over {len(baseline_values)} runs)."
                ),
                recommendation=(
                    "Compare the latest run against the baseline runs in "
                    "`.agentops/results/` or the Foundry Evaluations page, "
                    "inspect prompt/model/dataset changes, and re-run the "
                    "evaluation after the fix."
                ),
                source="results_history",
                evidence={
                    "metric": metric,
                    "current": current,
                    "baseline_avg": baseline,
                    "drop_ratio": drop,
                    "baseline_runs": len(baseline_values),
                    "latest_run_id": latest.run_id,
                },
            )
        )
    return findings
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Production-readiness checks for the POC-to-production journey."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import List, Optional
|
|
7
|
+
|
|
8
|
+
from agentops.agent.findings import Category, Finding, Severity
|
|
9
|
+
from agentops.agent.sources.foundry_control import FoundryControlPayload
|
|
10
|
+
from agentops.agent.sources.results_history import ResultsHistory
|
|
11
|
+
|
|
12
|
+
# Source label recorded on every finding produced by the release-readiness checks.
SOURCE_NAME = "release_readiness"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def run_release_readiness_check(
    workspace: Path,
    history: ResultsHistory,
    foundry: Optional[FoundryControlPayload],
) -> List[Finding]:
    """Collect findings that block or weaken production release evidence.

    Nothing is reported for a directory that does not look like an AgentOps
    workspace. Otherwise the sub-checks run in a fixed order - latest eval,
    baseline, trace-regression dataset, Foundry continuous evaluation - and
    their findings are concatenated in that order.
    """
    if not _is_agentops_workspace(workspace, history):
        return []

    return [
        *_check_latest_eval(history),
        *_check_baseline(workspace, history),
        *_check_trace_regression_dataset(workspace, history),
        *_check_foundry_online_evaluation(foundry),
    ]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _is_agentops_workspace(workspace: Path, history: ResultsHistory) -> bool:
|
|
34
|
+
return (
|
|
35
|
+
(workspace / "agentops.yaml").exists()
|
|
36
|
+
or bool(history.runs)
|
|
37
|
+
or (workspace / ".github" / "workflows" / "agentops-pr.yml").exists()
|
|
38
|
+
or (workspace / ".azuredevops" / "pipelines" / "agentops-pr.yml").exists()
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _check_latest_eval(history: ResultsHistory) -> List[Finding]:
|
|
43
|
+
if not history.runs:
|
|
44
|
+
return [
|
|
45
|
+
Finding(
|
|
46
|
+
id="opex.release.no_eval_evidence",
|
|
47
|
+
severity=Severity.WARNING,
|
|
48
|
+
category=Category.OPERATIONAL_EXCELLENCE,
|
|
49
|
+
title="No evaluation evidence is available for release",
|
|
50
|
+
summary=(
|
|
51
|
+
"AgentOps could not find a completed evaluation run in "
|
|
52
|
+
"`.agentops/results/` or Foundry fallback history. A "
|
|
53
|
+
"production promotion should have at least one recent eval "
|
|
54
|
+
"result attached to the release evidence."
|
|
55
|
+
),
|
|
56
|
+
recommendation=(
|
|
57
|
+
"Run `agentops eval analyze`, fix any setup gaps, then run "
|
|
58
|
+
"`agentops eval run` before promoting the agent."
|
|
59
|
+
),
|
|
60
|
+
source=SOURCE_NAME,
|
|
61
|
+
)
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
latest = history.runs[-1]
|
|
65
|
+
if latest.run_pass is False:
|
|
66
|
+
return [
|
|
67
|
+
Finding(
|
|
68
|
+
id="opex.release.latest_eval_failed",
|
|
69
|
+
severity=Severity.CRITICAL,
|
|
70
|
+
category=Category.OPERATIONAL_EXCELLENCE,
|
|
71
|
+
title="Latest evaluation run failed",
|
|
72
|
+
summary=(
|
|
73
|
+
f"The latest eval run `{latest.run_id}` did not pass. "
|
|
74
|
+
"A release with a failing quality gate should not be "
|
|
75
|
+
"promoted to production."
|
|
76
|
+
),
|
|
77
|
+
recommendation=(
|
|
78
|
+
"Open the latest `report.md` or Foundry evaluation report, "
|
|
79
|
+
"fix the failing rows or thresholds, and re-run the eval "
|
|
80
|
+
"before generating release evidence again."
|
|
81
|
+
),
|
|
82
|
+
source=SOURCE_NAME,
|
|
83
|
+
evidence={"run_id": latest.run_id, "run_pass": latest.run_pass},
|
|
84
|
+
)
|
|
85
|
+
]
|
|
86
|
+
return []
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _check_baseline(workspace: Path, history: ResultsHistory) -> List[Finding]:
|
|
90
|
+
if not history.runs:
|
|
91
|
+
return []
|
|
92
|
+
baseline = workspace / ".agentops" / "baseline" / "results.json"
|
|
93
|
+
if baseline.exists() or len(history.runs) >= 2:
|
|
94
|
+
return []
|
|
95
|
+
return [
|
|
96
|
+
Finding(
|
|
97
|
+
id="opex.release.no_baseline",
|
|
98
|
+
severity=Severity.WARNING,
|
|
99
|
+
category=Category.OPERATIONAL_EXCELLENCE,
|
|
100
|
+
title="No baseline result is available for regression gating",
|
|
101
|
+
summary=(
|
|
102
|
+
"AgentOps found an eval run, but no baseline or prior run to "
|
|
103
|
+
"compare against. The gate can say whether thresholds passed, "
|
|
104
|
+
"but not whether the candidate regressed from the last known "
|
|
105
|
+
"good behavior."
|
|
106
|
+
),
|
|
107
|
+
recommendation=(
|
|
108
|
+
"After a known-good run, copy "
|
|
109
|
+
"`.agentops/results/latest/results.json` to "
|
|
110
|
+
"`.agentops/baseline/results.json` or keep historical runs so "
|
|
111
|
+
"`agentops eval run --baseline` can render deltas."
|
|
112
|
+
),
|
|
113
|
+
source=SOURCE_NAME,
|
|
114
|
+
)
|
|
115
|
+
]
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _check_trace_regression_dataset(workspace: Path, history: ResultsHistory) -> List[Finding]:
|
|
119
|
+
if not history.runs:
|
|
120
|
+
return []
|
|
121
|
+
manifest = workspace / ".agentops" / "data" / "trace-regression-manifest.json"
|
|
122
|
+
if manifest.exists():
|
|
123
|
+
return []
|
|
124
|
+
return [
|
|
125
|
+
Finding(
|
|
126
|
+
id="opex.release.no_trace_regression_dataset",
|
|
127
|
+
severity=Severity.INFO,
|
|
128
|
+
category=Category.OPERATIONAL_EXCELLENCE,
|
|
129
|
+
title="Production traces are not feeding a regression dataset yet",
|
|
130
|
+
summary=(
|
|
131
|
+
"No trace-regression manifest was found under `.agentops/data/`. "
|
|
132
|
+
"This is acceptable for early exploration, but production "
|
|
133
|
+
"incidents and high-value conversations should become reviewed "
|
|
134
|
+
"regression rows over time."
|
|
135
|
+
),
|
|
136
|
+
recommendation=(
|
|
137
|
+
"Export relevant App Insights / Foundry traces and run "
|
|
138
|
+
"`agentops eval promote-traces --source <traces.jsonl> --apply` "
|
|
139
|
+
"to create a reviewed production-derived regression dataset."
|
|
140
|
+
),
|
|
141
|
+
source=SOURCE_NAME,
|
|
142
|
+
evidence={"manifest": str(manifest)},
|
|
143
|
+
)
|
|
144
|
+
]
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _check_foundry_online_evaluation(
|
|
148
|
+
foundry: Optional[FoundryControlPayload],
|
|
149
|
+
) -> List[Finding]:
|
|
150
|
+
if foundry is None:
|
|
151
|
+
return []
|
|
152
|
+
diag = foundry.diagnostics or {}
|
|
153
|
+
if diag.get("status") != "ok":
|
|
154
|
+
return []
|
|
155
|
+
if "evaluation_rules_count" not in diag and "evaluation_rules_warning" not in diag:
|
|
156
|
+
return []
|
|
157
|
+
enabled = [rule for rule in foundry.evaluation_rules if rule.enabled is not False]
|
|
158
|
+
if enabled:
|
|
159
|
+
return []
|
|
160
|
+
return [
|
|
161
|
+
Finding(
|
|
162
|
+
id="opex.release.no_continuous_eval",
|
|
163
|
+
severity=Severity.WARNING,
|
|
164
|
+
category=Category.OPERATIONAL_EXCELLENCE,
|
|
165
|
+
title="No enabled Foundry continuous evaluation rule is attached",
|
|
166
|
+
summary=(
|
|
167
|
+
"The Foundry control plane was reachable, but AgentOps did not "
|
|
168
|
+
"detect an enabled continuous evaluation rule. Production "
|
|
169
|
+
"responses may not be sampled and scored after deployment."
|
|
170
|
+
),
|
|
171
|
+
recommendation=(
|
|
172
|
+
"Enable Foundry continuous evaluation for the production agent "
|
|
173
|
+
"and include at least one safety or quality evaluator so runtime "
|
|
174
|
+
"traffic keeps producing quality evidence."
|
|
175
|
+
),
|
|
176
|
+
source=SOURCE_NAME,
|
|
177
|
+
evidence={
|
|
178
|
+
"evaluation_rules_count": len(foundry.evaluation_rules),
|
|
179
|
+
"agents": [agent.agent_id for agent in foundry.agents],
|
|
180
|
+
},
|
|
181
|
+
)
|
|
182
|
+
]
|