agent_os_kernel 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_plane/__init__.py +662 -0
- agent_control_plane/a2a_adapter.py +543 -0
- agent_control_plane/adapter.py +417 -0
- agent_control_plane/agent_hibernation.py +394 -0
- agent_control_plane/agent_kernel.py +470 -0
- agent_control_plane/compliance.py +720 -0
- agent_control_plane/constraint_graphs.py +478 -0
- agent_control_plane/control_plane.py +854 -0
- agent_control_plane/example_executors.py +195 -0
- agent_control_plane/execution_engine.py +231 -0
- agent_control_plane/flight_recorder.py +846 -0
- agent_control_plane/governance_layer.py +435 -0
- agent_control_plane/hf_utils.py +563 -0
- agent_control_plane/interfaces/__init__.py +55 -0
- agent_control_plane/interfaces/kernel_interface.py +361 -0
- agent_control_plane/interfaces/plugin_interface.py +497 -0
- agent_control_plane/interfaces/protocol_interfaces.py +387 -0
- agent_control_plane/kernel_space.py +1009 -0
- agent_control_plane/langchain_adapter.py +424 -0
- agent_control_plane/lifecycle.py +3113 -0
- agent_control_plane/mcp_adapter.py +653 -0
- agent_control_plane/ml_safety.py +563 -0
- agent_control_plane/multimodal.py +727 -0
- agent_control_plane/mute_agent.py +422 -0
- agent_control_plane/observability.py +787 -0
- agent_control_plane/orchestrator.py +482 -0
- agent_control_plane/plugin_registry.py +750 -0
- agent_control_plane/policy_engine.py +954 -0
- agent_control_plane/process_isolation.py +777 -0
- agent_control_plane/shadow_mode.py +310 -0
- agent_control_plane/signals.py +493 -0
- agent_control_plane/supervisor_agents.py +430 -0
- agent_control_plane/time_travel_debugger.py +557 -0
- agent_control_plane/tool_registry.py +452 -0
- agent_control_plane/vfs.py +697 -0
- agent_kernel/__init__.py +69 -0
- agent_kernel/analyzer.py +435 -0
- agent_kernel/auditor.py +36 -0
- agent_kernel/completeness_auditor.py +237 -0
- agent_kernel/detector.py +203 -0
- agent_kernel/kernel.py +744 -0
- agent_kernel/memory_manager.py +85 -0
- agent_kernel/models.py +374 -0
- agent_kernel/nudge_mechanism.py +263 -0
- agent_kernel/outcome_analyzer.py +338 -0
- agent_kernel/patcher.py +582 -0
- agent_kernel/semantic_analyzer.py +316 -0
- agent_kernel/semantic_purge.py +349 -0
- agent_kernel/simulator.py +449 -0
- agent_kernel/teacher.py +85 -0
- agent_kernel/triage.py +152 -0
- agent_os/__init__.py +409 -0
- agent_os/_adversarial_impl.py +200 -0
- agent_os/_circuit_breaker_impl.py +232 -0
- agent_os/_mcp_metrics.py +193 -0
- agent_os/adversarial.py +20 -0
- agent_os/agents_compat.py +490 -0
- agent_os/audit_logger.py +135 -0
- agent_os/base_agent.py +651 -0
- agent_os/circuit_breaker.py +34 -0
- agent_os/cli/__init__.py +659 -0
- agent_os/cli/cmd_audit.py +128 -0
- agent_os/cli/cmd_init.py +152 -0
- agent_os/cli/cmd_policy.py +41 -0
- agent_os/cli/cmd_policy_gen.py +180 -0
- agent_os/cli/cmd_validate.py +258 -0
- agent_os/cli/mcp_scan.py +265 -0
- agent_os/cli/output.py +192 -0
- agent_os/cli/policy_checker.py +330 -0
- agent_os/compat.py +74 -0
- agent_os/constraint_graph.py +234 -0
- agent_os/content_governance.py +140 -0
- agent_os/context_budget.py +305 -0
- agent_os/credential_redactor.py +224 -0
- agent_os/diff_policy.py +89 -0
- agent_os/egress_policy.py +159 -0
- agent_os/escalation.py +276 -0
- agent_os/event_bus.py +124 -0
- agent_os/exceptions.py +180 -0
- agent_os/execution_context_policy.py +141 -0
- agent_os/github_enterprise.py +96 -0
- agent_os/health.py +20 -0
- agent_os/integrations/__init__.py +279 -0
- agent_os/integrations/a2a_adapter.py +279 -0
- agent_os/integrations/agent_lightning/__init__.py +30 -0
- agent_os/integrations/anthropic_adapter.py +420 -0
- agent_os/integrations/autogen_adapter.py +620 -0
- agent_os/integrations/base.py +1137 -0
- agent_os/integrations/compat.py +229 -0
- agent_os/integrations/config.py +98 -0
- agent_os/integrations/conversation_guardian.py +957 -0
- agent_os/integrations/crewai_adapter.py +467 -0
- agent_os/integrations/drift_detector.py +425 -0
- agent_os/integrations/dry_run.py +124 -0
- agent_os/integrations/escalation.py +582 -0
- agent_os/integrations/gemini_adapter.py +364 -0
- agent_os/integrations/google_adk_adapter.py +633 -0
- agent_os/integrations/guardrails_adapter.py +394 -0
- agent_os/integrations/health.py +197 -0
- agent_os/integrations/langchain_adapter.py +654 -0
- agent_os/integrations/llamafirewall.py +343 -0
- agent_os/integrations/llamaindex_adapter.py +188 -0
- agent_os/integrations/logging.py +191 -0
- agent_os/integrations/maf_adapter.py +631 -0
- agent_os/integrations/mistral_adapter.py +365 -0
- agent_os/integrations/openai_adapter.py +816 -0
- agent_os/integrations/openai_agents_sdk.py +406 -0
- agent_os/integrations/policy_compose.py +171 -0
- agent_os/integrations/profiling.py +144 -0
- agent_os/integrations/pydantic_ai_adapter.py +420 -0
- agent_os/integrations/rate_limiter.py +130 -0
- agent_os/integrations/rbac.py +143 -0
- agent_os/integrations/registry.py +113 -0
- agent_os/integrations/scope_guard.py +303 -0
- agent_os/integrations/semantic_kernel_adapter.py +769 -0
- agent_os/integrations/smolagents_adapter.py +629 -0
- agent_os/integrations/templates.py +178 -0
- agent_os/integrations/token_budget.py +134 -0
- agent_os/integrations/tool_aliases.py +190 -0
- agent_os/integrations/webhooks.py +177 -0
- agent_os/lite.py +208 -0
- agent_os/mcp_gateway.py +385 -0
- agent_os/mcp_message_signer.py +273 -0
- agent_os/mcp_protocols.py +161 -0
- agent_os/mcp_response_scanner.py +232 -0
- agent_os/mcp_security.py +924 -0
- agent_os/mcp_session_auth.py +231 -0
- agent_os/mcp_sliding_rate_limiter.py +184 -0
- agent_os/memory_guard.py +409 -0
- agent_os/metrics.py +134 -0
- agent_os/mute.py +428 -0
- agent_os/mute_agent.py +209 -0
- agent_os/policies/__init__.py +77 -0
- agent_os/policies/async_evaluator.py +275 -0
- agent_os/policies/backends.py +670 -0
- agent_os/policies/bridge.py +169 -0
- agent_os/policies/budget.py +85 -0
- agent_os/policies/cli.py +294 -0
- agent_os/policies/conflict_resolution.py +270 -0
- agent_os/policies/data_classification.py +252 -0
- agent_os/policies/evaluator.py +239 -0
- agent_os/policies/policy_schema.json +228 -0
- agent_os/policies/rate_limiting.py +145 -0
- agent_os/policies/schema.py +115 -0
- agent_os/policies/shared.py +331 -0
- agent_os/prompt_injection.py +694 -0
- agent_os/providers.py +182 -0
- agent_os/py.typed +0 -0
- agent_os/retry.py +81 -0
- agent_os/reversibility.py +251 -0
- agent_os/sandbox.py +432 -0
- agent_os/sandbox_provider.py +140 -0
- agent_os/secure_codegen.py +525 -0
- agent_os/security_skills.py +538 -0
- agent_os/semantic_policy.py +422 -0
- agent_os/server/__init__.py +15 -0
- agent_os/server/__main__.py +25 -0
- agent_os/server/app.py +277 -0
- agent_os/server/models.py +104 -0
- agent_os/shift_left_metrics.py +130 -0
- agent_os/stateless.py +742 -0
- agent_os/supervisor.py +148 -0
- agent_os/task_outcome.py +148 -0
- agent_os/transparency.py +181 -0
- agent_os/trust_root.py +128 -0
- agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
- agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
- agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
- agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
- agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
- agent_os_observability/__init__.py +27 -0
- agent_os_observability/dashboards.py +898 -0
- agent_os_observability/metrics.py +398 -0
- agent_os_observability/server.py +223 -0
- agent_os_observability/tracer.py +232 -0
- agent_primitives/__init__.py +24 -0
- agent_primitives/failures.py +84 -0
- agent_primitives/py.typed +0 -0
- amb_core/__init__.py +177 -0
- amb_core/adapters/__init__.py +57 -0
- amb_core/adapters/aws_sqs_broker.py +376 -0
- amb_core/adapters/azure_servicebus_broker.py +340 -0
- amb_core/adapters/kafka_broker.py +260 -0
- amb_core/adapters/nats_broker.py +285 -0
- amb_core/adapters/rabbitmq_broker.py +235 -0
- amb_core/adapters/redis_broker.py +262 -0
- amb_core/broker.py +145 -0
- amb_core/bus.py +481 -0
- amb_core/cloudevents.py +509 -0
- amb_core/dlq.py +345 -0
- amb_core/hf_utils.py +536 -0
- amb_core/memory_broker.py +410 -0
- amb_core/models.py +141 -0
- amb_core/persistence.py +529 -0
- amb_core/schema.py +294 -0
- amb_core/tracing.py +358 -0
- atr/__init__.py +640 -0
- atr/access.py +348 -0
- atr/composition.py +645 -0
- atr/decorator.py +357 -0
- atr/executor.py +384 -0
- atr/health.py +557 -0
- atr/hf_utils.py +449 -0
- atr/injection.py +422 -0
- atr/metrics.py +440 -0
- atr/policies.py +403 -0
- atr/py.typed +2 -0
- atr/registry.py +452 -0
- atr/schema.py +480 -0
- atr/tools/safe/__init__.py +75 -0
- atr/tools/safe/calculator.py +467 -0
- atr/tools/safe/datetime_tool.py +443 -0
- atr/tools/safe/file_reader.py +402 -0
- atr/tools/safe/http_client.py +316 -0
- atr/tools/safe/json_parser.py +374 -0
- atr/tools/safe/text_tool.py +537 -0
- atr/tools/safe/toolkit.py +175 -0
- caas/__init__.py +162 -0
- caas/api/__init__.py +7 -0
- caas/api/server.py +1328 -0
- caas/caching.py +834 -0
- caas/cli.py +210 -0
- caas/conversation.py +223 -0
- caas/decay.py +72 -0
- caas/detection/__init__.py +9 -0
- caas/detection/detector.py +238 -0
- caas/enrichment.py +130 -0
- caas/gateway/__init__.py +27 -0
- caas/gateway/trust_gateway.py +474 -0
- caas/hf_utils.py +479 -0
- caas/ingestion/__init__.py +23 -0
- caas/ingestion/processors.py +253 -0
- caas/ingestion/structure_parser.py +188 -0
- caas/models.py +356 -0
- caas/pragmatic_truth.py +444 -0
- caas/routing/__init__.py +10 -0
- caas/routing/heuristic_router.py +58 -0
- caas/storage/__init__.py +9 -0
- caas/storage/store.py +389 -0
- caas/triad.py +213 -0
- caas/tuning/__init__.py +9 -0
- caas/tuning/tuner.py +329 -0
- caas/vfs/__init__.py +14 -0
- caas/vfs/filesystem.py +452 -0
- cmvk/__init__.py +218 -0
- cmvk/audit.py +402 -0
- cmvk/benchmarks.py +478 -0
- cmvk/constitutional.py +904 -0
- cmvk/hf_utils.py +301 -0
- cmvk/metrics.py +473 -0
- cmvk/profiles.py +300 -0
- cmvk/py.typed +0 -0
- cmvk/types.py +12 -0
- cmvk/verification.py +956 -0
- emk/__init__.py +89 -0
- emk/causal.py +352 -0
- emk/hf_utils.py +421 -0
- emk/indexer.py +83 -0
- emk/py.typed +0 -0
- emk/schema.py +204 -0
- emk/sleep_cycle.py +347 -0
- emk/store.py +281 -0
- iatp/__init__.py +166 -0
- iatp/attestation.py +461 -0
- iatp/cli.py +317 -0
- iatp/hf_utils.py +472 -0
- iatp/ipc_pipes.py +580 -0
- iatp/main.py +412 -0
- iatp/models/__init__.py +447 -0
- iatp/policy_engine.py +337 -0
- iatp/py.typed +2 -0
- iatp/recovery.py +321 -0
- iatp/security/__init__.py +270 -0
- iatp/sidecar/__init__.py +519 -0
- iatp/telemetry/__init__.py +164 -0
- iatp/tests/__init__.py +1 -0
- iatp/tests/test_attestation.py +370 -0
- iatp/tests/test_cli.py +131 -0
- iatp/tests/test_ed25519_attestation.py +211 -0
- iatp/tests/test_models.py +130 -0
- iatp/tests/test_policy_engine.py +347 -0
- iatp/tests/test_recovery.py +281 -0
- iatp/tests/test_security.py +222 -0
- iatp/tests/test_sidecar.py +167 -0
- iatp/tests/test_telemetry.py +175 -0
- mcp_kernel_server/__init__.py +28 -0
- mcp_kernel_server/cli.py +274 -0
- mcp_kernel_server/resources.py +217 -0
- mcp_kernel_server/server.py +564 -0
- mcp_kernel_server/tools.py +1174 -0
- mute_agent/__init__.py +68 -0
- mute_agent/core/__init__.py +1 -0
- mute_agent/core/execution_agent.py +166 -0
- mute_agent/core/handshake_protocol.py +201 -0
- mute_agent/core/reasoning_agent.py +238 -0
- mute_agent/knowledge_graph/__init__.py +1 -0
- mute_agent/knowledge_graph/graph_elements.py +65 -0
- mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
- mute_agent/knowledge_graph/subgraph.py +224 -0
- mute_agent/listener/__init__.py +43 -0
- mute_agent/listener/adapters/__init__.py +31 -0
- mute_agent/listener/adapters/base_adapter.py +189 -0
- mute_agent/listener/adapters/caas_adapter.py +344 -0
- mute_agent/listener/adapters/control_plane_adapter.py +436 -0
- mute_agent/listener/adapters/iatp_adapter.py +332 -0
- mute_agent/listener/adapters/scak_adapter.py +251 -0
- mute_agent/listener/listener.py +610 -0
- mute_agent/listener/state_observer.py +436 -0
- mute_agent/listener/threshold_config.py +313 -0
- mute_agent/super_system/__init__.py +1 -0
- mute_agent/super_system/router.py +204 -0
- mute_agent/visualization/__init__.py +10 -0
- mute_agent/visualization/graph_debugger.py +502 -0
- nexus/README.md +60 -0
- nexus/__init__.py +51 -0
- nexus/arbiter.py +359 -0
- nexus/client.py +466 -0
- nexus/dmz.py +444 -0
- nexus/escrow.py +430 -0
- nexus/exceptions.py +286 -0
- nexus/pyproject.toml +36 -0
- nexus/registry.py +393 -0
- nexus/reputation.py +425 -0
- nexus/schemas/__init__.py +51 -0
- nexus/schemas/compliance.py +276 -0
- nexus/schemas/escrow.py +251 -0
- nexus/schemas/manifest.py +225 -0
- nexus/schemas/receipt.py +208 -0
- nexus/tests/__init__.py +0 -0
- nexus/tests/conftest.py +146 -0
- nexus/tests/test_arbiter.py +192 -0
- nexus/tests/test_dmz.py +194 -0
- nexus/tests/test_escrow.py +276 -0
- nexus/tests/test_exceptions.py +225 -0
- nexus/tests/test_registry.py +232 -0
- nexus/tests/test_reputation.py +328 -0
- nexus/tests/test_schemas.py +295 -0
|
@@ -0,0 +1,1137 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Base Integration Interface
|
|
5
|
+
|
|
6
|
+
All framework adapters inherit from this base class.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import copy
|
|
13
|
+
import difflib
|
|
14
|
+
import fnmatch
|
|
15
|
+
import hashlib
|
|
16
|
+
import logging
|
|
17
|
+
import re
|
|
18
|
+
from abc import ABC, abstractmethod
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from datetime import datetime
|
|
21
|
+
from enum import Enum
|
|
22
|
+
from typing import Any, Callable, Protocol
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class PatternType(Enum):
    """How an entry of ``blocked_patterns`` is matched against text."""

    # Plain text containment check.
    SUBSTRING = "substring"
    # Entry is a regular expression.
    REGEX = "regex"
    # Entry is a shell-style wildcard pattern.
    GLOB = "glob"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class GovernanceEventType(Enum):
    """Kinds of events the governance layer can emit."""

    POLICY_CHECK = "policy_check"
    POLICY_VIOLATION = "policy_violation"
    TOOL_CALL_BLOCKED = "tool_call_blocked"
    CHECKPOINT_CREATED = "checkpoint_created"
    DRIFT_DETECTED = "drift_detected"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class DriftResult:
    """Outcome of one drift-detection comparison.

    Attributes:
        score: Drift score in [0.0, 1.0]; 0 means identical, 1 means
            completely different.
        exceeded: True when the score exceeded the configured threshold.
        threshold: The threshold the score was checked against.
        baseline_hash: Hash of the baseline output.
        current_hash: Hash of the current output.
    """

    score: float
    exceeded: bool
    threshold: float
    baseline_hash: str
    current_hash: str

    def __repr__(self) -> str:
        """Return a compact summary; the two hashes are left out."""
        if self.exceeded:
            verdict = "EXCEEDED"
        else:
            verdict = "OK"
        return f"DriftResult(score={self.score:.4f}, threshold={self.threshold}, {verdict})"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
|
|
66
|
+
class GovernancePolicy:
|
|
67
|
+
"""Policy configuration for governed AI agents.
|
|
68
|
+
|
|
69
|
+
Defines the complete set of constraints, thresholds, and audit settings
|
|
70
|
+
that the governance layer enforces on agent behaviour. Policies are
|
|
71
|
+
validated on construction via ``__post_init__`` and can be serialized
|
|
72
|
+
to/from YAML for version-controlled configuration.
|
|
73
|
+
|
|
74
|
+
Policies are **composable**: create a base policy with sensible defaults
|
|
75
|
+
and derive stricter variants for sensitive environments. Use
|
|
76
|
+
``is_stricter_than()`` to verify that a derived policy never *loosens*
|
|
77
|
+
constraints relative to the base.
|
|
78
|
+
|
|
79
|
+
Attributes:
|
|
80
|
+
name: Human-readable policy name used in audit logs and error
|
|
81
|
+
messages. Defaults to ``"default"``.
|
|
82
|
+
max_tokens: Maximum number of tokens an agent may consume per
|
|
83
|
+
request. Must be a positive integer. Defaults to ``4096``.
|
|
84
|
+
max_tool_calls: Maximum number of tool invocations allowed per
|
|
85
|
+
request. ``0`` disables tool calls entirely. Must be a
|
|
86
|
+
non-negative integer. Defaults to ``10``.
|
|
87
|
+
allowed_tools: Explicit allowlist of tool names the agent may call.
|
|
88
|
+
An empty list means *all* tools are permitted (subject to other
|
|
89
|
+
constraints). Defaults to ``[]``.
|
|
90
|
+
blocked_patterns: Patterns that must not appear in tool arguments.
|
|
91
|
+
Each entry is either a plain substring string or a
|
|
92
|
+
``(pattern, PatternType)`` tuple for regex/glob matching.
|
|
93
|
+
Defaults to ``[]``.
|
|
94
|
+
require_human_approval: When ``True``, tool calls require explicit
|
|
95
|
+
human approval before execution. Defaults to ``False``.
|
|
96
|
+
timeout_seconds: Maximum wall-clock time (in seconds) allowed for
|
|
97
|
+
a single request. Must be a positive integer. Defaults to
|
|
98
|
+
``300``.
|
|
99
|
+
confidence_threshold: Minimum confidence score (0.0–1.0) for an
|
|
100
|
+
agent's action to be accepted without review. ``0.0``
|
|
101
|
+
effectively disables confidence checking. Defaults to ``0.8``.
|
|
102
|
+
drift_threshold: Maximum acceptable semantic drift score (0.0–1.0)
|
|
103
|
+
between an agent's stated intent and actual output before a
|
|
104
|
+
``DRIFT_DETECTED`` event is emitted. Defaults to ``0.15``.
|
|
105
|
+
log_all_calls: When ``True``, every tool call is recorded in the
|
|
106
|
+
audit log regardless of outcome. Defaults to ``True``.
|
|
107
|
+
checkpoint_frequency: Create a governance checkpoint every *N* tool
|
|
108
|
+
calls. Must be a positive integer. Defaults to ``5``.
|
|
109
|
+
max_concurrent: Maximum number of concurrent agent executions
|
|
110
|
+
allowed under this policy. Must be a positive integer.
|
|
111
|
+
Defaults to ``10``.
|
|
112
|
+
backpressure_threshold: Number of concurrent executions at which
|
|
113
|
+
the system begins applying backpressure (e.g. throttling new
|
|
114
|
+
requests). Should be less than ``max_concurrent`` to be
|
|
115
|
+
effective. Defaults to ``8``.
|
|
116
|
+
version: Semantic version string for the policy, enabling auditable
|
|
117
|
+
policy evolution. Defaults to ``"1.0.0"``.
|
|
118
|
+
|
|
119
|
+
Example:
|
|
120
|
+
Creating a strict read-only policy::
|
|
121
|
+
|
|
122
|
+
policy = GovernancePolicy(
|
|
123
|
+
name="read_only_strict",
|
|
124
|
+
max_tokens=2048,
|
|
125
|
+
max_tool_calls=5,
|
|
126
|
+
allowed_tools=["read_file", "web_search"],
|
|
127
|
+
blocked_patterns=[
|
|
128
|
+
"password",
|
|
129
|
+
("rm\\s+-rf", PatternType.REGEX),
|
|
130
|
+
("*.exe", PatternType.GLOB),
|
|
131
|
+
],
|
|
132
|
+
require_human_approval=True,
|
|
133
|
+
confidence_threshold=0.9,
|
|
134
|
+
drift_threshold=0.10,
|
|
135
|
+
version="2.0.0",
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
Comparing policies::
|
|
139
|
+
|
|
140
|
+
base = GovernancePolicy()
|
|
141
|
+
strict = GovernancePolicy(max_tokens=1024, max_tool_calls=3)
|
|
142
|
+
assert strict.is_stricter_than(base)
|
|
143
|
+
|
|
144
|
+
Serialization round-trip::
|
|
145
|
+
|
|
146
|
+
yaml_str = policy.to_yaml()
|
|
147
|
+
restored = GovernancePolicy.from_yaml(yaml_str)
|
|
148
|
+
"""
|
|
149
|
+
name: str = "default"
|
|
150
|
+
max_tokens: int = 4096
|
|
151
|
+
max_tool_calls: int = 10
|
|
152
|
+
allowed_tools: list[str] = field(default_factory=list)
|
|
153
|
+
blocked_patterns: list[str | tuple[str, PatternType]] = field(default_factory=list)
|
|
154
|
+
require_human_approval: bool = False
|
|
155
|
+
timeout_seconds: int = 300
|
|
156
|
+
|
|
157
|
+
# Safety thresholds
|
|
158
|
+
confidence_threshold: float = 0.8
|
|
159
|
+
drift_threshold: float = 0.15
|
|
160
|
+
|
|
161
|
+
# Audit settings
|
|
162
|
+
log_all_calls: bool = True
|
|
163
|
+
checkpoint_frequency: int = 5 # Every N calls
|
|
164
|
+
|
|
165
|
+
# Concurrency limits
|
|
166
|
+
max_concurrent: int = 10
|
|
167
|
+
backpressure_threshold: int = 8 # Start slowing down at this level
|
|
168
|
+
|
|
169
|
+
# Version tracking
|
|
170
|
+
version: str = "1.0.0"
|
|
171
|
+
|
|
172
|
+
def __repr__(self) -> str:
|
|
173
|
+
return (
|
|
174
|
+
f"GovernancePolicy(max_tokens={self.max_tokens!r}, "
|
|
175
|
+
f"max_tool_calls={self.max_tool_calls!r}, "
|
|
176
|
+
f"require_human_approval={self.require_human_approval!r}, "
|
|
177
|
+
f"version={self.version!r})"
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
def __hash__(self) -> int:
|
|
181
|
+
return hash(
|
|
182
|
+
(
|
|
183
|
+
self.max_tokens,
|
|
184
|
+
self.max_tool_calls,
|
|
185
|
+
tuple(self.allowed_tools),
|
|
186
|
+
tuple(self.blocked_patterns),
|
|
187
|
+
self.require_human_approval,
|
|
188
|
+
self.timeout_seconds,
|
|
189
|
+
self.confidence_threshold,
|
|
190
|
+
self.drift_threshold,
|
|
191
|
+
self.log_all_calls,
|
|
192
|
+
self.checkpoint_frequency,
|
|
193
|
+
self.max_concurrent,
|
|
194
|
+
self.backpressure_threshold,
|
|
195
|
+
self.version,
|
|
196
|
+
)
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
def __post_init__(self) -> None:
|
|
200
|
+
"""Validate policy fields on construction."""
|
|
201
|
+
self.validate()
|
|
202
|
+
|
|
203
|
+
def validate(self) -> None:
|
|
204
|
+
"""Validate all policy fields and raise ValueError for invalid inputs."""
|
|
205
|
+
# Validate positive integers (must be > 0)
|
|
206
|
+
for field_name in (
|
|
207
|
+
"max_tokens", "timeout_seconds",
|
|
208
|
+
"max_concurrent", "backpressure_threshold", "checkpoint_frequency",
|
|
209
|
+
):
|
|
210
|
+
value = getattr(self, field_name)
|
|
211
|
+
if not isinstance(value, int) or value <= 0:
|
|
212
|
+
raise ValueError(
|
|
213
|
+
f"{field_name} must be a positive integer, got {value!r}"
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
# Validate non-negative integers (>= 0 allowed)
|
|
217
|
+
for field_name in ("max_tool_calls",):
|
|
218
|
+
value = getattr(self, field_name)
|
|
219
|
+
if not isinstance(value, int) or value < 0:
|
|
220
|
+
raise ValueError(
|
|
221
|
+
f"{field_name} must be a non-negative integer, got {value!r}"
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
# Validate float thresholds are in [0.0, 1.0]
|
|
225
|
+
for field_name in ("confidence_threshold", "drift_threshold"):
|
|
226
|
+
value = getattr(self, field_name)
|
|
227
|
+
if not isinstance(value, (int, float)) or not (0.0 <= value <= 1.0):
|
|
228
|
+
raise ValueError(
|
|
229
|
+
f"{field_name} must be a float between 0.0 and 1.0, got {value!r}"
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
# Validate allowed_tools entries are strings
|
|
233
|
+
if not isinstance(self.allowed_tools, list):
|
|
234
|
+
raise ValueError(
|
|
235
|
+
f"allowed_tools must be a list, got {type(self.allowed_tools).__name__}"
|
|
236
|
+
)
|
|
237
|
+
for i, tool in enumerate(self.allowed_tools):
|
|
238
|
+
if not isinstance(tool, str):
|
|
239
|
+
raise ValueError(
|
|
240
|
+
f"allowed_tools[{i}] must be a string, got {type(tool).__name__}: {tool!r}"
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
# Validate blocked_patterns entries and precompile regex/glob patterns
|
|
244
|
+
if not isinstance(self.blocked_patterns, list):
|
|
245
|
+
raise ValueError(
|
|
246
|
+
f"blocked_patterns must be a list, got {type(self.blocked_patterns).__name__}"
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
# Validate version is a non-empty string
|
|
250
|
+
if not isinstance(self.version, str) or not self.version:
|
|
251
|
+
raise ValueError(
|
|
252
|
+
f"version must be a non-empty string, got {self.version!r}"
|
|
253
|
+
)
|
|
254
|
+
|
|
255
|
+
self._compiled_patterns: list[tuple[str, PatternType, re.Pattern | None]] = []
|
|
256
|
+
for i, pattern in enumerate(self.blocked_patterns):
|
|
257
|
+
if isinstance(pattern, str):
|
|
258
|
+
self._compiled_patterns.append((pattern, PatternType.SUBSTRING, None))
|
|
259
|
+
elif isinstance(pattern, tuple) and len(pattern) == 2:
|
|
260
|
+
pat_str, pat_type = pattern
|
|
261
|
+
if not isinstance(pat_str, str):
|
|
262
|
+
raise ValueError(
|
|
263
|
+
f"blocked_patterns[{i}][0] must be a string, got {type(pat_str).__name__}: {pat_str!r}"
|
|
264
|
+
)
|
|
265
|
+
if not isinstance(pat_type, PatternType):
|
|
266
|
+
raise ValueError(
|
|
267
|
+
f"blocked_patterns[{i}][1] must be a PatternType, got {type(pat_type).__name__}: {pat_type!r}"
|
|
268
|
+
)
|
|
269
|
+
compiled = None
|
|
270
|
+
if pat_type == PatternType.REGEX:
|
|
271
|
+
try:
|
|
272
|
+
compiled = re.compile(pat_str, re.IGNORECASE)
|
|
273
|
+
except re.error as e:
|
|
274
|
+
raise ValueError(
|
|
275
|
+
f"blocked_patterns[{i}] has invalid regex '{pat_str}': {e}"
|
|
276
|
+
) from e
|
|
277
|
+
elif pat_type == PatternType.GLOB:
|
|
278
|
+
try:
|
|
279
|
+
compiled = re.compile(fnmatch.translate(pat_str), re.IGNORECASE)
|
|
280
|
+
except re.error as e:
|
|
281
|
+
raise ValueError(
|
|
282
|
+
f"blocked_patterns[{i}] has invalid glob '{pat_str}': {e}"
|
|
283
|
+
) from e
|
|
284
|
+
self._compiled_patterns.append((pat_str, pat_type, compiled))
|
|
285
|
+
else:
|
|
286
|
+
raise ValueError(
|
|
287
|
+
f"blocked_patterns[{i}] must be a string or (string, PatternType) tuple, got {type(pattern).__name__}: {pattern!r}"
|
|
288
|
+
)
|
|
289
|
+
|
|
290
|
+
def detect_conflicts(self) -> list[str]:
|
|
291
|
+
"""
|
|
292
|
+
Detect conflicting or contradictory policy settings.
|
|
293
|
+
|
|
294
|
+
Returns:
|
|
295
|
+
A list of human-readable warning strings describing each conflict.
|
|
296
|
+
"""
|
|
297
|
+
warnings: list[str] = []
|
|
298
|
+
|
|
299
|
+
# Backpressure will never trigger if threshold is >= max_concurrent
|
|
300
|
+
if self.backpressure_threshold >= self.max_concurrent:
|
|
301
|
+
warnings.append(
|
|
302
|
+
f"backpressure_threshold ({self.backpressure_threshold}) >= "
|
|
303
|
+
f"max_concurrent ({self.max_concurrent}): backpressure will never trigger"
|
|
304
|
+
)
|
|
305
|
+
|
|
306
|
+
# Tools are allowed but max_tool_calls blocks any tool calls
|
|
307
|
+
if self.max_tool_calls == 0 and self.allowed_tools:
|
|
308
|
+
warnings.append(
|
|
309
|
+
f"max_tool_calls is 0 but allowed_tools is non-empty "
|
|
310
|
+
f"({self.allowed_tools}): tools are allowed but no calls permitted"
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
# Confidence checks effectively disabled
|
|
314
|
+
if self.confidence_threshold == 0.0:
|
|
315
|
+
warnings.append(
|
|
316
|
+
"confidence_threshold is 0.0: effectively disables confidence checking"
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
# timeout_seconds is too low for reasonable execution (< 5s warning)
|
|
320
|
+
if self.timeout_seconds < 5:
|
|
321
|
+
warnings.append(
|
|
322
|
+
f"timeout_seconds ({self.timeout_seconds}) is very low (under 5s), "
|
|
323
|
+
f"may not allow reasonable execution time"
|
|
324
|
+
)
|
|
325
|
+
|
|
326
|
+
return warnings
|
|
327
|
+
|
|
328
|
+
def matches_pattern(self, text: str) -> list[str]:
|
|
329
|
+
"""Return all blocked patterns that match the given text."""
|
|
330
|
+
matches = []
|
|
331
|
+
for pat_str, pat_type, compiled in self._compiled_patterns:
|
|
332
|
+
if pat_type == PatternType.SUBSTRING:
|
|
333
|
+
if pat_str.lower() in text.lower():
|
|
334
|
+
matches.append(pat_str)
|
|
335
|
+
elif compiled is not None and compiled.search(text):
|
|
336
|
+
matches.append(pat_str)
|
|
337
|
+
return matches
|
|
338
|
+
|
|
339
|
+
def to_dict(self) -> dict[str, Any]:
|
|
340
|
+
"""Serialize policy to a dictionary."""
|
|
341
|
+
return {
|
|
342
|
+
"name": self.name,
|
|
343
|
+
"max_tokens": self.max_tokens,
|
|
344
|
+
"max_tool_calls": self.max_tool_calls,
|
|
345
|
+
"allowed_tools": self.allowed_tools,
|
|
346
|
+
"blocked_patterns": [
|
|
347
|
+
{"pattern": p, "type": t.value} if t != PatternType.SUBSTRING
|
|
348
|
+
else p
|
|
349
|
+
for p, t, _ in self._compiled_patterns
|
|
350
|
+
],
|
|
351
|
+
"require_human_approval": self.require_human_approval,
|
|
352
|
+
"timeout_seconds": self.timeout_seconds,
|
|
353
|
+
"confidence_threshold": self.confidence_threshold,
|
|
354
|
+
"drift_threshold": self.drift_threshold,
|
|
355
|
+
"log_all_calls": self.log_all_calls,
|
|
356
|
+
"checkpoint_frequency": self.checkpoint_frequency,
|
|
357
|
+
"max_concurrent": self.max_concurrent,
|
|
358
|
+
"backpressure_threshold": self.backpressure_threshold,
|
|
359
|
+
"version": self.version,
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
@classmethod
|
|
363
|
+
def from_dict(cls, data: dict[str, Any]) -> GovernancePolicy:
|
|
364
|
+
"""Deserialize policy from a dictionary.
|
|
365
|
+
|
|
366
|
+
Args:
|
|
367
|
+
data: Dictionary as produced by ``to_dict()``.
|
|
368
|
+
|
|
369
|
+
Returns:
|
|
370
|
+
Reconstructed GovernancePolicy instance.
|
|
371
|
+
"""
|
|
372
|
+
data = dict(data) # shallow copy to avoid mutating caller's dict
|
|
373
|
+
# Convert blocked_patterns back to tuples where needed
|
|
374
|
+
raw_patterns = data.get("blocked_patterns", [])
|
|
375
|
+
patterns: list[str | tuple[str, PatternType]] = []
|
|
376
|
+
for p in raw_patterns:
|
|
377
|
+
if isinstance(p, str):
|
|
378
|
+
patterns.append(p)
|
|
379
|
+
elif isinstance(p, dict) and "pattern" in p and "type" in p:
|
|
380
|
+
try:
|
|
381
|
+
pt = PatternType(p["type"])
|
|
382
|
+
except ValueError:
|
|
383
|
+
raise ValueError(f"Unknown pattern type: {p['type']!r}") from None
|
|
384
|
+
patterns.append((p["pattern"], pt))
|
|
385
|
+
else:
|
|
386
|
+
raise ValueError(f"Invalid blocked_pattern entry: {p!r}")
|
|
387
|
+
data["blocked_patterns"] = patterns
|
|
388
|
+
|
|
389
|
+
valid_fields = {
|
|
390
|
+
"name", "max_tokens", "max_tool_calls", "allowed_tools",
|
|
391
|
+
"blocked_patterns", "require_human_approval", "timeout_seconds",
|
|
392
|
+
"confidence_threshold", "drift_threshold", "log_all_calls",
|
|
393
|
+
"checkpoint_frequency", "max_concurrent", "backpressure_threshold",
|
|
394
|
+
"version",
|
|
395
|
+
}
|
|
396
|
+
filtered = {k: v for k, v in data.items() if k in valid_fields}
|
|
397
|
+
return cls(**filtered)
|
|
398
|
+
|
|
399
|
+
def compare_versions(self, other: GovernancePolicy) -> dict[str, Any]:
    """Summarise how ``other`` differs from this policy, version included.

    Args:
        other: The policy to compare against; treated as the newer one.

    Returns:
        A dict with ``old_version`` (this policy), ``new_version``
        (``other``), ``versions_differ`` and ``changes``, where ``changes``
        is whatever ``self.diff(other)`` returns.
    """
    old_version = self.version
    new_version = other.version
    report: dict[str, Any] = {
        "old_version": old_version,
        "new_version": new_version,
        "versions_differ": old_version != new_version,
    }
    report["changes"] = self.diff(other)
    return report
|
|
410
|
+
|
|
411
|
+
def to_yaml(self) -> str:
    """Serialize the policy to a YAML string.

    The document contains exactly the mapping produced by ``to_dict()``,
    in the same key order, so it includes ``name`` and a YAML round trip
    preserves the policy name.

    Returns:
        Block-style YAML text.

    Raises:
        ImportError: If PyYAML is not installed.
    """
    # Imported lazily so PyYAML is only required when YAML is actually used.
    import yaml

    # Reusing to_dict() keeps the dict and YAML representations from
    # drifting apart when fields are added.
    return yaml.dump(self.to_dict(), default_flow_style=False, sort_keys=False)
|
|
435
|
+
|
|
436
|
+
@classmethod
def from_yaml(cls, yaml_str: str) -> GovernancePolicy:
    """Deserialize a policy from a YAML string.

    The document must be a mapping. ``blocked_patterns`` entries may be
    plain strings or ``{"pattern": ..., "type": ...}`` mappings; the latter
    are converted to ``(pattern, PatternType)`` tuples. A missing or empty
    (``null``) ``blocked_patterns`` key is treated as "no patterns". Keys
    that are not known policy fields are dropped before construction.

    Args:
        yaml_str: YAML text, parsed with ``yaml.safe_load``.

    Returns:
        A policy built via ``cls(**fields)``.

    Raises:
        ValueError: If the document is not a mapping, ``blocked_patterns``
            is not a list, an entry is malformed, or a pattern type is
            unknown.
    """
    import yaml

    data = yaml.safe_load(yaml_str)
    if not isinstance(data, dict):
        raise ValueError(f"Expected a YAML mapping, got {type(data).__name__}")

    # "blocked_patterns:" with no value parses to None; a bare scalar string
    # would otherwise be iterated character by character. Normalise the
    # former and reject the latter instead of misparsing it.
    raw_patterns = data.get("blocked_patterns")
    if raw_patterns is None:
        raw_patterns = []
    elif not isinstance(raw_patterns, list):
        raise ValueError(
            f"blocked_patterns must be a list, got {type(raw_patterns).__name__}"
        )

    # Convert blocked_patterns back to tuples where needed
    patterns: list[str | tuple[str, PatternType]] = []
    for p in raw_patterns:
        if isinstance(p, str):
            patterns.append(p)
        elif isinstance(p, dict) and "pattern" in p and "type" in p:
            try:
                pt = PatternType(p["type"])
            except ValueError:
                raise ValueError(f"Unknown pattern type: {p['type']!r}") from None
            patterns.append((p["pattern"], pt))
        else:
            raise ValueError(f"Invalid blocked_pattern entry: {p!r}")
    data["blocked_patterns"] = patterns

    # Remove unknown keys
    valid_fields = {
        "max_tokens", "max_tool_calls", "allowed_tools", "blocked_patterns",
        "require_human_approval", "timeout_seconds", "confidence_threshold",
        "drift_threshold", "log_all_calls", "checkpoint_frequency",
        "max_concurrent", "backpressure_threshold", "version",
    }
    filtered = {k: v for k, v in data.items() if k in valid_fields}
    return cls(**filtered)
|
|
470
|
+
|
|
471
|
+
def save(self, filepath: str) -> None:
    """Save the policy to *filepath* as a UTF-8 YAML file.

    The document is serialised before the file is opened. Opening in
    ``"w"`` mode truncates immediately, so serialising first means a
    failure inside ``to_yaml`` cannot wipe an existing file.

    Args:
        filepath: Destination path; an existing file is overwritten.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    document = self.to_yaml()
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(document)
|
|
475
|
+
|
|
476
|
+
@classmethod
def load(cls, filepath: str) -> GovernancePolicy:
    """Read a UTF-8 YAML file and build a policy from its contents.

    Args:
        filepath: Path of the YAML file to read.

    Returns:
        Whatever ``cls.from_yaml`` returns for the file's text.
    """
    with open(filepath, encoding="utf-8") as handle:
        document = handle.read()
        policy = cls.from_yaml(document)
    return policy
|
|
481
|
+
|
|
482
|
+
def diff(self, other: GovernancePolicy) -> dict[str, tuple[Any, Any]]:
    """Report which policy fields differ between this policy and *other*.

    Args:
        other: The policy to compare against.

    Returns:
        A dict keyed by field name, in the fixed order listed below, whose
        values are ``(self_value, other_value)`` tuples. Fields that compare
        equal are left out, so identical policies yield an empty dict.
    """
    compared = (
        "max_tokens",
        "max_tool_calls",
        "allowed_tools",
        "blocked_patterns",
        "require_human_approval",
        "timeout_seconds",
        "confidence_threshold",
        "drift_threshold",
        "log_all_calls",
        "checkpoint_frequency",
        "max_concurrent",
        "backpressure_threshold",
        "version",
    )
    pairs = ((name, getattr(self, name), getattr(other, name)) for name in compared)
    return {name: (mine, theirs) for name, mine, theirs in pairs if mine != theirs}
|
|
501
|
+
|
|
502
|
+
def is_stricter_than(self, other: GovernancePolicy) -> bool:
    """Return True if this policy is more restrictive than *other*.

    A policy is stricter when it is at least as tight as *other* on every
    compared dimension and strictly tighter on at least one of them.

    Dimensions compared:
        * lower is tighter: ``max_tokens``, ``max_tool_calls``,
          ``timeout_seconds``, ``max_concurrent``,
          ``backpressure_threshold``, ``checkpoint_frequency``
        * higher is tighter: ``confidence_threshold`` and the number of
          ``blocked_patterns``
        * ``require_human_approval``: requiring approval is tighter
        * ``allowed_tools``: an empty list means "no allow-list" (that is
          how ``PolicyInterceptor.intercept`` enforces it), so a non-empty
          list is tighter than an empty one; two non-empty lists are
          compared by length.

    Args:
        other: The policy to compare against.

    Returns:
        ``True`` only if no dimension is looser and at least one is
        strictly tighter; identical policies return ``False``.
    """
    lower_is_stricter = (
        "max_tokens",
        "max_tool_calls",
        "timeout_seconds",
        "max_concurrent",
        "backpressure_threshold",
        "checkpoint_frequency",
    )
    # Two parallel tallies: "not looser" must hold everywhere, "tighter"
    # must hold somewhere. Every dimension feeds both, so a difference in
    # any compared field can qualify the policy as stricter.
    not_looser = [getattr(self, n) <= getattr(other, n) for n in lower_is_stricter]
    tighter = [getattr(self, n) < getattr(other, n) for n in lower_is_stricter]

    not_looser.append(self.confidence_threshold >= other.confidence_threshold)
    tighter.append(self.confidence_threshold > other.confidence_threshold)

    mine_blocked, theirs_blocked = len(self.blocked_patterns), len(other.blocked_patterns)
    not_looser.append(mine_blocked >= theirs_blocked)
    tighter.append(mine_blocked > theirs_blocked)

    not_looser.append(bool(self.require_human_approval or not other.require_human_approval))
    tighter.append(bool(self.require_human_approval and not other.require_human_approval))

    # allowed_tools: an empty list is "unrestricted", not "nothing allowed".
    mine_tools, theirs_tools = self.allowed_tools, other.allowed_tools
    if mine_tools and theirs_tools:
        not_looser.append(len(mine_tools) <= len(theirs_tools))
        tighter.append(len(mine_tools) < len(theirs_tools))
    elif mine_tools:
        # Only this policy restricts tools.
        tighter.append(True)
    elif theirs_tools:
        # Only the other policy restricts tools, so this one is looser.
        not_looser.append(False)

    return all(not_looser) and any(tighter)
|
|
536
|
+
|
|
537
|
+
def format_diff(self, other: GovernancePolicy) -> str:
    """Format the differences between this policy and *other* as text.

    Args:
        other: The policy to compare against.

    Returns:
        ``"Policies are identical."`` when ``self.diff(other)`` is empty.
        Otherwise a newline-joined report: a title line, a 50-dash rule,
        one ``field: old -> new`` line per changed field (values shown
        with ``repr``), and a closing 50-dash rule.
    """
    delta = self.diff(other)
    if not delta:
        return "Policies are identical."

    rule = "-" * 50
    entries = [
        f" {field_name}: {before!r} -> {after!r}"
        for field_name, (before, after) in delta.items()
    ]
    return "\n".join(["Policy Diff:", rule, *entries, rule])
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
_AGENT_ID_RE = re.compile(r"^[a-zA-Z0-9_-]+$")


@dataclass
class ExecutionContext:
    """Context passed through the governance layer.

    Holds the identity of a governed session, the policy attached to it
    and the counters that ``BaseIntegration`` updates while the session
    runs. All fields are validated on construction (see ``validate``).
    """

    agent_id: str  # non-empty; letters, digits, "_" and "-" only
    session_id: str  # non-empty
    policy: GovernancePolicy
    start_time: datetime = field(default_factory=datetime.now)
    call_count: int = 0  # incremented by BaseIntegration.post_execute
    total_tokens: int = 0
    tool_calls: list[dict] = field(default_factory=list)
    checkpoints: list[str] = field(default_factory=list)  # checkpoint ids
    # Drift bookkeeping written by BaseIntegration.compute_drift/post_execute.
    _baseline_hash: str | None = field(default=None, repr=False)
    _baseline_text: str | None = field(default=None, repr=False)
    _drift_scores: list[float] = field(default_factory=list, repr=False)

    def __repr__(self) -> str:
        """Return a short repr showing only the agent and session ids."""
        return f"ExecutionContext(agent_id={self.agent_id!r}, session_id={self.session_id!r})"

    def __post_init__(self) -> None:
        """Validate context fields on construction."""
        self.validate()

    def validate(self) -> None:
        """Validate all context fields.

        Raises:
            ValueError: If ``agent_id`` or ``session_id`` is not a non-empty
                string, ``agent_id`` contains characters outside
                ``[a-zA-Z0-9_-]``, ``policy`` is not a ``GovernancePolicy``,
                ``call_count``/``total_tokens`` is not a non-negative int,
                or ``checkpoints`` is not a list of strings.
        """
        # Validate agent_id is a non-empty string matching allowed pattern
        if not isinstance(self.agent_id, str) or not self.agent_id:
            raise ValueError(
                f"agent_id must be a non-empty string, got {self.agent_id!r}"
            )
        # fullmatch, not match: with match(), "$" also matches just before a
        # trailing newline, so an id such as "agent\n" would be accepted.
        if not _AGENT_ID_RE.fullmatch(self.agent_id):
            raise ValueError(
                f"agent_id must match ^[a-zA-Z0-9_-]+$, got {self.agent_id!r}"
            )

        # Validate session_id is a non-empty string
        if not isinstance(self.session_id, str) or not self.session_id:
            raise ValueError(
                f"session_id must be a non-empty string, got {self.session_id!r}"
            )

        # Validate policy is a GovernancePolicy instance
        if not isinstance(self.policy, GovernancePolicy):
            raise ValueError(
                f"policy must be a GovernancePolicy instance, got {type(self.policy).__name__}"
            )

        # Validate non-negative integers
        for field_name in ("call_count", "total_tokens"):
            value = getattr(self, field_name)
            if not isinstance(value, int) or value < 0:
                raise ValueError(
                    f"{field_name} must be a non-negative integer, got {value!r}"
                )

        # Validate checkpoints is a list of strings
        if not isinstance(self.checkpoints, list):
            raise ValueError(
                f"checkpoints must be a list, got {type(self.checkpoints).__name__}"
            )
        for i, cp in enumerate(self.checkpoints):
            if not isinstance(cp, str):
                raise ValueError(
                    f"checkpoints[{i}] must be a string, got {type(cp).__name__}: {cp!r}"
                )
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
# ── Abstract Tool Call Interceptor ────────────────────────────
|
|
619
|
+
|
|
620
|
+
@dataclass
class ToolCallRequest:
    """Framework-agnostic description of a single tool/function call.

    Attributes:
        tool_name: Name of the tool being called.
        arguments: Arguments supplied to the tool.
        call_id: Identifier for the call; defaults to an empty string.
        agent_id: Agent identifier; defaults to an empty string.
        metadata: Extra data attached to the call (a fresh dict per
            instance). ``ContentHashInterceptor`` reads its
            ``"content_hash"`` entry.
    """

    tool_name: str
    arguments: dict[str, Any]
    call_id: str = ""
    agent_id: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    def __repr__(self) -> str:
        # Compact form: arguments, agent_id and metadata are left out.
        shown = f"tool_name={self.tool_name!r}, call_id={self.call_id!r}"
        return f"ToolCallRequest({shown})"
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
@dataclass
class ToolCallResult:
    """Outcome of running a tool call through an interceptor.

    Attributes:
        allowed: Whether the call may proceed.
        reason: Explanation for the decision; ``None`` by default.
        modified_arguments: Optional replacement arguments.
        audit_entry: Optional audit data for the decision.
    """

    allowed: bool
    reason: str | None = None
    modified_arguments: dict[str, Any] | None = None  # For argument sanitization
    audit_entry: dict[str, Any] | None = None

    def __repr__(self) -> str:
        # Compact form: only the decision and its reason are shown.
        shown = f"allowed={self.allowed!r}, reason={self.reason!r}"
        return f"ToolCallResult({shown})"
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
class ToolCallInterceptor(Protocol):
    """
    Structural interface for objects that vet tool/function calls.

    Any object exposing a compatible ``intercept`` method satisfies this
    protocol; no inheritance is required. Because requests are expressed
    as vendor-neutral ``ToolCallRequest`` objects, one interceptor can be
    reused across frameworks (OpenAI, LangChain, CrewAI, etc.).

    Example:
        class PIIInterceptor:
            def intercept(self, request: ToolCallRequest) -> ToolCallResult:
                text = str(request.arguments)
                if "ssn" in text or "password" in text:
                    return ToolCallResult(allowed=False, reason="PII detected")
                return ToolCallResult(allowed=True)
    """

    def intercept(self, request: ToolCallRequest) -> ToolCallResult:
        """Decide whether *request* may proceed and return the decision."""
        ...
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
class PolicyInterceptor:
    """
    Built-in interceptor that applies a ``GovernancePolicy`` to tool calls.

    Checks, in order (the first failing check decides the result):
    - ``require_human_approval`` set on the policy -> every call is denied
    - a non-empty ``allowed_tools`` that does not contain the tool name
    - a blocked pattern matching the stringified arguments
    - the context's ``call_count`` having reached ``max_tool_calls``
      (only when a context was supplied)
    """

    def __init__(self, policy: GovernancePolicy, context: ExecutionContext | None = None):
        self.policy = policy
        self.context = context

    def intercept(self, request: ToolCallRequest) -> ToolCallResult:
        """Return an allow/deny decision for *request* under the policy."""
        denial = self._violation(request)
        if denial is None:
            return ToolCallResult(allowed=True)
        return ToolCallResult(allowed=False, reason=denial)

    def _violation(self, request: ToolCallRequest) -> str | None:
        """Return the reason the request is denied, or None if it passes."""
        policy = self.policy

        # Human approval requirement blocks everything outright.
        if policy.require_human_approval:
            return f"Tool '{request.tool_name}' requires human approval per governance policy"

        # An empty allow-list means no restriction on tool names.
        if policy.allowed_tools and request.tool_name not in policy.allowed_tools:
            return f"Tool '{request.tool_name}' not in allowed list: {policy.allowed_tools}"

        # Blocked patterns are matched against str(arguments).
        hit = policy.matches_pattern(str(request.arguments))
        if hit:
            return f"Blocked pattern '{hit[0]}' detected in tool arguments"

        # The call budget is only enforced when a context is attached.
        if self.context and self.context.call_count >= policy.max_tool_calls:
            return f"Max tool calls exceeded ({policy.max_tool_calls})"

        return None
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
class ContentHashInterceptor:
    """Interceptor that checks a tool's content hash, not just its name.

    Tool *names* can be aliased, so this interceptor compares the hash
    carried in ``request.metadata["content_hash"]`` with the hash recorded
    for that tool name. The docstring of the original implementation
    describes the hash as the SHA-256 of the tool's source, recorded at
    registration time, as a defence against tool-wrapping and aliasing
    attacks (Ona/Veto agent sandbox escape research).

    Expected hashes typically come from a ``tool_registry`` that stores
    content hashes (see
    :class:`~agent_control_plane.tool_registry.ToolRegistry`).

    Args:
        tool_hashes: Mapping of tool name → expected SHA-256 hex digest.
            The mapping is copied.
        strict: If ``True`` (default), block tools with no registered
            hash. If ``False``, allow unknown tools with a warning.
    """

    def __init__(
        self,
        tool_hashes: dict[str, str] | None = None,
        strict: bool = True,
    ) -> None:
        self._tool_hashes: dict[str, str] = dict(tool_hashes or {})
        self._strict = strict

    def register_hash(self, tool_name: str, content_hash: str) -> None:
        """Store (or overwrite) the expected content hash for *tool_name*."""
        self._tool_hashes[tool_name] = content_hash

    def intercept(self, request: ToolCallRequest) -> ToolCallResult:
        """Allow the call only if its content hash matches the registered one."""
        tool = request.tool_name
        expected = self._tool_hashes.get(tool)

        if expected is None:
            # Unknown tool: lenient mode lets it through with a warning.
            if not self._strict:
                logger.warning(
                    "No content hash for tool '%s' — allowing in non-strict mode",
                    tool,
                )
                return ToolCallResult(allowed=True)
            return ToolCallResult(
                allowed=False,
                reason=(
                    f"Tool '{tool}' has no registered content hash "
                    "(possible alias or wrapper)"
                ),
            )

        # The hash is expected in request metadata (set by the framework adapter).
        actual = request.metadata.get("content_hash", "")
        if not actual:
            return ToolCallResult(
                allowed=False,
                reason=(
                    f"Tool '{tool}' call is missing content_hash metadata "
                    "— cannot verify integrity"
                ),
            )

        if actual == expected:
            return ToolCallResult(allowed=True)

        # Only the first 12 characters of each hash appear in the message.
        return ToolCallResult(
            allowed=False,
            reason=(
                f"Tool '{tool}' content hash mismatch: "
                f"expected {expected[:12]}… got {actual[:12]}… "
                "(possible tampering or wrapper)"
            ),
        )
|
|
783
|
+
|
|
784
|
+
|
|
785
|
+
class CompositeInterceptor:
    """Chain multiple interceptors. All must allow for the call to proceed.

    Interceptors run in insertion order. The first one that denies the
    call short-circuits the chain and its result is returned unchanged.

    Args:
        interceptors: Initial interceptors, each exposing
            ``intercept(request)``. The sequence is copied, so later
            ``add`` calls never modify the caller's list.
    """

    def __init__(self, interceptors: list[Any] | None = None):
        # Copy rather than alias: add() appends to this list, and the
        # caller's own list must not grow as a side effect.
        self.interceptors: list[Any] = list(interceptors or [])

    def add(self, interceptor: Any) -> CompositeInterceptor:
        """Append *interceptor* to the chain and return ``self`` for chaining."""
        self.interceptors.append(interceptor)
        return self

    def intercept(self, request: ToolCallRequest) -> ToolCallResult:
        """Run *request* through every interceptor.

        Returns:
            The first denying result, or a fresh ``ToolCallResult(allowed=True)``
            when every interceptor allows the call (or the chain is empty).
        """
        for interceptor in self.interceptors:
            result = interceptor.intercept(request)
            if not result.allowed:
                return result
        return ToolCallResult(allowed=True)
|
|
801
|
+
|
|
802
|
+
|
|
803
|
+
# ── Bounded Concurrency ──────────────────────────────────────
|
|
804
|
+
|
|
805
|
+
class BoundedSemaphore:
    """
    Non-blocking slot counter with a backpressure indicator.

    ``try_acquire`` never waits: it either takes a slot or rejects the
    request once ``max_concurrent`` slots are in use. Reaching
    ``backpressure_threshold`` does not reject anything by itself; it is
    only reported through ``is_under_pressure`` so callers can slow down.
    The counters are plain integers with no locking.
    """

    def __init__(self, max_concurrent: int = 10, backpressure_threshold: int = 8):
        self.max_concurrent = max_concurrent
        self.backpressure_threshold = backpressure_threshold
        # Slots currently held, plus lifetime accept/reject tallies.
        self._active = 0
        self._total_acquired = 0
        self._total_rejected = 0

    def try_acquire(self) -> tuple[bool, str | None]:
        """
        Attempt to take one slot without waiting.

        Returns ``(True, None)`` on success, or ``(False, reason)`` when
        all ``max_concurrent`` slots are already in use.
        """
        if self._active < self.max_concurrent:
            self._active += 1
            self._total_acquired += 1
            return True, None

        self._total_rejected += 1
        return False, f"Max concurrency reached ({self.max_concurrent})"

    def release(self) -> None:
        """Give back one slot; a release with nothing held is ignored."""
        if self._active <= 0:
            return
        self._active -= 1

    @property
    def is_under_pressure(self) -> bool:
        """True once the active count has reached the backpressure threshold."""
        return not (self._active < self.backpressure_threshold)

    @property
    def active(self) -> int:
        """Number of slots currently held."""
        return self._active

    @property
    def available(self) -> int:
        """Number of free slots, never negative."""
        remaining = self.max_concurrent - self._active
        return max(0, remaining)

    def stats(self) -> dict[str, Any]:
        """Return a snapshot of the current counters and derived values."""
        snapshot: dict[str, Any] = {}
        snapshot["active"] = self._active
        snapshot["max_concurrent"] = self.max_concurrent
        snapshot["available"] = self.available
        snapshot["under_pressure"] = self.is_under_pressure
        snapshot["total_acquired"] = self._total_acquired
        snapshot["total_rejected"] = self._total_rejected
        return snapshot
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
class BaseIntegration(ABC):
    """
    Base class for framework integrations.

    Wraps any agent framework with Agent OS governance:
    - Pre-execution policy checks
    - Post-execution validation
    - Flight recording
    - Signal handling

    ``pre_execute`` and ``post_execute`` evaluate ``ctx.policy`` — the
    snapshot taken by ``create_context`` — rather than ``self.policy``, so
    replacing or mutating the integration's policy does not change the
    rules of a session that is already running.
    """

    def __init__(self, policy: GovernancePolicy | None = None) -> None:
        # A falsy/missing policy falls back to a default GovernancePolicy.
        self.policy: GovernancePolicy = policy or GovernancePolicy()
        # Most recent context per agent id.
        self.contexts: dict[str, ExecutionContext] = {}
        self._signal_handlers: dict[str, Callable[..., Any]] = {}
        self._event_listeners: dict[GovernanceEventType, list[Callable[..., Any]]] = {}

    @abstractmethod
    def wrap(self, agent: Any) -> Any:
        """
        Wrap an agent with governance.

        Returns a governed version of the agent that:
        - Enforces policy on all operations
        - Records execution to flight recorder
        - Responds to signals (SIGSTOP, SIGKILL, etc.)
        """
        pass

    @abstractmethod
    def unwrap(self, governed_agent: Any) -> Any:
        """Remove governance wrapper and return original agent."""
        pass

    def create_context(self, agent_id: str) -> ExecutionContext:
        """Create execution context for an agent.

        The policy is **deep-copied** so that the session is pinned to
        the policy that was active when the context was created. This
        prevents mid-session mutations from leaking into running sessions.

        The context is stored in ``self.contexts`` under *agent_id*,
        replacing any earlier context for the same id. The session id is
        the first 8 characters of a random UUID4.
        """
        from uuid import uuid4
        ctx = ExecutionContext(
            agent_id=agent_id,
            session_id=str(uuid4())[:8],
            policy=copy.deepcopy(self.policy),
        )
        self.contexts[agent_id] = ctx
        return ctx

    def on(self, event_type: GovernanceEventType, callback: Callable[..., Any]) -> None:
        """Register a callback for a governance event type."""
        self._event_listeners.setdefault(event_type, []).append(callback)

    def emit(self, event_type: GovernanceEventType, data: dict[str, Any]) -> None:
        """Fire all registered callbacks for an event type.

        Each callback receives *data*. Exceptions raised by a callback are
        logged as warnings and do not stop the remaining callbacks.
        """
        # Iterate over a snapshot: a listener may call on() for this same
        # event type, which would otherwise extend the list mid-iteration
        # and make the new listener fire within the current dispatch.
        for cb in list(self._event_listeners.get(event_type, ())):
            try:
                cb(data)
            except Exception as exc:  # noqa: BLE001 — listener errors must not break governance flow
                logger.warning(
                    "Governance event listener error for %s: %s",
                    event_type, exc, exc_info=True,
                )

    def pre_execute(self, ctx: ExecutionContext, input_data: Any) -> tuple[bool, str | None]:
        """
        Pre-execution policy check.

        Emits ``POLICY_CHECK`` and then evaluates, in order: call count,
        elapsed time since ``ctx.start_time``, blocked patterns in
        ``str(input_data)``, the human-approval requirement, and (when the
        threshold is positive and ``input_data`` has a numeric
        ``confidence`` attribute) the confidence threshold. The first
        failing check emits a violation event and ends the evaluation.

        Returns (allowed, reason) tuple.
        """
        # Enforce the policy pinned to this session, not the live one.
        policy = ctx.policy
        event_base = {"agent_id": ctx.agent_id, "timestamp": datetime.now().isoformat()}

        self.emit(GovernanceEventType.POLICY_CHECK, {**event_base, "phase": "pre_execute"})

        # Check call count
        if ctx.call_count >= policy.max_tool_calls:
            reason = f"Max tool calls exceeded ({policy.max_tool_calls})"
            self.emit(GovernanceEventType.POLICY_VIOLATION, {**event_base, "reason": reason})
            return False, reason

        # Check timeout
        elapsed = (datetime.now() - ctx.start_time).total_seconds()
        if elapsed > policy.timeout_seconds:
            reason = f"Timeout exceeded ({policy.timeout_seconds}s)"
            self.emit(GovernanceEventType.POLICY_VIOLATION, {**event_base, "reason": reason})
            return False, reason

        # Check blocked patterns
        input_str = str(input_data)
        matched = policy.matches_pattern(input_str)
        if matched:
            reason = f"Blocked pattern detected: {matched[0]}"
            self.emit(
                GovernanceEventType.TOOL_CALL_BLOCKED,
                {**event_base, "reason": reason, "pattern": matched[0]},
            )
            return False, reason

        # Check human approval requirement
        if policy.require_human_approval:
            reason = "Execution requires human approval per governance policy"
            self.emit(GovernanceEventType.POLICY_VIOLATION, {**event_base, "reason": reason})
            return False, reason

        # Check confidence threshold
        if policy.confidence_threshold > 0.0:
            confidence = getattr(input_data, 'confidence', None)
            if isinstance(confidence, (int, float)) and confidence < policy.confidence_threshold:
                reason = (
                    f"Confidence {confidence:.2f} below threshold "
                    f"{policy.confidence_threshold:.2f}"
                )
                self.emit(GovernanceEventType.POLICY_VIOLATION, {**event_base, "reason": reason})
                return False, reason

        return True, None

    def post_execute(self, ctx: ExecutionContext, output_data: Any) -> tuple[bool, str | None]:
        """
        Post-execution validation including drift detection.

        Increments ``ctx.call_count``, then computes a similarity score
        between the serialized output and the baseline (first output)
        using ``SequenceMatcher``. The drift score is ``1.0 - similarity``
        (0.0 = identical, 1.0 = completely different).

        When the score exceeds the session policy's ``drift_threshold`` a
        ``DRIFT_DETECTED`` governance event is emitted and a warning is
        logged. Callers can register listeners for this event to enforce
        blocking behaviour if desired; this method itself never blocks.

        A checkpoint id is appended to ``ctx.checkpoints`` (and
        ``CHECKPOINT_CREATED`` emitted) whenever the call count is a
        multiple of the policy's ``checkpoint_frequency``.

        Returns (valid, reason) tuple; as written, always ``(True, None)``.
        """
        ctx.call_count += 1
        # Same pinned policy that compute_drift() evaluates against.
        policy = ctx.policy

        # Drift detection: compare output against baseline
        if policy.drift_threshold > 0.0:
            drift_result = self.compute_drift(ctx, output_data)
            if drift_result is not None:
                ctx._drift_scores.append(drift_result.score)
                if drift_result.exceeded:
                    reason = (
                        f"Drift score {drift_result.score:.2f} exceeds threshold "
                        f"{policy.drift_threshold:.2f}"
                    )
                    logger.warning(
                        "Drift detected agent=%s score=%.4f threshold=%.2f",
                        ctx.agent_id,
                        drift_result.score,
                        drift_result.threshold,
                    )
                    self.emit(GovernanceEventType.DRIFT_DETECTED, {
                        "agent_id": ctx.agent_id,
                        "timestamp": datetime.now().isoformat(),
                        "reason": reason,
                        "drift_score": drift_result.score,
                        "threshold": drift_result.threshold,
                        "baseline_hash": drift_result.baseline_hash,
                        "current_hash": drift_result.current_hash,
                    })
                else:
                    logger.debug(
                        "Drift check agent=%s score=%.4f threshold=%.2f",
                        ctx.agent_id,
                        drift_result.score,
                        drift_result.threshold,
                    )

        # Checkpoint if needed
        if ctx.call_count % policy.checkpoint_frequency == 0:
            checkpoint_id = f"checkpoint-{ctx.call_count}"
            ctx.checkpoints.append(checkpoint_id)
            self.emit(GovernanceEventType.CHECKPOINT_CREATED, {
                "agent_id": ctx.agent_id,
                "timestamp": datetime.now().isoformat(),
                "checkpoint_id": checkpoint_id,
                "call_count": ctx.call_count,
            })

        return True, None

    @staticmethod
    def compute_drift(ctx: ExecutionContext, output_data: Any) -> DriftResult | None:
        """Compute drift between *output_data* and the baseline stored in *ctx*.

        On the first call the output is recorded as the baseline and ``None``
        is returned (no comparison possible). Subsequent calls use
        ``SequenceMatcher`` to compute a similarity ratio between the
        serialised baseline and the current output. The drift score is
        ``1.0 - similarity`` (0.0 = identical, 1.0 = completely different).
        The threshold used is ``ctx.policy.drift_threshold``.
        """
        current_text = str(output_data)
        current_hash = hashlib.sha256(current_text.encode()).hexdigest()

        if ctx._baseline_hash is None:
            ctx._baseline_hash = current_hash
            ctx._baseline_text = current_text
            return None

        # SequenceMatcher ratio: 1.0 = identical, 0.0 = nothing in common
        similarity = difflib.SequenceMatcher(
            None, ctx._baseline_text, current_text
        ).ratio()
        score = 1.0 - similarity

        return DriftResult(
            score=score,
            exceeded=score > ctx.policy.drift_threshold,
            threshold=ctx.policy.drift_threshold,
            baseline_hash=ctx._baseline_hash,
            current_hash=current_hash,
        )

    async def async_pre_execute(self, ctx: ExecutionContext, input_data: Any) -> tuple[bool, str | None]:
        """
        Async pre-execution policy check.

        Defaults to calling the sync version. Override in subclasses
        to add async-specific logic (e.g., async database lookups).
        """
        return self.pre_execute(ctx, input_data)

    async def async_post_execute(self, ctx: ExecutionContext, output_data: Any) -> tuple[bool, str | None]:
        """
        Async post-execution validation.

        Defaults to calling the sync version. Override in subclasses
        to add async-specific logic.
        """
        return self.post_execute(ctx, output_data)

    def on_signal(self, signal: str, handler: Callable[..., Any]) -> None:
        """Register a signal handler, replacing any existing one for *signal*."""
        self._signal_handlers[signal] = handler

    def signal(self, agent_id: str, signal: str) -> None:
        """Send signal to agent.

        Calls the handler registered for *signal* with *agent_id*;
        signals without a registered handler are ignored.
        """
        if signal in self._signal_handlers:
            self._signal_handlers[signal](agent_id)
|
|
1098
|
+
|
|
1099
|
+
|
|
1100
|
+
class AsyncGovernedWrapper:
|
|
1101
|
+
"""
|
|
1102
|
+
Async wrapper that applies governance around an async callable.
|
|
1103
|
+
|
|
1104
|
+
Uses asyncio.Lock for concurrent access control instead of threading.
|
|
1105
|
+
Calls async_pre_execute before and async_post_execute after the wrapped callable.
|
|
1106
|
+
"""
|
|
1107
|
+
|
|
1108
|
+
def __init__(self, integration: BaseIntegration, fn: Callable[..., Any], agent_id: str = "async-agent") -> None:
|
|
1109
|
+
self._integration = integration
|
|
1110
|
+
self._fn = fn
|
|
1111
|
+
self._ctx = integration.create_context(agent_id)
|
|
1112
|
+
self._lock = asyncio.Lock()
|
|
1113
|
+
|
|
1114
|
+
@property
|
|
1115
|
+
def context(self) -> ExecutionContext:
|
|
1116
|
+
return self._ctx
|
|
1117
|
+
|
|
1118
|
+
async def __call__(self, *args: Any, **kwargs: Any) -> Any:
|
|
1119
|
+
async with self._lock:
|
|
1120
|
+
# Pre-execution check
|
|
1121
|
+
allowed, reason = await self._integration.async_pre_execute(self._ctx, (args, kwargs))
|
|
1122
|
+
if not allowed:
|
|
1123
|
+
raise PolicyViolationError(reason or "Policy check failed")
|
|
1124
|
+
|
|
1125
|
+
# Execute the wrapped callable
|
|
1126
|
+
result = await self._fn(*args, **kwargs)
|
|
1127
|
+
|
|
1128
|
+
# Post-execution validation
|
|
1129
|
+
valid, reason = await self._integration.async_post_execute(self._ctx, result)
|
|
1130
|
+
if not valid:
|
|
1131
|
+
raise PolicyViolationError(reason or "Post-execution validation failed")
|
|
1132
|
+
|
|
1133
|
+
return result
|
|
1134
|
+
|
|
1135
|
+
|
|
1136
|
+
# Backward compatibility: import from the centralized exception hierarchy
|
|
1137
|
+
from agent_os.exceptions import PolicyViolationError as PolicyViolationError # noqa: F401
|