npm - @ngocsangairvds/vsaf - Versions diffs - 3.2.14 → 3.2.16 - Mend

@ngocsangairvds/vsaf 3.2.14 → 3.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1442) hide show

package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/base.py DELETED Viewed

@@ -1,4035 +0,0 @@
-"""Base Agent Architecture (Phase 6)."""
-from __future__ import annotations
-import abc
-import asyncio
-import contextlib
-import json
-import os
-import random
-import re
-from collections.abc import Coroutine
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, TypeVar
-from urllib.parse import urlparse
-from uuid import uuid4
-from pydantic_ai.settings import ModelSettings
-from structlog import get_logger
-from vds_agent_core.profiles import resolve_default_failover_profiles
-from vds_audit_orchestrator.engine.provider_failure_classifier import (
-    ProviderFailureClass,
-    ProviderFailureClassifier,
-    ProviderHealthMemory,
-    RowFailoverContext,
-    TimeoutKind,
-    truncate_failover_profiles_considered,
-    truncate_row_failover_context,
-)
-from vds_audit_orchestrator.errors import AuditError
-from vds_audit_orchestrator.llm.cost_tracker import global_tracker
-from vds_audit_orchestrator.llm.provider import (
-    LLMProtocolType,
-    LLMSettings,
-    build_structured_output_extra_body,
-    resolve_openai_protocol_api_key,
-    resolve_protocol_api_key,
-    resolve_protocol_base_url,
-    should_suppress_anthropic_tool_choice,
-    should_suppress_openai_tool_choice,
-)
-from vds_audit_orchestrator.logging_config import log_context
-from vds_audit_orchestrator.observability.metrics import record_llm_usage
-from vds_audit_orchestrator.runtime_profiles import inherit_runtime_llm_policy
-if TYPE_CHECKING:
-    from vds_audit_orchestrator.models.evidence import EvidenceBundle
-    from vds_audit_orchestrator.models.task import AuditTask
-# Module-level structlog logger; BaseAgent binds the concrete agent class name onto it per instance.
-logger = get_logger()
-# Generic result type used to type the awaitable passed to the timeout-policy runner.
-ResultType = TypeVar("ResultType")
-# Extra protocol identifier that is unioned into the supported protocol values below.
-OPENAI_CODEX_PROTOCOL = "openai-codex"
-# Model names treated as the Codex baseline set (consumers are outside this view).
-OPENAI_CODEX_BASELINE_MODELS: frozenset[str] = frozenset({"gpt-5.4", "gpt-5.3-codex", "gpt-5.2-codex"})
-# Every member of LLMProtocolType is considered supported.
-PYDANTIC_AI_MODEL_SUPPORTED_PROTOCOLS: frozenset[LLMProtocolType] = frozenset(LLMProtocolType)
-# String form of the supported protocols: each enum member's .value plus the Codex protocol name.
-PYDANTIC_AI_MODEL_SUPPORTED_PROTOCOL_VALUES: frozenset[str] = frozenset(
-    {item.value for item in PYDANTIC_AI_MODEL_SUPPORTED_PROTOCOLS}.union({OPENAI_CODEX_PROTOCOL})
-)
-@dataclass
-class AgentResult:
-    """Standard output from an agent.
-    The list and dict fields use ``default_factory`` so each instance gets
-    its own fresh container rather than sharing one default object.
-    """
-    # Name of the agent that produced this result.
-    agent_name: str
-    # Whether the agent run succeeded.
-    success: bool
-    # Finding records as plain dicts; defaults to an empty list.
-    findings: list[dict[str, Any]] = field(default_factory=list)
-    # Free-form metadata; defaults to an empty dict.
-    metadata: dict[str, Any] = field(default_factory=dict)
-    # Failure detail as an AuditError or a plain string; None when absent.
-    error: AuditError | str | None = None
-    # Follow-up task descriptors as plain dicts; defaults to an empty list.
-    spawned_tasks: list[dict[str, Any]] = field(default_factory=list)
-@dataclass(frozen=True, slots=True)
-class AgentTimeoutPolicy:
-    """Runtime timeout budget and extension policy for agent model calls.
-    Instances are immutable (``frozen=True``) and are produced by
-    ``BaseAgent._resolve_agent_timeout_policy``.
-    """
-    # Starting timeout budget, in seconds.
-    initial_timeout_seconds: float
-    # Upper bound for the timeout budget, in seconds.
-    max_timeout_seconds: float
-    # Seconds granted per extension.
-    extension_seconds: float
-    # Maximum number of extensions.
-    extension_attempts: int
-    # Heartbeat interval, in seconds.
-    heartbeat_seconds: float
-    # Idle budget, in seconds (resolved from the ``agent_idle_timeout_seconds`` setting).
-    idle_post_tool_seconds: float
-    # Lease window, in seconds; a value > 0 switches the runner into lease mode.
-    lease_window_seconds: float = 0.0
-    # Stall-detection window, in seconds.
-    stall_detection_seconds: float = 30.0
-    # Absolute timeout, in seconds (resolved from ``row_absolute_timeout_ms``).
-    absolute_timeout_seconds: float = 0.0
-@dataclass(frozen=True, slots=True)
-class _AgentRuntimeSnapshot:
-    """Immutable copy of an agent's runtime state.
-    ``BaseAgent._temporary_agent_settings`` captures one of these before
-    swapping settings and restores the agent from it on exit.
-    """
-    # Value of ``agent.settings`` at capture time.
-    settings: LLMSettings
-    # Value of ``agent._pydantic_model`` at capture time (may be None).
-    pydantic_model: Any | None
-    # Value of ``agent._model_settings`` at capture time (may be None).
-    model_settings: ModelSettings | None
-    # Value of ``agent.client`` at capture time (may be None).
-    client: Any | None
-@dataclass(slots=True)
-class _PydanticStreamResultWrapper:
-    """Expose stream results with RunResult-compatible accessors.
-    ``output`` is held directly; usage/message accessors and any other
-    attribute are forwarded to the wrapped ``streamed_result``.
-    """
-    # Final output value exposed as ``.output``.
-    output: Any
-    # Underlying streamed result that all other lookups are forwarded to.
-    streamed_result: Any
-    def usage(self) -> Any:
-        """Return the wrapped result's usage."""
-        return self.streamed_result.usage()
-    def all_messages(self, *, output_tool_return_content: str | None = None) -> list[Any]:
-        """Forward ``all_messages`` to the wrapped result."""
-        return self.streamed_result.all_messages(output_tool_return_content=output_tool_return_content)
-    def all_messages_json(self, *, output_tool_return_content: str | None = None) -> bytes:
-        """Forward ``all_messages_json`` to the wrapped result."""
-        return self.streamed_result.all_messages_json(output_tool_return_content=output_tool_return_content)
-    def new_messages(self, *, output_tool_return_content: str | None = None) -> list[Any]:
-        """Forward ``new_messages`` to the wrapped result."""
-        return self.streamed_result.new_messages(output_tool_return_content=output_tool_return_content)
-    def __getattr__(self, name: str) -> Any:
-        """Delegate unknown attributes to the wrapped streamed result.
-        Raises:
-            AttributeError: when ``name`` is one of this wrapper's own slots
-                and that slot has not been assigned yet.
-        """
-        # __getattr__ only runs after normal lookup failed. If the missing name is
-        # one of our own slots (e.g. the instance was created via __new__), reading
-        # self.streamed_result below would re-enter __getattr__ without end.
-        if name in ("output", "streamed_result"):
-            raise AttributeError(name)
-        return getattr(self.streamed_result, name)
-class BaseAgent(abc.ABC):
-    """Abstract base class for analysis agents."""
-    def __init__(self, settings: LLMSettings | None = None, model_override: Any | None = None):
-        """Initialise the agent's settings, logger and LLM backend.
-        Args:
-            settings: LLM settings; a default ``LLMSettings()`` is built when
-                this is None (or otherwise falsy).
-            model_override: Optional model object that takes precedence over the
-                settings-provided or freshly built PydanticAI model. It is only
-                consulted on the PydanticAI path of an enabled configuration.
-        Initialisation failures are logged as warnings and never raised.
-        """
-        self.settings = settings or LLMSettings()
-        self.logger = logger.bind(agent=self.__class__.__name__)
-        self._pydantic_model: Any | None = None
-        self._model_settings: ModelSettings | None = None
-        self._last_agent_run_telemetry: dict[str, Any] = {}
-        self.client = None
-        settings_model = getattr(self.settings, "pydantic_model", None)
-        use_pydantic_ai = bool(getattr(self.settings, "use_pydantic_ai", False))
-        # A model supplied on the settings object forces the PydanticAI path.
-        if settings_model is not None:
-            use_pydantic_ai = True
-        if self.settings.enabled:
-            if use_pydantic_ai:
-                try:
-                    self._model_settings = self._build_model_settings()
-                    # Precedence: explicit override, then settings model, then a newly built model.
-                    self._pydantic_model = (
-                        model_override or settings_model or self._build_pydantic_ai_model(self.settings.model_standard)
-                    )
-                except Exception as e:
-                    self.logger.warning("pydantic_ai_init_failed", error=str(e))
-            # Fall back to the plain client when no PydanticAI model is available.
-            if self._pydantic_model is None:
-                try:
-                    self.client = self.settings.get_client()
-                except Exception as e:
-                    self.logger.warning("agent_llm_init_failed", error=str(e))
-                    self.client = None
-    @staticmethod
-    def _normalize_profile_names(raw_profiles: Any) -> list[str]:
-        """Turn a comma-separated string or a list/tuple into clean profile names.
-        Entries are stripped and empty ones are dropped; any other input type
-        yields an empty list.
-        """
-        if isinstance(raw_profiles, str):
-            pieces = (chunk.strip() for chunk in raw_profiles.split(","))
-            return [piece for piece in pieces if piece]
-        if not isinstance(raw_profiles, (list, tuple)):
-            return []
-        # Falsy entries (None, "", 0) collapse to "" and are filtered out.
-        cleaned = (str(entry or "").strip() for entry in raw_profiles)
-        return [name for name in cleaned if name]
-    def _resolve_active_runtime_profile_name(self) -> str | None:
-        """Return the stripped ``VDS_AUDIT_ACTIVE_PROFILE`` value, or None when unset or blank."""
-        raw_name = os.getenv("VDS_AUDIT_ACTIVE_PROFILE")
-        cleaned = str(raw_name or "").strip()
-        return cleaned if cleaned else None
-    def _resolve_row_failover_profiles(self) -> list[str]:
-        """Return the ordered, de-duplicated failover profiles for the current row.
-        The active profile is never included. When
-        ``VDS_AUDIT_ROW_FAILOVER_AVAILABLE_PROFILES`` lists any names, only
-        those names are kept.
-        """
-        current_profile = self._resolve_active_runtime_profile_name()
-        # AC-145.1.1 / AC-145.1.3: auto-derive failover profiles only when the operator
-        # has NOT set the env var at all. A present value (even "[]") means the
-        # explicit ``row_failover_profiles`` setting must be respected.
-        if os.getenv("VDS_AUDIT_LLM__ROW_FAILOVER_PROFILES") is None:
-            candidates = resolve_default_failover_profiles(current_profile)
-        else:
-            candidates = self._normalize_profile_names(getattr(self.settings, "row_failover_profiles", []))
-        permitted = self._normalize_profile_names(os.getenv("VDS_AUDIT_ROW_FAILOVER_AVAILABLE_PROFILES"))
-        ordered: list[str] = []
-        emitted: set[str] = set()
-        for name in candidates:
-            # Skip the active profile, repeats, and anything outside a non-empty allow-list.
-            if name == current_profile or name in emitted or (permitted and name not in permitted):
-                continue
-            emitted.add(name)
-            ordered.append(name)
-        return ordered
-    def _provider_identity(self, settings: LLMSettings | None = None) -> str:
-        """Return a label identifying the provider in use.
-        The active runtime profile name wins when one is set. Otherwise the
-        non-empty values of protocol, model_standard and base_url are joined
-        with ":"; "unknown" is returned when all three are empty.
-        """
-        source = settings or self.settings
-        profile_name = self._resolve_active_runtime_profile_name()
-        if profile_name:
-            return profile_name
-        parts = [
-            str(getattr(source, attr_name, "") or "").strip()
-            for attr_name in ("protocol", "model_standard", "base_url")
-        ]
-        label = ":".join(part for part in parts if part)
-        return label if label else "unknown"
-    @contextlib.contextmanager
-    def _temporary_agent_settings(self, settings: LLMSettings):
-        """Temporarily run the agent with different LLM settings.
-        On entry the current settings, PydanticAI model, model settings and
-        client are captured, then rebuilt from ``settings`` using the same
-        fallback order as ``__init__`` (PydanticAI model first, plain client
-        when no model is available). Build failures are logged as warnings.
-        On exit the captured state is always restored.
-        Args:
-            settings: Settings to apply for the duration of the ``with`` block.
-        """
-        snapshot = _AgentRuntimeSnapshot(
-            settings=self.settings,
-            pydantic_model=self._pydantic_model,
-            model_settings=self._model_settings,
-            client=self.client,
-        )
-        # The swap itself happens inside the try so that an unexpected error while
-        # applying the temporary settings still restores the original state.
-        try:
-            self.settings = settings
-            self._model_settings = None
-            self._pydantic_model = None
-            self.client = None
-            if self.settings.enabled:
-                settings_model = getattr(self.settings, "pydantic_model", None)
-                use_pydantic_ai = bool(getattr(self.settings, "use_pydantic_ai", False))
-                # A model supplied on the settings object forces the PydanticAI path.
-                if settings_model is not None:
-                    use_pydantic_ai = True
-                if use_pydantic_ai:
-                    try:
-                        self._model_settings = self._build_model_settings()
-                        self._pydantic_model = settings_model or self._build_pydantic_ai_model(
-                            self.settings.model_standard
-                        )
-                    except Exception as exc:
-                        self.logger.warning("pydantic_ai_failover_init_failed", error=str(exc))
-                        self._pydantic_model = None
-                # Fall back to the plain client when no PydanticAI model is available.
-                if self._pydantic_model is None:
-                    try:
-                        self.client = self.settings.get_client()
-                    except Exception as exc:
-                        self.logger.warning("agent_failover_llm_init_failed", error=str(exc))
-                        self.client = None
-            yield
-        finally:
-            self.settings = snapshot.settings
-            self._pydantic_model = snapshot.pydantic_model
-            self._model_settings = snapshot.model_settings
-            self.client = snapshot.client
-    @abc.abstractmethod
-    async def analyze(self, evidence: EvidenceBundle | dict[str, Any] | AuditTask) -> AgentResult:
-        """Perform analysis on the evidence bundle.
-        Abstract coroutine: concrete agents must override it. The input may be
-        an EvidenceBundle, a plain dict, or an AuditTask, and the override is
-        expected to return an AgentResult.
-        """
-        pass
-    @staticmethod
-    def _extract_json_object_candidates(raw: str) -> list[str]:
-        """Extract top-level JSON object substrings from a potentially concatenated payload.
-        The text is scanned once while tracking brace depth. Braces inside
-        double-quoted strings (with backslash escapes) are ignored, so values
-        such as ``"}"`` do not end an object early. Text outside any object is
-        skipped, including stray quotes in surrounding prose.
-        Args:
-            raw: Text that may contain zero or more JSON objects; falsy input
-                is treated as empty.
-        Returns:
-            The balanced ``{...}`` substrings in order of appearance. They are
-            not validated as JSON.
-        """
-        text = str(raw or "")
-        if not text:
-            return []
-        candidates: list[str] = []
-        in_string = False
-        escape = False
-        depth = 0
-        start_idx: int | None = None
-        for idx, ch in enumerate(text):
-            if in_string:
-                if escape:
-                    escape = False
-                elif ch == "\\":
-                    escape = True
-                elif ch == '"':
-                    in_string = False
-                continue
-            if ch == '"':
-                # Quotes only delimit JSON strings inside an object. Outside one they
-                # are prose (e.g. 5" tall); tracking them there would let a single
-                # unbalanced quote hide every object that follows.
-                if depth > 0:
-                    in_string = True
-                continue
-            if ch == "{":
-                if depth == 0:
-                    start_idx = idx
-                depth += 1
-                continue
-            # A closing brace at depth 0 is stray text and is ignored.
-            if ch == "}" and depth > 0:
-                depth -= 1
-                if depth == 0 and start_idx is not None:
-                    candidates.append(text[start_idx : idx + 1])
-                    start_idx = None
-        return candidates
-    def _recover_output_from_final_result_validation_error(
-        self,
-        *,
-        result_type: type[Any],
-        exc: Exception,
-        telemetry: dict[str, Any] | None,
-    ) -> Any | None:
-        """Recover structured output from invalid final_result payloads.
-        This path is intentionally broader than trailing-character JSON errors.
-        Some providers surface schema-shape validation failures even when the
-        raw final payload is recoverable and the target model can normalize it
-        via `model_validate(...)`.
-        Raw text is gathered, in order, from the telemetry key
-        ``event_final_result_invalid_input``, from the exception's
-        ``body``/``response``/``args``, and from the same places on its
-        ``__cause__``. JSON objects found in that text are tried last-first;
-        the first one accepted by ``result_type.model_validate`` is returned,
-        otherwise None.
-        """
-        raw_payloads: list[str] = []
-        def _push_structured(value: Any) -> None:
-            # dict/list payloads are serialised to JSON; unserialisable ones are skipped.
-            with contextlib.suppress(Exception):
-                raw_payloads.append(json.dumps(value, ensure_ascii=False))
-        def _harvest(source: Any) -> None:
-            # Pull candidate text from body/response attributes, then from args.
-            for attr_name in ("body", "response"):
-                attr_value = getattr(source, attr_name, None)
-                if isinstance(attr_value, str):
-                    if attr_value.strip():
-                        raw_payloads.append(attr_value)
-                elif isinstance(attr_value, (dict, list)):
-                    _push_structured(attr_value)
-            for arg in getattr(source, "args", ()) or ():
-                if isinstance(arg, str):
-                    # Only strings that could contain an object are worth scanning.
-                    if "{" in arg and "}" in arg:
-                        raw_payloads.append(arg)
-                elif isinstance(arg, (dict, list)):
-                    _push_structured(arg)
-        if isinstance(telemetry, dict):
-            invalid_input = telemetry.get("event_final_result_invalid_input")
-            if isinstance(invalid_input, str):
-                if invalid_input.strip():
-                    raw_payloads.append(invalid_input)
-            elif isinstance(invalid_input, (dict, list)):
-                _push_structured(invalid_input)
-        _harvest(exc)
-        cause = getattr(exc, "__cause__", None)
-        if cause is not None:
-            _harvest(cause)
-        unique_objects: list[str] = []
-        already_seen: set[str] = set()
-        for raw_text in raw_payloads:
-            for fragment in self._extract_json_object_candidates(raw_text):
-                trimmed = fragment.strip()
-                if trimmed and trimmed not in already_seen:
-                    already_seen.add(trimmed)
-                    unique_objects.append(trimmed)
-        # Later objects are tried first.
-        for fragment in reversed(unique_objects):
-            try:
-                decoded = json.loads(fragment)
-            except Exception:
-                continue
-            validate = getattr(result_type, "model_validate", None)
-            if not callable(validate):
-                continue
-            try:
-                return validate(decoded)
-            except Exception:
-                continue
-        return None
-    def _resolve_agent_timeout_policy(self, complexity: str) -> AgentTimeoutPolicy:
-        """Resolve timeout policy from config/settings with complexity-aware scaling.
-        Every value is read from ``self.settings`` with a default and a floor;
-        values that cannot be parsed fall back to the default. The initial
-        timeout is the base timeout scaled by the complexity factor (never
-        below 30s), and the maximum timeout is never below the initial one.
-        When a lease window is configured (> 0), the initial timeout and the
-        stall-detection window are capped at the lease window, and the maximum
-        timeout is capped at the absolute timeout without dropping below the
-        initial timeout.
-        Args:
-            complexity: Complexity label passed to ``_complexity_timeout_factor``.
-        Returns:
-            The resolved ``AgentTimeoutPolicy``.
-        """
-        def _to_float(raw: Any, default: float, minimum: float) -> float:
-            try:
-                value = float(raw)
-            except (TypeError, ValueError):
-                value = default
-            return max(value, minimum)
-        def _to_int(raw: Any, default: int, minimum: int) -> int:
-            try:
-                value = int(raw)
-            except (TypeError, ValueError):
-                value = default
-            return max(value, minimum)
-        factor = self._complexity_timeout_factor(complexity)
-        base_timeout = _to_float(getattr(self.settings, "agent_timeout_seconds", 600.0), 600.0, 30.0)
-        max_timeout = _to_float(getattr(self.settings, "agent_timeout_max_seconds", 1800.0), 1800.0, 60.0)
-        extension_seconds = _to_float(
-            getattr(self.settings, "agent_timeout_extension_seconds", 300.0),
-            300.0,
-            0.0,
-        )
-        extension_attempts = _to_int(
-            getattr(self.settings, "agent_timeout_extension_attempts", 3),
-            3,
-            0,
-        )
-        heartbeat_seconds = _to_float(
-            getattr(self.settings, "agent_timeout_heartbeat_seconds", 30.0),
-            30.0,
-            5.0,
-        )
-        idle_post_tool_seconds = _to_float(
-            getattr(self.settings, "agent_idle_timeout_seconds", 180.0),
-            180.0,
-            30.0,
-        )
-        lease_window_seconds = _to_float(
-            getattr(self.settings, "row_progress_lease_seconds", 0.0),
-            0.0,
-            0.0,
-        )
-        # Stall detection defaults to the idle budget when not configured.
-        stall_detection_seconds = _to_float(
-            getattr(self.settings, "row_stall_detection_seconds", idle_post_tool_seconds),
-            idle_post_tool_seconds,
-            1.0,
-        )
-        # The setting is in milliseconds. Convert under the same guard as every other
-        # value so a None or non-numeric setting falls back to 600s instead of raising.
-        raw_absolute_timeout_ms = getattr(self.settings, "row_absolute_timeout_ms", 600_000)
-        try:
-            raw_absolute_timeout_seconds: Any = float(raw_absolute_timeout_ms) / 1000.0
-        except (TypeError, ValueError):
-            raw_absolute_timeout_seconds = None
-        absolute_timeout_seconds = _to_float(raw_absolute_timeout_seconds, 600.0, 1.0)
-        initial_timeout = max(30.0, base_timeout * factor)
-        max_timeout = max(max_timeout, initial_timeout)
-        if lease_window_seconds > 0:
-            initial_timeout = min(initial_timeout, lease_window_seconds)
-            max_timeout = min(max_timeout, absolute_timeout_seconds)
-            max_timeout = max(max_timeout, initial_timeout)
-            stall_detection_seconds = min(stall_detection_seconds, lease_window_seconds)
-        return AgentTimeoutPolicy(
-            initial_timeout_seconds=initial_timeout,
-            max_timeout_seconds=max_timeout,
-            extension_seconds=extension_seconds,
-            extension_attempts=extension_attempts,
-            heartbeat_seconds=heartbeat_seconds,
-            idle_post_tool_seconds=idle_post_tool_seconds,
-            lease_window_seconds=lease_window_seconds,
-            stall_detection_seconds=stall_detection_seconds,
-            absolute_timeout_seconds=absolute_timeout_seconds,
-        )
-    def _resolve_pydantic_usage_limits(
-        self,
-        *,
-        max_turns: int | None,
-        max_tool_calls: int | None,
-    ) -> tuple[int | None, int | None]:
-        """Resolve request/tool usage limits for PydanticAI runs.
-        The request limit is raised to at least ``tool calls + 2`` so heavily
-        agentic runs do not hit `request_limit` before they have used up
-        `tool_calls_limit`.
-        Returns:
-            ``(request_limit, tool_calls_limit)``; either may be None.
-        """
-        tool_budget: int | None = None
-        if max_tool_calls is not None:
-            tool_budget = int(max_tool_calls)
-            # A negative budget means "no limit". Zero is kept as-is so
-            # synthesis retries cannot accidentally invoke tools again.
-            if tool_budget < 0:
-                tool_budget = None
-        if max_turns is None and tool_budget is None:
-            return None, None
-        # Zero-tool mode (docs/security prompt-fed paths) gets no request floor:
-        # a tiny floor of 2 could preempt normal output-validation retries and
-        # cause avoidable "request_limit exceeded" failures.
-        if tool_budget is None or tool_budget == 0:
-            return max_turns, tool_budget
-        request_floor = tool_budget + 2
-        return max(max_turns or 0, request_floor), tool_budget
-    def _is_trace_payload_enabled(self) -> bool:
-        """Return True when payload-level diagnostics should be emitted.
-        The ``agent_trace_payload_enabled`` setting wins when truthy; otherwise
-        the logging configuration's trace mode decides. Any failure while
-        consulting it counts as disabled.
-        """
-        if getattr(self.settings, "agent_trace_payload_enabled", False):
-            return True
-        try:
-            from vds_audit_orchestrator.logging_config import is_trace_mode_enabled
-            trace_mode = bool(is_trace_mode_enabled())
-        except Exception:
-            trace_mode = False
-        return trace_mode
-    def _trace_payload_max_chars(self) -> int:
-        """Return the trace payload character limit, clamped to 1..200_000.
-        Read from the ``agent_trace_payload_max_chars`` setting; a missing or
-        unparsable value falls back to 20_000.
-        """
-        configured = getattr(self.settings, "agent_trace_payload_max_chars", 20_000)
-        try:
-            limit = int(configured)
-        except (TypeError, ValueError):
-            limit = 20_000
-        if limit < 1:
-            return 1
-        if limit > 200_000:
-            return 200_000
-        return limit
-    @staticmethod
-    def _redact_trace_text(value: str) -> str:
-        """Mask credential-looking ``name: value`` / ``name=value`` pairs in free text.
-        Two case-insensitive rules run in order: secret-like names
-        (api key, token, password, secret), then ``authorization`` headers
-        with an optional ``bearer`` prefix.
-        """
-        rules = (
-            (r"(?i)(api[_-]?key|token|password|secret)\s*[:=]\s*([^\s,;]+)", r"\1=[REDACTED]"),
-            (r"(?i)(authorization)\s*[:=]\s*(bearer\s+)?[^\s,;]+", r"\1=[REDACTED]"),
-        )
-        scrubbed = value
-        for pattern, replacement in rules:
-            scrubbed = re.sub(pattern, replacement, scrubbed)
-        return scrubbed
-    def _sanitize_trace_payload(self, payload: Any, *, depth: int = 0) -> Any:
-        """Return bounded trace payload data safe for structured logs.
-        Strings are redacted and truncated to the configured character limit;
-        numbers, booleans and None pass through. Lists and dicts are limited to
-        their first 100 entries with a truncation marker, tuples are handled as
-        lists, and values under sensitive-looking dict keys are replaced
-        outright. Anything else is stringified first. Recursion stops with a
-        marker once ``depth`` exceeds 10.
-        """
-        char_limit = self._trace_payload_max_chars()
-        if depth > 10:
-            return "...(trace-depth-limit)"
-        if payload is None or isinstance(payload, (int, float, bool)):
-            return payload
-        if isinstance(payload, str):
-            scrubbed = self._redact_trace_text(payload)
-            if len(scrubbed) > char_limit:
-                return scrubbed[:char_limit] + "...(truncated)"
-            return scrubbed
-        next_depth = depth + 1
-        if isinstance(payload, tuple):
-            return self._sanitize_trace_payload(list(payload), depth=next_depth)
-        if isinstance(payload, list):
-            head = payload[:100]
-            cleaned_items = [self._sanitize_trace_payload(entry, depth=next_depth) for entry in head]
-            if len(payload) > len(head):
-                cleaned_items.append(f"...(truncated {len(payload) - len(head)} items)")
-            return cleaned_items
-        if isinstance(payload, dict):
-            entries = list(payload.items())
-            cleaned_map: dict[str, Any] = {}
-            for raw_key, raw_value in entries[:100]:
-                label = str(raw_key)
-                # Sensitive-looking keys have their value replaced without inspecting it.
-                if re.search(r"(?i)(password|secret|token|api[_-]?key|authorization)", label):
-                    cleaned_map[label] = "[REDACTED]"
-                else:
-                    cleaned_map[label] = self._sanitize_trace_payload(raw_value, depth=next_depth)
-            if len(entries) > 100:
-                cleaned_map["__truncated_items__"] = len(entries) - 100
-            return cleaned_map
-        return self._sanitize_trace_payload(str(payload), depth=next_depth)
-    @staticmethod
-    def _serialize_skill_policy_drop(item: Any) -> dict[str, Any] | None:
-        """Convert a dropped-skill record into a plain dict, or None when it is empty.
-        ``skill_name`` and ``reason`` are always present ("unknown" when blank).
-        The optional name/detail fields are included only when they carry a value.
-        """
-        name = str(getattr(item, "skill_name", "") or "").strip()
-        why = str(getattr(item, "reason", "") or "").strip()
-        if not (name or why):
-            return None
-        record: dict[str, Any] = {
-            "skill_name": name or "unknown",
-            "reason": why or "unknown",
-        }
-        for attr_name in ("metadata_name", "frontmatter_name", "runtime_name", "detail"):
-            extra = getattr(item, attr_name, None)
-            if extra is None:
-                continue
-            if not isinstance(extra, str):
-                record[attr_name] = str(extra)
-                continue
-            # Blank strings are omitted rather than emitted as "".
-            trimmed = extra.strip()
-            if trimmed:
-                record[attr_name] = trimmed
-        return record
-    def _collect_skill_policy_diagnostics(self, toolsets: list[Any] | None) -> dict[str, Any] | None:
-        """Collect operator-facing skill-policy diagnostics from attached toolsets.
-        Returns None when there are no toolsets, when the skills toolset module
-        cannot be imported, or when no toolset reports diagnostics. Otherwise
-        returns summed counters, the de-duplicated blocked tool names, every
-        drop reason, and a per-toolset breakdown.
-        """
-        if not toolsets:
-            return None
-        try:
-            from vds_audit_orchestrator.agents.toolsets.skills_toolset import get_toolset_policy_diagnostics
-        except Exception:
-            return None
-        totals: dict[str, int] = {
-            "metadata_count": 0,
-            "policy_eligible_count": 0,
-            "runtime_loaded_count": 0,
-        }
-        all_blocked: list[str] = []
-        all_drops: list[dict[str, Any]] = []
-        per_toolset: list[dict[str, Any]] = []
-        for position, toolset in enumerate(toolsets):
-            report = get_toolset_policy_diagnostics(toolset)
-            if report is None:
-                continue
-            # Missing or falsy counters count as zero.
-            counts = {key: int(getattr(report, key, 0) or 0) for key in totals}
-            blocked_here = [
-                str(name).strip() for name in (getattr(report, "blocked_tools", []) or []) if str(name).strip()
-            ]
-            drops_here: list[dict[str, Any]] = []
-            for dropped in getattr(report, "dropped", []) or []:
-                entry = self._serialize_skill_policy_drop(dropped)
-                if entry is not None:
-                    drops_here.append(entry)
-                    # The aggregate list gets its own copy of each entry.
-                    all_drops.append(dict(entry))
-            for key, amount in counts.items():
-                totals[key] += amount
-            for tool_name in blocked_here:
-                if tool_name not in all_blocked:
-                    all_blocked.append(tool_name)
-            per_toolset.append(
-                {
-                    "toolset_index": position,
-                    **counts,
-                    "blocked_tools": blocked_here,
-                    "drop_reasons": drops_here,
-                }
-            )
-        if not per_toolset:
-            return None
-        return {
-            "toolset_count": len(per_toolset),
-            **totals,
-            "blocked_tools": all_blocked,
-            "drop_reasons": all_drops,
-            "toolsets": per_toolset,
-        }
-    @staticmethod
-    def _create_skill_policy_enforcer(
-        *,
-        skills_needed: bool | None = None,
-        mode_override: str | None = None,
-        interpretation: dict[str, Any] | None = None,
-    ) -> Any:
-        """Create a SkillPolicyEnforcer from row-level inputs (FR-194..FR-198).
-        Returns the enforcer instance or None if the module is unavailable.
-        ``skills_needed`` defaults to True when not given, and an
-        interpretation that sets ``skills_needed`` to False overrides it.
-        """
-        try:
-            from vds_agent_core.skills.policy import (
-                SkillPolicyEnforcer,
-                resolve_skill_policy_mode,
-            )
-        except Exception:
-            return None
-        resolved_mode = resolve_skill_policy_mode(
-            skills_needed=skills_needed,
-            mode_override=mode_override,
-            interpretation=interpretation,
-        )
-        interpretation_opts_out = isinstance(interpretation, dict) and interpretation.get("skills_needed") is False
-        if interpretation_opts_out:
-            needed = False
-        elif skills_needed is None:
-            needed = True
-        else:
-            needed = skills_needed
-        return SkillPolicyEnforcer(mode=resolved_mode, skills_needed=needed)
-    def _log_trace_payload(
-        self,
-        *,
-        event_name: str,
-        elapsed_sec: float,
-        payload: dict[str, Any],
-    ) -> None:
-        """Log payload-level diagnostics when trace mode is enabled.
-        The payload is sanitized before logging and the elapsed time is
-        rounded to two decimals. Nothing is logged when tracing is off.
-        """
-        if self._is_trace_payload_enabled():
-            rounded_elapsed = round(elapsed_sec, 2)
-            safe_payload = self._sanitize_trace_payload(payload)
-            self.logger.info(event_name, elapsed_sec=rounded_elapsed, payload=safe_payload)
-    def _exception_diagnostics(self, exc: Exception) -> dict[str, Any]:
-        """Extract compact diagnostics for model/runtime exceptions.
-        Always reports the exception type and message. A non-blank string
-        ``body`` adds a preview capped at 4000 characters. When the exception
-        has a ``__cause__``, its type, message (capped at 2000 characters) and
-        body preview are added as well.
-        """
-        def _body_preview(source: Any) -> str | None:
-            # Only non-blank string bodies produce a preview.
-            raw_body = getattr(source, "body", None)
-            if isinstance(raw_body, str) and raw_body.strip():
-                return raw_body[:4000]
-            return None
-        report: dict[str, Any] = {"error_type": type(exc).__name__, "error": str(exc)}
-        own_preview = _body_preview(exc)
-        if own_preview is not None:
-            report["error_body_preview"] = own_preview
-        cause = getattr(exc, "__cause__", None)
-        if cause is None:
-            return report
-        report["cause_type"] = type(cause).__name__
-        report["cause_message"] = str(cause)[:2000]
-        cause_preview = _body_preview(cause)
-        if cause_preview is not None:
-            report["cause_body_preview"] = cause_preview
-        return report
-    @staticmethod
-    def _complexity_timeout_factor(complexity: str) -> float:
-        """Map request complexity to timeout multiplier.
-        Matching ignores case and surrounding whitespace. Unknown labels, and
-        a missing or non-string value, map to the neutral factor 1.0 instead
-        of raising.
-        Args:
-            complexity: Complexity label such as "simple", "high" or "critical".
-        Returns:
-            0.75 for simple/low, 1.5 for complex/high, 2.0 for critical,
-            otherwise 1.0.
-        """
-        # str(... or "") keeps None and other non-string values from crashing
-        # timeout resolution; they simply get the default factor.
-        normalized = str(complexity or "").strip().lower()
-        factors = {
-            "simple": 0.75,
-            "low": 0.75,
-            "complex": 1.5,
-            "high": 1.5,
-            "critical": 2.0,
-        }
-        return factors.get(normalized, 1.0)
-    @staticmethod
-    def _timeout_progress_snapshot(run_telemetry: dict[str, Any] | None) -> dict[str, float]:
-        """Extract a compact timeout-progress snapshot from runtime telemetry.
-        Every key is always present and defaults to 0.0. ``event_last_seen_at``
-        is copied as a float; the counters are truncated to non-negative whole
-        numbers. Non-numeric telemetry values are ignored.
-        """
-        counter_keys = (
-            "event_tool_calls_started",
-            "event_tool_calls_completed",
-            "event_tool_effective_calls",
-            "event_skill_execution_effective_tool_calls",
-            "event_turn_index",
-            "evidence_refs_count",
-            "steps_executed",
-            "steps_unique_tools",
-        )
-        snapshot: dict[str, float] = {"event_last_seen_at": 0.0}
-        snapshot.update({key: 0.0 for key in counter_keys})
-        if not isinstance(run_telemetry, dict):
-            return snapshot
-        last_seen = run_telemetry.get("event_last_seen_at")
-        if isinstance(last_seen, int | float):
-            snapshot["event_last_seen_at"] = float(last_seen)
-        for key in counter_keys:
-            observed = run_telemetry.get(key, 0)
-            if isinstance(observed, int | float):
-                # Truncate to an integer count and clamp negatives to zero.
-                snapshot[key] = float(max(0, int(observed)))
-        return snapshot
-    @staticmethod
-    def _timeout_progress_signal_type(
-        previous_snapshot: dict[str, float] | None,
-        current_snapshot: dict[str, float] | None,
-    ) -> str | None:
-        """Return the dominant progress signal since the previous snapshot.
-        Signals are checked in priority order and the first key whose value
-        grew decides the label. None is returned when nothing grew or when
-        either snapshot is not a dict.
-        """
-        if not (isinstance(previous_snapshot, dict) and isinstance(current_snapshot, dict)):
-            return None
-        # Ordered by priority: event-stream activity outranks every counter.
-        ranked_signals = (
-            ("event_last_seen_at", "event_stream"),
-            ("event_tool_calls_started", "tool_call_started"),
-            ("event_tool_calls_completed", "tool_call_completed"),
-            ("event_tool_effective_calls", "tool_effective"),
-            ("event_skill_execution_effective_tool_calls", "skill_effective"),
-            ("event_turn_index", "partial_output"),
-            ("evidence_refs_count", "evidence_growth"),
-            ("steps_executed", "route_transition"),
-            ("steps_unique_tools", "route_transition"),
-        )
-        return next(
-            (
-                label
-                for key, label in ranked_signals
-                if current_snapshot.get(key, 0.0) > previous_snapshot.get(key, 0.0)
-            ),
-            None,
-        )
-    @staticmethod
-    def _timeout_progress_observed_since(
-        previous_snapshot: dict[str, float] | None,
-        current_snapshot: dict[str, float] | None,
-    ) -> bool:
-        """Tell whether any watched event counter grew between two snapshots.
-        Only ``event_last_seen_at`` and the ``event_*`` tool/turn counters listed
-        below are compared; a missing key counts as 0.0. Returns False when
-        either argument is not a dict.
-        """
-        if not isinstance(previous_snapshot, dict):
-            return False
-        if not isinstance(current_snapshot, dict):
-            return False
-        watched_fields = (
-            "event_last_seen_at",
-            "event_tool_calls_started",
-            "event_tool_calls_completed",
-            "event_tool_effective_calls",
-            "event_skill_execution_effective_tool_calls",
-            "event_turn_index",
-        )
-        return any(
-            current_snapshot.get(field_name, 0.0) > previous_snapshot.get(field_name, 0.0)
-            for field_name in watched_fields
-        )
-    async def _run_with_timeout_policy(
-        self,
-        *,
-        operation: Coroutine[Any, Any, ResultType],
-        timeout_policy: AgentTimeoutPolicy,
-        result_type_name: str,
-        run_telemetry: dict[str, Any] | None = None,
-    ) -> ResultType:
-        """Run ``operation`` as a task under a heartbeat loop with bounded timeout extensions.
-        The task is polled in slices of at most ``timeout_policy.heartbeat_seconds``. After
-        each slice that ends without a result, ``run_telemetry`` (when it is a dict) is
-        inspected for progress, stalls and tool churn, and a heartbeat is logged. This
-        method only reads ``run_telemetry``.
-        Args:
-            operation: Coroutine to execute; it is wrapped in ``asyncio.create_task``.
-            timeout_policy: Supplies the initial/max budget, extension or lease settings,
-                heartbeat interval and the stall/idle thresholds read below.
-            result_type_name: Label attached to every log record emitted here.
-            run_telemetry: Optional live telemetry dict; stall and churn checks are
-                skipped when it is not a dict.
-        Returns:
-            The result of the awaited task.
-        Raises:
-            RuntimeError: When the budget is exhausted and cannot be extended, when the
-                progress lease expires (lease mode), when the run is idle for too long
-                after tool activity, or when ``list_directory`` churn is detected.
-            asyncio.CancelledError: Re-raised after the inner task has been cancelled.
-        """
-        import time
-        start_time = time.monotonic()
-        timeout_budget = timeout_policy.initial_timeout_seconds
-        # Lease mode is on whenever the policy sets a positive lease window.
-        lease_mode_enabled = timeout_policy.lease_window_seconds > 0
-        extensions_used = 0
-        # Two baselines: one reset on each budget extension, one reset on each heartbeat.
-        last_extension_progress_snapshot = self._timeout_progress_snapshot(run_telemetry)
-        previous_heartbeat_snapshot = dict(last_extension_progress_snapshot)
-        last_progress_at = start_time
-        last_progress_signal_type: str | None = None
-        task = asyncio.create_task(operation)
-        try:
-            while True:
-                elapsed = time.monotonic() - start_time
-                remaining = timeout_budget - elapsed
-                if remaining <= 0:
-                    # Budget exhausted: either grow it or cancel the task and fail.
-                    current_progress_snapshot = self._timeout_progress_snapshot(run_telemetry)
-                    progress_since_last_extension = self._timeout_progress_observed_since(
-                        last_extension_progress_snapshot,
-                        current_progress_snapshot,
-                    )
-                    extension_cap_reached = extensions_used >= timeout_policy.extension_attempts
-                    if lease_mode_enabled:
-                        # Lease mode ignores the attempt cap: it needs observed progress
-                        # and headroom below the maximum budget.
-                        can_extend = (
-                            progress_since_last_extension and timeout_budget < timeout_policy.max_timeout_seconds
-                        )
-                    else:
-                        # Fixed-extension mode: the attempt cap applies unless progress was observed.
-                        can_extend = (
-                            timeout_policy.extension_seconds > 0
-                            and (not extension_cap_reached or progress_since_last_extension)
-                            and timeout_budget < timeout_policy.max_timeout_seconds
-                        )
-                    if can_extend:
-                        # Clamp the increment so the budget never exceeds max_timeout_seconds.
-                        increment = min(
-                            timeout_policy.lease_window_seconds
-                            if lease_mode_enabled
-                            else timeout_policy.extension_seconds,
-                            timeout_policy.max_timeout_seconds - timeout_budget,
-                        )
-                        timeout_budget += increment
-                        extensions_used += 1
-                        extension_reason = (
-                            "lease_progress_renewal"
-                            if lease_mode_enabled
-                            else (
-                                "progress_override_extension_cap"
-                                if extension_cap_reached and progress_since_last_extension
-                                else "within_extension_cap"
-                            )
-                        )
-                        self.logger.warning(
-                            "pydantic_agent_timeout_extended",
-                            result_type=result_type_name,
-                            timeout_budget_seconds=round(timeout_budget, 2),
-                            extension_seconds=round(increment, 2),
-                            extensions_used=extensions_used,
-                            max_extensions=timeout_policy.extension_attempts,
-                            extension_cap_reached=extension_cap_reached,
-                            progress_since_last_extension=progress_since_last_extension,
-                            extension_reason=extension_reason,
-                            lease_mode_enabled=lease_mode_enabled,
-                        )
-                        last_extension_progress_snapshot = current_progress_snapshot
-                        # An extension also resets the stall clock used by the lease check below.
-                        last_progress_at = time.monotonic()
-                        last_progress_signal_type = (
-                            self._timeout_progress_signal_type(
-                                previous_heartbeat_snapshot,
-                                current_progress_snapshot,
-                            )
-                            or "lease_renewal"
-                        )
-                        continue
-                    # No extension possible: cancel, wait for the task to unwind, then fail.
-                    task.cancel()
-                    with contextlib.suppress(asyncio.CancelledError):
-                        await task
-                    self.logger.error(
-                        "pydantic_agent_call_timeout",
-                        result_type=result_type_name,
-                        timeout_budget_seconds=round(timeout_budget, 2),
-                        max_timeout_seconds=round(timeout_policy.max_timeout_seconds, 2),
-                        extensions_used=extensions_used,
-                        lease_mode_enabled=lease_mode_enabled,
-                        last_progress_signal_type=last_progress_signal_type,
-                    )
-                    raise RuntimeError(
-                        "PydanticAI agent timed out after "
-                        f"{timeout_budget:.0f}s (max budget {timeout_policy.max_timeout_seconds:.0f}s)"
-                    )
-                # Wait at most one heartbeat interval so the checks below run periodically.
-                wait_timeout = min(remaining, timeout_policy.heartbeat_seconds)
-                done, _ = await asyncio.wait({task}, timeout=wait_timeout)
-                if task in done:
-                    # Awaiting a finished task returns its result or re-raises its exception.
-                    return await task
-                if isinstance(run_telemetry, dict):
-                    current_progress_snapshot = self._timeout_progress_snapshot(run_telemetry)
-                    signal_type = self._timeout_progress_signal_type(
-                        previous_heartbeat_snapshot, current_progress_snapshot
-                    )
-                    if signal_type is not None:
-                        last_progress_at = time.monotonic()
-                        last_progress_signal_type = signal_type
-                    previous_heartbeat_snapshot = current_progress_snapshot
-                    if lease_mode_enabled:
-                        # Lease check: fail once no progress signal has been seen for stall_detection_seconds.
-                        lease_stall_elapsed = time.monotonic() - last_progress_at
-                        if lease_stall_elapsed >= timeout_policy.stall_detection_seconds:
-                            task.cancel()
-                            with contextlib.suppress(asyncio.CancelledError):
-                                await task
-                            self.logger.error(
-                                "pydantic_agent_progress_lease_expired",
-                                result_type=result_type_name,
-                                timeout_budget_seconds=round(timeout_budget, 2),
-                                lease_window_seconds=round(timeout_policy.lease_window_seconds, 2),
-                                stall_detection_seconds=round(timeout_policy.stall_detection_seconds, 2),
-                                stall_elapsed_seconds=round(lease_stall_elapsed, 2),
-                                last_progress_signal_type=last_progress_signal_type,
-                            )
-                            raise RuntimeError(
-                                "PydanticAI agent progress lease expired after "
-                                f"{lease_stall_elapsed:.0f}s without progress"
-                            )
-                    tool_calls_started = int(run_telemetry.get("event_tool_calls_started", 0) or 0)
-                    # Falls back to the "event_tool_calls" key when the completed counter is absent.
-                    tool_calls_completed = int(
-                        run_telemetry.get("event_tool_calls_completed", run_telemetry.get("event_tool_calls", 0)) or 0
-                    )
-                    tool_activity_count = max(tool_calls_started, tool_calls_completed)
-                    last_tool_activity_at = run_telemetry.get("event_last_tool_activity_at")
-                    last_seen_at = run_telemetry.get("event_last_seen_at")
-                    # Pick the most recent of the tool-activity and event timestamps.
-                    latest_activity_at: float | None = None
-                    activity_source = "tool"
-                    if isinstance(last_tool_activity_at, (int, float)):
-                        latest_activity_at = float(last_tool_activity_at)
-                    if isinstance(last_seen_at, (int, float)) and (
-                        latest_activity_at is None or float(last_seen_at) > latest_activity_at
-                    ):
-                        latest_activity_at = float(last_seen_at)
-                        activity_source = "event"
-                    if tool_activity_count > 0 and isinstance(latest_activity_at, float):
-                        # NOTE(review): compared against time.monotonic(), so these telemetry
-                        # timestamps are presumably monotonic-clock values — confirm with the writer.
-                        stall_elapsed = time.monotonic() - latest_activity_at
-                        if stall_elapsed >= timeout_policy.idle_post_tool_seconds:
-                            task.cancel()
-                            with contextlib.suppress(asyncio.CancelledError):
-                                await task
-                            self.logger.error(
-                                "pydantic_agent_call_stalled",
-                                result_type=result_type_name,
-                                timeout_budget_seconds=round(timeout_budget, 2),
-                                idle_post_tool_seconds=round(timeout_policy.idle_post_tool_seconds, 2),
-                                stall_elapsed_seconds=round(stall_elapsed, 2),
-                                tool_calls_started=tool_calls_started,
-                                tool_calls_completed=tool_calls_completed,
-                                observed_tool_names=run_telemetry.get("event_tool_names"),
-                                activity_source=activity_source,
-                            )
-                            raise RuntimeError(
-                                "PydanticAI agent stalled after tool activity "
-                                f"for {stall_elapsed:.0f}s (idle limit {timeout_policy.idle_post_tool_seconds:.0f}s)"
-                            )
-                    # Churn heuristic: at least 20 completed calls, at least 14 of them
-                    # list_directory, at most 3 distinct tools, and a ratio of 0.7 or more.
-                    tool_count_by_name = run_telemetry.get("event_tool_count_by_name")
-                    if isinstance(tool_count_by_name, dict) and tool_calls_completed >= 20:
-                        list_directory_calls = int(tool_count_by_name.get("list_directory", 0) or 0)
-                        unique_tools = len(tool_count_by_name)
-                        if list_directory_calls >= 14 and unique_tools <= 3:
-                            repetition_ratio = list_directory_calls / max(1, tool_calls_completed)
-                            if repetition_ratio >= 0.7:
-                                task.cancel()
-                                with contextlib.suppress(asyncio.CancelledError):
-                                    await task
-                                self.logger.error(
-                                    "pydantic_agent_list_directory_churn",
-                                    result_type=result_type_name,
-                                    timeout_budget_seconds=round(timeout_budget, 2),
-                                    tool_calls_completed=tool_calls_completed,
-                                    list_directory_calls=list_directory_calls,
-                                    unique_tools=unique_tools,
-                                    repetition_ratio=round(repetition_ratio, 3),
-                                    observed_tool_names=run_telemetry.get("event_tool_names"),
-                                )
-                                raise RuntimeError(
-                                    "PydanticAI agent detected list_directory churn "
-                                    f"({list_directory_calls}/{tool_calls_completed} calls)"
-                                )
-                # Heartbeat record for every polling slice that ended without a result.
-                self.logger.info(
-                    "pydantic_agent_call_heartbeat",
-                    result_type=result_type_name,
-                    elapsed_seconds=round(time.monotonic() - start_time, 2),
-                    remaining_seconds=round(max(0.0, timeout_budget - (time.monotonic() - start_time)), 2),
-                    timeout_budget_seconds=round(timeout_budget, 2),
-                    extensions_used=extensions_used,
-                    lease_mode_enabled=lease_mode_enabled,
-                    lease_window_seconds=round(timeout_policy.lease_window_seconds, 2) if lease_mode_enabled else 0.0,
-                    last_progress_signal_type=last_progress_signal_type,
-                )
-        except asyncio.CancelledError:
-            # Cancellation reached this coroutine: cancel the inner task as well before propagating.
-            task.cancel()
-            with contextlib.suppress(asyncio.CancelledError):
-                await task
-            raise
-    def _merge_failover_telemetry(
-        self,
-        primary: dict[str, Any] | None,
-        secondary: dict[str, Any] | None,
-    ) -> dict[str, Any]:
-        """Combine two failover telemetry dicts into one.
-        When ``primary`` is empty or ``None`` a copy of ``secondary`` is returned
-        as-is. Otherwise the listed scalar keys are taken from ``secondary`` when
-        present, the failover chains are unioned in order, the retry/hop counters
-        keep the larger value, and the considered-profiles list is truncated to
-        the configured hop limit.
-        """
-        combined = dict(primary or {})
-        overlay = dict(secondary or {})
-        if not combined:
-            return overlay
-        # Keys for which the secondary value wins whenever it is present.
-        overriding_keys = (
-            "provider_failover_attempted",
-            "provider_failover_reason",
-            "provider_failover_final_provider",
-            "provider_failover_original_provider",
-            "provider_failover_profiles_considered",
-            "provider_retry_class",
-            "provider_retry_reason",
-            "retry_after_honored",
-            "retry_after_seconds",
-            "timeout_kind",
-        )
-        combined.update({name: overlay[name] for name in overriding_keys if name in overlay})
-        # Append secondary chain entries (stringified and stripped) that are not already present.
-        chain = list(combined.get("provider_failover_chain") or [])
-        for entry in overlay.get("provider_failover_chain") or []:
-            label = str(entry or "").strip()
-            if label and label not in chain:
-                chain.append(label)
-        if chain:
-            combined["provider_failover_chain"] = chain
-        for counter_key in ("provider_transient_retry_count", "provider_failover_hops"):
-            combined[counter_key] = max(
-                int(combined.get(counter_key, 0) or 0),
-                int(overlay.get(counter_key, 0) or 0),
-            )
-        if "provider_failover_profiles_considered" in combined:
-            hop_limit = max(0, int(getattr(self.settings, "row_failover_max_provider_hops", 2) or 0))
-            combined["provider_failover_profiles_considered"] = truncate_failover_profiles_considered(
-                combined.get("provider_failover_profiles_considered"),
-                max_provider_hops=hop_limit,
-            )
-        return combined
-    async def _attempt_row_provider_failover(
-        self,
-        *,
-        exc: Exception,
-        system_prompt: str,
-        user_prompt: str,
-        result_type: type[ResultType],
-        complexity: str,
-        max_tokens: int,
-        prompt_type: str,
-        deps: Any | None,
-        toolsets: list[Any] | None,
-        max_turns: int | None,
-        max_tool_calls: int | None,
-        pre_retry_telemetry: dict[str, Any] | None,
-        current_run_telemetry: dict[str, Any] | None,
-        row_failover_context: RowFailoverContext | None,
-        provider_health_memory: ProviderHealthMemory,
-    ) -> ResultType | None:
-        """Retry the agent call on the next eligible failover profile after a provider failure.
-        Returns ``None`` without retrying when the classified failure is not failoverable,
-        no failover profiles are configured, the hop limit is zero, the hop limit has
-        already been reached, or every configured profile was already tried or is skipped
-        by ``provider_health_memory``. In the last two cases ``current_run_telemetry``
-        (when given) is stamped with the failover state and an exhausted ``timeout_kind``.
-        Otherwise ``provider_health_memory.total_hops`` is incremented, the telemetry is
-        updated, and ``_run_pydantic_agent`` is re-invoked under temporary settings built
-        for the chosen profile; its result is returned.
-        """
-        classification = ProviderFailureClassifier.classify(exc)
-        if not classification.is_failoverable:
-            return None
-        configured_profiles = self._resolve_row_failover_profiles()
-        if not configured_profiles:
-            return None
-        # Hop limit read from settings (fallback 2 when the attribute is missing), clamped to >= 0.
-        max_hops = max(0, int(getattr(self.settings, "row_failover_max_provider_hops", 2) or 0))
-        if max_hops <= 0:
-            return None
-        current_provider = self._provider_identity()
-        # First failure for this call: start a fresh context rooted at the current provider.
-        failover_context = row_failover_context or RowFailoverContext(
-            row_id=str(getattr(deps, "project_id", "") or ""),
-            check_id=prompt_type,
-            original_provider=current_provider,
-            failover_chain=[current_provider],
-        )
-        failover_context = truncate_row_failover_context(
-            failover_context,
-            max_provider_hops=max_hops,
-        )
-        considered_profiles = truncate_failover_profiles_considered(
-            configured_profiles,
-            max_provider_hops=max_hops,
-        )
-        if failover_context.failover_count >= max_hops:
-            # Hop budget spent: record the exhausted state and give up.
-            if current_run_telemetry is not None:
-                current_run_telemetry["provider_failover_attempted"] = True
-                current_run_telemetry["provider_failover_chain"] = list(failover_context.failover_chain or [])
-                current_run_telemetry["provider_failover_final_provider"] = current_provider
-                current_run_telemetry["provider_failover_original_provider"] = (
-                    failover_context.original_provider or current_provider
-                )
-                current_run_telemetry["provider_failover_profiles_considered"] = list(considered_profiles)
-                current_run_telemetry["provider_failover_reason"] = classification.classification_reason
-                current_run_telemetry["provider_failover_hops"] = int(failover_context.failover_count)
-                current_run_telemetry["timeout_kind"] = TimeoutKind.TIMEOUT_FAILOVER_EXHAUSTED.value
-            return None
-        # Pick the first configured profile that is neither the current provider, nor
-        # already in the chain, nor flagged to be skipped by the health memory.
-        next_profile: str | None = None
-        for candidate in configured_profiles:
-            if candidate == current_provider or candidate in failover_context.failover_chain:
-                continue
-            if provider_health_memory.should_skip_provider(candidate):
-                continue
-            next_profile = candidate
-            break
-        if next_profile is None:
-            # No eligible profile left: same exhausted telemetry as the hop-limit branch above.
-            if current_run_telemetry is not None:
-                current_run_telemetry["provider_failover_attempted"] = True
-                current_run_telemetry["provider_failover_chain"] = list(failover_context.failover_chain or [])
-                current_run_telemetry["provider_failover_final_provider"] = current_provider
-                current_run_telemetry["provider_failover_original_provider"] = (
-                    failover_context.original_provider or current_provider
-                )
-                current_run_telemetry["provider_failover_profiles_considered"] = list(considered_profiles)
-                current_run_telemetry["provider_failover_reason"] = classification.classification_reason
-                current_run_telemetry["provider_failover_hops"] = int(failover_context.failover_count)
-                current_run_telemetry["timeout_kind"] = TimeoutKind.TIMEOUT_FAILOVER_EXHAUSTED.value
-            return None
-        provider_health_memory.total_hops += 1
-        # Copy the context forward with one more hop and the new profile appended to the chain.
-        next_context = RowFailoverContext(
-            row_id=failover_context.row_id,
-            check_id=failover_context.check_id,
-            evidence_refs=list(failover_context.evidence_refs or []),
-            route_state=dict(failover_context.route_state or {}),
-            partial_progress=dict(failover_context.partial_progress or {}),
-            original_provider=failover_context.original_provider or current_provider,
-            failover_count=int(failover_context.failover_count) + 1,
-            failover_chain=[*list(failover_context.failover_chain or []), next_profile],
-            timeout_telemetry_snapshot=dict(failover_context.timeout_telemetry_snapshot or {}),
-        )
-        next_context = truncate_row_failover_context(next_context, max_provider_hops=max_hops)
-        if current_run_telemetry is not None:
-            current_run_telemetry["provider_failover_attempted"] = True
-            current_run_telemetry["provider_failover_chain"] = list(next_context.failover_chain)
-            current_run_telemetry["provider_failover_final_provider"] = next_profile
-            current_run_telemetry["provider_failover_original_provider"] = (
-                next_context.original_provider or current_provider
-            )
-            current_run_telemetry["provider_failover_profiles_considered"] = list(considered_profiles)
-            current_run_telemetry["provider_failover_reason"] = classification.classification_reason
-            current_run_telemetry["provider_failover_hops"] = int(next_context.failover_count)
-        self.logger.warning(
-            "pydantic_agent_provider_failover_started",
-            result_type=result_type.__name__,
-            current_provider=current_provider,
-            next_provider=next_profile,
-            failure_class=classification.failure_class.value,
-            reason=classification.classification_reason,
-            failover_count=next_context.failover_count,
-            max_provider_hops=max_hops,
-        )
-        fallback_settings = inherit_runtime_llm_policy(next_profile, source_llm=self.settings)
-        # The nested run receives the merged telemetry and the advanced context, so a
-        # further failure inside it continues from the updated hop count.
-        with self._temporary_agent_settings(fallback_settings):
-            return await self._run_pydantic_agent(
-                system_prompt=system_prompt,
-                user_prompt=user_prompt,
-                result_type=result_type,
-                complexity=complexity,
-                max_tokens=max_tokens,
-                prompt_type=prompt_type,
-                deps=deps,
-                toolsets=toolsets,
-                max_turns=max_turns,
-                max_tool_calls=max_tool_calls,
-                pre_retry_telemetry=self._merge_failover_telemetry(pre_retry_telemetry, current_run_telemetry),
-                row_failover_context=next_context,
-                provider_health_memory=provider_health_memory,
-            )
-    @property
-    def llm_available(self) -> bool:
-        """Report whether an LLM backend handle is set.
-        True when ``self._pydantic_model`` is not ``None``; otherwise True only
-        when ``self.client`` is not ``None``.
-        """
-        if self._pydantic_model is not None:
-            return True
-        return self.client is not None
-    async def _call_llm(self, system: str, user: str) -> str:
-        """Send one system/user prompt pair to the configured LLM and return its text.
-        The PydanticAI path is used when ``self._pydantic_model`` is set. Otherwise
-        the request goes through ``self.client.messages.create`` on a worker thread;
-        Anthropic-protocol responses are decoded by
-        ``_extract_anthropic_response_text_with_metadata``, any other protocol has the
-        ``text`` of its content blocks joined with newlines.
-        Raises:
-            RuntimeError: If no PydanticAI model is set and ``self.client`` is falsy.
-        """
-        if self._pydantic_model is not None:
-            return await self._run_pydantic_agent(
-                system_prompt=system,
-                user_prompt=user,
-                result_type=str,
-                complexity="standard",
-                max_tokens=4096,
-                prompt_type="agent_call",
-            )
-        if not self.client:
-            raise RuntimeError("LLM client not available")
-        stream_enabled = bool(getattr(self.settings, "stream", False))
-        def _send_request():
-            kwargs: dict[str, Any] = {
-                "model": self.settings.model_standard,
-                "max_tokens": 4096,
-                "temperature": 0.0,
-                "system": system,
-                "messages": [{"role": "user", "content": user}],
-            }
-            # The stream flag is only added for the Anthropic protocol.
-            if self.settings.protocol == LLMProtocolType.ANTHROPIC and stream_enabled:
-                kwargs["stream"] = True
-            return self.client.messages.create(**kwargs)  # type: ignore[union-attr]
-        message = await asyncio.to_thread(_send_request)
-        is_anthropic = self.settings.protocol == LLMProtocolType.ANTHROPIC
-        if not is_anthropic:
-            # Generic path: join the truthy ``text`` attribute of every content block.
-            return "\n".join(
-                block_text for block in message.content if (block_text := getattr(block, "text", None))
-            )
-        non_stream = not stream_enabled
-        try:
-            response_text, used_sse_adapter = await asyncio.to_thread(
-                self._extract_anthropic_response_text_with_metadata,
-                message,
-            )
-        except Exception as exc:
-            # Log SSE parse failures seen in non-stream mode, then always re-raise.
-            if non_stream and self._is_anthropic_sse_parse_error(exc):
-                self.logger.warning(
-                    "anthropic_non_stream_sse_parse_failed_no_fallback",
-                    protocol=self.settings.protocol.value,
-                    configured_stream=stream_enabled,
-                    fallback_allowed=False,
-                    adapter_path="anthropic_sse_stream_adapter",
-                    error_type=type(exc).__name__,
-                    error=str(exc),
-                )
-            raise
-        if non_stream and used_sse_adapter:
-            self.logger.info(
-                "anthropic_non_stream_sse_adapter_used",
-                protocol=self.settings.protocol.value,
-                configured_stream=stream_enabled,
-                adapter_path="anthropic_sse_stream_adapter",
-            )
-        return response_text
-    @staticmethod
-    def _is_anthropic_sse_parse_error(exc: Exception) -> bool:
-        """Return True when the exception text refers to SSE / event-stream parsing.
-        ``sse`` is matched only as a standalone token (not flanked by letters or
-        digits), so unrelated messages containing words such as "assertion",
-        "passed" or "compressed" are not reported as SSE parse failures. The
-        literal ``event-stream`` is matched anywhere. Matching is case-insensitive.
-        Args:
-            exc: The exception whose ``str()`` text is inspected.
-        """
-        message = str(exc).lower()
-        if "event-stream" in message:
-            return True
-        # Lookarounds instead of \b so identifiers like "sse_decode" still count.
-        return re.search(r"(?<![a-z0-9])sse(?![a-z0-9])", message) is not None
-    def _normalize_anthropic_text_payload_with_diagnostics(self, raw: str, *, source: str) -> str:
-        """Normalize an Anthropic text payload and log SSE-adapter diagnostics.
-        When streaming is not configured, an info record is logged if the payload
-        looks like SSE, and a warning is logged if normalization fails with an
-        SSE parse error. Any exception from normalization is re-raised unchanged.
-        """
-        text = str(raw or "")
-        streaming = bool(getattr(self.settings, "stream", False))
-        if not streaming and self._looks_like_sse_payload(text):
-            self.logger.info(
-                "anthropic_non_stream_sse_adapter_used",
-                protocol=self.settings.protocol.value,
-                configured_stream=streaming,
-                adapter_path="anthropic_sse_stream_adapter",
-                source=source,
-            )
-        try:
-            normalized = self._normalize_anthropic_text_payload(text)
-        except Exception as exc:
-            # Only non-stream SSE parse failures are logged; everything is re-raised.
-            if streaming or not self._is_anthropic_sse_parse_error(exc):
-                raise
-            self.logger.warning(
-                "anthropic_non_stream_sse_parse_failed_no_fallback",
-                protocol=self.settings.protocol.value,
-                configured_stream=streaming,
-                fallback_allowed=False,
-                adapter_path="anthropic_sse_stream_adapter",
-                source=source,
-                error_type=type(exc).__name__,
-                error=str(exc),
-            )
-            raise
-        return normalized
-    @staticmethod
-    def _looks_like_sse_payload(raw: str) -> bool:
-        """Heuristically decide whether ``raw`` is a server-sent-events payload.
-        Only the first three non-blank lines are inspected; each must start with
-        one of the SSE field prefixes or the ``:`` comment marker. Empty or blank
-        input yields False.
-        """
-        stripped_lines = (candidate.strip() for candidate in str(raw or "").splitlines())
-        leading = [line for line in stripped_lines if line][:3]
-        if not leading:
-            return False
-        sse_prefixes = ("event:", "data:", "id:", "retry:", ":")
-        for line in leading:
-            if not line.startswith(sse_prefixes):
-                return False
-        return True
-    @classmethod
-    def _parse_sse_data_payloads(cls, raw: str) -> list[str]:
-        """Split SSE text into one string per event, built from its ``data:`` lines.
-        Consecutive ``data:`` lines are joined with newlines and emitted at the
-        next blank line, ``[DONE]`` line, or end of input. Comment lines and
-        ``event:``/``id:``/``retry:`` fields are ignored; empty and ``[DONE]``
-        payloads are dropped.
-        Raises:
-            RuntimeError: On a line that matches no supported SSE form, or when
-                the input contains no SSE marker at all.
-        """
-        events: list[str] = []
-        pending: list[str] = []
-        marker_seen = False
-        ignored_fields = ("event:", "id:", "retry:")
-        def _emit_pending() -> None:
-            # Close the current event, discarding empty and "[DONE]" payloads.
-            if pending:
-                joined = "\n".join(pending).strip()
-                del pending[:]
-                if joined and joined != "[DONE]":
-                    events.append(joined)
-        for raw_line in str(raw or "").splitlines():
-            entry = raw_line.strip()
-            if entry == "":
-                _emit_pending()
-            elif entry == "[DONE]":
-                marker_seen = True
-                _emit_pending()
-            elif entry.startswith(":"):
-                marker_seen = True
-            elif entry.startswith("data:"):
-                marker_seen = True
-                pending.append(entry[len("data:") :].lstrip())
-            elif entry.startswith(ignored_fields):
-                marker_seen = True
-            else:
-                raise RuntimeError(f"Unsupported SSE line format: {entry!r}")
-        _emit_pending()
-        if not marker_seen:
-            raise RuntimeError("Payload is not SSE-formatted")
-        return events
-    @staticmethod
-    def _extract_text_from_content_blocks(content: Any) -> list[str]:
-        """Collect the non-empty text strings found in a message ``content`` value.
-        A string is returned as a single-item list (empty string gives ``[]``).
-        A list is scanned item by item: strings are taken directly, dicts via
-        their ``"text"`` key, other objects via a ``text`` attribute. Any other
-        ``content`` type yields ``[]``.
-        """
-        if isinstance(content, str):
-            return [content] if content else []
-        if not isinstance(content, list):
-            return []
-        collected: list[str] = []
-        for item in content:
-            if isinstance(item, str):
-                candidate = item
-            elif isinstance(item, dict):
-                candidate = item.get("text")
-            else:
-                candidate = getattr(item, "text", None)
-            # Keep only non-empty strings, whatever the source shape was.
-            if isinstance(candidate, str) and candidate:
-                collected.append(candidate)
-        return collected
-    @classmethod
-    def _extract_text_from_anthropic_payload(cls, payload: dict[str, Any]) -> list[str]:
-        """Pull every text fragment out of one decoded event payload.
-        Sources, in order: the top-level ``text`` key; then, depending on the
-        lower-cased ``type``, the delta text of a ``content_block_delta``, the
-        block text of a ``content_block_start`` or the content blocks of a
-        ``message_start``; finally ``choices[*].delta.content`` when a
-        ``choices`` list is present. Only non-empty strings are kept.
-        """
-        pieces: list[str] = []
-        def _keep(value: Any) -> None:
-            # Append only non-empty strings.
-            if isinstance(value, str) and value:
-                pieces.append(value)
-        _keep(payload.get("text"))
-        kind = str(payload.get("type") or "").strip().lower()
-        if kind == "content_block_delta":
-            delta = payload.get("delta")
-            if isinstance(delta, dict):
-                # First truthy value among the three accepted delta keys.
-                _keep(delta.get("text") or delta.get("content_delta") or delta.get("delta"))
-        elif kind == "content_block_start":
-            block = payload.get("content_block")
-            if isinstance(block, dict):
-                _keep(block.get("text"))
-        elif kind == "message_start":
-            message = payload.get("message")
-            if isinstance(message, dict):
-                pieces.extend(cls._extract_text_from_content_blocks(message.get("content")))
-        choices = payload.get("choices")
-        if isinstance(choices, list):
-            for choice in choices:
-                choice_delta = choice.get("delta") if isinstance(choice, dict) else None
-                if isinstance(choice_delta, dict):
-                    _keep(choice_delta.get("content"))
-        return pieces
-    @classmethod
-    def _normalize_strict_anthropic_sse_payload(cls, raw: str) -> str:
-        """Decode an SSE payload and concatenate all text it carries.
-        Each data payload must be JSON. Objects are passed to
-        ``_extract_text_from_anthropic_payload``; JSON strings are stripped and
-        kept unless empty or ``[DONE]``.
-        Raises:
-            RuntimeError: On invalid JSON, on a JSON value that is neither an
-                object nor a string, or when no text was found at all.
-        """
-        fragments: list[str] = []
-        for chunk in cls._parse_sse_data_payloads(raw):
-            try:
-                decoded = json.loads(chunk)
-            except json.JSONDecodeError as exc:
-                raise RuntimeError(f"Invalid SSE JSON payload: {chunk!r}") from exc
-            if isinstance(decoded, str):
-                trimmed = decoded.strip()
-                if trimmed and trimmed != "[DONE]":
-                    fragments.append(trimmed)
-                continue
-            if not isinstance(decoded, dict):
-                raise RuntimeError(f"Unsupported SSE payload type: {type(decoded).__name__}")
-            fragments.extend(cls._extract_text_from_anthropic_payload(decoded))
-        if fragments:
-            return "".join(fragments)
-        raise RuntimeError("Anthropic SSE payload did not contain text deltas")
-    @classmethod
-    def _normalize_anthropic_text_payload(cls, raw: str) -> str:
-        text = str(raw or "")
-        if not text.strip():
-            return ""
-        if cls._looks_like_sse_payload(text):
-            return cls._normalize_strict_anthropic_sse_payload(text)
-        return text
-    @classmethod
-    def _extract_anthropic_text_from_stream_event(cls, event: Any) -> list[str]:
-        if event is None:
-            return []
-        if isinstance(event, bytes):
-            event = event.decode("utf-8", errors="replace")
-        if isinstance(event, str):
-            stripped = event.strip()
-            if not stripped:
-                return []
-            if cls._looks_like_sse_payload(stripped):
-                return [cls._normalize_strict_anthropic_sse_payload(stripped)]
-            if stripped.startswith("{") and stripped.endswith("}"):
-                try:
-                    decoded = json.loads(stripped)
-                except json.JSONDecodeError:
-                    return [stripped]
-                if isinstance(decoded, dict):
-                    extracted = cls._extract_text_from_anthropic_payload(decoded)
-                    return extracted or [stripped]
-                return [stripped]
-            return [stripped]
-        if isinstance(event, dict):
-            return cls._extract_text_from_anthropic_payload(event)
-        text_parts: list[str] = []
-        event_type = str(getattr(event, "type", "") or "").strip().lower()
-        if event_type == "content_block_delta":
-            delta = getattr(event, "delta", None)
-            delta_text = getattr(delta, "text", None) or getattr(delta, "content_delta", None)
-            if isinstance(delta_text, str) and delta_text:
-                text_parts.append(delta_text)
-        elif event_type == "content_block_start":
-            content_block = getattr(event, "content_block", None)
-            block_text = getattr(content_block, "text", None)
-            if isinstance(block_text, str) and block_text:
-                text_parts.append(block_text)
-        elif event_type == "message_start":
-            message = getattr(event, "message", None)
-            content = getattr(message, "content", None) if message is not None else None
-            text_parts.extend(cls._extract_text_from_content_blocks(content))
-        direct_text = getattr(event, "text", None)
-        if isinstance(direct_text, str) and direct_text:
-            text_parts.append(direct_text)
-        return text_parts
-    @classmethod
-    def _consume_anthropic_stream_text(cls, stream_response: Any) -> str:
-        text_parts: list[str] = []
-        consumed_events = False
-        text_stream = getattr(stream_response, "text_stream", None)
-        if text_stream is not None:
-            for chunk in text_stream:
-                consumed_events = True
-                normalized = cls._normalize_anthropic_text_payload(str(chunk))
-                if normalized:
-                    text_parts.append(normalized)
-        elif hasattr(stream_response, "__iter__"):
-            for event in stream_response:
-                consumed_events = True
-                text_parts.extend(cls._extract_anthropic_text_from_stream_event(event))
-        if not consumed_events:
-            raise RuntimeError("Anthropic stream response was not iterable")
-        if not text_parts:
-            raise RuntimeError("Anthropic stream response did not contain text")
-        return "".join(text_parts)
-    @classmethod
-    def _extract_anthropic_response_text(cls, response: Any) -> str:
-        text, _used_sse_adapter = cls._extract_anthropic_response_text_with_metadata(response)
-        return text
-    @classmethod
-    def _extract_anthropic_response_text_with_metadata(cls, response: Any) -> tuple[str, bool]:
-        if response is None:
-            raise RuntimeError("Anthropic response is empty")
-        if isinstance(response, (str, bytes)):
-            raw_response = response.decode("utf-8", errors="replace") if isinstance(response, bytes) else response
-            normalized = cls._normalize_anthropic_text_payload(raw_response)
-            if not normalized:
-                raise RuntimeError("Anthropic response text is empty")
-            return normalized, cls._looks_like_sse_payload(raw_response)
-        content = getattr(response, "content", None)
-        if isinstance(content, list):
-            parts: list[str] = []
-            for block in content:
-                block_text = getattr(block, "text", None)
-                if isinstance(block_text, str) and block_text:
-                    parts.append(cls._normalize_anthropic_text_payload(block_text))
-            if parts:
-                return "\n".join(part for part in parts if part).strip(), False
-        if hasattr(response, "__enter__") and hasattr(response, "__exit__"):
-            with response as managed:
-                return cls._consume_anthropic_stream_text(managed), True
-        return cls._consume_anthropic_stream_text(response), True
-    def _build_pydantic_ai_model(self, model_name: str):
-        """Create a PydanticAI model wrapper based on LLM settings.
-        Dispatches on ``self.settings.protocol``: Gemini builds a ``GoogleModel``,
-        Anthropic an ``AnthropicModel``, the codex protocol an
-        ``OpenAIResponsesModel`` over an ``AsyncOpenAI`` client authenticated with
-        the codex credentials, and every other supported protocol an
-        ``OpenAIChatModel``.
-        Args:
-            model_name: Model identifier passed to the selected model class.
-        Returns:
-            The constructed PydanticAI model instance.
-        Raises:
-            ValueError: If the protocol value is not in
-                ``PYDANTIC_AI_MODEL_SUPPORTED_PROTOCOL_VALUES``, if the codex
-                protocol has no base URL, or if the codex credentials lack an
-                access token.
-            RuntimeError: If the codex credential helper cannot be imported.
-        """
-        # Provider SDK imports are deferred to call time.
-        from pydantic_ai.models.anthropic import AnthropicModel
-        from pydantic_ai.models.openai import OpenAIChatModel, OpenAIResponsesModel
-        from pydantic_ai.providers.openai import OpenAIProvider
-        protocol = self.settings.protocol
-        protocol_value = self._protocol_value(protocol)
-        if protocol_value not in PYDANTIC_AI_MODEL_SUPPORTED_PROTOCOL_VALUES:
-            raise ValueError(f"Unsupported protocol for PydanticAI model builder: {protocol_value}")
-        if protocol == LLMProtocolType.GEMINI:
-            from pydantic_ai.models.google import GoogleModel
-            from pydantic_ai.providers.google import GoogleProvider
-            api_key = resolve_protocol_api_key(self.settings, protocol, allow_openai_dummy=False)
-            provider = GoogleProvider(
-                api_key=api_key,
-                base_url=resolve_protocol_base_url(self.settings, protocol),
-            )
-            return GoogleModel(model_name, provider=provider, settings=self._model_settings)
-        if protocol == LLMProtocolType.ANTHROPIC:
-            api_key = resolve_protocol_api_key(self.settings, protocol, allow_openai_dummy=False)
-            # The string "anthropic" is passed as the provider unless an explicit
-            # key or base URL requires a custom provider object.
-            provider = "anthropic"
-            base_url = resolve_protocol_base_url(self.settings, protocol)
-            if api_key or base_url:
-                from vds_audit_orchestrator.llm.provider import build_pydantic_anthropic_provider
-                provider = build_pydantic_anthropic_provider(
-                    api_key=api_key,
-                    base_url=base_url,
-                )
-            return AnthropicModel(model_name, provider=provider, settings=self._model_settings)
-        if self._is_openai_codex_protocol(protocol):
-            from openai import AsyncOpenAI
-            try:
-                from vds_audit_orchestrator.llm.codex_oauth import get_codex_credentials
-            except ImportError as exc:  # pragma: no cover - guarded by protocol path
-                raise RuntimeError("openai-codex protocol requires llm.codex_oauth.get_codex_credentials") from exc
-            credentials = get_codex_credentials()
-            access_token = self._resolve_codex_access_token(credentials)
-            chatgpt_account_id = self._resolve_codex_account_id(credentials)
-            # Only logs a warning for models outside the baseline set; never blocks.
-            self._warn_if_unvalidated_codex_model(model_name)
-            base_url = resolve_protocol_base_url(self.settings, protocol)
-            if not base_url:
-                raise ValueError("openai-codex protocol requires VDS_AUDIT_LLM__BASE_URL")
-            # The account header is sent only when an account id was resolved.
-            default_headers = {"ChatGPT-Account-Id": chatgpt_account_id} if chatgpt_account_id else None
-            openai_client = AsyncOpenAI(
-                api_key=access_token,
-                base_url=base_url,
-                default_headers=default_headers,
-            )
-            provider = OpenAIProvider(openai_client=openai_client)
-            # Codex runs use a copy of the model settings with ``openai_store`` forced to False.
-            model_settings = self._build_openai_codex_model_settings(self._model_settings)
-            return OpenAIResponsesModel(
-                model_name,
-                provider=provider,
-                settings=model_settings,
-            )
-        # Fallback for the remaining supported protocols: OpenAI chat-completions model.
-        base_url = resolve_protocol_base_url(self.settings, protocol)
-        api_key = resolve_openai_protocol_api_key(self.settings)
-        provider = OpenAIProvider(api_key=api_key, base_url=base_url)
-        return OpenAIChatModel(model_name, provider=provider, settings=self._model_settings)
-    @staticmethod
-    def _protocol_value(protocol: Any) -> str:
-        return str(getattr(protocol, "value", protocol) or "").strip().lower()
-    @classmethod
-    def _is_openai_codex_protocol(cls, protocol: Any | None = None) -> bool:
-        return cls._protocol_value(protocol) == OPENAI_CODEX_PROTOCOL
-    @staticmethod
-    def _read_credential_field(credentials: Any, *field_names: str) -> str | None:
-        for field_name in field_names:
-            if isinstance(credentials, dict):
-                raw_value = credentials.get(field_name)
-            else:
-                raw_value = getattr(credentials, field_name, None)
-            value = str(raw_value or "").strip()
-            if value:
-                return value
-        return None
-    @classmethod
-    def _resolve_codex_access_token(cls, credentials: Any) -> str:
-        access_token = cls._read_credential_field(credentials, "access_token", "token")
-        if not access_token:
-            raise ValueError("Codex OAuth credentials missing access token")
-        return access_token
-    @classmethod
-    def _resolve_codex_account_id(cls, credentials: Any) -> str | None:
-        return cls._read_credential_field(credentials, "chatgpt_account_id", "account_id")
-    @staticmethod
-    def _build_openai_codex_model_settings(settings: ModelSettings | None) -> ModelSettings:
-        merged: dict[str, Any] = {}
-        if isinstance(settings, dict):
-            merged.update(settings)
-        elif settings is not None:
-            model_dump = getattr(settings, "model_dump", None)
-            if callable(model_dump):
-                dumped = model_dump()
-                if isinstance(dumped, dict):
-                    merged.update(dumped)
-        merged["openai_store"] = False
-        return ModelSettings(**merged)
-    async def _run_streaming_pydantic_operation(
-        self,
-        *,
-        agent: Any,
-        user_prompt: str,
-        run_kwargs: dict[str, Any],
-    ) -> _PydanticStreamResultWrapper:
-        stream_result: Any | None = None
-        try:
-            async with agent.run_stream(user_prompt, **run_kwargs) as active_stream_result:
-                stream_result = active_stream_result
-                output = await stream_result.get_output()
-                return _PydanticStreamResultWrapper(output=output, streamed_result=stream_result)
-        except Exception as exc:
-            self._attach_streamed_output_payload(exc, stream_result)
-            if self._is_codex_streaming_requirement_error(exc):
-                raise RuntimeError(
-                    "Codex subscription provider requires streaming mode; use openai-codex with run_stream() semantics."
-                ) from exc
-            raise
-    @staticmethod
-    def _extract_streamed_output_payload(stream_result: Any | None) -> dict[str, Any] | str | None:
-        response = getattr(stream_result, "response", None)
-        tool_calls = getattr(response, "tool_calls", None)
-        if isinstance(tool_calls, list):
-            for tool_call in reversed(tool_calls):
-                args_as_dict = getattr(tool_call, "args_as_dict", None)
-                if callable(args_as_dict):
-                    try:
-                        payload = args_as_dict()
-                    except Exception:
-                        payload = None
-                    if isinstance(payload, dict) and payload:
-                        return payload
-                args_as_json_str = getattr(tool_call, "args_as_json_str", None)
-                if callable(args_as_json_str):
-                    try:
-                        payload = args_as_json_str()
-                    except Exception:
-                        payload = None
-                    if isinstance(payload, str) and payload.strip():
-                        return payload
-                payload = getattr(tool_call, "args", None)
-                if isinstance(payload, dict) and payload:
-                    return payload
-                if isinstance(payload, str) and payload.strip():
-                    return payload
-        return None
-    @classmethod
-    def _attach_streamed_output_payload(cls, exc: Exception, stream_result: Any | None) -> None:
-        payload = cls._extract_streamed_output_payload(stream_result)
-        if payload is None:
-            return
-        with contextlib.suppress(Exception):
-            exc.streamed_output_payload = payload
-        if getattr(exc, "body", None) is None:
-            with contextlib.suppress(Exception):
-                exc.body = payload
-        if getattr(exc, "response", None) is None:
-            with contextlib.suppress(Exception):
-                exc.response = payload
-    def _resolve_agent_stream_mode(self) -> tuple[bool, str]:
-        if self._is_openai_codex_protocol(self.settings.protocol):
-            return True, "protocol_override"
-        if bool(getattr(self.settings, "agent_stream", True)):
-            return True, "profile"
-        if bool(getattr(self.settings, "stream", False)):
-            return True, "transport_floor"
-        return False, "profile"
-    def _build_agent_operation(
-        self,
-        *,
-        agent: Any,
-        user_prompt: str,
-        run_kwargs: dict[str, Any],
-    ) -> Coroutine[Any, Any, Any]:
-        use_streaming, source = self._resolve_agent_stream_mode()
-        self.logger.info(
-            "pydantic_agent_stream_mode",
-            mode="stream" if use_streaming else "non_stream",
-            source=source,
-            protocol=self._protocol_value(self.settings.protocol),
-            model_name=getattr(self.settings, "model_standard", None),
-        )
-        if use_streaming:
-            return self._run_streaming_pydantic_operation(
-                agent=agent,
-                user_prompt=user_prompt,
-                run_kwargs=dict(run_kwargs),
-            )
-        return agent.run(user_prompt, **run_kwargs)
-    @staticmethod
-    def _is_codex_streaming_requirement_error(exc: Exception) -> bool:
-        message = str(exc).lower()
-        return "stream must be set to true" in message or "stream=true" in message or "streaming required" in message
-    def _warn_if_unvalidated_codex_model(self, model_name: str) -> None:
-        if str(model_name).strip().lower() in OPENAI_CODEX_BASELINE_MODELS:
-            return
-        self.logger.warning(
-            "codex_unsupported_model_name",
-            model_name=model_name,
-            baseline_models=sorted(OPENAI_CODEX_BASELINE_MODELS),
-        )
-    async def _run_pydantic_agent(
-        self,
-        *,
-        system_prompt: str,
-        user_prompt: str,
-        result_type: type[ResultType],
-        complexity: str = "standard",
-        max_tokens: int = 4096,
-        prompt_type: str = "agent",
-        deps: Any | None = None,
-        toolsets: list[Any] | None = None,
-        max_turns: int | None = None,
-        max_tool_calls: int | None = None,
-        pre_retry_telemetry: dict[str, Any] | None = None,
-        row_failover_context: RowFailoverContext | None = None,
-        provider_health_memory: ProviderHealthMemory | None = None,
-    ) -> ResultType:
-        """Run a PydanticAI agent and return typed output.
-        Args:
-            toolsets: Optional list of PydanticAI toolsets (Phase 41).
-                When provided, the agent is created with tool-use capabilities
-                for multi-turn agentic investigation.
-            max_turns: Optional max request/turn limit for bounded agentic runs.
-            max_tool_calls: Optional max tool call limit for bounded agentic runs.
-        """
-        import time
-        if not self.settings.enabled:
-            raise RuntimeError("LLM is disabled")
-        operation_state: dict[str, Any] = {
-            "operation_id": uuid4().hex,
-            "operation_kind": "pydantic_agent_run",
-            "operation_status": "started",
-            "operation_outcome": "started",
-            "operation_started_at": time.monotonic(),
-            "model_name": self.settings.model_standard,
-            "event_stream_enabled": False,
-            "rate_limit_retry_attempts": 0,
-            "tool_call_retry_attempts": 0,
-            "codex_auth_retry_attempts": 0,
-            "run_telemetry": None,
-            "error": None,
-        }
-        @contextlib.contextmanager
-        def _operation_lifecycle_scope():
-            self.logger.info(
-                "agent_operation_lifecycle",
-                operation_id=operation_state["operation_id"],
-                operation_kind=operation_state["operation_kind"],
-                operation_status="started",
-                operation_outcome=operation_state["operation_outcome"],
-                result_type=result_type.__name__,
-                prompt_type=prompt_type,
-                protocol=self.settings.protocol.value,
-                model_name=operation_state.get("model_name"),
-            )
-            try:
-                yield
-            except Exception as exc:
-                operation_state["operation_status"] = "failed"
-                operation_state["operation_outcome"] = "error"
-                operation_state["error"] = str(exc)
-                raise
-            finally:
-                elapsed_sec = max(0.0, time.monotonic() - float(operation_state["operation_started_at"]))
-                run_telemetry = operation_state.get("run_telemetry")
-                event_tool_calls_started = 0
-                event_tool_calls_completed = 0
-                event_skill_tool_calls = 0
-                event_skill_effective_tool_calls = 0
-                observed_tool_names: list[str] | None = None
-                if isinstance(run_telemetry, dict):
-                    event_tool_calls_started = int(run_telemetry.get("event_tool_calls_started", 0) or 0)
-                    event_tool_calls_completed = int(
-                        run_telemetry.get("event_tool_calls_completed", run_telemetry.get("event_tool_calls", 0)) or 0
-                    )
-                    event_skill_tool_calls = int(run_telemetry.get("event_skill_tool_calls", 0) or 0)
-                    event_skill_effective_tool_calls = int(
-                        run_telemetry.get("event_skill_effective_tool_calls", 0) or 0
-                    )
-                    observed_tool_names = list(run_telemetry.get("event_tool_names") or [])
-                    run_telemetry["operation_id"] = operation_state["operation_id"]
-                    run_telemetry["operation_kind"] = operation_state["operation_kind"]
-                    run_telemetry["operation_status"] = operation_state["operation_status"]
-                    run_telemetry["operation_outcome"] = operation_state["operation_outcome"]
-                    run_telemetry["operation_elapsed_sec"] = round(elapsed_sec, 3)
-                    run_telemetry["operation_rate_limit_retry_attempts"] = int(
-                        operation_state.get("rate_limit_retry_attempts", 0) or 0
-                    )
-                    run_telemetry["operation_tool_call_retry_attempts"] = int(
-                        operation_state.get("tool_call_retry_attempts", 0) or 0
-                    )
-                    run_telemetry["operation_codex_auth_retry_attempts"] = int(
-                        operation_state.get("codex_auth_retry_attempts", 0) or 0
-                    )
-                    if operation_state.get("error"):
-                        run_telemetry["operation_error"] = str(operation_state["error"])
-                log_method = (
-                    self.logger.warning if operation_state["operation_status"] == "failed" else self.logger.info
-                )
-                log_method(
-                    "agent_operation_lifecycle",
-                    operation_id=operation_state["operation_id"],
-                    operation_kind=operation_state["operation_kind"],
-                    operation_status=operation_state["operation_status"],
-                    operation_outcome=operation_state["operation_outcome"],
-                    elapsed_sec=round(elapsed_sec, 2),
-                    result_type=result_type.__name__,
-                    prompt_type=prompt_type,
-                    protocol=self.settings.protocol.value,
-                    model_name=operation_state.get("model_name"),
-                    event_stream_enabled=bool(operation_state.get("event_stream_enabled", False)),
-                    rate_limit_retry_attempts=int(operation_state.get("rate_limit_retry_attempts", 0) or 0),
-                    tool_call_retry_attempts=int(operation_state.get("tool_call_retry_attempts", 0) or 0),
-                    codex_auth_retry_attempts=int(operation_state.get("codex_auth_retry_attempts", 0) or 0),
-                    event_tool_calls_started=event_tool_calls_started,
-                    event_tool_calls_completed=event_tool_calls_completed,
-                    event_skill_tool_calls=event_skill_tool_calls,
-                    event_skill_effective_tool_calls=event_skill_effective_tool_calls,
-                    observed_tool_names=observed_tool_names,
-                    error=operation_state.get("error"),
-                )
-        with log_context(agent=self.__class__.__name__, phase="agent"), _operation_lifecycle_scope():
-            provider_health_memory = provider_health_memory or ProviderHealthMemory()
-            self.logger.info(
-                "pydantic_agent_call_started",
-                result_type=result_type.__name__,
-                complexity=complexity,
-                protocol=self.settings.protocol.value,
-                operation_id=operation_state["operation_id"],
-                operation_kind=operation_state["operation_kind"],
-            )
-            start_time = time.monotonic()
-            system_prompt_chars = len(system_prompt)
-            user_prompt_chars = len(user_prompt)
-            total_prompt_chars = system_prompt_chars + user_prompt_chars
-            approx_prompt_tokens = max(1, total_prompt_chars // 4)
-            self._log_trace_payload(
-                event_name="pydantic_agent_trace_prompt",
-                elapsed_sec=0.0,
-                payload={
-                    "operation_id": operation_state["operation_id"],
-                    "result_type": result_type.__name__,
-                    "complexity": complexity,
-                    "protocol": self.settings.protocol.value,
-                    "system_prompt": system_prompt,
-                    "user_prompt": user_prompt,
-                    "max_turns": max_turns,
-                    "max_tool_calls": max_tool_calls,
-                },
-            )
-            if self._pydantic_model is None and self.client is not None:
-                response = await self._call_llm(system_prompt, user_prompt)
-                payload = self._strip_json_fence(response)
-                elapsed = time.monotonic() - start_time
-                operation_state["operation_status"] = "completed"
-                operation_state["operation_outcome"] = "direct_call"
-                self.logger.info(
-                    "pydantic_agent_call_complete",
-                    elapsed_sec=round(elapsed, 2),
-                    mode="direct_call",
-                    operation_id=operation_state["operation_id"],
-                )
-                return result_type.model_validate_json(payload)  # type: ignore[return-value]
-            from vds_audit_orchestrator.logging_config import audit_otel_span as trace_span
-            # Select model based on complexity
-            model_name = self.settings.select_model(complexity) if hasattr(self.settings, "select_model") else None
-            selected_model = model_name or self.settings.model_standard
-            operation_state["model_name"] = selected_model
-            self.logger.debug(
-                "pydantic_agent_created",
-                model_name=selected_model,
-                output_type=result_type.__name__,
-            )
-            self.logger.info(
-                "pydantic_agent_prompt_metrics",
-                result_type=result_type.__name__,
-                model_name=selected_model,
-                system_prompt_chars=system_prompt_chars,
-                user_prompt_chars=user_prompt_chars,
-                total_prompt_chars=total_prompt_chars,
-                approx_prompt_tokens=approx_prompt_tokens,
-                max_tokens=max_tokens or self.settings.max_tokens_per_request,
-                toolset_count=len(toolsets or []),
-                has_deps=deps is not None,
-                max_turns=max_turns,
-                max_tool_calls=max_tool_calls,
-            )
-            # Use direct Agent creation with the selected model for test compatibility
-            from pydantic_ai import Agent
-            from pydantic_ai.settings import ModelSettings
-            from pydantic_ai.usage import UsageLimits
-            # Build the model - use cached or build new
-            model = self._pydantic_model or self._build_pydantic_ai_model(selected_model)
-            agent_retries, output_retries = self._resolve_agent_retry_policy()
-            agent_kwargs: dict[str, Any] = {
-                "model": model,
-                "name": self.__class__.__name__,
-                "output_type": result_type,
-                "instructions": system_prompt,
-                "retries": agent_retries,
-                "output_retries": output_retries,
-                "model_settings": ModelSettings(
-                    **self._build_run_model_settings(max_tokens or self.settings.max_tokens_per_request)
-                ),
-            }
-            self.logger.debug(
-                "pydantic_agent_retry_policy",
-                retries=agent_retries,
-                output_retries=output_retries,
-                configured_retries=getattr(self.settings, "agent_retries", None),
-                configured_output_retries=getattr(self.settings, "output_retries", None),
-                protocol=self.settings.protocol.value,
-            )
-            skill_policy_diagnostics = self._collect_skill_policy_diagnostics(toolsets)
-            if toolsets:
-                agent_kwargs["toolsets"] = toolsets
-                self.logger.info(
-                    "pydantic_agent_toolsets_attached",
-                    toolset_count=len(toolsets),
-                )
-            if skill_policy_diagnostics:
-                self.logger.info("pydantic_agent_skill_policy_diagnostics", **skill_policy_diagnostics)
-            agent: Agent[Any, Any] = Agent(**agent_kwargs)
-            usage_limits: UsageLimits | None = None
-            request_limit, tool_calls_limit = self._resolve_pydantic_usage_limits(
-                max_turns=max_turns,
-                max_tool_calls=max_tool_calls,
-            )
-            if request_limit is not None or tool_calls_limit is not None:
-                usage_limits = UsageLimits(
-                    request_limit=request_limit,
-                    tool_calls_limit=tool_calls_limit,
-                )
-                self.logger.info(
-                    "pydantic_agent_usage_limits_applied",
-                    max_turns=max_turns,
-                    max_tool_calls=max_tool_calls,
-                    request_limit=request_limit,
-                    tool_calls_limit=tool_calls_limit,
-                )
-            with trace_span(
-                "llm.call",
-                {
-                    "llm.model": model_name or self.settings.model_standard,
-                    "llm.protocol": self.settings.protocol.value,
-                },
-            ):
-                run_telemetry: dict[str, Any] = {
-                    "operation_id": operation_state["operation_id"],
-                    "operation_kind": operation_state["operation_kind"],
-                    "result_type": result_type.__name__,
-                    "protocol": self.settings.protocol.value,
-                    "model_name": selected_model,
-                    "event_tool_calls": 0,
-                    "event_tool_calls_started": 0,
-                    "event_tool_effective_calls": 0,
-                    "event_skill_tool_calls": 0,
-                    "event_skill_discovery_tool_calls": 0,
-                    "event_skill_bootstrap_tool_calls": 0,
-                    "event_skill_execution_tool_calls": 0,
-                    "event_skill_execution_effective_tool_calls": 0,
-                    "event_tool_calls_completed": 0,
-                    "event_skill_effective_tool_calls": 0,
-                    "event_tool_names": [],
-                    "event_tool_signatures": [],
-                    "event_tool_count_by_name": {},
-                    "event_tool_effective_count_by_name": {},
-                    "event_skill_tool_count_by_name": {},
-                    "event_tool_latencies_ms": [],
-                    "_tool_call_started_at": {},
-                    "event_last_seen_at": time.monotonic(),
-                    "event_last_tool_activity_at": None,
-                }
-                operation_state["run_telemetry"] = run_telemetry
-                if skill_policy_diagnostics:
-                    run_telemetry["skill_policy_diagnostics"] = skill_policy_diagnostics
-                # Keep latest telemetry available even when the run fails (e.g., request_limit),
-                # so higher-level retry/guard logic can inspect partial tool usage.
-                self._last_agent_run_telemetry = run_telemetry
-                run_kwargs: dict[str, Any] = {}
-                if deps is not None:
-                    run_kwargs["deps"] = deps
-                if usage_limits is not None:
-                    run_kwargs["usage_limits"] = usage_limits
-                event_stream_requested = bool(getattr(self.settings, "agent_event_stream_enabled", True))
-                suppress_event_stream_for_endpoint = (
-                    event_stream_requested and self._should_suppress_anthropic_tool_choice()
-                )
-                if event_stream_requested and not suppress_event_stream_for_endpoint:
-                    run_kwargs["event_stream_handler"] = self._build_event_stream_handler(start_time, run_telemetry)
-                elif suppress_event_stream_for_endpoint:
-                    self.logger.info(
-                        "anthropic_proxy_event_stream_disabled",
-                        base_url=self.settings.base_url,
-                        protocol=self.settings.protocol.value,
-                    )
-                timeout_policy = self._resolve_agent_timeout_policy(complexity)
-                self.logger.info(
-                    "pydantic_agent_timeout_policy",
-                    result_type=result_type.__name__,
-                    initial_timeout_seconds=round(timeout_policy.initial_timeout_seconds, 2),
-                    max_timeout_seconds=round(timeout_policy.max_timeout_seconds, 2),
-                    extension_seconds=round(timeout_policy.extension_seconds, 2),
-                    extension_attempts=timeout_policy.extension_attempts,
-                    heartbeat_seconds=round(timeout_policy.heartbeat_seconds, 2),
-                    idle_post_tool_seconds=round(timeout_policy.idle_post_tool_seconds, 2),
-                )
-                event_stream_enabled = "event_stream_handler" in run_kwargs
-                operation_state["event_stream_enabled"] = event_stream_enabled
-                rate_limit_retry_attempt = 0
-                max_rate_limit_retries = max(0, int(getattr(self.settings, "agent_rate_limit_retry_attempts", 2) or 0))
-                base_rate_limit_backoff = float(
-                    getattr(self.settings, "agent_rate_limit_retry_backoff_seconds", 2.0) or 2.0
-                )
-                max_retry_after_seconds = 120.0
-                tool_call_retry_attempt = 0
-                max_tool_call_retries = max(0, int(getattr(self.settings, "agent_tool_call_retry_attempts", 2) or 0))
-                base_tool_call_backoff = float(
-                    getattr(self.settings, "agent_tool_call_retry_backoff_seconds", 1.0) or 1.0
-                )
-                codex_auth_retry_attempt = 0
-                max_codex_auth_retries = 1
-                try:
-                    result = await self._run_with_timeout_policy(
-                        operation=self._build_agent_operation(
-                            agent=agent, user_prompt=user_prompt, run_kwargs=run_kwargs
-                        ),
-                        timeout_policy=timeout_policy,
-                        result_type_name=result_type.__name__,
-                        run_telemetry=run_telemetry,
-                    )
-                except Exception as exc:
-                    recovered_output = self._recover_output_from_final_result_validation_error(
-                        result_type=result_type,
-                        exc=exc,
-                        telemetry=run_telemetry,
-                    )
-                    if recovered_output is not None:
-                        self.logger.warning(
-                            "pydantic_agent_output_recovered_from_final_result_payload",
-                            result_type=result_type.__name__,
-                        )
-                        run_telemetry["output_recovered_from_final_result_payload"] = True
-                        run_telemetry.pop("_tool_call_started_at", None)
-                        self._last_agent_run_telemetry = run_telemetry
-                        operation_state["operation_status"] = "completed"
-                        operation_state["operation_outcome"] = "recovered_output"
-                        return recovered_output
-                    if (
-                        self.settings.protocol == LLMProtocolType.ANTHROPIC
-                        and bool(getattr(self.settings, "base_url", ""))
-                        and self._is_tool_choice_protocol_mismatch(exc)
-                    ):
-                        raise RuntimeError(
-                            "anthropic protocol mismatch: endpoint rejected tool_choice/function payload. "
-                            "Use an Anthropic Messages-compatible proxy."
-                        ) from exc
-                    auth_recovered = False
-                    while (
-                        self._is_openai_codex_protocol(self.settings.protocol)
-                        and self._is_auth_error(exc)
-                        and codex_auth_retry_attempt < max_codex_auth_retries
-                    ):
-                        codex_auth_retry_attempt += 1
-                        operation_state["codex_auth_retry_attempts"] = codex_auth_retry_attempt
-                        self.logger.warning(
-                            "pydantic_agent_codex_auth_retry",
-                            result_type=result_type.__name__,
-                            retry_attempt=codex_auth_retry_attempt,
-                            retry_attempts=max_codex_auth_retries,
-                            error=str(exc),
-                        )
-                        try:
-                            from vds_audit_orchestrator.llm.codex_oauth import force_refresh_codex_credentials
-                            force_refresh_codex_credentials()
-                            refreshed_model = self._build_pydantic_ai_model(selected_model)
-                            self._pydantic_model = refreshed_model
-                            refreshed_agent_kwargs = dict(agent_kwargs)
-                            refreshed_agent_kwargs["model"] = refreshed_model
-                            agent = Agent(**refreshed_agent_kwargs)
-                            result = await self._run_with_timeout_policy(
-                                operation=self._build_agent_operation(
-                                    agent=agent, user_prompt=user_prompt, run_kwargs=run_kwargs
-                                ),
-                                timeout_policy=timeout_policy,
-                                result_type_name=result_type.__name__,
-                                run_telemetry=run_telemetry,
-                            )
-                            auth_recovered = True
-                            self.logger.info(
-                                "pydantic_agent_codex_auth_recovered",
-                                result_type=result_type.__name__,
-                                retry_attempt=codex_auth_retry_attempt,
-                            )
-                            break
-                        except Exception as auth_retry_exc:
-                            exc = auth_retry_exc
-                    rate_limit_recovered = False
-                    if auth_recovered:
-                        rate_limit_recovered = True
-                    while self._is_rate_limit_error(exc) and rate_limit_retry_attempt < max_rate_limit_retries:
-                        rate_limit_retry_attempt += 1
-                        operation_state["rate_limit_retry_attempts"] = rate_limit_retry_attempt
-                        classification = ProviderFailureClassifier.classify(exc)
-                        retry_backoff = classification.retry_after_seconds
-                        if retry_backoff is not None:
-                            retry_backoff = min(max_retry_after_seconds, max(0.0, retry_backoff))
-                        if retry_backoff is None:
-                            retry_backoff = min(
-                                30.0,
-                                base_rate_limit_backoff * (2 ** (rate_limit_retry_attempt - 1)),
-                            )
-                        # Keep tiny jitter to reduce synchronized retry bursts from parallel agents.
-                        retry_backoff += random.uniform(0.0, min(1.0, retry_backoff * 0.2))
-                        run_telemetry["provider_transient_retry_count"] = rate_limit_retry_attempt
-                        run_telemetry["retry_after_honored"] = classification.retry_after_seconds is not None
-                        run_telemetry["retry_after_seconds"] = (
-                            round(retry_backoff, 2) if classification.retry_after_seconds is not None else 0.0
-                        )
-                        run_telemetry["provider_retry_class"] = classification.failure_class.value
-                        run_telemetry["provider_retry_reason"] = classification.classification_reason
-                        self.logger.warning(
-                            "pydantic_agent_rate_limit_retry",
-                            result_type=result_type.__name__,
-                            retry_attempt=rate_limit_retry_attempt,
-                            retry_attempts=max_rate_limit_retries,
-                            backoff_seconds=round(retry_backoff, 2),
-                            retry_after_honored=classification.retry_after_seconds is not None,
-                            provider_failure_class=classification.failure_class.value,
-                            retry_reason=classification.classification_reason,
-                            error=str(exc),
-                        )
-                        await asyncio.sleep(retry_backoff)
-                        try:
-                            result = await self._run_with_timeout_policy(
-                                operation=self._build_agent_operation(
-                                    agent=agent, user_prompt=user_prompt, run_kwargs=run_kwargs
-                                ),
-                                timeout_policy=timeout_policy,
-                                result_type_name=result_type.__name__,
-                                run_telemetry=run_telemetry,
-                            )
-                            rate_limit_recovered = True
-                            self.logger.info(
-                                "pydantic_agent_rate_limit_recovered",
-                                result_type=result_type.__name__,
-                                retry_attempt=rate_limit_retry_attempt,
-                                provider_failure_class=classification.failure_class.value,
-                            )
-                            break
-                        except Exception as rate_limit_retry_exc:
-                            exc = rate_limit_retry_exc
-                    tool_call_recovered = False
-                    while self._is_tool_call_argument_error(exc) and tool_call_retry_attempt < max_tool_call_retries:
-                        tool_call_retry_attempt += 1
-                        operation_state["tool_call_retry_attempts"] = tool_call_retry_attempt
-                        retry_backoff = min(20.0, base_tool_call_backoff * (2 ** (tool_call_retry_attempt - 1)))
-                        retry_backoff += random.uniform(0.0, min(0.5, retry_backoff * 0.2))
-                        self.logger.warning(
-                            "pydantic_agent_tool_call_retry",
-                            result_type=result_type.__name__,
-                            retry_attempt=tool_call_retry_attempt,
-                            retry_attempts=max_tool_call_retries,
-                            backoff_seconds=round(retry_backoff, 2),
-                            error=str(exc),
-                        )
-                        await asyncio.sleep(retry_backoff)
-                        try:
-                            result = await self._run_with_timeout_policy(
-                                operation=self._build_agent_operation(
-                                    agent=agent, user_prompt=user_prompt, run_kwargs=run_kwargs
-                                ),
-                                timeout_policy=timeout_policy,
-                                result_type_name=result_type.__name__,
-                                run_telemetry=run_telemetry,
-                            )
-                            tool_call_recovered = True
-                            self.logger.info(
-                                "pydantic_agent_tool_call_recovered",
-                                result_type=result_type.__name__,
-                                retry_attempt=tool_call_retry_attempt,
-                            )
-                            break
-                        except Exception as tool_call_retry_exc:
-                            exc = tool_call_retry_exc
-                    if not rate_limit_recovered and not tool_call_recovered:
-                        should_retry_without_stream = event_stream_enabled and self._should_retry_without_event_stream(
-                            exc
-                        )
-                        if not should_retry_without_stream:
-                            failover_result = await self._attempt_row_provider_failover(
-                                exc=exc,
-                                system_prompt=system_prompt,
-                                user_prompt=user_prompt,
-                                result_type=result_type,
-                                complexity=complexity,
-                                max_tokens=max_tokens,
-                                prompt_type=prompt_type,
-                                deps=deps,
-                                toolsets=toolsets,
-                                max_turns=max_turns,
-                                max_tool_calls=max_tool_calls,
-                                pre_retry_telemetry=pre_retry_telemetry,
-                                current_run_telemetry=run_telemetry,
-                                row_failover_context=row_failover_context,
-                                provider_health_memory=provider_health_memory,
-                            )
-                            if failover_result is not None:
-                                operation_state["operation_status"] = "completed"
-                                operation_state["operation_outcome"] = "provider_failover"
-                                run_telemetry.pop("_tool_call_started_at", None)
-                                self._last_agent_run_telemetry = self._merge_failover_telemetry(
-                                    pre_retry_telemetry,
-                                    run_telemetry,
-                                )
-                                return failover_result
-                            elapsed = time.monotonic() - start_time
-                            tool_efficiency = self._build_tool_efficiency_summary(run_telemetry)
-                            if tool_efficiency:
-                                run_telemetry["tool_efficiency"] = tool_efficiency
-                            run_telemetry["error"] = str(exc)
-                            run_telemetry.pop("_tool_call_started_at", None)
-                            self._last_agent_run_telemetry = run_telemetry
-                            self.logger.warning(
-                                "pydantic_agent_call_failed",
-                                elapsed_sec=round(elapsed, 2),
-                                mode="pydantic_ai",
-                                result_type=result_type.__name__,
-                                operation_id=operation_state["operation_id"],
-                                event_tool_calls_started=int(run_telemetry.get("event_tool_calls_started", 0) or 0),
-                                event_tool_calls_completed=int(
-                                    run_telemetry.get(
-                                        "event_tool_calls_completed", run_telemetry.get("event_tool_calls", 0)
-                                    )
-                                    or 0
-                                ),
-                                event_skill_tool_calls=int(run_telemetry.get("event_skill_tool_calls", 0) or 0),
-                                event_skill_effective_tool_calls=int(
-                                    run_telemetry.get("event_skill_effective_tool_calls", 0) or 0
-                                ),
-                                observed_tool_names=run_telemetry.get("event_tool_names"),
-                                **self._exception_diagnostics(exc),
-                            )
-                            raise
-                        self.logger.warning(
-                            "pydantic_agent_event_stream_fallback",
-                            error=str(exc),
-                        )
-                        run_kwargs.pop("event_stream_handler", None)
-                        run_telemetry["event_stream_fallback_activated"] = True
-                        try:
-                            result = await self._run_with_timeout_policy(
-                                operation=self._build_agent_operation(
-                                    agent=agent, user_prompt=user_prompt, run_kwargs=run_kwargs
-                                ),
-                                timeout_policy=timeout_policy,
-                                result_type_name=result_type.__name__,
-                                run_telemetry=run_telemetry,
-                            )
-                            # FR-70 / TSK-641.2: Populate telemetry from result when
-                            # event stream was removed.  Without this the telemetry
-                            # dict stays at zeros and post-run guards raise false
-                            # RuntimeError("agentic usage guard unsatisfied").
-                            self._populate_telemetry_from_result(result, run_telemetry)
-                        except Exception as fallback_exc:
-                            fallback_tool_call_recovered = False
-                            fallback_tool_call_retry_attempt = 0
-                            while (
-                                self._is_tool_call_argument_error(fallback_exc)
-                                and fallback_tool_call_retry_attempt < max_tool_call_retries
-                            ):
-                                fallback_tool_call_retry_attempt += 1
-                                operation_state["tool_call_retry_attempts"] = max(
-                                    int(operation_state.get("tool_call_retry_attempts", 0) or 0),
-                                    fallback_tool_call_retry_attempt,
-                                )
-                                retry_backoff = min(
-                                    20.0,
-                                    base_tool_call_backoff * (2 ** (fallback_tool_call_retry_attempt - 1)),
-                                )
-                                retry_backoff += random.uniform(0.0, min(0.5, retry_backoff * 0.2))
-                                self.logger.warning(
-                                    "pydantic_agent_tool_call_retry",
-                                    result_type=result_type.__name__,
-                                    retry_attempt=fallback_tool_call_retry_attempt,
-                                    retry_attempts=max_tool_call_retries,
-                                    backoff_seconds=round(retry_backoff, 2),
-                                    error=str(fallback_exc),
-                                    retry_mode="event_stream_fallback",
-                                )
-                                await asyncio.sleep(retry_backoff)
-                                try:
-                                    result = await self._run_with_timeout_policy(
-                                        operation=self._build_agent_operation(
-                                            agent=agent, user_prompt=user_prompt, run_kwargs=run_kwargs
-                                        ),
-                                        timeout_policy=timeout_policy,
-                                        result_type_name=result_type.__name__,
-                                        run_telemetry=run_telemetry,
-                                    )
-                                    fallback_tool_call_recovered = True
-                                    self.logger.info(
-                                        "pydantic_agent_tool_call_recovered",
-                                        result_type=result_type.__name__,
-                                        retry_attempt=fallback_tool_call_retry_attempt,
-                                        retry_mode="event_stream_fallback",
-                                    )
-                                    break
-                                except Exception as fallback_tool_call_retry_exc:
-                                    fallback_exc = fallback_tool_call_retry_exc
-                            if fallback_tool_call_recovered:
-                                self._populate_telemetry_from_result(result, run_telemetry)
-                                elapsed = time.monotonic() - start_time
-                                self.logger.info(
-                                    "pydantic_agent_fallback_recovered",
-                                    elapsed_sec=round(elapsed, 2),
-                                    result_type=result_type.__name__,
-                                    mode="pydantic_ai",
-                                )
-                                # Continue to normal post-processing with recovered result.
-                                pass
-                            else:
-                                recovered_output = self._recover_output_from_final_result_validation_error(
-                                    result_type=result_type,
-                                    exc=fallback_exc,
-                                    telemetry=run_telemetry,
-                                )
-                                if recovered_output is not None:
-                                    self.logger.warning(
-                                        "pydantic_agent_output_recovered_from_final_result_payload",
-                                        result_type=result_type.__name__,
-                                    )
-                                    run_telemetry["output_recovered_from_final_result_payload"] = True
-                                    run_telemetry.pop("_tool_call_started_at", None)
-                                    self._last_agent_run_telemetry = run_telemetry
-                                    operation_state["operation_status"] = "completed"
-                                    operation_state["operation_outcome"] = "recovered_output"
-                                    return recovered_output
-                                failover_result = await self._attempt_row_provider_failover(
-                                    exc=fallback_exc,
-                                    system_prompt=system_prompt,
-                                    user_prompt=user_prompt,
-                                    result_type=result_type,
-                                    complexity=complexity,
-                                    max_tokens=max_tokens,
-                                    prompt_type=prompt_type,
-                                    deps=deps,
-                                    toolsets=toolsets,
-                                    max_turns=max_turns,
-                                    max_tool_calls=max_tool_calls,
-                                    pre_retry_telemetry=pre_retry_telemetry,
-                                    current_run_telemetry=run_telemetry,
-                                    row_failover_context=row_failover_context,
-                                    provider_health_memory=provider_health_memory,
-                                )
-                                if failover_result is not None:
-                                    operation_state["operation_status"] = "completed"
-                                    operation_state["operation_outcome"] = "provider_failover"
-                                    run_telemetry.pop("_tool_call_started_at", None)
-                                    self._last_agent_run_telemetry = self._merge_failover_telemetry(
-                                        pre_retry_telemetry,
-                                        run_telemetry,
-                                    )
-                                    return failover_result
-                                elapsed = time.monotonic() - start_time
-                                tool_efficiency = self._build_tool_efficiency_summary(run_telemetry)
-                                if tool_efficiency:
-                                    run_telemetry["tool_efficiency"] = tool_efficiency
-                                run_telemetry["error"] = str(fallback_exc)
-                                run_telemetry.pop("_tool_call_started_at", None)
-                                self._last_agent_run_telemetry = run_telemetry
-                                self.logger.warning(
-                                    "pydantic_agent_call_failed",
-                                    elapsed_sec=round(elapsed, 2),
-                                    mode="pydantic_ai",
-                                    result_type=result_type.__name__,
-                                    operation_id=operation_state["operation_id"],
-                                    event_tool_calls_started=int(run_telemetry.get("event_tool_calls_started", 0) or 0),
-                                    event_tool_calls_completed=int(
-                                        run_telemetry.get(
-                                            "event_tool_calls_completed", run_telemetry.get("event_tool_calls", 0)
-                                        )
-                                        or 0
-                                    ),
-                                    event_skill_tool_calls=int(run_telemetry.get("event_skill_tool_calls", 0) or 0),
-                                    event_skill_effective_tool_calls=int(
-                                        run_telemetry.get("event_skill_effective_tool_calls", 0) or 0
-                                    ),
-                                    observed_tool_names=run_telemetry.get("event_tool_names"),
-                                    **self._exception_diagnostics(fallback_exc),
-                                )
-                                raise
-            elapsed = time.monotonic() - start_time
-            raw_output = result.output if hasattr(result, "output") else getattr(result, "data", None)
-            output = self._normalize_agent_output(raw_output, result_type)
-            if not event_stream_enabled:
-                # Non-stream Anthropic/proxy paths do not emit event callbacks, so
-                # backfill tool/usage telemetry from RunResult to keep grounding
-                # guards and efficiency metrics accurate.
-                self._populate_telemetry_from_result(result, run_telemetry)
-            # Log response summary for debugging
-            response_summary = self._summarize_output(output)
-            self.logger.info(
-                "pydantic_agent_call_complete",
-                elapsed_sec=round(elapsed, 2),
-                mode="pydantic_ai",
-                result_type=result_type.__name__,
-                operation_id=operation_state["operation_id"],
-                **response_summary,
-            )
-            usage_summary = self._record_pydantic_usage(result, prompt_type=f"{prompt_type}:{self.__class__.__name__}")
-            run_telemetry["usage"] = usage_summary or {}
-            tool_efficiency = self._build_tool_efficiency_summary(run_telemetry)
-            if tool_efficiency:
-                run_telemetry["tool_efficiency"] = tool_efficiency
-                self.logger.info(
-                    "pydantic_agent_tool_efficiency",
-                    result_type=result_type.__name__,
-                    **tool_efficiency,
-                )
-            if pre_retry_telemetry:
-                combined_telemetry = dict(pre_retry_telemetry)
-                for key in [
-                    "event_tool_calls",
-                    "event_tool_calls_started",
-                    "event_tool_calls_completed",
-                    "event_tool_effective_calls",
-                    "event_skill_tool_calls",
-                    "event_skill_discovery_tool_calls",
-                    "event_skill_bootstrap_tool_calls",
-                    "event_skill_execution_tool_calls",
-                    "event_skill_execution_effective_tool_calls",
-                    "event_skill_effective_tool_calls",
-                ]:
-                    combined_telemetry[key] = int(combined_telemetry.get(key, 0) or 0) + int(
-                        run_telemetry.get(key, 0) or 0
-                    )
-                for dict_key in [
-                    "event_tool_count_by_name",
-                    "event_tool_effective_count_by_name",
-                    "event_skill_tool_count_by_name",
-                ]:
-                    combined_telemetry[dict_key] = dict(combined_telemetry.get(dict_key, {}))
-                    for k, v in (run_telemetry.get(dict_key) or {}).items():
-                        combined_telemetry[dict_key][k] = int(combined_telemetry[dict_key].get(k, 0) or 0) + int(v or 0)
-                for list_key in ["event_tool_names", "event_tool_signatures"]:
-                    combined_telemetry[list_key] = list(combined_telemetry.get(list_key, [])) + list(
-                        run_telemetry.get(list_key) or []
-                    )
-                merged_turn_effectiveness: dict[str, dict[str, Any]] = {}
-                merged_turn_index = 0
-                for source in (pre_retry_telemetry, run_telemetry):
-                    turn_payloads = source.get("event_turn_effectiveness")
-                    if not isinstance(turn_payloads, dict):
-                        continue
-                    for _, payload in sorted(
-                        (
-                            (int(str(turn_key)), value)
-                            for turn_key, value in turn_payloads.items()
-                            if str(turn_key).strip().isdigit() and isinstance(value, dict)
-                        ),
-                        key=lambda item: item[0],
-                    ):
-                        merged_turn_index += 1
-                        merged_turn_effectiveness[str(merged_turn_index)] = dict(payload)
-                if merged_turn_effectiveness:
-                    combined_telemetry["event_turn_effectiveness"] = merged_turn_effectiveness
-                    combined_telemetry["event_turn_index"] = merged_turn_index
-                if isinstance(run_telemetry.get("skill_policy_diagnostics"), dict):
-                    combined_telemetry["skill_policy_diagnostics"] = dict(run_telemetry["skill_policy_diagnostics"])
-                combined_telemetry = self._merge_failover_telemetry(combined_telemetry, run_telemetry)
-                quality_summary = self._build_response_quality_summary(output, combined_telemetry)
-            else:
-                combined_telemetry = dict(run_telemetry)
-                quality_summary = self._build_response_quality_summary(output, run_telemetry)
-            quality_log = self.logger.info if bool(quality_summary.get("quality_ok", True)) else self.logger.warning
-            quality_log(
-                "pydantic_agent_response_quality",
-                result_type=result_type.__name__,
-                **quality_summary,
-            )
-            combined_telemetry.pop("_tool_call_started_at", None)
-            combined_telemetry.pop("_tool_call_turn_index", None)
-            self._last_agent_run_telemetry = combined_telemetry
-            self._log_trace_payload(
-                event_name="pydantic_agent_trace_response",
-                elapsed_sec=elapsed,
-                payload={
-                    "operation_id": operation_state["operation_id"],
-                    "result_type": result_type.__name__,
-                    "response_summary": response_summary,
-                    "response_quality": quality_summary,
-                    "usage": usage_summary or {},
-                    "tool_efficiency": tool_efficiency,
-                    "skill_policy_diagnostics": combined_telemetry.get("skill_policy_diagnostics"),
-                    "output": output,
-                },
-            )
-            operation_state["operation_status"] = "completed"
-            operation_state["operation_outcome"] = "success"
-            return output  # type: ignore[return-value]
-    def _resolve_agent_retry_policy(self) -> tuple[int, int]:
-        """Return the ``(retries, output_retries)`` pair used for PydanticAI runs.
-        Both values start from the runtime settings (``agent_retries`` and
-        ``output_retries``, each defaulting to 2) and are never negative. Two
-        independent floors may then raise them: one when Anthropic tool_choice
-        suppression is active, one when the Anthropic protocol is used with a
-        model whose name starts with "claude".
-        """
-        settings = self.settings
-        retries = max(int(getattr(settings, "agent_retries", 2) or 0), 0)
-        output_retries = max(int(getattr(settings, "output_retries", 2) or 0), 0)
-        if self._should_suppress_anthropic_tool_choice():
-            # Anthropic-compatible proxy gateways are more likely to emit
-            # transient malformed tool-call JSON, so keep a small retry floor
-            # that lets the tool call be corrected without a manual rerun.
-            retries, output_retries = max(retries, 3), max(output_retries, 2)
-        uses_anthropic_protocol = self.settings.protocol == LLMProtocolType.ANTHROPIC
-        if uses_anthropic_protocol and str(self.settings.model_standard).lower().startswith("claude"):
-            # Anthropic-compatible proxies can occasionally append trailing
-            # characters after otherwise-valid structured output; a slightly
-            # higher validation retry floor improves first-pass success.
-            output_retries = max(output_retries, 4)
-        return retries, output_retries
-    def _build_event_stream_handler(self, run_started_at: float, telemetry: dict[str, Any] | None = None):
-        """Create the async callback that logs every event of a PydanticAI stream.
-        The returned coroutine function takes ``(run_context, event_stream)``,
-        ignores the run context, and forwards each streamed event to
-        ``_log_pydantic_agent_event`` together with ``run_started_at`` and the
-        shared ``telemetry`` mapping.
-        """
-        async def _event_stream_handler(_run_context: Any, event_stream: Any) -> None:
-            async for stream_event in event_stream:
-                self._log_pydantic_agent_event(
-                    event=stream_event,
-                    run_started_at=run_started_at,
-                    telemetry=telemetry,
-                )
-        return _event_stream_handler
-    @staticmethod
-    def _should_retry_without_event_stream(exc: Exception) -> bool:
-        """Tell whether ``exc`` indicates that event streaming should be dropped for a retry.
-        The decision is a case-insensitive substring match on ``str(exc)``.
-        """
-        lowered = str(exc).lower()
-        unsupported_stream_markers = (
-            "stream_function",
-            "event_stream_handler",
-            # Anthropic-compatible local proxies can emit tool-call deltas that
-            # collide with ThinkingPart accumulation under event stream mode.
-            # Retrying once without event_stream_handler avoids this parser path.
-            "cannot apply a tool call delta to existing_part=thinkingpart",
-        )
-        return any(marker in lowered for marker in unsupported_stream_markers)
-    @staticmethod
-    def _is_rate_limit_error(exc: Exception) -> bool:
-        """Tell whether ``exc`` is classified as transient, quota/capacity, or provider-degraded."""
-        failure_class = ProviderFailureClassifier.classify(exc).failure_class
-        retryable_classes = {
-            ProviderFailureClass.RETRYABLE_TRANSIENT,
-            ProviderFailureClass.QUOTA_OR_CAPACITY,
-            ProviderFailureClass.PROVIDER_DEGRADED,
-        }
-        return failure_class in retryable_classes
-    @staticmethod
-    def _is_auth_error(exc: Exception) -> bool:
-        """Tell whether ``exc`` is classified as a terminal auth/token failure."""
-        classification = ProviderFailureClassifier.classify(exc)
-        return classification.failure_class == ProviderFailureClass.TERMINAL_AUTH
-    @staticmethod
-    def _is_tool_call_argument_error(exc: Exception) -> bool:
-        """Tell whether ``exc`` stems from malformed or missing tool-call argument JSON.
-        Two conditions must both hold: the classifier either reports the
-        ``tool_call_argument_or_validation_error`` reason or the
-        ``NON_PROVIDER_BUG`` class, and the lower-cased raw message contains
-        a tool-call marker or a JSON-parsing marker.
-        """
-        verdict = ProviderFailureClassifier.classify(exc)
-        lowered = str(verdict.raw_message or "").lower()
-        tool_call_markers = (
-            "toolretryerror",
-            "tool call",
-            "tool_call",
-            "tool '",
-            "include your response in a tool call",
-            "return text or include your response in a tool call",
-        )
-        json_shape_markers = (
-            "jsondecodeerror",
-            "eof while parsing an object",
-            "expecting value",
-            "unterminated string",
-        )
-        mentions_tool_call = any(marker in lowered for marker in tool_call_markers)
-        mentions_broken_json = any(marker in lowered for marker in json_shape_markers)
-        classified_as_candidate = (
-            verdict.classification_reason == "tool_call_argument_or_validation_error"
-            or verdict.failure_class == ProviderFailureClass.NON_PROVIDER_BUG
-        )
-        return classified_as_candidate and (mentions_tool_call or mentions_broken_json)
-    @staticmethod
-    def _is_tool_choice_protocol_mismatch(exc: Exception) -> bool:
-        """Tell whether the upstream rejected the tool_choice/function payload shape.
-        The text of ``exc`` and of its ``__cause__`` (when set) is searched
-        case-insensitively; both "tool_choice" and an invalid-function marker
-        must be present.
-        """
-        cause = getattr(exc, "__cause__", None)
-        own_text = str(exc).lower()
-        cause_text = "" if cause is None else str(cause).lower()
-        haystack = "\n".join((own_text, cause_text))
-        if "tool_choice" not in haystack:
-            return False
-        invalid_function_markers = (
-            "invalid value for `function`",
-            "invalid value for function",
-            '"function": null',
-        )
-        return any(marker in haystack for marker in invalid_function_markers)
-    @staticmethod
-    def _percentile(values: list[float], quantile: float) -> float:
-        """Return the nearest-rank percentile of ``values`` at ``quantile``.
-        ``quantile`` is clamped to [0.0, 1.0]. An empty input yields 0.0 and a
-        single sample is returned as-is (coerced to float). The rank is
-        ``(len(values) - 1) * quantile`` rounded half up.
-        """
-        if not values:
-            return 0.0
-        sorted_vals = sorted(float(v) for v in values)
-        if len(sorted_vals) == 1:
-            return sorted_vals[0]
-        q = min(1.0, max(0.0, float(quantile)))
-        position = (len(sorted_vals) - 1) * q
-        # Round half up explicitly. Built-in round() rounds halves to the nearest
-        # even integer, so tie ranks used to flip between the lower and the upper
-        # neighbour depending on the sample count (e.g. the median of 2 samples
-        # picked the lower value while the median of 4 picked the upper one).
-        idx = int(position)
-        if position - idx >= 0.5:
-            idx += 1
-        return sorted_vals[idx]
-    @staticmethod
-    def _tool_family_tool_names() -> dict[str, tuple[str, ...]]:
-        """Return a fresh mapping of tool-family name to the tool names in that family."""
-        lexical_tools = (
-            "grep_search",
-            "rg_search",
-            "search_evidence",
-            "read_evidence_document",
-        )
-        structural_tools = (
-            "ast_grep_search",
-            "get_definition",
-            "find_references",
-            "workspace_symbol",
-        )
-        families: dict[str, tuple[str, ...]] = {}
-        families["lexical"] = lexical_tools
-        families["structural"] = structural_tools
-        families["vector_docs"] = ("search_evidence_vector",)
-        families["vector_code"] = ("search_code_vector",)
-        return families
-    @classmethod
-    def _resolve_tool_family(cls, tool_name: str) -> str | None:
-        """Return the family that lists ``tool_name``, or None when no family does.
-        The name is stripped of surrounding whitespace and compared exactly
-        (case-sensitively) against ``_tool_family_tool_names()``.
-        """
-        wanted = str(tool_name or "").strip()
-        if not wanted:
-            return None
-        return next(
-            (family for family, members in cls._tool_family_tool_names().items() if wanted in members),
-            None,
-        )
-    @staticmethod
-    def _family_coverage_markers(family_name: str) -> dict[str, bool]:
-        """Return the requirements/docs/code coverage flags attributed to a tool family.
-        Unknown family names get all three flags set to False.
-        """
-        # (family, (requirements, docs, code)), checked in order.
-        family_flags = (
-            ("vector_docs", (True, True, False)),
-            ("vector_code", (True, False, True)),
-            ("structural", (True, False, True)),
-            ("lexical", (True, False, False)),
-        )
-        flags = (False, False, False)
-        for known_family, known_flags in family_flags:
-            if family_name == known_family:
-                flags = known_flags
-                break
-        return dict(zip(("requirements", "docs", "code"), flags))
-    @staticmethod
-    def _coverage_contribution_level(markers: dict[str, bool], *, skill_effective_calls: int = 0) -> str:
-        """Grade coverage markers as "high", "medium", "low" or "none".
-        With the requirements marker set, the grade depends on how many of the
-        docs/code markers accompany it. Without it, only a positive
-        ``skill_effective_calls`` lifts the grade from "none" to "low".
-        """
-        has_requirements = bool(markers.get("requirements"))
-        has_docs = bool(markers.get("docs"))
-        has_code = bool(markers.get("code"))
-        if has_requirements:
-            if has_docs and has_code:
-                return "high"
-            return "medium" if (has_docs or has_code) else "low"
-        return "low" if skill_effective_calls > 0 else "none"
-    @staticmethod
-    def _is_skill_tool_name(tool_name: str) -> bool:
-        """Tell whether ``tool_name`` (trimmed, case-insensitive) is one of the four skill tools."""
-        skill_tool_names = ("list_skills", "load_skill", "read_skill_resource", "run_skill_script")
-        candidate = str(tool_name or "").strip().lower()
-        return candidate in skill_tool_names
-    @classmethod
-    def _is_effective_skill_tool_payload(cls, *, tool_name: str, payload: Any) -> bool:
-        """Decide whether a skill-tool result counts as useful work.
-        Discovery/bootstrap tools (``list_skills``, ``load_skill``) and unknown
-        names never count, and neither do non-dict payloads. A
-        ``run_skill_script`` payload counts when it reports success with
-        non-blank stdout, or otherwise carries a boolean ``effective`` flag set
-        to True. A ``read_skill_resource`` payload counts when
-        ``benefit_signal`` or ``applied`` is truthy, or when ``evidence_refs``
-        is a list holding at least one non-blank entry.
-        """
-        name = str(tool_name or "").strip().lower()
-        if name not in ("run_skill_script", "read_skill_resource"):
-            return False
-        if not isinstance(payload, dict):
-            return False
-        if name == "run_skill_script":
-            succeeded = payload.get("success")
-            stdout_text = payload.get("stdout")
-            if isinstance(succeeded, bool) and isinstance(stdout_text, str):
-                return succeeded and bool(stdout_text.strip())
-            flagged = payload.get("effective")
-            return flagged if isinstance(flagged, bool) else False
-        # Remaining case: read_skill_resource.
-        if payload.get("benefit_signal") or payload.get("applied"):
-            return True
-        refs = payload.get("evidence_refs")
-        return isinstance(refs, list) and any(str(entry).strip() for entry in refs)
-    @classmethod
-    def _is_effective_tool_payload(cls, payload: Any) -> bool:
-        """Best-effort check of whether a tool result payload carries useful content.
-        None is never useful; booleans are taken as-is; numbers must be
-        positive; strings must be non-blank (strings starting with "{" or "["
-        are parsed as JSON and judged recursively when parsing succeeds);
-        lists must be non-empty and, when made only of strings, contain a
-        non-blank one. Dicts are judged by the first recognised key, checked in
-        a fixed order, with the truthiness of the dict as the last resort.
-        """
-        if payload is None:
-            return False
-        if isinstance(payload, bool):
-            return payload
-        if isinstance(payload, (int, float)):
-            return payload > 0
-        if isinstance(payload, str):
-            stripped = payload.strip()
-            if not stripped:
-                return False
-            if stripped[0] not in "{[":
-                return True
-            try:
-                decoded = json.loads(stripped)
-            except Exception:
-                # Looks like JSON but does not parse: keep treating it as non-empty text.
-                return True
-            return cls._is_effective_tool_payload(decoded)
-        if isinstance(payload, list):
-            if not payload:
-                return False
-            if all(isinstance(entry, str) for entry in payload):
-                return any(str(entry).strip() for entry in payload)
-            return True
-        if not isinstance(payload, dict):
-            return bool(payload)
-        success_flag = payload.get("success")
-        recovery_keys = (
-            "hits",
-            "refs",
-            "accepted_count",
-            "content",
-            "ref",
-            "path",
-            "results",
-            "result",
-        )
-        # Explicit failures with no recovery signal should not be counted as effective.
-        if success_flag is False and all(key not in payload for key in recovery_keys):
-            return False
-        # From here on the first recognised key decides; the order matters.
-        for list_key in ("hits", "refs"):
-            entries = payload.get(list_key)
-            if isinstance(entries, list):
-                return len(entries) > 0
-        accepted = payload.get("accepted_count")
-        if isinstance(accepted, int):
-            return accepted > 0
-        body_text = payload.get("content")
-        if isinstance(body_text, str):
-            return bool(body_text.strip())
-        result_rows = payload.get("results")
-        if isinstance(result_rows, list):
-            return len(result_rows) > 0
-        nested_result = payload.get("result")
-        if nested_result is not None:
-            return cls._is_effective_tool_payload(nested_result)
-        ref_text = payload.get("ref")
-        if isinstance(ref_text, str):
-            return bool(ref_text.strip())
-        found_flag = payload.get("found")
-        if isinstance(found_flag, bool):
-            return found_flag
-        location = payload.get("path")
-        if isinstance(location, str) and location.strip():
-            return True
-        failure_text = payload.get("error")
-        if isinstance(failure_text, str) and failure_text.strip():
-            return False
-        if isinstance(success_flag, bool):
-            return success_flag
-        return bool(payload)
-    @classmethod
-    def _is_effective_tool_result_event(cls, result: Any, *, tool_name: str) -> bool:
-        """Decide whether a tool-result event produced useful output.
-        ``final_result`` events never count. For skill tools the first
-        non-None attribute among output/content/result/data is judged by the
-        skill-specific check and nothing else is consulted. For every other
-        tool any of those attributes may qualify via the generic payload
-        check, with a boolean ``success`` attribute as the fallback.
-        """
-        name = str(tool_name or "").strip().lower()
-        # Internal final_result tool events should not contribute to tool usefulness metrics.
-        if name == "final_result":
-            return False
-        # Lazy pipeline: attributes are probed in order and only as far as needed.
-        present_values = (
-            getattr(result, attr, None)
-            for attr in ("output", "content", "result", "data")
-            if hasattr(result, attr)
-        )
-        candidate_payloads = (value for value in present_values if value is not None)
-        if cls._is_skill_tool_name(name):
-            for first_payload in candidate_payloads:
-                return cls._is_effective_skill_tool_payload(tool_name=name, payload=first_payload)
-            return False
-        if any(cls._is_effective_tool_payload(value) for value in candidate_payloads):
-            return True
-        success_flag = getattr(result, "success", None)
-        return success_flag if isinstance(success_flag, bool) else False
-    @classmethod
-    def _build_tool_efficiency_summary(cls, telemetry: dict[str, Any] | None) -> dict[str, Any]:
-        """Build compact tool/skill efficiency metrics for monitoring.
-        Reads the ``event_*`` counters (and ``usage.tool_calls``) from
-        ``telemetry`` and derives totals, rates, latency figures, per-family
-        call shares and, when ``event_turn_effectiveness`` is present, a
-        per-turn breakdown. The input mapping is only read, never modified.
-        Returns an empty dict when ``telemetry`` is not a dict or when it
-        records no started and no completed tool calls.
-        """
-        if not isinstance(telemetry, dict):
-            return {}
-        # --- Overall tool-call counters -------------------------------------
-        usage = telemetry.get("usage")
-        usage_tool_calls = int(usage.get("tool_calls", 0) or 0) if isinstance(usage, dict) else 0
-        started = int(telemetry.get("event_tool_calls_started", 0) or 0)
-        completed = int(telemetry.get("event_tool_calls_completed", telemetry.get("event_tool_calls", 0)) or 0)
-        effective_calls = int(telemetry.get("event_tool_effective_calls", 0) or 0)
-        # Completed calls are the larger of the event-stream count and the usage-reported count.
-        effective_completed = max(completed, usage_tool_calls)
-        if started <= 0 and effective_completed <= 0:
-            return {}
-        if effective_calls <= 0 and "event_tool_effective_calls" not in telemetry and effective_completed > 0:
-            # Backward-compatible fallback for historical telemetry that only tracked completion.
-            effective_calls = effective_completed
-        tool_count_by_name = telemetry.get("event_tool_count_by_name")
-        if not isinstance(tool_count_by_name, dict):
-            tool_count_by_name = {}
-        unique_tools_by_name = (
-            len(tool_count_by_name) if tool_count_by_name else len(set(telemetry.get("event_tool_names") or []))
-        )
-        tool_signatures_raw = telemetry.get("event_tool_signatures")
-        unique_tools_by_signature = (
-            len({str(sig) for sig in tool_signatures_raw})
-            if isinstance(tool_signatures_raw, list) and tool_signatures_raw
-            else 0
-        )
-        # Distinct call signatures, when recorded, take precedence over distinct tool names.
-        unique_tools = unique_tools_by_signature if unique_tools_by_signature > 0 else unique_tools_by_name
-        repeated_calls = max(0, effective_completed - unique_tools)
-        # With no recorded starts the completion rate is reported as 1.0.
-        completion_rate = (effective_completed / started) if started > 0 else 1.0
-        # --- Skill-tool counters --------------------------------------------
-        skill_count_by_name = telemetry.get("event_skill_tool_count_by_name")
-        if not isinstance(skill_count_by_name, dict):
-            skill_count_by_name = {}
-        if not skill_count_by_name and isinstance(tool_count_by_name, dict):
-            # Fallback: recover skill-class counters from generic tool names when
-            # event-stream skill fields were not populated.
-            recovered_skill_counts = {
-                skill_tool: int(tool_count_by_name.get(skill_tool, 0) or 0)
-                for skill_tool in ("list_skills", "load_skill", "read_skill_resource", "run_skill_script")
-                if int(tool_count_by_name.get(skill_tool, 0) or 0) > 0
-            }
-            if recovered_skill_counts:
-                skill_count_by_name = recovered_skill_counts
-        # Explicit telemetry counters win; per-name counts are only the default.
-        skill_discovery_calls = int(
-            telemetry.get("event_skill_discovery_tool_calls", skill_count_by_name.get("list_skills", 0)) or 0
-        )
-        skill_bootstrap_calls = int(
-            telemetry.get("event_skill_bootstrap_tool_calls", skill_count_by_name.get("load_skill", 0)) or 0
-        )
-        derived_skill_execution_calls = int(skill_count_by_name.get("read_skill_resource", 0) or 0) + int(
-            skill_count_by_name.get("run_skill_script", 0) or 0
-        )
-        if "event_skill_execution_tool_calls" in telemetry:
-            skill_execution_calls = int(telemetry.get("event_skill_execution_tool_calls", 0) or 0)
-        else:
-            # FR-123: execution effectiveness is derived from execution-class skill tools only.
-            skill_execution_calls = derived_skill_execution_calls
-        legacy_effective_calls = int(telemetry.get("event_skill_effective_tool_calls", 0) or 0)
-        skill_total = int(telemetry.get("event_skill_tool_calls", 0) or 0)
-        if skill_total <= 0:
-            skill_total = skill_discovery_calls + skill_bootstrap_calls + skill_execution_calls
-        if skill_total <= 0 and legacy_effective_calls > 0:
-            # Preserve visibility for legacy telemetry payloads without over-crediting execution usage.
-            skill_total = legacy_effective_calls
-        tool_effective_count_by_name = telemetry.get("event_tool_effective_count_by_name")
-        derived_skill_execution_effective_calls = 0
-        if isinstance(tool_effective_count_by_name, dict):
-            derived_skill_execution_effective_calls = int(
-                tool_effective_count_by_name.get("read_skill_resource", 0) or 0
-            ) + int(tool_effective_count_by_name.get("run_skill_script", 0) or 0)
-        if "event_skill_execution_effective_tool_calls" in telemetry:
-            skill_execution_effective_calls = int(telemetry.get("event_skill_execution_effective_tool_calls", 0) or 0)
-        else:
-            skill_execution_effective_calls = derived_skill_execution_effective_calls
-        # Clamp: effective execution calls can be neither negative nor exceed execution calls.
-        skill_execution_effective_calls = max(0, min(skill_execution_effective_calls, skill_execution_calls))
-        skill_effective = skill_execution_effective_calls
-        # Rates stay None (and are omitted from the summary) when their denominator is zero.
-        skill_execution_rate = (skill_execution_calls / skill_total) if skill_total > 0 else None
-        skill_effective_rate = (skill_effective / skill_total) if skill_total > 0 else None
-        skill_execution_effective_rate = (
-            (skill_execution_effective_calls / skill_execution_calls) if skill_execution_calls > 0 else None
-        )
-        # Bootstrap calls count as "bootstrap only" when no execution-class skill call happened.
-        skill_bootstrap_only_calls = skill_bootstrap_calls if (skill_execution_calls == 0 and skill_total > 0) else 0
-        skill_bootstrap_only_rate = (skill_bootstrap_only_calls / skill_total) if skill_total > 0 else None
-        skill_discovery_ratio = (skill_discovery_calls / skill_total) if skill_total > 0 else None
-        # --- Latency --------------------------------------------------------
-        latencies_raw = telemetry.get("event_tool_latencies_ms")
-        latency_values = (
-            [float(v) for v in latencies_raw if isinstance(v, (int, float))] if isinstance(latencies_raw, list) else []
-        )
-        avg_latency_ms = (sum(latency_values) / len(latency_values)) if latency_values else None
-        p95_latency_ms = cls._percentile(latency_values, 0.95) if latency_values else None
-        # --- Assemble the summary -------------------------------------------
-        summary: dict[str, Any] = {
-            "tool_calls_started": started,
-            "tool_calls_completed": effective_completed,
-            "tool_calls_effective": effective_calls,
-            "tool_calls_unique": unique_tools,
-            "tool_calls_repeated": repeated_calls,
-            "tool_completion_rate": round(completion_rate, 3),
-        }
-        if effective_completed > 0:
-            summary["tool_effective_rate"] = round(effective_calls / effective_completed, 3)
-        if unique_tools_by_signature > 0:
-            summary["tool_name_unique"] = unique_tools_by_name
-            summary["tool_signature_unique"] = unique_tools_by_signature
-            summary["tool_repetition_basis"] = "signature"
-        if effective_completed > 0:
-            summary["tool_repetition_rate"] = round(repeated_calls / effective_completed, 3)
-        summary["skill_calls_total"] = skill_total
-        summary["skill_calls_discovery"] = skill_discovery_calls
-        summary["skill_calls_bootstrap"] = skill_bootstrap_calls
-        summary["skill_calls_execution"] = skill_execution_calls
-        summary["skill_calls_execution_effective"] = skill_execution_effective_calls
-        summary["skill_calls_bootstrap_only"] = skill_bootstrap_only_calls
-        summary["skill_calls_effective"] = skill_effective
-        # Surface the legacy counter only when it disagrees with the derived value.
-        if legacy_effective_calls > 0 and legacy_effective_calls != skill_effective:
-            summary["legacy_skill_effective_calls_observed"] = legacy_effective_calls
-        if skill_execution_rate is not None:
-            summary["skill_execution_rate"] = round(skill_execution_rate, 3)
-        if skill_execution_effective_rate is not None:
-            summary["skill_execution_effective_rate"] = round(skill_execution_effective_rate, 3)
-        if skill_effective_rate is not None:
-            summary["skill_effective_rate"] = round(skill_effective_rate, 3)
-        if skill_bootstrap_only_rate is not None:
-            summary["skill_bootstrap_only_rate"] = round(skill_bootstrap_only_rate, 3)
-        if skill_discovery_ratio is not None:
-            summary["skill_discovery_ratio"] = round(skill_discovery_ratio, 3)
-        if avg_latency_ms is not None:
-            summary["tool_latency_avg_ms"] = round(avg_latency_ms)
-        if p95_latency_ms is not None:
-            summary["tool_latency_p95_ms"] = round(p95_latency_ms)
-        # --- Per-family call counts -----------------------------------------
-        family_tool_names = cls._tool_family_tool_names()
-        family_calls: dict[str, int] = {}
-        family_effectiveness: dict[str, float] = {}
-        for family_name, tool_names in family_tool_names.items():
-            calls = sum(int(tool_count_by_name.get(tool_name, 0) or 0) for tool_name in tool_names)
-            family_calls[family_name] = calls
-            # NOTE: this value is the family's share of all completed calls
-            # (calls / completed), not a per-family success ratio.
-            if effective_completed > 0:
-                family_effectiveness[family_name] = round(calls / effective_completed, 3)
-            else:
-                family_effectiveness[family_name] = 0.0
-        summary["tool_family_calls"] = family_calls
-        summary["tool_family_effectiveness"] = family_effectiveness
-        # --- Per-turn breakdown (only when turn telemetry exists) ------------
-        turn_effectiveness_raw = telemetry.get("event_turn_effectiveness")
-        if isinstance(turn_effectiveness_raw, dict) and turn_effectiveness_raw:
-            turn_entries: list[dict[str, Any]] = []
-            turn_effective_count = 0
-            coverage_turn_count = 0
-            aggregate_markers = {"requirements": False, "docs": False, "code": False}
-            # Keep only entries whose key is a digit string and whose value is a dict,
-            # ordered by numeric turn index.
-            ordered_turn_items = sorted(
-                (
-                    (int(str(turn_key)), payload)
-                    for turn_key, payload in turn_effectiveness_raw.items()
-                    if str(turn_key).strip().isdigit() and isinstance(payload, dict)
-                ),
-                key=lambda item: item[0],
-            )
-            for turn_index, payload in ordered_turn_items:
-                turn_tool_calls = int(payload.get("tool_calls", 0) or 0)
-                # When the turn has no effective-call counter, all of its tool calls are assumed effective.
-                turn_effective_tool_calls = int(payload.get("effective_tool_calls", turn_tool_calls) or 0)
-                turn_skill_discovery_calls = int(payload.get("skill_discovery_calls", 0) or 0)
-                turn_skill_bootstrap_calls = int(payload.get("skill_bootstrap_calls", 0) or 0)
-                # "skill_effective_calls" is the fallback key for both execution counters below.
-                turn_skill_execution_calls = int(
-                    payload.get("skill_execution_calls", payload.get("skill_effective_calls", 0)) or 0
-                )
-                turn_skill_execution_effective_calls = int(
-                    payload.get("skill_execution_effective_calls", payload.get("skill_effective_calls", 0)) or 0
-                )
-                turn_skill_execution_effective_calls = max(
-                    0,
-                    min(turn_skill_execution_effective_calls, turn_skill_execution_calls),
-                )
-                turn_skill_calls = int(payload.get("skill_calls", 0) or 0)
-                if turn_skill_calls <= 0:
-                    turn_skill_calls = (
-                        turn_skill_discovery_calls + turn_skill_bootstrap_calls + turn_skill_execution_calls
-                    )
-                turn_skill_execution_rate = (
-                    round(turn_skill_execution_calls / turn_skill_calls, 3)
-                    if turn_skill_calls > 0
-                    else (1.0 if turn_skill_execution_calls > 0 else 0.0)
-                )
-                turn_skill_effective_calls = turn_skill_execution_effective_calls
-                turn_skill_effective_rate = (
-                    round(turn_skill_effective_calls / turn_skill_calls, 3)
-                    if turn_skill_calls > 0
-                    else (1.0 if turn_skill_effective_calls > 0 else 0.0)
-                )
-                turn_skill_execution_effective_rate = (
-                    round(turn_skill_execution_effective_calls / turn_skill_execution_calls, 3)
-                    if turn_skill_execution_calls > 0
-                    else 0.0
-                )
-                # Unlike the run-level figure, the per-turn "bootstrap only" test keys on
-                # effective skill calls being zero rather than on execution calls being zero.
-                turn_skill_bootstrap_only_calls = (
-                    turn_skill_bootstrap_calls if (turn_skill_effective_calls == 0 and turn_skill_calls > 0) else 0
-                )
-                turn_skill_bootstrap_only_rate = (
-                    round(turn_skill_bootstrap_only_calls / turn_skill_calls, 3) if turn_skill_calls > 0 else 0.0
-                )
-                # Prefer the effective per-family counts; fall back to the plain per-family counts.
-                turn_family_calls_raw = payload.get("tool_family_effective_calls")
-                if not isinstance(turn_family_calls_raw, dict):
-                    turn_family_calls_raw = payload.get("tool_family_calls")
-                turn_family_calls = {
-                    family_name: int((turn_family_calls_raw or {}).get(family_name, 0) or 0)
-                    for family_name in family_tool_names
-                }
-                turn_effective_denominator = (
-                    turn_effective_tool_calls if turn_effective_tool_calls > 0 else turn_tool_calls
-                )
-                turn_family_effectiveness = {
-                    family_name: round((calls / turn_effective_denominator), 3)
-                    if turn_effective_denominator > 0
-                    else 0.0
-                    for family_name, calls in turn_family_calls.items()
-                }
-                raw_markers = payload.get("coverage_contribution_markers")
-                if isinstance(raw_markers, dict):
-                    coverage_markers = {
-                        "requirements": bool(raw_markers.get("requirements")),
-                        "docs": bool(raw_markers.get("docs")),
-                        "code": bool(raw_markers.get("code")),
-                    }
-                else:
-                    # No recorded markers: derive them from the families that were called this turn.
-                    coverage_markers = {"requirements": False, "docs": False, "code": False}
-                    for family_name, calls in turn_family_calls.items():
-                        if calls <= 0:
-                            continue
-                        family_markers = cls._family_coverage_markers(family_name)
-                        for marker_key in coverage_markers:
-                            coverage_markers[marker_key] = bool(
-                                coverage_markers[marker_key] or family_markers[marker_key]
-                            )
-                contribution_level = cls._coverage_contribution_level(
-                    coverage_markers,
-                    skill_effective_calls=turn_skill_effective_calls,
-                )
-                # A turn counts as effective when it has effective tool calls and either an
-                # effective skill execution or at least one call in a known tool family.
-                if turn_effective_tool_calls > 0 and (
-                    turn_skill_execution_effective_calls > 0 or any(count > 0 for count in turn_family_calls.values())
-                ):
-                    turn_effective_count += 1
-                if contribution_level != "none":
-                    coverage_turn_count += 1
-                for marker_key in aggregate_markers:
-                    aggregate_markers[marker_key] = aggregate_markers[marker_key] or coverage_markers[marker_key]
-                turn_entries.append(
-                    {
-                        "turn_index": turn_index,
-                        "tool_calls": turn_tool_calls,
-                        "effective_tool_calls": turn_effective_tool_calls,
-                        "skill_calls": turn_skill_calls,
-                        "skill_discovery_calls": turn_skill_discovery_calls,
-                        "skill_bootstrap_calls": turn_skill_bootstrap_calls,
-                        "skill_execution_calls": turn_skill_execution_calls,
-                        "skill_execution_effective_calls": turn_skill_execution_effective_calls,
-                        "skill_execution_rate": turn_skill_execution_rate,
-                        "skill_execution_effective_rate": turn_skill_execution_effective_rate,
-                        "skill_bootstrap_only_calls": turn_skill_bootstrap_only_calls,
-                        "skill_bootstrap_only_rate": turn_skill_bootstrap_only_rate,
-                        "skill_effective_calls": turn_skill_effective_calls,
-                        "skill_effective_rate": turn_skill_effective_rate,
-                        "tool_family_calls": turn_family_calls,
-                        "tool_family_effectiveness": turn_family_effectiveness,
-                        "coverage_contribution_markers": coverage_markers,
-                        "requirement_coverage_contribution_level": contribution_level,
-                    }
-                )
-            # Turn-level keys are added only when at least one valid turn entry was found.
-            if turn_entries:
-                turn_count = len(turn_entries)
-                summary["turn_effectiveness_summary"] = turn_entries
-                summary["turn_effective_rate"] = round(turn_effective_count / turn_count, 3)
-                summary["turn_requirement_coverage_contribution_rate"] = round(coverage_turn_count / turn_count, 3)
-                summary["turn_requirement_coverage_contribution_markers"] = aggregate_markers
-        return summary
-    @staticmethod
-    def _extract_tool_names(raw_name: Any) -> list[str]:
-        """Split a raw tool-name value into individual tool names.
-        "/", "," and ";" are treated like whitespace separators. When the
-        value is a single unseparated token, it is additionally checked for
-        being a back-to-back concatenation of known tool names (longest name
-        tried first at each position); if the whole token decomposes that way
-        the decomposed names are returned, otherwise the token itself is.
-        An empty or falsy value yields an empty list.
-        """
-        text = str(raw_name or "").strip()
-        if not text:
-            return []
-        spaced = text
-        for separator in ("/", ",", ";"):
-            spaced = spaced.replace(separator, " ")
-        tokens = [piece.strip() for piece in re.split(r"\s+", spaced) if piece.strip()]
-        if len(tokens) != 1 or tokens[0] != text:
-            return tokens
-        known_longest_first = sorted(
-            (
-                "list_directory",
-                "read_file",
-                "grep_search",
-                "rg_search",
-                "ast_grep_search",
-                "list_evidence_documents",
-                "search_evidence",
-                "read_evidence_document",
-                "search_evidence_vector",
-                "search_code_vector",
-                "vector_retrieval_status",
-                "get_definition",
-                "find_references",
-                "workspace_symbol",
-                "list_skills",
-                "load_skill",
-                "read_skill_resource",
-                "run_skill_script",
-            ),
-            key=len,
-            reverse=True,
-        )
-        pieces: list[str] = []
-        position = 0
-        while position < len(text):
-            for known_name in known_longest_first:
-                if text.startswith(known_name, position):
-                    pieces.append(known_name)
-                    position += len(known_name)
-                    break
-            else:
-                # Some part of the token is not a known tool name: keep the token whole.
-                return tokens
-        return pieces if pieces else tokens
-    def _log_pydantic_agent_event(
-        self,
-        *,
-        event: Any,
-        run_started_at: float,
-        telemetry: dict[str, Any] | None = None,
-    ) -> None:
-        """Emit structured logs for PydanticAI runtime events.
-        The event is dispatched on its class name:
-        ``FunctionToolCallEvent`` (tool call started), ``FunctionToolResultEvent``
-        (tool call completed), ``FinalResultEvent`` and ``PartDeltaEvent`` (the
-        latter only when ``self.settings.agent_event_stream_log_text_deltas`` is
-        truthy). Any other event type produces no log and no counter updates.
-        Args:
-            event: The runtime event object; only its class name and a few
-                attributes read via ``getattr`` are used.
-            run_started_at: Reference timestamp compared against
-                ``time.monotonic()`` to compute ``elapsed_sec``.
-            telemetry: Optional dict that is updated in place with ``event_*``
-                counters and private ``_tool_call_*`` bookkeeping maps. When it
-                is not a dict, only logging happens.
-        """
-        import time
-        # Dispatch is done on the class name string rather than isinstance().
-        event_name = type(event).__name__
-        elapsed = round(max(0.0, time.monotonic() - run_started_at), 2)
-        if isinstance(telemetry, dict):
-            # Touch the liveness marker for every event and make sure the
-            # effective-call counters exist even if no tool result ever arrives.
-            telemetry["event_last_seen_at"] = time.monotonic()
-            telemetry.setdefault("event_tool_effective_calls", 0)
-            telemetry.setdefault("event_skill_execution_effective_tool_calls", 0)
-            effective_count_by_name = telemetry.get("event_tool_effective_count_by_name")
-            if not isinstance(effective_count_by_name, dict):
-                telemetry["event_tool_effective_count_by_name"] = {}
-        if event_name == "FunctionToolCallEvent":
-            part = getattr(event, "part", None)
-            raw_tool_name = getattr(part, "tool_name", None) or getattr(part, "name", None) or "unknown"
-            tool_names = self._extract_tool_names(raw_tool_name)
-            # All counters below are attributed to the first extracted name only.
-            primary_tool_name = tool_names[0] if tool_names else str(raw_tool_name)
-            tool_call_id = getattr(part, "tool_call_id", None)
-            raw_args = getattr(part, "args", None)
-            # Stable textual form of the arguments, used for the call signature.
-            args_repr = ""
-            if raw_args is not None:
-                try:
-                    args_repr = json.dumps(raw_args, sort_keys=True, default=str, ensure_ascii=True)
-                except Exception:
-                    args_repr = str(raw_args)
-            self._log_trace_payload(
-                event_name="pydantic_agent_trace_tool_call",
-                elapsed_sec=elapsed,
-                payload={
-                    "tool_name": primary_tool_name,
-                    "tool_names": tool_names,
-                    "tool_call_id": tool_call_id,
-                    "args": raw_args,
-                },
-            )
-            if isinstance(telemetry, dict):
-                # Every started tool call advances the turn index by one.
-                turn_index = int(telemetry.get("event_turn_index", 0) or 0) + 1
-                telemetry["event_turn_index"] = turn_index
-                telemetry["event_tool_calls_started"] = int(telemetry.get("event_tool_calls_started", 0) or 0) + 1
-                telemetry["event_last_tool_activity_at"] = time.monotonic()
-                # Signature format is "<tool name>|<args repr>".
-                signatures = telemetry.get("event_tool_signatures")
-                if not isinstance(signatures, list):
-                    signatures = []
-                signatures.append(f"{primary_tool_name}|{args_repr}")
-                telemetry["event_tool_signatures"] = signatures
-                if tool_call_id:
-                    # Remember start time and turn per call id so the matching
-                    # result event can compute latency and find its turn.
-                    started_at_map = telemetry.get("_tool_call_started_at")
-                    if not isinstance(started_at_map, dict):
-                        started_at_map = {}
-                    started_at_map[str(tool_call_id)] = time.monotonic()
-                    telemetry["_tool_call_started_at"] = started_at_map
-                    turn_map = telemetry.get("_tool_call_turn_index")
-                    if not isinstance(turn_map, dict):
-                        turn_map = {}
-                    turn_map[str(tool_call_id)] = turn_index
-                    telemetry["_tool_call_turn_index"] = turn_map
-            logged_turn_index = int(telemetry.get("event_turn_index", 0) or 0) if isinstance(telemetry, dict) else None
-            self.logger.info(
-                "pydantic_agent_tool_call_started",
-                elapsed_sec=elapsed,
-                tool_name=primary_tool_name,
-                tool_names=tool_names if len(tool_names) > 1 else None,
-                tool_call_id=tool_call_id,
-                turn_index=logged_turn_index,
-            )
-            return
-        if event_name == "FunctionToolResultEvent":
-            result = getattr(event, "result", None)
-            raw_tool_name = getattr(result, "tool_name", None) or getattr(result, "name", None) or "unknown"
-            tool_names = self._extract_tool_names(raw_tool_name)
-            primary_tool_name = tool_names[0] if tool_names else str(raw_tool_name)
-            effective = self._is_effective_tool_result_event(result, tool_name=primary_tool_name)
-            tool_call_id = getattr(result, "tool_call_id", None)
-            # Recover the turn recorded when this call started; the entry is
-            # removed (pop) so each call id is matched at most once.
-            turn_index = None
-            if isinstance(telemetry, dict):
-                turn_map = telemetry.get("_tool_call_turn_index")
-                if isinstance(turn_map, dict) and tool_call_id:
-                    raw_turn = turn_map.pop(str(tool_call_id), None)
-                    if isinstance(raw_turn, int):
-                        turn_index = raw_turn
-                    telemetry["_tool_call_turn_index"] = turn_map
-            self.logger.info(
-                "pydantic_agent_tool_call_completed",
-                elapsed_sec=elapsed,
-                tool_name=primary_tool_name,
-                tool_names=tool_names if len(tool_names) > 1 else None,
-                tool_call_id=tool_call_id,
-                turn_index=turn_index,
-                effective=effective,
-            )
-            self._log_trace_payload(
-                event_name="pydantic_agent_trace_tool_result",
-                elapsed_sec=elapsed,
-                payload={
-                    "tool_name": primary_tool_name,
-                    "tool_names": tool_names,
-                    "tool_call_id": tool_call_id,
-                    "result": {
-                        "name": getattr(result, "name", None),
-                        "tool_name": getattr(result, "tool_name", None),
-                        "content": getattr(result, "content", None),
-                        "output": getattr(result, "output", None),
-                    },
-                },
-            )
-            if isinstance(telemetry, dict):
-                # Both "event_tool_calls" and "event_tool_calls_completed" are
-                # bumped once per result event.
-                telemetry["event_tool_calls"] = int(telemetry.get("event_tool_calls", 0) or 0) + 1
-                telemetry["event_tool_calls_completed"] = int(telemetry.get("event_tool_calls_completed", 0) or 0) + 1
-                telemetry["event_last_tool_activity_at"] = time.monotonic()
-                if effective:
-                    telemetry["event_tool_effective_calls"] = (
-                        int(telemetry.get("event_tool_effective_calls", 0) or 0) + 1
-                    )
-                # Ordered list of distinct tool names seen so far.
-                seen_tools = telemetry.get("event_tool_names")
-                if not isinstance(seen_tools, list):
-                    seen_tools = []
-                if primary_tool_name and primary_tool_name not in seen_tools:
-                    seen_tools.append(primary_tool_name)
-                telemetry["event_tool_names"] = seen_tools
-                if primary_tool_name == "final_result":
-                    # Keep the first non-blank raw input of a "json_invalid"
-                    # item found in the final_result content list.
-                    result_content = getattr(result, "content", None)
-                    if isinstance(result_content, list):
-                        for item in result_content:
-                            if not isinstance(item, dict):
-                                continue
-                            if str(item.get("type") or "") != "json_invalid":
-                                continue
-                            raw_invalid_input = item.get("input")
-                            if isinstance(raw_invalid_input, str) and raw_invalid_input.strip():
-                                telemetry["event_final_result_invalid_input"] = raw_invalid_input
-                                break
-                # Per-tool-name call counts (all calls, then effective calls).
-                tool_count_by_name = telemetry.get("event_tool_count_by_name")
-                if not isinstance(tool_count_by_name, dict):
-                    tool_count_by_name = {}
-                tool_count_by_name[primary_tool_name] = int(tool_count_by_name.get(primary_tool_name, 0) or 0) + 1
-                telemetry["event_tool_count_by_name"] = tool_count_by_name
-                if effective:
-                    effective_count_by_name = telemetry.get("event_tool_effective_count_by_name")
-                    if not isinstance(effective_count_by_name, dict):
-                        effective_count_by_name = {}
-                    effective_count_by_name[primary_tool_name] = (
-                        int(effective_count_by_name.get(primary_tool_name, 0) or 0) + 1
-                    )
-                    telemetry["event_tool_effective_count_by_name"] = effective_count_by_name
-                # Latency in whole milliseconds, measured from the matching
-                # call-started event; skipped when no start time was recorded.
-                started_at_map = telemetry.get("_tool_call_started_at")
-                if isinstance(started_at_map, dict) and tool_call_id:
-                    started_at = started_at_map.pop(str(tool_call_id), None)
-                    if isinstance(started_at, (int, float)):
-                        latencies = telemetry.get("event_tool_latencies_ms")
-                        if not isinstance(latencies, list):
-                            latencies = []
-                        latencies.append(int(max(0.0, (time.monotonic() - float(started_at)) * 1000)))
-                        telemetry["event_tool_latencies_ms"] = latencies
-                    telemetry["_tool_call_started_at"] = started_at_map
-                # Skill tools are split into discovery / bootstrap / execution
-                # groups; each group has its own counter in addition to the
-                # overall skill counter.
-                skill_discovery_tools = {"list_skills"}
-                skill_bootstrap_tools = {"load_skill"}
-                skill_execution_tools = {"read_skill_resource", "run_skill_script"}
-                skill_tools = skill_discovery_tools | skill_bootstrap_tools | skill_execution_tools
-                if primary_tool_name in skill_tools:
-                    telemetry["event_skill_tool_calls"] = int(telemetry.get("event_skill_tool_calls", 0) or 0) + 1
-                    skill_count_by_name = telemetry.get("event_skill_tool_count_by_name")
-                    if not isinstance(skill_count_by_name, dict):
-                        skill_count_by_name = {}
-                    skill_count_by_name[primary_tool_name] = int(skill_count_by_name.get(primary_tool_name, 0) or 0) + 1
-                    telemetry["event_skill_tool_count_by_name"] = skill_count_by_name
-                if primary_tool_name in skill_discovery_tools:
-                    telemetry["event_skill_discovery_tool_calls"] = (
-                        int(telemetry.get("event_skill_discovery_tool_calls", 0) or 0) + 1
-                    )
-                if primary_tool_name in skill_bootstrap_tools:
-                    telemetry["event_skill_bootstrap_tool_calls"] = (
-                        int(telemetry.get("event_skill_bootstrap_tool_calls", 0) or 0) + 1
-                    )
-                if primary_tool_name in skill_execution_tools:
-                    telemetry["event_skill_execution_tool_calls"] = (
-                        int(telemetry.get("event_skill_execution_tool_calls", 0) or 0) + 1
-                    )
-                    if effective:
-                        telemetry["event_skill_execution_effective_tool_calls"] = (
-                            int(telemetry.get("event_skill_execution_effective_tool_calls", 0) or 0) + 1
-                        )
-                        # Legacy field retained for backward-compatible consumers.
-                        telemetry["event_skill_effective_tool_calls"] = (
-                            int(telemetry.get("event_skill_effective_tool_calls", 0) or 0) + 1
-                        )
-                # Per-turn aggregation only happens when the result could be
-                # matched to a recorded turn (i.e. the call had an id and a
-                # matching started event).
-                if isinstance(turn_index, int) and turn_index > 0:
-                    # Turn payloads are keyed by the turn index as a string.
-                    turn_key = str(turn_index)
-                    turn_payloads = telemetry.get("event_turn_effectiveness")
-                    if not isinstance(turn_payloads, dict):
-                        turn_payloads = {}
-                    current_payload = turn_payloads.get(turn_key)
-                    if not isinstance(current_payload, dict):
-                        current_payload = {}
-                    current_payload["tool_calls"] = int(current_payload.get("tool_calls", 0) or 0) + 1
-                    if effective:
-                        current_payload["effective_tool_calls"] = (
-                            int(current_payload.get("effective_tool_calls", 0) or 0) + 1
-                        )
-                    if primary_tool_name in skill_tools:
-                        current_payload["skill_calls"] = int(current_payload.get("skill_calls", 0) or 0) + 1
-                    if primary_tool_name in skill_discovery_tools:
-                        current_payload["skill_discovery_calls"] = (
-                            int(current_payload.get("skill_discovery_calls", 0) or 0) + 1
-                        )
-                    if primary_tool_name in skill_bootstrap_tools:
-                        current_payload["skill_bootstrap_calls"] = (
-                            int(current_payload.get("skill_bootstrap_calls", 0) or 0) + 1
-                        )
-                    if primary_tool_name in skill_execution_tools:
-                        current_payload["skill_execution_calls"] = (
-                            int(current_payload.get("skill_execution_calls", 0) or 0) + 1
-                        )
-                        if effective:
-                            current_payload["skill_execution_effective_calls"] = (
-                                int(current_payload.get("skill_execution_effective_calls", 0) or 0) + 1
-                            )
-                            # Legacy per-turn field kept in sync with effective execution semantics.
-                            current_payload["skill_effective_calls"] = (
-                                int(current_payload.get("skill_effective_calls", 0) or 0) + 1
-                            )
-                    family_name = self._resolve_tool_family(primary_tool_name)
-                    # Normalise the per-family counters so every key yielded by
-                    # _tool_family_tool_names() is present as an int, then
-                    # bump the family of this tool (if it resolved to one).
-                    family_calls_raw = current_payload.get("tool_family_calls")
-                    if not isinstance(family_calls_raw, dict):
-                        family_calls_raw = {}
-                    for family_key in self._tool_family_tool_names():
-                        family_calls_raw[family_key] = int(family_calls_raw.get(family_key, 0) or 0)
-                    if family_name:
-                        family_calls_raw[family_name] = int(family_calls_raw.get(family_name, 0) or 0) + 1
-                    current_payload["tool_family_calls"] = family_calls_raw
-                    family_effective_calls_raw = current_payload.get("tool_family_effective_calls")
-                    if not isinstance(family_effective_calls_raw, dict):
-                        family_effective_calls_raw = {}
-                    for family_key in self._tool_family_tool_names():
-                        family_effective_calls_raw[family_key] = int(family_effective_calls_raw.get(family_key, 0) or 0)
-                    if family_name and effective:
-                        family_effective_calls_raw[family_name] = (
-                            int(family_effective_calls_raw.get(family_name, 0) or 0) + 1
-                        )
-                    current_payload["tool_family_effective_calls"] = family_effective_calls_raw
-                    # Coverage markers are sticky booleans: once a marker is
-                    # True for a turn it stays True (OR-merge below).
-                    raw_markers = current_payload.get("coverage_contribution_markers")
-                    markers = {
-                        "requirements": bool((raw_markers or {}).get("requirements"))
-                        if isinstance(raw_markers, dict)
-                        else False,
-                        "docs": bool((raw_markers or {}).get("docs")) if isinstance(raw_markers, dict) else False,
-                        "code": bool((raw_markers or {}).get("code")) if isinstance(raw_markers, dict) else False,
-                    }
-                    if family_name and effective:
-                        # NOTE(review): assumes _family_coverage_markers returns a
-                        # mapping with "requirements", "docs" and "code" keys — confirm.
-                        family_markers = self._family_coverage_markers(family_name)
-                        for marker_key in markers:
-                            markers[marker_key] = bool(markers[marker_key] or family_markers[marker_key])
-                    current_payload["coverage_contribution_markers"] = markers
-                    turn_payloads[turn_key] = current_payload
-                    telemetry["event_turn_effectiveness"] = turn_payloads
-            return
-        if event_name == "FinalResultEvent":
-            # turn_count is the number of tool calls started so far, or None
-            # when no telemetry dict is being maintained.
-            turn_count = None
-            if isinstance(telemetry, dict):
-                turn_count = int(telemetry.get("event_turn_index", 0) or 0)
-            self.logger.info(
-                "pydantic_agent_final_result_event",
-                elapsed_sec=elapsed,
-                turn_count=turn_count,
-            )
-            return
-        # Text deltas are only logged when explicitly enabled in settings.
-        if event_name == "PartDeltaEvent" and getattr(self.settings, "agent_event_stream_log_text_deltas", False):
-            delta = getattr(event, "delta", None)
-            delta_text = (
-                getattr(delta, "content_delta", None)
-                or getattr(delta, "text", None)
-                or getattr(delta, "delta", None)
-                or ""
-            )
-            if delta_text:
-                self.logger.info(
-                    "pydantic_agent_text_delta",
-                    elapsed_sec=elapsed,
-                    # Only the first 200 characters are logged.
-                    delta_preview=str(delta_text)[:200],
-                )
-            return
-    def _summarize_output(self, output: Any) -> dict[str, Any]:
-        """Summarize agent output for logging.
-        Extracts key metrics from the output without logging full content.
-        """
-        summary: dict[str, Any] = {}
-        # Common fields across output models
-        if hasattr(output, "confidence"):
-            summary["confidence"] = round(output.confidence, 2)
-        if hasattr(output, "score"):
-            summary["score"] = output.score
-        if hasattr(output, "score_missing"):
-            summary["score_missing"] = bool(getattr(output, "score_missing", False))
-        if hasattr(output, "posture_score"):
-            summary["posture_score"] = output.posture_score
-        # Count findings/vulnerabilities
-        if hasattr(output, "issues") and output.issues:
-            summary["issues_count"] = len(output.issues)
-        if hasattr(output, "vulnerabilities") and output.vulnerabilities:
-            summary["vulnerabilities_count"] = len(output.vulnerabilities)
-        if hasattr(output, "assessments") and output.assessments:
-            summary["assessments_count"] = len(output.assessments)
-        # Tech stack if available
-        if hasattr(output, "tech_stack") and output.tech_stack:
-            summary["tech_stack"] = output.tech_stack[:3]  # First 3 items
-        return summary
-    def _normalize_agent_output(self, output: Any, result_type: type[ResultType]) -> Any:
-        """Normalize stream/text payloads into expected result type shape.
-        When ``result_type`` is ``str`` the output is coerced to text (``None``
-        becomes ``""``, bytes are decoded as UTF-8 with replacement, dicts and
-        lists are JSON-encoded). Otherwise, if ``output`` is not already an
-        instance of ``result_type``, the method tries ``model_validate_json`` /
-        ``model_validate`` on ``result_type`` when those attributes are callable.
-        Validation failures are swallowed: if nothing validates, the original
-        ``output`` is returned unchanged.
-        """
-        if result_type is str:
-            if isinstance(output, str):
-                # Text from the Anthropic protocol goes through an extra
-                # normalization pass; other protocols return it untouched.
-                if self.settings.protocol == LLMProtocolType.ANTHROPIC:
-                    return self._normalize_anthropic_text_payload_with_diagnostics(
-                        output,
-                        source="normalize_agent_output:str",
-                    )
-                return output
-            if output is None:
-                return ""
-            if isinstance(output, bytes):
-                # Undecodable bytes are replaced rather than raising.
-                decoded = output.decode("utf-8", errors="replace")
-                if self.settings.protocol == LLMProtocolType.ANTHROPIC:
-                    return self._normalize_anthropic_text_payload_with_diagnostics(
-                        decoded,
-                        source="normalize_agent_output:bytes",
-                    )
-                return decoded
-            if isinstance(output, (dict, list)):
-                return json.dumps(output, ensure_ascii=False)
-            return str(output)
-        if isinstance(output, result_type):
-            return output
-        # Looked up via getattr so result types without these validators
-        # simply skip the corresponding step.
-        validator_json = getattr(result_type, "model_validate_json", None)
-        validator_obj = getattr(result_type, "model_validate", None)
-        if isinstance(output, str):
-            text_payload = output
-            if self.settings.protocol == LLMProtocolType.ANTHROPIC:
-                text_payload = self._normalize_anthropic_text_payload_with_diagnostics(
-                    text_payload,
-                    source="normalize_agent_output:model",
-                )
-            normalized_payload = self._strip_json_fence(text_payload)
-            if callable(validator_json):
-                try:
-                    # First attempt: the whole (de-fenced) payload as JSON.
-                    return validator_json(normalized_payload)
-                except Exception:
-                    # Second attempt: each extracted JSON object candidate,
-                    # tried from the last candidate to the first.
-                    for candidate in reversed(self._extract_json_object_candidates(normalized_payload)):
-                        try:
-                            return validator_json(candidate)
-                        except Exception:
-                            continue
-            if callable(validator_obj):
-                # Last attempt: parse with json.loads and validate the
-                # resulting Python object.
-                try:
-                    decoded = json.loads(normalized_payload)
-                except Exception:
-                    decoded = None
-                if decoded is not None:
-                    try:
-                        return validator_obj(decoded)
-                    except Exception:
-                        pass
-            # Nothing validated: hand back the original string untouched.
-            return output
-        if callable(validator_obj) and isinstance(output, (dict, list)):
-            try:
-                return validator_obj(output)
-            except Exception:
-                return output
-        return output
-    def _build_response_quality_summary(self, output: Any, telemetry: dict[str, Any] | None = None) -> dict[str, Any]:
-        """Build response-quality diagnostics from output + observed tool activity.
-        Combines the digest from ``_summarize_output`` with counters read from
-        ``telemetry`` (when it is a dict) and the per-turn data from
-        ``_build_tool_efficiency_summary``. The returned dict always contains
-        ``quality_flags`` (a list of heuristic warning names) and ``quality_ok``
-        (``True`` when no flag was raised).
-        """
-        summary = self._summarize_output(output)
-        quality: dict[str, Any] = {}
-        quality["score"] = summary.get("score")
-        quality["score_missing"] = bool(summary.get("score_missing", False))
-        quality["confidence"] = summary.get("confidence")
-        quality["issues_count"] = summary.get("issues_count", 0)
-        quality["vulnerabilities_count"] = summary.get("vulnerabilities_count", 0)
-        quality["assessments_count"] = summary.get("assessments_count", 0)
-        has_reasoning_field = hasattr(output, "reasoning")
-        quality["reasoning_chars"] = len(str(getattr(output, "reasoning", "") or "")) if has_reasoning_field else 0
-        # NOTE(review): _summarize_output never sets a "files_analyzed" key, so
-        # this currently always evaluates to 0 — confirm whether the value was
-        # meant to come from the output object instead.
-        quality["files_analyzed"] = summary.get("files_analyzed", 0)
-        tech_stack = summary.get("tech_stack")
-        quality["tech_stack"] = tech_stack
-        tool_count_by_name = telemetry.get("event_tool_count_by_name") if isinstance(telemetry, dict) else {}
-        if not isinstance(tool_count_by_name, dict):
-            tool_count_by_name = {}
-        # Prefer the "completed" counter; fall back to "event_tool_calls".
-        observed_tool_calls = int(
-            (telemetry or {}).get("event_tool_calls_completed", (telemetry or {}).get("event_tool_calls", 0)) or 0
-        )
-        quality["observed_tool_calls"] = observed_tool_calls
-        # When no effective-call counter was recorded, every observed call is
-        # reported as effective.
-        has_effective_counter = isinstance(telemetry, dict) and "event_tool_effective_calls" in telemetry
-        observed_tool_effective_calls = int(
-            (telemetry or {}).get("event_tool_effective_calls", observed_tool_calls) or 0
-        )
-        quality["observed_tool_effective_calls"] = observed_tool_effective_calls
-        quality["observed_read_file_calls"] = int(tool_count_by_name.get("read_file", 0) or 0)
-        # Both skill counters fall back to the legacy
-        # "event_skill_effective_tool_calls" field when the newer key is absent.
-        observed_skill_execution_calls = int(
-            (telemetry or {}).get(
-                "event_skill_execution_tool_calls", (telemetry or {}).get("event_skill_effective_tool_calls", 0)
-            )
-            or 0
-        )
-        observed_skill_effective_calls = int(
-            (telemetry or {}).get(
-                "event_skill_execution_effective_tool_calls",
-                (telemetry or {}).get("event_skill_effective_tool_calls", 0),
-            )
-            or 0
-        )
-        quality["observed_skill_execution_calls"] = observed_skill_execution_calls
-        quality["observed_skill_effective_calls"] = observed_skill_effective_calls
-        # Per-turn figures are copied over only when present with the
-        # expected type in the efficiency summary.
-        efficiency_summary = self._build_tool_efficiency_summary(telemetry if isinstance(telemetry, dict) else None)
-        turn_effectiveness_summary = efficiency_summary.get("turn_effectiveness_summary")
-        if isinstance(turn_effectiveness_summary, list):
-            quality["turn_effectiveness_summary"] = turn_effectiveness_summary
-            quality["observed_turn_count"] = len(turn_effectiveness_summary)
-        requirement_markers = efficiency_summary.get("turn_requirement_coverage_contribution_markers")
-        if isinstance(requirement_markers, dict):
-            quality["requirement_coverage_contribution_markers"] = {
-                "requirements": bool(requirement_markers.get("requirements")),
-                "docs": bool(requirement_markers.get("docs")),
-                "code": bool(requirement_markers.get("code")),
-            }
-        requirement_rate = efficiency_summary.get("turn_requirement_coverage_contribution_rate")
-        if isinstance(requirement_rate, (int, float)):
-            quality["requirement_coverage_contribution_rate"] = round(float(requirement_rate), 3)
-        turn_effective_rate = efficiency_summary.get("turn_effective_rate")
-        if isinstance(turn_effective_rate, (int, float)):
-            quality["turn_effective_rate"] = round(float(turn_effective_rate), 3)
-        # Heuristic quality flags; each appended name marks one suspicious
-        # combination of output content and observed tool activity.
-        flags: list[str] = []
-        if has_reasoning_field and quality["reasoning_chars"] < 80:
-            flags.append("reasoning_too_short")
-        if quality["observed_read_file_calls"] >= 3 and isinstance(tech_stack, list):
-            # Flag only when every reported stack entry contains "unknown".
-            normalized_stack = [str(item).strip().lower() for item in tech_stack]
-            if normalized_stack and all(("unknown" in item) for item in normalized_stack):
-                flags.append("unknown_tech_stack_with_code_reads")
-        score = quality.get("score")
-        score_present = not bool(quality.get("score_missing", False))
-        quality["score_present"] = score_present
-        # Use effective calls for the activity threshold when that counter
-        # exists, otherwise the raw call count.
-        high_activity_calls = observed_tool_effective_calls if has_effective_counter else observed_tool_calls
-        if score_present and isinstance(score, (int, float)) and float(score) <= 2.5 and high_activity_calls >= 8:
-            flags.append("very_low_score_after_high_tool_activity")
-        # Outputs exposing both "notes" and "recommendations" are checked for
-        # documentation signals.
-        if hasattr(output, "notes") and hasattr(output, "recommendations"):
-            doc_signal_count = (
-                len(getattr(output, "notes", []) or [])
-                + len(getattr(output, "recommendations", []) or [])
-                + len(getattr(output, "missing_sections", []) or [])
-                + len(getattr(output, "strengths", []) or [])
-            )
-            quality["doc_signal_count"] = doc_signal_count
-            if quality["observed_tool_calls"] >= 1 and doc_signal_count == 0:
-                flags.append("docs_signal_too_thin")
-        # Outputs exposing both "vulnerabilities" and "recommendations" are
-        # checked for security signals.
-        if hasattr(output, "vulnerabilities") and hasattr(output, "recommendations"):
-            security_signal_count = (
-                len(getattr(output, "vulnerabilities", []) or [])
-                + len(getattr(output, "recommendations", []) or [])
-                + len(getattr(output, "security_controls", []) or [])
-            )
-            quality["security_signal_count"] = security_signal_count
-            if quality["observed_tool_calls"] >= 8 and security_signal_count == 0:
-                flags.append("security_signal_too_thin")
-        quality["quality_flags"] = flags
-        quality["quality_ok"] = len(flags) == 0
-        return quality
-    @staticmethod
-    def _strip_json_fence(text: str) -> str:
-        trimmed = text.strip()
-        if trimmed.startswith("```"):
-            lines = trimmed.splitlines()
-            if len(lines) >= 2 and lines[-1].strip() == "```":
-                return "\n".join(lines[1:-1]).strip()
-            return "\n".join(lines[1:]).strip()
-        return trimmed
-    def _build_model_settings(self) -> ModelSettings:
-        settings: dict[str, Any] = {
-            "temperature": 0.0,
-            "max_tokens": self.settings.max_tokens_per_request,
-        }
-        from vds_audit_orchestrator.config import inject_reasoning_effort
-        inject_reasoning_effort(settings, self.settings.reasoning_effort, self.settings.protocol)
-        if self._should_disable_parallel_tool_calls():
-            # Local OpenAI-compatible gateways often perform better with serialized
-            # tool invocation; this reduces bursty tool-call fan-out and improves
-            # bounded-budget stability.
-            settings["parallel_tool_calls"] = False
-            self.logger.info(
-                "openai_parallel_tool_calls_disabled",
-                base_url=self.settings.base_url,
-                protocol=self.settings.protocol.value,
-            )
-        if self._should_suppress_anthropic_tool_choice():
-            # Some Anthropic-compatible proxy gateways reject tool_choice=auto after
-            # translation to OpenAI-compatible payloads; explicit null suppresses this
-            # field while preserving tool definitions/calls.
-            settings["extra_body"] = build_structured_output_extra_body(
-                self.settings.base_url,
-                self.settings.protocol,
-            )
-            self.logger.info(
-                "anthropic_proxy_tool_choice_suppressed",
-                base_url=self.settings.base_url,
-                protocol=self.settings.protocol.value,
-            )
-        elif self._should_suppress_openai_tool_choice():
-            settings["extra_body"] = build_structured_output_extra_body(
-                self.settings.base_url,
-                self.settings.protocol,
-            )
-            self.logger.info(
-                "openai_proxy_tool_choice_suppressed",
-                base_url=self.settings.base_url,
-                protocol=self.settings.protocol.value,
-            )
-        if self._is_openai_codex_protocol(self.settings.protocol):
-            settings["openai_store"] = False
-        return ModelSettings(
-            **settings,
-        )
-    def _build_run_model_settings(self, max_tokens: int) -> dict[str, Any]:
-        """Build per-run model settings kwargs used by Agent(...)."""
-        settings: dict[str, Any] = {
-            "temperature": 0.0,
-            "max_tokens": max_tokens,
-        }
-        from vds_audit_orchestrator.config import inject_reasoning_effort
-        inject_reasoning_effort(settings, self.settings.reasoning_effort, self.settings.protocol)
-        if self._should_disable_parallel_tool_calls():
-            settings["parallel_tool_calls"] = False
-        if self._should_suppress_anthropic_tool_choice() or self._should_suppress_openai_tool_choice():
-            settings["extra_body"] = build_structured_output_extra_body(
-                self.settings.base_url,
-                self.settings.protocol,
-            )
-        if self._is_openai_codex_protocol(self.settings.protocol):
-            settings["openai_store"] = False
-        return settings
-    def _should_disable_parallel_tool_calls(self) -> bool:
-        """Disable parallel tool calls for local OpenAI-compatible endpoints.
-        Returns:
-            True only when the protocol is ``LLMProtocolType.OPENAI`` and the
-            endpoint host is ``localhost``, ``127.0.0.1``, ``0.0.0.0`` or
-            ``::1``. An empty ``base_url`` falls back to
-            ``http://127.0.0.1:11434`` and therefore counts as local. A
-            ``base_url`` that cannot be parsed is treated as not local.
-        """
-        if self.settings.protocol != LLMProtocolType.OPENAI:
-            return False
-        endpoint = str(self.settings.base_url or "").strip()
-        if not endpoint:
-            endpoint = "http://127.0.0.1:11434"
-        elif "://" not in endpoint and not endpoint.startswith("//"):
-            # urlparse only recognises a netloc introduced by "//"; without it
-            # "localhost:11434" parses as scheme="localhost" with hostname=None
-            # and a local endpoint would be missed.
-            endpoint = f"//{endpoint}"
-        try:
-            host = (urlparse(endpoint).hostname or "").lower()
-        except ValueError:
-            # Malformed netloc (e.g. unbalanced IPv6 brackets): not provably
-            # local, so leave parallel tool calls enabled instead of crashing.
-            return False
-        return host in {"localhost", "127.0.0.1", "0.0.0.0", "::1"}
-    def _should_suppress_anthropic_tool_choice(self) -> bool:
-        """Return True for non-official Anthropic endpoints with tool_choice incompatibility.
-        The decision is delegated to ``should_suppress_anthropic_tool_choice``,
-        which receives the configured base URL and protocol.
-        """
-        llm_settings = self.settings
-        suppress = should_suppress_anthropic_tool_choice(llm_settings.base_url, llm_settings.protocol)
-        return suppress
-    def _should_suppress_openai_tool_choice(self) -> bool:
-        """Return True for OpenAI-compatible endpoints with explicit tool_choice incompatibility.
-        The decision is delegated to ``should_suppress_openai_tool_choice``,
-        which receives the configured base URL and protocol.
-        """
-        llm_settings = self.settings
-        suppress = should_suppress_openai_tool_choice(llm_settings.base_url, llm_settings.protocol)
-        return suppress
-    # ------------------------------------------------------------------
-    # FR-70 / TSK-641.2: Event stream telemetry resilience
-    # ------------------------------------------------------------------
-    def _populate_telemetry_from_result(
-        self,
-        result: Any,
-        run_telemetry: dict[str, Any],
-    ) -> None:
-        """Backfill *run_telemetry* from a PydanticAI ``RunResult`` (fallback path).
-        Used when the event stream handler was removed. This prevents post-run
-        guards from seeing zeroed-out counters and raising
-        ``RuntimeError("agentic usage guard unsatisfied")`` even though tools
-        actually ran successfully.
-        Fallback data sources, tried in order:
-        1. ``result.usage()`` — request count, token counts, tool_calls count.
-        2. ``result.all_messages()`` — walk message parts, count ``tool-call``
-           parts and collect their tool names (skipped when
-           ``event_tool_names`` is already populated).
-        3. If neither works the dict is left unchanged (guards will still see
-           whatever partial data the event stream captured before failing).
-        Scalar counters are only backfilled while they are still below 1;
-        ``event_skill_tool_count_by_name`` is merged by per-tool maximum.
-        Args:
-            result: Run result object. ``usage()`` and ``all_messages()`` are
-                probed best-effort; a failure inside either probe is logged at
-                debug level instead of propagating.
-            run_telemetry: Telemetry dict, updated in place.
-        """
-        def _current(key: str) -> int:
-            # Missing, None and 0 all read as "nothing recorded yet".
-            return int(run_telemetry.get(key, 0) or 0)
-        def _backfill_tool_call_counters(observed: int) -> None:
-            # Only fill counters the event stream left at zero.
-            if _current("event_tool_calls_completed") < 1 and observed > 0:
-                run_telemetry["event_tool_calls_completed"] = observed
-                run_telemetry["event_tool_calls_started"] = observed
-                run_telemetry["event_tool_calls"] = observed
-            if _current("event_tool_effective_calls") < 1 and observed > 0:
-                # No per-tool usefulness signal is available here; preserve monotonic counters.
-                run_telemetry["event_tool_effective_calls"] = observed
-        # -- Fallback 1: result.usage() ---------------------------------
-        try:
-            usage = result.usage()
-            if usage is not None:
-                usage_tool_calls = int(getattr(usage, "tool_calls", 0) or 0)
-                usage_requests = int(getattr(usage, "requests", 0) or 0)
-                _backfill_tool_call_counters(usage_tool_calls)
-                if not run_telemetry.get("usage"):
-                    run_telemetry["usage"] = {
-                        "requests": usage_requests,
-                        "input_tokens": int(getattr(usage, "input_tokens", 0) or 0),
-                        "output_tokens": int(getattr(usage, "output_tokens", 0) or 0),
-                        "tool_calls": usage_tool_calls,
-                    }
-        except Exception as exc:
-            # usage() may not be available on all result types; stay
-            # best-effort but keep the reason visible for diagnosis.
-            self.logger.debug("event_stream_telemetry_fallback_usage_failed", error=str(exc))
-        # -- Fallback 2: result.all_messages() ---------------------------
-        try:
-            all_messages = getattr(result, "all_messages", None)
-            messages = all_messages() if callable(all_messages) else []
-            if messages and not run_telemetry.get("event_tool_names"):
-                count_by_name: dict[str, int] = {}
-                tool_call_count = 0
-                for msg in messages:
-                    for part in getattr(msg, "parts", None) or ():
-                        if getattr(part, "part_kind", "") != "tool-call":
-                            continue
-                        # Unnamed tool-call parts still count towards the total.
-                        tool_call_count += 1
-                        tool_name = getattr(part, "tool_name", None)
-                        if tool_name:
-                            count_by_name[tool_name] = count_by_name.get(tool_name, 0) + 1
-                if count_by_name:
-                    # Dict keys give de-duplicated names in first-seen order;
-                    # list(set(...)) changed order from run to run.
-                    run_telemetry["event_tool_names"] = list(count_by_name)
-                    run_telemetry["event_tool_count_by_name"] = count_by_name
-                    # Populate skill-class telemetry from generic tool counters when
-                    # event-stream skill counters are missing.
-                    derived_skill_counts = {
-                        name: count_by_name[name]
-                        for name in ("list_skills", "load_skill", "read_skill_resource", "run_skill_script")
-                        if count_by_name.get(name, 0) > 0
-                    }
-                    if derived_skill_counts:
-                        existing_skill_counts = run_telemetry.get("event_skill_tool_count_by_name")
-                        merged_skill_counts: dict[str, int] = (
-                            {str(k): int(v or 0) for k, v in existing_skill_counts.items() if str(k).strip()}
-                            if isinstance(existing_skill_counts, dict)
-                            else {}
-                        )
-                        for skill_tool, count in derived_skill_counts.items():
-                            # Keep whichever source saw more calls for this tool.
-                            merged_skill_counts[skill_tool] = max(merged_skill_counts.get(skill_tool, 0), count)
-                        run_telemetry["event_skill_tool_count_by_name"] = merged_skill_counts
-                        derived_execution = merged_skill_counts.get(
-                            "read_skill_resource", 0
-                        ) + merged_skill_counts.get("run_skill_script", 0)
-                        effective_count_by_name = run_telemetry.get("event_tool_effective_count_by_name")
-                        derived_execution_effective = 0
-                        if isinstance(effective_count_by_name, dict):
-                            derived_execution_effective = int(
-                                effective_count_by_name.get("read_skill_resource", 0) or 0
-                            ) + int(effective_count_by_name.get("run_skill_script", 0) or 0)
-                        for counter_key, derived in (
-                            ("event_skill_discovery_tool_calls", merged_skill_counts.get("list_skills", 0)),
-                            ("event_skill_bootstrap_tool_calls", merged_skill_counts.get("load_skill", 0)),
-                            ("event_skill_execution_tool_calls", derived_execution),
-                            ("event_skill_execution_effective_tool_calls", derived_execution_effective),
-                        ):
-                            if _current(counter_key) < 1 and derived > 0:
-                                run_telemetry[counter_key] = derived
-                        if _current("event_skill_tool_calls") < 1:
-                            run_telemetry["event_skill_tool_calls"] = sum(merged_skill_counts.values())
-                        if _current("event_skill_effective_tool_calls") < 1 and derived_execution_effective > 0:
-                            run_telemetry["event_skill_effective_tool_calls"] = derived_execution_effective
-                _backfill_tool_call_counters(tool_call_count)
-        except Exception as exc:
-            # all_messages() may not be available; same best-effort policy as above.
-            self.logger.debug("event_stream_telemetry_fallback_messages_failed", error=str(exc))
-        fallback_tool_calls = int(run_telemetry.get("event_tool_calls_completed", 0) or 0)
-        self.logger.info(
-            "event_stream_telemetry_fallback_result",
-            fallback_tool_calls=fallback_tool_calls,
-            fallback_tool_names=run_telemetry.get("event_tool_names"),
-            fallback_source="result.usage+all_messages",
-        )
-    def _record_pydantic_usage(self, result: Any, prompt_type: str) -> dict[str, int] | None:
-        """Report a PydanticAI RunResult's usage to the global tracker, the log and metrics.
-        Args:
-            result: Object whose ``usage()`` method is called to obtain counters.
-            prompt_type: Label forwarded to the tracker and the log line.
-        Returns:
-            A dict with ``input_tokens``, ``output_tokens``, ``request_count``
-            and ``tool_calls``, or None when ``usage()`` raises or returns None.
-        """
-        try:
-            usage = result.usage()
-        except Exception:
-            usage = None
-        if usage is None:
-            return None
-        # Result key -> attribute read from the usage object; absent or falsy
-        # attributes collapse to 0.
-        counts: dict[str, int] = {
-            key: int(getattr(usage, attr, 0) or 0)
-            for key, attr in (
-                ("input_tokens", "input_tokens"),
-                ("output_tokens", "output_tokens"),
-                ("request_count", "requests"),
-                ("tool_calls", "tool_calls"),
-            )
-        }
-        model_name = self._get_model_name()
-        record = global_tracker.record_usage(
-            model=model_name,
-            prompt_type=prompt_type,
-            input_tokens=counts["input_tokens"],
-            output_tokens=counts["output_tokens"],
-        )
-        self.logger.info(
-            "pydantic_agent_usage_summary",
-            prompt_type=prompt_type,
-            model_name=model_name,
-            **counts,
-        )
-        record_llm_usage(
-            model=model_name,
-            prompt_tokens=counts["input_tokens"],
-            completion_tokens=counts["output_tokens"],
-            cost=record.cost,
-        )
-        return counts
-    def _get_model_name(self) -> str:
-        """Return the active model's name as a string.
-        Uses ``self._pydantic_model.model_name`` when a model object is set and
-        exposes that attribute; otherwise returns ``self.settings.model_standard``.
-        """
-        model = self._pydantic_model
-        if model is None or not hasattr(model, "model_name"):
-            return self.settings.model_standard
-        return str(model.model_name)
-    def _result(
-        self,
-        agent_name: str,
-        success: bool,
-        findings: list[dict[str, Any]] | None = None,
-        metadata: dict[str, Any] | None = None,
-        error: str | AuditError | None = None,
-        spawned_tasks: list[dict[str, Any]] | None = None,
-    ) -> AgentResult:
-        """Assemble an AgentResult, substituting empty containers for omitted fields.
-        Args:
-            agent_name: Name identifier for this agent
-            success: Whether the analysis succeeded
-            findings: List of finding dictionaries (empty list when falsy)
-            metadata: Additional metadata about the analysis (empty dict when falsy)
-            error: Error message, or an AuditError that is converted with ``str()``
-            spawned_tasks: List of task specs to be spawned by the dispatcher
-        Returns:
-            AgentResult instance
-        """
-        error_text: str | None
-        if isinstance(error, AuditError):
-            # Flatten the exception so the result only ever carries text.
-            error_text = str(error)
-        else:
-            error_text = error
-        return AgentResult(
-            agent_name=agent_name,
-            success=success,
-            findings=findings if findings else [],
-            metadata=metadata if metadata else {},
-            error=error_text,
-            spawned_tasks=spawned_tasks if spawned_tasks else [],
-        )
-    def _spawn_task(
-        self,
-        task_type: str,
-        description: str,
-        assignee: str,
-        input_data: dict[str, Any] | None = None,
-        priority: int = 100,
-    ) -> dict[str, Any]:
-        """Build the task specification dict that the dispatcher will spawn.
-        Args:
-            task_type: Type of task (e.g., 'api_analysis', 'security_scan')
-            description: Human-readable description of the task
-            assignee: Agent name that should handle this task
-            input_data: Optional input data for the spawned task (empty dict when falsy)
-            priority: Task priority (lower = higher priority, default 100)
-        Returns:
-            Task specification dictionary; ``created_by`` holds this object's class name
-        """
-        payload = input_data if input_data else {}
-        creator = self.__class__.__name__
-        task_spec: dict[str, Any] = {
-            "type": task_type,
-            "description": description,
-            "assignee": assignee,
-        }
-        task_spec.update(input_data=payload, priority=priority, created_by=creator)
-        return task_spec