npm - @ngocsangairvds/vsaf - Versions diffs - 3.2.13 → 3.2.15 - Mend

@ngocsangairvds/vsaf 3.2.13 → 3.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1441) hide show

package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/batch_processor.py DELETED Viewed

@@ -1,4776 +0,0 @@
-"""Batch Row Processor - Processes checklist rows in configurable batches.
-Provides:
-- Configurable batch sizes (FR-3.1)
-- Per-row and per-batch timeouts (FR-3.2)
-- Checkpoint/resume with full row results (FR-3.3)
-- Progress logging
-- Target filtering for dynamic analysis (FR-25)
-"""
-from __future__ import annotations
-import asyncio
-import contextlib
-import copy
-import inspect
-import json
-import math
-import os
-from collections.abc import Awaitable, Callable
-from dataclasses import dataclass, field
-from datetime import UTC, datetime
-from pathlib import Path
-# Import GroundingValidator and TargetSelection only for typing, avoid circular if possible
-from typing import TYPE_CHECKING, Any, cast
-from structlog import get_logger
-from vds_agent_core.llm.budget import BudgetExceededError
-from vds_agent_core.profiles import resolve_default_failover_profiles
-from vds_audit_orchestrator.engine.provider_failure_classifier import (
-    ProviderFailureClass,
-    ProviderFailureClassification,
-    ProviderHealthMemory,
-    RowFailoverContext,
-    TimeoutKind,
-)
-from vds_audit_orchestrator.engine.row_evaluator import RowEvaluator
-from vds_audit_orchestrator.engine.row_evaluator_types import RowEvaluationResult, requires_app_config_only
-from vds_audit_orchestrator.errors import AUDIT_ERROR_CODES
-from vds_audit_orchestrator.evidence.matcher import MatcherProtocol, RowEvidenceContext
-from vds_audit_orchestrator.models.checklist import (
-    VERIFICATION_REASON_EXCERPT_VERIFIED_IN_CONTEXT,
-    VERIFICATION_REASON_FALLBACK_REF_INHERITED,
-    EvidenceAnchor,
-    RowProvenance,
-    RowStatus,
-    ScoreBreakdown,
-    normalize_verification_reason,
-)
-from vds_audit_orchestrator.runtime_profiles import inherit_runtime_llm_policy
-if TYPE_CHECKING:
-    from vds_audit_orchestrator.engine.grounding_validator import GroundingValidator
-    from vds_audit_orchestrator.engine.target_selector import TargetSelection
-    from vds_audit_orchestrator.models.template import AuditCheck, AuditTemplate
-logger = get_logger()
-# Names of per-row activity counters (tool and skill call tallies).
-# NOTE(review): presumably these are the keys compared between snapshots to decide
-# whether a row made progress before a timeout extension; the consumer is outside
-# this view — confirm.
-_TIMEOUT_PROGRESS_COUNTER_KEYS: tuple[str, ...] = (
-    "tool_calls",
-    "skill_calls",
-    "skill_execution_calls",
-    "skill_effective_calls",
-)
-def _convert_datetimes(obj: Any) -> Any:
-    """Recursively convert datetime objects to ISO strings for JSON serialization.
-    Dicts, lists and tuples are rebuilt rather than mutated in place; any other
-    value is returned unchanged.
-    Args:
-        obj: Arbitrary (possibly nested) value.
-    Returns:
-        A structure of the same shape with every ``datetime`` replaced by its
-        ``isoformat()`` string.
-    """
-    if isinstance(obj, datetime):
-        return obj.isoformat()
-    if isinstance(obj, dict):
-        return {k: _convert_datetimes(v) for k, v in obj.items()}
-    if isinstance(obj, list):
-        return [_convert_datetimes(item) for item in obj]
-    if isinstance(obj, tuple):
-        # json.dumps serializes tuples as arrays, so a datetime hidden inside one
-        # would otherwise surface later as a TypeError during serialization.
-        return tuple(_convert_datetimes(item) for item in obj)
-    return obj
-def _sanitize_retrieval_trace(trace: Any) -> dict[str, Any] | None:
-    """Normalize a retrieval trace into a JSON-serializable dict.
-    Returns ``None`` when ``trace`` is not a dict, or when it still cannot be
-    dumped to JSON (``TypeError``) after datetimes have been stringified.
-    """
-    if isinstance(trace, dict):
-        candidate = _convert_datetimes(trace)
-        try:
-            # Dry-run serialization; only the success/failure outcome matters.
-            json.dumps(candidate, ensure_ascii=False)
-        except TypeError:
-            return None
-        return candidate
-    return None
-def _merge_retrieval_trace_payloads(
-    matcher_trace: dict[str, Any] | None,
-    row_trace: dict[str, Any] | None,
-) -> dict[str, Any] | None:
-    """Merge matcher + row retrieval traces without losing non-null matcher context.
-    The matcher trace is the base and the row trace is overlaid on top of it:
-    - ``None`` values in the row trace never overwrite anything.
-    - An empty list/dict in the row trace does not replace a non-empty matcher
-      value, unless "preserve empty overrides" is active (see below).
-    - Dict values are merged one level deep using the same rules; deeper levels
-      are replaced wholesale.
-    - Every other value from the row trace wins.
-    "Preserve empty overrides" is active when the row trace carries a truthy
-    ``app_config_only_enforced["applied"]`` flag or when
-    ``RowEvaluator._is_app_config_only_requirement`` reports true for it.
-    Args:
-        matcher_trace: Trace produced by the evidence matcher, or ``None``.
-        row_trace: Trace produced by row evaluation, or ``None``.
-    Returns:
-        ``None`` when both inputs are empty, a shallow copy of the only non-empty
-        input when just one is present, otherwise the merged dict.
-    """
-    # Shallow copies so the callers' dicts are never mutated at the top level.
-    base = dict(matcher_trace or {})
-    overlay = dict(row_trace or {})
-    if not base and not overlay:
-        return None
-    if not base:
-        return overlay
-    if not overlay:
-        return base
-    merged = dict(base)
-    # When true, an empty list/dict from the row trace is an intentional "clear"
-    # and must replace the matcher's non-empty value.
-    preserve_empty_overrides = bool(
-        (
-            overlay.get("app_config_only_enforced") if isinstance(overlay.get("app_config_only_enforced"), dict) else {}
-        ).get("applied")
-    ) or RowEvaluator._is_app_config_only_requirement(retrieval_trace=overlay)
-    for key, value in overlay.items():
-        if value is None:
-            continue
-        existing = merged.get(key)
-        if isinstance(value, list):
-            # Keep the matcher's non-empty list over an empty row list (default mode).
-            if not value and isinstance(existing, list) and existing and not preserve_empty_overrides:
-                continue
-            merged[key] = value
-            continue
-        if isinstance(value, dict):
-            # Keep the matcher's non-empty dict over an empty row dict (default mode).
-            if not value and isinstance(existing, dict) and existing and not preserve_empty_overrides:
-                continue
-            # In preserve mode the empty dict must win; without this branch the
-            # nested merge below would simply re-emit a copy of ``existing``.
-            if not value and preserve_empty_overrides:
-                merged[key] = value
-                continue
-            if isinstance(existing, dict):
-                # One-level-deep merge, applying the same rules per nested key.
-                nested = dict(existing)
-                for nested_key, nested_value in value.items():
-                    if nested_value is None:
-                        continue
-                    prior_nested = nested.get(nested_key)
-                    if (
-                        isinstance(nested_value, list)
-                        and not nested_value
-                        and isinstance(prior_nested, list)
-                        and prior_nested
-                        and not preserve_empty_overrides
-                    ):
-                        continue
-                    if (
-                        isinstance(nested_value, dict)
-                        and not nested_value
-                        and isinstance(prior_nested, dict)
-                        and prior_nested
-                        and not preserve_empty_overrides
-                    ):
-                        continue
-                    # Explicit preserve-mode branch; it has the same effect as the
-                    # fall-through assignment that follows it.
-                    if isinstance(nested_value, dict) and not nested_value and preserve_empty_overrides:
-                        nested[nested_key] = nested_value
-                        continue
-                    nested[nested_key] = nested_value
-                merged[key] = nested
-                continue
-            # No dict to merge into: the row value replaces whatever was there.
-            merged[key] = value
-            continue
-        # Scalars and any other type: the row trace wins.
-        merged[key] = value
-    return merged
-def _default_batch_error_retry_limit() -> int:
-    """Return the batch-level ERROR-row retry limit (FR-145.2).
-    Env: VDS_AUDIT_BATCH_ERROR_RETRY_LIMIT, default 2.
-    Negative values are clamped to 0 (retries disabled). An unset or unparsable
-    value yields the same default of 2, so a malformed override cannot silently
-    select a different limit than the documented one.
-    """
-    default_limit = 2
-    raw = os.environ.get("VDS_AUDIT_BATCH_ERROR_RETRY_LIMIT")
-    if raw is None:
-        return default_limit
-    try:
-        return max(0, int(raw))
-    except (ValueError, TypeError):
-        return default_limit
-# ---------------------------------------------------------------------------
-# Phase 146: Quota-aware recovery configuration (FR-146.6)
-# ---------------------------------------------------------------------------
-def _quota_wait_max_seconds() -> float:
-    """Upper bound, in seconds, for waiting when every provider is exhausted (AC-146.6.3).
-    Env: VDS_AUDIT_QUOTA_WAIT_MAX_SECONDS, default 30.0.
-    Negative values clamp to 0, and 0 disables bounded wait-and-resume entirely.
-    An unparsable value falls back to 30.0.
-    """
-    configured = os.environ.get("VDS_AUDIT_QUOTA_WAIT_MAX_SECONDS", "30")
-    try:
-        seconds = float(configured)
-    except (ValueError, TypeError):
-        return 30.0
-    return max(0.0, seconds)
-def _exhausted_provider_verification_limit() -> int:
-    """Verification retries allowed for a provider already known to be exhausted (AC-146.5.3).
-    Env: VDS_AUDIT_EXHAUSTED_PROVIDER_RETRIES, default 0.
-    With 0, known-exhausted providers are skipped entirely during retry sweeps.
-    Negative values clamp to 0; an unparsable value yields 0.
-    """
-    configured = os.environ.get("VDS_AUDIT_EXHAUSTED_PROVIDER_RETRIES", "0")
-    try:
-        retries = int(configured)
-    except (ValueError, TypeError):
-        return 0
-    return max(0, retries)
-def _all_providers_exhausted_reason(
-    health_memory: ProviderHealthMemory,
-    *,
-    failover_profiles: list[str],
-) -> str | None:
-    """Return an exhaustion reason code if all eligible providers are unusable (AC-146.6.4).
-    Profiles are inspected in order and the scan stops at the first one that is
-    still dispatchable.
-    Args:
-        health_memory: Shared provider health state; ``get_state(profile)`` must
-            return an object exposing ``is_auth_blocked()``, ``is_in_cooldown()``
-            and ``should_skip_dispatch()``.
-        failover_profiles: Candidate provider profile names.
-    Returns:
-        ``None`` when ``failover_profiles`` is empty or at least one provider is
-        still dispatchable; ``quota_all_providers_exhausted`` when no provider is
-        dispatchable and at least one is only temporarily unusable (in cooldown
-        or flagged to skip dispatch); ``provider_all_auth_blocked`` when every
-        provider is auth-blocked (permanent for the current run).
-    """
-    if not failover_profiles:
-        return None
-    any_temporary = False
-    for profile in failover_profiles:
-        state = health_memory.get_state(profile)
-        if state.is_auth_blocked():
-            continue
-        if state.is_in_cooldown() or state.should_skip_dispatch():
-            any_temporary = True
-            continue
-        # A single dispatchable provider settles the answer.
-        return None
-    if any_temporary:
-        return "quota_all_providers_exhausted"
-    # The list is non-empty and every entry was auth-blocked.
-    return "provider_all_auth_blocked"
-@dataclass
-class BatchConfig:
-    """Configuration for batch processing (FR-3.1, FR-3.2, FR-25, FR-188, FR-145.2).
-    Attributes:
-        batch_size: Number of rows per batch (default 5).
-        row_timeout_ms: Timeout per row in milliseconds (default 360_000, i.e. 6 minutes).
-        row_progress_lease_seconds: Renewable liveness lease window in seconds.
-            Default 0 keeps legacy timeout-extension semantics.
-        row_stall_detection_seconds: No-progress stall threshold in seconds when
-            lease mode is enabled (default 60).
-        row_absolute_timeout_ms: Hard safety cap per row when lease mode is enabled
-            (default 1_200_000, i.e. 20 minutes).
-        row_timeout_progress_extension_enabled: Whether the row timeout may be
-            extended adaptively on effective progress (default True).
-        row_timeout_progress_retry_attempts: Number of extension attempts (default 5).
-        row_timeout_progress_extension_ms: Length of each extension in milliseconds
-            (default 60_000).
-        row_timeout_progress_max_ms: Ceiling for the row timeout after extensions
-            (default 600_000, i.e. 10 minutes).
-        batch_timeout_ms: Optional batch timeout (default: batch_size * effective row timeout ceiling + overhead).
-        batch_overhead_ms: Slack time for formatting + bookkeeping (default 15s).
-        batch_error_retry_limit: Max post-pass retry attempts for ERROR rows (FR-145.2).
-            Env: VDS_AUDIT_BATCH_ERROR_RETRY_LIMIT, default 2. Set 0 to disable.
-        checkpoint_dir: Directory for checkpoint files.
-        resume_from_checkpoint: Whether to resume from existing checkpoint.
-        target_selection: Optional target selection for dynamic analysis (FR-25).
-        run_context: Free-form per-run context dict (default empty).
-        force_refresh_targets: Re-evaluate targeted rows even if checkpointed (FR-25).
-            Default True.
-        scoped_na_indices: Row indices scoped as N/A by repo type; LLM calls are
-            skipped entirely for these rows (FR-185).
-        row_concurrency: Max rows dispatched in parallel within a batch (FR-188).
-            Default 4. Values >1 enable asyncio.gather dispatch
-            with a semaphore-bounded concurrency limit.
-        adaptive_row_concurrency: Whether adaptive concurrency is enabled (default True).
-        adaptive_row_concurrency_min: Lower bound for adaptive concurrency (default 2).
-        adaptive_row_concurrency_max: Upper bound for adaptive concurrency (default 8).
-        adaptive_row_concurrency_increase_threshold_ms: Default 1000.
-            NOTE(review): presumably the latency below which concurrency is raised;
-            the adjusting code is outside this view — confirm.
-        adaptive_row_concurrency_decrease_threshold_ms: Default 5000.
-            NOTE(review): presumably the latency above which concurrency is lowered — confirm.
-    """
-    batch_size: int = 5  # FR-3.1: Default 5-10 rows
-    row_timeout_ms: int = 360_000  # Generous: 6 minutes per row (was 120s; profile YAML may override to 600s+)
-    row_progress_lease_seconds: int = 0  # FR-112.1: 0 preserves legacy timeout-extension mode
-    row_stall_detection_seconds: int = 60  # FR-112.1: stall threshold (was 30s)
-    row_absolute_timeout_ms: int = 1_200_000  # Generous: 20-minute hard safety cap (was 600s)
-    row_timeout_progress_extension_enabled: bool = True  # FR-72/73 follow-up: adaptive extension on effective progress
-    row_timeout_progress_retry_attempts: int = 5  # Generous: 5 extension attempts (was 2)
-    row_timeout_progress_extension_ms: int = 60_000  # Generous: 60s per extension (was 30s)
-    row_timeout_progress_max_ms: int = 600_000  # Generous: 10-minute ceiling after extensions (was 240s)
-    batch_timeout_ms: int | None = None  # Default: batch_size * effective row timeout ceiling + overhead
-    batch_overhead_ms: int = 15_000  # Slack for formatting + bookkeeping
-    batch_error_retry_limit: int = field(default_factory=_default_batch_error_retry_limit)  # FR-145.2
-    checkpoint_dir: Path | None = None
-    resume_from_checkpoint: bool = False
-    # FR-25: Dynamic Analysis Support - Target filtering
-    target_selection: TargetSelection | None = None
-    run_context: dict[str, Any] = field(default_factory=dict)
-    force_refresh_targets: bool = True  # Re-evaluate targeted rows even if checkpointed
-    # FR-185: Repo-type scoped N/A indices — skip LLM calls entirely for these rows
-    scoped_na_indices: set[int] = field(default_factory=set)
-    # FR-188: Parallel row dispatch within a batch
-    row_concurrency: int = 4  # Phase 158 default; >1 enables asyncio.gather dispatch
-    # FR-128.3 / Phase 158: adaptive concurrency is enabled by default.
-    adaptive_row_concurrency: bool = True
-    adaptive_row_concurrency_min: int = 2
-    adaptive_row_concurrency_max: int = 8
-    adaptive_row_concurrency_increase_threshold_ms: int = 1000
-    adaptive_row_concurrency_decrease_threshold_ms: int = 5000
-# ---------------------------------------------------------------------------
-# FR-145.2: Resilience budget enforcement
-# ---------------------------------------------------------------------------
-# Reason codes in retry_metadata that indicate a non-provider failure.
-# These rows will not benefit from retrying with a different provider.
-# Mixes enum-derived values with plain string reason codes in one immutable set.
-# NOTE(review): the literals "terminal_auth" / "non_provider_bug" presumably equal the
-# two enum ``.value`` strings; if so they are redundant but harmless in a set — confirm.
-_NON_FAILOVERABLE_REASONS: frozenset[str] = frozenset(
-    {
-        ProviderFailureClass.TERMINAL_AUTH.value,
-        ProviderFailureClass.NON_PROVIDER_BUG.value,
-        "terminal_auth",
-        "non_provider_bug",
-        "schema_validation",
-        "template_mismatch",
-        "row_backend_invalid_env_value",
-        "target_resolution",
-    }
-)
-@dataclass
-class ResilienceBudget:
-    """Retry budget shared by the post-pass sweeps (FR-145.2, NFR-145.2).
-    The allowance is driven by the number of errors but never drops below a floor:
-        total = max(error_count * 3, 15, ceil(total_rows * 0.35))
-    Example: 55 rows with 12 ERRORs gives max(36, 15, 20) = 36 retries.
-    Args:
-        total_rows: Total rows in the primary pass.
-        error_count: Number of ERROR rows from the primary pass.
-        calls_used: Budget slots consumed so far (starts at 0).
-    """
-    total_rows: int
-    error_count: int
-    calls_used: int = 0
-    @property
-    def total(self) -> int:
-        """Total number of retry calls this budget allows."""
-        error_driven = self.error_count * 3
-        row_floor = math.ceil(self.total_rows * 0.35)
-        return max(error_driven, 15, row_floor)
-    @property
-    def remaining(self) -> int:
-        """Retry calls still available; never negative."""
-        unused = self.total - self.calls_used
-        return unused if unused > 0 else 0
-    def consume(self) -> bool:
-        """Take one slot from the budget; return False when none is left."""
-        if self.total <= self.calls_used:
-            return False
-        self.calls_used += 1
-        return True
-@dataclass
-class _ResilienceRetryStats:
-    """Mutable accumulator for retry sweep statistics (FR-145.7).
-    Passed by reference into ``_retry_error_rows`` so the caller can read
-    the final counts without the method returning a tuple.
-    Every counter starts at zero and ``providers_used`` starts as a fresh empty
-    list per instance.
-    """
-    # Retry-sweep counters.
-    error_rows_before_retry: int = 0
-    retry_attempts: int = 0
-    retry_successes: int = 0
-    # Budget accounting (used vs. total allowance).
-    budget_used: int = 0
-    budget_total: int = 0
-    # Provider failover tracking.
-    failover_hops: int = 0
-    providers_used: list[str] = field(default_factory=list)
-    # Quota-exhaustion handling counters.
-    quota_exhausted_rows: int = 0
-    quota_wait_attempted_rows: int = 0
-    quota_wait_recovered_rows: int = 0
-    # Phase 166: Prompt-level retry/failover telemetry
-    prompt_retry_attempts: int = 0
-    prompt_retry_recoveries: int = 0
-    prompt_failover_attempts: int = 0
-    prompt_failover_recoveries: int = 0
-    prompt_failover_exhausted: int = 0
-    synthesis_fallback_count: int = 0
-@dataclass
-class BatchResult:
-    """Result of processing a single batch.
-    Holds the batch's position (``batch_index``, ``start_row``, ``end_row``), the
-    per-row results, the elapsed time in milliseconds and outcome counters.
-    NOTE(review): whether ``end_row`` is inclusive or exclusive is decided by the
-    producer, which is outside this view — confirm.
-    """
-    batch_index: int
-    start_row: int
-    end_row: int
-    results: list[RowEvaluationResult]
-    duration_ms: int
-    success_count: int
-    error_count: int
-    timed_out: bool = False
-    skipped_count: int = 0  # FR-25: Rows skipped due to targeting
-    postproc_overlap_ms: int = 0  # FR-193: Post-processing streaming overlap
-class BatchBudgetExceededError(RuntimeError):
-    """Signals that strict budget mode aborted a batch part-way through.
-    Besides the message, the exception carries the failure ``context``, the rows
-    evaluated before the abort (``partial_results``) and the success, error and
-    skipped counters supplied by the raiser.
-    """
-    def __init__(
-        self,
-        *,
-        message: str,
-        context: dict[str, Any],
-        partial_results: list[RowEvaluationResult],
-        success_count: int,
-        error_count: int,
-        skipped_count: int,
-    ) -> None:
-        super().__init__(message)
-        # Counters recorded at the moment of the abort.
-        self.success_count = success_count
-        self.error_count = error_count
-        self.skipped_count = skipped_count
-        # Diagnostic payload and whatever was evaluated so far.
-        self.partial_results = partial_results
-        self.context = context
-@dataclass(frozen=True)
-class BatchProgress:
-    """Progress snapshot emitted after a processed batch.
-    The dataclass is frozen, so a snapshot cannot be modified after creation.
-    ``emission_kind`` defaults to ``"batch"``; ``current_row_id`` and
-    ``current_check_id`` default to ``None``.
-    NOTE(review): other ``emission_kind`` values and the cases in which the
-    ``current_*`` fields are populated are set by the emitter, which is outside
-    this view — confirm.
-    """
-    batch_index: int
-    batch_count: int
-    start_row: int
-    end_row: int
-    total_rows: int
-    completed_rows: int
-    processed_batches: int
-    skipped_batches: int
-    success_count: int
-    error_count: int
-    skipped_count: int
-    duration_ms: int
-    timed_out: bool
-    emission_kind: str = "batch"
-    current_row_id: str | None = None
-    current_check_id: str | None = None
-# Callback receiving a BatchProgress snapshot plus a list of row results. It may be
-# synchronous (returning None) or return an awaitable.
-BatchProgressCallback = Callable[[BatchProgress, list[RowEvaluationResult]], Awaitable[None] | None]
-# Row-level progress callback taking nine positional arguments: three ints, a result
-# list, three more ints, an optional single result and a string. Sync or async.
-# NOTE(review): the meaning of each positional slot is fixed by the call site, which
-# is outside this view — confirm before relying on the order.
-RowProgressCallback = Callable[
-    [int, int, int, list[RowEvaluationResult], int, int, int, RowEvaluationResult | None, str],
-    Awaitable[None] | None,
-]
-@dataclass
-class BatchCheckpoint:
-    """Checkpoint for resumable processing (FR-3.3).
-    CRITICAL: Stores full row results to enable complete reconstruction on resume.
-    The checkpoint file is UTF-8 encoded JSON. ``load`` treats a missing,
-    unreadable or malformed file as "no checkpoint" and returns ``None``.
-    """
-    thread_id: str
-    total_rows: int
-    template_hash: str  # FR-3.3: For cache invalidation
-    evidence_hash: str  # FR-3.3: For cache invalidation
-    completed_batches: list[int]
-    # FR-3.3: Row ID -> serialized RowEvaluationResult (full results, not just status)
-    results_by_row_id: dict[str, dict[str, Any]]
-    last_updated: datetime = field(default_factory=lambda: datetime.now(UTC))
-    def save(self, path: Path) -> None:
-        """Write the checkpoint to ``path`` as UTF-8 JSON, creating parent directories."""
-        data = {
-            "thread_id": self.thread_id,
-            "total_rows": self.total_rows,
-            "template_hash": self.template_hash,
-            "evidence_hash": self.evidence_hash,
-            "completed_batches": self.completed_batches,
-            "results_by_row_id": self.results_by_row_id,
-            "last_updated": self.last_updated.isoformat(),
-        }
-        path.parent.mkdir(parents=True, exist_ok=True)
-        # ensure_ascii=False emits raw non-ASCII characters, so the encoding is
-        # pinned: a platform default such as cp1252 could fail to encode them.
-        path.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8")
-    @classmethod
-    def load(cls, path: Path) -> BatchCheckpoint | None:
-        """Load a checkpoint from ``path``.
-        Returns:
-            The checkpoint, or ``None`` when the file does not exist or cannot be
-            read or parsed (the failure is logged as ``checkpoint_load_failed``).
-        """
-        if not path.exists():
-            return None
-        try:
-            # Must match the encoding used by ``save``.
-            data = json.loads(path.read_text(encoding="utf-8"))
-            return cls(
-                thread_id=data["thread_id"],
-                total_rows=data["total_rows"],
-                template_hash=data.get("template_hash", ""),
-                evidence_hash=data.get("evidence_hash", ""),
-                completed_batches=data["completed_batches"],
-                results_by_row_id=data.get("results_by_row_id", {}),
-                last_updated=datetime.fromisoformat(data["last_updated"]),
-            )
-        except Exception as e:
-            # Deliberately best-effort: a bad checkpoint means "start over", not "crash".
-            logger.warning("checkpoint_load_failed", error=str(e))
-            return None
-    @staticmethod
-    def serialize_row_result(result: RowEvaluationResult) -> dict[str, Any]:
-        """Serialize a RowEvaluationResult for checkpoint storage (FR-3.3).
-        Nested models are dumped via ``model_dump()`` / ``dict()`` when available
-        and datetimes are converted to ISO strings. ``reasoning`` is stored as
-        well, because ``deserialize_row_result`` restores it.
-        """
-        def safe_dump(obj: Any) -> Any:
-            """Dump a model (or plain value) into JSON-compatible data."""
-            if obj is None:
-                return None
-            if hasattr(obj, "model_dump"):
-                return _convert_datetimes(obj.model_dump())
-            if hasattr(obj, "dict"):
-                return _convert_datetimes(obj.dict())
-            # Plain values: still stringify datetimes, including nested ones.
-            return _convert_datetimes(obj)
-        retrieval_trace = _sanitize_retrieval_trace(result.retrieval_trace)
-        return {
-            "row_id": result.row_id,
-            "check_id": result.check_id,
-            "status": result.status.value if hasattr(result.status, "value") else str(result.status),
-            "score": result.score,
-            "score_breakdown": safe_dump(result.score_breakdown),
-            "reason": result.reason,
-            # getattr keeps this tolerant of result objects without the field.
-            "reasoning": getattr(result, "reasoning", None),
-            "finding": result.finding,
-            "evidence_anchors": [safe_dump(e) for e in (result.evidence_anchors or [])],
-            "provenance": safe_dump(result.provenance),
-            "severity": result.severity,
-            "priority": result.priority,
-            "effort": result.effort,
-            "recommendation": result.recommendation,
-            "fix_suggestions": result.fix_suggestions,
-            "score_1_5": result.score_1_5,
-            "cache_hit": result.cache_hit,
-            "error_message": result.error_message,
-            "retry_count": result.retry_count,
-            "retry_metadata": result.retry_metadata,
-            "retrieval_trace": retrieval_trace,
-            "coverage_requirements": result.coverage_requirements,
-            "coverage_code": result.coverage_code,
-            "coverage_docs": result.coverage_docs,
-            "coverage_confidence": result.coverage_confidence,
-            "coverage_deterministic_downgrade_flag": result.coverage_deterministic_downgrade_flag,
-        }
-    @staticmethod
-    def deserialize_row_result(data: dict[str, Any]) -> RowEvaluationResult:
-        """Deserialize a RowEvaluationResult from checkpoint (FR-3.3).
-        Best-effort reconstruction for resumption: an unknown status becomes
-        ``RowStatus.ERROR``, a missing score breakdown is recomputed from the
-        score, and ``null`` values for ``evidence_anchors`` / ``retry_count`` are
-        treated as empty / zero.
-        Raises:
-            KeyError: If ``row_id`` or ``check_id`` is missing.
-        """
-        # Imported locally, as in the original implementation.
-        from vds_audit_orchestrator.engine.row_evaluator_types import RowEvaluationResult
-        from vds_audit_orchestrator.models.checklist import (
-            EvidenceAnchor,
-            RowProvenance,
-            RowStatus,
-            ScoreBreakdown,
-        )
-        # Handle status enum
-        status_val = data.get("status", "ERROR")
-        try:
-            status = RowStatus(status_val)
-        except ValueError:
-            status = RowStatus.ERROR
-        # Reconstruct complex objects
-        breakdown_data = data.get("score_breakdown")
-        score_breakdown = (
-            ScoreBreakdown(**breakdown_data) if breakdown_data else ScoreBreakdown.compute(data.get("score", 0.0))
-        )
-        # ``or []`` also covers an explicit null stored in the checkpoint.
-        anchors = [EvidenceAnchor(**a_data) for a_data in (data.get("evidence_anchors") or [])]
-        provenance_data = data.get("provenance")
-        provenance = RowProvenance(**provenance_data) if provenance_data else None
-        return RowEvaluationResult(
-            row_id=data["row_id"],
-            check_id=data["check_id"],
-            status=status,
-            score=data.get("score", 0.0),
-            score_breakdown=score_breakdown,
-            reason=data.get("reason") or "",
-            reasoning=data.get("reasoning") or "",
-            finding=data.get("finding") or "",
-            evidence_anchors=anchors,
-            provenance=provenance,  # type: ignore[arg-type]
-            severity=data.get("severity") or "",
-            priority=data.get("priority") or "",
-            effort=data.get("effort") or "",
-            recommendation=data.get("recommendation") or "",
-            fix_suggestions=data.get("fix_suggestions") or [],
-            score_1_5=data.get("score_1_5"),
-            cache_hit=data.get("cache_hit", False),
-            error_message=data.get("error_message"),
-            # A stored null must not crash int(); treat it as zero retries.
-            retry_count=max(0, int(data.get("retry_count") or 0)),
-            retry_metadata=dict(data.get("retry_metadata") or {}),
-            retrieval_trace=_sanitize_retrieval_trace(data.get("retrieval_trace")),
-            coverage_requirements=data.get("coverage_requirements"),
-            coverage_code=data.get("coverage_code"),
-            coverage_docs=data.get("coverage_docs"),
-            coverage_confidence=data.get("coverage_confidence"),
-            coverage_deterministic_downgrade_flag=bool(data.get("coverage_deterministic_downgrade_flag", False)),
-        )
-class BatchRowProcessor:
-    """Processes checklist rows in batches with checkpoint support.
-    Supports targeted row execution (FR-25) where only specific rows are
-    evaluated while preserving checkpoint state for non-targeted rows.
-    """
-    def __init__(
-        self,
-        template: AuditTemplate,
-        evaluator: RowEvaluator,
-        evidence_matcher: MatcherProtocol,
-        config: BatchConfig,
-        *,
-        grounding_validator: GroundingValidator | None = None,
-    ):
-        """Set up the processor and its per-run bookkeeping.
-        Args:
-            template: Audit template with checks.
-            evaluator: Row evaluator instance.
-            evidence_matcher: Evidence matcher for per-row context.
-            config: Batch processing configuration.
-            grounding_validator: Optional validator for FR-7.
-        """
-        self.template = template
-        self.evaluator = evaluator
-        self.evidence_matcher = evidence_matcher
-        self.config = config
-        self.grounding_validator = grounding_validator
-        # Flat (section id, check) list in template order, for simple batch slicing.
-        self._checks: list[tuple[str, AuditCheck]] = [
-            (section.id, check) for section in template.sections for check in section.checks
-        ]
-        self.last_progress: BatchProgress | None = None
-        self.last_execution_summary: dict[str, int] = dict.fromkeys(
-            ("processed_batches", "skipped_batches", "total_results", "batch_count"),
-            0,
-        )
-        # FR-145.7: Per-run resilience telemetry; reset at process_all start.
-        self.last_resilience_summary: dict[str, Any] | None = None
-        self._resilience_stats_accumulator: list[_ResilienceRetryStats] = []
-        # Evidence-match cache plus in-flight futures, guarded by one lock.
-        self._batch_match_cache: dict[tuple[str, str | None, str, bool], RowEvidenceContext] = {}
-        self._batch_match_inflight: dict[tuple[str, str | None, str, bool], asyncio.Future[RowEvidenceContext]] = {}
-        self._batch_match_lock = asyncio.Lock()
-        # Starting concurrency: the configured value, but never below 1.
-        configured_concurrency = int(self.config.row_concurrency or 1)
-        self._adaptive_row_concurrency_current = max(1, configured_concurrency)
-    @staticmethod
-    def _preflight_provider_checks_from_run_context(run_context: dict[str, Any] | None) -> dict[str, dict[str, Any]]:
-        """Index the run context's ``profile_availability_checks`` by profile name.
-        Entries that are not dicts, or whose ``profile`` is empty after stripping,
-        are ignored. Each kept entry is shallow-copied, and a later entry for the
-        same profile replaces an earlier one. Returns ``{}`` when the run context
-        is not a dict or the checks are not a list.
-        """
-        probes = run_context.get("profile_availability_checks") if isinstance(run_context, dict) else None
-        if not isinstance(probes, list):
-            return {}
-        by_profile: dict[str, dict[str, Any]] = {}
-        for entry in probes:
-            if isinstance(entry, dict):
-                name = str(entry.get("profile") or "").strip()
-                if name:
-                    by_profile[name] = dict(entry)
-        return by_profile
-    async def _seed_batch_health_memory_from_preflight(self) -> None:
-        """Feed preflight provider probes from the run context into the batch health memory.
-        Does nothing when the run context is not a dict or holds no usable probes;
-        in that case the health memory is not touched at all.
-        """
-        run_context = self.config.run_context
-        probes = self._preflight_provider_checks_from_run_context(
-            run_context if isinstance(run_context, dict) else {}
-        )
-        for profile, probe in probes.items():
-            await self._batch_health_memory.apply_preflight_probe(profile, probe)
-    @staticmethod
-    async def _record_runtime_provider_result(
-        health_memory: ProviderHealthMemory | None,
-        *,
-        provider_name: str | None,
-        result: RowEvaluationResult,
-    ) -> None:
-        """Report one row outcome for a provider to the shared health memory.
-        Nothing is recorded without a health memory or a non-blank provider name.
-        A non-ERROR result records a success. An ERROR result records a failure
-        only when its ``retry_metadata["reason_code"]`` is one of the known
-        provider reason codes; any other ERROR is ignored.
-        """
-        if health_memory is None:
-            return
-        provider = str(provider_name or "").strip()
-        if not provider:
-            return
-        metadata = result.retry_metadata
-        details = dict(metadata) if isinstance(metadata, dict) else {}
-        if result.status != RowStatus.ERROR:
-            await health_memory.record_success(provider)
-            return
-        reason = str(details.get("reason_code") or "").strip().lower()
-        if not reason:
-            return
-        # Known provider reason codes and the failure class each maps to.
-        auth = ProviderFailureClass.TERMINAL_AUTH
-        transient = ProviderFailureClass.RETRYABLE_TRANSIENT
-        class_by_reason = {
-            "provider_authentication_failed": auth,
-            "provider_authorization_failed": auth,
-            "provider_rate_limited": ProviderFailureClass.QUOTA_OR_CAPACITY,
-            "provider_server_error": transient,
-            "provider_transient_error": transient,
-            "provider_http_error": transient,
-        }
-        failure_class = class_by_reason.get(reason)
-        if failure_class is None:
-            return
-        # The retry-after hint is optional; anything non-numeric counts as absent.
-        hint = details.get("retry_after_seconds")
-        retry_after_seconds: float | None
-        try:
-            retry_after_seconds = None if hint is None else float(hint)
-        except (TypeError, ValueError):
-            retry_after_seconds = None
-        classification = ProviderFailureClassification(
-            failure_class=failure_class,
-            is_failoverable=failure_class != ProviderFailureClass.TERMINAL_AUTH,
-            retry_after_seconds=retry_after_seconds,
-            raw_status_code=None,
-            raw_message=reason,
-            classification_reason=reason,
-        )
-        await health_memory.record_failure(provider, classification=classification)
-    # ------------------------------------------------------------------
-    # Phase 146: Bounded all-providers-exhausted recovery (FR-146.6)
-    # ------------------------------------------------------------------
-    @staticmethod
-    async def _bounded_wait_for_provider_recovery(
-        health_memory: ProviderHealthMemory,
-        *,
-        failover_profiles: list[str],
-        max_wait_seconds: float,
-        poll_interval_seconds: float = 2.0,
-        row_id: str | None = None,
-        check_id: str | None = None,
-    ) -> str | None:
-        """Wait bounded time for a provider to recover from cooldown/exhaustion (AC-146.6.1).
-        Uses ``nearest_recovery_seconds()`` from the shared provider health state
-        to estimate the earliest possible recovery.  The wait is capped by
-        ``max_wait_seconds`` (from ``VDS_AUDIT_QUOTA_WAIT_MAX_SECONDS``).
-        Eligibility is checked once before the first sleep, so a provider that is
-        already dispatchable is returned without delay. A non-positive
-        ``poll_interval_seconds`` is raised to a small floor to avoid a busy loop.
-        Args:
-            health_memory: Shared provider health state.
-            failover_profiles: Candidate provider profile names, checked in order.
-            max_wait_seconds: Hard cap on the wait; ``<= 0`` disables waiting.
-            poll_interval_seconds: Delay between eligibility checks.
-            row_id: Row identifier, used only for log context.
-            check_id: Check identifier, used only for log context.
-        Returns:
-            A recovered profile name, or ``None`` if waiting is disabled, no
-            provider is temporarily blocked, or no provider became eligible
-            within the cap.
-        Telemetry:
-            Emits structured log events for wait-start, provider-recovered, and
-            wait-expired boundaries so operators can diagnose quota timing.
-        """
-        import time as _time
-        if max_wait_seconds <= 0 or not failover_profiles:
-            return None
-        def _is_dispatchable(profile: str) -> bool:
-            state = health_memory.get_state(profile)
-            return not state.is_auth_blocked() and not state.should_skip_dispatch()
-        def _is_temporarily_blocked(profile: str) -> bool:
-            # One state lookup per profile; auth-blocked providers never recover.
-            state = health_memory.get_state(profile)
-            return not state.is_auth_blocked() and state.should_skip_dispatch()
-        def _first_dispatchable() -> str | None:
-            return next((p for p in failover_profiles if _is_dispatchable(p)), None)
-        # Waiting only makes sense if some provider is temporarily blocked.
-        if not any(_is_temporarily_blocked(p) for p in failover_profiles):
-            return None
-        # Determine how long to wait: min of configured cap and nearest recovery hint.
-        recovery_hint = health_memory.nearest_recovery_seconds()
-        if recovery_hint is not None and recovery_hint > 0:
-            wait_target = min(max_wait_seconds, recovery_hint + 1.0)
-        else:
-            wait_target = max_wait_seconds
-        logger.info(
-            "quota_bounded_wait_started",
-            row_id=row_id,
-            check_id=check_id,
-            max_wait_seconds=max_wait_seconds,
-            recovery_hint_seconds=recovery_hint,
-            wait_target_seconds=wait_target,
-            failover_profiles=failover_profiles,
-        )
-        started = _time.monotonic()
-        deadline = started + wait_target
-        # Floor the interval so a zero/negative value cannot spin the event loop.
-        poll_seconds = poll_interval_seconds if poll_interval_seconds > 0 else 0.05
-        # Check before sleeping: another provider may already be usable.
-        recovered_profile = _first_dispatchable()
-        while recovered_profile is None:
-            remaining = deadline - _time.monotonic()
-            if remaining <= 0:
-                break
-            await asyncio.sleep(min(poll_seconds, remaining))
-            recovered_profile = _first_dispatchable()
-        if recovered_profile is not None:
-            logger.info(
-                "quota_bounded_wait_provider_recovered",
-                row_id=row_id,
-                check_id=check_id,
-                recovered_provider=recovered_profile,
-                elapsed_seconds=_time.monotonic() - started,
-            )
-        else:
-            logger.warning(
-                "quota_bounded_wait_expired",
-                row_id=row_id,
-                check_id=check_id,
-                max_wait_seconds=max_wait_seconds,
-                failover_profiles=failover_profiles,
-            )
-        return recovered_profile
-    @staticmethod
-    def _annotate_quota_exhaustion(
-        result: RowEvaluationResult,
-        *,
-        health_memory: ProviderHealthMemory,
-        failover_profiles: list[str],
-        wait_attempted: bool = False,
-        wait_expired: bool = False,
-    ) -> RowEvaluationResult:
-        """Annotate a row result with quota-exhaustion metadata (AC-146.6.4).
-        Mutates ``retry_metadata`` in-place and returns the same result.
-        """
-        reason = _all_providers_exhausted_reason(health_memory, failover_profiles=failover_profiles)
-        if reason is None:
-            return result
-        metadata = dict(result.retry_metadata or {})
-        metadata["quota_all_providers_exhausted"] = True
-        metadata["quota_exhaustion_reason"] = reason
-        metadata["quota_exhaustion_wait_attempted"] = wait_attempted
-        metadata["quota_exhaustion_wait_expired"] = wait_expired
-        # Attach per-provider skip reasons for telemetry clarity (AC-146.7.2).
-        provider_skip_reasons: dict[str, str] = {}
-        for profile in failover_profiles:
-            state = health_memory.get_state(profile)
-            if state.is_auth_blocked():
-                provider_skip_reasons[profile] = "auth_blocked"
-            elif state.is_in_cooldown():
-                provider_skip_reasons[profile] = "cooldown"
-            elif state.rate_limit.request_capacity_known_empty():
-                provider_skip_reasons[profile] = "request_quota_empty"
-            elif state.rate_limit.token_capacity_known_empty():
-                provider_skip_reasons[profile] = "token_quota_empty"
-            elif state.should_skip_dispatch():
-                provider_skip_reasons[profile] = "degraded"
-            else:
-                provider_skip_reasons[profile] = "eligible"
-        metadata["quota_provider_skip_reasons"] = provider_skip_reasons
-        result.retry_metadata = metadata
-        return result
-    @staticmethod
-    def _build_match_requirement_text(check: AuditCheck) -> str:
-        """Augment matcher query with canonical checklist hints."""
-        base = (check.description or check.name or "").strip()
-        config = check.check_config if isinstance(check.check_config, dict) else {}
-        extras = [
-            str(config.get("confluence_rule") or "").strip(),
-            str(config.get("confluence_main") or "").strip(),
-            str(config.get("confluence_eval_guide") or "").strip(),
-            str(config.get("confluence_evidence_hint") or "").strip(),
-            str(config.get("evidence_required") or "").strip(),
-        ]
-        suffix = " ".join(part for part in extras if part)
-        if base and suffix:
-            return f"{base} {suffix}"
-        return base or suffix
-    @staticmethod
-    def _row_requires_app_config_only(check: AuditCheck) -> bool:
-        """Return True when row guidance explicitly restricts retrieval to app-config."""
-        config = check.check_config if isinstance(check.check_config, dict) else {}
-        return requires_app_config_only(
-            check.description or check.name or "",
-            config.get("confluence_rule"),
-            config.get("confluence_main"),
-            config.get("confluence_eval_guide"),
-            config.get("confluence_evidence_hint"),
-            config.get("detailed_guidance"),
-            config.get("confluence_notes"),
-            config.get("evidence_required"),
-        )
-    @staticmethod
-    def _empty_evidence_context(*, row_id: str, requirement_text: str) -> RowEvidenceContext:
-        """Return an empty evidence context for rows that must bypass matcher retrieval."""
-        return RowEvidenceContext(
-            row_id=row_id,
-            requirement_text=requirement_text,
-            matched_docs=[],
-            matched_code=[],
-            evidence_refs=[],
-            total_chars=0,
-            truncated=False,
-            retrieval_trace=None,
-        )
-    @staticmethod
-    def _clone_evidence_context(
-        *, context: RowEvidenceContext, row_id: str, requirement_text: str
-    ) -> RowEvidenceContext:
-        """Clone matcher context so cached retrieval can be reused safely per row."""
-        return RowEvidenceContext(
-            row_id=row_id,
-            requirement_text=requirement_text,
-            matched_docs=copy.deepcopy(getattr(context, "matched_docs", []) or []),
-            matched_code=copy.deepcopy(getattr(context, "matched_code", []) or []),
-            evidence_refs=list(getattr(context, "evidence_refs", []) or []),
-            total_chars=int(getattr(context, "total_chars", 0) or 0),
-            truncated=bool(getattr(context, "truncated", False)),
-            retrieval_trace=copy.deepcopy(getattr(context, "retrieval_trace", None)),
-        )
-    async def _get_or_match_evidence_context(
-        self,
-        *,
-        row_id: str,
-        requirement_text: str,
-        section_id: str | None,
-        app_config_only: bool,
-    ) -> RowEvidenceContext:
-        """Return row evidence context with in-batch dedup for parallel rows."""
-        if app_config_only:
-            return self._empty_evidence_context(row_id=row_id, requirement_text=requirement_text)
-        cache_key = (
-            requirement_text,
-            str(section_id) if section_id is not None else None,
-            str(getattr(self.evidence_matcher, "retrieval_mode", "unknown") or "unknown"),
-            False,
-        )
-        async with self._batch_match_lock:
-            cached = self._batch_match_cache.get(cache_key)
-            if cached is not None:
-                return self._clone_evidence_context(
-                    context=cached,
-                    row_id=row_id,
-                    requirement_text=requirement_text,
-                )
-            inflight = self._batch_match_inflight.get(cache_key)
-            if inflight is None:
-                loop = asyncio.get_running_loop()
-                inflight = loop.create_future()
-                self._batch_match_inflight[cache_key] = inflight
-                owner = True
-            else:
-                owner = False
-        if owner:
-            try:
-                matched = await asyncio.to_thread(
-                    self.evidence_matcher.match_row,
-                    row_id=row_id,
-                    requirement_text=requirement_text,
-                    section_id=section_id,
-                )
-                canonical = self._clone_evidence_context(
-                    context=matched,
-                    row_id="__shared__",
-                    requirement_text=requirement_text,
-                )
-                async with self._batch_match_lock:
-                    self._batch_match_cache[cache_key] = canonical
-                    pending = self._batch_match_inflight.pop(cache_key, None)
-                    if pending is not None and not pending.done():
-                        pending.set_result(canonical)
-                return self._clone_evidence_context(
-                    context=canonical,
-                    row_id=row_id,
-                    requirement_text=requirement_text,
-                )
-            except Exception as exc:
-                async with self._batch_match_lock:
-                    pending = self._batch_match_inflight.pop(cache_key, None)
-                    if pending is not None and not pending.done():
-                        pending.set_exception(exc)
-                raise
-        shared_context = await inflight
-        return self._clone_evidence_context(
-            context=shared_context,
-            row_id=row_id,
-            requirement_text=requirement_text,
-        )
-    def _effective_row_concurrency(self) -> int:
-        """Return current row concurrency after optional adaptive adjustment."""
-        if not self.config.adaptive_row_concurrency:
-            return max(1, int(self.config.row_concurrency or 1))
-        configured = max(1, int(self.config.row_concurrency or 1))
-        lower = max(1, int(self.config.adaptive_row_concurrency_min or 1))
-        upper = max(lower, int(self.config.adaptive_row_concurrency_max or configured))
-        return max(lower, min(upper, int(self._adaptive_row_concurrency_current or configured)))
-    def _repo_row_distribution_profiles(self) -> list[str]:
-        runtime_context = self.config.run_context if isinstance(self.config.run_context, dict) else {}
-        raw_profiles = runtime_context.get("repo_distribution_profiles")
-        if not isinstance(raw_profiles, (list, tuple)):
-            return []
-        ordered: list[str] = []
-        seen: set[str] = set()
-        for item in raw_profiles:
-            normalized = str(item or "").strip()
-            if not normalized or normalized in seen:
-                continue
-            seen.add(normalized)
-            ordered.append(normalized)
-        return ordered
-    def _repo_row_distribution_enabled(self) -> bool:
-        runtime_context = self.config.run_context if isinstance(self.config.run_context, dict) else {}
-        mode = str(runtime_context.get("repo_profile_execution_mode") or "single").strip().lower()
-        return mode == "distributed" and len(self._repo_row_distribution_profiles()) > 1
-    def _assigned_repo_profile_for_row(self, row_index: int) -> str | None:
-        profiles = self._repo_row_distribution_profiles()
-        if not self._repo_row_distribution_enabled() or not profiles:
-            return None
-        return profiles[row_index % len(profiles)]
-    def _update_adaptive_row_concurrency(self, batch_result: BatchResult) -> None:
-        """Adjust opt-in row concurrency between batches based on observed batch health."""
-        if not self.config.adaptive_row_concurrency:
-            return
-        current = self._effective_row_concurrency()
-        lower = max(1, int(self.config.adaptive_row_concurrency_min or 1))
-        upper = max(lower, int(self.config.adaptive_row_concurrency_max or current))
-        increase_threshold_ms = max(1, int(self.config.adaptive_row_concurrency_increase_threshold_ms or 1000))
-        decrease_threshold_ms = max(
-            increase_threshold_ms, int(self.config.adaptive_row_concurrency_decrease_threshold_ms or 5000)
-        )
-        next_value = current
-        reason = None
-        if batch_result.timed_out or batch_result.error_count > 0 or batch_result.duration_ms >= decrease_threshold_ms:
-            if current > lower:
-                next_value = current - 1
-                reason = "batch_pressure"
-        elif (
-            batch_result.duration_ms <= increase_threshold_ms
-            and batch_result.error_count == 0
-            and not batch_result.timed_out
-        ) and current < upper:
-            next_value = current + 1
-            reason = "batch_healthy"
-        self._adaptive_row_concurrency_current = next_value
-        if reason is not None and next_value != current:
-            logger.info(
-                "adaptive_row_concurrency_adjusted",
-                previous=current,
-                current=next_value,
-                reason=reason,
-                batch_index=batch_result.batch_index,
-                batch_duration_ms=batch_result.duration_ms,
-                batch_error_count=batch_result.error_count,
-                batch_timed_out=batch_result.timed_out,
-                **self.config.run_context,
-            )
-    def _is_row_targeted(self, row_index: int, check_id: str) -> bool:
-        """Check if a row is targeted for evaluation (FR-25).
-        Args:
-            row_index: 0-based row index.
-            check_id: The check ID for this row.
-        Returns:
-            True if the row should be evaluated, False if it should be skipped.
-            Returns True if no target selection is configured (process all rows).
-        """
-        if self.config.target_selection is None:
-            return True
-        # Check if row index is in target set
-        if self.config.target_selection.matches_row_index(row_index):
-            return True
-        # Check if check ID is in target set (for CHECK_IDS type)
-        return bool(self.config.target_selection.matches_check_id(check_id.upper()))
-    def _batch_has_targeted_rows(self, start_idx: int, end_idx: int) -> bool:
-        """Check if a batch contains any targeted rows (FR-25.2).
-        Args:
-            start_idx: Start index of the batch (inclusive).
-            end_idx: End index of the batch (exclusive).
-        Returns:
-            True if the batch has at least one targeted row.
-            Returns True if no target selection is configured.
-        """
-        if self.config.target_selection is None:
-            return True
-        for idx in range(start_idx, end_idx):
-            _, check = self._checks[idx]
-            if self._is_row_targeted(idx, check.id):
-                return True
-        return False
-    def _should_emit_row_progress_callback(
-        self,
-        current_result: RowEvaluationResult | None,
-        outcome: str,
-    ) -> bool:
-        """Suppress noisy row-level progress events for skipped non-target rows."""
-        return not (outcome == "skipped" and current_result is None and self.config.target_selection is not None)
-    @staticmethod
-    def _build_inflight_row_progress_result(check: AuditCheck, row_id: str) -> RowEvaluationResult:
-        """Create an in-memory placeholder used only for row-start progress telemetry."""
-        return RowEvaluationResult(
-            row_id=row_id,
-            check_id=check.id,
-            status=RowStatus.NA,
-            score=0.0,
-            score_breakdown=ScoreBreakdown.compute(0.0),
-            reason="Đang chờ đánh giá theo tiến độ batch.",
-            finding="Kết quả sẽ được cập nhật khi batch chứa hàng này hoàn tất.",
-            evidence_anchors=[],
-            provenance=RowProvenance(
-                row_llm_mode="selective",
-                protocol=None,
-                model=None,
-                template_hash="",
-                rubric_version="1",
-                evidence_hash="",
-                evaluated_at=datetime.now(UTC),
-            ),
-        )
-    def _should_evaluate_row(
-        self,
-        row_index: int,
-        check_id: str,
-        row_id: str,
-        checkpoint: BatchCheckpoint,
-    ) -> bool:
-        """Determine if a row should be evaluated (FR-25.3, FR-25.4, FR-185).
-        Considers:
-        - Whether the row is scoped N/A by repo-type policy (FR-185)
-        - Whether the row is targeted
-        - Whether the row has prior checkpoint results
-        - Whether force_refresh_targets is enabled
-        Args:
-            row_index: 0-based row index.
-            check_id: The check ID for this row.
-            row_id: The unique row identifier.
-            checkpoint: Current checkpoint state.
-        Returns:
-            True if the row should be evaluated, False if it should be skipped
-            or checkpoint result should be used.
-        """
-        # FR-185: Skip rows marked N/A by repo-type scoping policy entirely.
-        # This prevents evidence retrieval, prompt construction, and LLM calls
-        # for structurally inapplicable rows (e.g., DB rows for frontend repos).
-        if row_index in self.config.scoped_na_indices:
-            return False
-        is_targeted = self._is_row_targeted(row_index, check_id)
-        has_checkpoint_result = row_id in checkpoint.results_by_row_id
-        if not is_targeted:
-            # Non-targeted rows: never evaluate, just preserve checkpoint if available
-            return False
-        # Targeted rows
-        if self.config.force_refresh_targets:
-            # FR-25.4: Re-evaluate targeted rows even if checkpointed
-            return True
-        # Only evaluate if no checkpoint result exists
-        return not has_checkpoint_result
-    async def process_all(
-        self,
-        thread_id: str,
-        project_profile: dict[str, Any] | None = None,
-        progress_callback: BatchProgressCallback | None = None,
-    ) -> list[RowEvaluationResult]:
-        """Process all rows in batches with checkpoint/resume (FR-3.3) and targeting (FR-25)."""
-        # FR-145.7: Reset resilience accumulator so each run starts fresh.
-        self._resilience_stats_accumulator = []
-        self.last_resilience_summary = None
-        total_rows = len(self._checks)
-        # Load checkpoint if resuming
-        checkpoint_path = self._get_checkpoint_path(thread_id)
-        checkpoint: BatchCheckpoint | None = None
-        if self.config.resume_from_checkpoint:
-            checkpoint = BatchCheckpoint.load(checkpoint_path)
-            if checkpoint:
-                # Validate checkpoint matches current template/evidence
-                current_evidence_hash = self.evaluator.evidence_hash or ""
-                if (
-                    checkpoint.template_hash != self.evaluator.template_hash
-                    or checkpoint.evidence_hash != current_evidence_hash
-                ):
-                    logger.warning(
-                        "checkpoint_hash_mismatch_starting_fresh",
-                        checkpoint_template=checkpoint.template_hash,
-                        current_template=self.evaluator.template_hash,
-                    )
-                    checkpoint = None
-                else:
-                    logger.info(
-                        "resuming_from_checkpoint",
-                        completed_rows=len(checkpoint.results_by_row_id),
-                        total_rows=total_rows,
-                    )
-        if not checkpoint:
-            checkpoint = BatchCheckpoint(
-                thread_id=thread_id,
-                total_rows=total_rows,
-                template_hash=self.evaluator.template_hash,
-                evidence_hash=self.evaluator.evidence_hash or "",
-                completed_batches=[],
-                results_by_row_id={},
-            )
-        # FR-25: Log targeting info if configured
-        if self.config.target_selection:
-            target_count = len(self.config.target_selection.row_indices)
-            logger.info(
-                "targeted_execution_mode",
-                target_type=self.config.target_selection.target_type.value,
-                target_count=target_count,
-                total_rows=total_rows,
-                force_refresh=self.config.force_refresh_targets,
-            )
-        # FR-3.3: Seed results from checkpoint for complete result set on resume
-        # FR-25.3: For non-targeted rows, preserve checkpoint state
-        all_results: list[RowEvaluationResult] = []
-        checkpointed_row_ids: set[str] = set()
-        for row_data in checkpoint.results_by_row_id.values():
-            try:
-                result = BatchCheckpoint.deserialize_row_result(row_data)
-                checkpointed_row_ids.add(result.row_id)
-                all_results.append(result)
-            except Exception as e:
-                logger.warning("failed_to_deserialize_checkpoint_row", error=str(e))
-        # Calculate batches
-        batch_count = (total_rows + self.config.batch_size - 1) // self.config.batch_size
-        batches_skipped = 0
-        batches_processed = 0
-        async def _emit_row_progress(
-            batch_idx: int,
-            start_idx: int,
-            end_idx: int,
-            batch_results_so_far: list[RowEvaluationResult],
-            success_count_so_far: int,
-            error_count_so_far: int,
-            skipped_count_so_far: int,
-            current_result: RowEvaluationResult | None,
-            outcome: str,
-        ) -> None:
-            # Non-targeted rows without checkpoint state return no concrete result.
-            # Suppress row-level telemetry for those no-op skips to avoid null-id noise.
-            if outcome == "skipped" and current_result is None:
-                return
-            preview_results = list(all_results)
-            if outcome != "started":
-                for result in batch_results_so_far:
-                    preview_results = [res for res in preview_results if res.row_id != result.row_id]
-                    preview_results.append(result)
-                    checkpoint.results_by_row_id[result.row_id] = BatchCheckpoint.serialize_row_result(result)
-                checkpoint.last_updated = datetime.now(UTC)
-                checkpoint.save(checkpoint_path)
-            completed_rows = len(preview_results)
-            target_row_count = (
-                len(self.config.target_selection.row_indices) if self.config.target_selection is not None else None
-            )
-            targeted_completed_rows = (
-                min(completed_rows, target_row_count) if target_row_count is not None else completed_rows
-            )
-            completion_scope = "targeted" if target_row_count is not None else "template"
-            completion_pct = round((completed_rows / total_rows) * 100.0 if total_rows > 0 else 100.0, 2)
-            targeted_completion_pct = round(
-                (targeted_completed_rows / target_row_count) * 100.0 if target_row_count else completion_pct,
-                2,
-            )
-            progress = BatchProgress(
-                batch_index=batch_idx,
-                batch_count=batch_count,
-                start_row=start_idx,
-                end_row=end_idx,
-                total_rows=total_rows,
-                completed_rows=completed_rows,
-                processed_batches=batches_processed,
-                skipped_batches=batches_skipped,
-                success_count=success_count_so_far,
-                error_count=error_count_so_far,
-                skipped_count=skipped_count_so_far,
-                duration_ms=0,
-                timed_out=False,
-                emission_kind="row",
-                current_row_id=getattr(current_result, "row_id", None),
-                current_check_id=getattr(current_result, "check_id", None),
-            )
-            self.last_progress = progress
-            progress_context = dict(self.config.run_context or {})
-            if current_result is not None:
-                provenance = current_result.provenance
-                retrieval_trace = (
-                    current_result.retrieval_trace if isinstance(current_result.retrieval_trace, dict) else {}
-                )
-                row_distribution_profile = str(retrieval_trace.get("row_distribution_profile") or "").strip() or None
-                original_provider = (
-                    str(getattr(provenance, "original_provider", "") or "").strip() if provenance is not None else ""
-                ) or None
-                final_provider = (
-                    str(getattr(provenance, "final_provider", "") or "").strip() if provenance is not None else ""
-                ) or None
-                executed_profile = row_distribution_profile or final_provider or original_provider
-                if executed_profile:
-                    progress_context["executed_profile"] = executed_profile
-                    progress_context["active_profile"] = executed_profile
-                if original_provider:
-                    progress_context["origin_profile"] = original_provider
-                if row_distribution_profile:
-                    progress_context["row_distribution_profile"] = row_distribution_profile
-            logger.info(
-                "row_progress",
-                batch=f"{progress.batch_index + 1}/{progress.batch_count}",
-                completed_rows=progress.completed_rows,
-                total_rows=progress.total_rows,
-                completion_pct=completion_pct,
-                completion_scope=completion_scope,
-                targeted_completed_rows=targeted_completed_rows if target_row_count is not None else None,
-                targeted_total_rows=target_row_count,
-                targeted_completion_pct=targeted_completion_pct if target_row_count is not None else None,
-                processed_batches=progress.processed_batches,
-                skipped_batches=progress.skipped_batches,
-                row_id=progress.current_row_id,
-                check_id=progress.current_check_id,
-                row_outcome=outcome,
-                thread_id=thread_id,
-                **progress_context,
-            )
-            if progress_callback is not None:
-                try:
-                    maybe_awaitable = progress_callback(progress, list(preview_results))
-                    if inspect.isawaitable(maybe_awaitable):
-                        await maybe_awaitable
-                except Exception as e:
-                    logger.warning(
-                        "row_progress_callback_failed",
-                        batch=progress.batch_index + 1,
-                        row_id=progress.current_row_id,
-                        error=str(e),
-                    )
-        for batch_idx in range(batch_count):
-            start_idx = batch_idx * self.config.batch_size
-            end_idx = min(start_idx + self.config.batch_size, total_rows)
-            # FR-25.2: Skip whole batches when they contain no targeted rows
-            if not self._batch_has_targeted_rows(start_idx, end_idx):
-                logger.debug(
-                    "skipping_batch_no_targets",
-                    batch=batch_idx,
-                    rows=f"{start_idx + 1}-{end_idx}",
-                )
-                batches_skipped += 1
-                continue
-            # Skip completed batches (only if not in targeted mode with force_refresh)
-            # In targeted mode with force_refresh, we need to re-process targeted rows
-            if (batch_idx in checkpoint.completed_batches and self.config.target_selection is None) or not self.config.force_refresh_targets:
-                    logger.debug("skipping_completed_batch", batch=batch_idx)
-                    continue
-                # In force_refresh mode, we still process the batch but only targeted rows
-            # FR-3: Progress logging
-            logger.info(
-                "processing_batch",
-                batch=f"{batch_idx + 1}/{batch_count}",
-                rows=f"{start_idx + 1}-{end_idx}",
-                targeted=self.config.target_selection is not None,
-            )
-            # Process batch with timeout (FR-3.2)
-            try:
-                batch_result = await self._process_batch(
-                    batch_idx=batch_idx,
-                    start_idx=start_idx,
-                    end_idx=end_idx,
-                    project_profile=project_profile,
-                    checkpoint=checkpoint,
-                    thread_id=thread_id,
-                    row_progress_callback=_emit_row_progress,
-                )
-            except BatchBudgetExceededError as exc:
-                for r in exc.partial_results:
-                    all_results = [res for res in all_results if res.row_id != r.row_id]
-                    all_results.append(r)
-                    checkpoint.results_by_row_id[r.row_id] = BatchCheckpoint.serialize_row_result(r)
-                checkpoint.last_updated = datetime.now(UTC)
-                checkpoint.save(checkpoint_path)
-                exc.context["checkpoint_path"] = str(checkpoint_path)
-                exc.context["completed_rows_persisted"] = len(all_results)
-                exc.partial_results = list(all_results)
-                logger.exception("batch_budget_exceeded", **exc.context)
-                raise
-            # FR-25.3: Merge results - update targeted rows, preserve non-targeted
-            for r in batch_result.results:
-                # Remove old result if exists (for force_refresh case)
-                all_results = [res for res in all_results if res.row_id != r.row_id]
-                all_results.append(r)
-                # Update checkpoint
-                checkpoint.results_by_row_id[r.row_id] = BatchCheckpoint.serialize_row_result(r)
-            # FR-3.3: Update checkpoint
-            if batch_idx not in checkpoint.completed_batches:
-                checkpoint.completed_batches.append(batch_idx)
-            checkpoint.last_updated = datetime.now(UTC)
-            checkpoint.save(checkpoint_path)
-            batches_processed += 1
-            progress = BatchProgress(
-                batch_index=batch_idx,
-                batch_count=batch_count,
-                start_row=start_idx,
-                end_row=end_idx,
-                total_rows=total_rows,
-                completed_rows=len(all_results),
-                processed_batches=batches_processed,
-                skipped_batches=batches_skipped,
-                success_count=batch_result.success_count,
-                error_count=batch_result.error_count,
-                skipped_count=batch_result.skipped_count,
-                duration_ms=batch_result.duration_ms,
-                timed_out=batch_result.timed_out,
-            )
-            self.last_progress = progress
-            target_row_count = (
-                len(self.config.target_selection.row_indices) if self.config.target_selection is not None else None
-            )
-            targeted_completed_rows = (
-                min(progress.completed_rows, target_row_count)
-                if target_row_count is not None
-                else progress.completed_rows
-            )
-            completion_scope = "targeted" if target_row_count is not None else "template"
-            targeted_completion_pct = round(
-                (targeted_completed_rows / target_row_count) * 100.0 if target_row_count else 100.0,
-                2,
-            )
-            logger.info(
-                "batch_progress",
-                batch=f"{progress.batch_index + 1}/{progress.batch_count}",
-                completed_rows=progress.completed_rows,
-                total_rows=progress.total_rows,
-                completion_pct=round(
-                    (progress.completed_rows / progress.total_rows) * 100.0 if progress.total_rows > 0 else 100.0,
-                    2,
-                ),
-                completion_scope=completion_scope,
-                targeted_completed_rows=targeted_completed_rows if target_row_count is not None else None,
-                targeted_total_rows=target_row_count,
-                targeted_completion_pct=targeted_completion_pct if target_row_count is not None else None,
-                processed_batches=progress.processed_batches,
-                skipped_batches=progress.skipped_batches,
-                thread_id=thread_id,
-                **self.config.run_context,
-            )
-            if progress_callback is not None:
-                try:
-                    maybe_awaitable = progress_callback(progress, list(all_results))
-                    if inspect.isawaitable(maybe_awaitable):
-                        await maybe_awaitable
-                except Exception as e:
-                    logger.warning(
-                        "batch_progress_callback_failed",
-                        batch=progress.batch_index + 1,
-                        error=str(e),
-                    )
-            logger.info(
-                "batch_complete",
-                batch=batch_idx + 1,
-                success=batch_result.success_count,
-                errors=batch_result.error_count,
-                skipped=batch_result.skipped_count,
-                duration_ms=batch_result.duration_ms,
-                timed_out=batch_result.timed_out,
-            )
-        # FR-25: Log summary
-        if self.config.target_selection:
-            target_row_count = len(self.config.target_selection.row_indices)
-            logger.info(
-                "targeted_execution_complete",
-                batches_processed=batches_processed,
-                batches_skipped=batches_skipped,
-                total_results=len(all_results),
-                completion_scope="targeted",
-                targeted_completed_rows=len(all_results),
-                targeted_total_rows=target_row_count,
-                targeted_completion_pct=round(
-                    (len(all_results) / target_row_count) * 100.0 if target_row_count else 100.0, 2
-                ),
-                thread_id=thread_id,
-                **self.config.run_context,
-            )
-        self.last_execution_summary = {
-            "processed_batches": batches_processed,
-            "skipped_batches": batches_skipped,
-            "total_results": len(all_results),
-            "batch_count": batch_count,
-        }
-        # Phase 166: Accumulate prompt-level retry/failover telemetry from row results.
-        _prompt_stats = _ResilienceRetryStats()
-        for row_result in all_results:
-            tel = self._extract_prompt_level_telemetry(row_result)
-            _prompt_stats.prompt_retry_attempts += tel["prompt_retry_attempts"]
-            _prompt_stats.prompt_retry_recoveries += tel["prompt_retry_recoveries"]
-            _prompt_stats.prompt_failover_attempts += tel["prompt_failover_attempts"]
-            _prompt_stats.prompt_failover_recoveries += tel["prompt_failover_recoveries"]
-            _prompt_stats.prompt_failover_exhausted += tel["prompt_failover_exhausted"]
-            _prompt_stats.synthesis_fallback_count += tel["synthesis_fallback_count"]
-        self._resilience_stats_accumulator.append(_prompt_stats)
-        # FR-145.7: Build and emit full resilience_summary at batch completion.
-        # Always build, regardless of whether retries ran — callers rely on the dict.
-        final_error_count = sum(1 for r in all_results if r.status == RowStatus.ERROR)
-        # error_rows_before_retry: sum of "before" snapshots across all batch stats.
-        error_rows_before_retry = sum(s.error_rows_before_retry for s in self._resilience_stats_accumulator)
-        retry_attempts = sum(s.retry_attempts for s in self._resilience_stats_accumulator)
-        retry_successes = sum(s.retry_successes for s in self._resilience_stats_accumulator)
-        retry_budget_used = sum(s.budget_used for s in self._resilience_stats_accumulator)
-        retry_budget_total = sum(s.budget_total for s in self._resilience_stats_accumulator)
-        failover_hops_total = sum(s.failover_hops for s in self._resilience_stats_accumulator)
-        quota_exhausted_rows = sum(s.quota_exhausted_rows for s in self._resilience_stats_accumulator)
-        quota_wait_attempted_rows = sum(s.quota_wait_attempted_rows for s in self._resilience_stats_accumulator)
-        quota_wait_recovered_rows = sum(s.quota_wait_recovered_rows for s in self._resilience_stats_accumulator)
-        # Phase 166: Aggregate prompt-level retry/failover telemetry.
-        prompt_retry_attempts = sum(s.prompt_retry_attempts for s in self._resilience_stats_accumulator)
-        prompt_retry_recoveries = sum(s.prompt_retry_recoveries for s in self._resilience_stats_accumulator)
-        prompt_failover_attempts = sum(s.prompt_failover_attempts for s in self._resilience_stats_accumulator)
-        prompt_failover_recoveries = sum(s.prompt_failover_recoveries for s in self._resilience_stats_accumulator)
-        prompt_failover_exhausted = sum(s.prompt_failover_exhausted for s in self._resilience_stats_accumulator)
-        synthesis_fallback_count = sum(s.synthesis_fallback_count for s in self._resilience_stats_accumulator)
-        # Deduplicate providers used across batches.
-        providers_used: list[str] = []
-        for s in self._resilience_stats_accumulator:
-            for p in s.providers_used:
-                if p not in providers_used:
-                    providers_used.append(p)
-        # Build provider_health_summary from shared health memory if available.
-        health_memory: ProviderHealthMemory | None = getattr(self, "_batch_health_memory", None)
-        provider_health_summary: dict[str, dict[str, Any]] = {}
-        if health_memory is not None:
-            all_providers = (
-                set(health_memory.consecutive_failures)
-                | set(health_memory.total_retries)
-                | set(health_memory.cooldown_until)
-                | set(health_memory.states)
-            )
-            for provider in sorted(all_providers):
-                state = health_memory.get_state(provider)
-                provider_health_summary[provider] = {
-                    "failures": health_memory.consecutive_failures.get(provider, 0),
-                    "cooldowns": 1 if health_memory.is_in_cooldown(provider) else 0,
-                    "retries": health_memory.total_retries.get(provider, 0),
-                    "quota_status": state.quota_status.value,
-                    "last_reason_code": state.last_reason_code,
-                    "retry_after_seconds": state.rate_limit.retry_after_seconds,
-                    "remaining_requests": state.rate_limit.remaining_requests,
-                    "remaining_tokens": state.rate_limit.remaining_tokens,
-                }
-        self.last_resilience_summary = {
-            "total_rows": total_rows,
-            "error_rows_before_retry": error_rows_before_retry,
-            "error_rows_after_retry": final_error_count,
-            "retry_attempts": retry_attempts,
-            "retry_successes": retry_successes,
-            "retry_budget_used": retry_budget_used,
-            "retry_budget_total": retry_budget_total,
-            "failover_hops_total": failover_hops_total,
-            "providers_used": providers_used,
-            "provider_health_summary": provider_health_summary,
-            # Phase 146 quota-exhaustion telemetry (AC-146.7.3)
-            "quota_exhausted_rows": quota_exhausted_rows,
-            "quota_wait_attempted_rows": quota_wait_attempted_rows,
-            "quota_wait_recovered_rows": quota_wait_recovered_rows,
-            # Phase 166: Prompt-level retry/failover telemetry
-            "prompt_retry_attempts": prompt_retry_attempts,
-            "prompt_retry_recoveries": prompt_retry_recoveries,
-            "prompt_failover_attempts": prompt_failover_attempts,
-            "prompt_failover_recoveries": prompt_failover_recoveries,
-            "prompt_failover_exhausted": prompt_failover_exhausted,
-            "synthesis_fallback_count": synthesis_fallback_count,
-        }
-        logger.info(
-            "resilience_summary",
-            **self.last_resilience_summary,
-            thread_id=thread_id,
-        )
-        return all_results
-    async def _process_batch(
-        self,
-        batch_idx: int,
-        start_idx: int,
-        end_idx: int,
-        project_profile: dict[str, Any] | None,
-        checkpoint: BatchCheckpoint,
-        thread_id: str,
-        row_progress_callback: RowProgressCallback | None = None,
-    ) -> BatchResult:
-        """Process a single batch of rows with per-row timeout (FR-3.2) and targeting (FR-25).
-        In mixed batches (some targeted, some not), only targeted rows are evaluated.
-        Non-targeted rows preserve their checkpoint state (FR-25.3).
-        When row_concurrency > 1 (FR-188), rows are dispatched in parallel via
-        asyncio.gather with a semaphore-bounded concurrency limit.
-        """
-        self._batch_match_cache = {}
-        self._batch_match_inflight = {}
-        inline_batch_stats = _ResilienceRetryStats()
-        # FR-145.4 (AC-145.4.1): Create one shared ProviderHealthMemory per batch run.
-        # Stored as an instance attribute so _process_single_row (parallel path) and
-        # the serial loop below both share the same instance without extra arg threading.
-        self._batch_health_memory = ProviderHealthMemory()
-        await self._seed_batch_health_memory_from_preflight()
-        # FR-145.1 (AC-145.1.1): Resolve default failover profiles for this batch.
-        # When the operator has not explicitly configured row_failover_profiles, auto-derive
-        # from all configured runtime profiles minus the active profile.
-        import os as _os
-        _failover_env = _os.getenv("VDS_AUDIT_LLM__ROW_FAILOVER_PROFILES")
-        if _failover_env is None:
-            _active = str(_os.getenv("VDS_AUDIT_ACTIVE_PROFILE") or "").strip() or None
-            self._batch_failover_profiles: list[str] = resolve_default_failover_profiles(_active)
-            if self._batch_failover_profiles:
-                logger.info(
-                    "provider_failover_auto_enabled",
-                    active_profile=_active,
-                    failover_profiles=self._batch_failover_profiles,
-                    source="auto_derived",
-                )
-        else:
-            import json as _json
-            try:
-                _raw = _json.loads(_failover_env)
-                self._batch_failover_profiles = (
-                    [str(p).strip() for p in _raw if str(p).strip()] if isinstance(_raw, list) else []
-                )
-            except Exception:
-                self._batch_failover_profiles = [p.strip() for p in _failover_env.split(",") if p.strip()]
-        # FR-188: Dispatch to parallel path when concurrency > 1
-        if self.config.row_concurrency > 1:
-            return await self._process_batch_parallel(
-                batch_idx=batch_idx,
-                start_idx=start_idx,
-                end_idx=end_idx,
-                project_profile=project_profile,
-                checkpoint=checkpoint,
-                thread_id=thread_id,
-                row_progress_callback=row_progress_callback,
-                batch_stats=inline_batch_stats,
-            )
-        import time
-        start_time = time.monotonic()
-        results: list[RowEvaluationResult] = []
-        success_count = 0
-        error_count = 0
-        skipped_count = 0
-        timed_out = False
-        next_row_index = start_idx
-        try:
-            effective_batch_timeout_ms = self.effective_batch_timeout_ms()
-            async with asyncio.timeout(effective_batch_timeout_ms / 1000):
-                for idx in range(start_idx, end_idx):
-                    next_row_index = idx
-                    section_id, check = self._checks[idx]
-                    row_id = f"{check.id}:row_{idx}"
-                    # FR-25.3, FR-25.4: Check if row should be evaluated
-                    if not self._should_evaluate_row(idx, check.id, row_id, checkpoint):
-                        # Preserve checkpoint state for non-targeted rows
-                        preserved_result: RowEvaluationResult | None = None
-                        if row_id in checkpoint.results_by_row_id:
-                            try:
-                                preserved_result = BatchCheckpoint.deserialize_row_result(
-                                    checkpoint.results_by_row_id[row_id]
-                                )
-                                results.append(preserved_result)
-                                logger.debug(
-                                    "preserving_checkpoint_result",
-                                    row_id=row_id,
-                                    row_index=idx,
-                                )
-                            except Exception as e:
-                                logger.warning(
-                                    "failed_to_preserve_checkpoint_result",
-                                    row_id=row_id,
-                                    error=str(e),
-                                )
-                        skipped_count += 1
-                        if row_progress_callback is not None and self._should_emit_row_progress_callback(
-                            preserved_result,
-                            "skipped",
-                        ):
-                            maybe_awaitable = row_progress_callback(
-                                batch_idx,
-                                start_idx,
-                                end_idx,
-                                list(results),
-                                success_count,
-                                error_count,
-                                skipped_count,
-                                preserved_result,
-                                "skipped",
-                            )
-                            if inspect.isawaitable(maybe_awaitable):
-                                await maybe_awaitable
-                        next_row_index = idx + 1
-                        continue
-                    if row_progress_callback is not None:
-                        inflight_result = self._build_inflight_row_progress_result(check, row_id)
-                        maybe_awaitable = row_progress_callback(
-                            batch_idx,
-                            start_idx,
-                            end_idx,
-                            list(results),
-                            success_count,
-                            error_count,
-                            skipped_count,
-                            inflight_result,
-                            "started",
-                        )
-                        if inspect.isawaitable(maybe_awaitable):
-                            await maybe_awaitable
-                    match_requirement_text = self._build_match_requirement_text(check)
-                    app_config_only = self._row_requires_app_config_only(check)
-                    # App-config-only rows must bypass generic matcher retrieval entirely.
-                    evidence_context = await self._get_or_match_evidence_context(
-                        row_id=row_id,
-                        requirement_text=match_requirement_text,
-                        section_id=section_id,
-                        app_config_only=app_config_only,
-                    )
-                    formatted_evidence = "" if app_config_only else evidence_context.format_for_llm()
-                    evidence_refs = evidence_context.evidence_refs  # FR-2.4
-                    try:
-                        result = await self._evaluate_row_with_timeout_mode(
-                            check=check,
-                            row_id=row_id,
-                            row_index=idx,
-                            evidence_context=formatted_evidence,
-                            evidence_refs=evidence_refs,
-                            project_profile=project_profile,
-                            thread_id=thread_id,
-                            matcher_retrieval_trace=getattr(evidence_context, "retrieval_trace", None),
-                            provider_health_memory=self._batch_health_memory,
-                        )
-                    except BudgetExceededError as exc:
-                        usage = exc.usage
-                        context = {
-                            "kind": "strict_budget_exceeded",
-                            "audit_error_key": "LLM_BUDGET_EXCEEDED",
-                            "audit_error_code": AUDIT_ERROR_CODES["LLM_BUDGET_EXCEEDED"].code,
-                            "batch_index": batch_idx + 1,
-                            "row_index": idx + 1,
-                            "row_id": row_id,
-                            "check_id": check.id,
-                            "scope": usage.scope.value,
-                            "scope_id": usage.scope_id,
-                            "used_dollars": usage.used_dollars,
-                            "limit_dollars": usage.limit_dollars,
-                            "status": usage.status.value,
-                            "error_context": getattr(exc, "error_context", None),
-                        }
-                        raise BatchBudgetExceededError(
-                            message=str(exc),
-                            context=context,
-                            partial_results=list(results),
-                            success_count=success_count,
-                            error_count=error_count,
-                            skipped_count=skipped_count,
-                        ) from exc
-                    finally:
-                        self._clear_row_runtime_progress(row_id)
-                    matcher_trace = _sanitize_retrieval_trace(getattr(evidence_context, "retrieval_trace", None))
-                    row_trace = _sanitize_retrieval_trace(result.retrieval_trace)
-                    merged_trace = _merge_retrieval_trace_payloads(matcher_trace, row_trace)
-                    if isinstance(merged_trace, dict):
-                        # Preserve matcher retrieval context (docs/code candidates, backend mode)
-                        # while allowing row-evaluator trace keys to take precedence.
-                        result.retrieval_trace = _sanitize_retrieval_trace(merged_trace)
-                    # TSK-145.13/14: Timeout-aware failover — attempt ONE failover
-                    # evaluation for STALL/NO_PROGRESS/CHURN_DETECTED timeouts before
-                    # finalising ERROR. TIMEOUT_WITH_GROUNDING rows are excluded.
-                    if result.status == RowStatus.ERROR:
-                        timeout_kind_in_trace = str((result.retrieval_trace or {}).get("timeout_kind") or "").strip()
-                        if self._is_timeout_failoverable(timeout_kind_in_trace) and self._batch_failover_profiles:
-                            result = await self._attempt_timeout_failover(
-                                timeout_result=result,
-                                check=check,
-                                row_index=idx,
-                                evidence_context=formatted_evidence,
-                                evidence_refs=evidence_refs,
-                                project_profile=project_profile,
-                                health_memory=self._batch_health_memory,
-                                failover_profiles=self._batch_failover_profiles,
-                                stats=inline_batch_stats,
-                            )
-                    # FR-7: Validate grounding if validator configured
-                    if self.grounding_validator:
-                        result = self.grounding_validator.validate(
-                            result,
-                            allowed_refs=evidence_refs,
-                            evidence_context=formatted_evidence,
-                        )
-                    results.append(result)
-                    if result.status == RowStatus.ERROR:
-                        error_count += 1
-                    else:
-                        success_count += 1
-                    if row_progress_callback is not None:
-                        maybe_awaitable = row_progress_callback(
-                            batch_idx,
-                            start_idx,
-                            end_idx,
-                            list(results),
-                            success_count,
-                            error_count,
-                            skipped_count,
-                            result,
-                            "error" if result.status == RowStatus.ERROR else "success",
-                        )
-                        if inspect.isawaitable(maybe_awaitable):
-                            await maybe_awaitable
-                    next_row_index = idx + 1
-        except TimeoutError:
-            logger.warning(
-                "batch_timeout",
-                batch=batch_idx,
-                completed=len(results),
-                expected=end_idx - start_idx,
-            )
-            timed_out = True
-            # Mark remaining rows as ERROR (FR-3.2: graceful degradation)
-            for idx in range(next_row_index, end_idx):
-                section_id, check = self._checks[idx]
-                row_id = f"{check.id}:row_{idx}"
-                # FR-25: Only create error for targeted rows, preserve checkpoint for others
-                if self._should_evaluate_row(idx, check.id, row_id, checkpoint):
-                    progress = self._normalize_timeout_progress_snapshot(self._get_row_runtime_progress(row_id))
-                    degraded_timeout_finalize = self._should_degrade_timeout_finalize(progress)
-                    timeout_result = self._create_timeout_result(
-                        row_id,
-                        check.id,
-                        "Batch timeout exceeded",
-                        degraded_timeout_finalize=degraded_timeout_finalize,
-                        progress=progress,
-                    )
-                    timeout_trace = {
-                        "timeout_retry_attempts": 0,
-                        "timeout_progress_snapshot": progress,
-                        "timeout_extended": False,
-                        "timeout_degraded_finalize": degraded_timeout_finalize,
-                        "timeout_kind": self._derive_timeout_kind(
-                            progress=progress,
-                            degraded_timeout_finalize=degraded_timeout_finalize,
-                            batch_timeout_exceeded=True,
-                        ),
-                        "timeout_terminal_status": timeout_result.status.value,
-                        "batch_timeout_exceeded": True,
-                    }
-                    self._attach_timeout_progress_telemetry(
-                        timeout_trace=timeout_trace,
-                        progress=progress,
-                        row_id=row_id,
-                        check_id=check.id,
-                    )
-                    timeout_result.retrieval_trace = _sanitize_retrieval_trace(
-                        self._sanitize_timeout_trace_for_app_config_only(
-                            timeout_trace,
-                            progress=progress,
-                            matcher_retrieval_trace=None,
-                        )
-                    )
-                    timeout_result = RowEvaluator._sanitize_app_config_only_result(timeout_result)
-                    timeout_result = RowEvaluator._salvage_app_config_only_timeout_result(timeout_result)
-                    results.append(timeout_result)
-                    error_count += 1
-                elif row_id in checkpoint.results_by_row_id:
-                    try:
-                        preserved_result = BatchCheckpoint.deserialize_row_result(checkpoint.results_by_row_id[row_id])
-                        results.append(preserved_result)
-                    except Exception:
-                        pass
-                    skipped_count += 1
-        duration_ms = int((time.monotonic() - start_time) * 1000)
-        # FR-145.2: Post-pass ERROR retry sweep.
-        # Only runs when batch_error_retry_limit > 0 and there are ERROR rows.
-        # Requires _batch_health_memory and _batch_failover_profiles to be set
-        # (threaded by the batch loop — see TSK-145.1 / TSK-145.3).
-        if not timed_out and error_count > 0 and self.config.batch_error_retry_limit > 0:
-            health_memory: ProviderHealthMemory | None = getattr(self, "_batch_health_memory", None)
-            failover_profiles: list[str] = list(getattr(self, "_batch_failover_profiles", None) or [])
-            if health_memory is not None and failover_profiles:
-                budget = ResilienceBudget(
-                    total_rows=len(results),
-                    error_count=error_count,
-                )
-                retried = await self._retry_error_rows(
-                    results=results,
-                    health_memory=health_memory,
-                    budget=budget,
-                    failover_profiles=failover_profiles,
-                    project_profile=project_profile,
-                    stats=inline_batch_stats,
-                )
-                # Recount after retry pass.
-                new_error_count = sum(1 for r in retried if r.status == RowStatus.ERROR)
-                new_success_count = sum(
-                    1
-                    for r in retried
-                    if r.status != RowStatus.ERROR
-                    and r.row_id in {orig.row_id for orig in results if orig.status == RowStatus.ERROR}
-                )
-                if budget.calls_used > 0:
-                    logger.info(
-                        "resilience_retry_sweep_complete",
-                        batch_index=batch_idx,
-                        errors_before=error_count,
-                        errors_after=new_error_count,
-                        retries_attempted=budget.calls_used,
-                        budget_total=budget.total,
-                    )
-                results = retried
-                error_count = new_error_count
-                success_count += new_success_count
-                # TSK-145.9: Low-confidence re-evaluation sweep.
-                # Shares the same budget instance — any budget consumed by ERROR
-                # retries above reduces what is available here.
-                reevaled = await self._reevaluate_low_confidence_rows(
-                    results=results,
-                    health_memory=health_memory,
-                    budget=budget,
-                    failover_profiles=failover_profiles,
-                    project_profile=project_profile,
-                )
-                reeval_improved = sum(
-                    1
-                    for orig, new in zip(results, reevaled, strict=False)
-                    if (new.score_1_5 or 0) > (orig.score_1_5 or 0)
-                )
-                if reeval_improved > 0:
-                    logger.info(
-                        "low_confidence_reeval_sweep_complete",
-                        batch_index=batch_idx,
-                        rows_improved=reeval_improved,
-                        budget_calls_used=budget.calls_used,
-                        budget_total=budget.total,
-                    )
-                results = reevaled
-        self._resilience_stats_accumulator.append(inline_batch_stats)
-        return BatchResult(
-            batch_index=batch_idx,
-            start_row=start_idx,
-            end_row=end_idx,
-            results=results,
-            duration_ms=duration_ms,
-            success_count=success_count,
-            error_count=error_count,
-            timed_out=timed_out,
-            skipped_count=skipped_count,
-        )
-    async def _process_single_row(
-        self,
-        idx: int,
-        batch_idx: int,
-        project_profile: dict[str, Any] | None,
-        checkpoint: BatchCheckpoint,
-        thread_id: str,
-        semaphore: asyncio.Semaphore,
-        checkpoint_lock: asyncio.Lock,
-        batch_stats: _ResilienceRetryStats | None = None,
-    ) -> tuple[RowEvaluationResult | None, str]:
-        """Process a single row under semaphore guard (FR-188).
-        Returns (result, outcome) where outcome is 'success', 'error', or 'skipped'.
-        Budget exceptions propagate to the caller.
-        """
-        section_id, check = self._checks[idx]
-        row_id = f"{check.id}:row_{idx}"
-        # FR-25.3, FR-25.4: Check if row should be processed
-        if not self._should_evaluate_row(idx, check.id, row_id, checkpoint):
-            async with checkpoint_lock:
-                if row_id in checkpoint.results_by_row_id:
-                    try:
-                        preserved_result = BatchCheckpoint.deserialize_row_result(checkpoint.results_by_row_id[row_id])
-                        logger.debug(
-                            "preserving_checkpoint_result",
-                            row_id=row_id,
-                            row_index=idx,
-                        )
-                        return preserved_result, "skipped"
-                    except Exception as e:
-                        logger.warning(
-                            "failed_to_preserve_checkpoint_result",
-                            row_id=row_id,
-                            error=str(e),
-                        )
-            return None, "skipped"
-        async with semaphore:
-            match_requirement_text = self._build_match_requirement_text(check)
-            app_config_only = self._row_requires_app_config_only(check)
-            evidence_context = await self._get_or_match_evidence_context(
-                row_id=row_id,
-                requirement_text=match_requirement_text,
-                section_id=section_id,
-                app_config_only=app_config_only,
-            )
-            formatted_evidence = "" if app_config_only else evidence_context.format_for_llm()
-            evidence_refs = evidence_context.evidence_refs
-            result: RowEvaluationResult
-            try:
-                result = await self._evaluate_row_with_timeout_mode(
-                    check=check,
-                    row_id=row_id,
-                    row_index=idx,
-                    evidence_context=formatted_evidence,
-                    evidence_refs=evidence_refs,
-                    project_profile=project_profile,
-                    thread_id=thread_id,
-                    matcher_retrieval_trace=getattr(evidence_context, "retrieval_trace", None),
-                    provider_health_memory=getattr(self, "_batch_health_memory", None),
-                )
-            except BudgetExceededError:
-                raise
-            finally:
-                self._clear_row_runtime_progress(row_id)
-            matcher_trace = _sanitize_retrieval_trace(getattr(evidence_context, "retrieval_trace", None))
-            row_trace = _sanitize_retrieval_trace(result.retrieval_trace)
-            merged_trace = _merge_retrieval_trace_payloads(matcher_trace, row_trace)
-            if isinstance(merged_trace, dict):
-                result.retrieval_trace = _sanitize_retrieval_trace(merged_trace)
-                result = RowEvaluator._sanitize_app_config_only_result(result)
-            # TSK-145.13/14: Timeout-aware failover — attempt ONE failover
-            # evaluation for STALL/NO_PROGRESS timeouts before finalising ERROR.
-            # TIMEOUT_WITH_GROUNDING rows are excluded (already have grounding).
-            if result.status == RowStatus.ERROR:
-                timeout_kind_in_trace = str((result.retrieval_trace or {}).get("timeout_kind") or "").strip()
-                if self._is_timeout_failoverable(timeout_kind_in_trace):
-                    health_memory_for_failover: ProviderHealthMemory | None = getattr(
-                        self, "_batch_health_memory", None
-                    )
-                    failover_profiles_for_timeout: list[str] = list(
-                        getattr(self, "_batch_failover_profiles", None) or []
-                    )
-                    if health_memory_for_failover is not None and failover_profiles_for_timeout:
-                        result = await self._attempt_timeout_failover(
-                            timeout_result=result,
-                            check=check,
-                            row_index=idx,
-                            evidence_context=formatted_evidence,
-                            evidence_refs=evidence_refs,
-                            project_profile=project_profile,
-                            health_memory=health_memory_for_failover,
-                            failover_profiles=failover_profiles_for_timeout,
-                        )
-            if self.grounding_validator:
-                result = self.grounding_validator.validate(
-                    result,
-                    allowed_refs=evidence_refs,
-                    evidence_context=formatted_evidence,
-                )
-            outcome = "error" if result.status == RowStatus.ERROR else "success"
-            return result, outcome
-    async def _evaluate_row_with_timeout_mode(
-        self,
-        *,
-        check: AuditCheck,
-        row_id: str,
-        row_index: int,
-        evidence_context: str,
-        evidence_refs: list[str],
-        project_profile: dict[str, Any] | None,
-        thread_id: str,
-        matcher_retrieval_trace: dict[str, Any] | None,
-        provider_health_memory: Any | None = None,
-    ) -> RowEvaluationResult:
-        if self._lease_timeout_mode_enabled():
-            return await self._evaluate_row_with_lease_timeout(
-                check=check,
-                row_id=row_id,
-                row_index=row_index,
-                evidence_context=evidence_context,
-                evidence_refs=evidence_refs,
-                project_profile=project_profile,
-                thread_id=thread_id,
-                matcher_retrieval_trace=matcher_retrieval_trace,
-                provider_health_memory=provider_health_memory,
-            )
-        return await self._evaluate_row_with_legacy_timeout(
-            check=check,
-            row_id=row_id,
-            row_index=row_index,
-            evidence_context=evidence_context,
-            evidence_refs=evidence_refs,
-            project_profile=project_profile,
-            thread_id=thread_id,
-            matcher_retrieval_trace=matcher_retrieval_trace,
-            provider_health_memory=provider_health_memory,
-        )
-    def _build_row_assigned_evaluator(self, assigned_profile: str) -> RowEvaluator:
-        runtime_context = self.config.run_context if isinstance(self.config.run_context, dict) else {}
-        cloned_config = copy.deepcopy(self.evaluator.config)
-        cloned_runtime_context = dict(getattr(cloned_config, "runtime_context", {}) or {})
-        source_llm = getattr(self.evaluator, "config", None)
-        source_llm = getattr(source_llm, "llm", None) if source_llm is not None else None
-        inherited_llm = inherit_runtime_llm_policy(assigned_profile, source_llm=source_llm)
-        cloned_runtime_context["repo_row_distribution_enabled"] = True
-        cloned_runtime_context["repo_distribution_profiles"] = list(self._repo_row_distribution_profiles())
-        cloned_runtime_context["row_distribution_profile"] = assigned_profile
-        cloned_runtime_context["active_profile"] = assigned_profile
-        cloned_runtime_context["repo_primary_profile"] = (
-            str(runtime_context.get("repo_primary_profile") or runtime_context.get("active_profile") or "").strip()
-            or assigned_profile
-        )
-        cloned_config.runtime_context = cloned_runtime_context
-        cloned_config.protocol = (
-            str(
-                getattr(getattr(inherited_llm, "protocol", None), "value", getattr(inherited_llm, "protocol", None))
-                or cloned_config.protocol
-                or ""
-            )
-            or None
-        )
-        cloned_config.model = str(getattr(inherited_llm, "model_standard", "") or cloned_config.model or "") or None
-        cloned_config.llm = inherited_llm
-        return RowEvaluator(
-            config=cloned_config,
-            template=self.template,
-            evidence_bundle=self.evaluator.evidence_bundle,
-        )
-    @staticmethod
-    def _apply_assigned_profile_provenance(
-        result: RowEvaluationResult,
-        *,
-        assigned_profile: str,
-    ) -> RowEvaluationResult:
-        provenance = (
-            result.provenance.model_copy(deep=True)
-            if result.provenance is not None
-            else RowProvenance(
-                row_llm_mode="unknown",
-                template_hash="unknown",
-                evidence_hash="unknown",
-            )
-        )
-        if not provenance.original_provider:
-            provenance.original_provider = assigned_profile
-        if not provenance.final_provider:
-            provenance.final_provider = assigned_profile
-        if not provenance.failover_chain:
-            provenance.failover_chain = [assigned_profile]
-        result.provenance = provenance
-        trace = dict(result.retrieval_trace or {}) if isinstance(result.retrieval_trace, dict) else {}
-        trace.setdefault("row_distribution_profile", assigned_profile)
-        trace.setdefault("provider_failover_chain", [assigned_profile])
-        trace.setdefault("provider_failover_final_provider", assigned_profile)
-        result.retrieval_trace = trace
-        return result
-    async def _invoke_row_evaluator(
-        self,
-        *,
-        check: AuditCheck,
-        row_id: str,
-        row_index: int,
-        evidence_context: str,
-        evidence_refs: list[str],
-        project_profile: dict[str, Any] | None,
-        provider_health_memory: Any | None = None,
-    ) -> RowEvaluationResult:
-        assigned_profile = self._assigned_repo_profile_for_row(row_index)
-        evaluator = self.evaluator
-        if assigned_profile is not None:
-            evaluator = self._build_row_assigned_evaluator(assigned_profile)
-        result = await evaluator.aevaluate_row(
-            check=check,
-            evidence=None,
-            row_id=row_id,
-            row_index=row_index,
-            evidence_context=evidence_context,
-            evidence_refs=evidence_refs,
-            project_profile=project_profile,
-            provider_health_memory=provider_health_memory,
-        )
-        if assigned_profile is not None:
-            result = self._apply_assigned_profile_provenance(result, assigned_profile=assigned_profile)
-        provider_name = str(getattr(getattr(result, "provenance", None), "final_provider", None) or "").strip()
-        if not provider_name:
-            provider_name = str(getattr(getattr(result, "provenance", None), "original_provider", None) or "").strip()
-        if not provider_name:
-            provider_name = assigned_profile
-        await self._record_runtime_provider_result(
-            cast("ProviderHealthMemory | None", provider_health_memory),
-            provider_name=provider_name,
-            result=result,
-        )
-        return result
-    # ---------------------------------------------------------------------------
-    # FR-145.2: Post-pass ERROR retry sweep
-    # ---------------------------------------------------------------------------
-    def _select_healthy_provider(
-        self,
-        failover_profiles: list[str],
-        health_memory: ProviderHealthMemory,
-        *,
-        exclude: set[str],
-    ) -> str | None:
-        """Select the next healthy provider, skipping cooled-down and excluded ones.
-        Args:
-            failover_profiles: Ordered list of candidate profile names.
-            health_memory: Shared health state accumulated during the primary pass.
-            exclude: Provider names to skip (original + already tried).
-        Returns:
-            A profile name to retry with, or None if all are blocked/cooled down.
-        """
-        for candidate in failover_profiles:
-            if candidate in exclude:
-                continue
-            if health_memory.should_skip_provider(candidate):
-                continue
-            return candidate
-        return None
-    def _build_retry_evaluator(self, profile: str) -> RowEvaluator:
-        """Build a RowEvaluator configured to use the given profile for retry."""
-        return self._build_row_assigned_evaluator(profile)
-    @staticmethod
-    def _is_timeout_failoverable(timeout_kind: str) -> bool:
-        """Return True if the timeout kind should trigger failover (TSK-145.13).
-        TIMEOUT_WITH_GROUNDING must NEVER trigger failover — it already has
-        partial grounding and a FAIL result. STALL, NO_PROGRESS, and
-        CHURN_DETECTED indicate that the primary provider was unable to make
-        durable forward progress, so a second provider may succeed.
-        """
-        return timeout_kind in {
-            TimeoutKind.TIMEOUT_PROVIDER_STALL.value,
-            TimeoutKind.TIMEOUT_NO_PROGRESS.value,
-            TimeoutKind.TIMEOUT_CHURN_DETECTED.value,
-        }
-    async def _attempt_timeout_failover(
-        self,
-        *,
-        timeout_result: RowEvaluationResult,
-        check: AuditCheck,
-        row_index: int,
-        evidence_context: str,
-        evidence_refs: list[str],
-        project_profile: dict[str, Any] | None,
-        health_memory: ProviderHealthMemory,
-        failover_profiles: list[str],
-        stats: _ResilienceRetryStats | None = None,
-    ) -> RowEvaluationResult:
-        """Attempt ONE failover evaluation for a timed-out row (TSK-145.13/14).
-        Called BEFORE the timeout result is finalised as ERROR.  Returns the
-        failover result on success, or the original timeout_result object if
-        failover is not possible or also fails.
-        Rules:
-        - Only timeout kinds accepted by ``_is_timeout_failoverable`` are
-          eligible (TIMEOUT_PROVIDER_STALL, TIMEOUT_NO_PROGRESS and
-          TIMEOUT_CHURN_DETECTED).
-        - TIMEOUT_WITH_GROUNDING is never retried (has partial grounding).
-        - Exactly one failover attempt is made (no retry loop).
-        - Providers already in the failover chain are excluded.
-        Args:
-            timeout_result: The timed-out row result; its retrieval trace must
-                carry ``timeout_kind`` for failover to be considered.
-            check: The audit check to re-evaluate.
-            row_index: Index of the row, forwarded to the evaluator.
-            evidence_context: Evidence text used when the trace carries no
-                ``evidence_context_str``.
-            evidence_refs: Evidence refs used when ``retry_metadata`` carries
-                no ``evidence_refs``.
-            project_profile: Project-level profile dict passed to the evaluator.
-            health_memory: Provider health state used to pick the next provider.
-            failover_profiles: Ordered list of candidate provider names.
-            stats: Optional accumulator; updated with providers used, hops and
-                successes when supplied.
-        Returns:
-            The annotated failover result when the second provider returns a
-            non-ERROR status; otherwise ``timeout_result``.
-        """
-        # Extract the timeout_kind from the retrieval trace.
-        timeout_trace = dict(timeout_result.retrieval_trace or {})
-        timeout_kind = str(timeout_trace.get("timeout_kind") or "").strip()
-        if not timeout_kind or not self._is_timeout_failoverable(timeout_kind):
-            return timeout_result
-        if not failover_profiles:
-            return timeout_result
-        # Determine providers to exclude: original provider + any prior chain.
-        original_provider = str(
-            getattr(getattr(timeout_result, "provenance", None), "original_provider", None) or ""
-        ).strip()
-        chain = list(getattr(getattr(timeout_result, "provenance", None), "failover_chain", None) or [])
-        exclude: set[str] = set(filter(None, [original_provider, *chain]))
-        next_provider = self._select_healthy_provider(
-            failover_profiles,
-            health_memory,
-            exclude=exclude,
-        )
-        # Phase 146 (FR-146.6): Bounded wait-and-resume when all providers are
-        # temporarily exhausted during timeout failover (TSK-146.10).
-        if next_provider is None:
-            exhaustion_reason = _all_providers_exhausted_reason(
-                health_memory,
-                failover_profiles=failover_profiles,
-            )
-            if exhaustion_reason == "quota_all_providers_exhausted":
-                quota_wait_max = _quota_wait_max_seconds()
-                if quota_wait_max > 0:
-                    recovered = await self._bounded_wait_for_provider_recovery(
-                        health_memory,
-                        failover_profiles=failover_profiles,
-                        max_wait_seconds=quota_wait_max,
-                        row_id=timeout_result.row_id,
-                        check_id=timeout_result.check_id,
-                    )
-                    # A recovered provider that was already tried for this row is not reused.
-                    if recovered is not None and recovered not in exclude:
-                        next_provider = recovered
-            if next_provider is None:
-                # Annotate timeout result with exhaustion metadata (AC-146.6.4 / TSK-146.11).
-                # NOTE(review): wait_expired is passed as True unconditionally here,
-                # even when no wait was attempted, and _quota_wait_max_seconds() is
-                # re-evaluated rather than reusing the value read above — confirm
-                # this is what _annotate_quota_exhaustion expects.
-                self._annotate_quota_exhaustion(
-                    timeout_result,
-                    health_memory=health_memory,
-                    failover_profiles=failover_profiles,
-                    wait_attempted=exhaustion_reason == "quota_all_providers_exhausted"
-                    and _quota_wait_max_seconds() > 0,
-                    wait_expired=True,
-                )
-                logger.debug(
-                    "timeout_failover_no_healthy_provider",
-                    row_id=timeout_result.row_id,
-                    check_id=timeout_result.check_id,
-                    timeout_kind=timeout_kind,
-                    exclude=sorted(exclude),
-                    exhaustion_reason=exhaustion_reason,
-                )
-                return timeout_result
-        # Build RowFailoverContext with partial evidence from the timed-out result (TSK-145.14).
-        progress_snapshot = timeout_trace.get("timeout_progress_snapshot")
-        partial_evidence_refs: list[dict[str, Any]] = []
-        if isinstance(progress_snapshot, dict):
-            raw_refs = progress_snapshot.get("evidence_refs")
-            if isinstance(raw_refs, list):
-                # Falsy entries are dropped; the rest are stringified.
-                partial_evidence_refs = [{"ref": str(r)} for r in raw_refs if r]
-        # NOTE: within this method the context is not handed to the evaluator;
-        # it feeds the attempt log line and the retry_metadata written on success.
-        failover_context = RowFailoverContext(
-            row_id=timeout_result.row_id,
-            check_id=timeout_result.check_id,
-            evidence_refs=partial_evidence_refs,
-            route_state={},
-            partial_progress={
-                "timeout_kind": timeout_kind,
-                "timeout_snapshot": dict(progress_snapshot) if isinstance(progress_snapshot, dict) else {},
-            },
-            original_provider=original_provider or None,
-            failover_count=len(chain),
-            failover_chain=chain + ([original_provider] if original_provider else []),
-            timeout_telemetry_snapshot={
-                k: v
-                for k, v in timeout_trace.items()
-                if k in {"timeout_kind", "timeout_scope", "stall_duration_seconds", "stall_reason"}
-            },
-        )
-        # Telemetry is updated before the attempt, so a hop is counted even if
-        # the failover evaluation below raises or still returns ERROR.
-        if stats is not None and next_provider not in stats.providers_used:
-            stats.providers_used.append(next_provider)
-        if stats is not None:
-            stats.failover_hops += 1
-        # Evidence refs for the re-evaluation call.
-        # Precedence: refs stored in retry_metadata, then the caller-supplied refs.
-        evidence_refs_list: list[str] = list(
-            (timeout_result.retry_metadata or {}).get("evidence_refs", []) or evidence_refs or []
-        )
-        # Prefer an evidence string stored in the trace; otherwise use the caller's.
-        evidence_context_str = str(timeout_trace.get("evidence_context_str", "") or evidence_context)
-        logger.info(
-            "timeout_failover_attempt",
-            row_id=timeout_result.row_id,
-            check_id=timeout_result.check_id,
-            timeout_kind=timeout_kind,
-            original_provider=original_provider or "unknown",
-            failover_provider=next_provider,
-            failover_context_partial_refs=len(failover_context.evidence_refs),
-        )
-        try:
-            failover_evaluator = self._build_retry_evaluator(next_provider)
-            # No provider_health_memory is passed on this call.
-            failover_result = await failover_evaluator.aevaluate_row(
-                check=check,
-                evidence=None,
-                row_id=timeout_result.row_id,
-                row_index=row_index,
-                evidence_context=evidence_context_str,
-                evidence_refs=evidence_refs_list,
-                project_profile=project_profile,
-            )
-            failover_result = self._apply_assigned_profile_provenance(
-                failover_result,
-                assigned_profile=next_provider,
-            )
-        except Exception as exc:
-            # Best-effort: any failure of the failover attempt keeps the original
-            # timeout result. NOTE(review): this broad catch would also absorb
-            # BudgetExceededError if that derives from Exception — confirm intent.
-            logger.warning(
-                "timeout_failover_failed",
-                row_id=timeout_result.row_id,
-                check_id=timeout_result.check_id,
-                failover_provider=next_provider,
-                error=str(exc),
-            )
-            return timeout_result
-        if failover_result.status == RowStatus.ERROR:
-            # The second provider also errored: keep the original timeout result.
-            logger.debug(
-                "timeout_failover_still_error",
-                row_id=timeout_result.row_id,
-                check_id=timeout_result.check_id,
-                failover_provider=next_provider,
-            )
-            return timeout_result
-        # Failover succeeded — annotate the result with failover provenance.
-        failover_result.retry_count = timeout_result.retry_count + 1
-        failover_result.retry_metadata = {
-            **(failover_result.retry_metadata or {}),
-            "timeout_failover_pass": True,
-            "timeout_failover_original_provider": original_provider or "unknown",
-            "timeout_failover_provider": next_provider,
-            "timeout_failover_kind": timeout_kind,
-            "timeout_failover_context": {
-                "failover_count": failover_context.failover_count,
-                "failover_chain": failover_context.failover_chain,
-                "partial_progress": failover_context.partial_progress,
-            },
-        }
-        if stats is not None:
-            stats.retry_successes += 1
-        logger.info(
-            "timeout_failover_succeeded",
-            row_id=timeout_result.row_id,
-            check_id=timeout_result.check_id,
-            timeout_kind=timeout_kind,
-            original_provider=original_provider or "unknown",
-            failover_provider=next_provider,
-            new_status=failover_result.status.value,
-        )
-        return failover_result
-    async def _retry_error_rows(
-        self,
-        results: list[RowEvaluationResult],
-        health_memory: ProviderHealthMemory,
-        budget: ResilienceBudget,
-        failover_profiles: list[str],
-        project_profile: dict[str, Any] | None,
-        stats: _ResilienceRetryStats | None = None,
-    ) -> list[RowEvaluationResult]:
-        """Post-pass retry sweep for failoverable ERROR rows (FR-145.2).
-        Iterates results from the primary pass. For each ERROR row that is
-        failoverable and within budget, re-evaluates using a healthy alternative
-        provider. Replaces the original result only on success.
-        The ``retry_count`` on the replacement result is incremented to signal
-        that a retry occurred. The ``retry_metadata`` carries provenance of the
-        retry (``resilience_retry_pass``, original provider, new provider).
-        A budget slot is consumed as soon as a row passes the failoverable
-        check, i.e. before a provider is selected or the check is looked up, so
-        rows that are then skipped still use up one slot and count towards
-        ``stats.retry_attempts``.
-        Args:
-            results: Primary-pass results. The list itself is not mutated (a new
-                list is returned), but rows left without a healthy provider are
-                passed to ``_annotate_quota_exhaustion`` (presumably annotated
-                in place — confirm against that helper).
-            health_memory: Shared ProviderHealthMemory from the primary pass.
-            budget: ResilienceBudget instance (mutated in place via consume()).
-            failover_profiles: Ordered list of alternative provider names.
-            project_profile: Project-level profile dict passed to the evaluator.
-            stats: Optional accumulator for resilience telemetry (FR-145.7).
-        Returns:
-            New list with ERROR rows replaced where retry succeeded.
-        """
-        # Nothing to do without alternative providers or remaining budget; note
-        # that ``stats`` is left untouched on this early exit.
-        if not failover_profiles or budget.remaining <= 0:
-            return list(results)
-        updated = list(results)
-        # Seed stats with pre-retry snapshot so callers get before/after counts.
-        if stats is not None:
-            stats.error_rows_before_retry = sum(1 for r in results if r.status == RowStatus.ERROR)
-            stats.budget_total = budget.total
-        for i, result in enumerate(results):
-            if result.status != RowStatus.ERROR:
-                continue
-            # Determine if this error is failoverable via reason_code in retry_metadata.
-            classification_reason = str((result.retry_metadata or {}).get("reason_code", "")).strip().lower()
-            if classification_reason in _NON_FAILOVERABLE_REASONS:
-                logger.debug(
-                    "resilience_retry_skipped_non_failoverable",
-                    row_id=result.row_id,
-                    check_id=result.check_id,
-                    reason_code=classification_reason,
-                )
-                continue
-            # Check budget before attempting retry.
-            # consume() returning falsy ends the whole sweep, not just this row.
-            if not budget.consume():
-                logger.info(
-                    "resilience_budget_exhausted",
-                    remaining_errors=sum(1 for r in results[i:] if r.status == RowStatus.ERROR),
-                    calls_used=budget.calls_used,
-                    budget_total=budget.total,
-                )
-                break
-            if stats is not None:
-                stats.retry_attempts += 1
-                stats.budget_used = budget.calls_used
-            # Determine which providers to exclude: original + any already tried.
-            original_provider = str(getattr(result.provenance, "original_provider", None) or "").strip()
-            chain = list(getattr(result.provenance, "failover_chain", None) or [])
-            exclude: set[str] = set(filter(None, [original_provider, *chain]))
-            next_provider = self._select_healthy_provider(
-                failover_profiles,
-                health_memory,
-                exclude=exclude,
-            )
-            # Phase 146 (FR-146.6): Bounded wait-and-resume when all providers are
-            # temporarily exhausted but not permanently blocked (TSK-146.10).
-            wait_attempted = False
-            wait_expired = False
-            if next_provider is None:
-                exhaustion_reason = _all_providers_exhausted_reason(
-                    health_memory,
-                    failover_profiles=failover_profiles,
-                )
-                if exhaustion_reason == "quota_all_providers_exhausted":
-                    # All providers are in temporary cooldown — attempt bounded wait.
-                    quota_wait_max = _quota_wait_max_seconds()
-                    if quota_wait_max > 0:
-                        wait_attempted = True
-                        recovered = await self._bounded_wait_for_provider_recovery(
-                            health_memory,
-                            failover_profiles=failover_profiles,
-                            max_wait_seconds=quota_wait_max,
-                            row_id=result.row_id,
-                            check_id=result.check_id,
-                        )
-                        # A recovered provider already tried for this row is not reused.
-                        if recovered is not None and recovered not in exclude:
-                            next_provider = recovered
-                        else:
-                            wait_expired = True
-                    else:
-                        # Waiting is disabled (non-positive max wait): treated as expired.
-                        wait_expired = True
-                if next_provider is None:
-                    # Annotate with exhaustion metadata (AC-146.6.4 / TSK-146.11).
-                    self._annotate_quota_exhaustion(
-                        result,
-                        health_memory=health_memory,
-                        failover_profiles=failover_profiles,
-                        wait_attempted=wait_attempted,
-                        wait_expired=wait_expired,
-                    )
-                    logger.debug(
-                        "resilience_retry_no_healthy_provider",
-                        row_id=result.row_id,
-                        check_id=result.check_id,
-                        exclude=sorted(exclude),
-                        exhaustion_reason=exhaustion_reason,
-                        wait_attempted=wait_attempted,
-                        wait_expired=wait_expired,
-                    )
-                    continue
-            # Build RowFailoverContext carrying forward evidence from the original result.
-            # Within this method the context is only read back when writing
-            # "resilience_failover_context" on success; it is not passed to the evaluator.
-            failover_context = RowFailoverContext(
-                row_id=result.row_id,
-                check_id=result.check_id,
-                evidence_refs=list(
-                    (result.retry_metadata or {}).get("evidence_refs", [])
-                    or (getattr(result, "evidence_anchors", None) and [str(a) for a in (result.evidence_anchors or [])])
-                    or []
-                ),
-                route_state=dict((result.retry_metadata or {}).get("route_state", {})),
-                partial_progress=dict((result.retry_metadata or {}).get("partial_progress", {})),
-                original_provider=original_provider or None,
-                failover_count=len(chain),
-                failover_chain=chain + ([original_provider] if original_provider else []),
-                timeout_telemetry_snapshot={},
-            )
-            retry_evaluator = self._build_retry_evaluator(next_provider)
-            # Provider/hop telemetry is recorded before the check lookup and the
-            # evaluation, so it is counted even if this row is skipped below.
-            if stats is not None and next_provider not in stats.providers_used:
-                stats.providers_used.append(next_provider)
-            if stats is not None:
-                stats.failover_hops += 1
-            # Reconstruct evidence for the row from the original retrieval trace.
-            # Both fall back to empty values when the original result did not store them.
-            evidence_context_str = str((result.retrieval_trace or {}).get("evidence_context_str", ""))
-            evidence_refs_list: list[str] = list((result.retry_metadata or {}).get("evidence_refs", []) or [])
-            # Determine row_index: extract from row_id "check_id:row_N" pattern.
-            # A row_id without a numeric ":row_N" suffix falls back to index 0.
-            try:
-                row_index = int(result.row_id.split(":row_")[-1])
-            except (ValueError, IndexError):
-                row_index = 0
-            # Find the original AuditCheck for this result.
-            # The first check whose id matches is used.
-            check_match: AuditCheck | None = None
-            for _section_id, check in self._checks:
-                if check.id == result.check_id:
-                    check_match = check
-                    break
-            if check_match is None:
-                logger.debug(
-                    "resilience_retry_check_not_found",
-                    row_id=result.row_id,
-                    check_id=result.check_id,
-                )
-                continue
-            logger.info(
-                "resilience_retry_attempt",
-                row_id=result.row_id,
-                check_id=result.check_id,
-                original_provider=original_provider or "unknown",
-                retry_provider=next_provider,
-                budget_calls_used=budget.calls_used,
-                budget_total=budget.total,
-            )
-            try:
-                # No provider_health_memory is passed on this call.
-                retry_result = await retry_evaluator.aevaluate_row(
-                    check=check_match,
-                    evidence=None,
-                    row_id=result.row_id,
-                    row_index=row_index,
-                    evidence_context=evidence_context_str,
-                    evidence_refs=evidence_refs_list,
-                    project_profile=project_profile,
-                )
-                retry_result = self._apply_assigned_profile_provenance(
-                    retry_result,
-                    assigned_profile=next_provider,
-                )
-            except Exception as exc:
-                # Best-effort: a failed retry keeps the original ERROR row and the
-                # sweep moves on. NOTE(review): this broad catch would also absorb
-                # BudgetExceededError if that derives from Exception — confirm intent.
-                logger.warning(
-                    "resilience_retry_failed",
-                    row_id=result.row_id,
-                    check_id=result.check_id,
-                    retry_provider=next_provider,
-                    error=str(exc),
-                )
-                continue
-            if retry_result.status == RowStatus.ERROR:
-                # Still failing on the alternative provider: keep the original row.
-                logger.debug(
-                    "resilience_retry_still_error",
-                    row_id=result.row_id,
-                    check_id=result.check_id,
-                    retry_provider=next_provider,
-                )
-                continue
-            # Retry succeeded — replace the original result and annotate.
-            retry_result.retry_count = result.retry_count + 1
-            retry_result.retry_metadata = {
-                **(retry_result.retry_metadata or {}),
-                "resilience_retry_pass": True,
-                "resilience_original_provider": original_provider or "unknown",
-                "resilience_retry_provider": next_provider,
-                "resilience_failover_context": {
-                    "failover_count": failover_context.failover_count,
-                    "failover_chain": failover_context.failover_chain,
-                },
-            }
-            updated[i] = retry_result
-            if stats is not None:
-                stats.retry_successes += 1
-            logger.info(
-                "resilience_retry_succeeded",
-                row_id=result.row_id,
-                check_id=result.check_id,
-                original_provider=original_provider or "unknown",
-                retry_provider=next_provider,
-                new_status=retry_result.status.value,
-            )
-        # Sync final budget_used after the loop (some budget slots may be consumed
-        # without a matching attempt when budget is already 0 on entry).
-        if stats is not None:
-            stats.budget_used = budget.calls_used
-            # Phase 146 telemetry: count rows with quota-exhaustion annotation.
-            # Only rows still in ERROR are counted as quota-exhausted; the two
-            # wait counters below consider every row regardless of status.
-            stats.quota_exhausted_rows = sum(
-                1
-                for r in updated
-                if r.status == RowStatus.ERROR
-                and isinstance(r.retry_metadata, dict)
-                and r.retry_metadata.get("quota_all_providers_exhausted")
-            )
-            stats.quota_wait_attempted_rows = sum(
-                1
-                for r in updated
-                if isinstance(r.retry_metadata, dict) and r.retry_metadata.get("quota_exhaustion_wait_attempted")
-            )
-            stats.quota_wait_recovered_rows = sum(
-                1
-                for r in updated
-                if isinstance(r.retry_metadata, dict) and r.retry_metadata.get("quota_exhaustion_wait_recovered")
-            )
-        return updated
-    # ---------------------------------------------------------------------------
-    # TSK-145.9/145.10/145.11: Post-pass low-confidence re-evaluation sweep
-    # ---------------------------------------------------------------------------
-    async def _reevaluate_low_confidence_rows(
-        self,
-        results: list[RowEvaluationResult],
-        health_memory: ProviderHealthMemory,
-        budget: ResilienceBudget,
-        failover_profiles: list[str],
-        project_profile: dict[str, Any] | None,
-    ) -> list[RowEvaluationResult]:
-        """Post-pass re-evaluation sweep for PARTIAL or low-score rows (TSK-145.9).
-        Runs AFTER ``_retry_error_rows``, sharing the same ``ResilienceBudget``
-        instance.  If the ERROR retry sweep exhausted the budget, no re-evals
-        happen (budget.remaining == 0).
-        Eligible rows:
-        - status == PARTIAL, OR
-        - score_1_5 is not None and score_1_5 <= threshold
-          (default threshold: 2; override via
-           ``VDS_AUDIT_LOW_CONFIDENCE_REEVAL_THRESHOLD`` env var).
-        For each eligible row the method:
-        1. Consumes one budget slot (skips if exhausted).
-        2. Selects a healthy provider, preferring one different from the
-           original.
-        3. Re-evaluates using the same ``aevaluate_row`` interface but with a
-           ``prior_result_context`` appended to the evidence so the model is
-           instructed to critically re-examine rather than anchor on the prior
-           assessment (TSK-145.10).
-        4. Replaces the result ONLY if ``reeval_score_1_5 > original_score_1_5``
-           (TSK-145.11); otherwise keeps the original.
-        Args:
-            results: Primary-pass results (after ERROR retry sweep); not
-                mutated — returns a new list.
-            health_memory: Shared ProviderHealthMemory.
-            budget: ResilienceBudget instance shared with ERROR retry sweep;
-                mutated in place via consume().
-            failover_profiles: Ordered list of alternative provider names.
-            project_profile: Project-level profile dict passed to evaluator.
-        Returns:
-            New list with low-confidence rows replaced where re-eval improved
-            the score.
-        """
-        import os as _os
-        if budget.remaining <= 0:
-            return list(results)
-        # Parse threshold from env (default 2 → score_1_5 in {1, 2} are eligible).
-        try:
-            threshold = int((_os.getenv("VDS_AUDIT_LOW_CONFIDENCE_REEVAL_THRESHOLD") or "2").strip())
-        except ValueError:
-            threshold = 2
-        updated = list(results)
-        for i, result in enumerate(results):
-            # Eligibility: PARTIAL status OR low score_1_5.
-            is_partial = result.status == RowStatus.PARTIAL
-            score_1_5 = result.score_1_5
-            is_low_score = score_1_5 is not None and score_1_5 <= threshold
-            if not (is_partial or is_low_score):
-                continue
-            # Check budget before attempting re-eval.
-            if not budget.consume():
-                logger.info(
-                    "low_confidence_reeval_budget_exhausted",
-                    remaining_eligible=sum(
-                        1
-                        for r in results[i:]
-                        if r.status == RowStatus.PARTIAL or (r.score_1_5 is not None and r.score_1_5 <= threshold)
-                    ),
-                    calls_used=budget.calls_used,
-                    budget_total=budget.total,
-                )
-                break
-            # Prefer a provider different from the original.
-            original_provider = str(getattr(result.provenance, "original_provider", None) or "").strip()
-            chain = list(getattr(result.provenance, "failover_chain", None) or [])
-            exclude: set[str] = set(filter(None, [original_provider, *chain]))
-            next_provider = self._select_healthy_provider(
-                failover_profiles,
-                health_memory,
-                exclude=exclude,
-            )
-            # Fall back to any provider (including original) if all are excluded.
-            if next_provider is None and failover_profiles:
-                next_provider = self._select_healthy_provider(
-                    failover_profiles,
-                    health_memory,
-                    exclude=set(),
-                )
-            if next_provider is None:
-                logger.debug(
-                    "low_confidence_reeval_no_healthy_provider",
-                    row_id=result.row_id,
-                    check_id=result.check_id,
-                )
-                continue
-            # Find the original AuditCheck for this result.
-            check_match = None
-            for _section_id, check in self._checks:
-                if check.id == result.check_id:
-                    check_match = check
-                    break
-            if check_match is None:
-                logger.debug(
-                    "low_confidence_reeval_check_not_found",
-                    row_id=result.row_id,
-                    check_id=result.check_id,
-                )
-                continue
-            # Determine row_index from row_id "check_id:row_N" pattern.
-            try:
-                row_index = int(result.row_id.split(":row_")[-1])
-            except (ValueError, IndexError):
-                row_index = 0
-            # Reconstruct evidence context from the original retrieval trace.
-            evidence_context_str = str((result.retrieval_trace or {}).get("evidence_context_str", ""))
-            evidence_refs_list: list[str] = list((result.retry_metadata or {}).get("evidence_refs", []) or [])
-            # Build prior_result_context to instruct the model not to anchor
-            # on the prior assessment (TSK-145.10).
-            prior_result_context: dict[str, Any] = {
-                "prior_score_1_5": score_1_5,
-                "prior_status": result.status.value,
-                "prior_reason": result.reason or "",
-                "prior_evidence_refs": evidence_refs_list[:20],
-                "reeval_instruction": (
-                    "Critically re-examine the evidence. "
-                    "Do not simply agree with the prior assessment. "
-                    "The previous evaluation may have been incomplete or anchored incorrectly."
-                ),
-            }
-            # Append prior context to evidence_context so the evaluator prompt
-            # carries re-eval framing without requiring interface changes.
-            reeval_evidence_context = (
-                f"{evidence_context_str}\n\n"
-                "[Re-evaluation context]\n"
-                f"Prior score (1-5): {prior_result_context['prior_score_1_5']}\n"
-                f"Prior status: {prior_result_context['prior_status']}\n"
-                f"Prior reason: {prior_result_context['prior_reason']}\n"
-                f"{prior_result_context['reeval_instruction']}"
-            )
-            reeval_evaluator = self._build_retry_evaluator(next_provider)
-            logger.info(
-                "low_confidence_reeval_attempted",
-                row_id=result.row_id,
-                check_id=result.check_id,
-                original_provider=original_provider or "unknown",
-                reeval_provider=next_provider,
-                prior_score_1_5=score_1_5,
-                prior_status=result.status.value,
-                budget_calls_used=budget.calls_used,
-                budget_total=budget.total,
-            )
-            try:
-                reeval_result = await reeval_evaluator.aevaluate_row(
-                    check=check_match,
-                    evidence=None,
-                    row_id=result.row_id,
-                    row_index=row_index,
-                    evidence_context=reeval_evidence_context,
-                    evidence_refs=evidence_refs_list,
-                    project_profile=project_profile,
-                )
-                reeval_result = self._apply_assigned_profile_provenance(
-                    reeval_result,
-                    assigned_profile=next_provider,
-                )
-            except Exception as exc:
-                logger.warning(
-                    "low_confidence_reeval_failed",
-                    row_id=result.row_id,
-                    check_id=result.check_id,
-                    reeval_provider=next_provider,
-                    error=str(exc),
-                )
-                continue
-            # TSK-145.11: Replace only if re-eval score is strictly higher.
-            original_score = result.score_1_5 or 0
-            reeval_score = reeval_result.score_1_5 or 0
-            if reeval_score > original_score:
-                reeval_result.retry_count = result.retry_count + 1
-                reeval_result.retry_metadata = {
-                    **(reeval_result.retry_metadata or {}),
-                    "low_confidence_reeval_pass": True,
-                    "low_confidence_original_provider": original_provider or "unknown",
-                    "low_confidence_reeval_provider": next_provider,
-                    "low_confidence_prior_score_1_5": original_score,
-                    "low_confidence_prior_result_context": prior_result_context,
-                }
-                updated[i] = reeval_result
-                logger.info(
-                    "low_confidence_reeval_improved",
-                    row_id=result.row_id,
-                    check_id=result.check_id,
-                    original_score=original_score,
-                    new_score=reeval_score,
-                    reeval_provider=next_provider,
-                )
-            else:
-                logger.info(
-                    "low_confidence_reeval_kept_original",
-                    row_id=result.row_id,
-                    check_id=result.check_id,
-                    original_score=original_score,
-                    reeval_score=reeval_score,
-                )
-        return updated
-    def _build_timeout_result_with_trace(
-        self,
-        *,
-        row_id: str,
-        check_id: str,
-        message: str,
-        progress: dict[str, Any] | None,
-        timeout_trace: dict[str, Any],
-        matcher_retrieval_trace: dict[str, Any] | None,
-    ) -> RowEvaluationResult:
-        progress = self._sanitize_timeout_progress_snapshot_for_app_config_only(
-            progress,
-            matcher_retrieval_trace=matcher_retrieval_trace,
-        )
-        timeout_trace = self._sanitize_timeout_trace_for_app_config_only(
-            timeout_trace,
-            progress=progress,
-            matcher_retrieval_trace=matcher_retrieval_trace,
-        )
-        degraded_timeout_finalize = bool(timeout_trace.get("timeout_degraded_finalize"))
-        result = self._create_timeout_result(
-            row_id,
-            check_id,
-            message,
-            degraded_timeout_finalize=degraded_timeout_finalize,
-            progress=progress,
-        )
-        self._attach_timeout_progress_telemetry(
-            timeout_trace=timeout_trace,
-            progress=progress,
-            row_id=row_id,
-            check_id=check_id,
-        )
-        base_trace = _sanitize_retrieval_trace(matcher_retrieval_trace) or {}
-        result.retrieval_trace = _sanitize_retrieval_trace({**base_trace, **timeout_trace})
-        result = RowEvaluator._sanitize_app_config_only_result(result)
-        return RowEvaluator._salvage_app_config_only_timeout_result(result)
-    @staticmethod
-    def _build_timeout_progress_config_rows(
-        *,
-        progress: dict[str, Any] | None,
-        matcher_retrieval_trace: dict[str, Any] | None,
-    ) -> list[dict[str, Any]]:
-        config_rows: list[dict[str, Any]] = []
-        for container in (
-            matcher_retrieval_trace,
-            (matcher_retrieval_trace or {}).get("tool_first_loop")
-            if isinstance(matcher_retrieval_trace, dict)
-            else None,
-            progress,
-        ):
-            if not isinstance(container, dict):
-                continue
-            raw_rows = container.get("config_repo_companion_refs")
-            if not isinstance(raw_rows, list):
-                continue
-            for row in raw_rows:
-                if isinstance(row, dict):
-                    config_rows.append(dict(row))
-        if config_rows:
-            return config_rows
-        fallback_rows: list[dict[str, Any]] = []
-        raw_refs = progress.get("evidence_refs") if isinstance(progress, dict) else None
-        for ref in raw_refs if isinstance(raw_refs, list) else []:
-            ref_value = RowEvaluator._normalize_trace_ref(ref)
-            if not RowEvaluator._is_config_ref_value(ref_value):
-                continue
-            parts = Path(ref_value).parts
-            env_name = str(parts[0]).strip().lower() if len(parts) >= 2 else ""
-            fallback_rows.append(
-                {
-                    "ref_value": ref_value,
-                    "ref": ref_value,
-                    "source_repo": "app-config",
-                    "metadata": {
-                        "env": env_name,
-                        "source_repo_slug": "app-config",
-                        "config_root": "app-config",
-                    },
-                }
-            )
-        return fallback_rows
-    @classmethod
-    def _compute_timeout_app_config_sanitization(
-        cls,
-        *,
-        progress: dict[str, Any] | None,
-        matcher_retrieval_trace: dict[str, Any] | None,
-    ) -> tuple[dict[str, Any] | None, list[dict[str, Any]], set[str]]:
-        if not isinstance(progress, dict):
-            return None, [], set()
-        interpretation = (
-            progress.get("requirement_interpretation")
-            if isinstance(progress.get("requirement_interpretation"), dict)
-            else {}
-        )
-        sanitized_interpretation = RowEvaluator._sanitize_requirement_interpretation_for_app_config_only(interpretation)
-        if not RowEvaluator._is_app_config_only_requirement(
-            requirement_interpretation=sanitized_interpretation,
-        ):
-            return None, [], set()
-        raw_refs = progress.get("evidence_refs") if isinstance(progress.get("evidence_refs"), list) else []
-        requirement_text = str(
-            sanitized_interpretation.get("intent") or progress.get("requirement_intent") or ""
-        ).strip()
-        requirement_guidance = str(sanitized_interpretation.get("detailed_guidance") or "").strip() or None
-        config_rows = cls._build_timeout_progress_config_rows(
-            progress=progress,
-            matcher_retrieval_trace=matcher_retrieval_trace,
-        )
-        selected_rows = RowEvaluator._select_app_config_only_rows(
-            requirement_text=requirement_text,
-            requirement_guidance=requirement_guidance,
-            requirement_interpretation=sanitized_interpretation,
-            config_rows=config_rows,
-        )
-        allowed_ref_values = RowEvaluator._allowed_app_config_only_ref_values(
-            requirement_text=requirement_text,
-            requirement_guidance=requirement_guidance,
-            requirement_interpretation=sanitized_interpretation,
-            config_rows=config_rows,
-            fallback_refs=raw_refs,
-        )
-        return sanitized_interpretation, selected_rows, allowed_ref_values
-    @classmethod
-    def _sanitize_timeout_progress_snapshot_for_app_config_only(
-        cls,
-        progress: dict[str, Any] | None,
-        *,
-        matcher_retrieval_trace: dict[str, Any] | None,
-    ) -> dict[str, Any] | None:
-        if not isinstance(progress, dict):
-            return progress
-        sanitized_interpretation, selected_rows, allowed_ref_values = cls._compute_timeout_app_config_sanitization(
-            progress=progress,
-            matcher_retrieval_trace=matcher_retrieval_trace,
-        )
-        if sanitized_interpretation is None:
-            return progress
-        normalized = dict(progress)
-        normalized["requirement_interpretation"] = sanitized_interpretation
-        normalized["evidence_refs"] = RowEvaluator._filter_refs_for_app_config_only(
-            normalized.get("evidence_refs"),
-            allowed_ref_values=allowed_ref_values,
-        )
-        normalized["config_repo_companion_refs"] = selected_rows
-        return normalized
-    @classmethod
-    def _sanitize_timeout_trace_for_app_config_only(
-        cls,
-        timeout_trace: dict[str, Any],
-        *,
-        progress: dict[str, Any] | None,
-        matcher_retrieval_trace: dict[str, Any] | None,
-    ) -> dict[str, Any]:
-        normalized = dict(timeout_trace)
-        sanitized_progress = cls._sanitize_timeout_progress_snapshot_for_app_config_only(
-            progress,
-            matcher_retrieval_trace=matcher_retrieval_trace,
-        )
-        normalized["timeout_progress_snapshot"] = sanitized_progress
-        sanitized_interpretation, selected_rows, allowed_ref_values = cls._compute_timeout_app_config_sanitization(
-            progress=sanitized_progress,
-            matcher_retrieval_trace=matcher_retrieval_trace,
-        )
-        if sanitized_interpretation is None:
-            return normalized
-        filtered_refs = (
-            RowEvaluator._filter_refs_for_app_config_only(
-                sanitized_progress.get("evidence_refs") if isinstance(sanitized_progress, dict) else [],
-                allowed_ref_values=allowed_ref_values,
-            )
-            if isinstance(sanitized_progress, dict)
-            else []
-        )
-        preserved_trace_steps: list[dict[str, Any]] = []
-        progress_loop_trace = (
-            sanitized_progress.get("tool_first_loop")
-            if isinstance(sanitized_progress, dict) and isinstance(sanitized_progress.get("tool_first_loop"), dict)
-            else {}
-        )
-        raw_trace_steps = progress_loop_trace.get("trace_steps") if isinstance(progress_loop_trace, dict) else None
-        if isinstance(raw_trace_steps, list):
-            for step in raw_trace_steps:
-                if not isinstance(step, dict):
-                    continue
-                tool_name = str(step.get("tool") or "").strip()
-                output = step.get("output")
-                updated_step = dict(step)
-                if tool_name == "read_code_file" and isinstance(output, dict):
-                    path_value = RowEvaluator._normalize_trace_ref(
-                        output.get("path") or output.get("file_path") or output.get("source_path")
-                    )
-                    if not RowEvaluator._is_config_ref_value(path_value):
-                        continue
-                    if allowed_ref_values and path_value not in allowed_ref_values:
-                        continue
-                elif tool_name == "record_evidence_refs" and isinstance(output, dict):
-                    sanitized_output = dict(output)
-                    for key in ("refs", "evidence_refs"):
-                        if isinstance(output.get(key), list):
-                            sanitized_output[key] = RowEvaluator._filter_trace_entries_for_app_config_only(
-                                output.get(key),
-                                allowed_ref_values=allowed_ref_values,
-                            )
-                    updated_step["output"] = sanitized_output
-                preserved_trace_steps.append(updated_step)
-        normalized["requirement_interpretation"] = sanitized_interpretation
-        normalized["requirement_intent"] = str(
-            sanitized_interpretation.get("intent") or normalized.get("requirement_intent") or ""
-        ).strip()
-        normalized["evidence_refs"] = filtered_refs
-        normalized["config_repo_companion_refs"] = selected_rows
-        normalized["cross_repo_evidence_refs"] = []
-        normalized["prior_knowledge_refs"] = []
-        normalized.pop("docs", None)
-        normalized["code"] = {"top_k": [], "hits": []}
-        normalized["tool_first_loop"] = {
-            "evidence_refs": filtered_refs,
-            "config_repo_companion_refs": selected_rows,
-            "cross_repo_evidence_refs": [],
-            "prior_knowledge_refs": [],
-            "trace_steps": preserved_trace_steps,
-        }
-        normalized["evidence_ref_sets"] = {
-            "final_cited_refs": filtered_refs[:50],
-            "final_cited_ref_count": len(filtered_refs),
-            "exploratory_refs": [],
-            "exploratory_ref_count": 0,
-            "dropped_refs": [],
-            "dropped_exploratory_ref_count": 0,
-            "final_cited_ref_modality": {"docs": False, "code": False},
-        }
-        normalized["app_config_only_enforced"] = {
-            "applied": True,
-            "allowed_ref_values": sorted(allowed_ref_values)[:20],
-            "final_anchor_count": 0,
-        }
-        return normalized
-    async def _evaluate_row_with_legacy_timeout(
-        self,
-        *,
-        check: AuditCheck,
-        row_id: str,
-        row_index: int,
-        evidence_context: str,
-        evidence_refs: list[str],
-        project_profile: dict[str, Any] | None,
-        thread_id: str,
-        matcher_retrieval_trace: dict[str, Any] | None,
-        provider_health_memory: Any | None = None,
-    ) -> RowEvaluationResult:
-        """Evaluate one row under a fixed per-attempt timeout, extending on progress.
-        Each attempt runs ``_invoke_row_evaluator`` inside ``asyncio.timeout``,
-        starting from ``self.config.row_timeout_ms``.  When an attempt times
-        out, ``_decide_row_timeout_extension`` decides whether to try again
-        with the timeout returned by ``_next_row_timeout_ms``; a retry calls
-        ``_invoke_row_evaluator`` again rather than resuming the cancelled
-        attempt.  When no extension is allowed, a timeout result carrying the
-        timeout diagnostics is returned instead of raising.
-        Args:
-            check: The check the row belongs to.
-            row_id: Identifier of the row being evaluated.
-            row_index: Index of the row, forwarded to the evaluator.
-            evidence_context: Evidence text forwarded to the evaluator.
-            evidence_refs: Evidence references forwarded to the evaluator.
-            project_profile: Project-level profile dict forwarded to the evaluator.
-            thread_id: Logged as ``thread_id``, ``run_id`` and ``audit_id``.
-            matcher_retrieval_trace: Matcher trace merged into a timeout result.
-            provider_health_memory: Forwarded to the evaluator unchanged.
-        Returns:
-            The evaluator's result, or a timeout result built by
-            ``_build_timeout_result_with_trace``.
-        """
-        row_timeout_ms = int(self.config.row_timeout_ms)
-        # Number of extensions granted so far.
-        timeout_retry_attempt = 0
-        # Progress summary from the last granted extension; handed back to the
-        # decision helper on the next timeout.
-        last_progress_summary: dict[str, int] | None = None
-        while True:
-            try:
-                async with asyncio.timeout(row_timeout_ms / 1000):
-                    return await self._invoke_row_evaluator(
-                        check=check,
-                        row_id=row_id,
-                        row_index=row_index,
-                        evidence_context=evidence_context,
-                        evidence_refs=evidence_refs,
-                        project_profile=project_profile,
-                        provider_health_memory=provider_health_memory,
-                    )
-            except TimeoutError:
-                # NOTE(review): this also catches a TimeoutError raised by the
-                # evaluator itself, not only the asyncio.timeout expiry.
-                raw_progress = self._get_row_runtime_progress(row_id)
-                progress = self._normalize_timeout_progress_snapshot(raw_progress)
-                # The decision helper gets the raw snapshot; logging and the
-                # timeout trace below use the normalized one.
-                extension_decision = self._decide_row_timeout_extension(
-                    timeout_retry_attempt=timeout_retry_attempt,
-                    current_timeout_ms=row_timeout_ms,
-                    progress=raw_progress,
-                    previous_progress_summary=last_progress_summary,
-                )
-                if bool(extension_decision.get("allow")):
-                    if isinstance(extension_decision.get("progress_summary"), dict):
-                        last_progress_summary = dict(cast("dict[str, int]", extension_decision.get("progress_summary")))
-                    # The next timeout is computed from the pre-increment attempt number.
-                    next_timeout_ms = self._next_row_timeout_ms(row_timeout_ms, timeout_retry_attempt)
-                    timeout_retry_attempt += 1
-                    logger.warning(
-                        "row_timeout_extended_for_effective_progress",
-                        row_id=row_id,
-                        check_id=check.id,
-                        thread_id=thread_id,
-                        run_id=thread_id,
-                        audit_id=thread_id,
-                        timeout_retry_attempt=timeout_retry_attempt,
-                        previous_timeout_ms=row_timeout_ms,
-                        next_timeout_ms=next_timeout_ms,
-                        progress=progress,
-                    )
-                    row_timeout_ms = next_timeout_ms
-                    continue
-                # Extension denied: finalize as a timeout result.  The terminal
-                # status is FAIL for a degraded finalize and ERROR otherwise.
-                degraded_timeout_finalize = self._should_degrade_timeout_finalize(progress)
-                timeout_trace = {
-                    "timeout_retry_attempts": timeout_retry_attempt,
-                    "timeout_progress_snapshot": progress,
-                    "timeout_extended": timeout_retry_attempt > 0,
-                    "timeout_extension_denial_reason": str(extension_decision.get("reason") or "unknown"),
-                    "timeout_degraded_finalize": degraded_timeout_finalize,
-                    "timeout_kind": self._derive_timeout_kind(
-                        progress=progress,
-                        degraded_timeout_finalize=degraded_timeout_finalize,
-                    ),
-                    "timeout_terminal_status": (RowStatus.FAIL if degraded_timeout_finalize else RowStatus.ERROR).value,
-                }
-                # The progress delta is recorded only when the decision supplied one.
-                extension_progress_delta = extension_decision.get("progress_delta")
-                if isinstance(extension_progress_delta, dict):
-                    timeout_trace["timeout_extension_progress_delta"] = dict(extension_progress_delta)
-                return self._build_timeout_result_with_trace(
-                    row_id=row_id,
-                    check_id=check.id,
-                    message="Row timeout exceeded",
-                    progress=progress,
-                    timeout_trace=timeout_trace,
-                    matcher_retrieval_trace=matcher_retrieval_trace,
-                )
-    def _detect_lease_progress_signal(
-        self,
-        previous_progress: dict[str, Any] | None,
-        current_progress: dict[str, Any] | None,
-    ) -> str | None:
-        if not isinstance(current_progress, dict):
-            return None
-        previous = previous_progress if isinstance(previous_progress, dict) else {}
-        previous_seen = self._coerce_progress_timestamp(previous.get("event_last_seen_at"))
-        current_seen = self._coerce_progress_timestamp(current_progress.get("event_last_seen_at"))
-        if current_seen > previous_seen:
-            return "event_stream"
-        for key, signal_type in (
-            ("event_tool_calls_started", "tool_call_started"),
-            ("event_tool_calls_completed", "tool_call_completed"),
-            ("event_skill_execution_effective_tool_calls", "skill_effective"),
-            ("event_turn_index", "partial_output"),
-            ("steps_executed", "route_transition"),
-            ("steps_unique_tools", "route_transition"),
-            ("evidence_refs_count", "evidence_growth"),
-        ):
-            if self._coerce_progress_counter(current_progress.get(key)) > self._coerce_progress_counter(
-                previous.get(key)
-            ):
-                return signal_type
-        current_summary = self._extract_progress_summary(current_progress)
-        previous_summary = self._extract_progress_summary(previous)
-        progress_delta = self._compute_progress_delta(previous_summary, current_summary)
-        if isinstance(progress_delta, dict) and max(progress_delta.values(), default=0) > 0:
-            return "tool_activity"
-        return None
-    def _derive_lease_stall_reason(
-        self,
-        *,
-        elapsed_since_progress_seconds: float,
-        lease_window_seconds: float,
-        stall_detection_seconds: float,
-        progress: dict[str, Any] | None,
-    ) -> str:
-        if elapsed_since_progress_seconds >= lease_window_seconds:
-            return "lease_expired"
-        termination_reason = str((progress or {}).get("termination_reason") or "").strip().lower()
-        if termination_reason in {"idle_after_tool", "post_tool_idle", "idle_post_tool"}:
-            return "idle_after_tool"
-        if termination_reason in {"provider_stall", "stream_stall"}:
-            return "provider_stall"
-        if termination_reason in {"tool_churn", "agent_tool_churn", "churn"}:
-            return "tool_churn"
-        if elapsed_since_progress_seconds >= stall_detection_seconds:
-            return "stall_detection_threshold"
-        return "no_progress"
-    async def _evaluate_row_with_lease_timeout(
-        self,
-        *,
-        check: AuditCheck,
-        row_id: str,
-        row_index: int,
-        evidence_context: str,
-        evidence_refs: list[str],
-        project_profile: dict[str, Any] | None,
-        thread_id: str,
-        matcher_retrieval_trace: dict[str, Any] | None,
-        provider_health_memory: Any | None = None,
-    ) -> RowEvaluationResult:
-        import time
-        absolute_timeout_seconds = max(0.05, float(self.config.row_absolute_timeout_ms) / 1000.0)
-        lease_window_seconds = max(0.05, float(self.config.row_progress_lease_seconds))
-        stall_detection_seconds = max(
-            0.05,
-            min(float(self.config.row_stall_detection_seconds), lease_window_seconds),
-        )
-        poll_interval_seconds = min(1.0, stall_detection_seconds, lease_window_seconds)
-        task = asyncio.create_task(
-            self._invoke_row_evaluator(
-                check=check,
-                row_id=row_id,
-                row_index=row_index,
-                evidence_context=evidence_context,
-                evidence_refs=evidence_refs,
-                project_profile=project_profile,
-                provider_health_memory=provider_health_memory,
-            )
-        )
-        started_at = time.monotonic()
-        last_progress_at = started_at
-        previous_progress: dict[str, Any] | None = None
-        previous_progress_summary: dict[str, int] | None = None
-        liveness_lease_renewals = 0
-        last_progress_signal: float | None = None
-        last_progress_signal_type: str | None = None
-        try:
-            while True:
-                now = time.monotonic()
-                remaining_absolute_seconds = absolute_timeout_seconds - (now - started_at)
-                if remaining_absolute_seconds <= 0:
-                    progress = self._normalize_timeout_progress_snapshot(self._get_row_runtime_progress(row_id))
-                    degraded_timeout_finalize = self._should_degrade_timeout_finalize(progress)
-                    stall_duration_seconds = max(0.0, now - last_progress_at)
-                    timeout_trace = {
-                        "timeout_retry_attempts": 0,
-                        "timeout_progress_snapshot": progress,
-                        "timeout_extended": liveness_lease_renewals > 0,
-                        "timeout_extension_denial_reason": "absolute_timeout_reached",
-                        "timeout_degraded_finalize": degraded_timeout_finalize,
-                        "timeout_kind": self._derive_timeout_kind(
-                            progress=progress,
-                            degraded_timeout_finalize=degraded_timeout_finalize,
-                            absolute_timeout_exceeded=True,
-                        ),
-                        "timeout_terminal_status": (
-                            RowStatus.FAIL if degraded_timeout_finalize else RowStatus.ERROR
-                        ).value,
-                        "timeout_scope": "row_absolute_cap",
-                        "liveness_lease_renewals": liveness_lease_renewals,
-                        "last_progress_signal": last_progress_signal,
-                        "last_progress_signal_type": last_progress_signal_type,
-                        "stall_duration_seconds": round(stall_duration_seconds, 3),
-                        "stall_reason": "absolute_timeout_reached",
-                    }
-                    return self._build_timeout_result_with_trace(
-                        row_id=row_id,
-                        check_id=check.id,
-                        message="Row absolute timeout exceeded",
-                        progress=progress,
-                        timeout_trace=timeout_trace,
-                        matcher_retrieval_trace=matcher_retrieval_trace,
-                    )
-                done, _ = await asyncio.wait({task}, timeout=min(poll_interval_seconds, remaining_absolute_seconds))
-                if task in done:
-                    return await task
-                progress = self._normalize_timeout_progress_snapshot(self._get_row_runtime_progress(row_id))
-                signal_type = self._detect_lease_progress_signal(previous_progress, progress)
-                extension_decision = self._decide_row_timeout_extension(
-                    timeout_retry_attempt=liveness_lease_renewals,
-                    current_timeout_ms=int(self.config.row_timeout_ms),
-                    progress=progress,
-                    previous_progress_summary=previous_progress_summary,
-                )
-                progress_summary = extension_decision.get("progress_summary")
-                if not isinstance(progress_summary, dict):
-                    progress_summary = self._extract_progress_summary(progress)
-                if isinstance(progress_summary, dict):
-                    previous_progress_summary = dict(progress_summary)
-                previous_progress = progress
-                if (
-                    signal_type is not None
-                    or bool(extension_decision.get("allow"))
-                    or self._has_strong_repeat_progress(progress)
-                ):
-                    now = time.monotonic()
-                    last_progress_at = now
-                    last_progress_signal = round(now - started_at, 3)
-                    last_progress_signal_type = signal_type or str(
-                        extension_decision.get("reason") or "progress_heuristic"
-                    )
-                    liveness_lease_renewals += 1
-                    logger.warning(
-                        "row_timeout_lease_renewed",
-                        row_id=row_id,
-                        check_id=check.id,
-                        thread_id=thread_id,
-                        run_id=thread_id,
-                        audit_id=thread_id,
-                        liveness_lease_renewals=liveness_lease_renewals,
-                        last_progress_signal=last_progress_signal,
-                        last_progress_signal_type=last_progress_signal_type,
-                        progress=progress,
-                    )
-                    continue
-                now = time.monotonic()
-                elapsed_since_progress_seconds = max(0.0, now - last_progress_at)
-                if elapsed_since_progress_seconds < min(stall_detection_seconds, lease_window_seconds):
-                    continue
-                degraded_timeout_finalize = self._should_degrade_timeout_finalize(progress)
-                stall_reason = self._derive_lease_stall_reason(
-                    elapsed_since_progress_seconds=elapsed_since_progress_seconds,
-                    lease_window_seconds=lease_window_seconds,
-                    stall_detection_seconds=stall_detection_seconds,
-                    progress=progress,
-                )
-                timeout_trace = {
-                    "timeout_retry_attempts": 0,
-                    "timeout_progress_snapshot": progress,
-                    "timeout_extended": liveness_lease_renewals > 0,
-                    "timeout_extension_denial_reason": str(extension_decision.get("reason") or stall_reason),
-                    "timeout_degraded_finalize": degraded_timeout_finalize,
-                    "timeout_kind": self._derive_timeout_kind(
-                        progress=progress,
-                        degraded_timeout_finalize=degraded_timeout_finalize,
-                    ),
-                    "timeout_terminal_status": (RowStatus.FAIL if degraded_timeout_finalize else RowStatus.ERROR).value,
-                    "timeout_scope": "row_progress_lease",
-                    "liveness_lease_renewals": liveness_lease_renewals,
-                    "last_progress_signal": last_progress_signal,
-                    "last_progress_signal_type": last_progress_signal_type,
-                    "stall_duration_seconds": round(elapsed_since_progress_seconds, 3),
-                    "stall_reason": stall_reason,
-                }
-                extension_progress_delta = extension_decision.get("progress_delta")
-                if isinstance(extension_progress_delta, dict):
-                    timeout_trace["timeout_extension_progress_delta"] = dict(extension_progress_delta)
-                return self._build_timeout_result_with_trace(
-                    row_id=row_id,
-                    check_id=check.id,
-                    message="Row progress lease expired",
-                    progress=progress,
-                    timeout_trace=timeout_trace,
-                    matcher_retrieval_trace=matcher_retrieval_trace,
-                )
-        finally:
-            if not task.done():
-                task.cancel()
-                with contextlib.suppress(asyncio.CancelledError):
-                    await task
-    async def _process_batch_parallel(
-        self,
-        batch_idx: int,
-        start_idx: int,
-        end_idx: int,
-        project_profile: dict[str, Any] | None,
-        checkpoint: BatchCheckpoint,
-        thread_id: str,
-        row_progress_callback: RowProgressCallback | None = None,
-        batch_stats: _ResilienceRetryStats | None = None,
-    ) -> BatchResult:
-        """Process rows in parallel with semaphore-bounded concurrency (FR-188).
-        Checkpoint writes and diagnostics dict access are serialized under a lock
-        to preserve correctness under concurrency.
-        Args:
-            batch_idx: Zero-based batch index (reported 1-based in budget error context).
-            start_idx: First row index of the batch (inclusive).
-            end_idx: End row index of the batch (exclusive).
-            project_profile: Optional profile mapping forwarded to row processing
-                and to the ERROR retry sweep.
-            checkpoint: Checkpoint forwarded to row processing; on batch timeout it
-                is also consulted to preserve rows that already have stored results.
-            thread_id: Identifier forwarded to row processing.
-            row_progress_callback: Optional callback invoked with a "started" event
-                before each row and with the row outcome afterwards; it may return
-                an awaitable, which is then awaited.
-            batch_stats: Optional retry statistics forwarded to row processing and
-                to the ERROR retry sweep.
-        Returns:
-            A ``BatchResult``. When the batch timeout fires, ``timed_out`` is True and
-            the results hold timeout results plus rows restored from the checkpoint.
-        Raises:
-            BatchBudgetExceededError: If any resulting row reports "Budget exceeded"
-                in its ``reason`` or ``error_message``.
-        """
-        import time
-        start_time = time.monotonic()
-        effective_concurrency = self._effective_row_concurrency()
-        logger.info(
-            "row_concurrency_active",
-            effective_concurrency=effective_concurrency,
-            batch_size=end_idx - start_idx,
-            adaptive_mode=bool(self.config.adaptive_row_concurrency),
-            profile=str((project_profile or {}).get("profile") or self.config.run_context.get("active_profile") or ""),
-        )
-        semaphore = asyncio.Semaphore(effective_concurrency)
-        checkpoint_lock = asyncio.Lock()
-        progress_lock = asyncio.Lock()
-        consumer = PostProcessingConsumer()
-        consume_task = asyncio.create_task(consumer.consume())
-        # Running tallies shared by all row coroutines; only touched under progress_lock.
-        interim_results: list[RowEvaluationResult] = []
-        interim_success_count = 0
-        interim_error_count = 0
-        interim_skipped_count = 0
-        async def _guarded_row(idx: int) -> tuple[int, RowEvaluationResult | None, str]:
-            """Wrapper that never lets BudgetExceededError escape gather."""
-            nonlocal interim_success_count, interim_error_count, interim_skipped_count
-            try:
-                _section_id, check = self._checks[idx]
-                row_id = f"{check.id}:row_{idx}"
-                if row_progress_callback is not None:
-                    async with progress_lock:
-                        inflight_result = self._build_inflight_row_progress_result(check, row_id)
-                        maybe_awaitable = row_progress_callback(
-                            batch_idx,
-                            start_idx,
-                            end_idx,
-                            sorted(interim_results, key=lambda item: item.row_id),
-                            interim_success_count,
-                            interim_error_count,
-                            interim_skipped_count,
-                            inflight_result,
-                            "started",
-                        )
-                        if inspect.isawaitable(maybe_awaitable):
-                            await maybe_awaitable
-                result, outcome = await self._process_single_row(
-                    idx=idx,
-                    batch_idx=batch_idx,
-                    project_profile=project_profile,
-                    checkpoint=checkpoint,
-                    thread_id=thread_id,
-                    semaphore=semaphore,
-                    checkpoint_lock=checkpoint_lock,
-                    batch_stats=batch_stats,
-                )
-                if result is not None:
-                    await consumer.put(result)
-                if row_progress_callback is not None:
-                    async with progress_lock:
-                        if result is not None:
-                            # Replace any earlier entry for the same row before appending.
-                            interim_results[:] = [res for res in interim_results if res.row_id != result.row_id]
-                            interim_results.append(result)
-                        if outcome == "error":
-                            interim_error_count += 1
-                        elif outcome == "skipped":
-                            interim_skipped_count += 1
-                        else:
-                            interim_success_count += 1
-                        if self._should_emit_row_progress_callback(result, outcome):
-                            maybe_awaitable = row_progress_callback(
-                                batch_idx,
-                                start_idx,
-                                end_idx,
-                                sorted(interim_results, key=lambda item: item.row_id),
-                                interim_success_count,
-                                interim_error_count,
-                                interim_skipped_count,
-                                result,
-                                outcome,
-                            )
-                            if inspect.isawaitable(maybe_awaitable):
-                                await maybe_awaitable
-                return idx, result, outcome
-            except BudgetExceededError as exc:
-                # BudgetExceededError escapes _process_single_row -> _guarded_row
-                # Convert to error result to allow batch to continue
-                logger.warning("parallel_row_budget_exceeded", row_index=idx, error=str(exc))
-                _section_id, check = self._checks[idx]
-                row_id = f"{check.id}:row_{idx}"
-                err_result = self._create_timeout_result(row_id, check.id, f"Budget exceeded: {exc}")
-                await consumer.put(err_result)
-                return idx, err_result, "error"
-            except Exception as exc:
-                logger.warning("parallel_row_error", row_index=idx, error=str(exc))
-                _section_id, check = self._checks[idx]
-                row_id = f"{check.id}:row_{idx}"
-                err_result = self._create_timeout_result(row_id, check.id, f"Parallel row error: {exc}")
-                await consumer.put(err_result)
-                return idx, err_result, "error"
-        # P161-E2 TSK-161.11: Non-targeted row skip for single-row shards.
-        # When target_selection specifies a single row index, only create
-        # _guarded_row() coroutines for that index.  Eliminates 58 N/A
-        # iterations per child (60-120 min cumulative waste eliminated).
-        _row_indices = range(start_idx, end_idx)
-        if (
-            self.config.target_selection is not None
-            and len(self.config.target_selection.row_indices) == 1
-            and os.environ.get("VDS_AUDIT_CANONICAL_CHECKLIST_JSON")
-        ):
-            _target_idx = next(iter(self.config.target_selection.row_indices))
-            if start_idx <= _target_idx < end_idx:
-                _row_indices = [_target_idx]
-                logger.info(
-                    "single_row_shard_skip_non_targeted",
-                    targeted_index=_target_idx,
-                    original_range=f"{start_idx}-{end_idx}",
-                    skipped_count=(end_idx - start_idx) - 1,
-                )
-        try:
-            effective_batch_timeout_ms = self.effective_batch_timeout_ms()
-            async with asyncio.timeout(effective_batch_timeout_ms / 1000):
-                gather_results = await asyncio.gather(
-                    *[_guarded_row(idx) for idx in _row_indices],
-                    return_exceptions=False,
-                )
-        except TimeoutError:
-            await consumer.finalize()
-            await consume_task
-            logger.warning(
-                "batch_timeout",
-                batch=batch_idx,
-                completed=0,
-                expected=end_idx - start_idx,
-            )
-            # Create timeout results for all rows in the batch
-            results: list[RowEvaluationResult] = []
-            error_count = 0
-            skipped_count = 0
-            for idx in range(start_idx, end_idx):
-                _section_id, check = self._checks[idx]
-                row_id = f"{check.id}:row_{idx}"
-                if self._should_evaluate_row(idx, check.id, row_id, checkpoint):
-                    progress = self._normalize_timeout_progress_snapshot(self._get_row_runtime_progress(row_id))
-                    degraded = self._should_degrade_timeout_finalize(progress)
-                    timeout_result = self._create_timeout_result(
-                        row_id,
-                        check.id,
-                        "Batch timeout exceeded",
-                        degraded_timeout_finalize=degraded,
-                        progress=progress,
-                    )
-                    timeout_trace = {
-                        "timeout_retry_attempts": 0,
-                        "timeout_progress_snapshot": progress,
-                        "timeout_kind": self._derive_timeout_kind(
-                            progress=progress,
-                            degraded_timeout_finalize=degraded,
-                            batch_timeout_exceeded=True,
-                        ),
-                        "timeout_extended": False,
-                        "timeout_degraded_finalize": degraded,
-                        "timeout_terminal_status": timeout_result.status.value,
-                        "batch_timeout_exceeded": True,
-                    }
-                    self._attach_timeout_progress_telemetry(
-                        timeout_trace=timeout_trace,
-                        progress=progress,
-                        row_id=row_id,
-                        check_id=check.id,
-                    )
-                    timeout_result.retrieval_trace = _sanitize_retrieval_trace(
-                        self._sanitize_timeout_trace_for_app_config_only(
-                            timeout_trace,
-                            progress=progress,
-                            matcher_retrieval_trace=None,
-                        )
-                    )
-                    timeout_result = RowEvaluator._sanitize_app_config_only_result(timeout_result)
-                    timeout_result = RowEvaluator._salvage_app_config_only_timeout_result(timeout_result)
-                    results.append(timeout_result)
-                    error_count += 1
-                elif row_id in checkpoint.results_by_row_id:
-                    try:
-                        preserved = BatchCheckpoint.deserialize_row_result(checkpoint.results_by_row_id[row_id])
-                    except Exception as exc:
-                        # Restoring a stored row is best-effort: keep going, but leave
-                        # a trace so a corrupt checkpoint entry is not lost silently.
-                        logger.warning(
-                            "batch_timeout_checkpoint_restore_failed",
-                            batch=batch_idx,
-                            row_id=row_id,
-                            error=str(exc),
-                        )
-                    else:
-                        results.append(preserved)
-                    # The row counts as skipped whether or not its stored result was restored.
-                    skipped_count += 1
-            duration_ms = int((time.monotonic() - start_time) * 1000)
-            return BatchResult(
-                batch_index=batch_idx,
-                start_row=start_idx,
-                end_row=end_idx,
-                results=results,
-                duration_ms=duration_ms,
-                success_count=0,
-                error_count=error_count,
-                timed_out=True,
-                skipped_count=skipped_count,
-            )
-        # Finalize consumer and collect overlap diagnostics (FR-193)
-        await consumer.finalize()
-        postproc_diagnostics = await consume_task
-        # Assemble results in index order
-        results = []
-        success_count = 0
-        error_count = 0
-        skipped_count = 0
-        for _, result, outcome in sorted(gather_results, key=lambda x: x[0]):
-            if result is not None:
-                results.append(result)
-            if outcome == "error":
-                error_count += 1
-            elif outcome == "skipped":
-                skipped_count += 1
-            else:
-                success_count += 1
-        # FR-145.2: Post-pass ERROR retry sweep (parallel path mirror of serial path).
-        # Only runs when batch_error_retry_limit > 0 and there are ERROR rows.
-        # (The batch-timeout path returned above, so no timeout check is needed here.)
-        if error_count > 0 and self.config.batch_error_retry_limit > 0:
-            health_memory: ProviderHealthMemory | None = getattr(self, "_batch_health_memory", None)
-            failover_profiles: list[str] = list(getattr(self, "_batch_failover_profiles", None) or [])
-            if health_memory is not None and failover_profiles:
-                budget = ResilienceBudget(
-                    total_rows=len(results),
-                    error_count=error_count,
-                )
-                retried = await self._retry_error_rows(
-                    results=results,
-                    health_memory=health_memory,
-                    budget=budget,
-                    failover_profiles=failover_profiles,
-                    project_profile=project_profile,
-                    stats=batch_stats,
-                )
-                self._resilience_stats_accumulator.append(batch_stats)
-                new_error_count = sum(1 for r in retried if r.status == RowStatus.ERROR)
-                # Build the lookup set once so the membership test below is O(1) per row.
-                errored_row_ids = {orig.row_id for orig in results if orig.status == RowStatus.ERROR}
-                new_success_count = sum(
-                    1 for r in retried if r.status != RowStatus.ERROR and r.row_id in errored_row_ids
-                )
-                if budget.calls_used > 0:
-                    logger.info(
-                        "resilience_retry_sweep_complete",
-                        batch_index=batch_idx,
-                        errors_before=error_count,
-                        errors_after=new_error_count,
-                        retries_attempted=budget.calls_used,
-                        budget_total=budget.total,
-                    )
-                results = retried
-                error_count = new_error_count
-                success_count += new_success_count
-        # FR-13.3b: Check for budget errors in parallel results and raise BatchBudgetExceededError
-        budget_error_results = [
-            r for r in results
-            if "Budget exceeded" in (getattr(r, "reason", "") or "")
-            or "Budget exceeded" in (getattr(r, "error_message", "") or "")
-        ]
-        if budget_error_results:
-            # Get the budget info from first error result
-            budget_result = budget_error_results[0]
-            # Extract row details from the error result
-            row_id = budget_result.row_id
-            check_id = budget_result.check_id
-            # Parse row index from row_id (format: "check_id:row_N")
-            row_index = None
-            if ":row_" in row_id:
-                try:
-                    row_index = int(row_id.split(":row_")[-1]) + 1  # 1-based index
-                except ValueError:
-                    row_index = None
-            context = {
-                "kind": "strict_budget_exceeded",
-                "audit_error_key": "LLM_BUDGET_EXCEEDED",
-                "audit_error_code": AUDIT_ERROR_CODES["LLM_BUDGET_EXCEEDED"].code,
-                "batch_index": batch_idx + 1,
-                "row_index": row_index,
-                "check_id": check_id,
-                "row_id": row_id,
-                "status": "exceeded",
-                "scope": "audit",
-            }
-            raise BatchBudgetExceededError(
-                message=str(budget_result.reason or budget_result.error_message),
-                context=context,
-                partial_results=list(results),
-                success_count=success_count,
-                error_count=error_count,
-                skipped_count=skipped_count,
-            )
-        postproc_overlap_ms = int(postproc_diagnostics.get("overlap_seconds", 0.0) * 1000)
-        duration_ms = int((time.monotonic() - start_time) * 1000)
-        return BatchResult(
-            batch_index=batch_idx,
-            start_row=start_idx,
-            end_row=end_idx,
-            results=results,
-            duration_ms=duration_ms,
-            success_count=success_count,
-            error_count=error_count,
-            timed_out=False,
-            skipped_count=skipped_count,
-            postproc_overlap_ms=postproc_overlap_ms,
-        )
-    def _get_row_runtime_progress(self, row_id: str) -> dict[str, Any] | None:
-        """Return a shallow copy of the evaluator's progress snapshot for ``row_id``.
-        Returns None when the evaluator exposes no callable
-        ``get_row_runtime_progress`` or when it returns something other than a dict.
-        """
-        fetch_progress = getattr(self.evaluator, "get_row_runtime_progress", None)
-        if callable(fetch_progress):
-            snapshot = fetch_progress(row_id)
-            if isinstance(snapshot, dict):
-                # Copy so callers cannot mutate the evaluator's own mapping.
-                return dict(snapshot)
-        return None
-    @staticmethod
-    def _attach_timeout_progress_telemetry(
-        *,
-        timeout_trace: dict[str, Any],
-        progress: dict[str, Any] | None,
-        row_id: str | None = None,
-        check_id: str | None = None,
-    ) -> None:
-        """Enrich ``timeout_trace`` in place with telemetry from a progress snapshot.
-        Does nothing unless both ``timeout_trace`` and ``progress`` are dicts.
-        Prompt telemetry and requirement interpretation overwrite existing trace
-        entries; routing, provenance and diagnostics keys are only filled when the
-        trace does not define them yet. Placeholder ``grounding_validation`` and
-        ``evidence_gap_diagnostics`` entries are added when absent. If the snapshot
-        carries a ``skill_policy_preview`` dict, it is stored under
-        ``skill_policy_retry`` and a ``row_skill_policy_timeout_snapshot`` event is
-        logged with ``row_id`` and ``check_id``.
-        """
-        if not (isinstance(timeout_trace, dict) and isinstance(progress, dict)):
-            return
-        prompt_summary = progress.get("prompt_tool_telemetry_summary")
-        interpretation = progress.get("requirement_interpretation")
-        # These three payloads replace whatever the trace already holds.
-        for trace_key, payload in (
-            ("prompt_tool_telemetry_summary", prompt_summary),
-            ("prompt_tool_telemetry", progress.get("prompt_tool_telemetry")),
-            ("requirement_interpretation", interpretation),
-        ):
-            if isinstance(payload, dict):
-                timeout_trace[trace_key] = dict(payload)
-        # Scalar routing fields: copied only when meaningful and not already present.
-        for scalar_key in ("route_id", "route_mode", "route_reason", "selection_blocked_reason"):
-            scalar = progress.get(scalar_key)
-            if scalar not in (None, False, ""):
-                timeout_trace.setdefault(scalar_key, scalar)
-        # Non-empty containers are shallow-copied with their own container type.
-        container_groups = (
-            (
-                list,
-                (
-                    "route_transition_reasons",
-                    "project_artifact_provenance_summary",
-                    "cross_repo_evidence_refs",
-                    "config_repo_companion_refs",
-                ),
-            ),
-            (
-                dict,
-                (
-                    "docs_intent_utilization",
-                    "code_ranking_diagnostics",
-                    "project_artifact_readiness",
-                ),
-            ),
-        )
-        for container_type, container_keys in container_groups:
-            for container_key in container_keys:
-                container = progress.get(container_key)
-                if isinstance(container, container_type) and container:
-                    timeout_trace.setdefault(container_key, container_type(container))
-        kind_text = str(timeout_trace.get("timeout_kind") or "").strip()
-        kind_or_false = kind_text or False
-        terminal_status = str(timeout_trace.get("timeout_terminal_status") or "").strip().upper()
-        if not terminal_status:
-            terminal_status = "ERROR"
-        evidence_refs = progress.get("evidence_refs")
-        anchor_count = len(evidence_refs) if isinstance(evidence_refs, list) else 0
-        if "grounding_validation" not in timeout_trace:
-            # Grounding never completed, so report every anchor as unvalidated.
-            timeout_trace["grounding_validation"] = {
-                "complete": False,
-                "timeout_incomplete": True,
-                "timeout_kind": kind_or_false,
-                "total_anchors": anchor_count,
-                "valid_count": 0,
-                "invalid_count": 0,
-                "all_grounded": False,
-                "grounding_ratio": 0.0,
-                "strict_mode": False,
-                "strict_mode_zero_valid_error": False,
-                "invalid_ref_values": [],
-                "gap_signal": False,
-            }
-        if "evidence_gap_diagnostics" not in timeout_trace:
-            scope_info = interpretation if isinstance(interpretation, dict) else {}
-            raw_readiness = progress.get("project_artifact_readiness")
-            readiness = dict(raw_readiness) if isinstance(raw_readiness, dict) else {}
-            gap_diagnostics: dict[str, Any] = {
-                "applied": False,
-                "classification": False,
-                "gap_type": False,
-                "status_considered": terminal_status,
-                "dominant_unverified_reason": False,
-                "verified_true_count": 0,
-                "verified_false_count": anchor_count,
-                "incomplete_due_to_timeout": True,
-                "timeout_kind": kind_or_false,
-            }
-            if scope_info.get("project_scope_required"):
-                artifact_ready = bool(readiness.get("authoritative_artifact_ready"))
-                missing_types = list(readiness.get("missing_artifact_types") or [])
-                # A project-scoped row without its authoritative artifact is a confirmed gap.
-                if readiness and (missing_types or not artifact_ready):
-                    gap_diagnostics["applied"] = True
-                    gap_diagnostics["classification"] = "confirmed_gap"
-                    gap_diagnostics["gap_type"] = "authoritative_project_scope_artifact_missing"
-            timeout_trace["evidence_gap_diagnostics"] = gap_diagnostics
-        preview = progress.get("skill_policy_preview")
-        if not isinstance(preview, dict):
-            return
-        def _safe_int(value: Any) -> int:
-            """Coerce ``value`` to int, treating falsy or unparseable input as 0."""
-            try:
-                coerced = int(value or 0)
-            except (TypeError, ValueError):
-                coerced = 0
-            return coerced
-        skill_policy_timeout = dict(preview)
-        skill_policy_timeout.setdefault("incomplete_due_to_timeout", True)
-        if isinstance(prompt_summary, dict):
-            # Keep the larger of the previewed count and the prompt-summary count.
-            for observed_key, summary_key in (
-                ("observed_skill_calls", "skill_calls"),
-                ("observed_skill_execution_calls", "skill_execution_calls"),
-                ("observed_skill_effective_calls", "skill_effective_calls"),
-            ):
-                skill_policy_timeout[observed_key] = max(
-                    _safe_int(skill_policy_timeout.get(observed_key)),
-                    _safe_int(prompt_summary.get(summary_key)),
-                )
-        timeout_trace["skill_policy_retry"] = skill_policy_timeout
-        logger.info(
-            "row_skill_policy_timeout_snapshot",
-            row_id=row_id,
-            check_id=check_id,
-            skill_metrics_scope="timeout_progress_snapshot",
-            required=bool(skill_policy_timeout.get("required")),
-            policy_mode=str(skill_policy_timeout.get("policy_mode") or "unknown"),
-            strict_require_effective_skill=bool(skill_policy_timeout.get("strict_require_effective_skill")),
-            enforcement_enabled=bool(skill_policy_timeout.get("enforcement_enabled")),
-            incomplete_due_to_timeout=True,
-            observed_skill_calls=_safe_int(skill_policy_timeout.get("observed_skill_calls")),
-            observed_skill_execution_calls=_safe_int(skill_policy_timeout.get("observed_skill_execution_calls")),
-            observed_skill_effective_calls=_safe_int(skill_policy_timeout.get("observed_skill_effective_calls")),
-        )
-    def effective_row_timeout_ceiling_ms(self) -> int:
-        """Return the largest per-attempt row timeout the extension policy can reach.
-        In lease mode this is the configured absolute row timeout (at least 1 ms).
-        Otherwise the base row timeout is grown step by step until the budgeted
-        steps run out or a step no longer increases the timeout.
-        """
-        cfg = self.config
-        if self._lease_timeout_mode_enabled():
-            return max(1, int(cfg.row_absolute_timeout_ms))
-        ceiling_ms = max(1, int(cfg.row_timeout_ms))
-        if not cfg.row_timeout_progress_extension_enabled:
-            return ceiling_ms
-        # A non-positive step budget yields an empty range, leaving the base timeout.
-        for step in range(self._effective_timeout_extension_steps(ceiling_ms)):
-            candidate_ms = self._next_row_timeout_ms(ceiling_ms, step)
-            if candidate_ms <= ceiling_ms:
-                break
-            ceiling_ms = candidate_ms
-        return ceiling_ms
-    def effective_row_timeout_envelope_ms(self) -> int:
-        """Return the cumulative per-row timeout budget across all retry attempts.
-        Each timeout retry re-runs the row with a larger per-attempt timeout, so the
-        total elapsed time can exceed the final per-attempt ceiling. Batch timeout
-        budgeting uses this sum so rows that keep progressing are not preempted.
-        In lease mode the envelope is the absolute row timeout (at least 1 ms).
-        """
-        cfg = self.config
-        if self._lease_timeout_mode_enabled():
-            return max(1, int(cfg.row_absolute_timeout_ms))
-        # One entry per attempt; the envelope is their sum.
-        attempt_timeouts_ms = [max(1, int(cfg.row_timeout_ms))]
-        if cfg.row_timeout_progress_extension_enabled:
-            step_budget = self._effective_timeout_extension_steps(attempt_timeouts_ms[0])
-            for step in range(step_budget):
-                previous_ms = attempt_timeouts_ms[-1]
-                extended_ms = self._next_row_timeout_ms(previous_ms, step)
-                if extended_ms <= previous_ms:
-                    break
-                attempt_timeouts_ms.append(extended_ms)
-        return sum(attempt_timeouts_ms)
-    def effective_batch_timeout_ms(self) -> int:
-        """Return the batch timeout in milliseconds.
-        An explicitly configured ``batch_timeout_ms`` wins; otherwise the timeout is
-        ``batch_size`` times the per-row timeout envelope plus ``batch_overhead_ms``.
-        """
-        cfg = self.config
-        explicit_timeout_ms = cfg.batch_timeout_ms
-        if explicit_timeout_ms is None:
-            rows_budget_ms = cfg.batch_size * self.effective_row_timeout_envelope_ms()
-            return int(rows_budget_ms + cfg.batch_overhead_ms)
-        return int(explicit_timeout_ms)
-    def _lease_timeout_mode_enabled(self) -> bool:
-        """Return True when ``row_progress_lease_seconds`` truncates to a positive int."""
-        lease_seconds = self.config.row_progress_lease_seconds
-        # A missing or zero lease setting means lease mode is off.
-        return 0 < int(lease_seconds or 0)
-    def _clear_row_runtime_progress(self, row_id: str) -> None:
-        """Ask the evaluator to drop its progress snapshot for ``row_id``, if it can."""
-        reset_progress = getattr(self.evaluator, "clear_row_runtime_progress", None)
-        if not callable(reset_progress):
-            # Evaluator exposes no such hook; nothing to clear.
-            return
-        reset_progress(row_id)
-    def _effective_timeout_extension_steps(self, base_timeout_ms: int) -> int:
-        """Return how many timeout extension steps batch budgeting should assume.
-        Returns 0 when no progress retry attempts are configured. Otherwise returns
-        the larger of the configured retry attempts and the number of extension
-        increments needed to grow ``base_timeout_ms`` to the maximum row timeout.
-        """
-        cfg = self.config
-        budgeted_retries = max(0, int(cfg.row_timeout_progress_retry_attempts))
-        if not budgeted_retries:
-            return 0
-        cap_ms = max(int(cfg.row_timeout_ms), int(cfg.row_timeout_progress_max_ms))
-        headroom_ms = cap_ms - base_timeout_ms
-        if headroom_ms <= 0:
-            return budgeted_retries
-        increment_ms = max(1, int(cfg.row_timeout_progress_extension_ms))
-        # Runtime can continue beyond retry cap when telemetry still shows progress;
-        # batch budgeting should include that headroom to avoid premature batch timeout.
-        return max(budgeted_retries, math.ceil(headroom_ms / increment_ms))
-    def _next_row_timeout_ms(self, current_timeout_ms: int, timeout_retry_attempt: int) -> int:
-        """Return ``current_timeout_ms`` plus one extension increment, capped at the maximum.
-        ``timeout_retry_attempt`` does not affect the result: the increment is the
-        same for every retry.
-        """
-        cfg = self.config
-        cap_ms = max(int(cfg.row_timeout_ms), int(cfg.row_timeout_progress_max_ms))
-        # Keep increment size stable across retries so progressing rows can
-        # accumulate enough wall-clock budget before finalization.
-        increment_ms = max(1, int(cfg.row_timeout_progress_extension_ms))
-        extended_ms = current_timeout_ms + increment_ms
-        return min(cap_ms, extended_ms)
-    @staticmethod
-    def _coerce_progress_counter(value: Any) -> int:
-        """Coerce a telemetry counter to a non-negative int without raising.
-        Booleans map to 0/1, ints and floats are truncated and clamped at 0, and
-        strings are parsed as floats first. Anything unparseable or non-finite
-        (``nan``, ``inf``, or a string that overflows to infinity) yields 0.
-        """
-        if isinstance(value, bool):
-            return int(value)
-        if isinstance(value, str):
-            try:
-                value = float(value.strip())
-            except ValueError:
-                return 0
-        if isinstance(value, int):
-            return max(0, int(value))
-        if isinstance(value, float):
-            # int() raises on nan/inf; treat those like any other unusable value.
-            return max(0, int(value)) if math.isfinite(value) else 0
-        return 0
-    @staticmethod
-    def _coerce_progress_timestamp(value: Any) -> float:
-        """Coerce a progress timestamp to float, falling back to 0.0.
-        Numbers are converted directly and strings are stripped and parsed.
-        Booleans, unparseable strings and all other types yield 0.0.
-        """
-        if isinstance(value, str):
-            text = value.strip()
-            try:
-                return float(text)
-            except ValueError:
-                return 0.0
-        # bool is an int subclass but is never a meaningful timestamp.
-        if isinstance(value, int | float) and not isinstance(value, bool):
-            return float(value)
-        return 0.0
-    def _derive_progress_summary_from_snapshot(
-        self,
-        progress: dict[str, Any] | None,
-        *,
-        include_step_estimate: bool = True,
-    ) -> dict[str, int] | None:
-        """Collapse a progress snapshot into one counter per timeout-progress key.
-        Counters start at zero and are raised to the largest value found in
-        ``prompt_tool_telemetry_summary``, in the ``observed_*`` fields of
-        ``skill_policy_preview`` and, when ``include_step_estimate`` is true, in
-        ``steps_executed`` (counted as tool calls). Returns ``None`` when
-        ``progress`` is not a dict, or when neither telemetry dict is present and
-        no positive counter was seen.
-        """
-        if not isinstance(progress, dict):
-            return None
-        telemetry = progress.get("prompt_tool_telemetry_summary")
-        preview = progress.get("skill_policy_preview")
-        has_telemetry = isinstance(telemetry, dict)
-        has_preview = isinstance(preview, dict)
-        coerce = self._coerce_progress_counter
-        counters: dict[str, int] = {key: 0 for key in _TIMEOUT_PROGRESS_COUNTER_KEYS}
-        saw_activity = False
-        if has_telemetry:
-            for key in _TIMEOUT_PROGRESS_COUNTER_KEYS:
-                reported = coerce(telemetry.get(key))
-                counters[key] = max(counters[key], reported)
-                if reported > 0:
-                    saw_activity = True
-        if has_preview:
-            # Read all three preview fields first, then fold them into the counters.
-            observed = {
-                "skill_calls": coerce(preview.get("observed_skill_calls")),
-                "skill_execution_calls": coerce(preview.get("observed_skill_execution_calls")),
-                "skill_effective_calls": coerce(preview.get("observed_skill_effective_calls")),
-            }
-            for counter_key, count in observed.items():
-                counters[counter_key] = max(counters[counter_key], count)
-            if any(count > 0 for count in observed.values()):
-                saw_activity = True
-        if include_step_estimate:
-            executed_steps = coerce(progress.get("steps_executed"))
-            counters["tool_calls"] = max(counters["tool_calls"], executed_steps)
-            if executed_steps > 0:
-                saw_activity = True
-        # An all-zero summary is still returned when a telemetry dict was present.
-        if not (saw_activity or has_telemetry or has_preview):
-            return None
-        return counters
-    def _normalize_timeout_progress_snapshot(self, progress: dict[str, Any] | None) -> dict[str, Any] | None:
-        """Return a shallow copy of ``progress`` with consistent progress counters.
-        Non-dict input is returned unchanged. Otherwise the derived summary
-        (including the step estimate) replaces ``prompt_tool_telemetry_summary``,
-        three ``timeout_progress_*`` markers record what the original snapshot
-        carried, and the counters inside ``prompt_tool_telemetry`` and
-        ``skill_policy_preview`` are raised to at least the summary values.
-        The input dict and its nested dicts are not mutated.
-        """
-        if not isinstance(progress, dict):
-            return progress
-        snapshot = dict(progress)
-        # Capture what the caller supplied before any key is overwritten.
-        had_prompt_summary = isinstance(progress.get("prompt_tool_telemetry_summary"), dict)
-        had_prompt_payload = isinstance(progress.get("prompt_tool_telemetry"), dict)
-        had_skill_preview = isinstance(progress.get("skill_policy_preview"), dict)
-        counters = self._derive_progress_summary_from_snapshot(snapshot, include_step_estimate=True)
-        if not isinstance(counters, dict):
-            return snapshot
-        coerce = self._coerce_progress_counter
-        snapshot["prompt_tool_telemetry_summary"] = dict(counters)
-        snapshot["timeout_progress_summary_source"] = "explicit" if had_prompt_summary else "derived"
-        snapshot["timeout_progress_has_explicit_prompt_telemetry"] = had_prompt_payload
-        snapshot["timeout_progress_has_explicit_skill_preview"] = had_skill_preview
-        payload = snapshot.get("prompt_tool_telemetry")
-        if isinstance(payload, dict):
-            raised_payload = dict(payload)
-            # (payload field, summary counter) pairs; two fields share skill_effective_calls.
-            for event_key, counter_key in (
-                ("event_tool_calls_completed", "tool_calls"),
-                ("event_skill_tool_calls", "skill_calls"),
-                ("event_skill_execution_tool_calls", "skill_execution_calls"),
-                ("event_skill_effective_tool_calls", "skill_effective_calls"),
-                ("event_skill_execution_effective_tool_calls", "skill_effective_calls"),
-            ):
-                raised_payload[event_key] = max(
-                    coerce(raised_payload.get(event_key)),
-                    coerce(counters.get(counter_key)),
-                )
-            snapshot["prompt_tool_telemetry"] = raised_payload
-        preview = snapshot.get("skill_policy_preview")
-        if isinstance(preview, dict):
-            raised_preview = dict(preview)
-            for observed_key, counter_key in (
-                ("observed_skill_calls", "skill_calls"),
-                ("observed_skill_execution_calls", "skill_execution_calls"),
-                ("observed_skill_effective_calls", "skill_effective_calls"),
-            ):
-                raised_preview[observed_key] = max(
-                    coerce(raised_preview.get(observed_key)),
-                    coerce(counters.get(counter_key)),
-                )
-            snapshot["skill_policy_preview"] = raised_preview
-        return snapshot
-    def _extract_progress_summary(self, progress: dict[str, Any] | None) -> dict[str, int] | None:
-        """Return a fresh copy of the derived counter summary, without the step estimate.
-        Yields ``None`` whenever no summary can be derived from ``progress``.
-        """
-        counters = self._derive_progress_summary_from_snapshot(progress, include_step_estimate=False)
-        return dict(counters) if isinstance(counters, dict) else None
-    def _has_strong_repeat_progress(self, progress: dict[str, Any] | None) -> bool:
-        """Allow bounded repeat extension when repeated snapshots still show strong work.
-        "Strong" means the snapshot reports effective progress with a grounding
-        signal, has no termination reason other than ``timeout``, executed at
-        least 3 steps across at least 2 distinct tools, and has a step repetition
-        rate of at most 0.6. A missing or unparsable repetition rate counts as
-        1.0 (fully repetitive), so malformed telemetry yields ``False`` instead
-        of raising.
-        """
-        if not isinstance(progress, dict):
-            return False
-        if not bool(progress.get("effective_progress")) or not self._progress_has_grounding_signal(progress):
-            return False
-        # Prevent stale pre-LLM "completed" snapshots from repeatedly extending timeout.
-        # Strong repeat extension is only valid while tool-first loop itself is still timing out.
-        termination_reason = str(progress.get("termination_reason") or "").strip().lower()
-        if termination_reason and termination_reason != "timeout":
-            return False
-        steps_executed = self._coerce_progress_counter(progress.get("steps_executed"))
-        steps_unique_tools = self._coerce_progress_counter(progress.get("steps_unique_tools"))
-        try:
-            # float(None) raises TypeError, which preserves the "missing -> 1.0" default.
-            repetition_rate = float(progress.get("step_repetition_rate"))
-        except (TypeError, ValueError, OverflowError):
-            repetition_rate = 1.0
-        # A NaN rate compares false against 0.6, so it is rejected as well.
-        return steps_executed >= 3 and steps_unique_tools >= 2 and repetition_rate <= 0.6
-    @staticmethod
-    def _compute_progress_delta(
-        previous_summary: dict[str, int] | None,
-        current_summary: dict[str, int] | None,
-    ) -> dict[str, int] | None:
-        """Return the per-counter increase from ``previous_summary`` to ``current_summary``.
-        Both sides are floored at zero before subtracting and a decrease is
-        reported as 0, so every value in the result is non-negative. Returns
-        ``None`` unless both summaries are dicts.
-        """
-        if not (isinstance(previous_summary, dict) and isinstance(current_summary, dict)):
-            return None
-        increases: dict[str, int] = {}
-        for key in _TIMEOUT_PROGRESS_COUNTER_KEYS:
-            # Read the previous value first, then the current one.
-            before, after = (
-                max(0, int(side.get(key, 0))) for side in (previous_summary, current_summary)
-            )
-            increases[key] = after - before if after > before else 0
-        return increases
-    def _decide_row_timeout_extension(
-        self,
-        *,
-        timeout_retry_attempt: int,
-        current_timeout_ms: int,
-        progress: dict[str, Any] | None,
-        previous_progress_summary: dict[str, int] | None,
-    ) -> dict[str, Any]:
-        """Decide whether a timed-out row may run again with a longer timeout.
-        Returns a dict with ``allow`` (bool), ``reason`` (machine-readable code),
-        ``progress_summary`` and ``progress_delta`` (either may be ``None``).
-        Every decision requires the feature to be enabled, a positive retry
-        budget, headroom below the maximum timeout, and an effective, grounded
-        progress snapshot. The first extension (``timeout_retry_attempt`` < 1) is
-        additionally subject to the attempt cap. Repeat extensions need either a
-        positive counter delta against ``previous_progress_summary`` or strong
-        repeat progress; they may continue past the attempt cap, in which case
-        the reason code carries a ``_beyond_cap`` suffix.
-        A missing, non-numeric or NaN ``step_repetition_rate`` is treated as 1.0
-        (fully repetitive), so malformed telemetry denies the repeat extension
-        instead of raising inside the timeout handler.
-        """
-        decision: dict[str, Any] = {
-            "allow": False,
-            "reason": "not_allowed",
-            "progress_summary": None,
-            "progress_delta": None,
-        }
-        if not bool(self.config.row_timeout_progress_extension_enabled):
-            decision["reason"] = "extension_disabled"
-            return decision
-        retry_attempts = max(0, int(self.config.row_timeout_progress_retry_attempts))
-        if retry_attempts <= 0:
-            decision["reason"] = "attempt_cap_reached"
-            return decision
-        extension_cap_reached = timeout_retry_attempt >= retry_attempts
-        if current_timeout_ms >= max(int(self.config.row_timeout_ms), int(self.config.row_timeout_progress_max_ms)):
-            decision["reason"] = "max_timeout_reached"
-            return decision
-        if not isinstance(progress, dict):
-            decision["reason"] = "missing_progress_snapshot"
-            return decision
-        if not bool(progress.get("effective_progress")):
-            decision["reason"] = "ineffective_progress_snapshot"
-            return decision
-        if not self._progress_has_grounding_signal(progress):
-            decision["reason"] = "missing_grounding_signal"
-            return decision
-        termination_reason = str(progress.get("termination_reason") or "").strip().lower()
-        # Only the first extension insists on a "timeout" termination reason.
-        if timeout_retry_attempt < 1 and termination_reason and termination_reason != "timeout":
-            decision["reason"] = "termination_reason_not_timeout"
-            return decision
-        if timeout_retry_attempt >= 1:
-            # Repeat extensions remain bounded, but should continue when
-            # telemetry still shows meaningful work.
-            try:
-                # float(None) raises TypeError, which preserves the "missing -> 1.0" default.
-                repetition_rate = float(progress.get("step_repetition_rate"))
-            except (TypeError, ValueError, OverflowError):
-                repetition_rate = 1.0
-            if math.isnan(repetition_rate):
-                # NaN compares false against every threshold and would slip past the guard below.
-                repetition_rate = 1.0
-            if repetition_rate >= 0.7:
-                decision["reason"] = "high_step_repetition_rate"
-                return decision
-            current_summary = self._extract_progress_summary(progress)
-            decision["progress_summary"] = current_summary
-            if current_summary is not None and previous_progress_summary is not None:
-                progress_delta = self._compute_progress_delta(previous_progress_summary, current_summary)
-                decision["progress_delta"] = progress_delta
-                if isinstance(progress_delta, dict) and max(progress_delta.values(), default=0) > 0:
-                    decision["allow"] = True
-                    decision["reason"] = (
-                        "repeat_extension_progress_delta_beyond_cap"
-                        if extension_cap_reached
-                        else "repeat_extension_progress_delta"
-                    )
-                    return decision
-            # Strong grounded progress may still justify repeat extensions even
-            # when prompt telemetry counters remain stationary across retries.
-            if self._has_strong_repeat_progress(progress):
-                decision["allow"] = True
-                if current_summary is None:
-                    # No prompt telemetry: report a summary built from the step count alone.
-                    decision["progress_summary"] = {
-                        "tool_calls": self._coerce_progress_counter(progress.get("steps_executed")),
-                        "skill_calls": 0,
-                        "skill_execution_calls": 0,
-                        "skill_effective_calls": 0,
-                    }
-                    decision["reason"] = (
-                        "repeat_extension_strong_progress_no_prompt_summary_beyond_cap"
-                        if extension_cap_reached
-                        else "repeat_extension_strong_progress_no_prompt_summary"
-                    )
-                elif previous_progress_summary is None:
-                    decision["reason"] = (
-                        "repeat_extension_strong_progress_missing_previous_summary_beyond_cap"
-                        if extension_cap_reached
-                        else "repeat_extension_strong_progress_missing_previous_summary"
-                    )
-                else:
-                    decision["reason"] = (
-                        "repeat_extension_strong_progress_stationary_summary_beyond_cap"
-                        if extension_cap_reached
-                        else "repeat_extension_strong_progress_stationary_summary"
-                    )
-                return decision
-            # Denied: pick the reason code that explains which input was missing.
-            if current_summary is None:
-                decision["reason"] = (
-                    "missing_prompt_telemetry_for_repeat_extension_beyond_cap"
-                    if extension_cap_reached
-                    else "missing_prompt_telemetry_for_repeat_extension"
-                )
-            elif previous_progress_summary is None:
-                decision["reason"] = (
-                    "missing_previous_progress_summary_for_repeat_extension_beyond_cap"
-                    if extension_cap_reached
-                    else "missing_previous_progress_summary_for_repeat_extension"
-                )
-            else:
-                decision["reason"] = (
-                    "no_delta_since_last_timeout_beyond_cap" if extension_cap_reached else "no_delta_since_last_timeout"
-                )
-                decision["progress_delta"] = self._compute_progress_delta(previous_progress_summary, current_summary)
-            return decision
-        if extension_cap_reached:
-            decision["reason"] = "attempt_cap_reached"
-            return decision
-        decision["allow"] = True
-        decision["reason"] = "first_extension_allowed"
-        decision["progress_summary"] = self._extract_progress_summary(progress)
-        return decision
-    def _progress_has_grounding_signal(self, progress: dict[str, Any] | None) -> bool:
-        """Report whether ``progress["evidence_refs_count"]`` indicates at least one evidence ref.
-        The field may be a bool, a number (positive means yes), a numeric string,
-        or a list/tuple/set/dict of refs (non-empty means yes). Any other type, a
-        non-numeric string, or a non-dict ``progress`` gives ``False``.
-        """
-        if not isinstance(progress, dict):
-            return False
-        refs = progress.get("evidence_refs_count")
-        match refs:
-            # bool must be tested before int because bool is an int subclass.
-            case bool():
-                return bool(refs)
-            case int() | float():
-                return refs > 0
-            case str():
-                try:
-                    return float(refs) > 0
-                except ValueError:
-                    return False
-            case list() | tuple() | set() | dict():
-                return len(refs) > 0
-            case _:
-                return False
-    @staticmethod
-    def _progress_has_recorded_or_verified_grounding(progress: dict[str, Any] | None) -> bool:
-        """Report whether the snapshot carries recorded or verified evidence.
-        True when ``record_evidence_refs_contract_met`` is truthy, or when any
-        entry of ``evidence_ref_verifications`` (a mapping of ref -> payload, or a
-        list of payload dicts) is flagged ``verified`` and has a non-blank
-        ``excerpt``. Non-dict payloads are ignored.
-        """
-        if not isinstance(progress, dict):
-            return False
-        if progress.get("record_evidence_refs_contract_met"):
-            return True
-        verifications = progress.get("evidence_ref_verifications")
-        if isinstance(verifications, dict):
-            # The mapping key supplies ref_value when the payload does not carry one.
-            candidates = [
-                {"ref_value": str(ref or "").strip(), **details}
-                for ref, details in verifications.items()
-                if isinstance(details, dict)
-            ]
-        elif isinstance(verifications, list):
-            candidates = [item for item in verifications if isinstance(item, dict)]
-        else:
-            candidates = []
-        return any(
-            bool(candidate.get("verified")) and bool(str(candidate.get("excerpt") or "").strip())
-            for candidate in candidates
-        )
-    def _should_degrade_timeout_finalize(self, progress: dict[str, Any] | None) -> bool:
-        """Return whether a timeout should finalize as a grounded degraded result (FR-128).
-        Requires an effective snapshot with a grounding signal. Any termination
-        reason other than ``completed`` then qualifies immediately. A
-        ``completed`` snapshot qualifies only with recorded/verified grounding,
-        with a positive counter in explicitly supplied telemetry (prompt summary,
-        prompt payload or skill preview), or when more than one progress retry
-        attempt is configured.
-        """
-        if not isinstance(progress, dict) or not bool(progress.get("effective_progress")):
-            return False
-        if not self._progress_has_grounding_signal(progress):
-            return False
-        reason = str(progress.get("termination_reason") or "").strip().lower()
-        if reason != "completed":
-            return True
-        if self._progress_has_recorded_or_verified_grounding(progress):
-            return True
-        def any_positive(counters: dict[str, Any], keys: Any) -> bool:
-            # True when at least one of the named counters coerces to a positive int.
-            return any(self._coerce_progress_counter(counters.get(key)) > 0 for key in keys)
-        summary = progress.get("prompt_tool_telemetry_summary")
-        if (
-            progress.get("timeout_progress_summary_source") == "explicit"
-            and isinstance(summary, dict)
-            and any_positive(summary, _TIMEOUT_PROGRESS_COUNTER_KEYS)
-        ):
-            return True
-        payload = progress.get("prompt_tool_telemetry")
-        if (
-            bool(progress.get("timeout_progress_has_explicit_prompt_telemetry"))
-            and isinstance(payload, dict)
-            and any_positive(
-                payload,
-                (
-                    "event_tool_calls_completed",
-                    "event_skill_tool_calls",
-                    "event_skill_execution_tool_calls",
-                    "event_skill_effective_tool_calls",
-                    "event_skill_execution_effective_tool_calls",
-                ),
-            )
-        ):
-            return True
-        preview = progress.get("skill_policy_preview")
-        if (
-            bool(progress.get("timeout_progress_has_explicit_skill_preview"))
-            and isinstance(preview, dict)
-            and any_positive(
-                preview,
-                (
-                    "observed_skill_calls",
-                    "observed_skill_execution_calls",
-                    "observed_skill_effective_calls",
-                ),
-            )
-        ):
-            return True
-        # Phase 130: when the operator explicitly granted additional retry
-        # budget but the tool loop already reports a completed snapshot,
-        # finalize as grounded FAIL instead of a generic ERROR.
-        return int(self.config.row_timeout_progress_retry_attempts) > 1
-    @staticmethod
-    def _derive_timeout_kind(
-        *,
-        progress: dict[str, Any] | None,
-        degraded_timeout_finalize: bool,
-        batch_timeout_exceeded: bool = False,
-        absolute_timeout_exceeded: bool = False,
-    ) -> str:
-        """Classify a row timeout and return the matching ``TimeoutKind`` value.
-        Precedence: degraded finalize, then a missing snapshot (no progress),
-        then batch/absolute timeout (both map to the absolute-cap kind), then the
-        snapshot's ``termination_reason``. Unrecognized reasons fall back to the
-        no-progress kind.
-        """
-        if degraded_timeout_finalize:
-            return TimeoutKind.TIMEOUT_WITH_GROUNDING.value
-        if isinstance(progress, dict):
-            reason = str(progress.get("termination_reason") or "").strip().lower()
-            if batch_timeout_exceeded or absolute_timeout_exceeded:
-                return TimeoutKind.TIMEOUT_ABSOLUTE_CAP.value
-            if reason in ("idle_after_tool", "post_tool_idle", "idle_post_tool"):
-                return TimeoutKind.TIMEOUT_IDLE_AFTER_TOOL.value
-            if reason in ("tool_churn", "agent_tool_churn", "churn", "stagnation"):
-                return TimeoutKind.TIMEOUT_CHURN_DETECTED.value
-            if reason in ("provider_stall", "stream_stall"):
-                return TimeoutKind.TIMEOUT_PROVIDER_STALL.value
-        return TimeoutKind.TIMEOUT_NO_PROGRESS.value
-    @staticmethod
-    def _build_timeout_fallback_verification_index(progress: dict[str, Any] | None) -> dict[str, dict[str, Any]]:
-        """Index ``progress["evidence_ref_verifications"]`` by stripped ref value.
-        Accepts a mapping of ref -> payload (a non-dict payload becomes
-        ``{"verified": bool(payload)}``) or a list of payload dicts that name
-        their ref under ``ref_value``, ``ref`` or ``evidence_ref``. Entries with a
-        blank ref are dropped; later duplicates overwrite earlier ones. Any other
-        input yields an empty dict.
-        """
-        raw = progress.get("evidence_ref_verifications") if isinstance(progress, dict) else None
-        if isinstance(raw, dict):
-            pairs = (
-                (str(ref or "").strip(), details if isinstance(details, dict) else {"verified": bool(details)})
-                for ref, details in raw.items()
-            )
-        elif isinstance(raw, list):
-            # List payloads are copied; mapping payloads are stored as-is.
-            pairs = (
-                (
-                    str(item.get("ref_value") or item.get("ref") or item.get("evidence_ref") or "").strip(),
-                    dict(item),
-                )
-                for item in raw
-                if isinstance(item, dict)
-            )
-        else:
-            return {}
-        return {ref: details for ref, details in pairs if ref}
-    def _create_timeout_result(
-        self,
-        row_id: str,
-        check_id: str,
-        message: str,
-        *,
-        degraded_timeout_finalize: bool = False,
-        progress: dict[str, Any] | None = None,
-    ) -> RowEvaluationResult:
-        """Build the row result reported for a timed-out evaluation.
-        A plain timeout yields ``RowStatus.ERROR``. When ``degraded_timeout_finalize``
-        is truthy the row is reported as ``RowStatus.FAIL`` and up to the first 20
-        entries of ``progress["evidence_refs"]`` are attached as evidence anchors.
-        An anchor is marked verified only when its verification payload is flagged
-        verified and carries a non-blank excerpt. The score is always 0.0 and
-        ``message`` is stored as ``error_message``.
-        """
-        is_degraded = bool(degraded_timeout_finalize)
-        anchors: list[EvidenceAnchor] = []
-        if is_degraded and isinstance(progress, dict):
-            verifications = self._build_timeout_fallback_verification_index(progress)
-            candidate_refs = progress.get("evidence_refs")
-            for raw_ref in candidate_refs[:20] if isinstance(candidate_refs, list) else ():
-                ref_value = str(raw_ref or "").strip()
-                if not ref_value:
-                    continue
-                details = verifications.get(ref_value, {})
-                excerpt = str(details.get("excerpt") or "").strip()
-                verified = bool(details.get("verified")) and bool(excerpt)
-                if verified:
-                    stated_reason = str(details.get("verification_reason") or "").strip()
-                    verification_reason = normalize_verification_reason(
-                        stated_reason or VERIFICATION_REASON_EXCERPT_VERIFIED_IN_CONTEXT,
-                        verified=True,
-                        excerpt=excerpt,
-                    )
-                else:
-                    # Unverified refs are carried over without their excerpt.
-                    verification_reason = VERIFICATION_REASON_FALLBACK_REF_INHERITED
-                if "://" in ref_value:
-                    ref_type = "url"
-                elif RowEvaluator._is_config_ref_value(ref_value):
-                    ref_type = "config"
-                else:
-                    ref_type = "doc_path"
-                anchors.append(
-                    EvidenceAnchor(
-                        ref_type=ref_type,
-                        ref_value=ref_value,
-                        excerpt=excerpt if verified else "",
-                        verified=verified,
-                        verification_reason=verification_reason,
-                    )
-                )
-        if is_degraded:
-            status = RowStatus.FAIL
-            reason_text = "Đánh giá kết thúc suy giảm sau timeout có tiến triển hiệu quả."
-            finding_text = "Có tín hiệu grounding tối thiểu trước timeout; trả về kết quả FAIL suy giảm."
-        else:
-            status = RowStatus.ERROR
-            reason_text = "Đánh giá bị timeout."
-            finding_text = "Không thể hoàn thành đánh giá do timeout."
-        return RowEvaluationResult(
-            row_id=row_id,
-            check_id=check_id,
-            status=status,
-            score=0.0,
-            score_breakdown=ScoreBreakdown.compute(0.0),
-            reason=reason_text,
-            reasoning="",
-            finding=finding_text,
-            evidence_anchors=anchors,
-            provenance=RowProvenance(
-                row_llm_mode=self.evaluator.config.mode.value,
-                protocol=None,
-                model=None,
-                template_hash=self.evaluator.template_hash,
-                rubric_version="",
-                evidence_hash=self.evaluator.evidence_hash or "",
-                evaluated_at=datetime.now(UTC),
-                cache_hit=False,
-            ),
-            error_message=message,
-        )
-    def _get_checkpoint_path(self, thread_id: str) -> Path:
-        """Return the checkpoint file path for ``thread_id``.
-        With ``config.checkpoint_dir`` set (``Path`` or ``str``) the file is
-        ``batch-checkpoint-<thread_id>.json`` inside that directory. Otherwise it
-        is ``vds-audit-checkpoint-<thread_id>.json`` in the platform temporary
-        directory, which is ``/tmp`` on a typical POSIX host unless ``TMPDIR`` or
-        a similar variable overrides it.
-        """
-        import tempfile
-        checkpoint_dir = self.config.checkpoint_dir
-        if checkpoint_dir:
-            # Path() is a no-op for Path input and makes a plain-string setting work too.
-            return Path(checkpoint_dir) / f"batch-checkpoint-{thread_id}.json"
-        # A hard-coded "/tmp" is not portable and ignores the configured temp dir.
-        return Path(tempfile.gettempdir()) / f"vds-audit-checkpoint-{thread_id}.json"
-    def get_checkpoint_path(self, thread_id: str) -> Path:
-        """Public accessor for the checkpoint file path of ``thread_id`` (FR-278).
-        Intended for callers that record the checkpoint location in
-        workflow-summary.json and run-history.json entries; it resolves the path
-        exactly as the private ``_get_checkpoint_path`` does.
-        """
-        checkpoint_file = self._get_checkpoint_path(thread_id)
-        return checkpoint_file
-    @staticmethod
-    def _extract_prompt_level_telemetry(result: Any) -> dict[str, int]:
-        """Phase 166: Extract prompt-level retry/failover counts from a row result.
-        Reads ``result.provenance["retry_metadata"]`` and returns six counters
-        (retry attempts/recoveries, failover attempts/recoveries/exhausted, and
-        synthesis fallbacks). All counters stay 0 when ``result`` is ``None`` or
-        when the provenance or retry metadata is not a dict.
-        """
-        counts: dict[str, int] = dict.fromkeys(
-            (
-                "prompt_retry_attempts",
-                "prompt_retry_recoveries",
-                "prompt_failover_attempts",
-                "prompt_failover_recoveries",
-                "prompt_failover_exhausted",
-                "synthesis_fallback_count",
-            ),
-            0,
-        )
-        if result is None:
-            return counts
-        # NOTE(review): only dict-shaped provenance is inspected; a provenance
-        # object of another type contributes nothing -- confirm that is intended.
-        provenance = getattr(result, "provenance", None) or {}
-        if not isinstance(provenance, dict):
-            return counts
-        retry_meta = provenance.get("retry_metadata") or {}
-        if not isinstance(retry_meta, dict):
-            return counts
-        provider_retry = retry_meta.get("provider_retry") or {}
-        if isinstance(provider_retry, dict):
-            counts["prompt_retry_attempts"] += max(0, int(provider_retry.get("attempts_executed", 0) or 0))
-            if provider_retry.get("recovered"):
-                counts["prompt_retry_recoveries"] += 1
-        failover = retry_meta.get("prompt_failover_telemetry") or {}
-        if isinstance(failover, dict) and failover.get("provider_failover_attempted"):
-            counts["prompt_failover_attempts"] += 1
-            # An attempted failover is either exhausted or, if a final provider
-            # is recorded, counted as a recovery.
-            if str(failover.get("timeout_kind") or "") == "timeout_failover_exhausted":
-                counts["prompt_failover_exhausted"] += 1
-            elif failover.get("provider_failover_final_provider"):
-                counts["prompt_failover_recoveries"] += 1
-        # Check for synthesis fallback
-        if retry_meta.get("fallback_cause") or provenance.get("fallback_cause"):
-            counts["synthesis_fallback_count"] += 1
-        return counts
-class PostProcessingConsumer:
-    """Queue-backed consumer for streaming post-processing of row results (FR-193).
-    Producers hand over ``RowEvaluationResult`` items with :meth:`put` and close
-    the stream with :meth:`finalize`, which enqueues a ``None`` sentinel.
-    :meth:`consume` drains the queue until that sentinel, collecting each item's
-    score and measuring how long the drain took.
-    """
-    def __init__(self) -> None:
-        self._partial_scores: list[float] = []
-        self._processed_count: int = 0
-        self._overlap_seconds: float = 0.0
-        self._start_time: float = 0.0
-        self._queue: asyncio.Queue[RowEvaluationResult | None] = asyncio.Queue()
-    async def put(self, result: RowEvaluationResult) -> None:
-        """Hand one completed row result to the consumer."""
-        await self._queue.put(result)
-    async def finalize(self) -> None:
-        """Mark the end of the stream by enqueuing the ``None`` sentinel."""
-        await self._queue.put(None)
-    async def consume(self) -> dict[str, Any]:
-        """Drain results up to the sentinel and return timing diagnostics.
-        The returned dict holds ``overlap_seconds`` (monotonic time spent in this
-        call), ``processed_count`` and a copy of ``partial_scores``.
-        """
-        import time
-        self._start_time = time.monotonic()
-        while (item := await self._queue.get()) is not None:
-            self._processed_count += 1
-            self._partial_scores.append(item.score)
-        finished_at = time.monotonic()
-        started_at = self._start_time
-        # A zero start time is treated as "not started" and reports no overlap.
-        self._overlap_seconds = (finished_at - started_at) if started_at else 0.0
-        return {
-            "overlap_seconds": self._overlap_seconds,
-            "processed_count": self._processed_count,
-            "partial_scores": list(self._partial_scores),
-        }