npm - @ngocsangairvds/vsaf - Versions diffs - 3.2.14 → 3.2.16 - Mend

@ngocsangairvds/vsaf 3.2.14 → 3.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1442) hide show

package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_row_evaluator_dual_evidence.py DELETED Viewed

@@ -1,2987 +0,0 @@
-"""Tests for dual evidence anchoring (TSK-198).
-Tests that row evaluator correctly wires both doc evidence (Confluence URLs)
-and code evidence (code paths) into EvidenceAnchor lists for canonical checklist.
-"""
-from __future__ import annotations
-import pytest
-from vds_agent_core.agents.loop import (
-    AgentLoopResult,
-    AgentLoopStep,
-    LoopTerminationReason,
-)
-from vds_audit_orchestrator.agents.tool_registry import ToolName
-from vds_audit_orchestrator.engine.deterministic_evaluator import DeterministicEvaluator
-from vds_audit_orchestrator.engine.row_evaluator import RowEvaluator
-from vds_audit_orchestrator.engine.row_evaluator_types import RowEvaluationResult, RowEvaluatorConfig, RowLLMMode
-from vds_audit_orchestrator.evidence.code_pack import CodeEvidence, CodeEvidencePack
-from vds_audit_orchestrator.evidence.traceability import CodeAnchor, TraceRecord
-from vds_audit_orchestrator.models.checklist import (
-    EvidenceAnchor,
-    RowProvenance,
-    RowStatus,
-    ScoreBreakdown,
-)
-from vds_audit_orchestrator.models.evidence import Evidence, EvidenceStatus
-from vds_audit_orchestrator.models.template import AuditCheck
-# =============================================================================
-# Fixtures
-# =============================================================================
-@pytest.fixture
-def sample_check():
-    """Create a sample audit check for testing."""
-    return AuditCheck(
-        id="API-001",
-        name="API endpoint documented",
-        description="API endpoint /api/policies must be documented in Confluence",
-        check_type="api_documented",
-        weight=2.0,
-        section_id="api_documentation",
-    )
-@pytest.fixture
-def confluence_evidence():
-    """Create Confluence evidence with URL."""
-    confluence_url = "https://confluence.example.com/display/PROJECT/API-Docs"
-    return Evidence(
-        source="confluence",
-        source_id="confluence:page-123",
-        title="API Documentation",
-        content="# API Documentation\n\n## GET /api/policies\n\nReturns list of policies.",
-        content_hash="conf123",
-        status=EvidenceStatus.FOUND,
-        source_url=confluence_url,
-    )
-@pytest.fixture
-def code_evidence_pack():
-    """Create code evidence pack with API file."""
-    return CodeEvidencePack(
-        repo_path="/test/repo",
-        files=[
-            CodeEvidence(
-                file_path="src/api/routes.py",
-                content="@app.route('/api/policies')\ndef get_policies():\n    pass",
-                original_lines=50,
-                original_bytes=1000,
-                priority=1,
-            ),
-        ],
-    )
-@pytest.fixture
-def traceability_with_match():
-    """Create traceability record with code anchor matching the check."""
-    return [
-        TraceRecord(
-            mention_id="mention-001",
-            mention_type="endpoint",
-            mention_value="/api/policies",
-            source_page_id="page-123",
-            source_url="https://confluence.example.com/display/PROJECT/API-Docs",
-            code_anchors=[
-                CodeAnchor(
-                    file="src/api/routes.py",
-                    line=42,
-                    match="@app.route('/api/policies')",
-                ),
-                CodeAnchor(
-                    file="src/api/routes.py",
-                    line=43,
-                    match="def get_policies():",
-                ),
-            ],
-            confidence="high",
-        ),
-    ]
-@pytest.fixture
-def traceability_no_match():
-    """Create traceability record that doesn't match the check."""
-    return [
-        TraceRecord(
-            mention_id="mention-002",
-            mention_type="endpoint",
-            mention_value="/api/users",  # Different endpoint
-            source_page_id="page-456",
-            source_url="https://confluence.example.com/display/PROJECT/Users",
-            code_anchors=[
-                CodeAnchor(
-                    file="src/api/users.py",
-                    line=10,
-                    match="@app.route('/api/users')",
-                ),
-            ],
-            confidence="high",
-        ),
-    ]
-# =============================================================================
-# Dual Evidence Tests
-# =============================================================================
-class TestDualEvidenceAnchors:
-    """Test docs+code evidence anchoring (TSK-198)."""
-    def test_evaluate_with_code_evidence_only(self, sample_check, code_evidence_pack, traceability_with_match):
-        """Test row evaluation includes code anchors when traceability matches."""
-        evaluator = DeterministicEvaluator(template_hash="test_hash")
-        # Evaluate with no doc evidence, but with code evidence via traceability
-        result = evaluator.evaluate(
-            check=sample_check,
-            evidence=None,
-            row_id="row_0",
-            code_evidence=code_evidence_pack,
-            traceability=traceability_with_match,
-        )
-        assert isinstance(result, RowEvaluationResult)
-        # Should have code anchors from traceability
-        code_anchors = [a for a in result.evidence_anchors if a.ref_type == "code_path"]
-        assert len(code_anchors) > 0, "Should have code_path anchors from traceability"
-        # Verify code anchor format
-        first_code_anchor = code_anchors[0]
-        assert "src/api/routes.py" in first_code_anchor.ref_value
-        assert "#L" in first_code_anchor.ref_value  # Should have line number
-        assert first_code_anchor.verified is True
-    def test_evaluate_with_confluence_evidence_only(self, sample_check, confluence_evidence):
-        """Test row evaluation with only Confluence evidence (no code evidence)."""
-        evaluator = DeterministicEvaluator(template_hash="test_hash")
-        result = evaluator.evaluate(
-            check=sample_check,
-            evidence=confluence_evidence,
-            row_id="row_0",
-        )
-        # Should have URL anchors from Confluence
-        url_anchors = [a for a in result.evidence_anchors if a.ref_type == "url"]
-        assert len(url_anchors) > 0, "Should have URL anchors from Confluence evidence"
-        # Should NOT have code anchors (no traceability provided)
-        code_anchors = [a for a in result.evidence_anchors if a.ref_type == "code_path"]
-        assert len(code_anchors) == 0, "Should not have code anchors without traceability"
-def test_apply_trace_backed_anchor_verification_enriches_verified_doc_anchor_metadata() -> None:
-    result = RowEvaluationResult(
-        row_id="CL-004:row_3",
-        check_id="CL-004",
-        status=RowStatus.FAIL,
-        score=30.0,
-        score_breakdown=ScoreBreakdown.compute(raw_score=30.0),
-        reason="reason",
-        finding="finding",
-        provenance=RowProvenance(
-            evaluator_type="unit_test",
-            evaluator_version="test",
-            row_llm_mode="selective",
-            template_hash="template",
-            evidence_hash="evidence",
-        ),
-        evidence_anchors=[
-            EvidenceAnchor(
-                ref_type="confluence_chunk",
-                ref_value="chunk:abc123",
-                excerpt="MariaDB",
-                verified=True,
-                verification_reason="excerpt_verified_in_context",
-            )
-        ],
-        retrieval_trace={
-            "tool_first_loop": {
-                "trace_steps": [
-                    {
-                        "tool": ToolName.READ_DOC_CHUNK.value,
-                        "output": {
-                            "ref": "chunk:abc123",
-                            "chunk_id": "abc123",
-                            "page_id": "88718940",
-                            "page_title": "OpenAPI Specification",
-                            "source_url": "https://confluence.example.com/pages/viewpage.action?pageId=88718940",
-                            "content": "MariaDB sizing details",
-                        },
-                    }
-                ]
-            }
-        },
-    )
-    verified = RowEvaluator._apply_trace_backed_anchor_verification(result)
-    anchor = verified.evidence_anchors[0]
-    assert anchor.page_id == "88718940"
-    assert anchor.page_title == "OpenAPI Specification"
-    assert anchor.source_url == "https://confluence.example.com/pages/viewpage.action?pageId=88718940"
-    assert anchor.chunk_id == "abc123"
-    def test_evaluate_with_dual_evidence(
-        self, sample_check, confluence_evidence, code_evidence_pack, traceability_with_match
-    ):
-        """Test row evaluation with both docs and code evidence (TSK-198 primary test)."""
-        evaluator = DeterministicEvaluator(template_hash="test_hash")
-        result = evaluator.evaluate(
-            check=sample_check,
-            evidence=confluence_evidence,
-            row_id="row_0",
-            code_evidence=code_evidence_pack,
-            traceability=traceability_with_match,
-        )
-        # Should have both URL and code anchors
-        anchor_types = {a.ref_type for a in result.evidence_anchors}
-        assert "url" in anchor_types, "Should have URL anchors from Confluence"
-        assert "code_path" in anchor_types, "Should have code_path anchors from traceability"
-        # Verify we have at least one of each
-        url_anchors = [a for a in result.evidence_anchors if a.ref_type == "url"]
-        code_anchors = [a for a in result.evidence_anchors if a.ref_type == "code_path"]
-        assert len(url_anchors) >= 1, "Should have at least one URL anchor"
-        assert len(code_anchors) >= 1, "Should have at least one code anchor"
-        # Verify URL anchor has Confluence URL
-        assert any("confluence.example.com" in a.ref_value for a in url_anchors)
-        # Verify code anchor has file path and line number
-        assert any("#L" in a.ref_value for a in code_anchors)
-        assert any("src/api/routes.py" in a.ref_value for a in code_anchors)
-    def test_evaluate_without_code_evidence_graceful(self, sample_check, confluence_evidence):
-        """Test graceful degradation when code evidence is not provided."""
-        evaluator = DeterministicEvaluator(template_hash="test_hash")
-        # Call with code_evidence=None (default)
-        result = evaluator.evaluate(
-            check=sample_check,
-            evidence=confluence_evidence,
-            row_id="row_0",
-        )
-        # Should work without error
-        assert result is not None
-        assert result.status in [RowStatus.PASS, RowStatus.PARTIAL, RowStatus.FAIL]
-        # Should have doc evidence but no code evidence
-        url_anchors = [a for a in result.evidence_anchors if a.ref_type == "url"]
-        code_anchors = [a for a in result.evidence_anchors if a.ref_type == "code_path"]
-        assert len(url_anchors) > 0, "Should have URL anchors"
-        assert len(code_anchors) == 0, "Should have no code anchors without traceability"
-    def test_evaluate_with_no_matching_traces(
-        self, sample_check, confluence_evidence, code_evidence_pack, traceability_no_match
-    ):
-        """Test that non-matching traces don't produce code anchors."""
-        evaluator = DeterministicEvaluator(template_hash="test_hash")
-        result = evaluator.evaluate(
-            check=sample_check,
-            evidence=confluence_evidence,
-            row_id="row_0",
-            code_evidence=code_evidence_pack,
-            traceability=traceability_no_match,  # Traces don't match check
-        )
-        # Should have URL anchors from Confluence
-        url_anchors = [a for a in result.evidence_anchors if a.ref_type == "url"]
-        assert len(url_anchors) > 0, "Should have URL anchors"
-        # Should NOT have code anchors (trace doesn't match)
-        code_anchors = [a for a in result.evidence_anchors if a.ref_type == "code_path"]
-        assert len(code_anchors) == 0, "Should not have code anchors from non-matching traces"
-    def test_trace_matching_by_check_description(self):
-        """Test that traces match checks based on description content."""
-        evaluator = DeterministicEvaluator(template_hash="test_hash")
-        # Check with endpoint in description
-        check = AuditCheck(
-            id="API-002",
-            name="User API documented",
-            description="The /api/users endpoint must be documented",
-            check_type="api_documented",
-            weight=1.0,
-            section_id="api",
-        )
-        # Trace with matching endpoint
-        traces = [
-            TraceRecord(
-                mention_id="m1",
-                mention_type="endpoint",
-                mention_value="/api/users",
-                source_url="https://confluence.example.com/page/1",
-                code_anchors=[
-                    CodeAnchor(file="api/users.py", line=10, match="@route('/api/users')"),
-                ],
-            ),
-        ]
-        result = evaluator.evaluate(
-            check=check,
-            evidence=None,
-            row_id="row_0",
-            traceability=traces,
-        )
-        # Should match and include code anchor
-        code_anchors = [a for a in result.evidence_anchors if a.ref_type == "code_path"]
-        assert len(code_anchors) > 0, "Should match trace by description content"
-    def test_trace_matching_by_check_id(self):
-        """Test that traces match checks based on check ID."""
-        evaluator = DeterministicEvaluator(template_hash="test_hash")
-        # Check with identifier in ID
-        check = AuditCheck(
-            id="JIRA-1234",
-            name="Story implementation",
-            description="Implement user story JIRA-1234",
-            check_type="requirement_coverage",
-            weight=1.0,
-            section_id="requirements",
-        )
-        # Trace with matching identifier
-        traces = [
-            TraceRecord(
-                mention_id="m1",
-                mention_type="identifier",
-                mention_value="JIRA-1234",
-                source_url="https://confluence.example.com/page/2",
-                code_anchors=[
-                    CodeAnchor(file="features/user_story.py", line=5, match="# JIRA-1234"),
-                ],
-            ),
-        ]
-        result = evaluator.evaluate(
-            check=check,
-            evidence=None,
-            row_id="row_0",
-            traceability=traces,
-        )
-        # Should match and include code anchor
-        code_anchors = [a for a in result.evidence_anchors if a.ref_type == "code_path"]
-        assert len(code_anchors) > 0, "Should match trace by check ID"
-    def test_code_anchor_line_range_populated(self, sample_check, traceability_with_match):
-        """Test that code anchors have line_range populated."""
-        evaluator = DeterministicEvaluator(template_hash="test_hash")
-        result = evaluator.evaluate(
-            check=sample_check,
-            evidence=None,
-            row_id="row_0",
-            traceability=traceability_with_match,
-        )
-        code_anchors = [a for a in result.evidence_anchors if a.ref_type == "code_path"]
-        assert len(code_anchors) > 0
-        # Verify line_range is set
-        first_anchor = code_anchors[0]
-        assert first_anchor.line_range is not None, "Code anchor should have line_range"
-        assert isinstance(first_anchor.line_range, tuple)
-        assert len(first_anchor.line_range) == 2
-        assert first_anchor.line_range[0] > 0, "Line range should have positive line numbers"
-    def test_code_anchor_excerpt_truncated(self):
-        """Test that long code excerpts are truncated."""
-        evaluator = DeterministicEvaluator(template_hash="test_hash")
-        # Create trace with long match text
-        long_match = "x" * 500
-        traces = [
-            TraceRecord(
-                mention_id="m1",
-                mention_type="endpoint",
-                mention_value="/api/long",
-                source_url="https://confluence.example.com/page/3",
-                code_anchors=[
-                    CodeAnchor(file="long.py", line=1, match=long_match),
-                ],
-            ),
-        ]
-        check = AuditCheck(
-            id="LONG-001",
-            name="Long test",
-            description="Test with /api/long endpoint",
-            check_type="test",
-            weight=1.0,
-            section_id="test",
-        )
-        result = evaluator.evaluate(
-            check=check,
-            evidence=None,
-            row_id="row_0",
-            traceability=traces,
-        )
-        code_anchors = [a for a in result.evidence_anchors if a.ref_type == "code_path"]
-        assert len(code_anchors) > 0
-        # Verify excerpt is truncated to 200 chars
-        first_anchor = code_anchors[0]
-        assert first_anchor.excerpt is not None
-        assert len(first_anchor.excerpt) <= 200, "Excerpt should be truncated to 200 chars"
-    def test_multiple_code_anchors_from_single_trace(self):
-        """Test that multiple code anchors from one trace are all included."""
-        evaluator = DeterministicEvaluator(template_hash="test_hash")
-        # Trace with multiple code anchors
-        traces = [
-            TraceRecord(
-                mention_id="m1",
-                mention_type="endpoint",
-                mention_value="/api/multi",
-                source_url="https://confluence.example.com/page/4",
-                code_anchors=[
-                    CodeAnchor(file="api.py", line=10, match="route 1"),
-                    CodeAnchor(file="api.py", line=20, match="route 2"),
-                    CodeAnchor(file="api.py", line=30, match="route 3"),
-                ],
-            ),
-        ]
-        check = AuditCheck(
-            id="MULTI-001",
-            name="Multi anchor test",
-            description="Test /api/multi endpoint with multiple matches",
-            check_type="test",
-            weight=1.0,
-            section_id="test",
-        )
-        result = evaluator.evaluate(
-            check=check,
-            evidence=None,
-            row_id="row_0",
-            traceability=traces,
-        )
-        code_anchors = [a for a in result.evidence_anchors if a.ref_type == "code_path"]
-        assert len(code_anchors) == 3, "Should include all code anchors from trace"
-        # Verify they're all from the same file
-        assert all("api.py" in a.ref_value for a in code_anchors)
-# =============================================================================
-# Checklist Generator Integration
-# =============================================================================
-class TestChecklistGeneratorDualAnchors:
-    """Test checklist generator formats dual anchors correctly."""
-    def test_format_evidence_refs_urls_first(self):
-        """Test that _format_evidence_refs lists URLs before code paths."""
-        from vds_audit_orchestrator.models.checklist import EvidenceAnchor
-        from vds_audit_orchestrator.reports.checklist_generator import ChecklistGenerator
-        generator = ChecklistGenerator()
-        anchors = [
-            EvidenceAnchor(ref_type="code_path", ref_value="src/api.py#L42"),
-            EvidenceAnchor(ref_type="url", ref_value="https://confluence.example.com/page/1"),
-            EvidenceAnchor(ref_type="code_path", ref_value="src/routes.py#L10"),
-            EvidenceAnchor(ref_type="url", ref_value="https://confluence.example.com/page/2"),
-        ]
-        formatted = generator._format_evidence_refs(anchors)
-        lines = formatted.split("\n")
-        # URLs should come first (with 📄 emoji)
-        assert lines[0].startswith("📄")
-        assert lines[1].startswith("📄")
-        # Code paths should come after (with 💻 emoji)
-        assert lines[2].startswith("💻")
-        assert lines[3].startswith("💻")
-    def test_format_evidence_refs_empty_list(self):
-        """Test that empty anchor list returns empty string."""
-        from vds_audit_orchestrator.reports.checklist_generator import ChecklistGenerator
-        generator = ChecklistGenerator()
-        formatted = generator._format_evidence_refs([])
-        assert formatted == ""
-    def test_format_evidence_refs_only_urls(self):
-        """Test formatting with only URL anchors."""
-        from vds_audit_orchestrator.models.checklist import EvidenceAnchor
-        from vds_audit_orchestrator.reports.checklist_generator import ChecklistGenerator
-        generator = ChecklistGenerator()
-        anchors = [
-            EvidenceAnchor(ref_type="url", ref_value="https://confluence.example.com/page/1"),
-            EvidenceAnchor(ref_type="url", ref_value="https://confluence.example.com/page/2"),
-        ]
-        formatted = generator._format_evidence_refs(anchors)
-        lines = formatted.split("\n")
-        assert len(lines) == 2
-        assert all(line.startswith("📄") for line in lines)
-    def test_format_evidence_refs_only_code(self):
-        """Test formatting with only code path anchors."""
-        from vds_audit_orchestrator.models.checklist import EvidenceAnchor
-        from vds_audit_orchestrator.reports.checklist_generator import ChecklistGenerator
-        generator = ChecklistGenerator()
-        anchors = [
-            EvidenceAnchor(ref_type="code_path", ref_value="src/api.py#L42"),
-            EvidenceAnchor(ref_type="code_path", ref_value="src/routes.py#L10"),
-        ]
-        formatted = generator._format_evidence_refs(anchors)
-        lines = formatted.split("\n")
-        assert len(lines) == 2
-        assert all(line.startswith("💻") for line in lines)
-    def test_format_evidence_refs_mixed_types(self):
-        """Test formatting with mixed anchor types (url, code_path, other)."""
-        from vds_audit_orchestrator.models.checklist import EvidenceAnchor
-        from vds_audit_orchestrator.reports.checklist_generator import ChecklistGenerator
-        generator = ChecklistGenerator()
-        anchors = [
-            EvidenceAnchor(ref_type="url", ref_value="https://confluence.example.com/page/1"),
-            EvidenceAnchor(ref_type="code_path", ref_value="src/api.py#L42"),
-            EvidenceAnchor(ref_type="path", ref_value="README.md"),  # "other" type
-        ]
-        formatted = generator._format_evidence_refs(anchors)
-        lines = formatted.split("\n")
-        assert len(lines) == 3
-        # URL first
-        assert lines[0].startswith("📄")
-        # Code second
-        assert lines[1].startswith("💻")
-        # Other third (no emoji prefix, just type label)
-        assert "path:" in lines[2]
-class TestRowEvaluatorEvidenceRefSelection:
-    """Test synthesis ref prioritization/capping for tool-first rows."""
-    def test_prioritize_synthesis_evidence_refs_prefers_tool_loop_and_caps(
-        self,
-        monkeypatch: pytest.MonkeyPatch,
-    ) -> None:
-        monkeypatch.setenv("VDS_AUDIT_LLM__ROW_MAX_EVIDENCE_REFS", "3")
-        loop_result = AgentLoopResult(
-            termination_reason=LoopTerminationReason.COMPLETED,
-            steps_executed=6,
-            budget_remaining=9,
-            elapsed_seconds=0.5,
-            evidence_refs=["chunk:alpha", "src/service.py"],
-        )
-        selected_refs = RowEvaluator._prioritize_synthesis_evidence_refs(
-            row_id="CL-001:row_0",
-            requirement_text="",
-            requirement_guidance=None,
-            requirement_interpretation=None,
-            base_refs=["README.md", "src/service.py", "docs/runbook.md"],
-            loop_result=loop_result,
-            augmented_refs=["chunk:beta", "docs/runbook.md"],
-        )
-        assert selected_refs == ["chunk:alpha", "src/service.py", "chunk:beta"]
-    def test_prioritize_synthesis_evidence_refs_default_cap_without_loop(
-        self,
-        monkeypatch: pytest.MonkeyPatch,
-    ) -> None:
-        monkeypatch.delenv("VDS_AUDIT_LLM__ROW_MAX_EVIDENCE_REFS", raising=False)
-        base_refs = [f"ref-{idx}" for idx in range(1, 18)]
-        selected_refs = RowEvaluator._prioritize_synthesis_evidence_refs(
-            row_id="CL-002:row_1",
-            requirement_text="",
-            requirement_guidance=None,
-            requirement_interpretation=None,
-            base_refs=base_refs,
-            loop_result=None,
-            augmented_refs=None,
-        )
-        assert len(selected_refs) == 12
-        assert selected_refs == base_refs[:12]
-    def test_prioritize_synthesis_evidence_refs_prefers_recorded_refs_from_trace_steps(
-        self,
-        monkeypatch: pytest.MonkeyPatch,
-    ) -> None:
-        monkeypatch.setenv("VDS_AUDIT_LLM__ROW_MAX_EVIDENCE_REFS", "4")
-        loop_result = AgentLoopResult(
-            termination_reason=LoopTerminationReason.COMPLETED,
-            steps_executed=4,
-            budget_remaining=7,
-            elapsed_seconds=0.4,
-            evidence_refs=["docs/a.md", "src/a.py", "src/b.py"],
-            trace_steps=[
-                AgentLoopStep(
-                    step=1,
-                    tool=ToolName.RECORD_EVIDENCE_REFS,
-                    budget_cost=1,
-                    budget_remaining=9,
-                    output={
-                        "refs": [
-                            {"ref_type": "doc_path", "ref_value": "chunk:alpha"},
-                            {"ref_type": "code_path", "ref_value": "src/a.py"},
-                        ]
-                    },
-                )
-            ],
-        )
-        selected_refs = RowEvaluator._prioritize_synthesis_evidence_refs(
-            row_id="CL-003:row_2",
-            requirement_text="",
-            requirement_guidance=None,
-            requirement_interpretation=None,
-            base_refs=["README.md", "docs/a.md"],
-            loop_result=loop_result,
-            augmented_refs=["docs/b.md"],
-        )
-        assert selected_refs == ["src/a.py", "chunk:alpha", "docs/a.md", "src/b.py"]
-    def test_prioritize_synthesis_evidence_refs_does_not_reappend_seed_refs_after_tool_refs(
-        self,
-        monkeypatch: pytest.MonkeyPatch,
-    ) -> None:
-        monkeypatch.setenv("VDS_AUDIT_LLM__ROW_MAX_EVIDENCE_REFS", "12")
-        loop_result = AgentLoopResult(
-            termination_reason=LoopTerminationReason.COMPLETED,
-            steps_executed=3,
-            budget_remaining=8,
-            elapsed_seconds=0.4,
-            evidence_refs=["src/wiring/A.java", "attachment://57037192/57037316"],
-        )
-        selected_refs = RowEvaluator._prioritize_synthesis_evidence_refs(
-            row_id="CL-003:row_2",
-            requirement_text="",
-            requirement_guidance=None,
-            requirement_interpretation=None,
-            base_refs=[
-                "README.md",
-                "KBKT.docx",
-                "pom.xml",
-                "attachment://seed-only",
-            ],
-            loop_result=loop_result,
-            augmented_refs=["src/wiring/B.java"],
-        )
-        assert selected_refs == [
-            "src/wiring/A.java",
-            "attachment://57037192/57037316",
-            "src/wiring/B.java",
-            "README.md",
-            "KBKT.docx",
-            "pom.xml",
-            "attachment://seed-only",
-        ]
-class TestTraceBackedAnchorVerification:
-    """Test trace-backed verification quality for LLM-provided anchors."""
-    @staticmethod
-    def _make_result(anchor: EvidenceAnchor, trace_steps: list[dict[str, object]]) -> RowEvaluationResult:
-        return RowEvaluationResult(
-            row_id="CL-001:row_0",
-            check_id="CL-001",
-            status=RowStatus.PARTIAL,
-            score=50.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=50.0, coverage=1.0, confidence=1.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=[anchor],
-            provenance=RowProvenance(
-                row_llm_mode="selective",
-                template_hash="template",
-                evidence_hash="evidence",
-            ),
-            retrieval_trace={
-                "tool_first_loop": {
-                    "trace_steps": trace_steps,
-                }
-            },
-        )
-    @staticmethod
-    def _make_result_from_anchors(
-        anchors: list[EvidenceAnchor], trace_steps: list[dict[str, object]]
-    ) -> RowEvaluationResult:
-        return RowEvaluationResult(
-            row_id="CL-001:row_0",
-            check_id="CL-001",
-            status=RowStatus.PARTIAL,
-            score=50.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=50.0, coverage=1.0, confidence=1.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=anchors,
-            provenance=RowProvenance(
-                row_llm_mode="selective",
-                template_hash="template",
-                evidence_hash="evidence",
-            ),
-            retrieval_trace={
-                "tool_first_loop": {
-                    "trace_steps": trace_steps,
-                }
-            },
-        )
-    def test_trace_verification_accepts_ellipsis_excerpt(self) -> None:
-        anchor = EvidenceAnchor(
-            ref_type="code_file",
-            ref_value="src/service.py",
-            excerpt="import org.springframework.stereotype.Service; ... @Service @RequiredArgsConstructor",
-            verified=False,
-            verification_reason="llm_anchor_provided",
-        )
-        trace_steps = [
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "src/service.py",
-                    "content": (
-                        "import org.springframework.stereotype.Service;\n"
-                        "@Service\n"
-                        "@RequiredArgsConstructor\n"
-                        "public class ServiceImpl {}"
-                    ),
-                },
-            }
-        ]
-        result = self._make_result(anchor, trace_steps)
-        verified = RowEvaluator._apply_trace_backed_anchor_verification(result)
-        updated_anchor = verified.evidence_anchors[0]
-        assert updated_anchor.verified is True
-        assert updated_anchor.verification_reason == "excerpt_verified_in_context"
-    def test_trace_verification_marks_mismatch_when_excerpt_missing(self) -> None:
-        anchor = EvidenceAnchor(
-            ref_type="code_file",
-            ref_value="src/service.py",
-            excerpt="no matching excerpt ... should fail",
-            verified=False,
-            verification_reason="llm_anchor_provided",
-        )
-        trace_steps = [
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "src/service.py",
-                    "content": (
-                        "import org.springframework.stereotype.Service;\n"
-                        "@Service\n"
-                        "@RequiredArgsConstructor\n"
-                        "public class ServiceImpl {}"
-                    ),
-                },
-            }
-        ]
-        result = self._make_result(anchor, trace_steps)
-        verified = RowEvaluator._apply_trace_backed_anchor_verification(result)
-        updated_anchor = verified.evidence_anchors[0]
-        assert updated_anchor.verified is False
-        assert updated_anchor.verification_reason == "excerpt_mismatch_or_unverified"
-    def test_trace_verification_recovers_doc_excerpt_from_read_chunk_when_llm_excerpt_mismatches(self) -> None:
-        anchor = EvidenceAnchor(
-            ref_type="doc",
-            ref_value="chunk:abc123",
-            excerpt="This stale summary does not match the actual chunk content.",
-            verified=False,
-            verification_reason="llm_anchor_provided",
-        )
-        trace_steps = [
-            {
-                "tool": "read_doc_chunk",
-                "output": {
-                    "ref": "chunk:abc123",
-                    "content": (
-                        "System sizing baseline for DB Maria includes config tables, settings, "
-                        "frequency, and feature flags for CEP services."
-                    ),
-                },
-            }
-        ]
-        result = self._make_result(anchor, trace_steps)
-        verified = RowEvaluator._apply_trace_backed_anchor_verification(result)
-        updated_anchor = verified.evidence_anchors[0]
-        assert updated_anchor.verified is True
-        assert updated_anchor.verification_reason == "excerpt_verified_in_context"
-        assert "DB Maria" in updated_anchor.excerpt
-    def test_trace_verification_accepts_formatting_only_whitespace_differences_for_code(self) -> None:
-        anchor = EvidenceAnchor(
-            ref_type="code_file",
-            ref_value="src/config/DatabaseConfig.java",
-            excerpt=(
-                '@EnableJpaRepositories(entityManagerFactoryRef = "mainEntityManagerFactory", '
-                'transactionManagerRef = "mainTransactionManager", basePackages = '
-                '{"vn.com.viettel.vds.campaign.repository.campaign"})'
-            ),
-            verified=False,
-            verification_reason="llm_anchor_provided",
-        )
-        trace_steps = [
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "src/config/DatabaseConfig.java",
-                    "content": (
-                        '@EnableJpaRepositories(entityManagerFactoryRef = "mainEntityManagerFactory",\n'
-                        '    transactionManagerRef = "mainTransactionManager", basePackages = {\n'
-                        '    "vn.com.viettel.vds.campaign.repository.campaign"})\n'
-                        '@EnableJpaAuditing(auditorAwareRef = "auditorProvider")\n'
-                    ),
-                },
-            }
-        ]
-        result = self._make_result(anchor, trace_steps)
-        verified = RowEvaluator._apply_trace_backed_anchor_verification(result)
-        updated_anchor = verified.evidence_anchors[0]
-        assert updated_anchor.verified is True
-        assert updated_anchor.verification_reason == "excerpt_verified_in_context"
-    def test_trace_verification_decodes_escaped_newlines_for_code_excerpt_match(self) -> None:
-        anchor = EvidenceAnchor(
-            ref_type="code_file",
-            ref_value="src/config/DatabaseConfig.java",
-            excerpt='@Configuration\\n@ConditionalOnProperty(value = "app.datasource.default.enable", havingValue = "true")',
-            verified=False,
-            verification_reason="llm_anchor_provided",
-        )
-        trace_steps = [
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "src/config/DatabaseConfig.java",
-                    "content": (
-                        "@Configuration\n"
-                        '@ConditionalOnProperty(value = "app.datasource.default.enable", havingValue = "true")\n'
-                    ),
-                },
-            }
-        ]
-        result = self._make_result(anchor, trace_steps)
-        verified = RowEvaluator._apply_trace_backed_anchor_verification(result)
-        updated_anchor = verified.evidence_anchors[0]
-        assert updated_anchor.verified is True
-        assert updated_anchor.verification_reason == "excerpt_verified_in_context"
-    def test_trace_verification_auto_verifies_llm_anchor_without_excerpt(self) -> None:
-        anchor = EvidenceAnchor(
-            ref_type="code_file",
-            ref_value="src/service.py",
-            excerpt=None,
-            verified=False,
-            verification_reason="llm_anchor_provided",
-        )
-        trace_steps = [
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "src/service.py",
-                    "content": "class ServiceImpl {}",
-                },
-            }
-        ]
-        result = self._make_result(anchor, trace_steps)
-        verified = RowEvaluator._apply_trace_backed_anchor_verification(result)
-        updated_anchor = verified.evidence_anchors[0]
-        assert updated_anchor.verified is True
-        assert updated_anchor.verification_reason == "excerpt_verified_in_context"
-        assert updated_anchor.excerpt == "class ServiceImpl {}"
-    def test_trace_verification_auto_verifies_no_excerpt_reason_when_read_content_is_unique(self) -> None:
-        anchor = EvidenceAnchor(
-            ref_type="code_file",
-            ref_value="src/service.py",
-            excerpt=None,
-            verified=False,
-            verification_reason="no_excerpt_to_verify",
-        )
-        trace_steps = [
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "src/service.py",
-                    "content": "class ServiceImpl {}",
-                },
-            }
-        ]
-        result = self._make_result(anchor, trace_steps)
-        verified = RowEvaluator._apply_trace_backed_anchor_verification(result)
-        updated_anchor = verified.evidence_anchors[0]
-        assert updated_anchor.verified is True
-        assert updated_anchor.verification_reason == "excerpt_verified_in_context"
-        assert updated_anchor.excerpt == "class ServiceImpl {}"
-    def test_trace_verification_backfills_code_line_range_from_read_trace(self) -> None:
-        anchor = EvidenceAnchor(
-            ref_type="code_path",
-            ref_value="src/service.py",
-            excerpt=None,
-            verified=False,
-            verification_reason="no_excerpt_to_verify",
-        )
-        trace_steps = [
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "src/service.py",
-                    "start_line": 12,
-                    "end_line": 34,
-                    "content": "class ServiceImpl {}",
-                },
-            }
-        ]
-        result = self._make_result(anchor, trace_steps)
-        verified = RowEvaluator._apply_trace_backed_anchor_verification(result)
-        updated_anchor = verified.evidence_anchors[0]
-        assert updated_anchor.line_range == (12, 34)
-        assert updated_anchor.verified is True
-        assert updated_anchor.verification_reason == "excerpt_verified_in_context"
-    def test_trace_verification_keeps_fallback_anchor_unverified_without_excerpt(self) -> None:
-        anchor = EvidenceAnchor(
-            ref_type="code_file",
-            ref_value="src/service.py",
-            excerpt=None,
-            verified=False,
-            verification_reason="fallback_ref_inherited",
-        )
-        trace_steps = [
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "src/service.py",
-                    "content": "class ServiceImpl {}",
-                },
-            }
-        ]
-        result = self._make_result(anchor, trace_steps)
-        verified = RowEvaluator._apply_trace_backed_anchor_verification(result)
-        updated_anchor = verified.evidence_anchors[0]
-        assert updated_anchor.verified is False
-        assert updated_anchor.verification_reason == "no_excerpt_to_verify"
-    def test_trace_verification_keeps_anchor_unverified_when_read_matches_are_ambiguous(self) -> None:
-        anchor = EvidenceAnchor(
-            ref_type="code_path",
-            ref_value="src/auth/AuthenticationFilter.java",
-            excerpt=None,
-            verified=False,
-            verification_reason="llm_anchor_provided",
-        )
-        trace_steps = [
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "a/src/auth/AuthenticationFilter.java",
-                    "content": "@Component\npublic class AuthenticationFilter {}",
-                },
-            },
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "b/src/auth/AuthenticationFilter.java",
-                    "content": "@Service\npublic class AuthenticationFilter {}",
-                },
-            },
-        ]
-        result = self._make_result(anchor, trace_steps)
-        verified = RowEvaluator._apply_trace_backed_anchor_verification(result)
-        updated_anchor = verified.evidence_anchors[0]
-        assert updated_anchor.verified is False
-        assert updated_anchor.verification_reason == "llm_anchor_provided"
-        assert updated_anchor.excerpt is None
-    def test_trace_verification_prunes_low_signal_unverified_anchors_when_verified_anchor_exists(self) -> None:
-        anchors = [
-            EvidenceAnchor(
-                ref_type="code_path",
-                ref_value="src/main.py",
-                excerpt="security.enabled=true",
-                verified=False,
-                verification_reason="llm_anchor_provided",
-            ),
-            EvidenceAnchor(
-                ref_type="code_path",
-                ref_value="src/fallback.py",
-                excerpt=None,
-                verified=False,
-                verification_reason="fallback_ref_inherited",
-            ),
-            EvidenceAnchor(
-                ref_type="code_path",
-                ref_value="src/other.py",
-                excerpt=None,
-                verified=False,
-                verification_reason="llm_anchor_provided",
-            ),
-        ]
-        trace_steps = [
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "src/main.py",
-                    "content": "security.enabled=true\n",
-                },
-            },
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "src/fallback.py",
-                    "content": "class Fallback {}\n",
-                },
-            },
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "a/src/other.py",
-                    "content": "different content A\n",
-                },
-            },
-            {
-                "tool": "read_code_file",
-                "output": {
-                    "path": "b/src/other.py",
-                    "content": "different content B\n",
-                },
-            },
-        ]
-        result = self._make_result_from_anchors(anchors, trace_steps)
-        verified = RowEvaluator._apply_trace_backed_anchor_verification(result)
-        assert [anchor.ref_value for anchor in verified.evidence_anchors] == ["src/main.py"]
-        assert verified.retrieval_trace.get("anchor_pruning", {}).get("dropped_low_signal_anchor_count") == 2
-        assert verified.retrieval_trace.get("anchor_pruning", {}).get("policy") == "prefer_verified_decisive_anchors"
-class TestPhase103GroundingHardGates:
-    """Phase 103: strict anchor identity + code-grounding hard gate behavior."""
-    def test_anchor_identity_contract_reports_invalid_cited_refs(self) -> None:
-        result = RowEvaluationResult(
-            row_id="CL-004:row_3",
-            check_id="CL-004",
-            status=RowStatus.PARTIAL,
-            score=60.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=60.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/a.py",
-                    verified=False,
-                    verification_reason="llm_anchor_provided",
-                ),
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/unlisted.py",
-                    verified=False,
-                    verification_reason="llm_anchor_provided",
-                ),
-            ],
-            provenance=RowProvenance(row_llm_mode="selective", template_hash="t", evidence_hash="e"),
-            retrieval_trace={"tool_first_loop": {"evidence_refs": ["src/a.py", "docs/runbook.md"]}},
-        )
-        contract = RowEvaluator._build_anchor_identity_contract(result)
-        assert "src/a.py" in contract["allowed_anchor_ids"]
-        assert "src/unlisted.py" in contract["invalid_cited_anchor_ids"]
-        assert contract["allowed_count"] == 2
-        assert contract["cited_count"] == 2
-        assert contract["coverage_ratio"] == 1.0
-        assert contract["invalid_count"] == 1
-        assert contract["invalid_reason_code"] == "anchor_id_not_allowed"
-    def test_anchor_identity_contract_accepts_final_cited_refs_not_in_exploratory_set(self) -> None:
-        result = RowEvaluationResult(
-            row_id="CL-017:row_16",
-            check_id="CL-017",
-            status=RowStatus.FAIL,
-            score=15.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=15.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/config/AppConfig.java",
-                    verified=False,
-                    verification_reason="llm_anchor_provided",
-                ),
-                EvidenceAnchor(
-                    ref_type="doc",
-                    ref_value="attachment://25123075/146146420",
-                    verified=False,
-                    verification_reason="llm_anchor_provided",
-                ),
-            ],
-            provenance=RowProvenance(row_llm_mode="selective", template_hash="t", evidence_hash="e"),
-            retrieval_trace={
-                "tool_first_loop": {"evidence_refs": ["src/config/AuditorAwareImpl.java", "chunk:abdc897104a4f463"]},
-                "evidence_ref_sets": {
-                    "final_cited_refs": [
-                        "src/config/AppConfig.java",
-                        "attachment://25123075/146146420",
-                    ]
-                },
-            },
-        )
-        contract = RowEvaluator._build_anchor_identity_contract(result)
-        assert "src/config/AppConfig.java" in contract["allowed_anchor_ids"]
-        assert "attachment://25123075/146146420" in contract["allowed_anchor_ids"]
-        assert contract["invalid_cited_anchor_ids"] == []
-        assert contract["invalid_count"] == 0
-        assert contract["invalid_reason_code"] is False
-    def test_anchor_identity_contract_ignores_internal_analysis_refs(self) -> None:
-        result = RowEvaluationResult(
-            row_id="CL-031:row_30",
-            check_id="CL-031",
-            status=RowStatus.ERROR,
-            score=0.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=0.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="confluence_chunk",
-                    ref_value="chunk:6eb413f3e9764797",
-                    verified=False,
-                    verification_reason="fallback_ref_inherited",
-                ),
-                EvidenceAnchor(
-                    ref_type="confluence_url",
-                    ref_value="http://confluence.digital.vn/display/TTCN24/PAR+Project+Audit",
-                    verified=False,
-                    verification_reason="fallback_ref_inherited",
-                ),
-                EvidenceAnchor(
-                    ref_type="doc_path",
-                    ref_value="references/row-analysis.md",
-                    verified=False,
-                    verification_reason="fallback_ref_inherited",
-                ),
-            ],
-            provenance=RowProvenance(row_llm_mode="selective", template_hash="t", evidence_hash="e"),
-            retrieval_trace={
-                "tool_first_loop": {
-                    "evidence_refs": [
-                        "src/main/java/vn/com/viettel/vds/merchantbilling/core/domain/dto/UserInfoDto.java",
-                        "chunk:6eb413f3e9764797",
-                    ]
-                },
-                "evidence_ref_sets": {
-                    "final_cited_refs": [
-                        "chunk:6eb413f3e9764797",
-                        "http://confluence.digital.vn/display/TTCN24/PAR+Project+Audit",
-                        "references/row-analysis.md",
-                    ]
-                },
-            },
-        )
-        contract = RowEvaluator._build_anchor_identity_contract(result)
-        assert contract["cited_anchor_ids"] == ["chunk:6eb413f3e9764797"]
-        assert contract["invalid_cited_anchor_ids"] == []
-        assert contract["invalid_count"] == 0
-        assert contract["invalid_reason_code"] is False
-    def test_code_grounding_hard_gate_forces_error_when_verified_ratio_zero(self) -> None:
-        result = RowEvaluationResult(
-            row_id="CL-004:row_3",
-            check_id="CL-004",
-            status=RowStatus.PARTIAL,
-            score=55.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=55.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/config/database.py",
-                    excerpt=None,
-                    verified=False,
-                    verification_reason="llm_anchor_provided",
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="selective", template_hash="t", evidence_hash="e"),
-            retrieval_trace={},
-        )
-        coverage = RowEvaluator._build_row_verification_coverage(result.evidence_anchors)
-        gated = RowEvaluator._apply_code_grounding_hard_gate(
-            result=result,
-            verification_coverage=coverage,
-            requirement_interpretation={"code_target": True},
-        )
-        assert gated.status == RowStatus.ERROR
-        assert gated.error_message == "analysis_breakdown_excerpt_discipline"
-        assert gated.score == 0.0
-        assert isinstance(gated.retrieval_trace, dict)
-        assert gated.retrieval_trace.get("code_grounding_hard_gate", {}).get("applied") is True
-        assert gated.retrieval_trace.get("tooling_error_event", {}).get("event") == "pre_output_grounding_guard"
-        assert (
-            gated.retrieval_trace.get("tooling_error_event", {}).get("reason_code")
-            == "analysis_breakdown_excerpt_discipline"
-        )
-        assert (
-            gated.retrieval_trace.get("tooling_error_event", {}).get("trigger")
-            == "excerpt_discipline_zero_verified_code"
-        )
-        assert gated.retrieval_trace.get("code_grounding_hard_gate", {}).get("missing_excerpt_count") == 1
-        assert gated.retrieval_trace.get("code_grounding_hard_gate", {}).get("mismatched_excerpt_count") == 0
-    def test_code_grounding_hard_gate_uses_excerpt_discipline_for_mismatch_breakdown(self) -> None:
-        result = RowEvaluationResult(
-            row_id="CL-004:row_3",
-            check_id="CL-004",
-            status=RowStatus.PARTIAL,
-            score=55.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=55.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/config/database.py",
-                    excerpt="DataSource bean",
-                    verified=False,
-                    verification_reason="excerpt_mismatch_or_unverified",
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="selective", template_hash="t", evidence_hash="e"),
-            retrieval_trace={},
-        )
-        coverage = RowEvaluator._build_row_verification_coverage(result.evidence_anchors)
-        gated = RowEvaluator._apply_code_grounding_hard_gate(
-            result=result,
-            verification_coverage=coverage,
-            requirement_interpretation={"code_target": True},
-        )
-        assert gated.status == RowStatus.ERROR
-        assert gated.error_message == "analysis_breakdown_excerpt_discipline"
-        assert gated.retrieval_trace.get("code_grounding_hard_gate", {}).get("mismatched_excerpt_count") == 1
-    def test_code_grounding_hard_gate_downgrades_code_only_excerpt_collapse_to_fail(self) -> None:
-        result = RowEvaluationResult(
-            row_id="CL-032:row_31",
-            check_id="CL-032",
-            status=RowStatus.PARTIAL,
-            score=55.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=55.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/config/app.py",
-                    excerpt="mismatch excerpt",
-                    verified=False,
-                    verification_reason="excerpt_mismatch_or_unverified",
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="selective", template_hash="t", evidence_hash="e"),
-            retrieval_trace={
-                "route_mode": "code_only",
-                "tool_first_loop": {
-                    "route_mode": "code_only",
-                    "code_search_count": 1,
-                    "code_read_count": 1,
-                    "docs_search_count": 0,
-                    "docs_read_count": 0,
-                },
-            },
-        )
-        coverage = RowEvaluator._build_row_verification_coverage(result.evidence_anchors)
-        gated = RowEvaluator._apply_code_grounding_hard_gate(
-            result=result,
-            verification_coverage=coverage,
-            requirement_interpretation={"code_target": True},
-        )
-        assert gated.status == RowStatus.FAIL
-        assert gated.score == 9.0
-        assert gated.error_message is None
-        assert gated.retry_metadata["reason_code"] == "analysis_breakdown_excerpt_discipline"
-        assert gated.retrieval_trace.get("code_only_quality_floor", {}).get("applied") is True
-        assert gated.retrieval_trace.get("code_grounding_hard_gate", {}).get("downgraded_to_fail") is True
-    def test_code_grounding_hard_gate_downgrades_code_required_excerpt_collapse_to_fail(self) -> None:
-        result = RowEvaluationResult(
-            row_id="CL-004:row_3",
-            check_id="CL-004",
-            status=RowStatus.PARTIAL,
-            score=55.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=55.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/main/java/com/example/DatabaseConfig.java",
-                    excerpt="mismatch excerpt",
-                    verified=False,
-                    verification_reason="excerpt_mismatch_or_unverified",
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="selective", template_hash="t", evidence_hash="e"),
-            retrieval_trace={
-                "route_mode": "hybrid_code_first",
-                "tool_first_loop": {
-                    "route_mode": "hybrid_code_first",
-                    "code_search_count": 1,
-                    "code_read_count": 1,
-                    "docs_search_count": 1,
-                    "docs_read_count": 1,
-                },
-                "requirement_analysis": {
-                    "analysis_mode": "hybrid_balanced",
-                    "required_anchor_modalities": ["code", "docs"],
-                },
-            },
-        )
-        coverage = RowEvaluator._build_row_verification_coverage(result.evidence_anchors)
-        gated = RowEvaluator._apply_code_grounding_hard_gate(
-            result=result,
-            verification_coverage=coverage,
-            requirement_interpretation={"code_target": True},
-        )
-        assert gated.status == RowStatus.FAIL
-        assert gated.score == 9.0
-        assert gated.error_message is None
-        assert gated.retry_metadata["reason_code"] == "analysis_breakdown_excerpt_discipline"
-        assert gated.retrieval_trace.get("code_required_quality_floor", {}).get("applied") is True
-        assert gated.retrieval_trace.get("code_grounding_hard_gate", {}).get("downgraded_to_fail") is True
-    def test_excerpt_discipline_provisional_filter_marks_missing_excerpt_without_reason(self) -> None:
-        result = RowEvaluationResult(
-            row_id="CL-004:row_9",
-            check_id="CL-004",
-            status=RowStatus.PARTIAL,
-            score=55.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=55.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/config/database.py",
-                    excerpt=None,
-                    verified=True,
-                    verification_reason="llm_anchor_provided",
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="selective", template_hash="t", evidence_hash="e"),
-            retrieval_trace={},
-        )
-        filtered, diagnostics = RowEvaluator._apply_excerpt_discipline_provisional_filter(result)
-        assert diagnostics["applied"] is True
-        assert diagnostics["provisional_anchor_count"] == 1
-        assert "src/config/database.py" in diagnostics["provisional_anchor_ids"]
-        anchor = filtered.evidence_anchors[0]
-        assert anchor.verified is False
-        assert anchor.verification_reason == "provisional_no_excerpt"
-        assert anchor.excerpt_unavailable_reason == "excerpt_not_provided"
-    def test_recover_anchor_excerpt_from_same_file_multi_read_content(self) -> None:
-        excerpt = RowEvaluator._recover_anchor_excerpt_from_read_content(
-            anchor_ref="src/main/java/com/example/DatabaseConfig.java",
-            ref_type="code_path",
-            matched_contents=[
-                "package com.example;\n@Configuration\npublic class DatabaseConfig {\n",
-                "@Bean\npublic DataSource campaignDataSource() {\n    return null;\n}\n",
-            ],
-        )
-        assert excerpt
-        assert "DatabaseConfig" in excerpt or "campaignDataSource" in excerpt
-    def test_citation_precision_prefers_substantive_code_anchor_for_code_only(self) -> None:
-        result = RowEvaluationResult(
-            row_id="CL-032:row_31",
-            check_id="CL-032",
-            status=RowStatus.FAIL,
-            score=9.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=9.0),
-            reason="Đọc code nhưng anchor cuối cùng chỉ còn file generic.",
-            finding="Evidence yếu.",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="pom.xml",
-                    excerpt="<artifactId>campaign-noti-outbound-service</artifactId>",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                ),
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/main/java/vn/com/viettel/vds/campaign/inapp/config/database/ElasticsearchClientConfig.java",
-                    excerpt="public class ElasticsearchClientConfig {",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                    line_range=(1, 220),
-                ),
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace={
-                "route_mode": "code_only",
-                "tool_first_loop": {
-                    "route_mode": "code_only",
-                    "code_search_count": 1,
-                    "code_read_count": 3,
-                    "docs_search_count": 0,
-                    "docs_read_count": 0,
-                },
-                "evidence_ref_sets": {
-                    "final_cited_refs": ["pom.xml"],
-                    "final_cited_ref_count": 1,
-                    "citation_retention_rate": 0.3333,
-                },
-            },
-        )
-        filtered = RowEvaluator._apply_citation_precision_filter(result)
-        refs = [anchor.ref_value for anchor in filtered.evidence_anchors]
-        assert "src/main/java/vn/com/viettel/vds/campaign/inapp/config/database/ElasticsearchClientConfig.java" in refs
-        assert "pom.xml" not in refs
-        assert "prefer_substantive_code_anchor_for_code_only" in str(
-            filtered.retrieval_trace.get("evidence_ref_sets", {}).get("citation_precision_filter_reason", "")
-        ) or "replace_generic_code_anchor_for_code_only" in str(
-            filtered.retrieval_trace.get("evidence_ref_sets", {}).get("citation_precision_filter_reason", "")
-        )
-    def test_citation_precision_drops_docs_modality_for_code_only_when_substantive_code_exists(self) -> None:
-        """Check that the citation precision filter drops a docs URL on a code-only row.
-
-        The input result is routed ``code_only`` and carries three verified
-        anchors: ``pom.xml``, a Confluence URL, and
-        ``ElasticsearchClientConfig.java``. After
-        ``RowEvaluator._apply_citation_precision_filter`` the Java file must be
-        among the anchors, the URL must be absent, and the recorded filter
-        reason must contain
-        ``drop_docs_modality_for_code_only_with_substantive_code``.
-        """
-        result = RowEvaluationResult(
-            row_id="CL-033:row_32",
-            check_id="CL-033",
-            status=RowStatus.FAIL,
-            score=12.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=12.0),
-            reason="Reasoning-linked citation filter reintroduced a docs-style anchor.",
-            finding="Code-first evidence degraded to generic refs.",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="pom.xml",
-                    excerpt="<artifactId>campaign-noti-outbound-service</artifactId>",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                ),
-                EvidenceAnchor(
-                    ref_type="url",
-                    ref_value="https://confluence.example.local/pages/viewpage.action?pageId=123",
-                    excerpt="Deployment note",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                ),
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/main/java/vn/com/viettel/vds/campaign/inapp/config/database/ElasticsearchClientConfig.java",
-                    excerpt="public class ElasticsearchClientConfig {",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                    line_range=(1, 220),
-                ),
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            # The trace reports no docs searches or reads, yet the previously
-            # recorded filter reason includes "preserve_docs_modality" and the
-            # final cited refs list the docs URL next to pom.xml.
-            retrieval_trace={
-                "route_mode": "code_only",
-                "tool_first_loop": {
-                    "route_mode": "code_only",
-                    "code_search_count": 1,
-                    "code_read_count": 2,
-                    "docs_search_count": 0,
-                    "docs_read_count": 0,
-                },
-                "evidence_ref_sets": {
-                    "citation_precision_filter_reason": "reasoning_linked_refs|preserve_docs_modality",
-                    "final_cited_refs": [
-                        "pom.xml",
-                        "https://confluence.example.local/pages/viewpage.action?pageId=123",
-                    ],
-                    "final_cited_ref_count": 2,
-                    "citation_retention_rate": 0.6667,
-                },
-            },
-        )
-        filtered = RowEvaluator._apply_citation_precision_filter(result)
-        refs = [anchor.ref_value for anchor in filtered.evidence_anchors]
-        assert "src/main/java/vn/com/viettel/vds/campaign/inapp/config/database/ElasticsearchClientConfig.java" in refs
-        assert "https://confluence.example.local/pages/viewpage.action?pageId=123" not in refs
-        assert "drop_docs_modality_for_code_only_with_substantive_code" in str(
-            filtered.retrieval_trace.get("evidence_ref_sets", {}).get("citation_precision_filter_reason", "")
-        )
-    def test_citation_precision_replaces_generic_code_anchor_after_reasoning_link_for_code_only(self) -> None:
-        """Check that ``pom.xml`` is removed from the anchors of a code-only row.
-
-        The input result is routed ``code_only`` with a prior filter reason of
-        ``reasoning_linked_refs`` and three verified code anchors: ``pom.xml``
-        plus two Java source files. After
-        ``RowEvaluator._apply_citation_precision_filter`` the anchors must not
-        include ``pom.xml``, must include at least one of the two Java files,
-        and the filter reason must contain
-        ``replace_generic_code_anchor_for_code_only``.
-        """
-        result = RowEvaluationResult(
-            row_id="CL-036:row_35",
-            check_id="CL-036",
-            status=RowStatus.FAIL,
-            score=9.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=9.0),
-            reason="The final cited file was pom.xml.",
-            finding="pom.xml was cited instead of ElasticsearchClientConfig.java.",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="pom.xml",
-                    excerpt="<artifactId>campaign-noti-outbound-service</artifactId>",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                ),
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/main/java/vn/com/viettel/vds/campaign/inapp/config/database/ElasticsearchClientConfig.java",
-                    excerpt="public class ElasticsearchClientConfig {",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                    line_range=(1, 220),
-                ),
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/main/java/vn/com/viettel/vds/campaign/inapp/dto/request/FCMDataDTO.java",
-                    excerpt="public class FCMDataDTO {",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                    line_range=(1, 120),
-                ),
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace={
-                "route_mode": "code_only",
-                "tool_first_loop": {
-                    "route_mode": "code_only",
-                    "code_search_count": 1,
-                    "code_read_count": 3,
-                    "docs_search_count": 0,
-                    "docs_read_count": 0,
-                },
-                "evidence_ref_sets": {
-                    "citation_precision_filter_reason": "reasoning_linked_refs",
-                },
-            },
-        )
-        filtered = RowEvaluator._apply_citation_precision_filter(result)
-        refs = [anchor.ref_value for anchor in filtered.evidence_anchors]
-        assert "pom.xml" not in refs
-        # Either Java source file is accepted as the replacement anchor.
-        assert (
-            "src/main/java/vn/com/viettel/vds/campaign/inapp/config/database/ElasticsearchClientConfig.java" in refs
-            or "src/main/java/vn/com/viettel/vds/campaign/inapp/dto/request/FCMDataDTO.java" in refs
-        )
-        assert "replace_generic_code_anchor_for_code_only" in str(
-            filtered.retrieval_trace.get("evidence_ref_sets", {}).get("citation_precision_filter_reason", "")
-        )
-    def test_citation_precision_replaces_generic_code_anchor_for_mixed_requirement_analysis(self) -> None:
-        """Check anchor replacement and ordering when code and docs are both required.
-
-        The input result has a ``hybrid_code_first`` tool loop whose
-        ``requirement_analysis`` requires both ``code`` and ``docs`` anchor
-        modalities. Its anchors are ``pom.xml``, ``AppConfig.java`` and a
-        Confluence URL. After ``RowEvaluator._apply_citation_precision_filter``
-        the first anchor must be ``AppConfig.java``, ``pom.xml`` must be gone,
-        and the filter reason must contain both
-        ``replace_generic_code_anchor_for_requirement_analysis`` and
-        ``prioritize_substantive_code_anchor_for_requirement_analysis``.
-        """
-        result = RowEvaluationResult(
-            row_id="CL-003:row_2",
-            check_id="CL-003",
-            status=RowStatus.FAIL,
-            # NOTE(review): score (19.5) differs from the raw_score (25.0) given
-            # to ScoreBreakdown.compute - presumably deliberate fixture data; confirm.
-            score=19.5,
-            score_breakdown=ScoreBreakdown.compute(raw_score=25.0),
-            reason="The final cited file was pom.xml even though AppConfig.java was read.",
-            finding="Dependency-graph evidence degraded to generic metadata.",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="pom.xml",
-                    excerpt="<artifactId>arch-grpc-test-client</artifactId>",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                ),
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/main/java/vn/com/viettel/vds/campaign/config/application/AppConfig.java",
-                    excerpt="public class AppConfig {",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                    line_range=(1, 220),
-                ),
-                EvidenceAnchor(
-                    ref_type="url",
-                    ref_value="https://confluence.example.local/pages/viewpage.action?pageId=25123685",
-                    excerpt="Architecture note",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                ),
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            # Unlike the code-only cases, there is no top-level "route_mode" key
-            # here; the route and the requirement analysis sit under
-            # "tool_first_loop".
-            retrieval_trace={
-                "tool_first_loop": {
-                    "route_mode": "hybrid_code_first",
-                    "code_search_count": 1,
-                    "code_read_count": 1,
-                    "docs_search_count": 1,
-                    "docs_read_count": 1,
-                    "requirement_analysis": {
-                        "analysis_mode": "hybrid_balanced",
-                        "analysis_reason_codes": [
-                            "evidence_target_docs",
-                            "evidence_target_code",
-                            "substantive_source_candidate_present",
-                            "mixed_modalities_required",
-                        ],
-                        "required_anchor_modalities": ["code", "docs"],
-                        "generic_anchor_exemption": False,
-                        "minimum_substantive_code_anchors": 1,
-                        "finalization_policy": "mixed_modalities_preserve_strong_code",
-                    },
-                },
-                "evidence_ref_sets": {
-                    "citation_precision_filter_reason": "reasoning_linked_refs|preserve_docs_modality",
-                    "final_cited_refs": [
-                        "pom.xml",
-                        "https://confluence.example.local/pages/viewpage.action?pageId=25123685",
-                    ],
-                    "final_cited_ref_count": 2,
-                    "citation_retention_rate": 1.0,
-                },
-            },
-        )
-        filtered = RowEvaluator._apply_citation_precision_filter(result)
-        refs = [anchor.ref_value for anchor in filtered.evidence_anchors]
-        # Position matters: the substantive code anchor must come first.
-        assert refs[0] == "src/main/java/vn/com/viettel/vds/campaign/config/application/AppConfig.java"
-        assert "pom.xml" not in refs
-        reasons = str(filtered.retrieval_trace.get("evidence_ref_sets", {}).get("citation_precision_filter_reason", ""))
-        assert "replace_generic_code_anchor_for_requirement_analysis" in reasons
-        assert "prioritize_substantive_code_anchor_for_requirement_analysis" in reasons
-    def test_finalize_result_downgrades_code_only_anchor_allowlist_violation_to_fail(
-        self,
-        sample_audit_template,
-    ) -> None:
-        """Check that a code-only row citing a file outside the read refs ends as FAIL.
-
-        The input is a ``PARTIAL`` result (score 61.0) routed ``code_only``
-        whose single, unverified anchor is ``README.md`` while the tool loop's
-        ``evidence_refs`` only lists ``src/security/AuthConfig.java``. After
-        ``_finalize_result`` the row must be ``FAIL`` with score 9.0, no error
-        message, a ``retry_metadata`` reason code of
-        ``anchor_allowlist_violation`` and ``code_only_quality_floor.applied``
-        set to ``True`` in the retrieval trace.
-        """
-        evaluator = RowEvaluator(config=RowEvaluatorConfig(mode=RowLLMMode.PER_ROW), template=sample_audit_template)
-        base_result = RowEvaluationResult(
-            row_id="CL-049:row_48",
-            check_id="CL-049",
-            status=RowStatus.PARTIAL,
-            score=61.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=61.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="README.md",
-                    verified=False,
-                    verification_reason="no_excerpt_to_verify",
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace={
-                "route_mode": "code_only",
-                "tool_first_loop": {
-                    "route_mode": "code_only",
-                    "code_search_count": 1,
-                    "code_read_count": 1,
-                    "docs_search_count": 0,
-                    "docs_read_count": 0,
-                    # The cited anchor (README.md) is not in this list.
-                    "evidence_refs": ["src/security/AuthConfig.java"],
-                },
-            },
-        )
-        finalized = evaluator._finalize_result(base_result)
-        assert finalized.status == RowStatus.FAIL
-        assert finalized.score == 9.0
-        assert finalized.error_message is None
-        assert finalized.retry_metadata["reason_code"] == "anchor_allowlist_violation"
-        assert finalized.retrieval_trace.get("code_only_quality_floor", {}).get("applied") is True
-    def test_finalize_result_downgrades_code_required_allowlist_violation_to_fail(
-        self,
-        sample_audit_template,
-    ) -> None:
-        """Check that a code-required hybrid row with an off-list anchor ends as FAIL.
-
-        The input is a ``PARTIAL`` result (score 63.0) whose requirement
-        analysis demands ``code`` and ``docs`` anchors. Its only anchor has
-        ``ref_type="code_path"`` but an ``attachment://123/456`` ref value,
-        whereas the tool loop's ``evidence_refs`` and the ``exploratory_refs``
-        list two Java source paths. After ``_finalize_result`` the row must be
-        ``FAIL`` with score 9.0, no error message, a ``retry_metadata`` reason
-        code of ``anchor_allowlist_violation`` and
-        ``code_required_quality_floor.applied`` set to ``True``.
-        """
-        evaluator = RowEvaluator(config=RowEvaluatorConfig(mode=RowLLMMode.PER_ROW), template=sample_audit_template)
-        base_result = RowEvaluationResult(
-            row_id="CL-017:row_16",
-            check_id="CL-017",
-            status=RowStatus.PARTIAL,
-            score=63.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=63.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="attachment://123/456",
-                    excerpt="public class PushDataToKafkaUtils {",
-                    verified=True,
-                    verification_reason="llm_anchor_provided",
-                    line_range=(1, 220),
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            # The analysis fields appear both nested under "requirement_analysis"
-            # and duplicated at the top level of the trace.
-            retrieval_trace={
-                "requirement_analysis": {
-                    "analysis_mode": "hybrid_balanced",
-                    "analysis_reason_codes": ["decoupling_config_split_row"],
-                    "required_anchor_modalities": ["code", "docs"],
-                    "generic_anchor_exemption": False,
-                    "minimum_substantive_code_anchors": 1,
-                    "finalization_policy": "mixed_modalities_preserve_strong_code",
-                },
-                "analysis_mode": "hybrid_balanced",
-                "required_anchor_modalities": ["code", "docs"],
-                "finalization_policy": "mixed_modalities_preserve_strong_code",
-                "route_mode": "hybrid_code_first",
-                "tool_first_loop": {
-                    "route_mode": "hybrid_code_first",
-                    "code_search_count": 2,
-                    "code_read_count": 2,
-                    "docs_search_count": 1,
-                    "docs_read_count": 1,
-                    "evidence_refs": [
-                        "src/main/java/vn/com/viettel/vds/campaign/config/kafka/PushDataToKafkaUtils.java",
-                        "src/main/java/vn/com/viettel/vds/campaign/config/database/CampaignDBConfig.java",
-                    ],
-                },
-                "evidence_ref_sets": {
-                    "exploratory_refs": [
-                        "src/main/java/vn/com/viettel/vds/campaign/config/kafka/PushDataToKafkaUtils.java",
-                        "src/main/java/vn/com/viettel/vds/campaign/config/database/CampaignDBConfig.java",
-                    ]
-                },
-            },
-        )
-        finalized = evaluator._finalize_result(base_result)
-        assert finalized.status == RowStatus.FAIL
-        assert finalized.score == 9.0
-        assert finalized.error_message is None
-        assert finalized.retry_metadata["reason_code"] == "anchor_allowlist_violation"
-        assert finalized.retrieval_trace.get("code_required_quality_floor", {}).get("applied") is True
-    def test_finalize_result_clears_stale_allowlist_fail_when_verified_anchors_become_compliant(
-        self,
-        sample_audit_template,
-    ) -> None:
-        """Check that a stale allowlist FAIL is lifted once the anchor contract is clean.
-
-        Despite the ``test_finalize_result`` prefix, this test calls
-        ``_clear_stale_anchor_allowlist_failure`` directly. The input is a
-        ``FAIL`` result (score 9.0) that still carries
-        ``anchor_allowlist_violation`` in ``retry_metadata`` and an applied
-        ``code_only_quality_floor``; the supplied contract has
-        ``invalid_count`` 0 and identical allowed and cited anchor ids. The
-        returned result must be ``PARTIAL`` with score 60.0, no error message,
-        ``retry_metadata`` of ``None``, ``anchor_allowlist_reconciled.applied``
-        set to ``True`` and a finding that contains
-        "Allowlist đã được reconcile".
-        """
-        evaluator = RowEvaluator(config=RowEvaluatorConfig(mode=RowLLMMode.PER_ROW), template=sample_audit_template)
-        base_result = RowEvaluationResult(
-            row_id="CL-001:row_0",
-            check_id="CL-001",
-            status=RowStatus.FAIL,
-            score=9.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=9.0),
-            # Vietnamese fixture text; roughly "code evidence was read but the
-            # final anchors do not match the allowlist of the sources read" and
-            # "re-cite directly from the code files that were read".
-            reason="Đã đọc bằng chứng code nhưng các anchor cuối cùng không khớp allowlist của nguồn đã đọc.",
-            finding="Cần trích dẫn lại trực tiếp từ file code đã đọc; anchor cuối cùng hiện không nằm trong exploratory allowlist nên bị loại bỏ.",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/main/java/com/example/config/DatabaseConfig.java",
-                    excerpt="public class DatabaseConfig {}",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                    line_range=(1, 120),
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retry_metadata={
-                "stage": "anchor_allowlist_filter",
-                "failure_stage": "synthesis",
-                "reason_code": "anchor_allowlist_violation",
-                "downgraded_from_error_to_fail": True,
-            },
-            retrieval_trace={
-                "code_only_quality_floor": {
-                    "applied": True,
-                    "reason_code": "anchor_allowlist_violation",
-                    "downgraded_from_error_to_fail": True,
-                    "score_floor": 9.0,
-                }
-            },
-        )
-        cleared = evaluator._clear_stale_anchor_allowlist_failure(
-            result=base_result,
-            anchor_identity_contract={
-                "invalid_count": 0,
-                "allowed_anchor_ids": ["src/main/java/com/example/config/DatabaseConfig.java"],
-                "cited_anchor_ids": ["src/main/java/com/example/config/DatabaseConfig.java"],
-            },
-        )
-        assert cleared.status == RowStatus.PARTIAL
-        assert cleared.score == 60.0
-        assert cleared.error_message is None
-        assert cleared.retry_metadata is None
-        assert cleared.retrieval_trace.get("anchor_allowlist_reconciled", {}).get("applied") is True
-        assert "Allowlist đã được reconcile" in str(cleared.finding)
-    def test_finalize_result_marks_authoritative_skill_as_final_result_source(
-        self,
-        sample_audit_template,
-    ) -> None:
-        """Check that an authoritative-skill result keeps its source and drops retry data.
-
-        The input trace already marks ``final_result_source`` as
-        ``authoritative_skill`` with an applied
-        ``authoritative_skill_finalization``, while both
-        ``failure_diagnostics`` and ``retry_metadata`` still carry a
-        ``provider_transient_error`` from the synthesis stage. After
-        ``_finalize_result`` the source marker and the ``applied`` flag must be
-        unchanged and ``retry_metadata`` must be ``None``.
-        """
-        evaluator = RowEvaluator(config=RowEvaluatorConfig(mode=RowLLMMode.PER_ROW), template=sample_audit_template)
-        base_result = RowEvaluationResult(
-            row_id="CL-007:row_6",
-            check_id="CL-007",
-            # NOTE(review): status is FAIL although the score is 82.0; the
-            # assertions below do not check either field - confirm this pairing
-            # is intentional fixture data.
-            status=RowStatus.FAIL,
-            score=82.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=82.0),
-            reason="Skill result found decisive migration evidence.",
-            finding="Migration assets are present and authoritative.",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/main/resources/db/changelog.xml",
-                    excerpt="<databaseChangeLog>",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace={
-                "requirement_analysis": {
-                    "strict_single_skill_only": True,
-                },
-                "tool_first_loop": {
-                    "skill_execution_effective_call_count": 1,
-                    # Two recorded steps: a successful skill-resource read, then
-                    # a record of the one verified ref that the anchor above cites.
-                    "trace_steps": [
-                        {
-                            "tool": "read_skill_resource",
-                            "output": {"success": True, "name": "migration-script-detection-skill"},
-                        },
-                        {
-                            "tool": "record_evidence_refs",
-                            "output": {
-                                "refs": [
-                                    {
-                                        "ref_type": "code_path",
-                                        "ref_value": "src/main/resources/db/changelog.xml",
-                                        "verified": True,
-                                    }
-                                ]
-                            },
-                        },
-                    ],
-                },
-                "authoritative_skill_finalization": {
-                    "applied": True,
-                    "recorded_ref_count": 1,
-                    "strict_single_skill_only": True,
-                },
-                "final_result_source": "authoritative_skill",
-                "failure_diagnostics": {
-                    "failure_stage": "synthesis",
-                    "reason_code": "provider_transient_error",
-                },
-            },
-            retry_metadata={
-                "failure_stage": "synthesis",
-                "reason_code": "provider_transient_error",
-            },
-        )
-        finalized = evaluator._finalize_result(base_result)
-        assert finalized.retrieval_trace.get("final_result_source") == "authoritative_skill"
-        assert finalized.retrieval_trace.get("authoritative_skill_finalization", {}).get("applied") is True
-        assert finalized.retry_metadata is None
-    def test_clear_stale_allowlist_failure_accepts_authoritative_skill_finalization_without_verified_refs(
-        self,
-        sample_audit_template,
-    ) -> None:
-        """Check that reconciliation applies to an authoritative-skill row with no anchors.
-
-        The input is a ``FAIL`` result with an empty ``evidence_anchors`` list,
-        a stale ``anchor_allowlist_violation`` in ``retry_metadata`` and a
-        trace whose ``final_result_source`` is ``authoritative_skill``. Calling
-        ``_clear_stale_anchor_allowlist_failure`` with a contract that only
-        states ``invalid_count`` 0 must yield a trace with
-        ``anchor_allowlist_reconciled.applied`` set to ``True``.
-        """
-        evaluator = RowEvaluator(config=RowEvaluatorConfig(mode=RowLLMMode.PER_ROW), template=sample_audit_template)
-        base_result = RowEvaluationResult(
-            row_id="CL-003:row_2",
-            check_id="CL-003",
-            status=RowStatus.FAIL,
-            score=9.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=9.0),
-            reason="stale allowlist failure",
-            finding="weak generic finding",
-            evidence_anchors=[],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retry_metadata={
-                "failure_stage": "synthesis",
-                "stage": "anchor_allowlist_filter",
-                "reason_code": "anchor_allowlist_violation",
-            },
-            retrieval_trace={
-                "final_result_source": "authoritative_skill",
-            },
-        )
-        cleared = evaluator._clear_stale_anchor_allowlist_failure(
-            result=base_result,
-            # No allowed/cited anchor id lists are supplied in this contract.
-            anchor_identity_contract={"invalid_count": 0},
-        )
-        assert cleared.retrieval_trace.get("anchor_allowlist_reconciled", {}).get("applied") is True
-    def test_finalize_result_downgrades_docs_primary_allowlist_violation_to_partial(
-        self,
-        sample_audit_template,
-    ) -> None:
-        """Check that a docs-primary row citing only code is finalized as PARTIAL.
-
-        The input is a ``FAIL`` result (score 65.0) whose requirement analysis
-        requires the ``docs`` modality under a ``docs_primary`` policy. The
-        exploratory refs hold a Confluence URL, but the single anchor and the
-        final cited ref are a Java source path. After ``_finalize_result`` the
-        row must be ``PARTIAL`` with a positive score, no error message, and a
-        ``retry_metadata`` reason code of either ``anchor_allowlist_violation``
-        or ``evidence_shortfall_before_synthesis``.
-        """
-        evaluator = RowEvaluator(config=RowEvaluatorConfig(mode=RowLLMMode.PER_ROW), template=sample_audit_template)
-        base_result = RowEvaluationResult(
-            row_id="CL-052:row_51",
-            check_id="CL-052",
-            status=RowStatus.FAIL,
-            score=65.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=65.0),
-            reason="test",
-            finding="test",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/main/java/com/example/Dto.java",
-                    excerpt="public class Dto {}",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace={
-                "requirement_analysis": {
-                    "analysis_mode": "docs_first",
-                    "required_anchor_modalities": ["docs"],
-                    "finalization_policy": "docs_primary",
-                },
-                "required_anchor_modalities": ["docs"],
-                "finalization_policy": "docs_primary",
-                "evidence_ref_sets": {
-                    # Explored ref is a docs URL; the cited ref is a code path.
-                    "exploratory_refs": ["https://confluence.local/pages/viewpage.action?pageId=1"],
-                    "final_cited_refs": ["src/main/java/com/example/Dto.java"],
-                },
-            },
-        )
-        finalized = evaluator._finalize_result(base_result)
-        assert finalized.status == RowStatus.PARTIAL
-        assert finalized.score > 0.0
-        assert finalized.error_message is None
-        # Two reason codes are accepted by this test.
-        assert finalized.retry_metadata["reason_code"] in {
-            "anchor_allowlist_violation",
-            "evidence_shortfall_before_synthesis",
-        }
-    def test_finalize_result_normalizes_line_qualified_code_anchor_ids_for_allowlist(
-        self,
-        sample_audit_template,
-    ) -> None:
-        evaluator = RowEvaluator(config=RowEvaluatorConfig(mode=RowLLMMode.PER_ROW), template=sample_audit_template)
-        base_result = RowEvaluationResult(
-            row_id="CL-004:row_3",
-            check_id="CL-004",
-            status=RowStatus.PARTIAL,
-            score=63.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=63.0),
-            reason="Current evidence is still incomplete.",
-            finding="Need mixed-modality corroboration.",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="code:src/main/java/com/example/config/DatabaseConfig.java:18",
-                    excerpt='@EnableJpaRepositories(basePackages = {"com.example.repository"})',
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                    line_range=(1, 220),
-                ),
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="code:src/main/resources/application.properties:49",
-                    excerpt="app.datasource.default.url=jdbc:mariadb://host:3306/campaign",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                    line_range=(1, 220),
-                ),
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace={
-                "requirement_analysis": {
-                    "analysis_mode": "hybrid_balanced",
-                    "analysis_reason_codes": ["mixed_modalities_required", "corroborating_code_anchors_required"],
-                    "required_anchor_modalities": ["code", "docs"],
-                    "generic_anchor_exemption": False,
-                    "minimum_substantive_code_anchors": 2,
-                    "finalization_policy": "mixed_modalities_preserve_strong_code",
-                },
-                "analysis_mode": "hybrid_balanced",
-                "required_anchor_modalities": ["code", "docs"],
-                "finalization_policy": "mixed_modalities_preserve_strong_code",
-                "tool_first_loop": {
-                    "route_mode": "hybrid_code_first",
-                    "code_search_count": 2,
-                    "code_read_count": 3,
-                    "docs_search_count": 1,
-                    "docs_read_count": 0,
-                    "evidence_refs": [
-                        "src/main/java/com/example/config/DatabaseConfig.java",
-                        "src/main/resources/application.properties",
-                    ],
-                },
-                "evidence_ref_sets": {
-                    "exploratory_refs": [
-                        "src/main/java/com/example/config/DatabaseConfig.java",
-                        "src/main/resources/application.properties",
-                    ],
-                    "final_cited_refs": [
-                        "code:src/main/java/com/example/config/DatabaseConfig.java:18",
-                        "code:src/main/resources/application.properties:49",
-                    ],
-                },
-            },
-        )
-        finalized = evaluator._finalize_result(base_result)
-        assert finalized.retry_metadata.get("reason_code") != "anchor_allowlist_violation"
-        assert finalized.retrieval_trace.get("anchor_identity_contract", {}).get("invalid_count") == 0
-        assert [anchor.ref_value for anchor in finalized.evidence_anchors] == [
-            "code:src/main/java/com/example/config/DatabaseConfig.java:18",
-            "code:src/main/resources/application.properties:49",
-        ]
-    def test_finalize_result_clears_stale_allowlist_failure_when_contract_is_clean(
-        self,
-        sample_audit_template,
-    ) -> None:
-        """Check that ``_finalize_result`` reconciles a stale allowlist failure.
-
-        The input is a ``FAIL`` result (score 9.0) routed ``code_only`` that
-        still carries ``anchor_allowlist_violation`` in ``retry_metadata`` and
-        an applied ``code_only_quality_floor``, although its single verified
-        anchor, the tool loop's ``evidence_refs``, the ``exploratory_refs`` and
-        the ``final_cited_refs`` all name the same
-        ``RestConfiguration.java`` path. After ``_finalize_result`` the
-        ``anchor_identity_contract`` must report ``invalid_count`` 0,
-        ``anchor_allowlist_reconciled.applied`` must be ``True`` and there must
-        be no error message.
-        """
-        evaluator = RowEvaluator(config=RowEvaluatorConfig(mode=RowLLMMode.PER_ROW), template=sample_audit_template)
-        base_result = RowEvaluationResult(
-            row_id="CL-017:row_16",
-            check_id="CL-017",
-            status=RowStatus.FAIL,
-            score=9.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=9.0),
-            # Vietnamese fixture text; roughly "code evidence was read but the
-            # final anchors do not match the allowlist of the sources read, so
-            # the conclusion cannot stay at pass level" and "re-cite directly
-            # from the code files that were read".
-            reason="Đã đọc bằng chứng code nhưng các anchor cuối cùng không khớp allowlist của nguồn đã đọc, nên không thể giữ kết luận ở mức pass.",
-            finding="Cần trích dẫn lại trực tiếp từ file code đã đọc; anchor cuối cùng hiện không nằm trong exploratory allowlist nên bị loại bỏ.",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/main/java/com/example/config/RestConfiguration.java",
-                    excerpt="public class RestConfiguration {}",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                    line_range=(1, 20),
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retry_metadata={
-                "failure_stage": "synthesis",
-                "stage": "anchor_allowlist_filter",
-                "reason_code": "anchor_allowlist_violation",
-                "evidence_retrieved": True,
-                "downgraded_from_error_to_fail": True,
-            },
-            retrieval_trace={
-                "route_mode": "code_only",
-                "tool_first_loop": {
-                    "route_mode": "code_only",
-                    "code_search_count": 1,
-                    "code_read_count": 1,
-                    "docs_search_count": 0,
-                    "docs_read_count": 0,
-                    "evidence_refs": ["src/main/java/com/example/config/RestConfiguration.java"],
-                },
-                "code_only_quality_floor": {
-                    "applied": True,
-                    "reason_code": "anchor_allowlist_violation",
-                    "downgraded_from_error_to_fail": True,
-                    "score_floor": 9.0,
-                },
-                "evidence_ref_sets": {
-                    "exploratory_refs": ["src/main/java/com/example/config/RestConfiguration.java"],
-                    "final_cited_refs": ["src/main/java/com/example/config/RestConfiguration.java"],
-                },
-                # The diagnostics record a different reason code
-                # (provider_transient_error) than retry_metadata does.
-                "failure_diagnostics": {
-                    "failure_stage": "synthesis",
-                    "reason_code": "provider_transient_error",
-                    "stage": "prompt_backend_invoke",
-                    "evidence_retrieved": True,
-                    "retry_count": 2,
-                    "error_type": "RowEvaluationBackendError",
-                },
-            },
-        )
-        finalized = evaluator._finalize_result(base_result)
-        assert finalized.retrieval_trace.get("anchor_identity_contract", {}).get("invalid_count") == 0
-        assert finalized.retrieval_trace.get("anchor_allowlist_reconciled", {}).get("applied") is True
-        assert finalized.error_message is None
-    def test_finalize_result_reclassifies_requirement_analysis_error_with_substantive_code_anchor(
-        self,
-        sample_audit_template,
-    ) -> None:
-        """Check that an ERROR row with a verified source anchor is finalized as FAIL.
-
-        The input is an ``ERROR`` result (score 30.6) with two verified code
-        anchors, ``AppConfig.java`` and ``pom.xml``, and a ``hybrid_code_first``
-        tool loop whose ``requirement_analysis`` requires ``code`` and ``docs``
-        modalities with at least one substantive code anchor. After
-        ``_finalize_result`` the row must be ``FAIL`` with the score still
-        30.6, no error message, a ``retry_metadata`` reason code of
-        ``requirement_analysis_substantive_code_evidence_preserved`` and
-        ``requirement_analysis_error_reclassified.applied`` set to ``True``.
-        """
-        evaluator = RowEvaluator(config=RowEvaluatorConfig(mode=RowLLMMode.PER_ROW), template=sample_audit_template)
-        base_result = RowEvaluationResult(
-            row_id="CL-002:row_1",
-            check_id="CL-002",
-            status=RowStatus.ERROR,
-            score=30.6,
-            score_breakdown=ScoreBreakdown.compute(raw_score=30.6),
-            reason="Need stronger source proof for cross-module calls.",
-            finding="Current source proof is incomplete but not a runtime evaluation error.",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/main/java/vn/com/viettel/vds/campaign/config/application/AppConfig.java",
-                    excerpt="public class AppConfig {",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                    line_range=(1, 220),
-                ),
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="pom.xml",
-                    excerpt="<artifactId>campaign-rws-distribution-service</artifactId>",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                ),
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            # The requirement analysis is supplied only under "tool_first_loop";
-            # there is no top-level copy in this trace.
-            retrieval_trace={
-                "tool_first_loop": {
-                    "route_mode": "hybrid_code_first",
-                    "code_search_count": 1,
-                    "code_read_count": 2,
-                    "docs_search_count": 1,
-                    "docs_read_count": 0,
-                    "requirement_analysis": {
-                        "analysis_mode": "hybrid_balanced",
-                        "analysis_reason_codes": [
-                            "dual_evidence_targets",
-                            "substantive_source_candidate_present",
-                            "mixed_modalities_required",
-                        ],
-                        "required_anchor_modalities": ["code", "docs"],
-                        "generic_anchor_exemption": False,
-                        "minimum_substantive_code_anchors": 1,
-                        "finalization_policy": "mixed_modalities_preserve_strong_code",
-                    },
-                },
-            },
-        )
-        finalized = evaluator._finalize_result(base_result)
-        assert finalized.status == RowStatus.FAIL
-        # The score is expected to be carried over unchanged.
-        assert finalized.score == 30.6
-        assert finalized.error_message is None
-        assert finalized.retry_metadata["reason_code"] == "requirement_analysis_substantive_code_evidence_preserved"
-        assert finalized.retrieval_trace.get("requirement_analysis_error_reclassified", {}).get("applied") is True
-class TestPhase104AnchorRepairDiagnostics:
-    """Phase 104: anchor-repair telemetry and dominant reason projection."""
-    def test_anchor_repair_diagnostics_project_success_and_dominant_reason(self) -> None:
-        """Coverage moving from 0 to 1 verified anchors reports one success and one unresolved anchor."""
-        diagnostics = RowEvaluator._build_anchor_repair_diagnostics(
-            pre_repair_coverage={
-                "row_ref_count": 2,
-                "verified_true_count": 0,
-                "verified_false_count": 2,
-                "verification_reason_breakdown": {"llm_anchor_provided": 2},
-            },
-            post_repair_coverage={
-                "row_ref_count": 2,
-                "verified_true_count": 1,
-                "verified_false_count": 1,
-                "verification_reason_breakdown": {"no_excerpt_to_verify": 1},
-            },
-        )
-        # The expected dominant reason is the only key left in the post-repair breakdown.
-        assert diagnostics == {
-            "anchor_repair_attempted": True,
-            "anchor_repair_success_count": 1,
-            "anchor_repair_unresolved_count": 1,
-            "dominant_unverified_reason": "no_excerpt_to_verify",
-        }
-    def test_anchor_repair_diagnostics_handles_no_anchors(self) -> None:
-        """Empty coverage on both sides reports zero counts and no dominant reason."""
-        diagnostics = RowEvaluator._build_anchor_repair_diagnostics(
-            pre_repair_coverage={"row_ref_count": 0, "verified_true_count": 0, "verified_false_count": 0},
-            post_repair_coverage={"row_ref_count": 0, "verified_true_count": 0, "verified_false_count": 0},
-        )
-        assert diagnostics == {
-            "anchor_repair_attempted": False,
-            "anchor_repair_success_count": 0,
-            "anchor_repair_unresolved_count": 0,
-            # The reason field carries a string when present (see the sibling test),
-            # so "no reason" is expressed as None rather than the boolean False.
-            # NOTE(review): confirm against _build_anchor_repair_diagnostics.
-            "dominant_unverified_reason": None,
-        }
-class TestPhase105RetryTelemetryParity:
-    """Phase 105: effective retry count and metadata resolved from the retrieval trace."""
-    def test_resolve_effective_retry_telemetry_counts_corrective_retry_attempt(self) -> None:
-        """A retry flagged only in the trace is reflected in the count and the metadata."""
-        corrective_trace = {
-            "retry_attempted": True,
-            "retry_outcome": "retry_rejected_no_improvement",
-            "retry_ratio": 0.5,
-        }
-        effective_count, effective_metadata = RowEvaluator._resolve_effective_retry_telemetry(
-            retry_count=0,
-            retry_metadata={},
-            retrieval_trace=corrective_trace,
-        )
-        assert effective_count == 1
-        assert effective_metadata["corrective_retry_attempted"] is True
-        assert effective_metadata["corrective_retry_count"] == 1
-        assert effective_metadata["corrective_retry_outcome"] == "retry_rejected_no_improvement"
-    def test_resolve_effective_retry_telemetry_preserves_higher_backend_retry_count(self) -> None:
-        """An already-recorded retry count of 2 and pre-existing metadata keys are kept."""
-        prior_metadata = {"existing": "value"}
-        effective_count, effective_metadata = RowEvaluator._resolve_effective_retry_telemetry(
-            retry_count=2,
-            retry_metadata=prior_metadata,
-            retrieval_trace={"retry_attempted": True},
-        )
-        assert effective_count == 2
-        assert effective_metadata["existing"] == "value"
-        assert effective_metadata["corrective_retry_attempted"] is True
-        assert effective_metadata["corrective_retry_count"] == 2
-class TestPhase111RequirementAnalysisTraceDiagnostics:
-    def test_promote_requirement_analysis_diagnostics_from_code_ranking_trace(self) -> None:
-        """Analysis nested under code_ranking_diagnostics is exposed at the top level and on the loop trace."""
-        nested_analysis = {
-            "analysis_mode": "code_first",
-            "analysis_reason_codes": ["code_first_requirement"],
-            "required_anchor_modalities": ["code"],
-            "generic_anchor_exemption": False,
-            "minimum_substantive_code_anchors": 1,
-            "finalization_policy": "code_substantive_required",
-        }
-        raw_trace = {
-            "tool_first_loop": {
-                "code_ranking_diagnostics": {"requirement_analysis": nested_analysis},
-            },
-        }
-        promoted = RowEvaluator._promote_requirement_analysis_diagnostics(raw_trace)
-        assert isinstance(promoted, dict)
-        # Every diagnostic field is checked on the top level of the promoted trace.
-        assert promoted["analysis_mode"] == "code_first"
-        assert promoted["analysis_reason_codes"] == ["code_first_requirement"]
-        assert promoted["required_anchor_modalities"] == ["code"]
-        assert promoted["generic_anchor_exemption"] is False
-        assert promoted["minimum_substantive_code_anchors"] == 1
-        assert promoted["finalization_policy"] == "code_substantive_required"
-        # The loop trace mirrors the mode and the finalization policy.
-        loop_view = promoted["tool_first_loop"]
-        assert loop_view["analysis_mode"] == "code_first"
-        assert loop_view["finalization_policy"] == "code_substantive_required"
-    def test_promote_requirement_analysis_prefers_richer_mixed_modality_payload(self) -> None:
-        """With a code-only top-level payload and a code+docs nested payload, the nested one is promoted."""
-        top_level_analysis = {
-            "analysis_mode": "code_first",
-            "analysis_reason_codes": ["code_first_requirement"],
-            "required_anchor_modalities": ["code"],
-            "generic_anchor_exemption": False,
-            "minimum_substantive_code_anchors": 0,
-            "finalization_policy": "code_substantive_required",
-        }
-        nested_analysis = {
-            "analysis_mode": "hybrid_balanced",
-            "analysis_reason_codes": ["mixed_modalities_required", "docs_marker_signal"],
-            "required_anchor_modalities": ["code", "docs"],
-            "generic_anchor_exemption": False,
-            "minimum_substantive_code_anchors": 1,
-            "finalization_policy": "mixed_modalities_preserve_strong_code",
-        }
-        raw_trace = {
-            "requirement_analysis": top_level_analysis,
-            "tool_first_loop": {
-                "code_ranking_diagnostics": {"requirement_analysis": nested_analysis},
-            },
-        }
-        promoted = RowEvaluator._promote_requirement_analysis_diagnostics(raw_trace)
-        assert isinstance(promoted, dict)
-        # All promoted values must match the nested (mixed-modality) payload.
-        assert promoted["analysis_mode"] == "hybrid_balanced"
-        assert promoted["analysis_reason_codes"] == ["mixed_modalities_required", "docs_marker_signal"]
-        assert promoted["required_anchor_modalities"] == ["code", "docs"]
-        assert promoted["minimum_substantive_code_anchors"] == 1
-        assert promoted["finalization_policy"] == "mixed_modalities_preserve_strong_code"
-    def test_promote_requirement_analysis_from_inline_diagnostics(self) -> None:
-        """Diagnostic fields sitting directly on the trace are copied into both nested views."""
-        inline_fields = {
-            "analysis_mode": "code_first",
-            "analysis_reason_codes": ["code_first_requirement", "substantive_source_candidate_present"],
-            "required_anchor_modalities": ["code"],
-            "generic_anchor_exemption": False,
-            "minimum_substantive_code_anchors": 1,
-            "finalization_policy": "code_substantive_required",
-        }
-        # The loop trace starts out empty and is appended last to keep the key order.
-        raw_trace = {**inline_fields, "tool_first_loop": {}}
-        promoted = RowEvaluator._promote_requirement_analysis_diagnostics(raw_trace)
-        assert isinstance(promoted, dict)
-        assert promoted["analysis_mode"] == "code_first"
-        assert promoted["analysis_reason_codes"] == ["code_first_requirement", "substantive_source_candidate_present"]
-        assert promoted["required_anchor_modalities"] == ["code"]
-        assert promoted["generic_anchor_exemption"] is False
-        assert promoted["minimum_substantive_code_anchors"] == 1
-        assert promoted["finalization_policy"] == "code_substantive_required"
-        # Both nested views must report the same analysis mode.
-        assert promoted["requirement_analysis"]["analysis_mode"] == "code_first"
-        assert promoted["tool_first_loop"]["analysis_mode"] == "code_first"
-    def test_promote_requirement_analysis_preserves_existing_tool_first_loop_payload(self) -> None:
-        """Promotion adds analysis fields to the loop trace without dropping its existing keys."""
-        loop_payload = {
-            "route_mode": "code_only",
-            "code_read_count": 2,
-            "record_evidence_refs_contract_met": True,
-            "trace_steps": [{"tool": "read_code_file", "ref": "src/main/java/Foo.java"}],
-        }
-        raw_trace = {
-            "tool_first_loop": loop_payload,
-            "analysis_mode": "code_first",
-            "analysis_reason_codes": ["code_first_requirement"],
-            "required_anchor_modalities": ["code"],
-            "generic_anchor_exemption": False,
-            "minimum_substantive_code_anchors": 1,
-            "finalization_policy": "code_substantive_required",
-        }
-        promoted = RowEvaluator._promote_requirement_analysis_diagnostics(raw_trace)
-        assert isinstance(promoted, dict)
-        promoted_loop = promoted["tool_first_loop"]
-        # Keys that were already on the loop trace survive unchanged.
-        assert promoted_loop["route_mode"] == "code_only"
-        assert promoted_loop["code_read_count"] == 2
-        assert promoted_loop["record_evidence_refs_contract_met"] is True
-        assert promoted_loop["trace_steps"] == [{"tool": "read_code_file", "ref": "src/main/java/Foo.java"}]
-        # Analysis fields are added next to them.
-        assert promoted_loop["analysis_mode"] == "code_first"
-        assert promoted_loop["finalization_policy"] == "code_substantive_required"
-        assert promoted_loop["requirement_analysis"]["analysis_mode"] == "code_first"
-    def test_promote_requirement_analysis_from_route_reason_and_interpretation_fallback(self) -> None:
-        """Without an explicit analysis payload, fields are recovered from route_reason and the interpretation."""
-        interpretation = {
-            "intent": "Validate hexagonal decoupling through implementation evidence.",
-            "control_objective": "Architecture",
-            "evidence_targets": ["code"],
-            "code_targets": ["src/main/java/com/acme/domain/OrderService.java"],
-            "docs_markers": [],
-            "code_markers": ["decoupling"],
-            "process_markers": [],
-        }
-        # The route reason embeds analysis_mode and finalization_policy as key=value hints.
-        route_reason = (
-            "targets=code; control_objective=architecture; risk_focus=quality;"
-            " confidence=0.82; analysis_mode=code_first; finalization_policy=code_substantive_required"
-        )
-        loop_payload = {
-            "route_mode": "hybrid_code_first",
-            "route_reason": route_reason,
-            "code_search_count": 2,
-            "code_read_count": 1,
-            "docs_intent_utilization": {
-                "docs_target": False,
-                "code_target": True,
-            },
-        }
-        raw_trace = {
-            "requirement_interpretation": interpretation,
-            "tool_first_loop": loop_payload,
-        }
-        promoted = RowEvaluator._promote_requirement_analysis_diagnostics(raw_trace)
-        assert isinstance(promoted, dict)
-        assert promoted["analysis_mode"] == "code_first"
-        assert promoted["finalization_policy"] == "code_substantive_required"
-        assert promoted["required_anchor_modalities"] == ["code"]
-        assert promoted["minimum_substantive_code_anchors"] == 1
-        assert promoted["generic_anchor_exemption"] is False
-        assert "route_reason_analysis_mode_hint" in promoted["analysis_reason_codes"]
-        assert promoted["requirement_analysis"]["analysis_mode"] == "code_first"
-        assert promoted["tool_first_loop"]["analysis_mode"] == "code_first"
-    def test_citation_precision_records_reason_when_only_generic_code_anchor_remains(self) -> None:
-        """A row whose only anchor is pom.xml is flagged by the grounding precheck and ends up PARTIAL."""
-        generic_anchor = EvidenceAnchor(
-            ref_type="code_path",
-            ref_value="pom.xml",
-            excerpt="<artifactId>campaign-service</artifactId>",
-            verified=True,
-            verification_reason="excerpt_verified_in_context",
-        )
-        trace = {
-            "requirement_analysis": {
-                "analysis_mode": "code_first",
-                "analysis_reason_codes": ["code_first_requirement"],
-                "required_anchor_modalities": ["code"],
-                "generic_anchor_exemption": False,
-                "minimum_substantive_code_anchors": 1,
-                "finalization_policy": "code_substantive_required",
-            },
-            "tool_first_loop": {
-                "route_mode": "hybrid_code_first",
-                "code_search_count": 1,
-                "code_read_count": 1,
-                "docs_search_count": 0,
-                "docs_read_count": 0,
-            },
-        }
-        result = RowEvaluationResult(
-            row_id="CL-051:row_50",
-            check_id="CL-051",
-            status=RowStatus.FAIL,
-            score=18.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=18.0),
-            reason="Only generic metadata anchor found in repo.",
-            finding="Substantive source path is unavailable for this requirement.",
-            evidence_anchors=[generic_anchor],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace=trace,
-        )
-        finalized = RowEvaluator._apply_citation_precision_filter(result)
-        # Phase 147: with no substantive code anchor and minimum_substantive_code_anchors > 0,
-        # the shortfall is reported under grounding_precheck rather than evidence_ref_sets.
-        precheck = finalized.retrieval_trace.get("grounding_precheck", {})
-        assert isinstance(precheck, dict)
-        assert precheck.get("applied")
-        assert precheck.get("unmet_substantive_code_anchors")
-        assert finalized.status == RowStatus.PARTIAL
-    def test_citation_precision_preserves_corroborating_code_anchors_for_requirement_analysis(self) -> None:
-        """Both Java anchors survive the filter, pom.xml is dropped, and the filter reasons are recorded."""
-        app_config_path = "src/main/java/com/example/config/AppConfig.java"
-        order_service_path = "src/main/java/com/example/service/OrderService.java"
-        anchors = [
-            EvidenceAnchor(
-                ref_type="code_path",
-                ref_value=app_config_path,
-                file_path=app_config_path,
-                excerpt="class AppConfig {}",
-                verified=True,
-                verification_reason="excerpt_verified_in_context",
-                line_range=[1, 220],
-            ),
-            EvidenceAnchor(
-                ref_type="code_path",
-                ref_value=order_service_path,
-                file_path=order_service_path,
-                excerpt="class OrderService {}",
-                verified=True,
-                verification_reason="excerpt_verified_in_context",
-                line_range=[1, 220],
-            ),
-            # Generic build-metadata anchor; expected to be filtered out below.
-            EvidenceAnchor(
-                ref_type="code_path",
-                ref_value="pom.xml",
-                excerpt="<artifactId>campaign-service</artifactId>",
-                verified=True,
-                verification_reason="excerpt_verified_in_context",
-            ),
-        ]
-        trace = {
-            "requirement_analysis": {
-                "analysis_mode": "hybrid_balanced",
-                "analysis_reason_codes": ["mixed_modalities_required", "corroborating_code_anchors_required"],
-                "required_anchor_modalities": ["code", "docs"],
-                "generic_anchor_exemption": False,
-                "minimum_substantive_code_anchors": 2,
-                "finalization_policy": "mixed_modalities_preserve_strong_code",
-            },
-            "tool_first_loop": {
-                "route_mode": "hybrid_code_first",
-                "code_read_count": 2,
-                "docs_read_count": 1,
-            },
-            "evidence_ref_sets": {
-                "exploratory_refs": [
-                    "src/main/java/com/example/config/AppConfig.java",
-                    "src/main/java/com/example/service/OrderService.java",
-                    "pom.xml",
-                ]
-            },
-        }
-        result = RowEvaluationResult(
-            row_id="CL-003:row_2",
-            check_id="CL-003",
-            status=RowStatus.FAIL,
-            score=21.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=21.0),
-            reason="Need corroborating code anchors for decoupling proof.",
-            finding="Single code anchor is insufficient when multiple substantive candidates were explored.",
-            evidence_anchors=anchors,
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace=trace,
-        )
-        finalized = RowEvaluator._apply_citation_precision_filter(result)
-        kept_refs = [getattr(kept, "ref_value", "") for kept in list(finalized.evidence_anchors or [])]
-        assert "src/main/java/com/example/config/AppConfig.java" in kept_refs
-        assert "src/main/java/com/example/service/OrderService.java" in kept_refs
-        assert "pom.xml" not in kept_refs
-        ref_sets = finalized.retrieval_trace.get("evidence_ref_sets", {})
-        assert isinstance(ref_sets, dict)
-        recorded_reasons = list(ref_sets.get("filter_reasons") or [])
-        assert "replace_generic_code_anchor_for_requirement_analysis" in recorded_reasons
-        assert "prioritize_substantive_code_anchor_for_requirement_analysis" in recorded_reasons
-# ---------------------------------------------------------------------------
-# TSK-172 — tool-first loop evidence bypasses grounding precheck
-# ---------------------------------------------------------------------------
-class TestToolFirstLoopGroundingPrecheck:
-    """TSK-172: When all evidence_anchors are supplemental fallback refs (added after LLM
-    synthesis failed) and tool_first_loop collected real code evidence_refs, the
-    substantive_code sub-check in the grounding precheck must be waived.
-    This mirrors the smoke6 CL-001 failure: provider_http_error during synthesis left
-    evidence_anchors=[] initially; _append_trace_grounded_supplemental_anchors added a
-    Dockerfile anchor with verification_reason=VERIFICATION_REASON_FALLBACK_REF_INHERITED.
-    Without the waiver, the grounding precheck fires on the Dockerfile and sets score=0%.
-    """
-    def test_all_supplemental_anchors_with_tool_first_evidence_bypasses_substantive_code_check(
-        self,
-    ) -> None:
-        """Smoke6 scenario: the only anchor is a fallback-inherited Dockerfile ref while
-        tool_first_loop lists Java source evidence_refs. The finding must not report a
-        substantive_code_shortfall."""
-        from vds_audit_orchestrator.models.checklist import (
-            VERIFICATION_REASON_FALLBACK_REF_INHERITED,
-            RowStatus,
-        )
-        # Supplemental anchor of the kind added by _append_trace_grounded_supplemental_anchors
-        # after LLM synthesis failed with provider_http_error.
-        fallback_anchor = EvidenceAnchor(
-            ref_type="code_path",
-            ref_value="Dockerfile",
-            verified=False,
-            verification_reason=VERIFICATION_REASON_FALLBACK_REF_INHERITED,
-            excerpt_unavailable_reason="fallback_ref_inherited_no_excerpt",
-        )
-        trace = {
-            "requirement_analysis": {
-                "analysis_mode": "code_first",
-                "analysis_reason_codes": ["corroborating_code_anchors_required", "code_first_requirement"],
-                "required_anchor_modalities": ["code"],
-                "generic_anchor_exemption": False,
-                "minimum_substantive_code_anchors": 2,
-                "finalization_policy": "code_substantive_required",
-            },
-            "tool_first_loop": {
-                "termination_reason": "stagnation",
-                "steps_executed": 15,
-                "code_read_count": 10,
-                "skill_first_loaded_at_step": 2,
-                "breadth_enforcement_applied": True,
-                "evidence_refs": [
-                    "src/main/java/vn/com/example/core/domain/entity/ConfigPolicyEntity.java",
-                    "src/main/java/vn/com/example/core/service/impl/TmsTransactionServiceImpl.java",
-                    "src/main/java/vn/com/example/infrastructure/store/repository/TxRepository.java",
-                ],
-            },
-        }
-        result = RowEvaluationResult(
-            row_id="CL-001:row_1",
-            check_id="CL-001",
-            status=RowStatus.PASS,
-            score=0.45,
-            score_breakdown=ScoreBreakdown.compute(raw_score=0.45),
-            reason="Partial hexagonal compliance detected.",
-            finding="Partial hexagonal compliance: domain layer present but adapters mixed.",
-            evidence_anchors=[fallback_anchor],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace=trace,
-        )
-        finalized = RowEvaluator._apply_citation_precision_filter(result)
-        # The substantive_code sub-check must be waived, so no shortfall marker in the finding.
-        finding_text = finalized.finding or ""
-        assert "substantive_code_shortfall" not in finding_text, (
-            "substantive_code precheck must be waived when all anchors are supplemental "
-            "fallback refs and tool_first_loop has real evidence_refs. "
-            f"Got finding={finalized.finding!r}"
-        )
-    def test_empty_tool_first_loop_evidence_refs_still_applies_precheck(self) -> None:
-        """With an empty tool_first_loop.evidence_refs list there is no bypass: the row
-        becomes PARTIAL and the finding carries substantive_code_shortfall."""
-        from vds_audit_orchestrator.models.checklist import RowStatus
-        # Non-empty but non-substantive: Dockerfile is excluded from substantive code
-        # anchors so the precheck CAN fire (avoids the early-return on empty list).
-        dockerfile_anchor = EvidenceAnchor(ref_type="code_path", ref_value="Dockerfile")
-        trace = {
-            "requirement_analysis": {
-                "analysis_mode": "code_first",
-                "analysis_reason_codes": ["corroborating_code_anchors_required"],
-                "required_anchor_modalities": ["code"],
-                "generic_anchor_exemption": False,
-                "minimum_substantive_code_anchors": 2,
-                "finalization_policy": "code_substantive_required",
-            },
-            # The loop trace exists, but it collected no evidence refs.
-            "tool_first_loop": {
-                "termination_reason": "max_steps",
-                "steps_executed": 5,
-                "evidence_refs": [],
-            },
-        }
-        result = RowEvaluationResult(
-            row_id="CL-001:row_1",
-            check_id="CL-001",
-            status=RowStatus.PASS,
-            score=0.45,
-            score_breakdown=ScoreBreakdown.compute(raw_score=0.45),
-            reason="Synthetic reason.",
-            finding="Synthetic finding.",
-            evidence_anchors=[dockerfile_anchor],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace=trace,
-        )
-        finalized = RowEvaluator._apply_citation_precision_filter(result)
-        # No bypass applies, and the lone Dockerfile anchor does not count as substantive code.
-        assert finalized.status == RowStatus.PARTIAL, (
-            "Grounding precheck must still apply when tool_first_loop.evidence_refs is empty. "
-            f"Got status={finalized.status}"
-        )
-        finding_text = finalized.finding or ""
-        assert "substantive_code_shortfall" in finding_text, (
-            "substantive_code_shortfall must appear in finding when no tool-first evidence collected"
-        )
-    def test_code_modality_check_waived_when_all_supplemental_and_tool_first_has_refs(self) -> None:
-        """Smoke8b regression: a lone fallback-inherited Dockerfile anchor plus Java
-        evidence_refs from tool_first_loop must leave the row at PASS (code-modality
-        check waived)."""
-        from vds_audit_orchestrator.models.checklist import (
-            VERIFICATION_REASON_FALLBACK_REF_INHERITED,
-            RowStatus,
-        )
-        # Only a Dockerfile with fallback_ref_inherited — this makes filtered=[] in the
-        # precheck (supplemental anchors are excluded from filtered), so _select_anchors_for_modality
-        # returns [] for "code" → modality check fires WITHOUT the fix.
-        fallback_anchor = EvidenceAnchor(
-            ref_type="code_path",
-            ref_value="Dockerfile",
-            verified=False,
-            verification_reason=VERIFICATION_REASON_FALLBACK_REF_INHERITED,
-            excerpt_unavailable_reason="fallback_ref_inherited_no_excerpt",
-        )
-        trace = {
-            "requirement_analysis": {
-                "analysis_mode": "code_first",
-                "analysis_reason_codes": ["corroborating_code_anchors_required"],
-                "required_anchor_modalities": ["code"],
-                "generic_anchor_exemption": False,
-                # minimum=0 isolates just the modality check (substantive check is guarded by >0)
-                "minimum_substantive_code_anchors": 0,
-                "finalization_policy": "code_substantive_required",
-            },
-            "tool_first_loop": {
-                "termination_reason": "stagnation",
-                "steps_executed": 15,
-                "code_read_count": 8,
-                "evidence_refs": [
-                    "src/main/java/vn/com/example/core/domain/entity/ConfigPolicyEntity.java",
-                    "src/main/java/vn/com/example/core/service/impl/TmsTransactionServiceImpl.java",
-                    "src/main/java/vn/com/example/infrastructure/store/repository/TxRepository.java",
-                ],
-            },
-        }
-        result = RowEvaluationResult(
-            row_id="CL-001:row_1",
-            check_id="CL-001",
-            status=RowStatus.PASS,
-            score=0.45,
-            score_breakdown=ScoreBreakdown.compute(raw_score=0.45),
-            reason="Partial hexagonal compliance detected.",
-            finding="Partial hexagonal compliance: domain layer present but adapters mixed.",
-            evidence_anchors=[fallback_anchor],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace=trace,
-        )
-        finalized = RowEvaluator._apply_citation_precision_filter(result)
-        assert finalized.status == RowStatus.PASS, (
-            "Code-modality check must be waived when all anchors are supplemental fallback refs "
-            "and tool_first_loop collected real Java evidence refs. "
-            f"Got status={finalized.status}, finding={finalized.finding!r}"
-        )
-    def test_llm_anchor_provided_with_tool_first_code_refs_waives_both_gates(self) -> None:
-        """Smoke9+smoke10 regression: anchors carry excerpt_verified_in_context and
-        llm_anchor_provided reasons while tool_first_loop lists Java refs. Neither the
-        substantive-code gate nor the evidence-shortfall gate may fire."""
-        from vds_audit_orchestrator.models.checklist import (
-            VERIFICATION_REASON_EXCERPT_VERIFIED_IN_CONTEXT,
-            VERIFICATION_REASON_LLM_ANCHOR_PROVIDED,
-            RowStatus,
-        )
-        # Mix of llm_anchor_provided + excerpt_verified_in_context (smoke10: supplemental
-        # pipeline verified Dockerfile content) — _all_anchors_supplemental=False, but
-        # both reasons indicate synthesis did NOT complete successfully.
-        verified_dockerfile = EvidenceAnchor(
-            ref_type="code_path",
-            ref_value="Dockerfile",
-            verified=True,
-            verification_reason=VERIFICATION_REASON_EXCERPT_VERIFIED_IN_CONTEXT,
-            excerpt="FROM openjdk:17-jre-slim",
-        )
-        llm_cited_pom = EvidenceAnchor(
-            ref_type="code_path",
-            ref_value="pom.xml",
-            verified=False,
-            verification_reason=VERIFICATION_REASON_LLM_ANCHOR_PROVIDED,
-            excerpt_unavailable_reason="llm_cited_without_excerpt",
-        )
-        trace = {
-            "requirement_analysis": {
-                "analysis_mode": "code_first",
-                "analysis_reason_codes": ["corroborating_code_anchors_required"],
-                "required_anchor_modalities": ["code"],
-                "generic_anchor_exemption": False,
-                "minimum_substantive_code_anchors": 2,
-                "finalization_policy": "code_substantive_required",
-            },
-            "tool_first_loop": {
-                "termination_reason": "stagnation",
-                "steps_executed": 15,
-                "code_read_count": 8,
-                "evidence_refs": [
-                    "src/main/java/vn/com/example/core/domain/entity/ConfigPolicyEntity.java",
-                    "src/main/java/vn/com/example/core/service/impl/TmsServiceImpl.java",
-                    "src/main/java/vn/com/example/infrastructure/repository/TxRepository.java",
-                ],
-            },
-        }
-        result = RowEvaluationResult(
-            row_id="CL-001:row_1",
-            check_id="CL-001",
-            status=RowStatus.PASS,
-            score=0.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=0.0),
-            reason="Synthesis incomplete.",
-            finding="Synthesis incomplete: provider partially responded.",
-            evidence_anchors=[verified_dockerfile, llm_cited_pom],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace=trace,
-        )
-        finalized = RowEvaluator._apply_citation_precision_filter(result)
-        # Gate 1: no substantive-code shortfall marker in the finding.
-        assert "substantive_code_shortfall" not in (finalized.finding or ""), (
-            "substantive_code precheck must be waived for llm_anchor_provided anchors + Java evidence_refs. "
-            f"Got finding={finalized.finding!r}"
-        )
-        # Gate 2: a PARTIAL status is tolerated only when it is not caused by an evidence shortfall.
-        assert finalized.status != RowStatus.PARTIAL or "evidence_shortfall" not in (finalized.finding or ""), (
-            "Grounding precheck must not fire when llm_anchor_provided + Java evidence_refs collected. "
-            f"Got status={finalized.status}, finding={finalized.finding!r}"
-        )
-    def test_no_waiver_when_no_code_extension_in_tool_first_refs(self) -> None:
-        """Safety check: when tool_first_loop only collected non-code files (.xml, .txt),
-        the waiver must NOT apply — the grounding precheck should still fire."""
-        from vds_audit_orchestrator.models.checklist import (
-            VERIFICATION_REASON_FALLBACK_REF_INHERITED,
-            RowStatus,
-        )
-        result = RowEvaluationResult(
-            row_id="CL-001:row_1",
-            check_id="CL-001",
-            status=RowStatus.PASS,
-            score=0.45,
-            score_breakdown=ScoreBreakdown.compute(raw_score=0.45),
-            reason="Incomplete analysis.",
-            finding="Incomplete: no code anchors verified.",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="Dockerfile",
-                    verified=False,
-                    verification_reason=VERIFICATION_REASON_FALLBACK_REF_INHERITED,
-                    excerpt_unavailable_reason="fallback_ref_inherited_no_excerpt",
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace={
-                "requirement_analysis": {
-                    "analysis_mode": "code_first",
-                    "analysis_reason_codes": ["corroborating_code_anchors_required"],
-                    "required_anchor_modalities": ["code"],
-                    "generic_anchor_exemption": False,
-                    "minimum_substantive_code_anchors": 2,
-                    "finalization_policy": "code_substantive_required",
-                },
-                "tool_first_loop": {
-                    "termination_reason": "stagnation",
-                    "steps_executed": 5,
-                    # Only .xml and README — no .java/.py/.ts etc.
-                    "evidence_refs": ["pom.xml", "README.md", "config/application.yml"],
-                },
-            },
-        )
-        finalized = RowEvaluator._apply_citation_precision_filter(result)
-        assert finalized.status == RowStatus.PARTIAL, (
-            "Grounding precheck must still fire when tool_first_loop has no code file extensions. "
-            f"Got status={finalized.status}"
-        )
-        assert "substantive_code_shortfall" in (finalized.finding or ""), (
-            "substantive_code_shortfall must appear when only non-code refs in tool_first_loop. "
-            f"Got finding={finalized.finding!r}"
-        )
-    def test_code_grounding_hard_gate_waived_when_initial_verification_passed(self) -> None:
-        """Phase 172: _apply_code_grounding_hard_gate returns result unchanged (no downgrade)
-        when tool_first_loop collected real Java code evidence AND initial verification_coverage
-        ratio exceeded 0.8. Mirrors smoke11: architecture rows penalized for paraphrased
-        excerpts despite substantive tool-first evidence already being verified."""
-        from vds_audit_orchestrator.models.checklist import RowStatus
-        result = RowEvaluationResult(
-            row_id="CL-001:row_1",
-            check_id="CL-001",
-            status=RowStatus.PASS,
-            score=75.0,
-            score_breakdown=ScoreBreakdown.compute(raw_score=75.0),
-            reason="Port interfaces found; infrastructure separated.",
-            finding="Hexagonal compliance verified via tool-first evidence.",
-            evidence_anchors=[
-                EvidenceAnchor(
-                    ref_type="code_path",
-                    ref_value="src/main/java/vn/com/example/core/domain/PaymentPort.java",
-                    verified=True,
-                    verification_reason="excerpt_verified_in_context",
-                    excerpt="interface PaymentPort",
-                )
-            ],
-            provenance=RowProvenance(row_llm_mode="per-row", template_hash="t", evidence_hash="e"),
-            retrieval_trace={
-                "verification_coverage": {
-                    # Initial verification passed — ratio above 0.8 threshold
-                    "verified_ratio": 0.9,
-                    "has_any_refs": True,
-                    "missing_excerpt_count": 0,
-                    "mismatched_excerpt_count": 0,
-                    "verified_false_count": 0,
-                },
-                "tool_first_loop": {
-                    "termination_reason": "completed",
-                    "steps_executed": 8,
-                    "code_read_count": 4,
-                    "evidence_refs": [
-                        "src/main/java/vn/com/example/core/domain/PaymentPort.java",
-                        "src/main/java/vn/com/example/infrastructure/adapter/PaymentAdapter.java",
-                    ],
-                },
-            },
-        )
-        # Post-finalization coverage: verified_ratio=0.0 so the "> 0.0" early return
-        # does NOT fire, forcing the gate to reach the Phase 172 waiver block.
-        post_coverage: dict = {
-            "has_any_refs": True,
-            "verified_ratio": 0.0,
-            "missing_excerpt_count": 1,
-            "mismatched_excerpt_count": 0,
-            "verified_false_count": 1,
-            "verification_reason_breakdown": {"missing_excerpt": 1},
-        }
-        gated = RowEvaluator._apply_code_grounding_hard_gate(
-            result=result,
-            verification_coverage=post_coverage,
-            requirement_interpretation={"code_target": True},
-        )
-        assert gated.score == 75.0, (
-            "Hard gate must not downgrade score when Phase 172 waiver fires "
-            "(initial_verification_passed=True + Java evidence_refs in tool_first_loop). "
-            f"Got score={gated.score}"
-        )
-        assert gated.status == RowStatus.PASS, (
-            f"Hard gate must not change status when Phase 172 waiver fires. Got status={gated.status}"
-        )