pace-agents 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pace/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
pace/audit/__init__.py ADDED
@@ -0,0 +1,373 @@
1
+ """Autoresearch-style audit loop with compressed journal context — STORY-105."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import logging
6
+ import re
7
+ from collections.abc import Callable
8
+ from datetime import UTC, datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from pydantic import BaseModel, ValidationError
13
+
14
+ from pace.exceptions import LLMError
15
+ from pace.finding_id import compute_finding_id
16
+ from pace.index import FunctionRecord
17
+ from pace.rules import Finding, Rule, Severity
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Token estimation (no external tokenizer required for MVP)
23
+ # ---------------------------------------------------------------------------
24
+
25
+ def _estimate_tokens(text: str) -> int:
26
+ return len(text) // 4
27
+
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Journal models
31
+ # ---------------------------------------------------------------------------
32
+
33
class JournalEntry(BaseModel):
    """One entry in the audit journal (one reviewed function)."""

    # Name of the reviewed function.
    function_name: str
    # Path of the file that contains the function.
    file_path: str
    # Number of findings produced by the review (0 for a clean pass or a failed review).
    finding_count: int
    # Short natural-language summary of the review outcome; the audit loop
    # supplies either a "Reviewed ..." line or a "review-failed: ..." marker.
    llm_summary: str
40
+
41
+
42
class JournalCompressor:
    """Maintains a running compressed journal of reviewed functions."""

    def __init__(self) -> None:
        # Entries not yet folded into the condensed prefix.
        self._entries: list[JournalEntry] = []
        # Compressed one-paragraph representation of older entries.
        self._condensed_prefix: str = ""

    def add_review(
        self,
        function_name: str,
        file_path: str,
        findings: list[Finding],
        llm_summary: str,
    ) -> None:
        """Record a completed function review."""
        entry = JournalEntry(
            function_name=function_name,
            file_path=file_path,
            finding_count=len(findings),
            llm_summary=llm_summary,
        )
        self._entries.append(entry)

    def get_compressed_summary(self, max_tokens: int) -> str:
        """Return a summary string whose token estimate is <= max_tokens.

        If the full journal exceeds 80% of max_tokens, older entries are
        condensed into a single paragraph. Instance state (_condensed_prefix
        and _entries) is updated so subsequent calls do not re-emit already-
        condensed entries.
        """
        budget = int(max_tokens * 0.8)

        # Fast path: everything already fits within 80% of the budget.
        full_text = self._build_full_text()
        if _estimate_tokens(full_text) <= budget:
            return full_text

        # Repeatedly fold the older half of the pending entries into the
        # prefix until the rendered text fits (or no entries remain). Work
        # on local copies and commit to instance state only afterwards.
        prefix = self._condensed_prefix
        pending = list(self._entries)
        while pending:
            rendered = self._build_text_from_with_prefix(prefix, pending)
            if _estimate_tokens(rendered) <= budget:
                break
            split_at = max(1, len(pending) // 2)
            chunk = self._condense_entries(pending[:split_at])
            pending = pending[split_at:]
            # Earlier condensed chunks are preserved by appending.
            prefix = f"{prefix} {chunk}" if prefix else chunk

        # Commit: condensed entries leave _entries, the prefix absorbs them.
        self._condensed_prefix = prefix
        self._entries = pending

        result = self._build_text_from_with_prefix(prefix, pending)
        # Final hard cap: at most max_tokens worth of characters (4 chars/token).
        char_cap = max_tokens * 4
        return result if len(result) <= char_cap else result[:char_cap]

    def entries(self) -> list[JournalEntry]:
        """Return a shallow copy of the not-yet-condensed entries."""
        return list(self._entries)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _build_full_text(self) -> str:
        # Render the current prefix plus every pending entry.
        return self._build_text_from_with_prefix(self._condensed_prefix, self._entries)

    def _build_text_from(self, entries: list[JournalEntry]) -> str:
        # Render *entries* under the current condensed prefix.
        return self._build_text_from_with_prefix(self._condensed_prefix, entries)

    def _build_text_from_with_prefix(
        self, prefix: str, entries: list[JournalEntry]
    ) -> str:
        # One bullet line per entry, preceded by the condensed prefix if any.
        lines: list[str] = []
        if prefix:
            lines.append(f"[Earlier reviews condensed] {prefix}")
        for item in entries:
            if item.finding_count:
                note = f"{item.finding_count} finding(s)"
            else:
                note = "no findings"
            lines.append(
                f"- {item.function_name} ({item.file_path}): {note}. {item.llm_summary}"
            )
        return "\n".join(lines)

    @staticmethod
    def _condense_entries(entries: list[JournalEntry]) -> str:
        # Collapse a batch of entries into one sentence: names + finding total.
        names = ", ".join(e.function_name for e in entries)
        total = sum(e.finding_count for e in entries)
        return (
            f"{len(entries)} functions reviewed ({names}); "
            f"{total} total finding(s) identified."
        )
146
+
147
+
148
+ # ---------------------------------------------------------------------------
149
+ # LLM response parsing
150
+ # ---------------------------------------------------------------------------
151
+
152
class _LLMFinding(BaseModel):
    """Raw shape expected from the LLM JSON block."""

    # Rule identifier the LLM assigned; validated against the known rule
    # set by the caller, not here.
    rule_id: str
    # Plain-English description of the suspected violation.
    description: str
    # Pydantic coerces the raw value into the Severity enum; out-of-range
    # values surface as a ValidationError in the caller.
    severity: Severity
158
+
159
+
160
+ _JSON_FENCE_RE = re.compile(r"```json\s*(.*?)\s*```", re.DOTALL)
161
+
162
+
163
def _parse_findings_from_response(
    response: str,
    function_record: FunctionRecord,
    rules_by_id: dict[str, Rule] | None = None,
) -> list[Finding] | None:
    """Extract and validate findings from an LLM response.

    Returns None if no valid JSON fenced block is present.
    Returns a (possibly empty) list of Finding on success.
    """
    fence = _JSON_FENCE_RE.search(response)
    if fence is None:
        return None

    try:
        payload = json.loads(fence.group(1))
    except json.JSONDecodeError as exc:
        logger.warning("JSON parse error in LLM response: %s", exc)
        return None

    if not isinstance(payload, list):
        logger.warning("Expected JSON array in LLM response, got %s", type(payload))
        return None

    known_rules = rules_by_id or {}
    results: list[Finding] = []
    for raw_item in payload:
        # Malformed items are skipped individually rather than failing the batch.
        try:
            candidate = _LLMFinding.model_validate(raw_item)
        except ValidationError as exc:
            logger.warning("Invalid finding shape from LLM (skipped): %s", exc)
            continue

        rule = known_rules.get(candidate.rule_id)
        if rule is None:
            logger.warning(
                "LLM returned unknown rule_id %r (skipped) — valid IDs: %s",
                candidate.rule_id,
                sorted(known_rules.keys()),
            )
            continue

        # Stable ID derived from rule + file + source so re-scans dedupe.
        fid = compute_finding_id(
            candidate.rule_id,
            function_record.file_path,
            function_record.source,
        )
        results.append(
            Finding(
                finding_id=fid,
                rule_id=candidate.rule_id,
                rule_name=rule.name,
                file_path=function_record.file_path,
                function_name=function_record.name,
                severity=candidate.severity,
                description=candidate.description,
                content_hash="",
                control_ref=rule.control_ref,
            )
        )
    return results
222
+
223
+
224
+ # ---------------------------------------------------------------------------
225
+ # Prompt builder
226
+ # ---------------------------------------------------------------------------
227
+
228
+
229
+ def _build_prompt(
230
+ function_record: FunctionRecord,
231
+ journal_summary: str,
232
+ rules: list[Rule],
233
+ ) -> list[dict[str, Any]]:
234
+ rule_names = ", ".join(r.name for r in rules)
235
+ system_content = (
236
+ f"You are a HIPAA compliance auditor. Active rules: {rule_names}."
237
+ if rules
238
+ else "You are a HIPAA compliance auditor."
239
+ )
240
+ prior_context = journal_summary or "(No prior reviews yet.)"
241
+ user_content = (
242
+ "You are a HIPAA compliance auditor reviewing a Python function.\n\n"
243
+ "## Prior Audit Context\n"
244
+ + prior_context
245
+ + "\n\n## Function Under Review\n"
246
+ f"File: {function_record.file_path}\n"
247
+ f"Function: {function_record.name}\n\n"
248
+ "```python\n"
249
+ + function_record.source
250
+ + "\n```\n\n"
251
+ "## Task\n"
252
+ "Review this function for HIPAA compliance violations. Focus on:\n"
253
+ "- PHI/PII appearing in log statements\n"
254
+ "- Hardcoded credentials or API keys\n"
255
+ "- Unredacted sensitive data in API responses or error handlers\n\n"
256
+ "Return your findings as a JSON array in a fenced code block. Each finding must have:\n"
257
+ "- rule_id (string)\n"
258
+ "- description (string, plain English)\n"
259
+ '- severity ("critical" | "high" | "medium" | "low")\n\n'
260
+ "If no violations are found, return an empty array: ```json\n[]\n```\n"
261
+ )
262
+ return [
263
+ {"role": "system", "content": system_content},
264
+ {"role": "user", "content": user_content},
265
+ ]
266
+
267
+
268
+ # ---------------------------------------------------------------------------
269
+ # Audit Loop
270
+ # ---------------------------------------------------------------------------
271
+
272
class AuditLoop:
    """Walks every function, calls the LLM, accumulates findings with journal."""

    def __init__(
        self,
        llm: Any,  # LLMClient or stub — duck-typed for testability
        functions: list[FunctionRecord],
        rules: list[Rule],
        pace_dir: Path,
        max_tokens: int = 8192,
    ) -> None:
        self._llm = llm
        self._functions = functions
        self._rules = rules
        # Index rules once for O(1) lookup while validating LLM output.
        self._rules_by_id: dict[str, Rule] = {rule.id: rule for rule in rules}
        self._pace_dir = pace_dir
        self._max_tokens = max_tokens
        self._findings: list[Finding] = []
        self._journal = JournalCompressor()
        self._reviewed: int = 0

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def run(
        self,
        on_progress: Callable[[int, int], None] | None = None,
    ) -> list[Finding]:
        """Iterate all functions, call LLM, return accumulated findings.

        Args:
            on_progress: Optional callback invoked after each function review
                as ``on_progress(current_index, total_count)``.
        """
        # Ensure the output tree exists before any journal/log writes.
        self._pace_dir.mkdir(parents=True, exist_ok=True)
        (self._pace_dir / "logs").mkdir(parents=True, exist_ok=True)

        total = len(self._functions)
        for position, record in enumerate(self._functions, 1):
            self._review_function(record)
            self._reviewed += 1
            # Checkpoint after every function so a crash loses at most one review.
            self._write_journal()
            if on_progress is not None:
                on_progress(position, total)

        return list(self._findings)

    # ------------------------------------------------------------------
    # Internal: single function review
    # ------------------------------------------------------------------

    def _review_function(self, fn_record: FunctionRecord) -> None:
        # 30% of the context window is reserved for prior-review context.
        budget = int(self._max_tokens * 0.30)
        prior = self._journal.get_compressed_summary(max_tokens=budget)

        messages = _build_prompt(fn_record, prior, self._rules)

        try:
            response = self._llm.chat(messages)
        except LLMError as exc:
            # LLM failure: log it and journal the failure, then move on.
            logger.error("LLM error reviewing %s: %s", fn_record.name, exc)
            self._log_error(fn_record.name, str(exc))
            self._journal.add_review(
                fn_record.name, fn_record.file_path, [], "review-failed: LLM error"
            )
            return

        parsed = _parse_findings_from_response(response, fn_record, self._rules_by_id)
        if parsed is None:
            # NOTE: the response body is intentionally not logged ("response omitted").
            self._log_error(fn_record.name, "No JSON block in LLM response (response omitted)")
            self._journal.add_review(
                fn_record.name, fn_record.file_path, [], "review-failed: no JSON block"
            )
            return

        self._findings.extend(parsed)
        self._journal.add_review(
            fn_record.name,
            fn_record.file_path,
            parsed,
            f"Reviewed {fn_record.name}; {len(parsed)} finding(s).",
        )

    # ------------------------------------------------------------------
    # Internal: persistence
    # ------------------------------------------------------------------

    def _write_journal(self) -> None:
        """Write `.pace/audit-journal.json` with current state."""
        payload: dict[str, Any] = {
            "reviewed": self._reviewed,
            "findings": [finding.model_dump() for finding in self._findings],
            "journal_entries": [entry.model_dump() for entry in self._journal.entries()],
            "last_updated": datetime.now(tz=UTC).isoformat(),
        }
        target = self._pace_dir / "audit-journal.json"
        target.write_text(json.dumps(payload, indent=2))

    def _log_error(self, function_name: str, message: str) -> None:
        """Append an error entry to `.pace/logs/pass2-errors.log`."""
        stamp = datetime.now(tz=UTC).isoformat()
        log_path = self._pace_dir / "logs" / "pass2-errors.log"
        with log_path.open("a") as handle:
            handle.write(f"[{stamp}] {function_name}: {message}\n")
pace/audit_state.py ADDED
@@ -0,0 +1,109 @@
1
+ """Audit state file — STORY-202.
2
+
3
+ Manages `.pace/audit-state.json` which persists findings with their status
4
+ across multiple `pace scan` and `pace fix` invocations.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ from datetime import UTC, datetime
10
+ from enum import StrEnum
11
+ from pathlib import Path
12
+
13
+ from pydantic import BaseModel, Field, ValidationError
14
+
15
+ from pace.rules import Finding
16
+
17
+
18
class FindingStatus(StrEnum):
    """Lifecycle status of a finding tracked in `.pace/audit-state.json`.

    FIXED, ACCEPTED_RISK, and FALSE_POSITIVE survive re-scans:
    ``merge_findings`` never resets an existing status back to OPEN.
    """

    OPEN = "open"
    FIXED = "fixed"
    ACCEPTED_RISK = "accepted-risk"
    FALSE_POSITIVE = "false-positive"
    BLOCKED = "blocked"
24
+
25
+
26
class FixEvidence(BaseModel):
    """Evidence produced by a successful fix loop run for one finding.

    Stored in ``AuditState.fix_evidence`` keyed by finding_id and persisted
    to `.pace/audit-state.json`.
    """

    branch_name: str  # branch on which the fix was authored
    commit_sha: str  # commit containing the applied fix
    diff: str  # unified diff / patch applied by the Author
    test_output: str  # raw output captured from the test run
    tests_passed: bool  # whether that test run succeeded
    evaluator_verdict: str  # human-readable outcome from the Evaluator
35
+
36
+
37
class AuditState(BaseModel):
    """Schema-validated representation of `.pace/audit-state.json`."""

    # State-file version marker; carried through unchanged by merge_findings.
    version: str
    # Root of the scanned project (string path, not resolved here).
    project_root: str
    last_scan_at: str  # ISO 8601
    # Scan profile identifiers — presumably active rule-profile names;
    # verify against the scan caller.
    profile: list[str]
    # Every recorded finding, regardless of status.
    findings: list[Finding]
    statuses: dict[str, FindingStatus]  # finding_id → status
    fix_evidence: dict[str, FixEvidence] = Field(default_factory=dict)  # finding_id → evidence
47
+
48
+
49
def load_audit_state(path: Path) -> AuditState | None:
    """Load and validate audit state from *path*.

    Returns ``None`` if the file does not exist.
    Raises ``ValueError`` if the file exists but fails validation —
    either malformed JSON or a schema mismatch.
    """
    # EAFP: read directly and treat a missing file as "no state yet".
    # The original exists()/read_text() sequence raced with concurrent
    # deletion between the check and the read.
    try:
        text = path.read_text()
    except FileNotFoundError:
        return None
    try:
        raw = json.loads(text)
        return AuditState.model_validate(raw)
    except (json.JSONDecodeError, ValidationError) as exc:
        # Malformed JSON previously escaped as a bare JSONDecodeError with no
        # file context; wrap it to honor the documented ValueError contract.
        # JSONDecodeError is a ValueError subclass, so existing callers
        # catching ValueError are unaffected.
        raise ValueError(f"Invalid audit state at {path}: {exc}") from exc
62
+
63
+
64
def save_audit_state(state: AuditState, path: Path) -> None:
    """Write *state* to *path* as indented JSON.

    Creates parent directories as needed.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(state.model_dump(), indent=2)
    path.write_text(serialized)
71
+
72
+
73
def merge_findings(state: AuditState, new_findings: list[Finding]) -> AuditState:
    """Merge *new_findings* from a fresh scan into *state*.

    Rules:
    - New finding IDs not in current state are added with status OPEN.
    - Findings already in state with status FIXED, ACCEPTED_RISK, or
      FALSE_POSITIVE keep their existing status (not reset to OPEN).
    - Findings already OPEN stay OPEN.
    - The ``last_scan_at`` timestamp is updated.
    """
    merged: dict[str, Finding] = {f.finding_id: f for f in state.findings}
    statuses = dict(state.statuses)

    # Only genuinely new IDs are added; an existing finding keeps both its
    # recorded Finding object and its current status untouched.
    for candidate in new_findings:
        if candidate.finding_id in merged:
            continue
        merged[candidate.finding_id] = candidate
        statuses[candidate.finding_id] = FindingStatus.OPEN

    return AuditState(
        version=state.version,
        project_root=state.project_root,
        last_scan_at=datetime.now(tz=UTC).isoformat(),
        profile=state.profile,
        findings=list(merged.values()),
        statuses=statuses,
        fix_evidence=dict(state.fix_evidence),
    )
102
+
103
+
104
def get_open_findings(state: AuditState) -> list[Finding]:
    """Return findings whose status is OPEN."""
    wanted = FindingStatus.OPEN
    return [
        finding
        for finding in state.findings
        if state.statuses.get(finding.finding_id) == wanted
    ]