pace-agents 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pace/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
pace/audit/__init__.py ADDED
@@ -0,0 +1,373 @@
1
+ """Autoresearch-style audit loop with compressed journal context — STORY-105."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import logging
6
+ import re
7
+ from collections.abc import Callable
8
+ from datetime import UTC, datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from pydantic import BaseModel, ValidationError
13
+
14
+ from pace.exceptions import LLMError
15
+ from pace.finding_id import compute_finding_id
16
+ from pace.index import FunctionRecord
17
+ from pace.rules import Finding, Rule, Severity
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Token estimation (no external tokenizer required for MVP)
23
+ # ---------------------------------------------------------------------------
24
+
25
+ def _estimate_tokens(text: str) -> int:
26
+ return len(text) // 4
27
+
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Journal models
31
+ # ---------------------------------------------------------------------------
32
+
33
class JournalEntry(BaseModel):
    """One entry in the audit journal (one reviewed function)."""

    # Name of the reviewed function.
    function_name: str
    # Path of the file that contains the function.
    file_path: str
    # Number of findings produced by the review (0 for a clean pass or a failed review).
    finding_count: int
    # Short natural-language summary of the review outcome; the audit loop
    # supplies either a "Reviewed ..." line or a "review-failed: ..." marker.
    llm_summary: str
40
+
41
+
42
class JournalCompressor:
    """Maintains a running compressed journal of reviewed functions."""

    def __init__(self) -> None:
        # Entries not yet folded into the condensed prefix.
        self._entries: list[JournalEntry] = []
        # Compressed one-paragraph representation of older entries.
        self._condensed_prefix: str = ""

    def add_review(
        self,
        function_name: str,
        file_path: str,
        findings: list[Finding],
        llm_summary: str,
    ) -> None:
        """Record a completed function review."""
        entry = JournalEntry(
            function_name=function_name,
            file_path=file_path,
            finding_count=len(findings),
            llm_summary=llm_summary,
        )
        self._entries.append(entry)

    def get_compressed_summary(self, max_tokens: int) -> str:
        """Return a summary string whose token estimate is <= max_tokens.

        If the full journal exceeds 80% of max_tokens, older entries are
        condensed into a single paragraph. Instance state (_condensed_prefix
        and _entries) is updated so subsequent calls do not re-emit already-
        condensed entries.
        """
        budget = int(max_tokens * 0.8)

        # Fast path: everything already fits within 80% of the budget.
        full_text = self._build_full_text()
        if _estimate_tokens(full_text) <= budget:
            return full_text

        # Repeatedly fold the older half of the pending entries into the
        # prefix until the rendered text fits (or no entries remain). Work
        # on local copies and commit to instance state only afterwards.
        prefix = self._condensed_prefix
        pending = list(self._entries)
        while pending:
            rendered = self._build_text_from_with_prefix(prefix, pending)
            if _estimate_tokens(rendered) <= budget:
                break
            split_at = max(1, len(pending) // 2)
            chunk = self._condense_entries(pending[:split_at])
            pending = pending[split_at:]
            # Earlier condensed chunks are preserved by appending.
            prefix = f"{prefix} {chunk}" if prefix else chunk

        # Commit: condensed entries leave _entries, the prefix absorbs them.
        self._condensed_prefix = prefix
        self._entries = pending

        result = self._build_text_from_with_prefix(prefix, pending)
        # Final hard cap: at most max_tokens worth of characters (4 chars/token).
        char_cap = max_tokens * 4
        return result if len(result) <= char_cap else result[:char_cap]

    def entries(self) -> list[JournalEntry]:
        """Return a shallow copy of the not-yet-condensed entries."""
        return list(self._entries)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _build_full_text(self) -> str:
        # Render the current prefix plus every pending entry.
        return self._build_text_from_with_prefix(self._condensed_prefix, self._entries)

    def _build_text_from(self, entries: list[JournalEntry]) -> str:
        # Render *entries* under the current condensed prefix.
        return self._build_text_from_with_prefix(self._condensed_prefix, entries)

    def _build_text_from_with_prefix(
        self, prefix: str, entries: list[JournalEntry]
    ) -> str:
        # One bullet line per entry, preceded by the condensed prefix if any.
        lines: list[str] = []
        if prefix:
            lines.append(f"[Earlier reviews condensed] {prefix}")
        for item in entries:
            if item.finding_count:
                note = f"{item.finding_count} finding(s)"
            else:
                note = "no findings"
            lines.append(
                f"- {item.function_name} ({item.file_path}): {note}. {item.llm_summary}"
            )
        return "\n".join(lines)

    @staticmethod
    def _condense_entries(entries: list[JournalEntry]) -> str:
        # Collapse a batch of entries into one sentence: names + finding total.
        names = ", ".join(e.function_name for e in entries)
        total = sum(e.finding_count for e in entries)
        return (
            f"{len(entries)} functions reviewed ({names}); "
            f"{total} total finding(s) identified."
        )
146
+
147
+
148
+ # ---------------------------------------------------------------------------
149
+ # LLM response parsing
150
+ # ---------------------------------------------------------------------------
151
+
152
class _LLMFinding(BaseModel):
    """Raw shape expected from the LLM JSON block."""

    # Rule identifier the LLM assigned; validated against the known rule
    # set by the caller, not here.
    rule_id: str
    # Plain-English description of the suspected violation.
    description: str
    # Pydantic coerces the raw value into the Severity enum; out-of-range
    # values surface as a ValidationError in the caller.
    severity: Severity
158
+
159
+
160
+ _JSON_FENCE_RE = re.compile(r"```json\s*(.*?)\s*```", re.DOTALL)
161
+
162
+
163
def _parse_findings_from_response(
    response: str,
    function_record: FunctionRecord,
    rules_by_id: dict[str, Rule] | None = None,
) -> list[Finding] | None:
    """Extract and validate findings from an LLM response.

    Returns None if no valid JSON fenced block is present.
    Returns a (possibly empty) list of Finding on success.
    """
    fence = _JSON_FENCE_RE.search(response)
    if fence is None:
        return None

    try:
        payload = json.loads(fence.group(1))
    except json.JSONDecodeError as exc:
        logger.warning("JSON parse error in LLM response: %s", exc)
        return None

    if not isinstance(payload, list):
        logger.warning("Expected JSON array in LLM response, got %s", type(payload))
        return None

    known_rules = rules_by_id or {}
    results: list[Finding] = []
    for raw_item in payload:
        # Malformed items are skipped individually rather than failing the batch.
        try:
            candidate = _LLMFinding.model_validate(raw_item)
        except ValidationError as exc:
            logger.warning("Invalid finding shape from LLM (skipped): %s", exc)
            continue

        rule = known_rules.get(candidate.rule_id)
        if rule is None:
            logger.warning(
                "LLM returned unknown rule_id %r (skipped) — valid IDs: %s",
                candidate.rule_id,
                sorted(known_rules.keys()),
            )
            continue

        # Stable ID derived from rule + file + source so re-scans dedupe.
        fid = compute_finding_id(
            candidate.rule_id,
            function_record.file_path,
            function_record.source,
        )
        results.append(
            Finding(
                finding_id=fid,
                rule_id=candidate.rule_id,
                rule_name=rule.name,
                file_path=function_record.file_path,
                function_name=function_record.name,
                severity=candidate.severity,
                description=candidate.description,
                content_hash="",
                control_ref=rule.control_ref,
            )
        )
    return results
222
+
223
+
224
+ # ---------------------------------------------------------------------------
225
+ # Prompt builder
226
+ # ---------------------------------------------------------------------------
227
+
228
+
229
+ def _build_prompt(
230
+ function_record: FunctionRecord,
231
+ journal_summary: str,
232
+ rules: list[Rule],
233
+ ) -> list[dict[str, Any]]:
234
+ rule_names = ", ".join(r.name for r in rules)
235
+ system_content = (
236
+ f"You are a HIPAA compliance auditor. Active rules: {rule_names}."
237
+ if rules
238
+ else "You are a HIPAA compliance auditor."
239
+ )
240
+ prior_context = journal_summary or "(No prior reviews yet.)"
241
+ user_content = (
242
+ "You are a HIPAA compliance auditor reviewing a Python function.\n\n"
243
+ "## Prior Audit Context\n"
244
+ + prior_context
245
+ + "\n\n## Function Under Review\n"
246
+ f"File: {function_record.file_path}\n"
247
+ f"Function: {function_record.name}\n\n"
248
+ "```python\n"
249
+ + function_record.source
250
+ + "\n```\n\n"
251
+ "## Task\n"
252
+ "Review this function for HIPAA compliance violations. Focus on:\n"
253
+ "- PHI/PII appearing in log statements\n"
254
+ "- Hardcoded credentials or API keys\n"
255
+ "- Unredacted sensitive data in API responses or error handlers\n\n"
256
+ "Return your findings as a JSON array in a fenced code block. Each finding must have:\n"
257
+ "- rule_id (string)\n"
258
+ "- description (string, plain English)\n"
259
+ '- severity ("critical" | "high" | "medium" | "low")\n\n'
260
+ "If no violations are found, return an empty array: ```json\n[]\n```\n"
261
+ )
262
+ return [
263
+ {"role": "system", "content": system_content},
264
+ {"role": "user", "content": user_content},
265
+ ]
266
+
267
+
268
+ # ---------------------------------------------------------------------------
269
+ # Audit Loop
270
+ # ---------------------------------------------------------------------------
271
+
272
class AuditLoop:
    """Walks every function, calls the LLM, accumulates findings with journal."""

    def __init__(
        self,
        llm: Any,  # LLMClient or stub — duck-typed for testability
        functions: list[FunctionRecord],
        rules: list[Rule],
        pace_dir: Path,
        max_tokens: int = 8192,
    ) -> None:
        self._llm = llm
        self._functions = functions
        self._rules = rules
        # Index rules once for O(1) lookup while validating LLM output.
        self._rules_by_id: dict[str, Rule] = {rule.id: rule for rule in rules}
        self._pace_dir = pace_dir
        self._max_tokens = max_tokens
        self._findings: list[Finding] = []
        self._journal = JournalCompressor()
        self._reviewed: int = 0

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def run(
        self,
        on_progress: Callable[[int, int], None] | None = None,
    ) -> list[Finding]:
        """Iterate all functions, call LLM, return accumulated findings.

        Args:
            on_progress: Optional callback invoked after each function review
                as ``on_progress(current_index, total_count)``.
        """
        # Ensure the output tree exists before any journal/log writes.
        self._pace_dir.mkdir(parents=True, exist_ok=True)
        (self._pace_dir / "logs").mkdir(parents=True, exist_ok=True)

        total = len(self._functions)
        for position, record in enumerate(self._functions, 1):
            self._review_function(record)
            self._reviewed += 1
            # Checkpoint after every function so a crash loses at most one review.
            self._write_journal()
            if on_progress is not None:
                on_progress(position, total)

        return list(self._findings)

    # ------------------------------------------------------------------
    # Internal: single function review
    # ------------------------------------------------------------------

    def _review_function(self, fn_record: FunctionRecord) -> None:
        # 30% of the context window is reserved for prior-review context.
        budget = int(self._max_tokens * 0.30)
        prior = self._journal.get_compressed_summary(max_tokens=budget)

        messages = _build_prompt(fn_record, prior, self._rules)

        try:
            response = self._llm.chat(messages)
        except LLMError as exc:
            # LLM failure: log it and journal the failure, then move on.
            logger.error("LLM error reviewing %s: %s", fn_record.name, exc)
            self._log_error(fn_record.name, str(exc))
            self._journal.add_review(
                fn_record.name, fn_record.file_path, [], "review-failed: LLM error"
            )
            return

        parsed = _parse_findings_from_response(response, fn_record, self._rules_by_id)
        if parsed is None:
            # NOTE: the response body is intentionally not logged ("response omitted").
            self._log_error(fn_record.name, "No JSON block in LLM response (response omitted)")
            self._journal.add_review(
                fn_record.name, fn_record.file_path, [], "review-failed: no JSON block"
            )
            return

        self._findings.extend(parsed)
        self._journal.add_review(
            fn_record.name,
            fn_record.file_path,
            parsed,
            f"Reviewed {fn_record.name}; {len(parsed)} finding(s).",
        )

    # ------------------------------------------------------------------
    # Internal: persistence
    # ------------------------------------------------------------------

    def _write_journal(self) -> None:
        """Write `.pace/audit-journal.json` with current state."""
        payload: dict[str, Any] = {
            "reviewed": self._reviewed,
            "findings": [finding.model_dump() for finding in self._findings],
            "journal_entries": [entry.model_dump() for entry in self._journal.entries()],
            "last_updated": datetime.now(tz=UTC).isoformat(),
        }
        target = self._pace_dir / "audit-journal.json"
        target.write_text(json.dumps(payload, indent=2))

    def _log_error(self, function_name: str, message: str) -> None:
        """Append an error entry to `.pace/logs/pass2-errors.log`."""
        stamp = datetime.now(tz=UTC).isoformat()
        log_path = self._pace_dir / "logs" / "pass2-errors.log"
        with log_path.open("a") as handle:
            handle.write(f"[{stamp}] {function_name}: {message}\n")
pace/audit_state.py ADDED
@@ -0,0 +1,109 @@
1
+ """Audit state file — STORY-202.
2
+
3
+ Manages `.pace/audit-state.json` which persists findings with their status
4
+ across multiple `pace scan` and `pace fix` invocations.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ from datetime import UTC, datetime
10
+ from enum import StrEnum
11
+ from pathlib import Path
12
+
13
+ from pydantic import BaseModel, Field, ValidationError
14
+
15
+ from pace.rules import Finding
16
+
17
+
18
class FindingStatus(StrEnum):
    """Lifecycle status of a finding tracked in `.pace/audit-state.json`.

    FIXED, ACCEPTED_RISK, and FALSE_POSITIVE survive re-scans:
    ``merge_findings`` never resets an existing status back to OPEN.
    """

    OPEN = "open"
    FIXED = "fixed"
    ACCEPTED_RISK = "accepted-risk"
    FALSE_POSITIVE = "false-positive"
    BLOCKED = "blocked"
24
+
25
+
26
class FixEvidence(BaseModel):
    """Evidence produced by a successful fix loop run for one finding.

    Stored in ``AuditState.fix_evidence`` keyed by finding_id and persisted
    to `.pace/audit-state.json`.
    """

    branch_name: str  # branch on which the fix was authored
    commit_sha: str  # commit containing the applied fix
    diff: str  # unified diff / patch applied by the Author
    test_output: str  # raw output captured from the test run
    tests_passed: bool  # whether that test run succeeded
    evaluator_verdict: str  # human-readable outcome from the Evaluator
35
+
36
+
37
class AuditState(BaseModel):
    """Schema-validated representation of `.pace/audit-state.json`."""

    # State-file version marker; carried through unchanged by merge_findings.
    version: str
    # Root of the scanned project (string path, not resolved here).
    project_root: str
    last_scan_at: str  # ISO 8601
    # Scan profile identifiers — presumably active rule-profile names;
    # verify against the scan caller.
    profile: list[str]
    # Every recorded finding, regardless of status.
    findings: list[Finding]
    statuses: dict[str, FindingStatus]  # finding_id → status
    fix_evidence: dict[str, FixEvidence] = Field(default_factory=dict)  # finding_id → evidence
47
+
48
+
49
def load_audit_state(path: Path) -> AuditState | None:
    """Load and validate audit state from *path*.

    Returns ``None`` if the file does not exist.
    Raises ``ValueError`` if the file exists but fails validation —
    either malformed JSON or a schema mismatch.
    """
    # EAFP: read directly and treat a missing file as "no state yet".
    # The original exists()/read_text() sequence raced with concurrent
    # deletion between the check and the read.
    try:
        text = path.read_text()
    except FileNotFoundError:
        return None
    try:
        raw = json.loads(text)
        return AuditState.model_validate(raw)
    except (json.JSONDecodeError, ValidationError) as exc:
        # Malformed JSON previously escaped as a bare JSONDecodeError with no
        # file context; wrap it to honor the documented ValueError contract.
        # JSONDecodeError is a ValueError subclass, so existing callers
        # catching ValueError are unaffected.
        raise ValueError(f"Invalid audit state at {path}: {exc}") from exc
62
+
63
+
64
def save_audit_state(state: AuditState, path: Path) -> None:
    """Write *state* to *path* as indented JSON.

    Creates parent directories as needed.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(state.model_dump(), indent=2)
    path.write_text(serialized)
71
+
72
+
73
def merge_findings(state: AuditState, new_findings: list[Finding]) -> AuditState:
    """Merge *new_findings* from a fresh scan into *state*.

    Rules:
    - New finding IDs not in current state are added with status OPEN.
    - Findings already in state with status FIXED, ACCEPTED_RISK, or
      FALSE_POSITIVE keep their existing status (not reset to OPEN).
    - Findings already OPEN stay OPEN.
    - The ``last_scan_at`` timestamp is updated.
    """
    merged: dict[str, Finding] = {f.finding_id: f for f in state.findings}
    statuses = dict(state.statuses)

    # Only genuinely new IDs are added; an existing finding keeps both its
    # recorded Finding object and its current status untouched.
    for candidate in new_findings:
        if candidate.finding_id in merged:
            continue
        merged[candidate.finding_id] = candidate
        statuses[candidate.finding_id] = FindingStatus.OPEN

    return AuditState(
        version=state.version,
        project_root=state.project_root,
        last_scan_at=datetime.now(tz=UTC).isoformat(),
        profile=state.profile,
        findings=list(merged.values()),
        statuses=statuses,
        fix_evidence=dict(state.fix_evidence),
    )
102
+
103
+
104
def get_open_findings(state: AuditState) -> list[Finding]:
    """Return findings whose status is OPEN."""
    wanted = FindingStatus.OPEN
    return [
        finding
        for finding in state.findings
        if state.statuses.get(finding.finding_id) == wanted
    ]