delimit-cli 4.0.0 → 4.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/gateway/ai/cross_model_audit.py +600 -0
- package/gateway/ai/github_scanner.py +622 -0
- package/gateway/ai/handoff_receipts.py +409 -0
- package/gateway/ai/license_core.py +1 -2
- package/gateway/ai/notify.py +8 -8
- package/gateway/ai/reddit_scanner.py +562 -0
- package/gateway/ai/server.py +15 -7
- package/gateway/ai/session_phoenix.py +371 -0
- package/gateway/ai/swarm.py +2 -2
- package/gateway/ai/toolcard_cache.py +327 -0
- package/gateway/core/contract_ledger.py +1 -1
- package/gateway/core/dependency_graph.py +1 -1
- package/gateway/core/dependency_manifest.py +1 -1
- package/gateway/core/event_backbone.py +2 -2
- package/gateway/core/event_schema.py +1 -1
- package/gateway/core/impact_analyzer.py +1 -1
- package/package.json +1 -7
- package/scripts/security-check.sh +6 -50
package/README.md
CHANGED
|
@@ -5,7 +5,6 @@ Stop re-explaining your codebase every session. Memory, tasks, and governance th
|
|
|
5
5
|
[](https://www.npmjs.com/package/delimit-cli)
|
|
6
6
|
[](https://github.com/marketplace/actions/delimit-api-governance)
|
|
7
7
|
[](https://opensource.org/licenses/MIT)
|
|
8
|
-
[](https://glama.ai/mcp/servers/delimit-ai/delimit-mcp-server)
|
|
9
8
|
[](https://github.com/marketplace/actions/delimit-api-governance)
|
|
10
9
|
|
|
11
10
|
<p align="center">
|
|
@@ -0,0 +1,600 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Delimit Cross-Model Audit — Trust through triangulation.
|
|
3
|
+
|
|
4
|
+
Run the same code review through 3 different AI models, each with a different
|
|
5
|
+
review lens (security, correctness, governance). A synthesis step merges their
|
|
6
|
+
findings: agreements become high-confidence, disagreements surface tradeoffs.
|
|
7
|
+
|
|
8
|
+
This is different from `delimit_deliberate` (which debates a question).
|
|
9
|
+
Cross-Model Audit reviews actual code/specs for specific issues.
|
|
10
|
+
|
|
11
|
+
Models are configured via ~/.delimit/models.json or ~/.delimit/secrets/hosted-models.json.
|
|
12
|
+
Uses the same infrastructure as deliberation.py.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
import os
|
|
18
|
+
import re
|
|
19
|
+
import threading
|
|
20
|
+
import time
|
|
21
|
+
from datetime import datetime, timezone
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
24
|
+
|
|
25
|
+
# Module-scoped logger for everything emitted by the cross-model audit.
logger = logging.getLogger("delimit.cross_model_audit")

# Directory under the user's home where finished audits are written as JSON.
AUDIT_DIR = Path.home().joinpath(".delimit", "audits")

# ═══════════════════════════════════════════════════════════════════════
# Audit Lenses — each model gets a different review focus
# ═══════════════════════════════════════════════════════════════════════

# Maps lens name -> instruction text injected into that model's prompt.
# Insertion order matters: it is the default order lenses are assigned in.
AUDIT_LENSES = dict(
    security=(
        "Review for security vulnerabilities: injection, auth bypass, data exposure, "
        "privilege escalation, secret leaks. Focus on exploitable issues."
    ),
    correctness=(
        "Review for logical errors, edge cases, off-by-one, race conditions, "
        "null handling, error propagation. Focus on bugs that cause wrong behavior."
    ),
    governance=(
        "Review for breaking changes, API contract violations, backward compatibility, "
        "schema drift, missing validation. Focus on issues that affect consumers."
    ),
)

# Severity levels for structured findings (lower rank sorts first).
SEVERITY_ORDER = {
    level: rank
    for rank, level in enumerate(("critical", "high", "medium", "low", "info"))
}

# Upper bound, in seconds, on how long the audit waits for model calls.
MODEL_TIMEOUT = 60
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _build_lens_prompt(lens_name: str, lens_description: str, target_code: str, target_type: str) -> str:
    """Assemble the user prompt for one model reviewing under one lens.

    Args:
        lens_name: Name of the lens, shown in bold in the first sentence.
        lens_description: Instruction text describing what the lens looks for.
        target_code: The code under review, embedded between BEGIN/END markers.
        target_type: "file", "diff" or "snippet"; anything else is described
            generically as "code".

    Returns:
        The full prompt, which instructs the model to answer with a bare JSON
        array of findings.
    """
    if target_type == "file":
        kind_phrase = "source file"
    elif target_type == "diff":
        kind_phrase = "git diff"
    elif target_type == "snippet":
        kind_phrase = "code snippet"
    else:
        kind_phrase = "code"

    prompt_lines = [
        f"You are a code auditor focused on **{lens_name}**.",
        "",
        f"{lens_description}",
        "",
        f"Analyze the following {kind_phrase} and return your findings as a JSON array.",
        "Each finding must be a JSON object with these fields:",
        '- "severity": one of "critical", "high", "medium", "low", "info"',
        '- "location": line number, function name, or description of where the issue is (e.g. "Line 42", "function validate_token", "JWT handling block")',
        '- "finding": clear description of the issue',
        '- "recommendation": what to do about it',
        "",
        "Return ONLY a JSON array. No markdown fences, no explanatory text before or after.",
        "If you find no issues, return an empty array: []",
        "",
        "--- BEGIN CODE ---",
        f"{target_code}",
        "--- END CODE ---",
    ]
    return "\n".join(prompt_lines)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _resolve_target(target: str, target_type: str, max_chars: int = 50000) -> Tuple[str, Optional[str]]:
    """Resolve the audit target to the text that will be sent to the models.

    Args:
        target: A file path when ``target_type`` is "file"; otherwise the diff
            or snippet text itself.
        target_type: "file", "diff" or "snippet".
        max_chars: Maximum number of characters kept from a file; longer files
            are cut and a truncation marker is appended. Diffs and snippets
            are passed through untruncated.

    Returns:
        ``(code_content, error_message)``. Exactly one side is meaningful:
        on success ``error_message`` is ``None``; on failure ``code_content``
        is the empty string.
    """
    if not isinstance(target, str):
        # Path(None) / None.strip() would raise; report it through the
        # normal error channel instead.
        return "", f"Invalid target: expected a string, got {type(target).__name__}."

    if target_type == "file":
        path = Path(target).expanduser()
        if not path.exists():
            return "", f"File not found: {target}"
        if not path.is_file():
            return "", f"Not a file: {target}"
        try:
            # Decode explicitly as UTF-8 so the result does not depend on the
            # host locale; undecodable bytes are replaced rather than fatal.
            content = path.read_text(encoding="utf-8", errors="replace")
        except Exception as e:
            return "", f"Failed to read file: {e}"
        if len(content) > max_chars:
            content = content[:max_chars] + f"\n\n[... truncated at {max_chars:,} characters ...]"
        return content, None

    if target_type in ("diff", "snippet"):
        if not target.strip():
            return "", "Empty target provided."
        return target, None

    return "", f"Unknown target_type: {target_type}. Use 'file', 'diff', or 'snippet'."
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _parse_model_findings(raw_response: str, model_name: str) -> List[Dict[str, str]]:
    """Parse structured findings from a model response.

    Parsing is attempted in this order:

    1. Strip a leading markdown code fence, if any, and parse the remainder
       as JSON. A list is used as-is; a single object that carries a
       "finding" key is treated as a one-element list.
    2. Otherwise scan the text for an embedded JSON array, decoding from each
       ``[`` in turn. The first array containing at least one object wins;
       an empty array is only accepted if no such array is found.
    3. If nothing parses, wrap the first 500 characters of the raw response
       in a single "info" finding so the output is not lost.

    Args:
        raw_response: Text returned by the model.
        model_name: Display name of the model, used for logging and in the
            fallback finding.

    Returns:
        A list of normalized finding dicts.
    """
    if isinstance(raw_response, str):
        raw_text = raw_response
    else:
        # Tolerate None / non-text payloads instead of raising on .strip().
        raw_text = "" if raw_response is None else str(raw_response)
    text = raw_text.strip()

    # Strip markdown code fences: drop the opening line (``` or ```json) and
    # any other fence lines.
    if text.startswith("```"):
        inner = [ln for ln in text.split("\n")[1:] if not ln.strip().startswith("```")]
        text = "\n".join(inner).strip()

    # Try direct parse
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None

    if isinstance(parsed, list):
        return _validate_findings(parsed, model_name)
    if isinstance(parsed, dict) and "finding" in parsed:
        # Model returned one finding object instead of an array.
        return _validate_findings([parsed], model_name)

    # Try to extract a JSON array embedded in surrounding prose. Decoding
    # from each "[" (rather than a greedy first-"["-to-last-"]" match) still
    # works when the prose itself contains stray brackets.
    decoder = json.JSONDecoder()
    saw_empty_array = False
    start = text.find("[")
    while start != -1:
        try:
            candidate, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, list):
            if any(isinstance(item, dict) for item in candidate):
                return _validate_findings(candidate, model_name)
            if not candidate:
                saw_empty_array = True
        start = text.find("[", start + 1)

    if saw_empty_array:
        # Only an explicit "[]" was present: the model reported no issues.
        return []

    # Could not parse — return the raw response as a single finding
    logger.warning("Could not parse structured findings from %s, wrapping raw response", model_name)
    return [{
        "severity": "info",
        "location": "general",
        "finding": f"[Unstructured response from {model_name}]: {raw_text[:500]}",
        "recommendation": "Review raw model output manually.",
    }]
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _validate_findings(findings: List, model_name: str) -> List[Dict[str, str]]:
    """Validate and normalize finding objects.

    Non-dict entries are skipped. Every kept finding is reduced to exactly
    four string fields: "severity" (lower-cased), "location", "finding" and
    "recommendation". Values are stripped of surrounding whitespace, and a
    missing or JSON-null value falls back to the field's default instead of
    being stringified to "None".

    Args:
        findings: Parsed JSON array from the model (may contain junk entries).
        model_name: Display name of the model; accepted for interface
            compatibility and not used here.

    Returns:
        List of normalized finding dicts, in input order.
    """

    def _text(value: Any, fallback: str) -> str:
        # str(None) would yield the literal "None", so treat null as absent.
        if value is None:
            return fallback
        return str(value).strip()

    validated = []
    for f in findings or []:
        if not isinstance(f, dict):
            continue
        validated.append({
            "severity": _text(f.get("severity"), "info").lower() or "info",
            "location": _text(f.get("location"), "unknown") or "unknown",
            "finding": _text(f.get("finding"), ""),
            "recommendation": _text(f.get("recommendation"), ""),
        })
    return validated
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _call_model_with_lens(
    model_id: str,
    model_config: Dict,
    lens_name: str,
    target_code: str,
    target_type: str,
) -> Dict[str, Any]:
    """Call a single model with its lens prompt and return a result dict.

    Args:
        model_id: Key of the model in the models configuration.
        model_config: Configuration dict for that model; its "name" entry, if
            present, is used as the display name.
        lens_name: Key into ``AUDIT_LENSES`` selecting the review focus.
        target_code: Code to review.
        target_type: "file", "diff" or "snippet" (affects prompt wording only).

    Returns:
        A dict with "model_id", "model_name", "lens", "status",
        "elapsed_seconds" and "findings". On failure "status" is "error" and
        an "error" message is included; on success "status" is "ok" and the
        unparsed "raw_response" is included.
    """
    from ai.deliberation import _call_model

    display_name = model_config.get("name", model_id)

    def _failure(message: str, elapsed_seconds: float) -> Dict[str, Any]:
        # Single place that defines the shape of an error result.
        return {
            "model_id": model_id,
            "model_name": display_name,
            "lens": lens_name,
            "status": "error",
            "error": message,
            "elapsed_seconds": elapsed_seconds,
            "findings": [],
        }

    lens_description = AUDIT_LENSES[lens_name]
    prompt = _build_lens_prompt(lens_name, lens_description, target_code, target_type)
    system_prompt = (
        f"You are a senior code auditor performing a {lens_name} review. "
        "Return findings as a JSON array. Be thorough but precise."
    )

    start = time.time()
    try:
        raw = _call_model(model_id, model_config, prompt, system_prompt)
    except Exception as e:
        return _failure(str(e), round(time.time() - start, 1))
    elapsed = round(time.time() - start, 1)

    if not isinstance(raw, str):
        # Guard the string checks below; a None/non-text return is a failure.
        return _failure(
            f"Model returned a non-text response ({type(raw).__name__})", elapsed
        )

    # Check for model-level errors
    if _is_model_error_sentinel(raw):
        return _failure(raw, elapsed)

    findings = _parse_model_findings(raw, display_name)

    return {
        "model_id": model_id,
        "model_name": display_name,
        "lens": lens_name,
        "status": "ok",
        "elapsed_seconds": elapsed,
        "findings": findings,
        "raw_response": raw,
    }


def _is_model_error_sentinel(raw: str) -> bool:
    """Return True if ``raw`` looks like a bracketed error marker, not findings.

    A marker is a response wrapped in ``[...]`` that contains "unavailable"
    or "error:" (presumably how ``_call_model`` reports failures — confirm
    against ai.deliberation). A well-formed JSON array is never a marker:
    real findings routinely contain those words and must not be discarded.
    """
    text = raw.strip()
    if not (text.startswith("[") and text.endswith("]")):
        return False
    if "unavailable" not in text and "error:" not in text:
        return False
    try:
        return not isinstance(json.loads(text), list)
    except json.JSONDecodeError:
        return True
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _normalize_location(loc: str) -> str:
    """Normalize a location string into a key for matching findings.

    Resolution order:

    * a line reference ("Line 42", "line42", "line: 42", "lines 10-20")
      becomes ``line_<first number>``;
    * a function reference ("function validate_token", optionally quoted or
      back-ticked) becomes ``func_<name>``;
    * anything else is lower-cased with every character outside ``[a-z0-9_]``
      replaced by ``_`` and leading/trailing underscores removed.

    "line" and "function" must start a word, so text such as "pipeline 3" or
    "deadline 5" is not mistaken for a line reference.

    Args:
        loc: Free-form location text from a model finding.

    Returns:
        The normalized key; the empty string for empty input.
    """
    loc = str(loc or "").lower().strip()
    # Extract line numbers
    line_match = re.search(r'\blines?[\s:#]*(\d+)', loc)
    if line_match:
        return f"line_{line_match.group(1)}"
    # Extract function names
    func_match = re.search(r'\bfunction\s+[`\'"]?(\w+)', loc)
    if func_match:
        return f"func_{func_match.group(1)}"
    # Fall back to cleaned string
    return re.sub(r'[^a-z0-9_]', '_', loc).strip('_')
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def synthesize(audit_results: Dict[str, Any]) -> Dict[str, Any]:
    """Merge findings from multiple model audits.

    Findings from successful models are grouped by normalized location.
    A group reported by two or more distinct models becomes an agreement
    (all severities equal) or a disagreement (severities differ). Every
    finding in a group reported by a single model is kept as a unique
    finding. Findings whose location is only a placeholder ("unknown",
    "general", "none", "n/a" or empty) are never grouped with each other,
    because sharing a placeholder says nothing about sharing a location.

    Args:
        audit_results: Dict with a "model_results" list of per-model result
            dicts ("status", "lens", "model_name", "findings").

    Returns:
        Dict with:
            agreements: findings flagged by 2+ models (high confidence)
            unique_findings: flagged by only 1 model (review needed)
            disagreements: models contradict each other on severity
            summary: one-paragraph synthesis
    """
    model_results = audit_results.get("model_results", [])
    if not model_results:
        return {
            "agreements": [],
            "unique_findings": [],
            "disagreements": [],
            "summary": "No model results to synthesize.",
        }

    successful_models = [r for r in model_results if r.get("status") == "ok"]
    failed_models = [r for r in model_results if r.get("status") != "ok"]
    failed_names = [str(r.get("model_name", "unknown")) for r in failed_models]

    # Collect all findings with their source lens
    all_findings: List[Dict[str, Any]] = []
    for result in successful_models:
        lens = result["lens"]
        model_name = result["model_name"]
        for f in result.get("findings", []):
            all_findings.append({
                **f,
                "lens": lens,
                "model": model_name,
                "norm_location": _normalize_location(f.get("location", "")),
            })

    if not all_findings:
        # "No findings" only means "clean" if the models actually answered.
        if not successful_models:
            summary = (
                f"No findings: all {len(failed_models)} model(s) failed "
                f"({', '.join(failed_names)}). The audit did not complete."
            )
        elif failed_models:
            summary = (
                f"No issues found by the {len(successful_models)} model(s) that responded. "
                f"({', '.join(failed_names)} failed — results are partial.)"
            )
        else:
            summary = "All models returned clean results. No issues found."
        return {
            "agreements": [],
            "unique_findings": [],
            "disagreements": [],
            "summary": summary,
        }

    # Group by normalized location. Placeholder locations get a per-finding
    # key so unrelated findings are not merged into a false agreement.
    placeholder_keys = {"", "unknown", "general", "none", "n_a"}
    location_groups: Dict[Any, List[Dict]] = {}
    for index, f in enumerate(all_findings):
        key: Any = f["norm_location"]
        if key in placeholder_keys:
            key = (key, index)
        location_groups.setdefault(key, []).append(f)

    agreements = []
    unique_findings = []
    disagreements = []

    for findings in location_groups.values():
        models_involved = {f["model"] for f in findings}
        lenses_involved = {f["lens"] for f in findings}

        if len(models_involved) < 2:
            # Only one model flagged this location. Keep every finding it
            # reported there, not just the first.
            for f in findings:
                unique_findings.append({
                    "location": f["location"],
                    "severity": f["severity"],
                    "lens": f["lens"],
                    "model": f["model"],
                    "finding": f["finding"],
                    "recommendation": f["recommendation"],
                })
            continue

        # Check for severity disagreements
        severities = {f["severity"] for f in findings}
        if len(severities) > 1:
            # Models agree on location but disagree on severity.
            # NOTE: keyed by model name, so if one model reported several
            # findings at this location only its last one is shown.
            disagreements.append({
                "location": findings[0]["location"],
                "models": {f["model"]: {
                    "lens": f["lens"],
                    "severity": f["severity"],
                    "finding": f["finding"],
                    "recommendation": f["recommendation"],
                } for f in findings},
                "type": "severity_disagreement",
            })
        else:
            # Full agreement. Sorted so the output order is deterministic.
            agreements.append({
                "location": findings[0]["location"],
                "severity": findings[0]["severity"],
                "models_agreed": sorted(models_involved, key=str),
                "lenses": sorted(lenses_involved, key=str),
                "findings": {f["lens"]: {
                    "finding": f["finding"],
                    "recommendation": f["recommendation"],
                } for f in findings},
            })

    # Sort by severity (unknown severities sort last)
    agreements.sort(key=lambda x: SEVERITY_ORDER.get(x["severity"], 5))
    unique_findings.sort(key=lambda x: SEVERITY_ORDER.get(x["severity"], 5))

    # Build summary
    total = len(agreements) + len(unique_findings) + len(disagreements)
    summary_parts = [
        f"{len(agreements)} high-confidence finding(s)",
        f"{len(unique_findings)} unique catch(es)",
        f"{len(disagreements)} tradeoff(s)",
    ]
    summary = f"{total} total findings across {len(successful_models)} models: " + ", ".join(summary_parts) + "."

    if failed_models:
        summary += f" ({', '.join(failed_names)} failed — results are partial.)"

    critical_count = sum(1 for a in agreements if a["severity"] == "critical")
    if critical_count:
        summary += f" {critical_count} CRITICAL issue(s) confirmed by multiple models."

    return {
        "agreements": agreements,
        "unique_findings": unique_findings,
        "disagreements": disagreements,
        "summary": summary,
    }
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def format_audit_output(audit_results: Dict[str, Any], synthesis: Dict[str, Any]) -> str:
    """Render an audit and its synthesis as a plain-text report.

    The report has a header (target and model/lens roster, with failed models
    marked), then three sections — high-confidence agreements, single-model
    findings, and disagreements — and ends with the synthesis summary.

    Args:
        audit_results: Audit dict; "target_display" (or "target") and
            "model_results" are read.
        synthesis: Output of ``synthesize``.

    Returns:
        The report as a single newline-joined string.
    """
    report = ["=== CROSS-MODEL AUDIT ==="]

    shown_target = audit_results.get("target_display", audit_results.get("target", "unknown"))
    report.append(f"Target: {shown_target}")

    # Model assignments
    roster = []
    for entry in audit_results.get("model_results", []):
        label = entry.get("model_name", entry.get("model_id", "?"))
        assigned_lens = entry.get("lens", "?")
        marker = " [FAILED]" if entry.get("status") != "ok" else ""
        roster.append(f"{label} ({assigned_lens}){marker}")
    report.append("Models: " + " | ".join(roster))
    report.append("")

    # High confidence
    confirmed = synthesis.get("agreements", [])
    if not confirmed:
        report.extend(["HIGH CONFIDENCE: None (no multi-model agreement)", ""])
    else:
        report.append("HIGH CONFIDENCE (2+ models agree):")
        for item in confirmed:
            report.append(f" [{item['severity'].upper()}] {item['location']}")
            per_lens = item.get("findings", {})
            for lens_key, info in per_lens.items():
                report.append(f" {lens_key.title()}: {info['finding']}")
            # Only the first non-empty recommendation is shown.
            actions = [info["recommendation"] for info in per_lens.values() if info.get("recommendation")]
            for action in actions[:1]:
                report.append(f" Action: {action}")
            report.append("")

    # Unique findings
    singles = synthesis.get("unique_findings", [])
    if not singles:
        report.extend(["UNIQUE FINDINGS: None", ""])
    else:
        report.append("UNIQUE FINDINGS (single model):")
        for single in singles:
            report.append(
                f" [{single['severity'].upper()}] {single['location']} ({single['lens'].title()})"
            )
            report.append(f" {single['finding']}")
            if single.get("recommendation"):
                report.append(f" Recommendation: {single['recommendation']}")
            report.append("")

    # Disagreements
    conflicts = synthesis.get("disagreements", [])
    if not conflicts:
        report.extend(["DISAGREEMENTS: None", ""])
    else:
        report.append("DISAGREEMENTS:")
        for conflict in conflicts:
            report.append(f" {conflict['location']}:")
            for who, info in conflict.get("models", {}).items():
                report.append(f" {info['lens'].title()} ({who}): {info['finding']} [severity: {info['severity']}]")
            report.append(" Tradeoff: review and decide based on your risk tolerance.")
            report.append("")

    report.append(f"Summary: {synthesis.get('summary', 'No summary available.')}")

    return "\n".join(report)
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def _select_models_and_lenses(
    lenses: Optional[List[str]] = None,
    models: Optional[List[str]] = None,
) -> Tuple[List[Tuple[str, Dict, str]], Optional[str]]:
    """Decide which enabled model reviews under which lens.

    Lenses default to every key of ``AUDIT_LENSES``. Models default to every
    enabled entry of the models configuration; when ``models`` is given, each
    requested name is matched first as an exact lower-cased id and otherwise
    as a substring of an id or display name (first hit wins). Lenses are then
    dealt out to the selected models round-robin.

    Returns:
        ``(assignments, error)``. Each assignment is
        ``(model_id, model_config, lens_name)``. On failure ``assignments``
        is empty and ``error`` describes the problem.
    """
    from ai.deliberation import get_models_config

    # Resolve lenses
    chosen_lenses = list(lenses) if lenses else list(AUDIT_LENSES.keys())
    for candidate in chosen_lenses:
        if candidate in AUDIT_LENSES:
            continue
        return [], f"Unknown lens: {candidate}. Available: {', '.join(AUDIT_LENSES.keys())}"

    # Get available models
    all_models = get_models_config(allow_hosted_fallback=True)
    usable = {mid: cfg for mid, cfg in all_models.items() if cfg.get("enabled")}

    if not usable:
        return [], (
            "No models available for audit. Configure API keys in ~/.delimit/models.json "
            "or ensure hosted models are available."
        )

    # If specific models requested, filter
    if models:
        picked = {}
        for requested in models:
            needle = requested.lower()
            if needle in usable:
                picked[needle] = usable[needle]
                continue
            # Try partial match against id or display name
            for mid, cfg in usable.items():
                if needle in mid.lower() or needle in cfg.get("name", "").lower():
                    picked[mid] = cfg
                    break
        if not picked:
            return [], f"None of the requested models ({', '.join(models)}) are available."
        usable = picked

    # Assign lenses to models round-robin
    pool = list(usable.keys())
    assignments = []
    for position, lens_name in enumerate(chosen_lenses):
        chosen_id = pool[position % len(pool)]
        assignments.append((chosen_id, usable[chosen_id], lens_name))

    return assignments, None
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def audit(
    target: str,
    target_type: str = "file",
    lenses: Optional[List[str]] = None,
    models: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Run cross-model audit on a target.

    Model calls run in parallel threads and share one overall deadline of
    ``MODEL_TIMEOUT`` seconds. Calls that have not finished by then are
    reported as timed out, and anything they produce afterwards is ignored.

    Args:
        target: File path, diff text, or code snippet.
        target_type: "file", "diff", or "snippet".
        lenses: Which lenses to apply (default: all 3).
        models: Which models to use (default: auto-detect).

    Returns:
        Full audit results with model_results, synthesis, and formatted
        output; or ``{"status": "error", "error": ...}`` if the target or the
        model selection could not be resolved.
    """
    start_time = time.time()
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")

    # Resolve target
    target_code, err = _resolve_target(target, target_type)
    if err:
        return {"status": "error", "error": err}

    # Select models and assign lenses
    assignments, err = _select_models_and_lenses(lenses, models)
    if err:
        return {"status": "error", "error": err}

    # Target display name
    if target_type == "file":
        target_display = target
    elif target_type == "diff":
        first_line = target.strip().split("\n")[0][:80]
        target_display = f"diff: {first_line}..."
    else:
        target_display = f"snippet ({len(target_code)} chars)"

    def _error_result(model_id: str, config: Dict, lens: str, message: str, elapsed: float) -> Dict[str, Any]:
        return {
            "model_id": model_id,
            "model_name": config.get("name", model_id),
            "lens": lens,
            "status": "error",
            "error": message,
            "elapsed_seconds": elapsed,
            "findings": [],
        }

    # Worker threads write only into `slots`. The list handed to callers is
    # built from a snapshot, so a late worker cannot mutate returned data.
    slots: List[Optional[Dict[str, Any]]] = [None] * len(assignments)

    def _run(index: int, model_id: str, config: Dict, lens: str) -> None:
        began = time.time()
        try:
            slots[index] = _call_model_with_lens(model_id, config, lens, target_code, target_type)
        except Exception as exc:
            # Without this, a crashed worker would leave its slot empty and
            # be misreported as a timeout.
            logger.exception("Audit worker for %s (%s lens) failed", model_id, lens)
            slots[index] = _error_result(
                model_id, config, lens,
                f"{type(exc).__name__}: {exc}",
                round(time.time() - began, 1),
            )

    # Call models in parallel
    threads = []
    for i, (model_id, config, lens) in enumerate(assignments):
        t = threading.Thread(target=_run, args=(i, model_id, config, lens), daemon=True)
        threads.append(t)
        t.start()

    # Wait against one shared deadline; joining each thread with the full
    # timeout in turn could otherwise wait N * MODEL_TIMEOUT in total.
    deadline = time.monotonic() + MODEL_TIMEOUT
    for t in threads:
        t.join(timeout=max(0.0, deadline - time.monotonic()))

    # Snapshot, replacing unfinished slots with timeout errors
    results: List[Dict[str, Any]] = []
    for slot, (model_id, config, lens) in zip(list(slots), assignments):
        if slot is None:
            slot = _error_result(
                model_id, config, lens,
                f"Timed out after {MODEL_TIMEOUT} seconds",
                MODEL_TIMEOUT,
            )
        results.append(slot)

    audit_results = {
        "status": "ok",
        "target": target if target_type != "file" else str(target),
        "target_type": target_type,
        "target_display": target_display,
        "timestamp": timestamp,
        "model_results": results,
        "elapsed_seconds": round(time.time() - start_time, 1),
    }

    # Synthesize
    synthesis_result = synthesize(audit_results)
    audit_results["synthesis"] = synthesis_result

    # Format output
    audit_results["formatted"] = format_audit_output(audit_results, synthesis_result)

    # Save to disk (best effort; adds "saved_to" on success)
    _save_audit(audit_results, timestamp)

    return audit_results
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
def _save_audit(audit_results: Dict[str, Any], timestamp: str) -> Optional[str]:
    """Save audit results to ~/.delimit/audits/ as JSON (best effort).

    The file is named ``<timestamp>.json``. If that name is already taken
    (two audits within the same second), ``_1``, ``_2``, ... is appended
    instead of overwriting the earlier report. Each model's "raw_response"
    is left out of the saved copy because it can be large.

    Args:
        audit_results: The audit dict. On success its "saved_to" key is set
            to the path written.
        timestamp: Timestamp string used as the file name stem.

    Returns:
        The path written, or ``None`` if saving failed (the failure is logged
        and never raised).
    """
    try:
        AUDIT_DIR.mkdir(parents=True, exist_ok=True)

        # Deep-copy through JSON so stripping raw_response does not touch the
        # caller's data; default=str stringifies anything non-serializable.
        save_data = json.loads(json.dumps(audit_results, default=str))
        for r in save_data.get("model_results", []):
            r.pop("raw_response", None)
        payload = json.dumps(save_data, indent=2, default=str)

        attempt = 0
        while True:
            suffix = "" if attempt == 0 else f"_{attempt}"
            path = AUDIT_DIR / f"{timestamp}{suffix}.json"
            try:
                # Mode "x" fails if the file exists, so an earlier audit is
                # never overwritten.
                with path.open("x", encoding="utf-8") as handle:
                    handle.write(payload)
                break
            except FileExistsError:
                attempt += 1

        audit_results["saved_to"] = str(path)
        return str(path)
    except Exception as e:
        logger.warning("Failed to save audit results: %s", e)
        return None
|