delimit-cli 3.15.13 → 3.15.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/gateway/ai/server.py +2 -9
- package/package.json +7 -1
- package/scripts/security-check.sh +2 -2
- package/gateway/ai/cross_model_audit.py +0 -600
- package/gateway/ai/github_scanner.py +0 -622
- package/gateway/ai/handoff_receipts.py +0 -409
- package/gateway/ai/reddit_scanner.py +0 -562
- package/gateway/ai/session_phoenix.py +0 -371
- package/gateway/ai/toolcard_cache.py +0 -327
package/gateway/ai/server.py
CHANGED
|
@@ -5700,14 +5700,7 @@ def delimit_social_post(text: str = "", category: str = "", platform: str = "twi
|
|
|
5700
5700
|
Every post provides value — tips, insights, governance wisdom.
|
|
5701
5701
|
Max 2 posts per day to stay authentic.
|
|
5702
5702
|
|
|
5703
|
-
|
|
5704
|
-
- Twitter: confident technical brand. Direct, professional, ALWAYS POSITIVE.
|
|
5705
|
-
Celebrate wins and progress. Never complain or air gaps publicly.
|
|
5706
|
-
No em dashes or en dashes. Include install commands when relevant.
|
|
5707
|
-
- Reddit: proud builder posting as u/delimitdev. Casual, typed-on-phone energy.
|
|
5708
|
-
ALWAYS POSITIVE. Mention Delimit ONLY when genuinely helpful.
|
|
5709
|
-
NO bullet points/lists/bold/em dashes. 2-3 sentences max.
|
|
5710
|
-
- LinkedIn: professional hook + insight + CTA
|
|
5703
|
+
Platform tone and posting guidelines are loaded from user config at runtime.
|
|
5711
5704
|
|
|
5712
5705
|
Args:
|
|
5713
5706
|
text: Tweet text. Leave empty to auto-generate.
|
|
@@ -5744,7 +5737,7 @@ def delimit_social_post(text: str = "", category: str = "", platform: str = "twi
|
|
|
5744
5737
|
from ai.social import store_draft_message_id
|
|
5745
5738
|
|
|
5746
5739
|
# Build contextual email body so the founder knows exactly what to do
|
|
5747
|
-
_acct = account or ("
|
|
5740
|
+
_acct = account or os.environ.get("DELIMIT_SOCIAL_REDDIT_ACCOUNT", "delimit") if platform == "reddit" else account or os.environ.get("DELIMIT_SOCIAL_TWITTER_ACCOUNT", "delimit_ai")
|
|
5748
5741
|
_lines = []
|
|
5749
5742
|
|
|
5750
5743
|
if platform == "reddit":
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "delimit-cli",
|
|
3
3
|
"mcpName": "io.github.delimit-ai/delimit-mcp-server",
|
|
4
|
-
"version": "3.15.13",
|
|
4
|
+
"version": "3.15.14",
|
|
5
5
|
"description": "Unify Claude Code, Codex, Cursor, and Gemini CLI with persistent context, governance, and multi-model debate.",
|
|
6
6
|
"main": "index.js",
|
|
7
7
|
"files": [
|
|
@@ -14,6 +14,12 @@
|
|
|
14
14
|
"!gateway/ai/founding_users.py",
|
|
15
15
|
"!gateway/ai/inbox_daemon.py",
|
|
16
16
|
"!gateway/ai/deliberation.py",
|
|
17
|
+
"!gateway/ai/reddit_scanner.py",
|
|
18
|
+
"!gateway/ai/github_scanner.py",
|
|
19
|
+
"!gateway/ai/cross_model_audit.py",
|
|
20
|
+
"!gateway/ai/session_phoenix.py",
|
|
21
|
+
"!gateway/ai/handoff_receipts.py",
|
|
22
|
+
"!gateway/ai/toolcard_cache.py",
|
|
17
23
|
"scripts/",
|
|
18
24
|
"server.json",
|
|
19
25
|
"README.md",
|
|
package/scripts/security-check.sh
CHANGED
|
@@ -25,7 +25,7 @@ fi
|
|
|
25
25
|
|
|
26
26
|
# 2. Blocklist terms
|
|
27
27
|
echo -n " Blocklist... "
|
|
28
|
-
BLOCKLIST="jamsonsholdings|Bladabah|Domainvested26|Delimit26|home/jamsons|infracore|crypttrx|\.wr_env"
|
|
28
|
+
BLOCKLIST="jamsonsholdings|Bladabah|Domainvested26|Delimit26|home/jamsons|infracore|crypttrx|\.wr_env|delimitdev|typed-on-phone|em dash.*ai tell|PAIN_CATEGORIES|VENTURE_CONFIG|VENTURE_SUBREDDITS|karma_building"
|
|
29
29
|
if grep -rEi "$BLOCKLIST" "$TMPDIR/package/" --include="*.py" --include="*.js" --include="*.json" 2>/dev/null; then
|
|
30
30
|
echo "❌ BLOCKED TERMS FOUND"
|
|
31
31
|
FAIL=1
|
|
@@ -44,7 +44,7 @@ fi
|
|
|
44
44
|
|
|
45
45
|
# 4. Proprietary files that shouldn't ship
|
|
46
46
|
echo -n " Proprietary files... "
|
|
47
|
-
PROPRIETARY="social_target\.py|social\.py|founding_users\.py|inbox_daemon\.py|deliberation\.py"
|
|
47
|
+
PROPRIETARY="social_target\.py|social\.py|founding_users\.py|inbox_daemon\.py|deliberation\.py|reddit_scanner\.py|github_scanner\.py|cross_model_audit\.py|session_phoenix\.py|handoff_receipts\.py|toolcard_cache\.py"
|
|
48
48
|
if find "$TMPDIR/package/" -name "*.py" | grep -Ei "$PROPRIETARY" 2>/dev/null; then
|
|
49
49
|
echo "❌ PROPRIETARY FILES IN PACKAGE"
|
|
50
50
|
FAIL=1
|
|
package/gateway/ai/cross_model_audit.py
DELETED
|
@@ -1,600 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Delimit Cross-Model Audit — Trust through triangulation.
|
|
3
|
-
|
|
4
|
-
Run the same code review through 3 different AI models, each with a different
|
|
5
|
-
review lens (security, correctness, governance). A synthesis step merges their
|
|
6
|
-
findings: agreements become high-confidence, disagreements surface tradeoffs.
|
|
7
|
-
|
|
8
|
-
This is different from `delimit_deliberate` (which debates a question).
|
|
9
|
-
Cross-Model Audit reviews actual code/specs for specific issues.
|
|
10
|
-
|
|
11
|
-
Models are configured via ~/.delimit/models.json or ~/.delimit/secrets/hosted-models.json.
|
|
12
|
-
Uses the same infrastructure as deliberation.py.
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
import json
|
|
16
|
-
import logging
|
|
17
|
-
import os
|
|
18
|
-
import re
|
|
19
|
-
import threading
|
|
20
|
-
import time
|
|
21
|
-
from datetime import datetime, timezone
|
|
22
|
-
from pathlib import Path
|
|
23
|
-
from typing import Any, Dict, List, Optional, Tuple
|
|
24
|
-
|
|
25
|
-
logger = logging.getLogger("delimit.cross_model_audit")
|
|
26
|
-
|
|
27
|
-
AUDIT_DIR = Path.home() / ".delimit" / "audits"
|
|
28
|
-
|
|
29
|
-
# ═══════════════════════════════════════════════════════════════════════
|
|
30
|
-
# Audit Lenses — each model gets a different review focus
|
|
31
|
-
# ═══════════════════════════════════════════════════════════════════════
|
|
32
|
-
|
|
33
|
-
AUDIT_LENSES = {
|
|
34
|
-
"security": (
|
|
35
|
-
"Review for security vulnerabilities: injection, auth bypass, data exposure, "
|
|
36
|
-
"privilege escalation, secret leaks. Focus on exploitable issues."
|
|
37
|
-
),
|
|
38
|
-
"correctness": (
|
|
39
|
-
"Review for logical errors, edge cases, off-by-one, race conditions, "
|
|
40
|
-
"null handling, error propagation. Focus on bugs that cause wrong behavior."
|
|
41
|
-
),
|
|
42
|
-
"governance": (
|
|
43
|
-
"Review for breaking changes, API contract violations, backward compatibility, "
|
|
44
|
-
"schema drift, missing validation. Focus on issues that affect consumers."
|
|
45
|
-
),
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
# Severity levels for structured findings
|
|
49
|
-
SEVERITY_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
|
|
50
|
-
|
|
51
|
-
MODEL_TIMEOUT = 60 # seconds
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def _build_lens_prompt(lens_name: str, lens_description: str, target_code: str, target_type: str) -> str:
|
|
55
|
-
"""Build the prompt for a model with its assigned lens."""
|
|
56
|
-
type_label = {
|
|
57
|
-
"file": "source file",
|
|
58
|
-
"diff": "git diff",
|
|
59
|
-
"snippet": "code snippet",
|
|
60
|
-
}.get(target_type, "code")
|
|
61
|
-
|
|
62
|
-
return f"""You are a code auditor focused on **{lens_name}**.
|
|
63
|
-
|
|
64
|
-
{lens_description}
|
|
65
|
-
|
|
66
|
-
Analyze the following {type_label} and return your findings as a JSON array.
|
|
67
|
-
Each finding must be a JSON object with these fields:
|
|
68
|
-
- "severity": one of "critical", "high", "medium", "low", "info"
|
|
69
|
-
- "location": line number, function name, or description of where the issue is (e.g. "Line 42", "function validate_token", "JWT handling block")
|
|
70
|
-
- "finding": clear description of the issue
|
|
71
|
-
- "recommendation": what to do about it
|
|
72
|
-
|
|
73
|
-
Return ONLY a JSON array. No markdown fences, no explanatory text before or after.
|
|
74
|
-
If you find no issues, return an empty array: []
|
|
75
|
-
|
|
76
|
-
--- BEGIN CODE ---
|
|
77
|
-
{target_code}
|
|
78
|
-
--- END CODE ---"""
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def _resolve_target(target: str, target_type: str) -> Tuple[str, Optional[str]]:
|
|
82
|
-
"""Resolve the target to actual code content.
|
|
83
|
-
|
|
84
|
-
Returns (code_content, error_message).
|
|
85
|
-
"""
|
|
86
|
-
if target_type == "file":
|
|
87
|
-
path = Path(target).expanduser()
|
|
88
|
-
if not path.exists():
|
|
89
|
-
return "", f"File not found: {target}"
|
|
90
|
-
if not path.is_file():
|
|
91
|
-
return "", f"Not a file: {target}"
|
|
92
|
-
try:
|
|
93
|
-
content = path.read_text(errors="replace")
|
|
94
|
-
if len(content) > 50000:
|
|
95
|
-
content = content[:50000] + "\n\n[... truncated at 50,000 characters ...]"
|
|
96
|
-
return content, None
|
|
97
|
-
except Exception as e:
|
|
98
|
-
return "", f"Failed to read file: {e}"
|
|
99
|
-
elif target_type in ("diff", "snippet"):
|
|
100
|
-
if not target.strip():
|
|
101
|
-
return "", "Empty target provided."
|
|
102
|
-
return target, None
|
|
103
|
-
else:
|
|
104
|
-
return "", f"Unknown target_type: {target_type}. Use 'file', 'diff', or 'snippet'."
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
def _parse_model_findings(raw_response: str, model_name: str) -> List[Dict[str, str]]:
|
|
108
|
-
"""Parse structured findings from a model response.
|
|
109
|
-
|
|
110
|
-
Tries to extract a JSON array from the response. Handles markdown fences
|
|
111
|
-
and other common formatting issues.
|
|
112
|
-
"""
|
|
113
|
-
text = raw_response.strip()
|
|
114
|
-
|
|
115
|
-
# Strip markdown code fences
|
|
116
|
-
if text.startswith("```"):
|
|
117
|
-
lines = text.split("\n")
|
|
118
|
-
# Remove first line (```json or ```) and last line (```)
|
|
119
|
-
lines = [l for l in lines[1:] if not l.strip().startswith("```")]
|
|
120
|
-
text = "\n".join(lines).strip()
|
|
121
|
-
|
|
122
|
-
# Try direct parse
|
|
123
|
-
try:
|
|
124
|
-
findings = json.loads(text)
|
|
125
|
-
if isinstance(findings, list):
|
|
126
|
-
return _validate_findings(findings, model_name)
|
|
127
|
-
except json.JSONDecodeError:
|
|
128
|
-
pass
|
|
129
|
-
|
|
130
|
-
# Try to extract JSON array from text
|
|
131
|
-
match = re.search(r'\[.*\]', text, re.DOTALL)
|
|
132
|
-
if match:
|
|
133
|
-
try:
|
|
134
|
-
findings = json.loads(match.group())
|
|
135
|
-
if isinstance(findings, list):
|
|
136
|
-
return _validate_findings(findings, model_name)
|
|
137
|
-
except json.JSONDecodeError:
|
|
138
|
-
pass
|
|
139
|
-
|
|
140
|
-
# Could not parse — return the raw response as a single finding
|
|
141
|
-
logger.warning("Could not parse structured findings from %s, wrapping raw response", model_name)
|
|
142
|
-
return [{
|
|
143
|
-
"severity": "info",
|
|
144
|
-
"location": "general",
|
|
145
|
-
"finding": f"[Unstructured response from {model_name}]: {raw_response[:500]}",
|
|
146
|
-
"recommendation": "Review raw model output manually.",
|
|
147
|
-
}]
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
def _validate_findings(findings: List, model_name: str) -> List[Dict[str, str]]:
|
|
151
|
-
"""Validate and normalize finding objects."""
|
|
152
|
-
validated = []
|
|
153
|
-
for f in findings:
|
|
154
|
-
if not isinstance(f, dict):
|
|
155
|
-
continue
|
|
156
|
-
validated.append({
|
|
157
|
-
"severity": str(f.get("severity", "info")).lower(),
|
|
158
|
-
"location": str(f.get("location", "unknown")),
|
|
159
|
-
"finding": str(f.get("finding", "")),
|
|
160
|
-
"recommendation": str(f.get("recommendation", "")),
|
|
161
|
-
})
|
|
162
|
-
return validated
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
def _call_model_with_lens(
|
|
166
|
-
model_id: str,
|
|
167
|
-
model_config: Dict,
|
|
168
|
-
lens_name: str,
|
|
169
|
-
target_code: str,
|
|
170
|
-
target_type: str,
|
|
171
|
-
) -> Dict[str, Any]:
|
|
172
|
-
"""Call a single model with its lens prompt. Returns result dict."""
|
|
173
|
-
from ai.deliberation import _call_model
|
|
174
|
-
|
|
175
|
-
lens_description = AUDIT_LENSES[lens_name]
|
|
176
|
-
prompt = _build_lens_prompt(lens_name, lens_description, target_code, target_type)
|
|
177
|
-
system_prompt = (
|
|
178
|
-
f"You are a senior code auditor performing a {lens_name} review. "
|
|
179
|
-
"Return findings as a JSON array. Be thorough but precise."
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
start = time.time()
|
|
183
|
-
try:
|
|
184
|
-
raw = _call_model(model_id, model_config, prompt, system_prompt)
|
|
185
|
-
elapsed = round(time.time() - start, 1)
|
|
186
|
-
except Exception as e:
|
|
187
|
-
elapsed = round(time.time() - start, 1)
|
|
188
|
-
return {
|
|
189
|
-
"model_id": model_id,
|
|
190
|
-
"model_name": model_config.get("name", model_id),
|
|
191
|
-
"lens": lens_name,
|
|
192
|
-
"status": "error",
|
|
193
|
-
"error": str(e),
|
|
194
|
-
"elapsed_seconds": elapsed,
|
|
195
|
-
"findings": [],
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
# Check for model-level errors
|
|
199
|
-
if raw.startswith("[") and "unavailable" in raw.lower() or "error" in raw.lower():
|
|
200
|
-
if raw.startswith("[") and raw.endswith("]") and ("unavailable" in raw or "error:" in raw):
|
|
201
|
-
return {
|
|
202
|
-
"model_id": model_id,
|
|
203
|
-
"model_name": model_config.get("name", model_id),
|
|
204
|
-
"lens": lens_name,
|
|
205
|
-
"status": "error",
|
|
206
|
-
"error": raw,
|
|
207
|
-
"elapsed_seconds": elapsed,
|
|
208
|
-
"findings": [],
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
findings = _parse_model_findings(raw, model_config.get("name", model_id))
|
|
212
|
-
|
|
213
|
-
return {
|
|
214
|
-
"model_id": model_id,
|
|
215
|
-
"model_name": model_config.get("name", model_id),
|
|
216
|
-
"lens": lens_name,
|
|
217
|
-
"status": "ok",
|
|
218
|
-
"elapsed_seconds": elapsed,
|
|
219
|
-
"findings": findings,
|
|
220
|
-
"raw_response": raw,
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
def _normalize_location(loc: str) -> str:
|
|
225
|
-
"""Normalize a location string for matching purposes."""
|
|
226
|
-
loc = loc.lower().strip()
|
|
227
|
-
# Extract line numbers
|
|
228
|
-
line_match = re.search(r'line\s*(\d+)', loc)
|
|
229
|
-
if line_match:
|
|
230
|
-
return f"line_{line_match.group(1)}"
|
|
231
|
-
# Extract function names
|
|
232
|
-
func_match = re.search(r'function\s+(\w+)', loc)
|
|
233
|
-
if func_match:
|
|
234
|
-
return f"func_{func_match.group(1)}"
|
|
235
|
-
# Fall back to cleaned string
|
|
236
|
-
return re.sub(r'[^a-z0-9_]', '_', loc).strip('_')
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
def synthesize(audit_results: Dict[str, Any]) -> Dict[str, Any]:
|
|
240
|
-
"""Merge findings from multiple model audits.
|
|
241
|
-
|
|
242
|
-
Returns:
|
|
243
|
-
agreements: findings flagged by 2+ models (high confidence)
|
|
244
|
-
unique_findings: flagged by only 1 model (review needed)
|
|
245
|
-
disagreements: models contradict each other on severity/recommendation
|
|
246
|
-
summary: one-paragraph synthesis
|
|
247
|
-
"""
|
|
248
|
-
model_results = audit_results.get("model_results", [])
|
|
249
|
-
if not model_results:
|
|
250
|
-
return {
|
|
251
|
-
"agreements": [],
|
|
252
|
-
"unique_findings": [],
|
|
253
|
-
"disagreements": [],
|
|
254
|
-
"summary": "No model results to synthesize.",
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
# Collect all findings with their source lens
|
|
258
|
-
all_findings: List[Dict[str, Any]] = []
|
|
259
|
-
for result in model_results:
|
|
260
|
-
if result.get("status") != "ok":
|
|
261
|
-
continue
|
|
262
|
-
lens = result["lens"]
|
|
263
|
-
model_name = result["model_name"]
|
|
264
|
-
for f in result.get("findings", []):
|
|
265
|
-
all_findings.append({
|
|
266
|
-
**f,
|
|
267
|
-
"lens": lens,
|
|
268
|
-
"model": model_name,
|
|
269
|
-
"norm_location": _normalize_location(f.get("location", "")),
|
|
270
|
-
})
|
|
271
|
-
|
|
272
|
-
if not all_findings:
|
|
273
|
-
return {
|
|
274
|
-
"agreements": [],
|
|
275
|
-
"unique_findings": [],
|
|
276
|
-
"disagreements": [],
|
|
277
|
-
"summary": "All models returned clean results. No issues found.",
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
# Group by normalized location
|
|
281
|
-
location_groups: Dict[str, List[Dict]] = {}
|
|
282
|
-
for f in all_findings:
|
|
283
|
-
key = f["norm_location"]
|
|
284
|
-
location_groups.setdefault(key, []).append(f)
|
|
285
|
-
|
|
286
|
-
agreements = []
|
|
287
|
-
unique_findings = []
|
|
288
|
-
disagreements = []
|
|
289
|
-
|
|
290
|
-
for loc_key, findings in location_groups.items():
|
|
291
|
-
models_involved = set(f["model"] for f in findings)
|
|
292
|
-
lenses_involved = set(f["lens"] for f in findings)
|
|
293
|
-
|
|
294
|
-
if len(models_involved) >= 2:
|
|
295
|
-
# Check for severity disagreements
|
|
296
|
-
severities = set(f["severity"] for f in findings)
|
|
297
|
-
if len(severities) > 1:
|
|
298
|
-
# Models agree on location but disagree on severity
|
|
299
|
-
disagreements.append({
|
|
300
|
-
"location": findings[0]["location"],
|
|
301
|
-
"models": {f["model"]: {
|
|
302
|
-
"lens": f["lens"],
|
|
303
|
-
"severity": f["severity"],
|
|
304
|
-
"finding": f["finding"],
|
|
305
|
-
"recommendation": f["recommendation"],
|
|
306
|
-
} for f in findings},
|
|
307
|
-
"type": "severity_disagreement",
|
|
308
|
-
})
|
|
309
|
-
else:
|
|
310
|
-
# Full agreement
|
|
311
|
-
agreements.append({
|
|
312
|
-
"location": findings[0]["location"],
|
|
313
|
-
"severity": findings[0]["severity"],
|
|
314
|
-
"models_agreed": list(models_involved),
|
|
315
|
-
"lenses": list(lenses_involved),
|
|
316
|
-
"findings": {f["lens"]: {
|
|
317
|
-
"finding": f["finding"],
|
|
318
|
-
"recommendation": f["recommendation"],
|
|
319
|
-
} for f in findings},
|
|
320
|
-
})
|
|
321
|
-
else:
|
|
322
|
-
# Only one model flagged this
|
|
323
|
-
f = findings[0]
|
|
324
|
-
unique_findings.append({
|
|
325
|
-
"location": f["location"],
|
|
326
|
-
"severity": f["severity"],
|
|
327
|
-
"lens": f["lens"],
|
|
328
|
-
"model": f["model"],
|
|
329
|
-
"finding": f["finding"],
|
|
330
|
-
"recommendation": f["recommendation"],
|
|
331
|
-
})
|
|
332
|
-
|
|
333
|
-
# Sort by severity
|
|
334
|
-
agreements.sort(key=lambda x: SEVERITY_ORDER.get(x["severity"], 5))
|
|
335
|
-
unique_findings.sort(key=lambda x: SEVERITY_ORDER.get(x["severity"], 5))
|
|
336
|
-
|
|
337
|
-
# Build summary
|
|
338
|
-
total = len(agreements) + len(unique_findings) + len(disagreements)
|
|
339
|
-
successful_models = [r for r in model_results if r.get("status") == "ok"]
|
|
340
|
-
failed_models = [r for r in model_results if r.get("status") != "ok"]
|
|
341
|
-
|
|
342
|
-
summary_parts = [
|
|
343
|
-
f"{len(agreements)} high-confidence finding(s)",
|
|
344
|
-
f"{len(unique_findings)} unique catch(es)",
|
|
345
|
-
f"{len(disagreements)} tradeoff(s)",
|
|
346
|
-
]
|
|
347
|
-
summary = f"{total} total findings across {len(successful_models)} models: " + ", ".join(summary_parts) + "."
|
|
348
|
-
|
|
349
|
-
if failed_models:
|
|
350
|
-
failed_names = [r.get("model_name", "unknown") for r in failed_models]
|
|
351
|
-
summary += f" ({', '.join(failed_names)} failed — results are partial.)"
|
|
352
|
-
|
|
353
|
-
if agreements:
|
|
354
|
-
critical_count = sum(1 for a in agreements if a["severity"] == "critical")
|
|
355
|
-
if critical_count:
|
|
356
|
-
summary += f" {critical_count} CRITICAL issue(s) confirmed by multiple models."
|
|
357
|
-
|
|
358
|
-
return {
|
|
359
|
-
"agreements": agreements,
|
|
360
|
-
"unique_findings": unique_findings,
|
|
361
|
-
"disagreements": disagreements,
|
|
362
|
-
"summary": summary,
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
def format_audit_output(audit_results: Dict[str, Any], synthesis: Dict[str, Any]) -> str:
|
|
367
|
-
"""Format audit results as human-readable text."""
|
|
368
|
-
lines = []
|
|
369
|
-
lines.append("=== CROSS-MODEL AUDIT ===")
|
|
370
|
-
|
|
371
|
-
target_display = audit_results.get("target_display", audit_results.get("target", "unknown"))
|
|
372
|
-
lines.append(f"Target: {target_display}")
|
|
373
|
-
|
|
374
|
-
# Model assignments
|
|
375
|
-
model_parts = []
|
|
376
|
-
for r in audit_results.get("model_results", []):
|
|
377
|
-
name = r.get("model_name", r.get("model_id", "?"))
|
|
378
|
-
lens = r.get("lens", "?")
|
|
379
|
-
status = "" if r.get("status") == "ok" else " [FAILED]"
|
|
380
|
-
model_parts.append(f"{name} ({lens}){status}")
|
|
381
|
-
lines.append(f"Models: {' | '.join(model_parts)}")
|
|
382
|
-
lines.append("")
|
|
383
|
-
|
|
384
|
-
# High confidence
|
|
385
|
-
agreements = synthesis.get("agreements", [])
|
|
386
|
-
if agreements:
|
|
387
|
-
lines.append("HIGH CONFIDENCE (2+ models agree):")
|
|
388
|
-
for a in agreements:
|
|
389
|
-
sev = a["severity"].upper()
|
|
390
|
-
loc = a["location"]
|
|
391
|
-
lines.append(f" [{sev}] {loc}")
|
|
392
|
-
for lens, detail in a.get("findings", {}).items():
|
|
393
|
-
lines.append(f" {lens.title()}: {detail['finding']}")
|
|
394
|
-
# Show first recommendation
|
|
395
|
-
recs = [d["recommendation"] for d in a.get("findings", {}).values() if d.get("recommendation")]
|
|
396
|
-
if recs:
|
|
397
|
-
lines.append(f" Action: {recs[0]}")
|
|
398
|
-
lines.append("")
|
|
399
|
-
else:
|
|
400
|
-
lines.append("HIGH CONFIDENCE: None (no multi-model agreement)")
|
|
401
|
-
lines.append("")
|
|
402
|
-
|
|
403
|
-
# Unique findings
|
|
404
|
-
unique = synthesis.get("unique_findings", [])
|
|
405
|
-
if unique:
|
|
406
|
-
lines.append("UNIQUE FINDINGS (single model):")
|
|
407
|
-
for u in unique:
|
|
408
|
-
sev = u["severity"].upper()
|
|
409
|
-
loc = u["location"]
|
|
410
|
-
lens = u["lens"].title()
|
|
411
|
-
lines.append(f" [{sev}] {loc} ({lens})")
|
|
412
|
-
lines.append(f" {u['finding']}")
|
|
413
|
-
if u.get("recommendation"):
|
|
414
|
-
lines.append(f" Recommendation: {u['recommendation']}")
|
|
415
|
-
lines.append("")
|
|
416
|
-
else:
|
|
417
|
-
lines.append("UNIQUE FINDINGS: None")
|
|
418
|
-
lines.append("")
|
|
419
|
-
|
|
420
|
-
# Disagreements
|
|
421
|
-
disagreements = synthesis.get("disagreements", [])
|
|
422
|
-
if disagreements:
|
|
423
|
-
lines.append("DISAGREEMENTS:")
|
|
424
|
-
for d in disagreements:
|
|
425
|
-
loc = d["location"]
|
|
426
|
-
lines.append(f" {loc}:")
|
|
427
|
-
for model_name, detail in d.get("models", {}).items():
|
|
428
|
-
lines.append(f" {detail['lens'].title()} ({model_name}): {detail['finding']} [severity: {detail['severity']}]")
|
|
429
|
-
lines.append(f" Tradeoff: review and decide based on your risk tolerance.")
|
|
430
|
-
lines.append("")
|
|
431
|
-
else:
|
|
432
|
-
lines.append("DISAGREEMENTS: None")
|
|
433
|
-
lines.append("")
|
|
434
|
-
|
|
435
|
-
lines.append(f"Summary: {synthesis.get('summary', 'No summary available.')}")
|
|
436
|
-
|
|
437
|
-
return "\n".join(lines)
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
def _select_models_and_lenses(
|
|
441
|
-
lenses: Optional[List[str]] = None,
|
|
442
|
-
models: Optional[List[str]] = None,
|
|
443
|
-
) -> Tuple[List[Tuple[str, Dict, str]], Optional[str]]:
|
|
444
|
-
"""Select which models get which lenses.
|
|
445
|
-
|
|
446
|
-
Returns (assignments, error).
|
|
447
|
-
Each assignment is (model_id, model_config, lens_name).
|
|
448
|
-
"""
|
|
449
|
-
from ai.deliberation import get_models_config
|
|
450
|
-
|
|
451
|
-
# Resolve lenses
|
|
452
|
-
active_lenses = list(lenses) if lenses else list(AUDIT_LENSES.keys())
|
|
453
|
-
for lens in active_lenses:
|
|
454
|
-
if lens not in AUDIT_LENSES:
|
|
455
|
-
return [], f"Unknown lens: {lens}. Available: {', '.join(AUDIT_LENSES.keys())}"
|
|
456
|
-
|
|
457
|
-
# Get available models
|
|
458
|
-
config = get_models_config(allow_hosted_fallback=True)
|
|
459
|
-
enabled = {k: v for k, v in config.items() if v.get("enabled")}
|
|
460
|
-
|
|
461
|
-
if not enabled:
|
|
462
|
-
return [], (
|
|
463
|
-
"No models available for audit. Configure API keys in ~/.delimit/models.json "
|
|
464
|
-
"or ensure hosted models are available."
|
|
465
|
-
)
|
|
466
|
-
|
|
467
|
-
# If specific models requested, filter
|
|
468
|
-
if models:
|
|
469
|
-
filtered = {}
|
|
470
|
-
for m in models:
|
|
471
|
-
m_lower = m.lower()
|
|
472
|
-
if m_lower in enabled:
|
|
473
|
-
filtered[m_lower] = enabled[m_lower]
|
|
474
|
-
else:
|
|
475
|
-
# Try partial match
|
|
476
|
-
for k, v in enabled.items():
|
|
477
|
-
if m_lower in k.lower() or m_lower in v.get("name", "").lower():
|
|
478
|
-
filtered[k] = v
|
|
479
|
-
break
|
|
480
|
-
if not filtered:
|
|
481
|
-
return [], f"None of the requested models ({', '.join(models)}) are available."
|
|
482
|
-
enabled = filtered
|
|
483
|
-
|
|
484
|
-
# Assign lenses to models round-robin
|
|
485
|
-
model_ids = list(enabled.keys())
|
|
486
|
-
assignments = []
|
|
487
|
-
for i, lens in enumerate(active_lenses):
|
|
488
|
-
model_id = model_ids[i % len(model_ids)]
|
|
489
|
-
assignments.append((model_id, enabled[model_id], lens))
|
|
490
|
-
|
|
491
|
-
return assignments, None
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
def audit(
|
|
495
|
-
target: str,
|
|
496
|
-
target_type: str = "file",
|
|
497
|
-
lenses: Optional[List[str]] = None,
|
|
498
|
-
models: Optional[List[str]] = None,
|
|
499
|
-
) -> Dict[str, Any]:
|
|
500
|
-
"""Run cross-model audit on a target.
|
|
501
|
-
|
|
502
|
-
Args:
|
|
503
|
-
target: File path, diff text, or code snippet.
|
|
504
|
-
target_type: "file", "diff", or "snippet".
|
|
505
|
-
lenses: Which lenses to apply (default: all 3).
|
|
506
|
-
models: Which models to use (default: auto-detect).
|
|
507
|
-
|
|
508
|
-
Returns:
|
|
509
|
-
Full audit results with model_results, synthesis, and formatted output.
|
|
510
|
-
"""
|
|
511
|
-
start_time = time.time()
|
|
512
|
-
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
|
|
513
|
-
|
|
514
|
-
# Resolve target
|
|
515
|
-
target_code, err = _resolve_target(target, target_type)
|
|
516
|
-
if err:
|
|
517
|
-
return {"status": "error", "error": err}
|
|
518
|
-
|
|
519
|
-
# Select models and assign lenses
|
|
520
|
-
assignments, err = _select_models_and_lenses(lenses, models)
|
|
521
|
-
if err:
|
|
522
|
-
return {"status": "error", "error": err}
|
|
523
|
-
|
|
524
|
-
# Target display name
|
|
525
|
-
if target_type == "file":
|
|
526
|
-
target_display = target
|
|
527
|
-
elif target_type == "diff":
|
|
528
|
-
first_line = target.strip().split("\n")[0][:80]
|
|
529
|
-
target_display = f"diff: {first_line}..."
|
|
530
|
-
else:
|
|
531
|
-
target_display = f"snippet ({len(target_code)} chars)"
|
|
532
|
-
|
|
533
|
-
# Call models in parallel
|
|
534
|
-
results: List[Dict[str, Any]] = [None] * len(assignments) # type: ignore
|
|
535
|
-
threads = []
|
|
536
|
-
|
|
537
|
-
def _run(index: int, model_id: str, config: Dict, lens: str):
|
|
538
|
-
results[index] = _call_model_with_lens(model_id, config, lens, target_code, target_type)
|
|
539
|
-
|
|
540
|
-
for i, (model_id, config, lens) in enumerate(assignments):
|
|
541
|
-
t = threading.Thread(target=_run, args=(i, model_id, config, lens), daemon=True)
|
|
542
|
-
threads.append(t)
|
|
543
|
-
t.start()
|
|
544
|
-
|
|
545
|
-
# Wait with timeout
|
|
546
|
-
for t in threads:
|
|
547
|
-
t.join(timeout=MODEL_TIMEOUT)
|
|
548
|
-
|
|
549
|
-
# Replace any None results (timed out threads)
|
|
550
|
-
for i, r in enumerate(results):
|
|
551
|
-
if r is None:
|
|
552
|
-
model_id, config, lens = assignments[i]
|
|
553
|
-
results[i] = {
|
|
554
|
-
"model_id": model_id,
|
|
555
|
-
"model_name": config.get("name", model_id),
|
|
556
|
-
"lens": lens,
|
|
557
|
-
"status": "error",
|
|
558
|
-
"error": "Timed out after 60 seconds",
|
|
559
|
-
"elapsed_seconds": MODEL_TIMEOUT,
|
|
560
|
-
"findings": [],
|
|
561
|
-
}
|
|
562
|
-
|
|
563
|
-
audit_results = {
|
|
564
|
-
"status": "ok",
|
|
565
|
-
"target": target if target_type != "file" else str(target),
|
|
566
|
-
"target_type": target_type,
|
|
567
|
-
"target_display": target_display,
|
|
568
|
-
"timestamp": timestamp,
|
|
569
|
-
"model_results": results,
|
|
570
|
-
"elapsed_seconds": round(time.time() - start_time, 1),
|
|
571
|
-
}
|
|
572
|
-
|
|
573
|
-
# Synthesize
|
|
574
|
-
synthesis_result = synthesize(audit_results)
|
|
575
|
-
audit_results["synthesis"] = synthesis_result
|
|
576
|
-
|
|
577
|
-
# Format output
|
|
578
|
-
audit_results["formatted"] = format_audit_output(audit_results, synthesis_result)
|
|
579
|
-
|
|
580
|
-
# Save to disk
|
|
581
|
-
_save_audit(audit_results, timestamp)
|
|
582
|
-
|
|
583
|
-
return audit_results
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
def _save_audit(audit_results: Dict[str, Any], timestamp: str) -> Optional[str]:
|
|
587
|
-
"""Save audit results to ~/.delimit/audits/."""
|
|
588
|
-
try:
|
|
589
|
-
AUDIT_DIR.mkdir(parents=True, exist_ok=True)
|
|
590
|
-
path = AUDIT_DIR / f"{timestamp}.json"
|
|
591
|
-
# Remove raw_response before saving (can be large)
|
|
592
|
-
save_data = json.loads(json.dumps(audit_results, default=str))
|
|
593
|
-
for r in save_data.get("model_results", []):
|
|
594
|
-
r.pop("raw_response", None)
|
|
595
|
-
path.write_text(json.dumps(save_data, indent=2, default=str))
|
|
596
|
-
audit_results["saved_to"] = str(path)
|
|
597
|
-
return str(path)
|
|
598
|
-
except Exception as e:
|
|
599
|
-
logger.warning("Failed to save audit results: %s", e)
|
|
600
|
-
return None
|