crucible_mcp-0.3.0-py3-none-any.whl → crucible_mcp-0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crucible/__init__.py +1 -1
- crucible/cli.py +347 -160
- crucible/enforcement/__init__.py +40 -0
- crucible/enforcement/assertions.py +276 -0
- crucible/enforcement/models.py +107 -0
- crucible/enforcement/patterns.py +337 -0
- crucible/review/__init__.py +23 -0
- crucible/review/core.py +383 -0
- crucible/server.py +104 -327
- {crucible_mcp-0.3.0.dist-info → crucible_mcp-0.4.0.dist-info}/METADATA +10 -3
- {crucible_mcp-0.3.0.dist-info → crucible_mcp-0.4.0.dist-info}/RECORD +14 -8
- {crucible_mcp-0.3.0.dist-info → crucible_mcp-0.4.0.dist-info}/WHEEL +0 -0
- {crucible_mcp-0.3.0.dist-info → crucible_mcp-0.4.0.dist-info}/entry_points.txt +0 -0
- {crucible_mcp-0.3.0.dist-info → crucible_mcp-0.4.0.dist-info}/top_level.txt +0 -0
crucible/review/core.py
ADDED
@@ -0,0 +1,383 @@

"""Core review functionality shared between CLI and MCP server."""

from collections import Counter
from pathlib import Path

from crucible.models import Domain, Severity, ToolFinding
from crucible.tools.delegation import (
    delegate_bandit,
    delegate_ruff,
    delegate_semgrep,
    delegate_slither,
    get_semgrep_config,
)
from crucible.tools.git import GitContext

def detect_domain_for_file(path: str) -> tuple[Domain, list[str]]:
    """Detect domain from a single file path.

    Returns (domain, list of domain tags for skill matching).
    """
    if path.endswith(".sol"):
        return Domain.SMART_CONTRACT, ["solidity", "smart_contract", "web3"]
    elif path.endswith(".vy"):
        return Domain.SMART_CONTRACT, ["vyper", "smart_contract", "web3"]
    elif path.endswith(".py"):
        return Domain.BACKEND, ["python", "backend"]
    elif path.endswith((".ts", ".tsx")):
        return Domain.FRONTEND, ["typescript", "frontend"]
    elif path.endswith((".js", ".jsx")):
        return Domain.FRONTEND, ["javascript", "frontend"]
    elif path.endswith(".go"):
        return Domain.BACKEND, ["go", "backend"]
    elif path.endswith(".rs"):
        return Domain.BACKEND, ["rust", "backend"]
    elif path.endswith((".tf", ".yaml", ".yml")):
        return Domain.INFRASTRUCTURE, ["infrastructure", "devops"]
    else:
        return Domain.UNKNOWN, []

def detect_domain(path: str) -> tuple[Domain, list[str]]:
    """Detect domain from file or directory path.

    For directories, scans contained files and aggregates domains.
    Returns (primary_domain, list of all domain tags).
    """
    p = Path(path)

    # Single file - use direct detection
    if p.is_file():
        return detect_domain_for_file(path)

    # Directory - scan and aggregate
    if not p.is_dir():
        return Domain.UNKNOWN, ["unknown"]

    domain_counts: Counter[Domain] = Counter()
    all_tags: set[str] = set()

    # Scan files in directory (up to 1000 to avoid huge repos)
    file_count = 0
    max_files = 1000
    skip_dirs = {"node_modules", "__pycache__", "venv", ".venv", "dist", "build"}

    for file_path in p.rglob("*"):
        if file_count >= max_files:
            break
        if not file_path.is_file():
            continue
        # Skip hidden files and common non-code directories
        if any(part.startswith(".") for part in file_path.parts):
            continue
        if any(part in skip_dirs for part in file_path.parts):
            continue

        domain, tags = detect_domain_for_file(str(file_path))
        if domain != Domain.UNKNOWN:
            domain_counts[domain] += 1
            all_tags.update(tags)
        file_count += 1

    # Return most common domain, or UNKNOWN if none found
    if not domain_counts:
        return Domain.UNKNOWN, ["unknown"]

    primary_domain = domain_counts.most_common(1)[0][0]
    return primary_domain, sorted(all_tags) if all_tags else ["unknown"]
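
detect_domain_for_file is a pure function of the path string, and detect_domain adds a bounded filesystem scan on top of it, so both are easy to sanity-check. A usage sketch, not shipped in the package (the paths are hypothetical):

# Single files map directly from their extension:
domain, tags = detect_domain_for_file("contracts/Vault.sol")
# -> (Domain.SMART_CONTRACT, ["solidity", "smart_contract", "web3"])

# For a directory, the most common per-file domain wins and the tags of
# every recognized file are merged, then sorted alphabetically:
domain, tags = detect_domain("./myproject")
# e.g. (Domain.BACKEND, ["backend", "frontend", "python", "typescript"])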

def get_tools_for_domain(domain: Domain, domain_tags: list[str]) -> list[str]:
    """Select static analysis tools based on domain and tags."""
    if domain == Domain.SMART_CONTRACT:
        return ["slither", "semgrep"]
    elif domain == Domain.BACKEND and "python" in domain_tags:
        return ["ruff", "bandit", "semgrep"]
    elif domain == Domain.FRONTEND:
        return ["semgrep"]
    else:
        return ["semgrep"]

def run_static_analysis(
    path: str,
    domain: Domain,
    domain_tags: list[str],
    tools: list[str] | None = None,
) -> tuple[list[ToolFinding], list[str]]:
    """Run static analysis tools.

    Args:
        path: File or directory to analyze
        domain: Detected domain
        domain_tags: Domain tags for tool selection
        tools: Override tool selection (if None, auto-select based on domain)

    Returns:
        (findings, tool_errors)
    """
    if tools is None:
        tools = get_tools_for_domain(domain, domain_tags)

    all_findings: list[ToolFinding] = []
    tool_errors: list[str] = []

    if "semgrep" in tools:
        config = get_semgrep_config(domain)
        result = delegate_semgrep(path, config)
        if result.is_ok:
            all_findings.extend(result.value)
        elif result.is_err:
            tool_errors.append(f"semgrep: {result.error}")

    if "ruff" in tools:
        result = delegate_ruff(path)
        if result.is_ok:
            all_findings.extend(result.value)
        elif result.is_err:
            tool_errors.append(f"ruff: {result.error}")

    if "slither" in tools:
        result = delegate_slither(path)
        if result.is_ok:
            all_findings.extend(result.value)
        elif result.is_err:
            tool_errors.append(f"slither: {result.error}")

    if "bandit" in tools:
        result = delegate_bandit(path)
        if result.is_ok:
            all_findings.extend(result.value)
        elif result.is_err:
            tool_errors.append(f"bandit: {result.error}")

    return all_findings, tool_errors
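
A hedged sketch of driving the runner with auto-selected tools. Tool failures are accumulated as strings rather than raised, so callers should surface both halves of the returned tuple ("src/app.py" is a hypothetical path):

# For a Python file, tools default to ["ruff", "bandit", "semgrep"]
# via get_tools_for_domain; any tool that fails lands in tool_errors.
domain, tags = detect_domain_for_file("src/app.py")
findings, tool_errors = run_static_analysis("src/app.py", domain, tags)
for err in tool_errors:
    print(f"tool failed: {err}")  # e.g. "semgrep: <error message>"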

def deduplicate_findings(findings: list[ToolFinding]) -> list[ToolFinding]:
    """Deduplicate findings by location and message.

    When multiple tools report the same issue at the same location,
    keep only the highest severity finding.
    """
    seen: dict[tuple[str, str], ToolFinding] = {}
    severity_order = [
        Severity.CRITICAL,
        Severity.HIGH,
        Severity.MEDIUM,
        Severity.LOW,
        Severity.INFO,
    ]

    for f in findings:
        # Normalize the message for comparison
        norm_msg = f.message.lower().strip()
        key = (f.location, norm_msg)

        if key not in seen:
            seen[key] = f
        else:
            # Keep the higher severity finding
            existing = seen[key]
            if severity_order.index(f.severity) < severity_order.index(existing.severity):
                seen[key] = f

    return list(seen.values())
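
The tie-breaking rule is easiest to verify with two findings that collide after normalization. The keyword construction of ToolFinding below is an assumption (the model's fields are not shown in this diff); the function itself only reads location, message, and severity:

# Assumed keyword constructor; only the three attributes read above matter.
a = ToolFinding(location="app.py:12", message="SQL injection risk", severity=Severity.MEDIUM)
b = ToolFinding(location="app.py:12", message="  sql injection risk", severity=Severity.HIGH)

deduped = deduplicate_findings([a, b])
# The messages collide after lower().strip(), so one finding survives,
# and HIGH wins because it sorts earlier in severity_order.
assert len(deduped) == 1 and deduped[0].severity == Severity.HIGH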

def filter_findings_to_changes(
    findings: list[ToolFinding],
    context: GitContext,
    include_context: bool = False,
) -> list[ToolFinding]:
    """Filter findings to only those in changed lines.

    Args:
        findings: All findings from analysis
        context: Git context with changed files and line ranges
        include_context: Include findings within 5 lines of changes

    Returns:
        Filtered findings that are in or near changed lines
    """
    # Build a lookup of file -> changed line ranges
    changed_ranges: dict[str, list[tuple[int, int]]] = {}
    for change in context.changes:
        if change.status == "D":
            continue  # Skip deleted files
        ranges = [(r.start, r.end) for r in change.added_lines]
        changed_ranges[change.path] = ranges

    context_lines = 5 if include_context else 0
    filtered: list[ToolFinding] = []

    for finding in findings:
        # Parse location: "path:line" or "path:line:col"
        parts = finding.location.split(":")
        if len(parts) < 2:
            continue

        file_path = parts[0]
        try:
            line_num = int(parts[1])
        except ValueError:
            continue

        # Check if file is in changes (handle both absolute and relative paths)
        matching_file = None
        for changed_file in changed_ranges:
            if file_path.endswith(changed_file) or changed_file.endswith(file_path):
                matching_file = changed_file
                break

        if not matching_file:
            continue

        # Check if line is in changed ranges
        ranges = changed_ranges[matching_file]
        in_range = False
        for start, end in ranges:
            if start - context_lines <= line_num <= end + context_lines:
                in_range = True
                break

        if in_range:
            filtered.append(finding)

    return filtered
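
Two details are worth calling out: file matching is by path suffix in either direction, so absolute tool output can match repo-relative git paths, and include_context=True widens every added range by five lines. A sketch using stand-in objects, since the GitContext and change constructors are not shown in this diff:

from types import SimpleNamespace

# Stand-ins exposing only the attributes the filter reads (.changes,
# .path, .status, .added_lines with .start/.end); purely illustrative.
change = SimpleNamespace(path="src/api/views.py", status="M",
                         added_lines=[SimpleNamespace(start=40, end=45)])
context = SimpleNamespace(changes=[change])

finding = ToolFinding(location="/repo/src/api/views.py:42", message="x", severity=Severity.LOW)
# "/repo/src/api/views.py".endswith("src/api/views.py") is True and line
# 42 falls inside the 40-45 range, so the finding is kept:
kept = filter_findings_to_changes([finding], context, include_context=False)
assert len(kept) == 1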

def compute_severity_counts(findings: list[ToolFinding]) -> dict[str, int]:
    """Compute severity counts for findings."""
    counts: dict[str, int] = {}
    for f in findings:
        sev = f.severity.value
        counts[sev] = counts.get(sev, 0) + 1
    return counts

def load_skills_and_knowledge(
    domain: Domain,
    domain_tags: list[str],
    skills_override: list[str] | None = None,
) -> tuple[list[tuple[str, list[str]]], dict[str, str], set[str], dict[str, str]]:
    """Load matched skills and linked knowledge.

    Args:
        domain: Primary domain
        domain_tags: All domain tags
        skills_override: Override auto skill selection

    Returns:
        (matched_skills, skill_content, knowledge_files, knowledge_content)
    """
    from crucible.knowledge.loader import get_custom_knowledge_files, load_knowledge_file
    from crucible.skills.loader import (
        get_knowledge_for_skills,
        load_skill,
        match_skills_for_domain,
    )

    matched_skills = match_skills_for_domain(domain, domain_tags, skills_override)
    skill_names = [name for name, _ in matched_skills]

    # Load skill content
    skill_content: dict[str, str] = {}
    for skill_name, _ in matched_skills:
        result = load_skill(skill_name)
        if result.is_ok:
            _, content = result.value
            # Extract content after frontmatter
            if "\n---\n" in content:
                skill_content[skill_name] = content.split("\n---\n", 1)[1].strip()
            else:
                skill_content[skill_name] = content

    # Load knowledge from skills + custom project/user knowledge
    knowledge_files = get_knowledge_for_skills(skill_names)
    custom_knowledge = get_custom_knowledge_files()
    knowledge_files = knowledge_files | custom_knowledge

    knowledge_content: dict[str, str] = {}
    for filename in knowledge_files:
        result = load_knowledge_file(filename)
        if result.is_ok:
            knowledge_content[filename] = result.value

    return matched_skills, skill_content, knowledge_files, knowledge_content
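
A small sketch of consuming the four-part return value; the matching and loading internals live in crucible.skills and crucible.knowledge, which are not part of this diff:

matched, skill_text, knowledge_files, knowledge_text = load_skills_and_knowledge(
    Domain.BACKEND, ["python", "backend"]
)
for name, matched_tags in matched:
    print(f"skill {name} ({matched_tags}): {len(skill_text.get(name, ''))} chars")
print(f"loaded {len(knowledge_text)} of {len(knowledge_files)} knowledge files")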

def run_enforcement(
    path: str,
    content: str | None = None,
    changed_files: list[str] | None = None,
    repo_root: str | None = None,
) -> tuple[list, list[str], int, int]:
    """Run pattern assertions.

    Args:
        path: File or directory path
        content: File content (for single file mode)
        changed_files: List of changed files (for git mode)
        repo_root: Repository root path (for git mode)

    Returns:
        (enforcement_findings, errors, assertions_checked, assertions_skipped)
    """
    import os

    from crucible.enforcement.assertions import load_assertions
    from crucible.enforcement.models import EnforcementFinding
    from crucible.enforcement.patterns import run_pattern_assertions

    assertions, errors = load_assertions()
    if not assertions:
        return [], errors, 0, 0

    findings: list[EnforcementFinding] = []
    checked = 0
    skipped = 0

    if changed_files and repo_root:
        # Git mode: check each changed file
        for file_path in changed_files:
            full_path = os.path.join(repo_root, file_path)
            try:
                with open(full_path) as f:
                    file_content = f.read()
                f_findings, c, s = run_pattern_assertions(file_path, file_content, assertions)
                findings.extend(f_findings)
                checked = max(checked, c)
                skipped = max(skipped, s)
            except OSError:
                pass  # File may have been deleted
    elif content is not None:
        # Single file with provided content
        f_findings, checked, skipped = run_pattern_assertions(path, content, assertions)
        findings.extend(f_findings)
    elif os.path.isfile(path):
        # Single file
        try:
            with open(path) as f:
                file_content = f.read()
            findings, checked, skipped = run_pattern_assertions(path, file_content, assertions)
        except OSError as e:
            errors.append(f"Failed to read {path}: {e}")
    elif os.path.isdir(path):
        # Directory
        for root, _, files in os.walk(path):
            for fname in files:
                fpath = os.path.join(root, fname)
                rel_path = os.path.relpath(fpath, path)
                try:
                    with open(fpath) as f:
                        file_content = f.read()
                    f_findings, c, s = run_pattern_assertions(rel_path, file_content, assertions)
                    findings.extend(f_findings)
                    checked = max(checked, c)
                    skipped = max(skipped, s)
                except (OSError, UnicodeDecodeError):
                    pass  # Skip unreadable files

    return findings, errors, checked, skipped
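
Taken together, the module supports a compact review pipeline: detect the domain, run and deduplicate static analysis, then layer on pattern enforcement. The composition below is an illustrative sketch, not the package's actual wiring (crucible/cli.py and crucible/server.py, also changed in this release, do the real orchestration):

# Hypothetical composition of the helpers in this module.
def review_path(path: str) -> dict:
    domain, tags = detect_domain(path)
    findings, tool_errors = run_static_analysis(path, domain, tags)
    findings = deduplicate_findings(findings)
    enforcement, enf_errors, checked, skipped = run_enforcement(path)
    return {
        "domain": domain,
        "severity_counts": compute_severity_counts(findings),
        "findings": findings,
        "enforcement_findings": enforcement,
        "errors": tool_errors + enf_errors,
        "assertions": {"checked": checked, "skipped": skipped},
    }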