crucible-mcp 0.3.0-py3-none-any.whl → 0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,383 @@
+ """Core review functionality shared between CLI and MCP server."""
+
+ from collections import Counter
+ from pathlib import Path
+
+ from crucible.models import Domain, Severity, ToolFinding
+ from crucible.tools.delegation import (
+     delegate_bandit,
+     delegate_ruff,
+     delegate_semgrep,
+     delegate_slither,
+     get_semgrep_config,
+ )
+ from crucible.tools.git import GitContext
+
+
+ def detect_domain_for_file(path: str) -> tuple[Domain, list[str]]:
+     """Detect domain from a single file path.
+
+     Returns (domain, list of domain tags for skill matching).
+     """
+     if path.endswith(".sol"):
+         return Domain.SMART_CONTRACT, ["solidity", "smart_contract", "web3"]
+     elif path.endswith(".vy"):
+         return Domain.SMART_CONTRACT, ["vyper", "smart_contract", "web3"]
+     elif path.endswith(".py"):
+         return Domain.BACKEND, ["python", "backend"]
+     elif path.endswith((".ts", ".tsx")):
+         return Domain.FRONTEND, ["typescript", "frontend"]
+     elif path.endswith((".js", ".jsx")):
+         return Domain.FRONTEND, ["javascript", "frontend"]
+     elif path.endswith(".go"):
+         return Domain.BACKEND, ["go", "backend"]
+     elif path.endswith(".rs"):
+         return Domain.BACKEND, ["rust", "backend"]
+     elif path.endswith((".tf", ".yaml", ".yml")):
+         return Domain.INFRASTRUCTURE, ["infrastructure", "devops"]
+     else:
+         return Domain.UNKNOWN, []
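
A minimal sketch of the mapping, with return values read straight off the branches above; the file paths are illustrative:

    detect_domain_for_file("contracts/Vault.sol")
    # -> (Domain.SMART_CONTRACT, ["solidity", "smart_contract", "web3"])
    detect_domain_for_file("api/server.py")
    # -> (Domain.BACKEND, ["python", "backend"])
    detect_domain_for_file("notes.md")
    # -> (Domain.UNKNOWN, [])  # unrecognized extensions fall through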
+
+
+ def detect_domain(path: str) -> tuple[Domain, list[str]]:
+     """Detect domain from file or directory path.
+
+     For directories, scans contained files and aggregates domains.
+     Returns (primary_domain, list of all domain tags).
+     """
+     p = Path(path)
+
+     # Single file - use direct detection
+     if p.is_file():
+         return detect_domain_for_file(path)
+
+     # Directory - scan and aggregate
+     if not p.is_dir():
+         return Domain.UNKNOWN, ["unknown"]
+
+     domain_counts: Counter[Domain] = Counter()
+     all_tags: set[str] = set()
+
+     # Scan files in the directory, capped at 1000 so huge repos stay fast
+     file_count = 0
+     max_files = 1000
+     skip_dirs = {"node_modules", "__pycache__", "venv", ".venv", "dist", "build"}
+
+     for file_path in p.rglob("*"):
+         if file_count >= max_files:
+             break
+         if not file_path.is_file():
+             continue
+         # Skip hidden files and common non-code directories
+         if any(part.startswith(".") for part in file_path.parts):
+             continue
+         if any(part in skip_dirs for part in file_path.parts):
+             continue
+
+         domain, tags = detect_domain_for_file(str(file_path))
+         if domain != Domain.UNKNOWN:
+             domain_counts[domain] += 1
+             all_tags.update(tags)
+         file_count += 1
+
+     # Return most common domain, or UNKNOWN if none found
+     if not domain_counts:
+         return Domain.UNKNOWN, ["unknown"]
+
+     primary_domain = domain_counts.most_common(1)[0][0]
+     return primary_domain, sorted(all_tags) if all_tags else ["unknown"]
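
A sketch of the directory case, assuming a hypothetical tree with two Python files and one Terraform file: the primary domain is the most frequent one, and the tag list is the sorted union across all recognized files:

    # Hypothetical layout: services/api/{app.py, db.py, deploy.tf}
    detect_domain("services/api")
    # -> (Domain.BACKEND, ["backend", "devops", "infrastructure", "python"])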
+
+
+ def get_tools_for_domain(domain: Domain, domain_tags: list[str]) -> list[str]:
+     """Select static analysis tools based on domain and tags."""
+     if domain == Domain.SMART_CONTRACT:
+         return ["slither", "semgrep"]
+     elif domain == Domain.BACKEND and "python" in domain_tags:
+         return ["ruff", "bandit", "semgrep"]
+     elif domain == Domain.FRONTEND:
+         return ["semgrep"]
+     else:
+         return ["semgrep"]
+
+
+ def run_static_analysis(
+     path: str,
+     domain: Domain,
+     domain_tags: list[str],
+     tools: list[str] | None = None,
+ ) -> tuple[list[ToolFinding], list[str]]:
+     """Run static analysis tools.
+
+     Args:
+         path: File or directory to analyze
+         domain: Detected domain
+         domain_tags: Domain tags for tool selection
+         tools: Override tool selection (if None, auto-select based on domain)
+
+     Returns:
+         (findings, tool_errors)
+     """
+     if tools is None:
+         tools = get_tools_for_domain(domain, domain_tags)
+
+     all_findings: list[ToolFinding] = []
+     tool_errors: list[str] = []
+
+     if "semgrep" in tools:
+         config = get_semgrep_config(domain)
+         result = delegate_semgrep(path, config)
+         if result.is_ok:
+             all_findings.extend(result.value)
+         elif result.is_err:
+             tool_errors.append(f"semgrep: {result.error}")
+
+     if "ruff" in tools:
+         result = delegate_ruff(path)
+         if result.is_ok:
+             all_findings.extend(result.value)
+         elif result.is_err:
+             tool_errors.append(f"ruff: {result.error}")
+
+     if "slither" in tools:
+         result = delegate_slither(path)
+         if result.is_ok:
+             all_findings.extend(result.value)
+         elif result.is_err:
+             tool_errors.append(f"slither: {result.error}")
+
+     if "bandit" in tools:
+         result = delegate_bandit(path)
+         if result.is_ok:
+             all_findings.extend(result.value)
+         elif result.is_err:
+             tool_errors.append(f"bandit: {result.error}")
+
+     return all_findings, tool_errors
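
A usage sketch tying the pieces together; paths are illustrative, and the delegate_* functions return a Result-like object with is_ok/is_err, value, and error, as used above:

    domain, tags = detect_domain("src/")
    findings, tool_errors = run_static_analysis("src/", domain, tags)
    # Or bypass auto-selection and run a single tool:
    findings, tool_errors = run_static_analysis("src/", domain, tags, tools=["ruff"])
    for err in tool_errors:
        print(f"tool failed: {err}")  # e.g. "ruff: <error message>"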
+
+
+ def deduplicate_findings(findings: list[ToolFinding]) -> list[ToolFinding]:
+     """Deduplicate findings by location and message.
+
+     When multiple tools report the same issue at the same location,
+     keep only the highest severity finding.
+     """
+     seen: dict[tuple[str, str], ToolFinding] = {}
+     severity_order = [
+         Severity.CRITICAL,
+         Severity.HIGH,
+         Severity.MEDIUM,
+         Severity.LOW,
+         Severity.INFO,
+     ]
+
+     for f in findings:
+         # Normalize the message for comparison
+         norm_msg = f.message.lower().strip()
+         key = (f.location, norm_msg)
+
+         if key not in seen:
+             seen[key] = f
+         else:
+             # Keep the higher severity finding (lower index = higher severity)
+             existing = seen[key]
+             if severity_order.index(f.severity) < severity_order.index(existing.severity):
+                 seen[key] = f
+
+     return list(seen.values())
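
A sketch of the dedup rule, assuming ToolFinding can be built from just the three fields this function reads (the real model may require more, such as the reporting tool's name):

    # Hypothetical findings: two tools flag the same line; the messages differ
    # only in case, which normalization removes, so the HIGH finding wins.
    a = ToolFinding(location="app.py:10", message="SQL Injection", severity=Severity.HIGH)
    b = ToolFinding(location="app.py:10", message="sql injection", severity=Severity.MEDIUM)
    deduplicate_findings([a, b])
    # -> [a]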
+
+
+ def filter_findings_to_changes(
+     findings: list[ToolFinding],
+     context: GitContext,
+     include_context: bool = False,
+ ) -> list[ToolFinding]:
+     """Filter findings to only those in changed lines.
+
+     Args:
+         findings: All findings from analysis
+         context: Git context with changed files and line ranges
+         include_context: Include findings within 5 lines of changes
+
+     Returns:
+         Filtered findings that are in or near changed lines
+     """
+     # Build a lookup of file -> changed line ranges
+     changed_ranges: dict[str, list[tuple[int, int]]] = {}
+     for change in context.changes:
+         if change.status == "D":
+             continue  # Skip deleted files
+         ranges = [(r.start, r.end) for r in change.added_lines]
+         changed_ranges[change.path] = ranges
+
+     context_lines = 5 if include_context else 0
+     filtered: list[ToolFinding] = []
+
+     for finding in findings:
+         # Parse location: "path:line" or "path:line:col"
+         parts = finding.location.split(":")
+         if len(parts) < 2:
+             continue
+
+         file_path = parts[0]
+         try:
+             line_num = int(parts[1])
+         except ValueError:
+             continue
+
+         # Check if file is in changes (handle both absolute and relative paths)
+         matching_file = None
+         for changed_file in changed_ranges:
+             if file_path.endswith(changed_file) or changed_file.endswith(file_path):
+                 matching_file = changed_file
+                 break
+
+         if not matching_file:
+             continue
+
+         # Check if line is in changed ranges
+         ranges = changed_ranges[matching_file]
+         in_range = False
+         for start, end in ranges:
+             if start - context_lines <= line_num <= end + context_lines:
+                 in_range = True
+                 break
+
+         if in_range:
+             filtered.append(finding)
+
+     return filtered
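
A schematic of the line math, emulating the attributes this function reads (changes, status, path, added_lines with start/end) via SimpleNamespace stand-ins rather than the real GitContext types, and constructing ToolFinding as in the earlier sketch:

    from types import SimpleNamespace

    ctx = SimpleNamespace(changes=[
        SimpleNamespace(status="M", path="app.py",
                        added_lines=[SimpleNamespace(start=10, end=20)]),
    ])
    f = ToolFinding(location="app.py:24", message="x", severity=Severity.LOW)
    filter_findings_to_changes([f], ctx)                        # dropped: 24 not in 10-20
    filter_findings_to_changes([f], ctx, include_context=True)  # kept: 24 in the 5-line halo (5-25)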
+
+
+ def compute_severity_counts(findings: list[ToolFinding]) -> dict[str, int]:
+     """Compute severity counts for findings."""
+     counts: dict[str, int] = {}
+     for f in findings:
+         sev = f.severity.value
+         counts[sev] = counts.get(sev, 0) + 1
+     return counts
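
The keys are the Severity enum's .value strings; assuming those are lowercase names (the enum body is not part of this diff), a result looks like:

    compute_severity_counts(findings)
    # -> {"high": 2, "low": 1}

Functionally this is collections.Counter(f.severity.value for f in findings) materialized as a plain dict.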
+
+
+ def load_skills_and_knowledge(
+     domain: Domain,
+     domain_tags: list[str],
+     skills_override: list[str] | None = None,
+ ) -> tuple[list[tuple[str, list[str]]], dict[str, str], set[str], dict[str, str]]:
+     """Load matched skills and linked knowledge.
+
+     Args:
+         domain: Primary domain
+         domain_tags: All domain tags
+         skills_override: Override auto skill selection
+
+     Returns:
+         (matched_skills, skill_content, knowledge_files, knowledge_content)
+     """
+     from crucible.knowledge.loader import get_custom_knowledge_files, load_knowledge_file
+     from crucible.skills.loader import (
+         get_knowledge_for_skills,
+         load_skill,
+         match_skills_for_domain,
+     )
+
+     matched_skills = match_skills_for_domain(domain, domain_tags, skills_override)
+     skill_names = [name for name, _ in matched_skills]
+
+     # Load skill content
+     skill_content: dict[str, str] = {}
+     for skill_name, _ in matched_skills:
+         result = load_skill(skill_name)
+         if result.is_ok:
+             _, content = result.value
+             # Extract content after frontmatter
+             if "\n---\n" in content:
+                 skill_content[skill_name] = content.split("\n---\n", 1)[1].strip()
+             else:
+                 skill_content[skill_name] = content
+
+     # Load knowledge from skills + custom project/user knowledge
+     knowledge_files = get_knowledge_for_skills(skill_names)
+     custom_knowledge = get_custom_knowledge_files()
+     knowledge_files = knowledge_files | custom_knowledge
+
+     knowledge_content: dict[str, str] = {}
+     for filename in knowledge_files:
+         result = load_knowledge_file(filename)
+         if result.is_ok:
+             knowledge_content[filename] = result.value
+
+     return matched_skills, skill_content, knowledge_files, knowledge_content
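
A usage sketch; the skill and knowledge catalogs live outside this module, so the contents printed here depend entirely on what the loaders find:

    matched, skill_text, knowledge_files, knowledge_text = load_skills_and_knowledge(
        Domain.BACKEND, ["python", "backend"]
    )
    for name, _ in matched:
        print(name, "->", len(skill_text.get(name, "")), "chars of guidance")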
+
+
+ def run_enforcement(
+     path: str,
+     content: str | None = None,
+     changed_files: list[str] | None = None,
+     repo_root: str | None = None,
+ ) -> tuple[list, list[str], int, int]:
+     """Run pattern assertions.
+
+     Args:
+         path: File or directory path
+         content: File content (for single file mode)
+         changed_files: List of changed files (for git mode)
+         repo_root: Repository root path (for git mode)
+
+     Returns:
+         (enforcement_findings, errors, assertions_checked, assertions_skipped)
+     """
+     import os
+
+     from crucible.enforcement.assertions import load_assertions
+     from crucible.enforcement.models import EnforcementFinding
+     from crucible.enforcement.patterns import run_pattern_assertions
+
+     assertions, errors = load_assertions()
+     if not assertions:
+         return [], errors, 0, 0
+
+     findings: list[EnforcementFinding] = []
+     checked = 0
+     skipped = 0
+
+     if changed_files and repo_root:
+         # Git mode: check each changed file
+         for file_path in changed_files:
+             full_path = os.path.join(repo_root, file_path)
+             try:
+                 with open(full_path) as f:
+                     file_content = f.read()
+                 f_findings, c, s = run_pattern_assertions(file_path, file_content, assertions)
+                 findings.extend(f_findings)
+                 checked = max(checked, c)
+                 skipped = max(skipped, s)
+             except OSError:
+                 pass  # File may have been deleted
+     elif content is not None:
+         # Single file with provided content
+         f_findings, checked, skipped = run_pattern_assertions(path, content, assertions)
+         findings.extend(f_findings)
+     elif os.path.isfile(path):
+         # Single file
+         try:
+             with open(path) as f:
+                 file_content = f.read()
+             findings, checked, skipped = run_pattern_assertions(path, file_content, assertions)
+         except OSError as e:
+             errors.append(f"Failed to read {path}: {e}")
+     elif os.path.isdir(path):
+         # Directory
+         for root, _, files in os.walk(path):
+             for fname in files:
+                 fpath = os.path.join(root, fname)
+                 rel_path = os.path.relpath(fpath, path)
+                 try:
+                     with open(fpath) as f:
+                         file_content = f.read()
+                     f_findings, c, s = run_pattern_assertions(rel_path, file_content, assertions)
+                     findings.extend(f_findings)
+                     checked = max(checked, c)
+                     skipped = max(skipped, s)
+                 except (OSError, UnicodeDecodeError):
+                     pass  # Skip unreadable files
+
+     return findings, errors, checked, skipped
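
Finally, a sketch of the three calling modes, with illustrative paths:

    # 1. Single file, content already in hand (e.g. an unsaved editor buffer):
    findings, errors, checked, skipped = run_enforcement("app.py", content=source_text)

    # 2. Git mode: only the files touched on a branch, resolved against the repo root:
    findings, errors, checked, skipped = run_enforcement(
        ".", changed_files=["src/app.py", "src/db.py"], repo_root="/path/to/repo"
    )

    # 3. A file or directory read straight from disk:
    findings, errors, checked, skipped = run_enforcement("src/")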