crucible-mcp 0.3.0-py3-none-any.whl → 0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,454 @@
+ """Core review functionality shared between CLI and MCP server."""
+
+ from __future__ import annotations
+
+ from collections import Counter
+ from pathlib import Path
+
+ from crucible.enforcement.models import BudgetState, ComplianceConfig
+ from crucible.models import Domain, Severity, ToolFinding
+ from crucible.tools.delegation import (
+     delegate_bandit,
+     delegate_ruff,
+     delegate_semgrep,
+     delegate_slither,
+     get_semgrep_config,
+ )
+ from crucible.tools.git import GitContext
+
+
+ def detect_domain_for_file(path: str) -> tuple[Domain, list[str]]:
+     """Detect domain from a single file path.
+
+     Returns (domain, list of domain tags for skill matching).
+     """
+     if path.endswith(".sol"):
+         return Domain.SMART_CONTRACT, ["solidity", "smart_contract", "web3"]
+     elif path.endswith(".vy"):
+         return Domain.SMART_CONTRACT, ["vyper", "smart_contract", "web3"]
+     elif path.endswith(".py"):
+         return Domain.BACKEND, ["python", "backend"]
+     elif path.endswith((".ts", ".tsx")):
+         return Domain.FRONTEND, ["typescript", "frontend"]
+     elif path.endswith((".js", ".jsx")):
+         return Domain.FRONTEND, ["javascript", "frontend"]
+     elif path.endswith(".go"):
+         return Domain.BACKEND, ["go", "backend"]
+     elif path.endswith(".rs"):
+         return Domain.BACKEND, ["rust", "backend"]
+     elif path.endswith((".tf", ".yaml", ".yml")):
+         return Domain.INFRASTRUCTURE, ["infrastructure", "devops"]
+     else:
+         return Domain.UNKNOWN, []
+
+
+ def detect_domain(path: str) -> tuple[Domain, list[str]]:
+     """Detect domain from a file or directory path.
+
+     For directories, scans contained files and aggregates domains.
+     Returns (primary_domain, list of all domain tags).
+     """
+     p = Path(path)
+
+     # Single file - use direct detection
+     if p.is_file():
+         return detect_domain_for_file(path)
+
+     # Directory - scan and aggregate
+     if not p.is_dir():
+         return Domain.UNKNOWN, ["unknown"]
+
+     domain_counts: Counter[Domain] = Counter()
+     all_tags: set[str] = set()
+
+     # Scan files in the directory, capped at 1000 so huge repos stay cheap
+     file_count = 0
+     max_files = 1000
+     skip_dirs = {"node_modules", "__pycache__", "venv", ".venv", "dist", "build"}
+
+     for file_path in p.rglob("*"):
+         if file_count >= max_files:
+             break
+         if not file_path.is_file():
+             continue
+         # Skip hidden files and common non-code directories
+         if any(part.startswith(".") for part in file_path.parts):
+             continue
+         if any(part in skip_dirs for part in file_path.parts):
+             continue
+
+         domain, tags = detect_domain_for_file(str(file_path))
+         if domain != Domain.UNKNOWN:
+             domain_counts[domain] += 1
+             all_tags.update(tags)
+         file_count += 1
+
+     # Return the most common domain, or UNKNOWN if none was found
+     if not domain_counts:
+         return Domain.UNKNOWN, ["unknown"]
+
+     primary_domain = domain_counts.most_common(1)[0][0]
+     return primary_domain, sorted(all_tags) if all_tags else ["unknown"]
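+
+ # Usage sketch (illustrative only; the path is hypothetical):
+ #   domain, tags = detect_domain("services/api")
+ #   # -> (Domain.BACKEND, ["backend", "python"]) for a directory of mostly .py files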
+
+
+ def get_tools_for_domain(domain: Domain, domain_tags: list[str]) -> list[str]:
+     """Select static analysis tools based on domain and tags."""
+     if domain == Domain.SMART_CONTRACT:
+         return ["slither", "semgrep"]
+     elif domain == Domain.BACKEND and "python" in domain_tags:
+         return ["ruff", "bandit", "semgrep"]
+     elif domain == Domain.FRONTEND:
+         return ["semgrep"]
+     else:
+         return ["semgrep"]
+
+
+ def run_static_analysis(
+     path: str,
+     domain: Domain,
+     domain_tags: list[str],
+     tools: list[str] | None = None,
+ ) -> tuple[list[ToolFinding], list[str]]:
+     """Run static analysis tools.
+
+     Args:
+         path: File or directory to analyze
+         domain: Detected domain
+         domain_tags: Domain tags for tool selection
+         tools: Override tool selection (if None, auto-select based on domain)
+
+     Returns:
+         (findings, tool_errors)
+     """
+     if tools is None:
+         tools = get_tools_for_domain(domain, domain_tags)
+
+     all_findings: list[ToolFinding] = []
+     tool_errors: list[str] = []
+
+     if "semgrep" in tools:
+         config = get_semgrep_config(domain)
+         result = delegate_semgrep(path, config)
+         if result.is_ok:
+             all_findings.extend(result.value)
+         elif result.is_err:
+             tool_errors.append(f"semgrep: {result.error}")
+
+     if "ruff" in tools:
+         result = delegate_ruff(path)
+         if result.is_ok:
+             all_findings.extend(result.value)
+         elif result.is_err:
+             tool_errors.append(f"ruff: {result.error}")
+
+     if "slither" in tools:
+         result = delegate_slither(path)
+         if result.is_ok:
+             all_findings.extend(result.value)
+         elif result.is_err:
+             tool_errors.append(f"slither: {result.error}")
+
+     if "bandit" in tools:
+         result = delegate_bandit(path)
+         if result.is_ok:
+             all_findings.extend(result.value)
+         elif result.is_err:
+             tool_errors.append(f"bandit: {result.error}")
+
+     return all_findings, tool_errors
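+
+ # Call sketch (hypothetical path). Tool failures land in tool_errors as
+ # "tool: message" strings instead of raising, so one broken tool cannot
+ # abort the whole review:
+ #   findings, tool_errors = run_static_analysis("services/api", Domain.BACKEND, ["python", "backend"])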
+
+
+ def deduplicate_findings(findings: list[ToolFinding]) -> list[ToolFinding]:
+     """Deduplicate findings by location and message.
+
+     When multiple tools report the same issue at the same location,
+     keep only the highest-severity finding.
+     """
+     seen: dict[tuple[str, str], ToolFinding] = {}
+     severity_order = [
+         Severity.CRITICAL,
+         Severity.HIGH,
+         Severity.MEDIUM,
+         Severity.LOW,
+         Severity.INFO,
+     ]
+
+     for f in findings:
+         # Normalize the message for comparison
+         norm_msg = f.message.lower().strip()
+         key = (f.location, norm_msg)
+
+         if key not in seen:
+             seen[key] = f
+         else:
+             # Keep the higher-severity finding (lower index = higher severity)
+             existing = seen[key]
+             if severity_order.index(f.severity) < severity_order.index(existing.severity):
+                 seen[key] = f
+
+     return list(seen.values())
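+
+ # Example (sketch): messages are compared case-insensitively per location, so a
+ # MEDIUM bandit "use of eval" and a HIGH semgrep "Use of eval" at "app.py:10"
+ # collapse into the single HIGH finding.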
+
+
+ def filter_findings_to_changes(
+     findings: list[ToolFinding],
+     context: GitContext,
+     include_context: bool = False,
+ ) -> list[ToolFinding]:
+     """Filter findings to only those in changed lines.
+
+     Args:
+         findings: All findings from analysis
+         context: Git context with changed files and line ranges
+         include_context: Include findings within 5 lines of changes
+
+     Returns:
+         Filtered findings that are in or near changed lines
+     """
+     # Build a lookup of file -> changed line ranges
+     changed_ranges: dict[str, list[tuple[int, int]]] = {}
+     for change in context.changes:
+         if change.status == "D":
+             continue  # Skip deleted files
+         ranges = [(r.start, r.end) for r in change.added_lines]
+         changed_ranges[change.path] = ranges
+
+     context_lines = 5 if include_context else 0
+     filtered: list[ToolFinding] = []
+
+     for finding in findings:
+         # Parse location: "path:line" or "path:line:col"
+         parts = finding.location.split(":")
+         if len(parts) < 2:
+             continue
+
+         file_path = parts[0]
+         try:
+             line_num = int(parts[1])
+         except ValueError:
+             continue
+
+         # Check if the file is in the changes (handle both absolute and relative paths)
+         matching_file = None
+         for changed_file in changed_ranges:
+             if file_path.endswith(changed_file) or changed_file.endswith(file_path):
+                 matching_file = changed_file
+                 break
+
+         if not matching_file:
+             continue
+
+         # Check if the line falls in a changed range (widened by context_lines)
+         ranges = changed_ranges[matching_file]
+         in_range = False
+         for start, end in ranges:
+             if start - context_lines <= line_num <= end + context_lines:
+                 in_range = True
+                 break
+
+         if in_range:
+             filtered.append(finding)
+
+     return filtered
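+
+ # Sketch: a finding at "src/app.py:42" survives only if a non-deleted change
+ # whose path suffix-matches "src/app.py" has an added-line range covering
+ # line 42 (widened by 5 lines on each side when include_context=True).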
+
+
+ def compute_severity_counts(findings: list[ToolFinding]) -> dict[str, int]:
+     """Compute severity counts for findings."""
+     counts: dict[str, int] = {}
+     for f in findings:
+         sev = f.severity.value
+         counts[sev] = counts.get(sev, 0) + 1
+     return counts
+
+
+ def load_skills_and_knowledge(
+     domain: Domain,
+     domain_tags: list[str],
+     skills_override: list[str] | None = None,
+ ) -> tuple[list[tuple[str, list[str]]], dict[str, str], set[str], dict[str, str]]:
+     """Load matched skills and linked knowledge.
+
+     Args:
+         domain: Primary domain
+         domain_tags: All domain tags
+         skills_override: Override automatic skill selection
+
+     Returns:
+         (matched_skills, skill_content, knowledge_files, knowledge_content)
+     """
+     from crucible.knowledge.loader import get_custom_knowledge_files, load_knowledge_file
+     from crucible.skills.loader import (
+         get_knowledge_for_skills,
+         load_skill,
+         match_skills_for_domain,
+     )
+
+     matched_skills = match_skills_for_domain(domain, domain_tags, skills_override)
+     skill_names = [name for name, _ in matched_skills]
+
+     # Load skill content
+     skill_content: dict[str, str] = {}
+     for skill_name, _ in matched_skills:
+         result = load_skill(skill_name)
+         if result.is_ok:
+             _, content = result.value
+             # Keep only the content after the closing frontmatter delimiter
+             if "\n---\n" in content:
+                 skill_content[skill_name] = content.split("\n---\n", 1)[1].strip()
+             else:
+                 skill_content[skill_name] = content
+
+     # Load knowledge linked from skills, plus custom project/user knowledge
+     knowledge_files = get_knowledge_for_skills(skill_names)
+     custom_knowledge = get_custom_knowledge_files()
+     knowledge_files = knowledge_files | custom_knowledge
+
+     knowledge_content: dict[str, str] = {}
+     for filename in knowledge_files:
+         result = load_knowledge_file(filename)
+         if result.is_ok:
+             knowledge_content[filename] = result.value
+
+     return matched_skills, skill_content, knowledge_files, knowledge_content
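+
+ # Flow sketch (skill names are hypothetical): a Python backend review might
+ # match a skill such as "python-security", keep its body minus the
+ # frontmatter block, then load every knowledge file that skill links to
+ # plus any custom project/user knowledge files.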
+
+
+ def run_enforcement(
+     path: str,
+     content: str | None = None,
+     changed_files: list[str] | None = None,
+     repo_root: str | None = None,
+     compliance_config: ComplianceConfig | None = None,
+ ) -> tuple[list, list[str], int, int, BudgetState | None]:
+     """Run pattern and LLM assertions.
+
+     Args:
+         path: File or directory path
+         content: File content (for single-file mode)
+         changed_files: List of changed files (for git mode)
+         repo_root: Repository root path (for git mode)
+         compliance_config: Configuration for LLM compliance checking (optional)
+
+     Returns:
+         (enforcement_findings, errors, assertions_checked, assertions_skipped, budget_state)
+     """
+     import os
+
+     from crucible.enforcement.assertions import load_assertions
+     from crucible.enforcement.compliance import run_llm_assertions, run_llm_assertions_batch
+     from crucible.enforcement.models import EnforcementFinding
+     from crucible.enforcement.patterns import run_pattern_assertions
+
+     assertions, errors = load_assertions()
+     if not assertions:
+         return [], errors, 0, 0, None
+
+     findings: list[EnforcementFinding] = []
+     checked = 0
+     skipped = 0
+     budget_state: BudgetState | None = None
+
+     # Default compliance config if not provided
+     if compliance_config is None:
+         compliance_config = ComplianceConfig()
+
+     # Collect files for batch LLM processing
+     files_for_llm: list[tuple[str, str]] = []
+
+     if changed_files and repo_root:
+         # Git mode: check each changed file
+         for file_path in changed_files:
+             full_path = os.path.join(repo_root, file_path)
+             try:
+                 with open(full_path) as f:
+                     file_content = f.read()
+
+                 # Run pattern assertions
+                 f_findings, c, s = run_pattern_assertions(file_path, file_content, assertions)
+                 findings.extend(f_findings)
+                 checked = max(checked, c)  # same assertion set per file; max avoids double-counting
+                 skipped = max(skipped, s)
+
+                 # Collect for LLM processing
+                 if compliance_config.enabled:
+                     files_for_llm.append((file_path, file_content))
+             except OSError:
+                 pass  # File may have been deleted
+
+         # Run LLM assertions in batch
+         if files_for_llm and compliance_config.enabled:
+             llm_findings, budget_state, llm_errors = run_llm_assertions_batch(
+                 files_for_llm, assertions, compliance_config
+             )
+             findings.extend(llm_findings)
+             errors.extend(llm_errors)
+             if budget_state:
+                 skipped += budget_state.assertions_skipped
+
+     elif content is not None:
+         # Single file with provided content
+         f_findings, checked, skipped = run_pattern_assertions(path, content, assertions)
+         findings.extend(f_findings)
+
+         # Run LLM assertions
+         if compliance_config.enabled:
+             llm_findings, budget_state, llm_errors = run_llm_assertions(
+                 path, content, assertions, compliance_config
+             )
+             findings.extend(llm_findings)
+             errors.extend(llm_errors)
+             if budget_state:
+                 skipped += budget_state.assertions_skipped
+
+     elif os.path.isfile(path):
+         # Single file read from disk
+         try:
+             with open(path) as f:
+                 file_content = f.read()
+
+             p_findings, checked, skipped = run_pattern_assertions(path, file_content, assertions)
+             findings.extend(p_findings)
+
+             # Run LLM assertions
+             if compliance_config.enabled:
+                 llm_findings, budget_state, llm_errors = run_llm_assertions(
+                     path, file_content, assertions, compliance_config
+                 )
+                 findings.extend(llm_findings)
+                 errors.extend(llm_errors)
+                 if budget_state:
+                     skipped += budget_state.assertions_skipped
+         except OSError as e:
+             errors.append(f"Failed to read {path}: {e}")
+
+     elif os.path.isdir(path):
+         # Directory - collect all files for batch processing
+         for root, _, files in os.walk(path):
+             for fname in files:
+                 fpath = os.path.join(root, fname)
+                 rel_path = os.path.relpath(fpath, path)
+                 try:
+                     with open(fpath) as f:
+                         file_content = f.read()
+
+                     # Run pattern assertions
+                     f_findings, c, s = run_pattern_assertions(rel_path, file_content, assertions)
+                     findings.extend(f_findings)
+                     checked = max(checked, c)
+                     skipped = max(skipped, s)
+
+                     # Collect for LLM processing
+                     if compliance_config.enabled:
+                         files_for_llm.append((rel_path, file_content))
+                 except (OSError, UnicodeDecodeError):
+                     pass  # Skip unreadable files
+
+         # Run LLM assertions in batch
+         if files_for_llm and compliance_config.enabled:
+             llm_findings, budget_state, llm_errors = run_llm_assertions_batch(
+                 files_for_llm, assertions, compliance_config
+             )
+             findings.extend(llm_findings)
+             errors.extend(llm_errors)
+             if budget_state:
+                 skipped += budget_state.assertions_skipped
+
+     return findings, errors, checked, skipped, budget_state
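+
+ # End-to-end sketch tying the pieces together (arguments hypothetical; every
+ # function is defined above):
+ #   domain, tags = detect_domain(path)
+ #   raw, tool_errors = run_static_analysis(path, domain, tags)
+ #   findings = deduplicate_findings(raw)
+ #   counts = compute_severity_counts(findings)
+ #   enforcement, errors, checked, skipped, budget = run_enforcement(path)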