crucible-mcp 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
crucible/server.py CHANGED
@@ -7,8 +7,13 @@ from mcp.server import Server
7
7
  from mcp.server.stdio import stdio_server
8
8
  from mcp.types import TextContent, Tool
9
9
 
10
- from crucible.knowledge.loader import load_principles
11
- from crucible.models import Domain, Severity, ToolFinding
10
+ from crucible.knowledge.loader import (
11
+ get_custom_knowledge_files,
12
+ load_all_knowledge,
13
+ load_principles,
14
+ )
15
+ from crucible.models import Domain, FullReviewResult, Severity, ToolFinding
16
+ from crucible.skills import get_knowledge_for_skills, load_skill, match_skills_for_domain
12
17
  from crucible.tools.delegation import (
13
18
  check_all_tools,
14
19
  delegate_bandit,
@@ -17,6 +22,15 @@ from crucible.tools.delegation import (
17
22
  delegate_slither,
18
23
  get_semgrep_config,
19
24
  )
25
+ from crucible.tools.git import (
26
+ GitContext,
27
+ get_branch_diff,
28
+ get_changed_files,
29
+ get_recent_commits,
30
+ get_repo_root,
31
+ get_staged_changes,
32
+ get_unstaged_changes,
33
+ )
20
34
 
21
35
  server = Server("crucible")
22
36
 
@@ -47,13 +61,86 @@ def _format_findings(findings: list[ToolFinding]) -> str:
47
61
  return "\n".join(parts) if parts else "No findings."
48
62
 
49
63
 
64
def _deduplicate_findings(findings: list[ToolFinding]) -> list[ToolFinding]:
    """Collapse findings that share a location and a normalized message.

    Two findings are duplicates when their ``location`` is equal and their
    ``message`` is equal after lowercasing and stripping surrounding
    whitespace. For each group of duplicates only the most severe finding is
    kept; on a severity tie the finding seen first wins.

    Args:
        findings: Findings collected from one or more tools.

    Returns:
        The surviving findings, ordered by the first appearance of each
        (location, message) key.
    """
    # Lower rank == more severe. Built once per call instead of rebuilding a
    # list and scanning it twice on every collision.
    severity_rank = {
        severity: rank
        for rank, severity in enumerate(
            (
                Severity.CRITICAL,
                Severity.HIGH,
                Severity.MEDIUM,
                Severity.LOW,
                Severity.INFO,
            )
        )
    }
    # A severity missing from the table is ranked below INFO rather than
    # raising, so one odd finding cannot abort the whole review.
    least_severe = len(severity_rank)

    seen: dict[tuple[str, str], ToolFinding] = {}
    for finding in findings:
        key = (finding.location, finding.message.lower().strip())
        existing = seen.get(key)
        if existing is None:
            seen[key] = finding
            continue

        new_rank = severity_rank.get(finding.severity, least_severe)
        old_rank = severity_rank.get(existing.severity, least_severe)
        if new_rank < old_rank:
            # Replacing the value keeps the key's original insertion position.
            seen[key] = finding

    return list(seen.values())
94
+
95
+
50
96
  @server.list_tools() # type: ignore[misc]
51
97
  async def list_tools() -> list[Tool]:
52
98
  """List available tools."""
53
99
  return [
100
+ Tool(
101
+ name="review",
102
+ description="Unified code review tool. Supports path-based review OR git-aware review. Runs static analysis, matches skills, loads knowledge.",
103
+ inputSchema={
104
+ "type": "object",
105
+ "properties": {
106
+ "path": {
107
+ "type": "string",
108
+ "description": "File or directory path to review. If not provided, uses git mode.",
109
+ },
110
+ "mode": {
111
+ "type": "string",
112
+ "enum": ["staged", "unstaged", "branch", "commits"],
113
+ "description": "Git mode: staged (about to commit), unstaged (working dir), branch (PR diff), commits (recent N)",
114
+ },
115
+ "base": {
116
+ "type": "string",
117
+ "description": "Base branch for 'branch' mode (default: main) or commit count for 'commits' mode (default: 1)",
118
+ },
119
+ "include_context": {
120
+ "type": "boolean",
121
+ "description": "For git modes: include findings near (within 5 lines of) changes (default: false)",
122
+ },
123
+ "skills": {
124
+ "type": "array",
125
+ "items": {"type": "string"},
126
+ "description": "Override skill selection (default: auto-detect based on domain)",
127
+ },
128
+ "include_skills": {
129
+ "type": "boolean",
130
+ "description": "Load skills and checklists (default: true). Set false for quick analysis only.",
131
+ "default": True,
132
+ },
133
+ "include_knowledge": {
134
+ "type": "boolean",
135
+ "description": "Load knowledge files (default: true). Set false for quick analysis only.",
136
+ "default": True,
137
+ },
138
+ },
139
+ },
140
+ ),
54
141
  Tool(
55
142
  name="quick_review",
56
- description="Run static analysis tools on code. Returns findings with domain metadata for skill selection.",
143
+ description="[DEPRECATED: use review(path, include_skills=false)] Run static analysis only.",
57
144
  inputSchema={
58
145
  "type": "object",
59
146
  "properties": {
@@ -70,6 +157,57 @@ async def list_tools() -> list[Tool]:
70
157
  "required": ["path"],
71
158
  },
72
159
  ),
160
+ Tool(
161
+ name="full_review",
162
+ description="[DEPRECATED: use review(path)] Comprehensive code review with skills and knowledge.",
163
+ inputSchema={
164
+ "type": "object",
165
+ "properties": {
166
+ "path": {
167
+ "type": "string",
168
+ "description": "File or directory path to review",
169
+ },
170
+ "skills": {
171
+ "type": "array",
172
+ "items": {"type": "string"},
173
+ "description": "Override skill selection (default: auto-detect based on domain)",
174
+ },
175
+ "include_sage": {
176
+ "type": "boolean",
177
+ "description": "Include Sage knowledge recall (not yet implemented)",
178
+ "default": True,
179
+ },
180
+ },
181
+ "required": ["path"],
182
+ },
183
+ ),
184
+ Tool(
185
+ name="review_changes",
186
+ description="[DEPRECATED: use review(mode='staged')] Review git changes.",
187
+ inputSchema={
188
+ "type": "object",
189
+ "properties": {
190
+ "mode": {
191
+ "type": "string",
192
+ "enum": ["staged", "unstaged", "branch", "commits"],
193
+ "description": "What changes to review",
194
+ },
195
+ "base": {
196
+ "type": "string",
197
+ "description": "Base branch for 'branch' mode or commit count for 'commits' mode",
198
+ },
199
+ "path": {
200
+ "type": "string",
201
+ "description": "Repository path (default: current directory)",
202
+ },
203
+ "include_context": {
204
+ "type": "boolean",
205
+ "description": "Include findings near changes (default: false)",
206
+ },
207
+ },
208
+ "required": ["mode"],
209
+ },
210
+ ),
73
211
  Tool(
74
212
  name="get_principles",
75
213
  description="Load engineering principles by topic",
@@ -157,9 +295,368 @@ async def list_tools() -> list[Tool]:
157
295
  "properties": {},
158
296
  },
159
297
  ),
298
+ Tool(
299
+ name="load_knowledge",
300
+ description="Load knowledge/principles files without running static analysis. Useful for getting guidance on patterns, best practices, or domain-specific knowledge. Automatically includes project and user knowledge files.",
301
+ inputSchema={
302
+ "type": "object",
303
+ "properties": {
304
+ "files": {
305
+ "type": "array",
306
+ "items": {"type": "string"},
307
+ "description": "Specific knowledge files to load (e.g., ['SECURITY.md', 'SMART_CONTRACT.md']). If not specified, loads all project/user knowledge files.",
308
+ },
309
+ "include_bundled": {
310
+ "type": "boolean",
311
+ "description": "Include bundled knowledge files in addition to project/user files (default: false)",
312
+ "default": False,
313
+ },
314
+ "topic": {
315
+ "type": "string",
316
+ "description": "Load by topic instead of files: 'security', 'engineering', 'smart_contract', 'checklist', 'repo_hygiene'",
317
+ },
318
+ },
319
+ },
320
+ ),
160
321
  ]
161
322
 
162
323
 
324
def _run_static_analysis(
    path: str,
    domain: Domain,
    domain_tags: list[str],
) -> tuple[list[ToolFinding], list[str]]:
    """Run the static analysis tools that apply to ``domain`` on ``path``.

    Tool selection:
      * smart contracts -> slither + semgrep
      * backend code tagged "python" -> ruff + bandit + semgrep
      * everything else (frontend included) -> semgrep only

    Returns:
        ``(findings, tool_errors)``. ``tool_errors`` holds one
        ``"<tool>: <error>"`` string per tool whose result reported an error.
    """
    if domain == Domain.SMART_CONTRACT:
        selected = {"slither", "semgrep"}
    elif domain == Domain.BACKEND and "python" in domain_tags:
        selected = {"ruff", "bandit", "semgrep"}
    else:
        selected = {"semgrep"}

    # The tuple order is the execution order, and therefore the order in
    # which findings and errors are accumulated.
    runners = (
        ("semgrep", lambda: delegate_semgrep(path, get_semgrep_config(domain))),
        ("ruff", lambda: delegate_ruff(path)),
        ("slither", lambda: delegate_slither(path)),
        ("bandit", lambda: delegate_bandit(path)),
    )

    collected: list[ToolFinding] = []
    failures: list[str] = []
    for tool_name, run in runners:
        if tool_name not in selected:
            continue
        outcome = run()
        if outcome.is_ok:
            collected.extend(outcome.value)
        elif outcome.is_err:
            failures.append(f"{tool_name}: {outcome.error}")

    return collected, failures
376
+
377
+
378
def _load_skills_and_knowledge(
    domain: Domain,
    domain_tags: list[str],
    skills_override: list[str] | None = None,
) -> tuple[list[tuple[str, list[str]]], dict[str, str], set[str], dict[str, str]]:
    """Resolve the skills for a domain and load the knowledge they link to.

    Returns:
        A 4-tuple ``(matched_skills, skill_content, knowledge_files,
        knowledge_content)``:

        * ``matched_skills`` - whatever ``match_skills_for_domain`` returned.
        * ``skill_content`` - skill name -> skill text. When the text contains
          a ``"\\n---\\n"`` line, only the stripped part after its first
          occurrence is kept. Skills that fail to load are omitted.
        * ``knowledge_files`` - knowledge linked from the matched skills,
          unioned with the custom knowledge files.
        * ``knowledge_content`` - filename -> content for each knowledge file
          that loaded successfully.
    """
    from crucible.knowledge.loader import load_knowledge_file
    from crucible.skills.loader import (
        get_knowledge_for_skills,
        load_skill,
        match_skills_for_domain,
    )

    frontmatter_end = "\n---\n"
    matched_skills = match_skills_for_domain(domain, domain_tags, skills_override)

    skill_content: dict[str, str] = {}
    for skill_name, _triggers in matched_skills:
        loaded_skill = load_skill(skill_name)
        if not loaded_skill.is_ok:
            continue
        _, raw_text = loaded_skill.value
        _head, delimiter, body = raw_text.partition(frontmatter_end)
        # An empty delimiter means the marker was absent: keep the text as-is.
        skill_content[skill_name] = body.strip() if delimiter else raw_text

    linked = get_knowledge_for_skills([name for name, _ in matched_skills])
    knowledge_files = linked | get_custom_knowledge_files()

    knowledge_content: dict[str, str] = {}
    for filename in knowledge_files:
        loaded_file = load_knowledge_file(filename)
        if loaded_file.is_ok:
            knowledge_content[filename] = loaded_file.value

    return matched_skills, skill_content, knowledge_files, knowledge_content
421
+
422
+
423
+ def _format_review_output(
424
+ path: str | None,
425
+ git_context: GitContext | None,
426
+ domains: list[str],
427
+ severity_counts: dict[str, int],
428
+ findings: list[ToolFinding],
429
+ tool_errors: list[str],
430
+ matched_skills: list[tuple[str, list[str]]] | None,
431
+ skill_content: dict[str, str] | None,
432
+ knowledge_files: set[str] | None,
433
+ knowledge_content: dict[str, str] | None,
434
+ ) -> str:
435
+ """Format unified review output."""
436
+ parts: list[str] = ["# Code Review\n"]
437
+
438
+ # Header based on mode
439
+ if git_context:
440
+ parts.append(f"**Mode:** {git_context.mode}")
441
+ if git_context.base_ref:
442
+ parts.append(f"**Base:** {git_context.base_ref}")
443
+ elif path:
444
+ parts.append(f"**Path:** `{path}`")
445
+
446
+ parts.append(f"**Domains:** {', '.join(domains)}")
447
+ parts.append(f"**Severity summary:** {severity_counts or 'No findings'}\n")
448
+
449
+ # Files changed (git mode)
450
+ if git_context and git_context.changes:
451
+ added = [c for c in git_context.changes if c.status == "A"]
452
+ modified = [c for c in git_context.changes if c.status == "M"]
453
+ deleted = [c for c in git_context.changes if c.status == "D"]
454
+ renamed = [c for c in git_context.changes if c.status == "R"]
455
+
456
+ total = len(git_context.changes)
457
+ parts.append(f"## Files Changed ({total})")
458
+ for c in added:
459
+ parts.append(f"- `+` {c.path}")
460
+ for c in modified:
461
+ parts.append(f"- `~` {c.path}")
462
+ for c in renamed:
463
+ parts.append(f"- `R` {c.old_path} -> {c.path}")
464
+ for c in deleted:
465
+ parts.append(f"- `-` {c.path}")
466
+ parts.append("")
467
+
468
+ # Commit messages
469
+ if git_context.commit_messages:
470
+ parts.append("## Commits")
471
+ for msg in git_context.commit_messages:
472
+ parts.append(f"- {msg}")
473
+ parts.append("")
474
+
475
+ # Tool errors
476
+ if tool_errors:
477
+ parts.append("## Tool Errors\n")
478
+ for error in tool_errors:
479
+ parts.append(f"- {error}")
480
+ parts.append("")
481
+
482
+ # Applicable skills
483
+ if matched_skills:
484
+ parts.append("## Applicable Skills\n")
485
+ for skill_name, triggers in matched_skills:
486
+ parts.append(f"- **{skill_name}**: matched on {', '.join(triggers)}")
487
+ parts.append("")
488
+
489
+ # Knowledge loaded
490
+ if knowledge_files:
491
+ parts.append("## Knowledge Loaded\n")
492
+ parts.append(f"Files: {', '.join(sorted(knowledge_files))}")
493
+ parts.append("")
494
+
495
+ # Findings
496
+ parts.append("## Static Analysis Findings\n")
497
+ if findings:
498
+ parts.append(_format_findings(findings))
499
+ else:
500
+ parts.append("No issues found.")
501
+ parts.append("")
502
+
503
+ # Review checklists from skills
504
+ if skill_content:
505
+ parts.append("---\n")
506
+ parts.append("## Review Checklists\n")
507
+ for skill_name, content in skill_content.items():
508
+ parts.append(f"### {skill_name}\n")
509
+ parts.append(content)
510
+ parts.append("")
511
+
512
+ # Knowledge reference
513
+ if knowledge_content:
514
+ parts.append("---\n")
515
+ parts.append("## Principles Reference\n")
516
+ for filename, content in sorted(knowledge_content.items()):
517
+ parts.append(f"### {filename}\n")
518
+ parts.append(content)
519
+ parts.append("")
520
+
521
+ return "\n".join(parts)
522
+
523
+
524
def _handle_review(arguments: dict[str, Any]) -> list[TextContent]:
    """Handle the unified ``review`` tool.

    Two modes are supported:

    * **Git mode** (``mode`` given): ``path`` (or the current directory) is
      resolved to a repository root, the change set for ``mode`` is read,
      each changed file is analysed, and findings are filtered to the
      changed lines.
    * **Path mode** (only ``path`` given): the path is analysed directly.

    Recognised ``arguments`` keys: ``path``, ``mode``, ``base``,
    ``include_context``, ``skills``, ``include_skills``,
    ``include_knowledge``.

    Returns:
        A single-element list holding the Markdown report, or a short
        message when there is nothing to review or the input is invalid.
    """
    import os
    from collections import Counter

    def _text(message: str) -> list[TextContent]:
        # Every exit path returns exactly one text payload.
        return [TextContent(type="text", text=message)]

    path = arguments.get("path")
    mode = arguments.get("mode")
    base = arguments.get("base")
    include_context = arguments.get("include_context", False)
    skills_override = arguments.get("skills")
    include_skills = arguments.get("include_skills", True)
    include_knowledge = arguments.get("include_knowledge", True)

    # Determine if this is path-based or git-based review
    git_context: GitContext | None = None
    changed_files: list[str] = []
    repo_path = ""

    if mode:
        # Git-based review
        root_result = get_repo_root(path if path else os.getcwd())
        if root_result.is_err:
            return _text(f"Error: {root_result.error}")
        repo_path = root_result.value

        if mode == "staged":
            context_result = get_staged_changes(repo_path)
        elif mode == "unstaged":
            context_result = get_unstaged_changes(repo_path)
        elif mode == "branch":
            context_result = get_branch_diff(repo_path, base if base else "main")
        elif mode == "commits":
            try:
                count = int(base) if base else 1
            except (TypeError, ValueError):
                return _text(f"Error: Invalid commit count '{base}'")
            if count < 1:
                # Zero or negative counts cannot describe "the last N commits".
                return _text(f"Error: Invalid commit count '{base}'")
            context_result = get_recent_commits(repo_path, count)
        else:
            return _text(f"Error: Unknown mode '{mode}'")

        if context_result.is_err:
            return _text(f"Error: {context_result.error}")

        git_context = context_result.value

        if not git_context.changes:
            if mode == "staged":
                return _text("No changes to review. Stage files with `git add` first.")
            if mode == "unstaged":
                return _text("No unstaged changes to review.")
            return _text("No changes found.")

        changed_files = get_changed_files(git_context)
        if not changed_files:
            return _text("No files to analyze (only deletions).")

    elif not path:
        return _text("Error: Either 'path' or 'mode' is required.")

    # Detect domains and run analysis
    all_findings: list[ToolFinding] = []
    tool_errors: list[str] = []
    domain_counts: Counter[Domain] = Counter()
    all_domain_tags: set[str] = set()

    if git_context is not None:
        # Git mode: analyze each changed file. The repo root resolved above is
        # reused rather than resolving it a second time unchecked.
        for file_path in changed_files:
            full_path = os.path.join(repo_path, file_path)
            # Changed paths are repo-relative, and _detect_domain() stats its
            # argument against the process cwd. Classify by file name instead
            # so detection does not depend on where the server was started.
            domain, domain_tags = _detect_domain_for_file(file_path)
            domain_counts[domain] += 1
            all_domain_tags.update(domain_tags)

            findings, errors = _run_static_analysis(full_path, domain, domain_tags)
            all_findings.extend(findings)
            tool_errors.extend(f"{e} ({file_path})" for e in errors)

        # Filter findings to changed lines
        all_findings = _filter_findings_to_changes(all_findings, git_context, include_context)
    else:
        # Path mode: analyze the path directly
        domain, domain_tags = _detect_domain(path)
        domain_counts[domain] += 1
        all_domain_tags.update(domain_tags)

        findings, errors = _run_static_analysis(path, domain, domain_tags)
        all_findings.extend(findings)
        tool_errors.extend(errors)

    # Deduplicate findings
    all_findings = _deduplicate_findings(all_findings)

    # Compute severity summary
    severity_counts: dict[str, int] = {}
    for f in all_findings:
        sev = f.severity.value
        severity_counts[sev] = severity_counts.get(sev, 0) + 1

    # Load skills and knowledge
    matched_skills: list[tuple[str, list[str]]] | None = None
    skill_content: dict[str, str] | None = None
    knowledge_files: set[str] | None = None
    knowledge_content: dict[str, str] | None = None

    if include_skills or include_knowledge:
        # Primary domain = the domain covering the most analysed files,
        # ignoring UNKNOWN when anything else was seen. Ties resolve to the
        # domain encountered first, so the choice is reproducible.
        known = Counter(
            {d: n for d, n in domain_counts.items() if d != Domain.UNKNOWN}
        )
        primary_domain = known.most_common(1)[0][0] if known else Domain.UNKNOWN
        matched, s_content, k_files, k_content = _load_skills_and_knowledge(
            primary_domain, sorted(all_domain_tags), skills_override
        )
        if include_skills:
            matched_skills = matched
            skill_content = s_content
        if include_knowledge:
            knowledge_files = k_files
            knowledge_content = k_content

    # Format output (tags sorted so the header line is stable between runs)
    output = _format_review_output(
        path,
        git_context,
        sorted(all_domain_tags) if all_domain_tags else ["unknown"],
        severity_counts,
        all_findings,
        tool_errors,
        matched_skills,
        skill_content,
        knowledge_files,
        knowledge_content,
    )

    return _text(output)
658
+
659
+
163
660
  def _handle_get_principles(arguments: dict[str, Any]) -> list[TextContent]:
164
661
  """Handle get_principles tool."""
165
662
  topic = arguments.get("topic")
@@ -170,6 +667,41 @@ def _handle_get_principles(arguments: dict[str, Any]) -> list[TextContent]:
170
667
  return [TextContent(type="text", text=f"Error: {result.error}")]
171
668
 
172
669
 
670
def _handle_load_knowledge(arguments: dict[str, Any]) -> list[TextContent]:
    """Handle the ``load_knowledge`` tool.

    A truthy ``topic`` wins: it is passed to ``load_principles`` and the
    result (or its error) is returned directly. Otherwise knowledge is loaded
    through ``load_all_knowledge``, restricted to ``files`` when that list is
    non-empty, with ``include_bundled`` forwarded (default ``False``).
    """

    def _reply(text: str) -> list[TextContent]:
        return [TextContent(type="text", text=text)]

    topic = arguments.get("topic")
    if topic:
        principles = load_principles(topic)
        if principles.is_ok:
            return _reply(principles.value)
        return _reply(f"Error: {principles.error}")

    requested = arguments.get("files")
    filenames = set(requested) if requested else None
    loaded, content = load_all_knowledge(
        include_bundled=arguments.get("include_bundled", False),
        filenames=filenames,
    )

    if loaded:
        sections = [
            "# Knowledge Loaded\n",
            f"**Files:** {', '.join(loaded)}\n",
            "---\n",
            content,
        ]
        return _reply("\n".join(sections))

    # Nothing loaded: say whether a specific request missed or nothing exists.
    if filenames:
        return _reply(f"No knowledge files found matching: {', '.join(sorted(filenames))}")
    return _reply("No knowledge files found. Add files to .crucible/knowledge/ or ~/.claude/crucible/knowledge/")
703
+
704
+
173
705
  def _handle_delegate_semgrep(arguments: dict[str, Any]) -> list[TextContent]:
174
706
  """Handle delegate_semgrep tool."""
175
707
  path = arguments.get("path", "")
@@ -241,8 +773,8 @@ def _handle_check_tools(arguments: dict[str, Any]) -> list[TextContent]:
241
773
  return [TextContent(type="text", text="\n".join(parts))]
242
774
 
243
775
 
244
- def _detect_domain(path: str) -> tuple[Domain, list[str]]:
245
- """Internal domain detection from file path.
776
+ def _detect_domain_for_file(path: str) -> tuple[Domain, list[str]]:
777
+ """Detect domain from a single file path.
246
778
 
247
779
  Returns (domain, list of domain tags for skill matching).
248
780
  """
@@ -263,8 +795,59 @@ def _detect_domain(path: str) -> tuple[Domain, list[str]]:
263
795
  elif path.endswith((".tf", ".yaml", ".yml")):
264
796
  return Domain.INFRASTRUCTURE, ["infrastructure", "devops"]
265
797
  else:
798
+ return Domain.UNKNOWN, []
799
+
800
+
801
def _detect_domain(path: str) -> tuple[Domain, list[str]]:
    """Detect domain from a file or directory path.

    * An existing file is classified directly by ``_detect_domain_for_file``.
    * A directory is scanned (at most 1000 files). Every file is classified;
      the most frequent non-UNKNOWN domain becomes the primary domain and the
      tags of all scanned files are merged.
    * Anything else (e.g. a path that does not exist) is UNKNOWN.

    Hidden entries (names starting with ".") and common dependency or build
    directories are skipped. Only names *below* ``path`` are tested, so a
    root such as ``..`` or ``/home/me/.work/repo`` is still scanned.

    Returns:
        ``(primary_domain, tags)``. For directories ``tags`` is sorted; it is
        ``["unknown"]`` when nothing was recognised.
    """
    import os
    from collections import Counter
    from itertools import islice
    from pathlib import Path

    root = Path(path)

    # Single file - use direct detection
    if root.is_file():
        return _detect_domain_for_file(path)

    if not root.is_dir():
        return Domain.UNKNOWN, ["unknown"]

    skipped_dirs = {"node_modules", "__pycache__", "venv", "dist", "build"}
    max_files = 1000  # cap the scan so huge repos stay cheap

    def _candidate_files():
        for dirpath, dirnames, filenames in os.walk(root):
            # Prune in place so os.walk never descends into skipped trees
            # (rglob would enumerate all of node_modules/.git first). Sorting
            # makes the scan order, and thus the 1000-file cut-off, stable.
            dirnames[:] = sorted(
                d for d in dirnames
                if not d.startswith(".") and d not in skipped_dirs
            )
            for filename in sorted(filenames):
                if not filename.startswith("."):
                    yield os.path.join(dirpath, filename)

    domain_counts: Counter[Domain] = Counter()
    all_tags: set[str] = set()

    for file_path in islice(_candidate_files(), max_files):
        domain, tags = _detect_domain_for_file(file_path)
        if domain != Domain.UNKNOWN:
            domain_counts[domain] += 1
        all_tags.update(tags)

    # Return most common domain, or UNKNOWN if none found
    if not domain_counts:
        return Domain.UNKNOWN, ["unknown"]

    primary_domain = domain_counts.most_common(1)[0][0]
    return primary_domain, sorted(all_tags) if all_tags else ["unknown"]
850
+
268
851
 
269
852
  def _handle_quick_review(arguments: dict[str, Any]) -> list[TextContent]:
270
853
  """Handle quick_review tool - returns findings with domain metadata."""
@@ -324,6 +907,9 @@ def _handle_quick_review(arguments: dict[str, Any]) -> list[TextContent]:
324
907
  else:
325
908
  tool_results.append(f"## Bandit\nError: {result.error}")
326
909
 
910
+ # Deduplicate findings
911
+ all_findings = _deduplicate_findings(all_findings)
912
+
327
913
  # Compute severity summary
328
914
  severity_counts: dict[str, int] = {}
329
915
  for f in all_findings:
@@ -341,16 +927,483 @@ def _handle_quick_review(arguments: dict[str, Any]) -> list[TextContent]:
341
927
  return [TextContent(type="text", text="\n".join(output_parts))]
342
928
 
343
929
 
930
+ def _filter_findings_to_changes(
931
+ findings: list[ToolFinding],
932
+ context: GitContext,
933
+ include_context: bool = False,
934
+ ) -> list[ToolFinding]:
935
+ """Filter findings to only those in changed lines."""
936
+ # Build a lookup of file -> changed line ranges
937
+ changed_ranges: dict[str, list[tuple[int, int]]] = {}
938
+ for change in context.changes:
939
+ if change.status == "D":
940
+ continue # Skip deleted files
941
+ ranges = [(r.start, r.end) for r in change.added_lines]
942
+ changed_ranges[change.path] = ranges
943
+
944
+ context_lines = 5 if include_context else 0
945
+ filtered: list[ToolFinding] = []
946
+
947
+ for finding in findings:
948
+ # Parse location: "path:line" or "path:line:col"
949
+ parts = finding.location.split(":")
950
+ if len(parts) < 2:
951
+ continue
952
+
953
+ file_path = parts[0]
954
+ try:
955
+ line_num = int(parts[1])
956
+ except ValueError:
957
+ continue
958
+
959
+ # Check if file is in changes
960
+ # Handle both absolute and relative paths
961
+ matching_file = None
962
+ for changed_file in changed_ranges:
963
+ if file_path.endswith(changed_file) or changed_file.endswith(file_path):
964
+ matching_file = changed_file
965
+ break
966
+
967
+ if not matching_file:
968
+ continue
969
+
970
+ # Check if line is in changed ranges
971
+ ranges = changed_ranges[matching_file]
972
+ in_range = False
973
+ for start, end in ranges:
974
+ if start - context_lines <= line_num <= end + context_lines:
975
+ in_range = True
976
+ break
977
+
978
+ if in_range:
979
+ filtered.append(finding)
980
+
981
+ return filtered
982
+
983
+
984
+ def _format_change_review(
985
+ context: GitContext,
986
+ findings: list[ToolFinding],
987
+ severity_counts: dict[str, int],
988
+ tool_errors: list[str] | None = None,
989
+ matched_skills: list[tuple[str, list[str]]] | None = None,
990
+ skill_content: dict[str, str] | None = None,
991
+ knowledge_files: set[str] | None = None,
992
+ knowledge_content: dict[str, str] | None = None,
993
+ ) -> str:
994
+ """Format change review output."""
995
+ parts: list[str] = ["# Change Review\n"]
996
+ parts.append(f"**Mode:** {context.mode}")
997
+ if context.base_ref:
998
+ parts.append(f"**Base:** {context.base_ref}")
999
+ parts.append("")
1000
+
1001
+ # Files changed
1002
+ added = [c for c in context.changes if c.status == "A"]
1003
+ modified = [c for c in context.changes if c.status == "M"]
1004
+ deleted = [c for c in context.changes if c.status == "D"]
1005
+ renamed = [c for c in context.changes if c.status == "R"]
1006
+
1007
+ total = len(context.changes)
1008
+ parts.append(f"## Files Changed ({total})")
1009
+ for c in added:
1010
+ parts.append(f"- `+` {c.path}")
1011
+ for c in modified:
1012
+ parts.append(f"- `~` {c.path}")
1013
+ for c in renamed:
1014
+ parts.append(f"- `R` {c.old_path} -> {c.path}")
1015
+ for c in deleted:
1016
+ parts.append(f"- `-` {c.path}")
1017
+ parts.append("")
1018
+
1019
+ # Commit messages (if available)
1020
+ if context.commit_messages:
1021
+ parts.append("## Commits")
1022
+ for msg in context.commit_messages:
1023
+ parts.append(f"- {msg}")
1024
+ parts.append("")
1025
+
1026
+ # Applicable skills
1027
+ if matched_skills:
1028
+ parts.append("## Applicable Skills\n")
1029
+ for skill_name, triggers in matched_skills:
1030
+ parts.append(f"- **{skill_name}**: matched on {', '.join(triggers)}")
1031
+ parts.append("")
1032
+
1033
+ # Knowledge loaded
1034
+ if knowledge_files:
1035
+ parts.append("## Knowledge Loaded\n")
1036
+ parts.append(f"Files: {', '.join(sorted(knowledge_files))}")
1037
+ parts.append("")
1038
+
1039
+ # Tool errors (if any)
1040
+ if tool_errors:
1041
+ parts.append("## Tool Errors\n")
1042
+ for error in tool_errors:
1043
+ parts.append(f"- {error}")
1044
+ parts.append("")
1045
+
1046
+ # Findings
1047
+ if findings:
1048
+ parts.append("## Findings in Changed Code\n")
1049
+ parts.append(f"**Summary:** {severity_counts}\n")
1050
+ parts.append(_format_findings(findings))
1051
+ else:
1052
+ parts.append("## Findings in Changed Code\n")
1053
+ parts.append("No issues found in changed code.")
1054
+ parts.append("")
1055
+
1056
+ # Review checklists from skills
1057
+ if skill_content:
1058
+ parts.append("---\n")
1059
+ parts.append("## Review Checklists\n")
1060
+ for skill_name, content in skill_content.items():
1061
+ parts.append(f"### {skill_name}\n")
1062
+ parts.append(content)
1063
+ parts.append("")
1064
+
1065
+ # Knowledge reference
1066
+ if knowledge_content:
1067
+ parts.append("---\n")
1068
+ parts.append("## Principles Reference\n")
1069
+ for filename, content in sorted(knowledge_content.items()):
1070
+ parts.append(f"### {filename}\n")
1071
+ parts.append(content)
1072
+ parts.append("")
1073
+
1074
+ return "\n".join(parts)
1075
+
1076
+
1077
+ def _handle_review_changes(arguments: dict[str, Any]) -> list[TextContent]:
1078
+ """Handle review_changes tool - review git changes."""
1079
+ import os
1080
+
1081
+ mode = arguments.get("mode", "staged")
1082
+ base = arguments.get("base")
1083
+ path = arguments.get("path", os.getcwd())
1084
+ include_context = arguments.get("include_context", False)
1085
+
1086
+ # Get repo root
1087
+ root_result = get_repo_root(path)
1088
+ if root_result.is_err:
1089
+ return [TextContent(type="text", text=f"Error: {root_result.error}")]
1090
+
1091
+ repo_path = root_result.value
1092
+
1093
+ # Get git context based on mode
1094
+ if mode == "staged":
1095
+ context_result = get_staged_changes(repo_path)
1096
+ elif mode == "unstaged":
1097
+ context_result = get_unstaged_changes(repo_path)
1098
+ elif mode == "branch":
1099
+ base_branch = base if base else "main"
1100
+ context_result = get_branch_diff(repo_path, base_branch)
1101
+ elif mode == "commits":
1102
+ try:
1103
+ count = int(base) if base else 1
1104
+ except ValueError:
1105
+ return [TextContent(type="text", text=f"Error: Invalid commit count '{base}'")]
1106
+ context_result = get_recent_commits(repo_path, count)
1107
+ else:
1108
+ return [TextContent(type="text", text=f"Error: Unknown mode '{mode}'")]
1109
+
1110
+ if context_result.is_err:
1111
+ return [TextContent(type="text", text=f"Error: {context_result.error}")]
1112
+
1113
+ context = context_result.value
1114
+
1115
+ # Check if there are any changes
1116
+ if not context.changes:
1117
+ if mode == "staged":
1118
+ return [TextContent(type="text", text="No changes to review. Stage files with `git add` first.")]
1119
+ elif mode == "unstaged":
1120
+ return [TextContent(type="text", text="No unstaged changes to review.")]
1121
+ else:
1122
+ return [TextContent(type="text", text="No changes found.")]
1123
+
1124
+ # Get changed files (excluding deleted)
1125
+ changed_files = get_changed_files(context)
1126
+ if not changed_files:
1127
+ return [TextContent(type="text", text="No files to analyze (only deletions).")]
1128
+
1129
+ # Run analysis on changed files
1130
+ all_findings: list[ToolFinding] = []
1131
+ tool_errors: list[str] = []
1132
+ domains_detected: set[Domain] = set()
1133
+ all_domain_tags: set[str] = set()
1134
+
1135
+ for file_path in changed_files:
1136
+ full_path = f"{repo_path}/{file_path}"
1137
+
1138
+ # Detect domain for this file
1139
+ domain, domain_tags = _detect_domain(file_path)
1140
+ domains_detected.add(domain)
1141
+ all_domain_tags.update(domain_tags)
1142
+
1143
+ # Select tools based on domain
1144
+ if domain == Domain.SMART_CONTRACT:
1145
+ tools = ["slither", "semgrep"]
1146
+ elif domain == Domain.BACKEND and "python" in domain_tags:
1147
+ tools = ["ruff", "bandit", "semgrep"]
1148
+ elif domain == Domain.FRONTEND:
1149
+ tools = ["semgrep"]
1150
+ else:
1151
+ tools = ["semgrep"]
1152
+
1153
+ # Run tools
1154
+ if "semgrep" in tools:
1155
+ config = get_semgrep_config(domain)
1156
+ result = delegate_semgrep(full_path, config)
1157
+ if result.is_ok:
1158
+ all_findings.extend(result.value)
1159
+ elif result.is_err:
1160
+ tool_errors.append(f"semgrep ({file_path}): {result.error}")
1161
+
1162
+ if "ruff" in tools:
1163
+ result = delegate_ruff(full_path)
1164
+ if result.is_ok:
1165
+ all_findings.extend(result.value)
1166
+ elif result.is_err:
1167
+ tool_errors.append(f"ruff ({file_path}): {result.error}")
1168
+
1169
+ if "slither" in tools:
1170
+ result = delegate_slither(full_path)
1171
+ if result.is_ok:
1172
+ all_findings.extend(result.value)
1173
+ elif result.is_err:
1174
+ tool_errors.append(f"slither ({file_path}): {result.error}")
1175
+
1176
+ if "bandit" in tools:
1177
+ result = delegate_bandit(full_path)
1178
+ if result.is_ok:
1179
+ all_findings.extend(result.value)
1180
+ elif result.is_err:
1181
+ tool_errors.append(f"bandit ({file_path}): {result.error}")
1182
+
1183
+ # Filter findings to changed lines
1184
+ filtered_findings = _filter_findings_to_changes(all_findings, context, include_context)
1185
+
1186
+ # Deduplicate findings
1187
+ filtered_findings = _deduplicate_findings(filtered_findings)
1188
+
1189
+ # Compute severity summary
1190
+ severity_counts: dict[str, int] = {}
1191
+ for f in filtered_findings:
1192
+ sev = f.severity.value
1193
+ severity_counts[sev] = severity_counts.get(sev, 0) + 1
1194
+
1195
+ # Match skills and load knowledge based on detected domains
1196
+ from crucible.knowledge.loader import load_knowledge_file
1197
+ from crucible.skills.loader import (
1198
+ get_knowledge_for_skills,
1199
+ load_skill,
1200
+ match_skills_for_domain,
1201
+ )
1202
+
1203
+ primary_domain = next(iter(domains_detected)) if domains_detected else Domain.UNKNOWN
1204
+ matched_skills = match_skills_for_domain(
1205
+ primary_domain, list(all_domain_tags), override=None
1206
+ )
1207
+
1208
+ skill_names = [name for name, _ in matched_skills]
1209
+ skill_content: dict[str, str] = {}
1210
+ for skill_name, _triggers in matched_skills:
1211
+ result = load_skill(skill_name)
1212
+ if result.is_ok:
1213
+ _, content = result.value
1214
+ skill_content[skill_name] = content
1215
+
1216
+ knowledge_files = get_knowledge_for_skills(skill_names)
1217
+ knowledge_content: dict[str, str] = {}
1218
+ for filename in knowledge_files:
1219
+ result = load_knowledge_file(filename)
1220
+ if result.is_ok:
1221
+ knowledge_content[filename] = result.value
1222
+
1223
+ # Format output
1224
+ output = _format_change_review(
1225
+ context,
1226
+ filtered_findings,
1227
+ severity_counts,
1228
+ tool_errors,
1229
+ matched_skills,
1230
+ skill_content,
1231
+ knowledge_files,
1232
+ knowledge_content,
1233
+ )
1234
+ return [TextContent(type="text", text=output)]
1235
+
1236
+
1237
def _handle_full_review(arguments: dict[str, Any]) -> list[TextContent]:
    """Handle the full_review tool - comprehensive code review.

    Reads two keys from ``arguments``:

    * ``"path"`` - target passed to domain detection and to every
      static-analysis delegate (defaults to ``""``).
    * ``"skills"`` - optional value forwarded unchanged as the override
      argument of ``match_skills_for_domain`` (defaults to ``None``).

    Returns a single-element list holding one ``TextContent`` whose text is
    a Markdown report: detected domains, severity summary, tool errors,
    matched skills with their checklists, loaded knowledge files, the
    static-analysis findings and, when available, the principles content.
    """
    path = arguments.get("path", "")
    skills_override = arguments.get("skills")
    # NOTE: an "include_sage" argument is accepted by the tool but is not
    # implemented yet, so it is deliberately not read here.

    # 1. Detect domain
    domain, domain_tags = _detect_domain(path)

    # 2. Select static-analysis tools for the domain. Every domain other
    #    than smart contracts and Python backends falls back to semgrep only.
    if domain == Domain.SMART_CONTRACT:
        default_tools = ["slither", "semgrep"]
    elif domain == Domain.BACKEND and "python" in domain_tags:
        default_tools = ["ruff", "bandit", "semgrep"]
    else:
        default_tools = ["semgrep"]

    all_findings: list[ToolFinding] = []
    tool_errors: list[str] = []

    # 3. Run the selected tools. The tuple fixes the execution order
    #    (semgrep, ruff, slither, bandit) independently of the order in
    #    ``default_tools``; the callables are lazy so that the semgrep
    #    config is only resolved when semgrep actually runs.
    tool_runners = (
        ("semgrep", lambda: delegate_semgrep(path, get_semgrep_config(domain))),
        ("ruff", lambda: delegate_ruff(path)),
        ("slither", lambda: delegate_slither(path)),
        ("bandit", lambda: delegate_bandit(path)),
    )
    for tool_name, run_tool in tool_runners:
        if tool_name not in default_tools:
            continue
        tool_result = run_tool()
        if tool_result.is_ok:
            all_findings.extend(tool_result.value)
        elif tool_result.is_err:
            tool_errors.append(f"{tool_name}: {tool_result.error}")

    # 4. Match applicable skills
    matched_skills = match_skills_for_domain(domain, domain_tags, skills_override)
    skill_names = [name for name, _ in matched_skills]
    skill_triggers: dict[str, tuple[str, ...]] = {
        name: tuple(triggers) for name, triggers in matched_skills
    }

    # 5. Load skill content (checklists/prompts); skills that fail to load
    #    are silently left out of the report.
    skill_contents: dict[str, str] = {}
    for skill_name in skill_names:
        skill_result = load_skill(skill_name)
        if not skill_result.is_ok:
            continue
        _, content = skill_result.value
        # Keep only the text after the first "\n---\n" separator
        # (the end of the frontmatter) when one is present.
        if "\n---\n" in content:
            skill_contents[skill_name] = content.split("\n---\n", 1)[1].strip()
        else:
            skill_contents[skill_name] = content

    # 6. Collect knowledge files: custom project/user knowledge is always
    #    included, plus whatever the matched skills reference.
    skill_knowledge = get_knowledge_for_skills(skill_names)
    custom_knowledge = get_custom_knowledge_files()
    knowledge_files = skill_knowledge | custom_knowledge

    # 7. Load knowledge content
    loaded_files, principles_content = load_all_knowledge(
        include_bundled=False,
        filenames=knowledge_files,
    )

    # 8. Deduplicate findings
    all_findings = _deduplicate_findings(all_findings)

    # 9. Compute severity summary. A plain dict is used on purpose: its
    #    repr is embedded verbatim in the report below.
    severity_counts: dict[str, int] = {}
    for finding in all_findings:
        sev = finding.severity.value
        severity_counts[sev] = severity_counts.get(sev, 0) + 1

    # 10. Build result
    review_result = FullReviewResult(
        domains_detected=tuple(domain_tags),
        severity_summary=severity_counts,
        findings=tuple(all_findings),
        applicable_skills=tuple(skill_names),
        skill_triggers_matched=skill_triggers,
        principles_loaded=tuple(loaded_files),
        principles_content=principles_content,
        sage_knowledge=None,  # Not implemented yet
        sage_query_used=None,  # Not implemented yet
    )

    # 11. Format output
    output_parts = [
        "# Full Review Results\n",
        f"**Path:** `{path}`",
        f"**Domains detected:** {', '.join(review_result.domains_detected)}",
        f"**Severity summary:** {review_result.severity_summary or 'No findings'}\n",
    ]

    if tool_errors:
        output_parts.append("## Tool Errors\n")
        output_parts.extend(f"- {error}" for error in tool_errors)
        output_parts.append("")

    output_parts.append("## Applicable Skills\n")
    if review_result.applicable_skills:
        for skill in review_result.applicable_skills:
            triggers = review_result.skill_triggers_matched.get(skill, ())
            output_parts.append(f"- **{skill}**: matched on {', '.join(triggers)}")
    else:
        output_parts.append("- No skills matched")
    output_parts.append("")

    # Include skill checklists
    if skill_contents:
        output_parts.append("## Review Checklists\n")
        for skill_name, content in skill_contents.items():
            output_parts.append(f"### {skill_name}\n")
            output_parts.append(content)
            output_parts.append("")

    output_parts.append("## Knowledge Loaded\n")
    if review_result.principles_loaded:
        output_parts.append(f"Files: {', '.join(review_result.principles_loaded)}\n")
    else:
        output_parts.append("No knowledge files loaded.\n")

    output_parts.append("## Static Analysis Findings\n")
    output_parts.append(_format_findings(list(review_result.findings)))

    if review_result.principles_content:
        output_parts.append("\n---\n")
        output_parts.append("## Principles Reference\n")
        output_parts.append(review_result.principles_content)

    return [TextContent(type="text", text="\n".join(output_parts))]
1388
+
1389
+
344
1390
  @server.call_tool() # type: ignore[misc]
345
1391
  async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
346
1392
  """Handle tool calls."""
347
1393
  handlers = {
1394
+ # Unified review tool
1395
+ "review": _handle_review,
1396
+ # Deprecated tools (kept for backwards compatibility)
1397
+ "quick_review": _handle_quick_review,
1398
+ "full_review": _handle_full_review,
1399
+ "review_changes": _handle_review_changes,
1400
+ # Other tools
348
1401
  "get_principles": _handle_get_principles,
1402
+ "load_knowledge": _handle_load_knowledge,
349
1403
  "delegate_semgrep": _handle_delegate_semgrep,
350
1404
  "delegate_ruff": _handle_delegate_ruff,
351
1405
  "delegate_slither": _handle_delegate_slither,
352
1406
  "delegate_bandit": _handle_delegate_bandit,
353
- "quick_review": _handle_quick_review,
354
1407
  "check_tools": _handle_check_tools,
355
1408
  }
356
1409