crucible-mcp 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,281 @@
1
+ """Skill loading and matching for full_review.
2
+
3
+ Skills follow cascade resolution:
4
+ 1. Project: .crucible/skills/
5
+ 2. User: ~/.claude/crucible/skills/
6
+ 3. Bundled: package skills/
7
+ """
8
+
9
+ import re
10
+ from dataclasses import dataclass
11
+ from functools import lru_cache
12
+ from pathlib import Path
13
+
14
+ from crucible.errors import Result, err, ok
15
+ from crucible.models import Domain
16
+
17
# Skill directories (cascade priority): project overrides user, user overrides bundled
SKILLS_BUNDLED = Path(__file__).parent  # 3. Lowest priority: skills shipped inside this package
SKILLS_USER = Path.home() / ".claude" / "crucible" / "skills"  # 2. Per-user overrides
SKILLS_PROJECT = Path(".crucible") / "skills"  # 1. Highest priority; relative to the current working directory
21
+
22
+
23
@dataclass(frozen=True)
class SkillMetadata:
    """Parsed skill frontmatter metadata.

    Frozen (immutable) so instances can safely be shared via the
    lru_cache in the skill loader.
    """

    name: str  # Skill directory name; left empty by the parser, filled in by the loader
    version: str  # Frontmatter "version" value (defaults to "1.0")
    triggers: tuple[str, ...]  # Tags that activate this skill when they intersect the domain tags
    always_run: bool  # True -> skill is included for every review regardless of domain
    always_run_for_domains: tuple[str, ...]  # Domain values that force inclusion of this skill
    knowledge: tuple[str, ...]  # Knowledge file names this skill references
33
+
34
+
35
def _strip_quotes(value: str) -> str:
    """Remove one layer of matching single or double quotes, if present."""
    if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
        return value[1:-1]
    return value


def parse_skill_frontmatter(content: str) -> Result[SkillMetadata, str]:
    """Parse YAML frontmatter from skill markdown content.

    Only the subset of YAML used by skill files is supported: scalar
    strings (optionally quoted), booleans, and flow-style lists like
    ``[item1, item2]``.

    Args:
        content: Full skill file content with frontmatter

    Returns:
        Result containing SkillMetadata or error message
    """
    # Check for frontmatter delimiters
    if not content.startswith("---"):
        return err("No frontmatter found (file must start with ---)")

    # Find the closing delimiter
    end_match = re.search(r"\n---\s*\n", content)
    if not end_match:
        return err("No closing frontmatter delimiter found")

    frontmatter = content[3 : end_match.start()]

    # Parse simple YAML (we don't need a full parser for this format)
    data: dict[str, str | list[str] | bool] = {}

    for line in frontmatter.strip().split("\n"):
        line = line.strip()
        if not line or line.startswith("#"):
            continue

        if ":" not in line:
            continue

        key, value = line.split(":", 1)
        key = key.strip()
        value = value.strip()

        # Handle list values: [item1, item2, ...]
        if value.startswith("[") and value.endswith("]"):
            items = value[1:-1].split(",")
            # Strip quotes so ["python"] and [python] both yield "python";
            # previously quoted items kept their quotes and never matched tags.
            data[key] = [
                _strip_quotes(item.strip()) for item in items if item.strip()
            ]
        # Handle boolean values
        elif value.lower() == "true":
            data[key] = True
        elif value.lower() == "false":
            data[key] = False
        else:
            # Scalar string; unwrap optional single or double quotes.
            data[key] = _strip_quotes(value)

    # Build SkillMetadata, coercing each field to its expected type.
    version = data.get("version", "1.0")
    if isinstance(version, list):
        version = version[0] if version else "1.0"

    triggers_raw = data.get("triggers", [])
    triggers = tuple(triggers_raw) if isinstance(triggers_raw, list) else ()

    always_run = data.get("always_run", False)
    if not isinstance(always_run, bool):
        always_run = False

    always_run_for_domains_raw = data.get("always_run_for_domains", [])
    always_run_for_domains = (
        tuple(always_run_for_domains_raw)
        if isinstance(always_run_for_domains_raw, list)
        else ()
    )

    knowledge_raw = data.get("knowledge", [])
    knowledge = tuple(knowledge_raw) if isinstance(knowledge_raw, list) else ()

    return ok(
        SkillMetadata(
            name="",  # Will be set by load_skill
            version=str(version),
            triggers=triggers,
            always_run=always_run,
            always_run_for_domains=always_run_for_domains,
            knowledge=knowledge,
        )
    )
117
+
118
+
119
def resolve_skill_path(skill_name: str) -> tuple[Path | None, str]:
    """Find skill directory with cascade priority.

    Returns (path, source) where source is 'project', 'user', or 'bundled'.
    """
    # Cascade order: project beats user, user beats bundled.
    cascade = (
        (SKILLS_PROJECT, "project"),
        (SKILLS_USER, "user"),
        (SKILLS_BUNDLED, "bundled"),
    )
    for root, source in cascade:
        candidate = root / skill_name / "SKILL.md"
        if candidate.exists():
            return candidate, source
    return None, ""
140
+
141
+
142
def get_all_skill_names() -> set[str]:
    """Get all available skill names from all sources."""
    skill_names: set[str] = set()

    # A directory counts as a skill only if it contains a SKILL.md file.
    for source_dir in (SKILLS_BUNDLED, SKILLS_USER, SKILLS_PROJECT):
        if not source_dir.exists():
            continue
        skill_names.update(
            entry.name
            for entry in source_dir.iterdir()
            if entry.is_dir() and (entry / "SKILL.md").exists()
        )

    return skill_names
153
+
154
+
155
@lru_cache(maxsize=64)
def _load_skill_cached(skill_name: str, path_str: str) -> tuple[SkillMetadata, str] | str:
    """Internal cached skill loader.

    Returns tuple on success, error string on failure.
    Using path_str as cache key to invalidate on path changes.
    """
    path = Path(path_str)
    try:
        # Explicit encoding: skill files are authored as UTF-8 markdown.
        # The file can disappear between resolve_skill_path() and this read
        # (TOCTOU), so report that via the error-string channel rather than
        # letting OSError escape. NOTE(review): the error is cached until
        # clear_skill_cache() is called, same as parse failures.
        content = path.read_text(encoding="utf-8")
    except OSError as e:
        return f"Failed to parse skill '{skill_name}': {e}"

    result = parse_skill_frontmatter(content)
    if result.is_err:
        return f"Failed to parse skill '{skill_name}': {result.error}"

    # Rebuild metadata with the real skill name (the parser leaves it empty).
    metadata = SkillMetadata(
        name=skill_name,
        version=result.value.version,
        triggers=result.value.triggers,
        always_run=result.value.always_run,
        always_run_for_domains=result.value.always_run_for_domains,
        knowledge=result.value.knowledge,
    )

    return (metadata, content)
179
+
180
+
181
def load_skill(skill_name: str) -> Result[tuple[SkillMetadata, str], str]:
    """Load a skill by name with cascade resolution.

    Results are cached to avoid repeated file reads.

    Args:
        skill_name: Name of the skill directory (e.g., "security-engineer")

    Returns:
        Result containing (metadata, content) tuple or error message
    """
    path, _source = resolve_skill_path(skill_name)
    if path is None:
        # Not found anywhere in the cascade; list what IS available.
        available = get_all_skill_names()
        if available:
            return err(f"Skill '{skill_name}' not found. Available: {', '.join(sorted(available))}")
        return err(f"Skill '{skill_name}' not found and no skills available")

    # The cached loader signals failure with a plain string.
    loaded = _load_skill_cached(skill_name, str(path))
    return err(loaded) if isinstance(loaded, str) else ok(loaded)
203
+
204
+
205
def clear_skill_cache() -> None:
    """Clear the skill loading cache. Useful for testing or after skill updates."""
    # Drops every entry from _load_skill_cached's lru_cache so the next
    # load_skill() call re-reads skill files from disk.
    _load_skill_cached.cache_clear()
208
+
209
+
210
def match_skills_for_domain(
    domain: Domain,
    domain_tags: list[str],
    override: list[str] | None = None,
) -> list[tuple[str, list[str]]]:
    """Find skills that match the given domain and tags.

    Args:
        domain: Detected code domain
        domain_tags: Tags from domain detection (e.g., ["python", "backend"])
        override: Optional explicit skill list (skips matching logic)

    Returns:
        List of (skill_name, matched_triggers) tuples
    """
    # Explicit override bypasses matching entirely.
    # NOTE: an empty override list falls through to normal matching.
    if override:
        return [(name, ["explicit"]) for name in override]

    domain_value = domain.value  # e.g., "smart_contract"
    tag_set = set(domain_tags)
    matched: list[tuple[str, list[str]]] = []

    for skill_name in get_all_skill_names():
        loaded = load_skill(skill_name)
        if loaded.is_err:
            # Unparseable skills are silently skipped during matching.
            continue

        metadata, _content = loaded.value
        reasons: list[str] = []

        # Rule 1: always_run = true -> always include
        if metadata.always_run:
            reasons.append("always_run")

        # Rule 2: always_run_for_domains contains the domain -> include
        if domain_value in metadata.always_run_for_domains:
            reasons.append(f"always_run_for_domains:{domain_value}")

        # Rule 3: triggers intersect with domain_tags -> include
        reasons.extend(sorted(set(metadata.triggers) & tag_set))

        if reasons:
            matched.append((skill_name, reasons))

    # Sort by skill name for consistent ordering
    return sorted(matched, key=lambda pair: pair[0])
260
+
261
+
262
def get_knowledge_for_skills(skill_names: list[str]) -> set[str]:
    """Collect all knowledge files referenced by the given skills.

    Args:
        skill_names: List of skill names to check

    Returns:
        Set of knowledge file names (e.g., {"SECURITY.md", "SMART_CONTRACT.md"})
    """
    collected: set[str] = set()

    for name in skill_names:
        loaded = load_skill(name)
        # Skills that fail to load contribute nothing.
        if not loaded.is_err:
            metadata, _content = loaded.value
            collected |= set(metadata.knowledge)

    return collected
@@ -41,7 +41,7 @@ def check_tool(name: str) -> ToolStatus:
41
41
  if name == "semgrep":
42
42
  result = subprocess.run([name, "--version"], capture_output=True, text=True, timeout=5)
43
43
  version = result.stdout.strip().split("\n")[0] if result.returncode == 0 else None
44
- elif name == "ruff" or name == "slither":
44
+ elif name in ("ruff", "slither", "gitleaks"):
45
45
  result = subprocess.run([name, "--version"], capture_output=True, text=True, timeout=5)
46
46
  version = result.stdout.strip() if result.returncode == 0 else None
47
47
  except (subprocess.TimeoutExpired, FileNotFoundError):
@@ -52,7 +52,7 @@ def check_tool(name: str) -> ToolStatus:
52
52
 
53
53
def check_all_tools() -> dict[str, ToolStatus]:
    """Check status of all supported tools."""
    supported = ("semgrep", "ruff", "slither", "bandit", "gitleaks")
    return {tool: check_tool(tool) for tool in supported}
57
57
 
58
58
 
@@ -73,6 +73,24 @@ def _severity_from_semgrep(level: str) -> Severity:
73
73
  return mapping.get(level.upper(), Severity.INFO)
74
74
 
75
75
 
76
def _validate_path(path: str) -> Result[None, str]:
    """Validate path argument to prevent argument injection.

    Args:
        path: Path to validate

    Returns:
        Result with None on success, error message on failure
    """
    # Collect the first applicable rejection reason, if any.
    problem: str | None = None
    if not path:
        problem = "Path cannot be empty"
    elif path.startswith("-"):
        # A leading dash could be parsed as a CLI flag by delegated tools.
        problem = f"Path cannot start with '-': {path}"
    elif not Path(path).exists():
        problem = f"Path does not exist: {path}"

    return ok(None) if problem is None else err(problem)
92
+
93
+
76
94
  def delegate_semgrep(
77
95
  path: str,
78
96
  config: str = "auto",
@@ -89,8 +107,9 @@ def delegate_semgrep(
89
107
  Returns:
90
108
  Result containing list of findings or error message
91
109
  """
92
- if not Path(path).exists():
93
- return err(f"Path does not exist: {path}")
110
+ validation = _validate_path(path)
111
+ if validation.is_err:
112
+ return err(validation.error)
94
113
 
95
114
  try:
96
115
  result = subprocess.run(
@@ -141,8 +160,9 @@ def delegate_ruff(
141
160
  Returns:
142
161
  Result containing list of findings or error message
143
162
  """
144
- if not Path(path).exists():
145
- return err(f"Path does not exist: {path}")
163
+ validation = _validate_path(path)
164
+ if validation.is_err:
165
+ return err(validation.error)
146
166
 
147
167
  try:
148
168
  result = subprocess.run(
@@ -215,8 +235,9 @@ def delegate_bandit(
215
235
  Returns:
216
236
  Result containing list of findings or error message
217
237
  """
218
- if not Path(path).exists():
219
- return err(f"Path does not exist: {path}")
238
+ validation = _validate_path(path)
239
+ if validation.is_err:
240
+ return err(validation.error)
220
241
 
221
242
  try:
222
243
  result = subprocess.run(
@@ -273,8 +294,9 @@ def delegate_slither(
273
294
  Returns:
274
295
  Result containing list of findings or error message
275
296
  """
276
- if not Path(path).exists():
277
- return err(f"Path does not exist: {path}")
297
+ validation = _validate_path(path)
298
+ if validation.is_err:
299
+ return err(validation.error)
278
300
 
279
301
  cmd = ["slither", path, "--json", "-"]
280
302
  if detectors:
@@ -324,3 +346,67 @@ def delegate_slither(
324
346
  findings.append(finding)
325
347
 
326
348
  return ok(findings)
349
+
350
+
351
def delegate_gitleaks(
    path: str,
    staged_only: bool = False,
    timeout: int = 60,
) -> Result[list[ToolFinding], str]:
    """
    Run gitleaks to detect secrets in code.

    Args:
        path: Repository path to scan
        staged_only: Only scan staged changes (for pre-commit)
        timeout: Timeout in seconds

    Returns:
        Result containing list of findings or error message
    """
    validation = _validate_path(path)
    if validation.is_err:
        return err(validation.error)

    # Build command. NOTE(review): /dev/stdout is POSIX-only — confirm
    # behavior on Windows.
    report_flags = ["--report-format", "json", "--report-path", "/dev/stdout"]
    if staged_only:
        cmd = ["gitleaks", "protect", "--staged", *report_flags]
        workdir = path
    else:
        cmd = ["gitleaks", "detect", "--source", path, *report_flags]
        workdir = None

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd=workdir,
        )
    except FileNotFoundError:
        return err("gitleaks not found. Install from: https://github.com/gitleaks/gitleaks")
    except subprocess.TimeoutExpired:
        return err(f"gitleaks timed out after {timeout}s")

    # Exit code 1 means leaks found, 0 means clean
    if result.returncode not in (0, 1):
        return err(f"gitleaks failed: {result.stderr}")

    raw = result.stdout.strip()
    try:
        leaks = json.loads(raw) if raw else []
    except json.JSONDecodeError as e:
        return err(f"Failed to parse gitleaks output: {e}")

    # Map each gitleaks report entry onto a ToolFinding.
    findings: list[ToolFinding] = [
        ToolFinding(
            tool="gitleaks",
            rule=leak.get("RuleID", "unknown"),
            severity=Severity.CRITICAL,  # All secrets are critical
            message=f"Secret detected: {leak.get('Description', 'potential secret')}",
            location=f"{leak.get('File', '?')}:{leak.get('StartLine', '?')}",
            suggestion="Remove secret and rotate credentials",
        )
        for leak in leaks
    ]

    return ok(findings)