crucible-mcp 0.3.0-py3-none-any.whl → 0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,486 @@
+ """LLM-based compliance checking for assertions.
+
+ Uses Anthropic SDK to run compliance assertions against code.
+ Supports Sonnet (default) and Opus (for high-stakes assertions).
+ """
+
+ import json
+ import os
+ from typing import Any
+
+ from crucible.enforcement.budget import (
+     create_budget_state,
+     estimate_assertion_tokens,
+     prepare_llm_assertions,
+ )
+ from crucible.enforcement.models import (
+     Assertion,
+     AssertionType,
+     BudgetState,
+     ComplianceConfig,
+     EnforcementFinding,
+     LLMAssertionResult,
+     OverflowBehavior,
+ )
+ from crucible.enforcement.patterns import matches_glob, matches_language
+
+ # Model ID mapping
+ MODEL_IDS = {
+     "sonnet": "claude-sonnet-4-20250514",
+     "opus": "claude-opus-4-20250514",
+     "haiku": "claude-haiku-4-20250514",
+ }
+
+ # System prompt for compliance checking
+ SYSTEM_PROMPT = """You are a code compliance checker. Analyze the provided code against the compliance requirements.
+
+ Respond with a JSON object:
+ {
+     "compliant": true/false,
+     "findings": [
+         {
+             "line": <line_number or null>,
+             "issue": "<description of the issue>",
+             "severity": "error" | "warning" | "info"
+         }
+     ],
+     "reasoning": "<brief explanation of your analysis>"
+ }
+
+ If the code is compliant, return compliant: true with an empty findings array.
+ If there are issues, return compliant: false with specific findings.
+ Be precise about line numbers when possible. Focus on actual compliance issues, not style preferences."""
+
+
+ def _load_api_key_from_config() -> str | None:
+     """Try to load API key from config file.
+
+     Checks (in order):
+     1. ~/.config/crucible/secrets.yaml
+     2. ~/.crucible/secrets.yaml (legacy)
+
+     Returns:
+         API key if found, None otherwise
+     """
+     from pathlib import Path
+
+     import yaml
+
+     config_paths = [
+         Path.home() / ".config" / "crucible" / "secrets.yaml",
+         Path.home() / ".crucible" / "secrets.yaml",
+     ]
+
+     for config_path in config_paths:
+         if config_path.exists():
+             try:
+                 with open(config_path) as f:
+                     data = yaml.safe_load(f) or {}
+                 key = data.get("anthropic_api_key") or data.get("ANTHROPIC_API_KEY")
+                 if key:
+                     return key
+             except Exception:
+                 pass  # Ignore malformed config files
+
+     return None
+
+
+ def _get_anthropic_client() -> Any:
+     """Get Anthropic client instance.
+
+     Checks for API key in order:
+     1. ANTHROPIC_API_KEY environment variable
+     2. ~/.config/crucible/secrets.yaml
+     3. ~/.crucible/secrets.yaml
+
+     Returns:
+         Anthropic client
+
+     Raises:
+         ImportError: If anthropic package is not installed
+         ValueError: If API key not found in any location
+     """
+     try:
+         import anthropic
+     except ImportError as e:
+         raise ImportError(
+             "anthropic package is required for LLM compliance checking. "
+             "Install with: pip install anthropic"
+         ) from e
+
+     # Try env var first (standard for CI)
+     api_key = os.environ.get("ANTHROPIC_API_KEY")
+
+     # Fall back to config file (convenient for local dev)
+     if not api_key:
+         api_key = _load_api_key_from_config()
+
+     if not api_key:
+         raise ValueError(
+             "Anthropic API key not found. Set ANTHROPIC_API_KEY environment variable "
+             "or add to ~/.config/crucible/secrets.yaml:\n"
+             "  anthropic_api_key: sk-ant-..."
+         )
+
+     return anthropic.Anthropic(api_key=api_key)
+
+
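Editor's note: the lookup above means a local setup only needs a small YAML file. A minimal sketch of that one-time setup, using the exact path and key name the loader checks (the key value is a placeholder, not a real credential):

    from pathlib import Path

    # Write ~/.config/crucible/secrets.yaml in the format that
    # _load_api_key_from_config() reads; CI environments should
    # prefer exporting ANTHROPIC_API_KEY instead.
    config_dir = Path.home() / ".config" / "crucible"
    config_dir.mkdir(parents=True, exist_ok=True)
    (config_dir / "secrets.yaml").write_text("anthropic_api_key: sk-ant-...\n")
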
+ def _build_user_prompt(assertion: Assertion, file_path: str, content: str) -> str:
+     """Build user prompt for compliance check.
+
+     Args:
+         assertion: The assertion to check
+         file_path: Path to the file being checked
+         content: File content
+
+     Returns:
+         Formatted user prompt
+     """
+     return f"""## File: {file_path}
+
+ ## Compliance Requirements
+ {assertion.compliance}
+
+ ## Code to Analyze
+ ```
+ {content}
+ ```
+
+ Analyze this code against the compliance requirements and respond with JSON."""
+
+
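Editor's note: a quick way to see the prompt shape. The assertion fields below are invented for illustration, and the call assumes it runs inside this module (the diff does not show the module's path):

    from crucible.enforcement.models import Assertion, AssertionType, Priority

    demo = Assertion(
        id="no-raw-sql",  # hypothetical assertion id
        type=AssertionType.LLM,
        message="Avoid raw SQL",
        severity="error",
        priority=Priority.HIGH,
        compliance="Database access must go through the ORM; no string-built SQL.",
    )
    print(_build_user_prompt(demo, "src/db.py", "cursor.execute('SELECT 1')"))
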
+ def _parse_llm_response(
+     response_text: str,
+     assertion: Assertion,
+     file_path: str,
+ ) -> tuple[list[EnforcementFinding], str | None]:
+     """Parse LLM response into findings.
+
+     Args:
+         response_text: Raw response from LLM
+         assertion: The assertion that was checked
+         file_path: Path to the file
+
+     Returns:
+         Tuple of (findings, reasoning)
+     """
+     findings: list[EnforcementFinding] = []
+     reasoning = None
+
+     try:
+         # Try to extract JSON from response
+         # Handle markdown code blocks
+         text = response_text.strip()
+         if text.startswith("```"):
+             # Remove markdown code block
+             lines = text.split("\n")
+             # Find first and last ``` lines
+             start = 0
+             end = len(lines)
+             for i, line in enumerate(lines):
+                 if line.startswith("```") and i == 0:
+                     start = i + 1
+                 elif line.startswith("```") and i > 0:
+                     end = i
+                     break
+             text = "\n".join(lines[start:end])
+
+         data = json.loads(text)
+
+         reasoning = data.get("reasoning")
+         is_compliant = data.get("compliant", True)
+
+         if not is_compliant and "findings" in data:
+             for finding_data in data["findings"]:
+                 line_num = finding_data.get("line")
+                 issue = finding_data.get("issue", "Compliance issue detected")
+                 severity = finding_data.get("severity", assertion.severity)
+
+                 # Validate severity
+                 if severity not in ("error", "warning", "info"):
+                     severity = assertion.severity
+
+                 location = f"{file_path}:{line_num}" if line_num else file_path
+
+                 findings.append(
+                     EnforcementFinding(
+                         assertion_id=assertion.id,
+                         message=issue,
+                         severity=severity,  # type: ignore[arg-type]
+                         priority=assertion.priority,
+                         location=location,
+                         source="llm",
+                         llm_reasoning=reasoning,
+                     )
+                 )
+
+     except (json.JSONDecodeError, KeyError, TypeError):
+         # If we can't parse the response, create a single finding with the raw response
+         findings.append(
+             EnforcementFinding(
+                 assertion_id=assertion.id,
+                 message=f"LLM compliance check failed to parse: {response_text[:200]}...",
+                 severity="warning",
+                 priority=assertion.priority,
+                 location=file_path,
+                 source="llm",
+             )
+         )
+
+     return findings, reasoning
+
+
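Editor's note: a doctest-style sketch of what the parser accepts. The fenced JSON payload is invented, and `demo` is the hypothetical assertion from the _build_user_prompt snippet above:

    sample = """```json
    {
      "compliant": false,
      "findings": [
        {"line": 3, "issue": "Hardcoded credential", "severity": "error"}
      ],
      "reasoning": "A password literal is assigned on line 3."
    }
    ```"""

    findings, reasoning = _parse_llm_response(sample, demo, "src/db.py")
    assert findings[0].location == "src/db.py:3"
    assert findings[0].severity == "error"
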
+ def run_single_assertion(
+     assertion: Assertion,
+     file_path: str,
+     content: str,
+     config: ComplianceConfig,
+ ) -> LLMAssertionResult:
+     """Run a single LLM assertion against file content.
+
+     Args:
+         assertion: The assertion to run
+         file_path: Path to the file
+         content: File content
+         config: Compliance configuration
+
+     Returns:
+         LLMAssertionResult with findings
+     """
+     if assertion.type != AssertionType.LLM:
+         return LLMAssertionResult(
+             assertion_id=assertion.id,
+             passed=True,
+             findings=(),
+             tokens_used=0,
+             model_used="",
+             error="Not an LLM assertion",
+         )
+
+     # Determine model to use
+     model_name = assertion.model or config.model
+     model_id = MODEL_IDS.get(model_name, MODEL_IDS["sonnet"])
+
+     try:
+         client = _get_anthropic_client()
+
+         user_prompt = _build_user_prompt(assertion, file_path, content)
+
+         response = client.messages.create(
+             model=model_id,
+             max_tokens=1024,
+             system=SYSTEM_PROMPT,
+             messages=[{"role": "user", "content": user_prompt}],
+         )
+
+         # Extract text from response
+         response_text = ""
+         for block in response.content:
+             if hasattr(block, "text"):
+                 response_text += block.text
+
+         # Calculate tokens used
+         tokens_used = response.usage.input_tokens + response.usage.output_tokens
+
+         # Parse response
+         findings, reasoning = _parse_llm_response(response_text, assertion, file_path)
+
+         return LLMAssertionResult(
+             assertion_id=assertion.id,
+             passed=len(findings) == 0,
+             findings=tuple(findings),
+             tokens_used=tokens_used,
+             model_used=model_name,
+         )
+
+     except ImportError as e:
+         return LLMAssertionResult(
+             assertion_id=assertion.id,
+             passed=True,  # Don't fail on missing dependency
+             findings=(),
+             tokens_used=0,
+             model_used=model_name,
+             error=str(e),
+         )
+     except ValueError as e:
+         return LLMAssertionResult(
+             assertion_id=assertion.id,
+             passed=True,  # Don't fail on missing API key
+             findings=(),
+             tokens_used=0,
+             model_used=model_name,
+             error=str(e),
+         )
+     except Exception as e:
+         return LLMAssertionResult(
+             assertion_id=assertion.id,
+             passed=True,  # Don't fail on API errors
+             findings=(),
+             tokens_used=0,
+             model_used=model_name,
+             error=f"API error: {e}",
+         )
+
+
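Editor's note: fail-open behavior is the point of the broad except clauses above; a call like the hypothetical one below returns passed=True with `error` set when the key or package is missing, rather than breaking a review run. The assertion fields are invented:

    from crucible.enforcement.models import (
        Assertion,
        AssertionType,
        ComplianceConfig,
        Priority,
    )

    secret_rule = Assertion(
        id="no-secrets",  # hypothetical id
        type=AssertionType.LLM,
        message="No hardcoded secrets",
        severity="error",
        priority=Priority.CRITICAL,
        compliance="Code must not embed API keys, tokens, or passwords.",
        model="opus",  # overrides the config-level default model
    )

    result = run_single_assertion(
        secret_rule, "app.py", 'TOKEN = "abc123"', ComplianceConfig()
    )
    if result.error:
        print(f"check degraded: {result.error}")  # e.g. missing key or package
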
+ def filter_applicable_assertions(
+     assertions: list[Assertion],
+     file_path: str,
+ ) -> list[Assertion]:
+     """Filter assertions to those applicable to the given file.
+
+     Args:
+         assertions: All LLM assertions
+         file_path: File path to check
+
+     Returns:
+         Assertions applicable to this file
+     """
+     applicable: list[Assertion] = []
+
+     for assertion in assertions:
+         # Check language applicability
+         if assertion.languages and not matches_language(file_path, assertion.languages):
+             continue
+
+         # Check glob applicability
+         if assertion.applicability and not matches_glob(
+             file_path,
+             assertion.applicability.glob,
+             assertion.applicability.exclude,
+         ):
+             continue
+
+         applicable.append(assertion)
+
+     return applicable
+
+
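Editor's note: assuming matches_language maps file extensions to language names (its implementation is not in this diff), a Python-scoped rule survives for a .py path and is dropped otherwise. The rule fields are hypothetical:

    py_rule = Assertion(
        id="py-docstrings",  # hypothetical id
        type=AssertionType.LLM,
        message="Public functions need docstrings",
        severity="info",
        priority=Priority.LOW,
        languages=("python",),
    )
    filter_applicable_assertions([py_rule], "src/main.py")  # -> [py_rule]
    filter_applicable_assertions([py_rule], "src/main.go")  # -> []
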
+ def run_llm_assertions(
+     file_path: str,
+     content: str,
+     assertions: list[Assertion],
+     config: ComplianceConfig,
+ ) -> tuple[list[EnforcementFinding], BudgetState, list[str]]:
+     """Run LLM assertions against a file.
+
+     Args:
+         file_path: Path to the file
+         content: File content
+         assertions: All assertions (will filter to LLM type)
+         config: Compliance configuration
+
+     Returns:
+         Tuple of (findings, budget_state, errors)
+     """
+     if not config.enabled:
+         return [], create_budget_state(config), []
+
+     all_findings: list[EnforcementFinding] = []
+     errors: list[str] = []
+
+     # Prepare assertions (filter, sort, select within budget)
+     to_run, budget_state = prepare_llm_assertions(
+         assertions,
+         len(content),
+         config,
+     )
+
+     # Filter to applicable assertions for this file
+     applicable = filter_applicable_assertions(to_run, file_path)
+
+     # Run each applicable assertion
+     for assertion in applicable:
+         # Check if we still have budget
+         estimated = estimate_assertion_tokens(assertion, len(content))
+         if budget_state.total_budget > 0 and budget_state.tokens_used + estimated > budget_state.total_budget:
+             budget_state.skip(assertion.id)
+
+             if config.overflow_behavior == OverflowBehavior.FAIL:
+                 errors.append(
+                     f"Token budget exceeded before running '{assertion.id}'. "
+                     f"Used: {budget_state.tokens_used}, Budget: {budget_state.total_budget}"
+                 )
+                 break
+             elif config.overflow_behavior == OverflowBehavior.WARN:
+                 errors.append(
+                     f"Skipped '{assertion.id}' due to token budget. "
+                     f"Used: {budget_state.tokens_used}, Budget: {budget_state.total_budget}"
+                 )
+             # SKIP and WARN both move on without running the assertion
+             continue
+
+         # Run the assertion
+         result = run_single_assertion(assertion, file_path, content, config)
+
+         # Update budget state
+         budget_state.consume(result.tokens_used)
+
+         # Collect findings
+         all_findings.extend(result.findings)
+
+         # Record errors
+         if result.error:
+             errors.append(f"{assertion.id}: {result.error}")
+
+     return all_findings, budget_state, errors
+
+
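Editor's note: a hypothetical end-to-end call. With overflow_behavior=WARN, every assertion that would blow the budget is recorded in `errors` instead of running:

    from crucible.enforcement.models import ComplianceConfig, OverflowBehavior

    config = ComplianceConfig(token_budget=5000, overflow_behavior=OverflowBehavior.WARN)
    source = open("src/main.py").read()  # hypothetical target file
    rules: list[Assertion] = []          # populate from your assertion files

    findings, budget, errors = run_llm_assertions("src/main.py", source, rules, config)
    for f in findings:
        print(f"{f.severity}: {f.location}: {f.message}")
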
+ def run_llm_assertions_batch(
+     files: list[tuple[str, str]],
+     assertions: list[Assertion],
+     config: ComplianceConfig,
+ ) -> tuple[list[EnforcementFinding], BudgetState, list[str]]:
+     """Run LLM assertions against multiple files with shared budget.
+
+     Args:
+         files: List of (file_path, content) tuples
+         assertions: All assertions
+         config: Compliance configuration
+
+     Returns:
+         Tuple of (all_findings, budget_state, errors)
+     """
+     if not config.enabled:
+         return [], create_budget_state(config), []
+
+     all_findings: list[EnforcementFinding] = []
+     all_errors: list[str] = []
+
+     # Calculate total content length for budget estimation
+     total_content_length = sum(len(content) for _, content in files)
+
+     # Prepare assertions with total budget
+     to_run, budget_state = prepare_llm_assertions(
+         assertions,
+         total_content_length // max(1, len(files)),  # Average per file
+         config,
+     )
+
+     # Process each file
+     for file_path, content in files:
+         applicable = filter_applicable_assertions(to_run, file_path)
+
+         for assertion in applicable:
+             # Check budget before each assertion
+             estimated = estimate_assertion_tokens(assertion, len(content))
+             if budget_state.total_budget > 0 and budget_state.tokens_used + estimated > budget_state.total_budget:
+                 budget_state.skip(assertion.id)
+
+                 if config.overflow_behavior == OverflowBehavior.FAIL:
+                     all_errors.append(
+                         f"Token budget exceeded at '{file_path}' before '{assertion.id}'"
+                     )
+                     return all_findings, budget_state, all_errors
+                 elif config.overflow_behavior == OverflowBehavior.WARN:
+                     all_errors.append(
+                         f"Skipped '{assertion.id}' on '{file_path}' due to budget"
+                     )
+                 # SKIP and WARN both move on without running the assertion
+                 continue
+
+             result = run_single_assertion(assertion, file_path, content, config)
+             budget_state.consume(result.tokens_used)
+             all_findings.extend(result.findings)
+
+             if result.error:
+                 all_errors.append(f"{file_path}:{assertion.id}: {result.error}")
+
+     return all_findings, budget_state, all_errors
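Editor's note: batch usage shares one BudgetState across files, so early files can consume budget that later files then lack. A sketch reusing the hypothetical `rules` and `config` from the previous snippet:

    paths = ["src/a.py", "src/b.py"]  # hypothetical file set
    files = [(p, open(p).read()) for p in paths]

    findings, budget, errors = run_llm_assertions_batch(files, rules, config)
    print(f"{budget.assertions_run} ran, {budget.assertions_skipped} skipped, "
          f"{budget.tokens_used}/{budget.total_budget} tokens used")

The second hunk below appears to be crucible/enforcement/models.py, given the imports at the top of the first file, though the diff does not name it.
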
@@ -0,0 +1,177 @@
+ """Data models for the enforcement module."""
+
+ from dataclasses import dataclass, field
+ from enum import Enum
+ from typing import Literal
+
+
+ class AssertionType(Enum):
+     """Type of assertion check."""
+
+     PATTERN = "pattern"
+     LLM = "llm"
+
+
+ class OverflowBehavior(Enum):
+     """Behavior when token budget is exceeded."""
+
+     SKIP = "skip"  # Skip remaining assertions silently
+     WARN = "warn"  # Skip with warning
+     FAIL = "fail"  # Fail the review
+
+
+ class Priority(Enum):
+     """Assertion priority levels for budget management."""
+
+     CRITICAL = "critical"
+     HIGH = "high"
+     MEDIUM = "medium"
+     LOW = "low"
+
+     @property
+     def rank(self) -> int:
+         """Return numeric rank for sorting (lower = higher priority)."""
+         return {
+             Priority.CRITICAL: 0,
+             Priority.HIGH: 1,
+             Priority.MEDIUM: 2,
+             Priority.LOW: 3,
+         }[self]
+
+
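Editor's note: rank gives the enum a stable numeric sort key, so budget-constrained runs can order work most-important-first. A minimal sketch:

    from crucible.enforcement.models import Priority

    levels = [Priority.LOW, Priority.CRITICAL, Priority.MEDIUM]
    levels.sort(key=lambda p: p.rank)
    # -> [Priority.CRITICAL, Priority.MEDIUM, Priority.LOW]
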
+ @dataclass(frozen=True)
+ class Applicability:
+     """Applicability configuration for an assertion."""
+
+     glob: str | None = None
+     exclude: tuple[str, ...] = ()
+
+
+ @dataclass(frozen=True)
+ class Assertion:
+     """A single assertion rule."""
+
+     id: str
+     type: AssertionType
+     message: str
+     severity: Literal["error", "warning", "info"]
+     priority: Priority
+     pattern: str | None = None  # For pattern assertions
+     languages: tuple[str, ...] = ()
+     applicability: Applicability | None = None
+     compliance: str | None = None  # For LLM assertions (v0.5+)
+     model: str | None = None  # For LLM assertions (v0.5+)
+
+
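Editor's note: the two assertion flavors differ only in which optional fields they fill; hypothetical examples of each:

    pattern_rule = Assertion(
        id="no-print",  # hypothetical id
        type=AssertionType.PATTERN,
        message="Use logging instead of print()",
        severity="warning",
        priority=Priority.MEDIUM,
        pattern=r"\bprint\(",
        languages=("python",),
    )

    llm_rule = Assertion(
        id="handles-errors",  # hypothetical id
        type=AssertionType.LLM,
        message="External I/O must handle failures",
        severity="error",
        priority=Priority.HIGH,
        compliance="Every network or file operation must handle its failure modes.",
        applicability=Applicability(glob="src/**/*.py", exclude=("src/tests/**",)),
    )
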
+ @dataclass(frozen=True)
+ class AssertionFile:
+     """A parsed assertion file."""
+
+     version: str
+     name: str
+     description: str
+     assertions: tuple[Assertion, ...]
+     source: str  # "project", "user", or "bundled"
+     path: str  # File path for error reporting
+
+
+ @dataclass(frozen=True)
+ class PatternMatch:
+     """A pattern match result."""
+
+     assertion_id: str
+     line: int
+     column: int
+     match_text: str
+     file_path: str
+
+     @property
+     def location(self) -> str:
+         """Return location string in standard format."""
+         return f"{self.file_path}:{self.line}:{self.column}"
+
+
+ @dataclass(frozen=True)
+ class Suppression:
+     """An inline suppression comment."""
+
+     line: int
+     rule_ids: tuple[str, ...]
+     reason: str | None
+     applies_to_next_line: bool
+
+
+ @dataclass(frozen=True)
+ class EnforcementFinding:
+     """A finding from enforcement checking."""
+
+     assertion_id: str
+     message: str
+     severity: Literal["error", "warning", "info"]
+     priority: Priority
+     location: str
+     match_text: str | None = None
+     suppressed: bool = False
+     suppression_reason: str | None = None
+     source: Literal["pattern", "llm"] = "pattern"
+     llm_reasoning: str | None = None  # LLM's explanation for the finding
+
+
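Editor's note: PatternMatch.location formats coordinates the way the rest of the module reports them (file:line:column); a tiny check with made-up values:

    m = PatternMatch(
        assertion_id="no-print",  # hypothetical id
        line=12,
        column=5,
        match_text="print(",
        file_path="src/cli.py",
    )
    assert m.location == "src/cli.py:12:5"
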
+ @dataclass(frozen=True)
+ class ComplianceConfig:
+     """Configuration for LLM-based compliance checking."""
+
+     enabled: bool = True
+     model: str = "sonnet"  # Default model (sonnet or opus)
+     token_budget: int = 10000  # 0 = unlimited
+     priority_order: tuple[str, ...] = ("critical", "high", "medium", "low")
+     overflow_behavior: OverflowBehavior = OverflowBehavior.WARN
+
+
+ @dataclass
+ class BudgetState:
+     """Mutable state for tracking token budget during compliance run."""
+
+     total_budget: int
+     tokens_used: int = 0
+     assertions_run: int = 0
+     assertions_skipped: int = 0
+     overflow_triggered: bool = False
+     skipped_assertions: list[str] = field(default_factory=list)
+
+     @property
+     def tokens_remaining(self) -> int:
+         """Tokens remaining in budget."""
+         if self.total_budget == 0:
+             return float("inf")  # type: ignore[return-value]
+         return max(0, self.total_budget - self.tokens_used)
+
+     @property
+     def is_exhausted(self) -> bool:
+         """Whether budget is exhausted."""
+         if self.total_budget == 0:
+             return False
+         return self.tokens_used >= self.total_budget
+
+     def consume(self, tokens: int) -> None:
+         """Consume tokens from budget."""
+         self.tokens_used += tokens
+         self.assertions_run += 1
+
+     def skip(self, assertion_id: str) -> None:
+         """Record a skipped assertion."""
+         self.assertions_skipped += 1
+         self.skipped_assertions.append(assertion_id)
+         self.overflow_triggered = True
+
+
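Editor's note: the budget mechanics in one sketch. Note that total_budget=0 means unlimited, so is_exhausted never trips and tokens_remaining reports infinity:

    state = BudgetState(total_budget=1000)
    state.consume(400)  # one assertion ran
    assert state.tokens_remaining == 600
    assert not state.is_exhausted

    state.skip("big-rule")  # hypothetical over-budget assertion
    assert state.overflow_triggered
    assert state.skipped_assertions == ["big-rule"]

    unlimited = BudgetState(total_budget=0)
    assert not unlimited.is_exhausted  # tokens_remaining is float("inf")
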
+ @dataclass(frozen=True)
+ class LLMAssertionResult:
+     """Result from running a single LLM assertion."""
+
+     assertion_id: str
+     passed: bool
+     findings: tuple["EnforcementFinding", ...]
+     tokens_used: int
+     model_used: str
+     error: str | None = None