crucible-mcp 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
1
+ """Enforcement module for pattern assertions and applicability checking."""
2
+
3
+ from crucible.enforcement.assertions import (
4
+ get_all_assertion_files,
5
+ load_assertion_file,
6
+ load_assertions,
7
+ resolve_assertion_file,
8
+ )
9
+ from crucible.enforcement.models import (
10
+ Assertion,
11
+ AssertionFile,
12
+ AssertionType,
13
+ PatternMatch,
14
+ Priority,
15
+ Suppression,
16
+ )
17
+ from crucible.enforcement.patterns import (
18
+ find_pattern_matches,
19
+ parse_suppressions,
20
+ run_pattern_assertions,
21
+ )
22
+
23
# Names re-exported by this package, grouped by the submodule they are
# imported from above.
__all__ = [
    # crucible.enforcement.models
    "Assertion",
    "AssertionFile",
    "AssertionType",
    "PatternMatch",
    "Priority",
    "Suppression",
    # crucible.enforcement.assertions
    "get_all_assertion_files",
    "load_assertion_file",
    "load_assertions",
    "resolve_assertion_file",
    # crucible.enforcement.patterns
    "find_pattern_matches",
    "parse_suppressions",
    "run_pattern_assertions",
]
@@ -0,0 +1,276 @@
1
+ """Load and validate assertion files.
2
+
3
+ Assertions follow the same cascade as skills/knowledge:
4
+ 1. Project: .crucible/assertions/
5
+ 2. User: ~/.claude/crucible/assertions/
6
+ 3. Bundled: package assertions/ (none bundled by default)
7
+ """
8
+
9
+ from functools import lru_cache
10
+ from pathlib import Path
11
+
12
+ import yaml
13
+
14
+ from crucible.enforcement.models import (
15
+ Applicability,
16
+ Assertion,
17
+ AssertionFile,
18
+ AssertionType,
19
+ Priority,
20
+ )
21
+ from crucible.errors import Result, err, ok
22
+
23
# Directories that may hold assertion files. The bundled directory sits next
# to this module, the user directory lives under the home directory, and the
# project directory is relative to the current working directory.
ASSERTIONS_BUNDLED = Path(__file__).with_name("bundled")
ASSERTIONS_USER = Path.home().joinpath(".claude", "crucible", "assertions")
ASSERTIONS_PROJECT = Path(".crucible", "assertions")
27
+
28
+
29
def resolve_assertion_file(filename: str) -> tuple[Path | None, str]:
    """Locate an assertion file using the project > user > bundled cascade.

    A ``.yaml`` suffix is appended when ``filename`` ends in neither
    ``.yaml`` nor ``.yml``.

    Args:
        filename: Assertion file name, with or without a YAML extension.

    Returns:
        ``(path, source)`` for the first directory that contains the file,
        where ``source`` is ``"project"``, ``"user"`` or ``"bundled"``;
        ``(None, "")`` when none of the directories contains it.
    """
    has_yaml_suffix = filename.endswith((".yaml", ".yml"))
    name = filename if has_yaml_suffix else f"{filename}.yaml"

    # Ordered from highest to lowest priority; the first hit wins.
    cascade = (
        (ASSERTIONS_PROJECT, "project"),
        (ASSERTIONS_USER, "user"),
        (ASSERTIONS_BUNDLED, "bundled"),
    )
    for directory, source in cascade:
        candidate = directory / name
        if candidate.exists():
            return candidate, source

    return None, ""
54
+
55
+
56
def get_all_assertion_files() -> set[str]:
    """Collect the names of all assertion files across every source directory.

    Scans the bundled, user and project assertion directories and gathers the
    names of regular files whose suffix is ``.yaml`` or ``.yml``. A name that
    appears in several directories is reported once.

    Returns:
        Set of file names (not full paths).
    """
    names: set[str] = set()

    for source_dir in (ASSERTIONS_BUNDLED, ASSERTIONS_USER, ASSERTIONS_PROJECT):
        # is_dir() rather than exists(): a regular file sitting at one of
        # these paths would make iterdir() raise NotADirectoryError.
        if not source_dir.is_dir():
            continue
        names.update(
            entry.name
            for entry in source_dir.iterdir()
            if entry.is_file() and entry.suffix in (".yaml", ".yml")
        )

    return names
67
+
68
+
69
def _parse_priority(value: str | None) -> Priority:
    """Convert a priority value from YAML into a ``Priority`` member.

    Args:
        value: Raw value of the ``priority`` key. Matching is
            case-insensitive because the string is lowercased first.

    Returns:
        The matching ``Priority``; ``Priority.MEDIUM`` when the value is
        missing, is not a string, or does not name a known priority.
    """
    # YAML may yield ints, bools, lists, ... for this key; calling .lower()
    # on those would raise AttributeError, so treat them like an unknown
    # priority instead of crashing the loader.
    if not isinstance(value, str):
        return Priority.MEDIUM
    try:
        return Priority(value.lower())
    except ValueError:
        return Priority.MEDIUM
77
+
78
+
79
def _parse_applicability(data: dict | None) -> Applicability | None:
    """Build an ``Applicability`` from the ``applicability`` mapping.

    Args:
        data: Mapping read from YAML, or ``None`` when the key is absent.

    Returns:
        ``None`` when ``data`` is ``None``; otherwise an ``Applicability``
        carrying the ``glob`` value and the ``exclude`` entries as a tuple.
        A single string under ``exclude`` is treated as a one-element list.
    """
    if data is None:
        return None

    exclude_raw = data.get("exclude")
    if exclude_raw is None:
        # An ``exclude:`` key with no value parses as None; tuple(None)
        # would raise TypeError, so treat it the same as a missing key.
        exclude_raw = ()
    elif isinstance(exclude_raw, str):
        exclude_raw = (exclude_raw,)

    return Applicability(
        glob=data.get("glob"),
        exclude=tuple(exclude_raw),
    )
92
+
93
+
94
def _parse_assertion(data: dict) -> Result[Assertion, str]:
    """Parse one assertion mapping from YAML into an ``Assertion``.

    Required keys are ``id`` and ``message``; ``pattern`` is also required
    when the assertion type is ``AssertionType.PATTERN``. ``type`` defaults to
    ``"pattern"``. An unrecognised ``severity`` falls back to ``"warning"``.

    Args:
        data: Mapping for a single entry of the file's ``assertions`` list.

    Returns:
        ``ok(Assertion)`` on success, or ``err(message)`` describing the first
        missing or invalid required field.
    """
    # Required fields
    assertion_id = data.get("id")
    if not assertion_id:
        return err("Assertion missing required 'id' field")

    type_str = data.get("type", "pattern")
    try:
        assertion_type = AssertionType(type_str)
    except ValueError:
        return err(f"Invalid assertion type: {type_str}")

    message = data.get("message")
    if not message:
        return err(f"Assertion '{assertion_id}' missing required 'message' field")

    # Optional fields with defaults
    severity = data.get("severity", "warning")
    if severity not in ("error", "warning", "info"):
        severity = "warning"

    priority = _parse_priority(data.get("priority"))

    # Pattern-specific fields
    pattern = data.get("pattern")
    if assertion_type == AssertionType.PATTERN and not pattern:
        return err(f"Pattern assertion '{assertion_id}' missing required 'pattern' field")

    # Languages: accept a single string or a list of strings.
    languages_raw = data.get("languages")
    if languages_raw is None:
        # A ``languages:`` key with no value parses as None; iterating it
        # would raise TypeError, so treat it the same as a missing key.
        languages_raw = ()
    elif isinstance(languages_raw, str):
        languages_raw = (languages_raw,)
    languages = tuple(lang.lower() for lang in languages_raw)

    # Applicability
    applicability = _parse_applicability(data.get("applicability"))

    # LLM-specific fields (v0.5+)
    compliance = data.get("compliance")
    model = data.get("model")

    return ok(
        Assertion(
            id=assertion_id,
            type=assertion_type,
            message=message,
            severity=severity,  # type: ignore[arg-type]
            priority=priority,
            pattern=pattern,
            languages=languages,
            applicability=applicability,
            compliance=compliance,
            model=model,
        )
    )
150
+
151
+
152
def _validate_assertion_file(data: dict, path: str) -> Result[AssertionFile, str]:
    """Turn the top-level mapping of an assertion file into an ``AssertionFile``.

    Args:
        data: Parsed YAML mapping of the whole file.
        path: File path, used as a prefix in error messages and stored on the
            result.

    Returns:
        ``ok(AssertionFile)`` with an empty ``source`` (the caller fills it
        in), or ``err(message)`` when ``assertions`` is not a list, an entry
        is not a mapping, an entry fails to parse, or two entries share an ID.
    """
    raw_entries = data.get("assertions", [])
    if not isinstance(raw_entries, list):
        return err(f"{path}: 'assertions' must be a list")

    # Phase 1: parse every entry, stopping at the first failure.
    parsed: list[Assertion] = []
    for index, entry in enumerate(raw_entries):
        if not isinstance(entry, dict):
            return err(f"{path}: assertion {index} must be an object")

        outcome = _parse_assertion(entry)
        if outcome.is_err:
            return err(f"{path}: {outcome.error}")
        parsed.append(outcome.value)

    # Phase 2: IDs must be unique within the file.
    known_ids: set[str] = set()
    for item in parsed:
        if item.id in known_ids:
            return err(f"{path}: duplicate assertion ID '{item.id}'")
        known_ids.add(item.id)

    assertion_file = AssertionFile(
        version=str(data.get("version", "0.4")),
        name=data.get("name", ""),
        description=data.get("description", ""),
        assertions=tuple(parsed),
        source="",  # Set by caller
        path=path,
    )
    return ok(assertion_file)
189
+
190
+
191
@lru_cache(maxsize=64)
def _load_assertion_file_cached(path_str: str) -> AssertionFile | str:
    """Read, parse and validate an assertion file, memoised by path string.

    Results are cached per ``path_str`` (up to 64 entries), and this includes
    error strings: a later call with the same path returns the cached outcome
    until the cache is cleared.

    Args:
        path_str: Path of the YAML file to load.

    Returns:
        ``AssertionFile`` on success, error string on failure.
    """
    path = Path(path_str)

    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default encoding.
        content = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError; without it a
        # badly encoded file would raise instead of returning an error string.
        return f"Failed to read '{path}': {e}"

    try:
        data = yaml.safe_load(content)
    except yaml.YAMLError as e:
        return f"Invalid YAML in '{path}': {e}"

    if not isinstance(data, dict):
        return f"'{path}' must contain a YAML object"

    result = _validate_assertion_file(data, str(path))
    if result.is_err:
        return result.error

    return result.value
214
+
215
+
216
def load_assertion_file(filename: str) -> Result[AssertionFile, str]:
    """Resolve an assertion file by name and load it.

    Args:
        filename: Assertion file name (e.g., "security.yaml").

    Returns:
        ``ok(AssertionFile)`` whose ``source`` names the cascade level the
        file was found in, or ``err(message)`` when the file cannot be found
        or fails to load.
    """
    resolved, origin = resolve_assertion_file(filename)
    if resolved is None:
        return err(f"Assertion file '{filename}' not found")

    location = str(resolved)
    loaded = _load_assertion_file_cached(location)
    if isinstance(loaded, str):
        # The cached loader reports failures as plain strings.
        return err(loaded)

    # The cached object has an empty source, so build a fresh AssertionFile
    # that carries the resolved cascade level.
    stamped = AssertionFile(
        version=loaded.version,
        name=loaded.name,
        description=loaded.description,
        assertions=loaded.assertions,
        source=origin,
        path=location,
    )
    return ok(stamped)
244
+
245
+
246
def load_assertions(filenames: set[str] | None = None) -> tuple[list[Assertion], list[str]]:
    """Load assertions from the given files, or from every available file.

    Files are processed in sorted name order. A file that fails to load
    contributes an error message and does not stop the remaining files.

    Args:
        filenames: Names of the files to load; ``None`` means all files
            reported by ``get_all_assertion_files()``.

    Returns:
        ``(assertions, errors)`` where ``assertions`` is ordered by ascending
        ``priority.rank`` and ``errors`` holds one message per failed file.
    """
    names = get_all_assertion_files() if filenames is None else filenames

    collected: list[Assertion] = []
    problems: list[str] = []

    for name in sorted(names):
        outcome = load_assertion_file(name)
        if outcome.is_err:
            problems.append(outcome.error)
            continue
        collected.extend(outcome.value.assertions)

    # Stable sort: assertions of equal rank keep their load order.
    ordered = sorted(collected, key=lambda item: item.priority.rank)
    return ordered, problems
272
+
273
+
274
def clear_assertion_cache() -> None:
    """Drop every memoised result held by ``_load_assertion_file_cached``.

    After this call the next load of any assertion file reads it again.
    Useful for testing or after assertion files have been updated.
    """
    _load_assertion_file_cached.cache_clear()
@@ -0,0 +1,179 @@
1
+ """Token budget estimation and tracking for LLM compliance assertions."""
2
+
3
+ from crucible.enforcement.models import (
4
+ Assertion,
5
+ AssertionType,
6
+ BudgetState,
7
+ ComplianceConfig,
8
+ )
9
+
10
# Rough conversion factor from characters to tokens for code.
TOKENS_PER_CHAR = 0.25

# Fixed per-call allowance added to every LLM assertion estimate
# (system prompt, response format, etc.).
BASE_OVERHEAD_TOKENS = 200

# Floor applied to the estimate for a non-empty compliance prompt.
MIN_COMPLIANCE_TOKENS = 50
18
+
19
+
20
def estimate_assertion_tokens(assertion: Assertion, content_length: int) -> int:
    """Estimate the input tokens needed to run one LLM assertion.

    The estimate is the fixed base overhead, plus the content length scaled
    by ``TOKENS_PER_CHAR``, plus (when the assertion has a compliance prompt)
    the scaled prompt length with a floor of ``MIN_COMPLIANCE_TOKENS``.

    Args:
        assertion: The assertion to estimate.
        content_length: Length of the content to analyze, in characters.

    Returns:
        Estimated token count; ``0`` for assertions that are not of type LLM.
    """
    if assertion.type != AssertionType.LLM:
        return 0

    estimate = BASE_OVERHEAD_TOKENS + int(content_length * TOKENS_PER_CHAR)

    if assertion.compliance:
        prompt_tokens = int(len(assertion.compliance) * TOKENS_PER_CHAR)
        estimate += max(MIN_COMPLIANCE_TOKENS, prompt_tokens)

    return estimate
45
+
46
+
47
def estimate_total_budget(
    assertions: list[Assertion],
    content_length: int,
) -> int:
    """Sum the token estimates of every LLM assertion in ``assertions``.

    Args:
        assertions: Assertions of any type; non-LLM entries are ignored.
        content_length: Length of the content to analyze, in characters.

    Returns:
        Estimated total token count (``0`` when there are no LLM assertions).
    """
    return sum(
        estimate_assertion_tokens(candidate, content_length)
        for candidate in assertions
        if candidate.type == AssertionType.LLM
    )
65
+
66
+
67
def sort_by_priority(assertions: list[Assertion]) -> list[Assertion]:
    """Return a new list of the assertions ordered by ascending ``priority.rank``.

    The input list is left untouched; entries of equal rank keep their
    relative order.

    Args:
        assertions: Assertions to sort.

    Returns:
        Sorted list (critical > high > medium > low).
    """
    ordered = list(assertions)
    ordered.sort(key=lambda item: item.priority.rank)
    return ordered
77
+
78
+
79
def select_within_budget(
    assertions: list[Assertion],
    content_length: int,
    budget: int,
) -> tuple[list[Assertion], list[Assertion]]:
    """Split assertions into those that fit the token budget and those that do not.

    Assertions are considered in the order given (callers should pre-sort by
    priority). Each LLM assertion is selected when its estimate still fits in
    the remaining budget; otherwise it is skipped and later, cheaper
    assertions may still be selected.

    Note that the two paths treat non-LLM assertions differently: with
    ``budget == 0`` every input assertion is returned as selected, while with
    a non-zero budget non-LLM assertions appear in neither list.

    Args:
        assertions: Assertions to select from.
        content_length: Length of the content to analyze, in characters.
        budget: Token budget (0 = unlimited).

    Returns:
        Tuple of (selected_assertions, skipped_assertions).
    """
    if budget == 0:
        # Unlimited: everything is selected, nothing is skipped.
        return list(assertions), []

    chosen: list[Assertion] = []
    over_budget: list[Assertion] = []
    spent = 0

    for candidate in assertions:
        if candidate.type != AssertionType.LLM:
            continue

        cost = estimate_assertion_tokens(candidate, content_length)
        if spent + cost > budget:
            over_budget.append(candidate)
            continue

        chosen.append(candidate)
        spent += cost

    return chosen, over_budget
115
+
116
+
117
def filter_llm_assertions(assertions: list[Assertion]) -> list[Assertion]:
    """Keep only the assertions whose type is ``AssertionType.LLM``.

    Args:
        assertions: Assertions of any type.

    Returns:
        New list with the LLM assertions, in their original order.
    """
    llm_only: list[Assertion] = []
    for candidate in assertions:
        if candidate.type == AssertionType.LLM:
            llm_only.append(candidate)
    return llm_only
127
+
128
+
129
def create_budget_state(config: ComplianceConfig) -> BudgetState:
    """Build a new ``BudgetState`` whose total budget comes from ``config``.

    Args:
        config: Compliance configuration; only ``token_budget`` is read.

    Returns:
        A ``BudgetState`` constructed with ``total_budget=config.token_budget``.
    """
    total = config.token_budget
    return BudgetState(total_budget=total)
139
+
140
+
141
def prepare_llm_assertions(
    assertions: list[Assertion],
    content_length: int,
    config: ComplianceConfig,
) -> tuple[list[Assertion], BudgetState]:
    """Choose which LLM assertions to run under the configured token budget.

    Keeps only LLM assertions, orders them by priority, selects those that
    fit within ``config.token_budget``, and reports every assertion that did
    not fit to the budget state via ``state.skip(assertion.id)``.

    Args:
        assertions: All loaded assertions.
        content_length: Length of the content to analyze, in characters.
        config: Compliance configuration.

    Returns:
        Tuple of (assertions_to_run, budget_state).
    """
    candidates = filter_llm_assertions(assertions)
    if not candidates:
        # Nothing to run; hand back a fresh, untouched state.
        return [], create_budget_state(config)

    to_run, over_budget = select_within_budget(
        sort_by_priority(candidates),
        content_length,
        config.token_budget,
    )

    state = create_budget_state(config)
    for dropped in over_budget:
        state.skip(dropped.id)

    return to_run, state