crucible-mcp 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crucible/cli.py +425 -12
- crucible/enforcement/budget.py +179 -0
- crucible/enforcement/compliance.py +486 -0
- crucible/enforcement/models.py +71 -1
- crucible/review/core.py +78 -7
- crucible/server.py +81 -14
- crucible/tools/git.py +17 -4
- {crucible_mcp-0.4.0.dist-info → crucible_mcp-0.5.0.dist-info}/METADATA +2 -1
- {crucible_mcp-0.4.0.dist-info → crucible_mcp-0.5.0.dist-info}/RECORD +12 -10
- {crucible_mcp-0.4.0.dist-info → crucible_mcp-0.5.0.dist-info}/WHEEL +0 -0
- {crucible_mcp-0.4.0.dist-info → crucible_mcp-0.5.0.dist-info}/entry_points.txt +0 -0
- {crucible_mcp-0.4.0.dist-info → crucible_mcp-0.5.0.dist-info}/top_level.txt +0 -0
crucible/enforcement/budget.py
@@ -0,0 +1,179 @@
+"""Token budget estimation and tracking for LLM compliance assertions."""
+
+from crucible.enforcement.models import (
+    Assertion,
+    AssertionType,
+    BudgetState,
+    ComplianceConfig,
+)
+
+# Average tokens per character (rough estimate for code)
+TOKENS_PER_CHAR = 0.25
+
+# Base overhead for each LLM call (system prompt, response format, etc.)
+BASE_OVERHEAD_TOKENS = 200
+
+# Minimum tokens for compliance prompt
+MIN_COMPLIANCE_TOKENS = 50
+
+
+def estimate_assertion_tokens(assertion: Assertion, content_length: int) -> int:
+    """Estimate tokens needed to run an LLM assertion.
+
+    Args:
+        assertion: The assertion to estimate
+        content_length: Length of content to analyze in characters
+
+    Returns:
+        Estimated token count for input
+    """
+    if assertion.type != AssertionType.LLM:
+        return 0
+
+    # Content tokens
+    content_tokens = int(content_length * TOKENS_PER_CHAR)
+
+    # Compliance prompt tokens
+    compliance_tokens = 0
+    if assertion.compliance:
+        compliance_tokens = max(
+            MIN_COMPLIANCE_TOKENS,
+            int(len(assertion.compliance) * TOKENS_PER_CHAR),
+        )
+
+    return BASE_OVERHEAD_TOKENS + content_tokens + compliance_tokens
+
+
+def estimate_total_budget(
+    assertions: list[Assertion],
+    content_length: int,
+) -> int:
+    """Estimate total tokens needed to run all LLM assertions.
+
+    Args:
+        assertions: List of assertions (filters to LLM only)
+        content_length: Length of content to analyze
+
+    Returns:
+        Estimated total token count
+    """
+    total = 0
+    for assertion in assertions:
+        if assertion.type == AssertionType.LLM:
+            total += estimate_assertion_tokens(assertion, content_length)
+    return total
+
+
+def sort_by_priority(assertions: list[Assertion]) -> list[Assertion]:
+    """Sort assertions by priority (critical first).
+
+    Args:
+        assertions: Assertions to sort
+
+    Returns:
+        Sorted list (critical > high > medium > low)
+    """
+    return sorted(assertions, key=lambda a: a.priority.rank)
+
+
+def select_within_budget(
+    assertions: list[Assertion],
+    content_length: int,
+    budget: int,
+) -> tuple[list[Assertion], list[Assertion]]:
+    """Select assertions that fit within token budget.
+
+    Args:
+        assertions: Assertions to select from (should be pre-sorted by priority)
+        content_length: Length of content to analyze
+        budget: Token budget (0 = unlimited)
+
+    Returns:
+        Tuple of (selected_assertions, skipped_assertions)
+    """
+    if budget == 0:
+        # Unlimited budget
+        return list(assertions), []
+
+    selected: list[Assertion] = []
+    skipped: list[Assertion] = []
+    tokens_used = 0
+
+    for assertion in assertions:
+        if assertion.type != AssertionType.LLM:
+            continue
+
+        estimated = estimate_assertion_tokens(assertion, content_length)
+
+        if tokens_used + estimated <= budget:
+            selected.append(assertion)
+            tokens_used += estimated
+        else:
+            skipped.append(assertion)
+
+    return selected, skipped
+
+
+def filter_llm_assertions(assertions: list[Assertion]) -> list[Assertion]:
+    """Filter to only LLM-type assertions.
+
+    Args:
+        assertions: All assertions
+
+    Returns:
+        Only assertions with type=llm
+    """
+    return [a for a in assertions if a.type == AssertionType.LLM]
+
+
+def create_budget_state(config: ComplianceConfig) -> BudgetState:
+    """Create initial budget state from config.
+
+    Args:
+        config: Compliance configuration
+
+    Returns:
+        Fresh BudgetState
+    """
+    return BudgetState(total_budget=config.token_budget)
+
+
+def prepare_llm_assertions(
+    assertions: list[Assertion],
+    content_length: int,
+    config: ComplianceConfig,
+) -> tuple[list[Assertion], BudgetState]:
+    """Prepare LLM assertions for execution.
+
+    Filters to LLM assertions, sorts by priority, and selects within budget.
+
+    Args:
+        assertions: All loaded assertions
+        content_length: Length of content to analyze
+        config: Compliance configuration
+
+    Returns:
+        Tuple of (assertions_to_run, budget_state)
+    """
+    # Filter to LLM assertions only
+    llm_assertions = filter_llm_assertions(assertions)
+
+    if not llm_assertions:
+        return [], create_budget_state(config)
+
+    # Sort by priority
+    sorted_assertions = sort_by_priority(llm_assertions)
+
+    # Select within budget
+    selected, skipped = select_within_budget(
+        sorted_assertions,
+        content_length,
+        config.token_budget,
+    )
+
+    # Create budget state
+    state = create_budget_state(config)
+    for assertion in skipped:
+        state.skip(assertion.id)
+
+    return selected, state
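Note: the estimate in `estimate_assertion_tokens` is plain linear arithmetic over character counts. A standalone sketch of the same formula follows (the 4,000-character file and 300-character compliance prompt are illustrative figures, not taken from the package):

```python
# Standalone sketch of the token estimate used in budget.py: 0.25 tokens per
# character for file content and compliance prompt, plus a flat 200-token
# overhead per call, with a 50-token floor on the compliance prompt.
TOKENS_PER_CHAR = 0.25
BASE_OVERHEAD_TOKENS = 200
MIN_COMPLIANCE_TOKENS = 50


def estimate(content_chars: int, compliance_chars: int) -> int:
    content_tokens = int(content_chars * TOKENS_PER_CHAR)
    compliance_tokens = max(MIN_COMPLIANCE_TOKENS, int(compliance_chars * TOKENS_PER_CHAR))
    return BASE_OVERHEAD_TOKENS + content_tokens + compliance_tokens


# A 4,000-character file with a 300-character compliance prompt:
# 200 + 1000 + 75 = 1275 estimated input tokens per assertion, so a
# token_budget of 3000 would fit two such assertions and skip the rest
# under the greedy selection in select_within_budget.
print(estimate(4000, 300))  # 1275
```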
crucible/enforcement/compliance.py
@@ -0,0 +1,486 @@
+"""LLM-based compliance checking for assertions.
+
+Uses Anthropic SDK to run compliance assertions against code.
+Supports Sonnet (default) and Opus (for high-stakes assertions).
+"""
+
+import json
+import os
+from typing import Any
+
+from crucible.enforcement.budget import (
+    create_budget_state,
+    estimate_assertion_tokens,
+    prepare_llm_assertions,
+)
+from crucible.enforcement.models import (
+    Assertion,
+    AssertionType,
+    BudgetState,
+    ComplianceConfig,
+    EnforcementFinding,
+    LLMAssertionResult,
+    OverflowBehavior,
+)
+from crucible.enforcement.patterns import matches_glob, matches_language
+
+# Model ID mapping
+MODEL_IDS = {
+    "sonnet": "claude-sonnet-4-20250514",
+    "opus": "claude-opus-4-20250514",
+    "haiku": "claude-haiku-4-20250514",
+}
+
+# System prompt for compliance checking
+SYSTEM_PROMPT = """You are a code compliance checker. Analyze the provided code against the compliance requirements.
+
+Respond with a JSON object:
+{
+  "compliant": true/false,
+  "findings": [
+    {
+      "line": <line_number or null>,
+      "issue": "<description of the issue>",
+      "severity": "error" | "warning" | "info"
+    }
+  ],
+  "reasoning": "<brief explanation of your analysis>"
+}
+
+If the code is compliant, return compliant: true with an empty findings array.
+If there are issues, return compliant: false with specific findings.
+Be precise about line numbers when possible. Focus on actual compliance issues, not style preferences."""
+
+
+def _load_api_key_from_config() -> str | None:
+    """Try to load API key from config file.
+
+    Checks (in order):
+    1. ~/.config/crucible/secrets.yaml
+    2. ~/.crucible/secrets.yaml (legacy)
+
+    Returns:
+        API key if found, None otherwise
+    """
+    from pathlib import Path
+
+    import yaml
+
+    config_paths = [
+        Path.home() / ".config" / "crucible" / "secrets.yaml",
+        Path.home() / ".crucible" / "secrets.yaml",
+    ]
+
+    for config_path in config_paths:
+        if config_path.exists():
+            try:
+                with open(config_path) as f:
+                    data = yaml.safe_load(f) or {}
+                key = data.get("anthropic_api_key") or data.get("ANTHROPIC_API_KEY")
+                if key:
+                    return key
+            except Exception:
+                pass  # Ignore malformed config files
+
+    return None
+
+
+def _get_anthropic_client() -> Any:
+    """Get Anthropic client instance.
+
+    Checks for API key in order:
+    1. ANTHROPIC_API_KEY environment variable
+    2. ~/.config/crucible/secrets.yaml
+    3. ~/.crucible/secrets.yaml
+
+    Returns:
+        Anthropic client
+
+    Raises:
+        ImportError: If anthropic package is not installed
+        ValueError: If API key not found in any location
+    """
+    try:
+        import anthropic
+    except ImportError as e:
+        raise ImportError(
+            "anthropic package is required for LLM compliance checking. "
+            "Install with: pip install anthropic"
+        ) from e
+
+    # Try env var first (standard for CI)
+    api_key = os.environ.get("ANTHROPIC_API_KEY")
+
+    # Fall back to config file (convenient for local dev)
+    if not api_key:
+        api_key = _load_api_key_from_config()
+
+    if not api_key:
+        raise ValueError(
+            "Anthropic API key not found. Set ANTHROPIC_API_KEY environment variable "
+            "or add to ~/.config/crucible/secrets.yaml:\n"
+            "  anthropic_api_key: sk-ant-..."
+        )
+
+    return anthropic.Anthropic(api_key=api_key)
+
+
+def _build_user_prompt(assertion: Assertion, file_path: str, content: str) -> str:
+    """Build user prompt for compliance check.
+
+    Args:
+        assertion: The assertion to check
+        file_path: Path to the file being checked
+        content: File content
+
+    Returns:
+        Formatted user prompt
+    """
+    return f"""## File: {file_path}
+
+## Compliance Requirements
+{assertion.compliance}
+
+## Code to Analyze
+```
+{content}
+```
+
+Analyze this code against the compliance requirements and respond with JSON."""
+
+
+def _parse_llm_response(
+    response_text: str,
+    assertion: Assertion,
+    file_path: str,
+) -> tuple[list[EnforcementFinding], str | None]:
+    """Parse LLM response into findings.
+
+    Args:
+        response_text: Raw response from LLM
+        assertion: The assertion that was checked
+        file_path: Path to the file
+
+    Returns:
+        Tuple of (findings, reasoning)
+    """
+    findings: list[EnforcementFinding] = []
+    reasoning = None
+
+    try:
+        # Try to extract JSON from response
+        # Handle markdown code blocks
+        text = response_text.strip()
+        if text.startswith("```"):
+            # Remove markdown code block
+            lines = text.split("\n")
+            # Find first and last ``` lines
+            start = 0
+            end = len(lines)
+            for i, line in enumerate(lines):
+                if line.startswith("```") and i == 0:
+                    start = i + 1
+                elif line.startswith("```") and i > 0:
+                    end = i
+                    break
+            text = "\n".join(lines[start:end])
+
+        data = json.loads(text)
+
+        reasoning = data.get("reasoning")
+        is_compliant = data.get("compliant", True)
+
+        if not is_compliant and "findings" in data:
+            for finding_data in data["findings"]:
+                line_num = finding_data.get("line")
+                issue = finding_data.get("issue", "Compliance issue detected")
+                severity = finding_data.get("severity", assertion.severity)
+
+                # Validate severity
+                if severity not in ("error", "warning", "info"):
+                    severity = assertion.severity
+
+                location = f"{file_path}:{line_num}" if line_num else file_path
+
+                findings.append(
+                    EnforcementFinding(
+                        assertion_id=assertion.id,
+                        message=issue,
+                        severity=severity,  # type: ignore[arg-type]
+                        priority=assertion.priority,
+                        location=location,
+                        source="llm",
+                        llm_reasoning=reasoning,
+                    )
+                )
+
+    except (json.JSONDecodeError, KeyError, TypeError):
+        # If we can't parse the response, create a single finding with the raw response
+        findings.append(
+            EnforcementFinding(
+                assertion_id=assertion.id,
+                message=f"LLM compliance check failed to parse: {response_text[:200]}...",
+                severity="warning",
+                priority=assertion.priority,
+                location=file_path,
+                source="llm",
+            )
+        )
+
+    return findings, reasoning
+
+
+def run_single_assertion(
+    assertion: Assertion,
+    file_path: str,
+    content: str,
+    config: ComplianceConfig,
+) -> LLMAssertionResult:
+    """Run a single LLM assertion against file content.
+
+    Args:
+        assertion: The assertion to run
+        file_path: Path to the file
+        content: File content
+        config: Compliance configuration
+
+    Returns:
+        LLMAssertionResult with findings
+    """
+    if assertion.type != AssertionType.LLM:
+        return LLMAssertionResult(
+            assertion_id=assertion.id,
+            passed=True,
+            findings=(),
+            tokens_used=0,
+            model_used="",
+            error="Not an LLM assertion",
+        )
+
+    # Determine model to use
+    model_name = assertion.model or config.model
+    model_id = MODEL_IDS.get(model_name, MODEL_IDS["sonnet"])
+
+    try:
+        client = _get_anthropic_client()
+
+        user_prompt = _build_user_prompt(assertion, file_path, content)
+
+        response = client.messages.create(
+            model=model_id,
+            max_tokens=1024,
+            system=SYSTEM_PROMPT,
+            messages=[{"role": "user", "content": user_prompt}],
+        )
+
+        # Extract text from response
+        response_text = ""
+        for block in response.content:
+            if hasattr(block, "text"):
+                response_text += block.text
+
+        # Calculate tokens used
+        tokens_used = response.usage.input_tokens + response.usage.output_tokens
+
+        # Parse response
+        findings, reasoning = _parse_llm_response(response_text, assertion, file_path)
+
+        return LLMAssertionResult(
+            assertion_id=assertion.id,
+            passed=len(findings) == 0,
+            findings=tuple(findings),
+            tokens_used=tokens_used,
+            model_used=model_name,
+        )
+
+    except ImportError as e:
+        return LLMAssertionResult(
+            assertion_id=assertion.id,
+            passed=True,  # Don't fail on missing dependency
+            findings=(),
+            tokens_used=0,
+            model_used=model_name,
+            error=str(e),
+        )
+    except ValueError as e:
+        return LLMAssertionResult(
+            assertion_id=assertion.id,
+            passed=True,  # Don't fail on missing API key
+            findings=(),
+            tokens_used=0,
+            model_used=model_name,
+            error=str(e),
+        )
+    except Exception as e:
+        return LLMAssertionResult(
+            assertion_id=assertion.id,
+            passed=True,  # Don't fail on API errors
+            findings=(),
+            tokens_used=0,
+            model_used=model_name,
+            error=f"API error: {e}",
+        )
+
+
+def filter_applicable_assertions(
+    assertions: list[Assertion],
+    file_path: str,
+) -> list[Assertion]:
+    """Filter assertions to those applicable to the given file.
+
+    Args:
+        assertions: All LLM assertions
+        file_path: File path to check
+
+    Returns:
+        Assertions applicable to this file
+    """
+    applicable: list[Assertion] = []
+
+    for assertion in assertions:
+        # Check language applicability
+        if assertion.languages and not matches_language(file_path, assertion.languages):
+            continue
+
+        # Check glob applicability
+        if assertion.applicability and not matches_glob(
+            file_path,
+            assertion.applicability.glob,
+            assertion.applicability.exclude,
+        ):
+            continue
+
+        applicable.append(assertion)
+
+    return applicable
+
+
+def run_llm_assertions(
+    file_path: str,
+    content: str,
+    assertions: list[Assertion],
+    config: ComplianceConfig,
+) -> tuple[list[EnforcementFinding], BudgetState, list[str]]:
+    """Run LLM assertions against a file.
+
+    Args:
+        file_path: Path to the file
+        content: File content
+        assertions: All assertions (will filter to LLM type)
+        config: Compliance configuration
+
+    Returns:
+        Tuple of (findings, budget_state, errors)
+    """
+    if not config.enabled:
+        return [], create_budget_state(config), []
+
+    all_findings: list[EnforcementFinding] = []
+    errors: list[str] = []
+
+    # Prepare assertions (filter, sort, select within budget)
+    to_run, budget_state = prepare_llm_assertions(
+        assertions,
+        len(content),
+        config,
+    )
+
+    # Filter to applicable assertions for this file
+    applicable = filter_applicable_assertions(to_run, file_path)
+
+    # Run each applicable assertion
+    for assertion in applicable:
+        # Check if we still have budget
+        estimated = estimate_assertion_tokens(assertion, len(content))
+        if budget_state.total_budget > 0 and budget_state.tokens_used + estimated > budget_state.total_budget:
+            budget_state.skip(assertion.id)
+
+            if config.overflow_behavior == OverflowBehavior.FAIL:
+                errors.append(
+                    f"Token budget exceeded before running '{assertion.id}'. "
+                    f"Used: {budget_state.tokens_used}, Budget: {budget_state.total_budget}"
+                )
+                break
+            elif config.overflow_behavior == OverflowBehavior.WARN:
+                errors.append(
+                    f"Skipped '{assertion.id}' due to token budget. "
+                    f"Used: {budget_state.tokens_used}, Budget: {budget_state.total_budget}"
+                )
+            continue
+
+        # Run the assertion
+        result = run_single_assertion(assertion, file_path, content, config)
+
+        # Update budget state
+        budget_state.consume(result.tokens_used)
+
+        # Collect findings
+        all_findings.extend(result.findings)
+
+        # Record errors
+        if result.error:
+            errors.append(f"{assertion.id}: {result.error}")
+
+    return all_findings, budget_state, errors
+
+
+def run_llm_assertions_batch(
+    files: list[tuple[str, str]],
+    assertions: list[Assertion],
+    config: ComplianceConfig,
+) -> tuple[list[EnforcementFinding], BudgetState, list[str]]:
+    """Run LLM assertions against multiple files with shared budget.
+
+    Args:
+        files: List of (file_path, content) tuples
+        assertions: All assertions
+        config: Compliance configuration
+
+    Returns:
+        Tuple of (all_findings, budget_state, errors)
+    """
+    if not config.enabled:
+        return [], create_budget_state(config), []
+
+    all_findings: list[EnforcementFinding] = []
+    all_errors: list[str] = []
+
+    # Calculate total content length for budget estimation
+    total_content_length = sum(len(content) for _, content in files)
+
+    # Prepare assertions with total budget
+    to_run, budget_state = prepare_llm_assertions(
+        assertions,
+        total_content_length // max(1, len(files)),  # Average per file
+        config,
+    )
+
+    # Process each file
+    for file_path, content in files:
+        applicable = filter_applicable_assertions(to_run, file_path)
+
+        for assertion in applicable:
+            # Check budget before each assertion
+            estimated = estimate_assertion_tokens(assertion, len(content))
+            if budget_state.total_budget > 0 and budget_state.tokens_used + estimated > budget_state.total_budget:
+                budget_state.skip(assertion.id)
+
+                if config.overflow_behavior == OverflowBehavior.FAIL:
+                    all_errors.append(
+                        f"Token budget exceeded at '{file_path}' before '{assertion.id}'"
+                    )
+                    return all_findings, budget_state, all_errors
+                elif config.overflow_behavior == OverflowBehavior.WARN:
+                    all_errors.append(
+                        f"Skipped '{assertion.id}' on '{file_path}' due to budget"
+                    )
+                continue
+
+            result = run_single_assertion(assertion, file_path, content, config)
+            budget_state.consume(result.tokens_used)
+            all_findings.extend(result.findings)
+
+            if result.error:
+                all_errors.append(f"{file_path}:{assertion.id}: {result.error}")
+
+    return all_findings, budget_state, all_errors
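Note: `_parse_llm_response` expects the JSON shape described in `SYSTEM_PROMPT`, optionally wrapped in a markdown code fence. Below is a standalone sketch of that fence-stripping and parsing step; the response text is invented for illustration, not produced by a real API call:

```python
# Standalone sketch of the fence-stripping and JSON parsing performed in
# compliance.py's _parse_llm_response (the response string here is made up).
import json

fence = "```"
body = (
    '{"compliant": false, '
    '"findings": [{"line": 12, "issue": "Hard-coded credential", "severity": "error"}], '
    '"reasoning": "The file embeds a secret token"}'
)
raw = f"{fence}json\n{body}\n{fence}"

text = raw.strip()
if text.startswith(fence):
    # Drop the opening fence line and everything from the closing fence onward.
    lines = text.split("\n")
    start, end = 1, len(lines)
    for i, line in enumerate(lines):
        if line.startswith(fence) and i > 0:
            end = i
            break
    text = "\n".join(lines[start:end])

data = json.loads(text)
print(data["compliant"])                # False
print(data["findings"][0]["severity"])  # error
```

If parsing fails (malformed JSON, missing keys), the module falls back to a single warning-level finding that carries the first 200 characters of the raw response, so a bad model reply degrades to a warning rather than an exception.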