agent-audit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_audit/__init__.py +3 -0
- agent_audit/__main__.py +13 -0
- agent_audit/cli/__init__.py +1 -0
- agent_audit/cli/commands/__init__.py +1 -0
- agent_audit/cli/commands/init.py +44 -0
- agent_audit/cli/commands/inspect.py +236 -0
- agent_audit/cli/commands/scan.py +329 -0
- agent_audit/cli/formatters/__init__.py +1 -0
- agent_audit/cli/formatters/json.py +138 -0
- agent_audit/cli/formatters/sarif.py +155 -0
- agent_audit/cli/formatters/terminal.py +221 -0
- agent_audit/cli/main.py +34 -0
- agent_audit/config/__init__.py +1 -0
- agent_audit/config/ignore.py +477 -0
- agent_audit/core_utils/__init__.py +1 -0
- agent_audit/models/__init__.py +18 -0
- agent_audit/models/finding.py +159 -0
- agent_audit/models/risk.py +77 -0
- agent_audit/models/tool.py +182 -0
- agent_audit/rules/__init__.py +6 -0
- agent_audit/rules/engine.py +503 -0
- agent_audit/rules/loader.py +160 -0
- agent_audit/scanners/__init__.py +5 -0
- agent_audit/scanners/base.py +32 -0
- agent_audit/scanners/config_scanner.py +390 -0
- agent_audit/scanners/mcp_config_scanner.py +321 -0
- agent_audit/scanners/mcp_inspector.py +421 -0
- agent_audit/scanners/python_scanner.py +544 -0
- agent_audit/scanners/secret_scanner.py +521 -0
- agent_audit/utils/__init__.py +21 -0
- agent_audit/utils/compat.py +98 -0
- agent_audit/utils/mcp_client.py +343 -0
- agent_audit/version.py +3 -0
- agent_audit-0.1.0.dist-info/METADATA +219 -0
- agent_audit-0.1.0.dist-info/RECORD +37 -0
- agent_audit-0.1.0.dist-info/WHEEL +4 -0
- agent_audit-0.1.0.dist-info/entry_points.txt +3 -0
agent_audit/rules/engine.py
@@ -0,0 +1,503 @@
"""Rule engine for pattern matching and finding generation."""

import re
import logging
from pathlib import Path
from typing import Dict, Any, List, Optional
from dataclasses import dataclass, field

from agent_audit.models.finding import Finding, Remediation
from agent_audit.models.risk import Severity, Category, Location
from agent_audit.rules.loader import RuleLoader

logger = logging.getLogger(__name__)


@dataclass
class MatchContext:
    """Context for rule matching."""
    file_path: str
    source_code: Optional[str] = None
    dangerous_patterns: List[Dict[str, Any]] = field(default_factory=list)
    tools: List[Any] = field(default_factory=list)
    mcp_servers: List[Dict[str, Any]] = field(default_factory=list)
    function_calls: List[Dict[str, Any]] = field(default_factory=list)
    imports: List[str] = field(default_factory=list)


class RuleEngine:
    """
    Rule engine for matching patterns and generating findings.

    Loads rules from YAML files and applies them to scan results.
    """

    # Pre-compiled regex patterns for common detections
    CREDENTIAL_PATTERNS = [
        (re.compile(r'AKIA[0-9A-Z]{16}'), "AWS Access Key"),
        (re.compile(r'sk-[a-zA-Z0-9]{48}'), "OpenAI API Key"),
        (re.compile(r'sk-ant-[a-zA-Z0-9]{40,}'), "Anthropic API Key"),
        (re.compile(r'ghp_[a-zA-Z0-9]{36}'), "GitHub Personal Access Token"),
        (re.compile(r'gho_[a-zA-Z0-9]{36}'), "GitHub OAuth Token"),
        (re.compile(r'(?i)(api[_-]?key|secret|password|token)\s*[=:]\s*["\']?[a-zA-Z0-9]{20,}'),
         "Generic API Key/Secret"),
    ]

    def __init__(self, rules_dirs: Optional[List[Path]] = None):
        """
        Initialize the rule engine.

        Args:
            rules_dirs: List of directories containing rule files
        """
        self.loader = RuleLoader(rules_dirs)
        self._rules: Dict[str, Dict[str, Any]] = {}

    def load_rules(self, additional_dirs: Optional[List[Path]] = None):
        """Load rules from all configured directories."""
        if additional_dirs:
            for d in additional_dirs:
                self.loader.add_rules_directory(d)

        self._rules = self.loader.load_all_rules()
        logger.info(f"Loaded {len(self._rules)} rules")

    def add_builtin_rules_dir(self, builtin_dir: Path):
        """Add the builtin rules directory."""
        self.loader.add_rules_directory(builtin_dir)

    def evaluate(self, context: MatchContext) -> List[Finding]:
        """
        Evaluate all rules against a match context.

        Args:
            context: The context containing scan results to check

        Returns:
            List of findings from matched rules
        """
        findings: List[Finding] = []

        for rule_id, rule in self._rules.items():
            rule_findings = self._evaluate_rule(rule, context)
            findings.extend(rule_findings)

        return findings

    def evaluate_dangerous_patterns(
        self,
        patterns: List[Dict[str, Any]],
        file_path: str
    ) -> List[Finding]:
        """
        Evaluate dangerous patterns detected by scanners.

        Args:
            patterns: List of dangerous patterns from scanner
            file_path: Source file path

        Returns:
            List of findings
        """
        findings: List[Finding] = []

        for pattern in patterns:
            pattern_type = pattern.get('type', '')

            if pattern_type in ('shell_true', 'dangerous_function_call'):
                # Check if this matches AGENT-001 (Command Injection)
                if self._is_command_injection(pattern):
                    finding = self._create_finding_from_pattern(
                        rule_id="AGENT-001",
                        pattern=pattern,
                        file_path=file_path
                    )
                    if finding:
                        findings.append(finding)

        return findings

    def evaluate_credentials(
        self,
        content: str,
        file_path: str
    ) -> List[Finding]:
        """
        Check content for hardcoded credentials.

        Args:
            content: File content to scan
            file_path: Source file path

        Returns:
            List of credential exposure findings
        """
        findings: List[Finding] = []
        lines = content.splitlines()

        for pattern, description in self.CREDENTIAL_PATTERNS:
            for line_num, line in enumerate(lines, start=1):
                for match in pattern.finditer(line):
                    finding = Finding(
                        rule_id="AGENT-004",
                        title="Hardcoded Credentials",
                        description=f"Found {description} in source code",
                        severity=Severity.CRITICAL,
                        category=Category.CREDENTIAL_EXPOSURE,
                        location=Location(
                            file_path=file_path,
                            start_line=line_num,
                            end_line=line_num,
                            start_column=match.start(),
                            end_column=match.end(),
                            snippet=self._mask_credential(line)
                        ),
                        cwe_id="CWE-798",
                        remediation=Remediation(
                            description="Use environment variables or a secrets manager",
                            code_example="api_key = os.environ.get('API_KEY')"
                        )
                    )
                    findings.append(finding)

        return findings

    def evaluate_mcp_config(
        self,
        servers: List[Dict[str, Any]],
        file_path: str
    ) -> List[Finding]:
        """
        Evaluate MCP server configurations for security issues.

        Args:
            servers: List of MCP server configurations
            file_path: Config file path

        Returns:
            List of findings
        """
        findings: List[Finding] = []

        for server in servers:
            server_name = server.get('name', 'unknown')
            server_url = server.get('url', '')
            env_vars = server.get('env', {})
            is_verified = server.get('verified', False)

            # Check for unverified servers (AGENT-005)
            if not is_verified and server_url:
                if not self._is_trusted_source(server_url):
                    finding = Finding(
                        rule_id="AGENT-005",
                        title="Unverified MCP Server",
                        description=f"MCP server '{server_name}' lacks signature verification",
                        severity=Severity.HIGH,
                        category=Category.SUPPLY_CHAIN,
                        location=Location(
                            file_path=file_path,
                            start_line=server.get('_line', 1),
                            end_line=server.get('_line', 1),
                            snippet=f"server: {server_name}"
                        ),
                        cwe_id="CWE-494",
                        remediation=Remediation(
                            description="Use verified MCP servers from trusted registries",
                            reference_url="https://modelcontextprotocol.io/docs/security"
                        )
                    )
                    findings.append(finding)

            # Check for credentials in env vars (AGENT-004)
            for key, value in env_vars.items():
                if isinstance(value, str):
                    for pattern, description in self.CREDENTIAL_PATTERNS:
                        if pattern.search(value):
                            finding = Finding(
                                rule_id="AGENT-004",
                                title="Hardcoded Credentials in MCP Config",
                                description=f"Found {description} in environment variable '{key}'",
                                severity=Severity.CRITICAL,
                                category=Category.CREDENTIAL_EXPOSURE,
                                location=Location(
                                    file_path=file_path,
                                    start_line=server.get('_line', 1),
                                    end_line=server.get('_line', 1),
                                    snippet=f"{key}=***REDACTED***"
                                ),
                                cwe_id="CWE-798",
                                remediation=Remediation(
                                    description="Use environment variables from the host system"
                                )
                            )
                            findings.append(finding)

        return findings

    def evaluate_permission_scope(
        self,
        tools: List[Any],
        file_path: str
    ) -> List[Finding]:
        """
        Check for excessive tool permissions (AGENT-002).

        Args:
            tools: List of tool definitions
            file_path: Source file path

        Returns:
            List of findings
        """
        findings: List[Finding] = []

        if len(tools) > 15:
            finding = Finding(
                rule_id="AGENT-002",
                title="Excessive Agent Permissions",
                description=f"Agent has {len(tools)} tools configured, which may be excessive",
                severity=Severity.MEDIUM,
                category=Category.EXCESSIVE_PERMISSION,
                location=Location(
                    file_path=file_path,
                    start_line=1,
                    end_line=1,
                    snippet=f"Total tools: {len(tools)}"
                ),
                cwe_id="CWE-250",
                confidence=0.7,  # Lower confidence as this is heuristic
                remediation=Remediation(
                    description="Consider splitting into multiple specialized agents"
                )
            )
            findings.append(finding)

        # Check for high-risk permission combinations
        high_risk_count = sum(
            1 for t in tools
            if hasattr(t, 'calculate_risk_score') and t.calculate_risk_score() > 5.0
        )

        if high_risk_count > 5:
            finding = Finding(
                rule_id="AGENT-002",
                title="Multiple High-Risk Tools",
                description=f"Agent has {high_risk_count} high-risk tools",
                severity=Severity.HIGH,
                category=Category.EXCESSIVE_PERMISSION,
                location=Location(
                    file_path=file_path,
                    start_line=1,
                    end_line=1,
                    snippet=f"High-risk tools: {high_risk_count}"
                ),
                cwe_id="CWE-250",
                remediation=Remediation(
                    description="Review and reduce high-risk tool permissions"
                )
            )
            findings.append(finding)

        return findings

    def _evaluate_rule(
        self,
        rule: Dict[str, Any],
        context: MatchContext
    ) -> List[Finding]:
        """Evaluate a single rule against the context."""
        findings: List[Finding] = []

        detection = rule.get('detection', {})
        patterns = detection.get('patterns', [])

        for pattern in patterns:
            pattern_type = pattern.get('type', '')

            if pattern_type == 'python_ast':
                # Match against dangerous patterns from Python scanner
                matches = self._match_python_ast_pattern(
                    pattern,
                    context.dangerous_patterns
                )
                for match in matches:
                    finding = self._create_finding_from_rule(
                        rule, context.file_path, match
                    )
                    findings.append(finding)

            elif pattern_type == 'function_call':
                # Match against function calls
                matches = self._match_function_calls(
                    pattern,
                    context.function_calls
                )
                for match in matches:
                    finding = self._create_finding_from_rule(
                        rule, context.file_path, match
                    )
                    findings.append(finding)

            elif pattern_type == 'regex':
                # Match against source code
                if context.source_code:
                    matches = self._match_regex_patterns(
                        pattern.get('patterns', []),
                        context.source_code
                    )
                    for match in matches:
                        finding = self._create_finding_from_rule(
                            rule, context.file_path, match
                        )
                        findings.append(finding)

        return findings

    def _match_python_ast_pattern(
        self,
        pattern: Dict[str, Any],
        dangerous_patterns: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Match Python AST patterns."""
        matches = []
        match_patterns = pattern.get('match', [])

        for dp in dangerous_patterns:
            func_name = dp.get('function', '')
            for mp in match_patterns:
                # Simple substring matching for now
                if func_name in mp or mp.split('(')[0] in func_name:
                    matches.append(dp)
                    break

        return matches

    def _match_function_calls(
        self,
        pattern: Dict[str, Any],
        function_calls: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Match function call patterns."""
        matches = []
        target_functions = pattern.get('functions', [])
        required_args = pattern.get('arguments', {})

        for call in function_calls:
            call_name = call.get('name', '')
            if call_name in target_functions:
                # Check required arguments if specified
                if required_args:
                    # This would need the actual call arguments
                    # For now, just match on function name
                    pass
                matches.append(call)

        return matches

    def _match_regex_patterns(
        self,
        patterns: List[str],
        content: str
    ) -> List[Dict[str, Any]]:
        """Match regex patterns against content."""
        matches = []
        lines = content.splitlines()

        for pattern_str in patterns:
            try:
                pattern = re.compile(pattern_str)
                for line_num, line in enumerate(lines, start=1):
                    if pattern.search(line):
                        matches.append({
                            'line': line_num,
                            'snippet': line.strip(),
                            'pattern': pattern_str
                        })
            except re.error as e:
                logger.warning(f"Invalid regex pattern: {pattern_str}: {e}")

        return matches

    def _is_command_injection(self, pattern: Dict[str, Any]) -> bool:
        """Check if a pattern represents command injection."""
        func_name = pattern.get('function', '')
        has_tainted = pattern.get('has_tainted_input', False)

        # shell=True is always risky
        if pattern.get('type') == 'shell_true':
            return True

        # Dangerous functions with tainted input
        dangerous_funcs = ['os.system', 'eval', 'exec', 'os.popen']
        if func_name in dangerous_funcs and has_tainted:
            return True

        return False

    def _create_finding_from_pattern(
        self,
        rule_id: str,
        pattern: Dict[str, Any],
        file_path: str
    ) -> Optional[Finding]:
        """Create a finding from a matched pattern."""
        rule = self._rules.get(rule_id)
        if not rule:
            return None

        return self._create_finding_from_rule(rule, file_path, pattern)

    def _create_finding_from_rule(
        self,
        rule: Dict[str, Any],
        file_path: str,
        match: Dict[str, Any]
    ) -> Finding:
        """Create a Finding from a rule and match."""
        remediation_data = rule.get('remediation', {})
        remediation = None
        if remediation_data:
            remediation = Remediation(
                description=remediation_data.get('description', ''),
                code_example=remediation_data.get('code_example'),
                reference_url=remediation_data.get('references', [None])[0]
                if remediation_data.get('references') else None
            )

        return Finding(
            rule_id=rule['id'],
            title=rule['title'],
            description=rule.get('description', ''),
            severity=Severity(rule['severity'].lower()),
            category=Category(rule['category'].lower()),
            location=Location(
                file_path=file_path,
                start_line=match.get('line', 1),
                end_line=match.get('line', 1),
                snippet=match.get('snippet', '')
            ),
            cwe_id=rule.get('cwe_id'),
            owasp_id=rule.get('owasp_id'),
            remediation=remediation,
            confidence=match.get('confidence', 1.0)
        )

    def _is_trusted_source(self, url: str) -> bool:
        """Check if a URL is from a trusted source."""
        trusted_prefixes = [
            "docker.io/mcp-catalog/",
            "ghcr.io/anthropics/",
            "ghcr.io/modelcontextprotocol/",
        ]
        return any(url.startswith(prefix) for prefix in trusted_prefixes)

    def _mask_credential(self, line: str) -> str:
        """Mask credentials in a line for safe display."""
        # Replace potential credentials with asterisks
        masked = re.sub(
            r'(["\']?)([A-Za-z0-9_-]{20,})(["\']?)',
            r'\1***REDACTED***\3',
            line
        )
        return masked.strip()
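Note: to show how these pieces fit together, here is a hypothetical usage sketch. RuleEngine, MatchContext, and the methods called are the ones defined above; the rules directory, file paths, and fake scanner output are invented for illustration, and which findings evaluate() produces depends on which rules are actually loaded.

# Hypothetical usage sketch -- paths and scanner output are invented.
from pathlib import Path

from agent_audit.rules.engine import MatchContext, RuleEngine

engine = RuleEngine(rules_dirs=[Path("rules/builtin")])  # assumed rules location
engine.load_rules()

# Feed the engine a context the way a scanner would populate it.
context = MatchContext(
    file_path="agent/tools.py",
    source_code="subprocess.run(cmd, shell=True)",
    dangerous_patterns=[{"type": "shell_true", "function": "subprocess.run", "line": 1}],
)
for finding in engine.evaluate(context):
    print(finding.rule_id, finding.title)

# evaluate_credentials() constructs its findings directly, so it works even
# with no rule files loaded; the snippet comes back masked.
for finding in engine.evaluate_credentials('key = "AKIAABCDEFGHIJKLMNOP"', "config.py"):
    print(finding.severity, finding.location.snippet)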
agent_audit/rules/loader.py
@@ -0,0 +1,160 @@
"""YAML rule loader for Agent Security Suite."""

import logging
from pathlib import Path
from typing import Dict, Any, List, Optional
import yaml

logger = logging.getLogger(__name__)


class RuleLoader:
    """
    Loader for YAML rule files.

    Discovers and parses .yaml/.yml files from the configured rules directories.
    """

    def __init__(self, rules_dirs: Optional[List[Path]] = None):
        """
        Initialize the rule loader.

        Args:
            rules_dirs: List of directories to search for rules.
                If None, starts with an empty list; directories can be
                added later via add_rules_directory().
        """
        self.rules_dirs = rules_dirs or []
        self._rules_cache: Dict[str, Dict[str, Any]] = {}

    def add_rules_directory(self, path: Path):
        """Add a directory to search for rules."""
        if path.exists() and path.is_dir():
            self.rules_dirs.append(path)

    def load_all_rules(self) -> Dict[str, Dict[str, Any]]:
        """
        Load all rules from configured directories.

        Returns:
            Dictionary mapping rule_id to rule definition.
        """
        all_rules: Dict[str, Dict[str, Any]] = {}

        for rules_dir in self.rules_dirs:
            rules = self._load_rules_from_directory(rules_dir)
            all_rules.update(rules)

        self._rules_cache = all_rules
        return all_rules

    def load_rule_file(self, file_path: Path) -> Dict[str, Dict[str, Any]]:
        """
        Load rules from a single YAML file.

        Args:
            file_path: Path to the YAML rule file

        Returns:
            Dictionary mapping rule_id to rule definition
        """
        rules: Dict[str, Dict[str, Any]] = {}

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)

            if not data or 'rules' not in data:
                logger.warning(f"No rules found in {file_path}")
                return rules

            for rule in data['rules']:
                rule_id = rule.get('id')
                if not rule_id:
                    logger.warning(f"Rule without id in {file_path}")
                    continue

                # Validate required fields
                if not self._validate_rule(rule, file_path):
                    continue

                # Add source file reference
                rule['_source_file'] = str(file_path)
                rules[rule_id] = rule

        except yaml.YAMLError as e:
            logger.error(f"YAML parse error in {file_path}: {e}")
        except Exception as e:
            logger.error(f"Error loading {file_path}: {e}")

        return rules

    def _load_rules_from_directory(self, rules_dir: Path) -> Dict[str, Dict[str, Any]]:
        """Load all rules from a directory."""
        rules: Dict[str, Dict[str, Any]] = {}

        if not rules_dir.exists():
            logger.warning(f"Rules directory does not exist: {rules_dir}")
            return rules

        for yaml_file in rules_dir.glob("**/*.yaml"):
            file_rules = self.load_rule_file(yaml_file)
            rules.update(file_rules)

        for yml_file in rules_dir.glob("**/*.yml"):
            file_rules = self.load_rule_file(yml_file)
            rules.update(file_rules)

        return rules

    def _validate_rule(self, rule: Dict[str, Any], source_file: Path) -> bool:
        """
        Validate a rule definition has required fields.

        Returns True if valid, False otherwise.
        """
        required_fields = ['id', 'title', 'severity', 'category']

        for field in required_fields:
            if field not in rule:
                logger.warning(
                    f"Rule missing required field '{field}' in {source_file}"
                )
                return False

        # Validate severity
        valid_severities = {'critical', 'high', 'medium', 'low', 'info'}
        if rule.get('severity', '').lower() not in valid_severities:
            logger.warning(
                f"Invalid severity '{rule.get('severity')}' in rule {rule['id']}"
            )
            return False

        return True

    def get_rule(self, rule_id: str) -> Optional[Dict[str, Any]]:
        """Get a rule by its ID."""
        if not self._rules_cache:
            self.load_all_rules()
        return self._rules_cache.get(rule_id)

    def get_rules_by_category(self, category: str) -> List[Dict[str, Any]]:
        """Get all rules in a category."""
        if not self._rules_cache:
            self.load_all_rules()
        return [
            rule for rule in self._rules_cache.values()
            if rule.get('category') == category
        ]

    def get_rules_by_severity(self, severity: str) -> List[Dict[str, Any]]:
        """Get all rules at or above a severity level."""
        if not self._rules_cache:
            self.load_all_rules()

        severity_order = ['info', 'low', 'medium', 'high', 'critical']
        min_index = severity_order.index(severity.lower())

        return [
            rule for rule in self._rules_cache.values()
            if severity_order.index(rule.get('severity', 'info').lower()) >= min_index
        ]
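Note: the rule-file format is only implied by the code, so here is a hypothetical round-trip sketch. The 'rules:' layout and the required fields (id, title, severity, category) mirror what load_rule_file and _validate_rule check, and the detection/remediation keys mirror what RuleEngine reads; the rule content itself is invented, and the category value is an assumption about agent_audit.models.risk.Category.

# Hypothetical round-trip through RuleLoader -- the rule itself is illustrative.
from pathlib import Path

from agent_audit.rules.loader import RuleLoader

rule_yaml = r"""
rules:
  - id: CUSTOM-001
    title: Use of os.system
    severity: high                 # must be one of critical/high/medium/low/info
    category: command_injection    # assumed to be a valid Category value
    description: os.system runs its argument through a shell.
    cwe_id: CWE-78
    detection:
      patterns:
        - type: regex
          patterns:
            - 'os\.system\('
    remediation:
      description: Prefer subprocess.run with an argument list.
      code_example: "subprocess.run(['ls', '-l'], check=True)"
      references:
        - https://docs.python.org/3/library/subprocess.html
"""

rules_file = Path("custom_rules.yaml")
rules_file.write_text(rule_yaml, encoding="utf-8")

loader = RuleLoader()
rules = loader.load_rule_file(rules_file)
print(sorted(rules))  # ['CUSTOM-001']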
agent_audit/scanners/base.py
@@ -0,0 +1,32 @@
"""Base scanner interface."""

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Any, Dict


@dataclass
class ScanResult:
    """Base scan result."""
    source_file: str
    metadata: Dict[str, Any] = field(default_factory=dict)


class BaseScanner(ABC):
    """Abstract base class for all scanners."""

    name: str = "BaseScanner"

    @abstractmethod
    def scan(self, path: Path) -> List[ScanResult]:
        """
        Scan the given path and return results.

        Args:
            path: Path to scan (file or directory)

        Returns:
            List of scan results
        """
        pass
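Note: as a sketch of the extension point this interface defines, a minimal concrete scanner might look like the following. Only BaseScanner and ScanResult come from base.py; FileInventoryScanner and its file-listing behavior are invented for illustration.

# Hypothetical subclass sketch -- the scanner itself is made up.
from pathlib import Path
from typing import List

from agent_audit.scanners.base import BaseScanner, ScanResult


class FileInventoryScanner(BaseScanner):
    """Toy scanner that records every Python file under a path."""

    name = "FileInventoryScanner"

    def scan(self, path: Path) -> List[ScanResult]:
        # Accept either a single file or a directory tree.
        files = [path] if path.is_file() else sorted(path.glob("**/*.py"))
        return [
            ScanResult(source_file=str(f), metadata={"size_bytes": f.stat().st_size})
            for f in files
        ]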