agent-audit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. agent_audit/__init__.py +3 -0
  2. agent_audit/__main__.py +13 -0
  3. agent_audit/cli/__init__.py +1 -0
  4. agent_audit/cli/commands/__init__.py +1 -0
  5. agent_audit/cli/commands/init.py +44 -0
  6. agent_audit/cli/commands/inspect.py +236 -0
  7. agent_audit/cli/commands/scan.py +329 -0
  8. agent_audit/cli/formatters/__init__.py +1 -0
  9. agent_audit/cli/formatters/json.py +138 -0
  10. agent_audit/cli/formatters/sarif.py +155 -0
  11. agent_audit/cli/formatters/terminal.py +221 -0
  12. agent_audit/cli/main.py +34 -0
  13. agent_audit/config/__init__.py +1 -0
  14. agent_audit/config/ignore.py +477 -0
  15. agent_audit/core_utils/__init__.py +1 -0
  16. agent_audit/models/__init__.py +18 -0
  17. agent_audit/models/finding.py +159 -0
  18. agent_audit/models/risk.py +77 -0
  19. agent_audit/models/tool.py +182 -0
  20. agent_audit/rules/__init__.py +6 -0
  21. agent_audit/rules/engine.py +503 -0
  22. agent_audit/rules/loader.py +160 -0
  23. agent_audit/scanners/__init__.py +5 -0
  24. agent_audit/scanners/base.py +32 -0
  25. agent_audit/scanners/config_scanner.py +390 -0
  26. agent_audit/scanners/mcp_config_scanner.py +321 -0
  27. agent_audit/scanners/mcp_inspector.py +421 -0
  28. agent_audit/scanners/python_scanner.py +544 -0
  29. agent_audit/scanners/secret_scanner.py +521 -0
  30. agent_audit/utils/__init__.py +21 -0
  31. agent_audit/utils/compat.py +98 -0
  32. agent_audit/utils/mcp_client.py +343 -0
  33. agent_audit/version.py +3 -0
  34. agent_audit-0.1.0.dist-info/METADATA +219 -0
  35. agent_audit-0.1.0.dist-info/RECORD +37 -0
  36. agent_audit-0.1.0.dist-info/WHEEL +4 -0
  37. agent_audit-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,544 @@
1
+ """Python AST scanner for detecting dangerous patterns in agent code."""
2
+
3
+ import ast
4
+ import fnmatch
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import List, Set, Optional, Dict, Any
8
+ from dataclasses import dataclass, field
9
+
10
+ from agent_audit.scanners.base import BaseScanner, ScanResult
11
+ from agent_audit.models.tool import ToolDefinition, PermissionType, ToolParameter
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
@dataclass
class PythonScanResult(ScanResult):
    """Python scan result with extracted tools and patterns.

    One instance is produced per successfully parsed Python file. Every
    field defaults to an empty list.
    """
    # Tool definitions found in the file (from @tool-decorated functions and
    # tool base-class subclasses).
    tools: List[ToolDefinition] = field(default_factory=list)
    # Imported names as written in the file: "module" for plain imports,
    # "module.name" for from-imports.
    imports: List[str] = field(default_factory=list)
    # One dict per call whose name could be resolved, with the keys
    # 'name', 'line', 'in_function' and 'in_class'.
    function_calls: List[Dict[str, Any]] = field(default_factory=list)
    # One dict per risky call; the 'type' key is either
    # 'dangerous_function_call' or 'shell_true'.
    dangerous_patterns: List[Dict[str, Any]] = field(default_factory=list)
23
+
24
+
25
class PythonScanner(BaseScanner):
    """
    Python code scanner using the built-in ast module.

    Detects:
    - Dangerous function calls (os.system, subprocess with shell=True, eval, exec)
    - @tool decorators and BaseTool subclasses
    - Tainted input flowing to dangerous functions
    - Import aliases for tracking function origins
    """

    name = "Python Scanner"

    # Dangerous function mapping to permission types
    DANGEROUS_FUNCTIONS = {
        'os.system': PermissionType.SHELL_EXEC,
        'os.popen': PermissionType.SHELL_EXEC,
        'subprocess.run': PermissionType.SHELL_EXEC,
        'subprocess.Popen': PermissionType.SHELL_EXEC,
        'subprocess.call': PermissionType.SHELL_EXEC,
        'subprocess.check_output': PermissionType.SHELL_EXEC,
        'subprocess.check_call': PermissionType.SHELL_EXEC,
        'eval': PermissionType.SHELL_EXEC,
        'exec': PermissionType.SHELL_EXEC,
        'open': PermissionType.FILE_READ,
        'os.remove': PermissionType.FILE_DELETE,
        'os.unlink': PermissionType.FILE_DELETE,
        'os.rmdir': PermissionType.FILE_DELETE,
        'shutil.rmtree': PermissionType.FILE_DELETE,
        'shutil.move': PermissionType.FILE_WRITE,
        'shutil.copy': PermissionType.FILE_WRITE,
        'requests.get': PermissionType.NETWORK_OUTBOUND,
        'requests.post': PermissionType.NETWORK_OUTBOUND,
        'requests.put': PermissionType.NETWORK_OUTBOUND,
        'requests.delete': PermissionType.NETWORK_OUTBOUND,
        'httpx.get': PermissionType.NETWORK_OUTBOUND,
        'httpx.post': PermissionType.NETWORK_OUTBOUND,
        'aiohttp.ClientSession': PermissionType.NETWORK_OUTBOUND,
        'urllib.request.urlopen': PermissionType.NETWORK_OUTBOUND,
    }

    # Tool decorator names to detect
    TOOL_DECORATORS = {'tool', 'langchain.tools.tool', 'langchain_core.tools.tool'}

    # Base classes for tool detection
    TOOL_BASE_CLASSES = {'BaseTool', 'StructuredTool'}

    # Directory names that are skipped wherever they appear below the scan root
    _SKIP_DIRS = frozenset({
        '.git', 'venv', '.venv', '__pycache__', 'dist',
        'build', 'node_modules', '.tox', '.pytest_cache',
    })

    def __init__(
        self,
        exclude_patterns: Optional[List[str]] = None,
        exclude_paths: Optional[List[str]] = None  # Backward compatibility alias
    ):
        """
        Initialize the Python scanner.

        Args:
            exclude_patterns: Glob patterns to exclude from scanning (e.g., "tests/**")
            exclude_paths: Deprecated alias for exclude_patterns (backward compatibility)
        """
        # Honour both parameter names. When a caller supplies both, the
        # entries are combined instead of silently discarding exclude_paths.
        self.exclude_patterns = list(exclude_patterns or []) + list(exclude_paths or [])

    def scan(self, path: Path) -> List[PythonScanResult]:
        """
        Scan a path for Python files and analyze them.

        Args:
            path: File or directory to scan

        Returns:
            List of scan results, one per file that could be parsed
        """
        results = []
        for py_file in self._find_python_files(path):
            result = self._scan_file(py_file)
            if result:
                results.append(result)
        return results

    def _find_python_files(self, path: Path) -> List[Path]:
        """
        Find all Python files to scan.

        Args:
            path: A single file, or a directory that is searched recursively

        Returns:
            The files to analyze, in sorted order so repeated scans of the
            same tree report results in the same order
        """
        if path.is_file():
            return [path] if path.suffix == '.py' else []

        python_files = []
        for py_file in sorted(path.rglob('*.py')):
            # All filters look at the path *relative to the scan root*. The
            # location of the root itself (e.g. a checkout under "~/.local"
            # or "build/") must not cause every file to be skipped.
            rel = py_file.relative_to(path)
            rel_parts = rel.parts

            # Skip excluded paths using glob patterns
            if self._should_exclude(str(rel)):
                continue

            # Skip common non-source directories
            if any(part in self._SKIP_DIRS for part in rel_parts):
                continue

            # Skip hidden directories (but not . or ..)
            if any(part.startswith('.') and part not in {'.', '..'}
                   for part in rel_parts):
                continue

            # A directory (or dangling symlink) may also be named "*.py"
            if not py_file.is_file():
                continue

            python_files.append(py_file)

        return python_files

    def _should_exclude(self, rel_path: str) -> bool:
        """
        Check if a relative path matches any exclude pattern.

        Args:
            rel_path: Path relative to the scan root; either separator style

        Returns:
            True if at least one configured pattern matches
        """
        # Normalize path separators
        normalized_path = rel_path.replace('\\', '/')

        return any(
            self._matches_exclude_pattern(normalized_path, pattern.replace('\\', '/'))
            for pattern in self.exclude_patterns
            # An empty pattern is a substring of every path and would
            # exclude the whole tree, so it is ignored.
            if pattern
        )

    @staticmethod
    def _matches_exclude_pattern(path: str, pattern: str) -> bool:
        """Match one '/'-separated relative path against one exclude pattern."""
        # Simple substring matching (backward compatibility)
        if pattern in path:
            return True

        # Direct fnmatch for glob patterns
        if fnmatch.fnmatch(path, pattern):
            return True

        # Handle "tests/**" style patterns
        if pattern.endswith('/**'):
            prefix = pattern[:-3]
            if path == prefix or path.startswith(prefix + '/'):
                return True

        # Handle "**/test_*" style patterns
        if pattern.startswith('**/'):
            remainder = pattern[3:]
            segments = Path(path).parts

            # Match against any single path segment (includes the filename)
            if any(fnmatch.fnmatch(segment, remainder) for segment in segments):
                return True

            # "**/" may stand for zero or more leading directories, so the
            # remainder is also tried against every trailing sub-path. This
            # is what lets "**/tests/**" exclude a top-level "tests/".
            inner = remainder[:-3] if remainder.endswith('/**') else None
            for start in range(len(segments)):
                tail = '/'.join(segments[start:])
                if fnmatch.fnmatch(tail, remainder):
                    return True
                if inner and (tail == inner or tail.startswith(inner + '/')):
                    return True

        return False

    def _scan_file(self, file_path: Path) -> Optional[PythonScanResult]:
        """
        Scan a single Python file.

        Args:
            file_path: File to read and parse

        Returns:
            The scan result, or None when the file cannot be read or parsed
            (a warning is logged and the overall scan continues)
        """
        try:
            # 'utf-8-sig' strips a leading BOM; with plain 'utf-8' the BOM
            # stays in the string and ast.parse rejects the file.
            source = file_path.read_text(encoding='utf-8-sig')
            tree = ast.parse(source, filename=str(file_path))
        except SyntaxError as e:
            logger.warning("Syntax error in %s: %s", file_path, e)
            return None
        except UnicodeDecodeError as e:
            logger.warning("Encoding error in %s: %s", file_path, e)
            return None
        except Exception as e:
            # Boundary handler: one unreadable file must not abort the scan.
            logger.warning("Error parsing %s: %s", file_path, e)
            return None

        visitor = PythonASTVisitor(file_path, source)
        try:
            visitor.visit(tree)
        except RecursionError as e:
            # Extremely deep nesting can exhaust the interpreter stack while
            # walking the tree; keep whatever was collected up to that point.
            logger.warning("Analysis of %s stopped early: %s", file_path, e)

        return PythonScanResult(
            source_file=str(file_path),
            tools=visitor.tools,
            imports=visitor.imports,
            function_calls=visitor.function_calls,
            dangerous_patterns=visitor.dangerous_patterns
        )
196
+
197
+
198
class PythonASTVisitor(ast.NodeVisitor):
    """
    AST visitor that extracts security-relevant information.

    After ``visit(tree)`` the results are available on ``tools``,
    ``imports``, ``function_calls`` and ``dangerous_patterns``.
    """

    # subprocess entry points that accept a ``shell=`` keyword
    _SUBPROCESS_CALLS = frozenset({
        'subprocess.run', 'subprocess.Popen', 'subprocess.call',
        'subprocess.check_output', 'subprocess.check_call',
    })

    # Substrings of a call name that are taken as a sign of input validation
    _VALIDATION_HINTS = ('validate', 'check', 'verify', 'sanitize')

    # Receiver parameters of methods; not treated as caller-controlled input
    _RECEIVER_NAMES = frozenset({'self', 'cls'})

    def __init__(self, file_path: Path, source: str):
        """
        Args:
            file_path: Path of the file being analyzed (used for reporting)
            source: Full source text of that file (used for snippets)
        """
        self.file_path = file_path
        self.source = source
        self.source_lines = source.splitlines()

        self.tools: List[ToolDefinition] = []
        self.imports: List[str] = []
        self.function_calls: List[Dict[str, Any]] = []
        self.dangerous_patterns: List[Dict[str, Any]] = []

        # Track import aliases (e.g., import subprocess as sp)
        self._imported_names: Dict[str, str] = {}
        # Track current function context for taint analysis
        self._current_function: Optional[str] = None
        self._current_function_params: Set[str] = set()
        # Track current class for tool detection
        self._current_class: Optional[str] = None

    def visit_Import(self, node: ast.Import):
        """Track imports like 'import subprocess'."""
        for alias in node.names:
            self.imports.append(alias.name)
            name = alias.asname or alias.name
            self._imported_names[name] = alias.name
        self.generic_visit(node)

    def visit_ImportFrom(self, node: ast.ImportFrom):
        """Track imports like 'from subprocess import run'."""
        module = node.module or ''
        for alias in node.names:
            full_name = f"{module}.{alias.name}" if module else alias.name
            self.imports.append(full_name)
            name = alias.asname or alias.name
            self._imported_names[name] = full_name
        self.generic_visit(node)

    def visit_ClassDef(self, node: ast.ClassDef):
        """Visit class definitions to detect BaseTool subclasses."""
        old_class = self._current_class
        self._current_class = node.name

        # Check if this class inherits from a tool base class
        if any(self._is_tool_base(base) for base in node.bases):
            tool = self._extract_tool_from_class(node)
            if tool:
                self.tools.append(tool)

        self.generic_visit(node)
        self._current_class = old_class

    def visit_FunctionDef(self, node: ast.FunctionDef):
        """Visit function definitions to detect @tool decorators."""
        old_func = self._current_function
        old_params = self._current_function_params

        self._current_function = node.name
        params = self._collect_param_names(node.args)
        if self._current_class is not None:
            # Taint analysis looks inside attribute access, so the receiver
            # is dropped; otherwise every "self.x" would count as tainted.
            params -= self._RECEIVER_NAMES
        self._current_function_params = params

        # Check for @tool decorator
        if self._has_tool_decorator(node):
            tool = self._extract_tool_from_function(node)
            if tool:
                self.tools.append(tool)

        self.generic_visit(node)

        self._current_function = old_func
        self._current_function_params = old_params

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
        """Handle async function definitions the same as regular functions."""
        # Reuse the same logic as FunctionDef
        self.visit_FunctionDef(node)  # type: ignore

    def visit_Call(self, node: ast.Call):
        """Visit function calls to detect dangerous patterns."""
        func_name = self._get_call_name(node)

        if func_name:
            self.function_calls.append({
                'name': func_name,
                'line': node.lineno,
                'in_function': self._current_function,
                'in_class': self._current_class,
            })

            is_dangerous = func_name in PythonScanner.DANGEROUS_FUNCTIONS
            is_shell_true = (func_name in self._SUBPROCESS_CALLS
                             and self._has_shell_true(node))

            if is_dangerous or is_shell_true:
                # Shared by both pattern kinds; computed once per call
                snippet = self._get_line(node.lineno)
                tainted = self._check_tainted_input(node)

                # Check if this is a dangerous function
                if is_dangerous:
                    self.dangerous_patterns.append({
                        'type': 'dangerous_function_call',
                        'function': func_name,
                        'permission': PythonScanner.DANGEROUS_FUNCTIONS[func_name],
                        'line': node.lineno,
                        'snippet': snippet,
                        'has_tainted_input': tainted,
                        'in_function': self._current_function,
                    })

                # Check for subprocess with shell=True
                if is_shell_true:
                    self.dangerous_patterns.append({
                        'type': 'shell_true',
                        'function': func_name,
                        'line': node.lineno,
                        'snippet': snippet,
                        'has_tainted_input': tainted,
                        'in_function': self._current_function,
                    })

        self.generic_visit(node)

    @staticmethod
    def _collect_param_names(args: ast.arguments) -> Set[str]:
        """Return the names of every parameter kind in a signature."""
        # posonlyargs only exists on Python 3.8+
        names = {a.arg for a in getattr(args, 'posonlyargs', [])}
        names.update(a.arg for a in args.args)
        names.update(a.arg for a in args.kwonlyargs)
        if args.vararg is not None:
            names.add(args.vararg.arg)
        if args.kwarg is not None:
            names.add(args.kwarg.arg)
        return names

    @staticmethod
    def _iter_signature(args: ast.arguments):
        """Yield ``(arg, has_default)`` for positional and keyword-only params."""
        positional = list(getattr(args, 'posonlyargs', [])) + list(args.args)
        # Defaults always belong to the *last* positional parameters
        first_with_default = len(positional) - len(args.defaults)
        for index, arg in enumerate(positional):
            yield arg, index >= first_with_default
        # kw_defaults holds None for keyword-only params without a default
        for arg, default in zip(args.kwonlyargs, args.kw_defaults):
            yield arg, default is not None

    def _is_tool_base(self, base: ast.expr) -> bool:
        """Check whether a base-class expression names a known tool base class."""
        # Unwrap generic subscripts such as BaseTool[Args]
        while isinstance(base, ast.Subscript):
            base = base.value

        candidates = set()
        base_name = self._get_name(base)
        if base_name:
            candidates.add(base_name)
        if isinstance(base, ast.Name):
            # "from x import BaseTool as B" -> resolve B back to BaseTool
            resolved = self._imported_names.get(base.id, base.id)
            candidates.add(resolved.rsplit('.', 1)[-1])

        return not candidates.isdisjoint(PythonScanner.TOOL_BASE_CLASSES)

    def _has_tool_decorator(self, node: ast.FunctionDef) -> bool:
        """Check if a function has a tool decorator."""
        for decorator in node.decorator_list:
            dec_name = self._get_decorator_name(decorator)
            if not dec_name:
                continue
            if any(t in dec_name for t in PythonScanner.TOOL_DECORATORS):
                return True

            # Also recognise aliased imports ("from langchain.tools import
            # tool as t"). Only the full resolved name (exact match) or its
            # last segment is tested; substring-matching the whole dotted
            # path would flag anything imported from "functools".
            target = decorator
            while isinstance(target, ast.Call):
                target = target.func
            if isinstance(target, ast.Name) and target.id in self._imported_names:
                resolved = self._imported_names[target.id]
                if resolved in PythonScanner.TOOL_DECORATORS:
                    return True
                last_segment = resolved.rsplit('.', 1)[-1]
                if any(t in last_segment for t in PythonScanner.TOOL_DECORATORS):
                    return True
        return False

    def _extract_tool_from_function(self, node: ast.FunctionDef) -> Optional[ToolDefinition]:
        """Extract a ToolDefinition from a @tool decorated function."""
        description = ast.get_docstring(node) or ""
        permissions = self._analyze_function_permissions(node)

        # Extract parameters from function signature
        parameters = []
        for arg, has_default in self._iter_signature(node.args):
            if arg.arg == 'self':
                continue
            parameters.append(ToolParameter(
                name=arg.arg,
                type=self._get_annotation_type(arg.annotation),
                required=not has_default,  # a default value makes it optional
                allows_arbitrary_input=True,  # Conservative assumption
            ))

        tool = ToolDefinition(
            name=node.name,
            description=description,
            source_file=str(self.file_path),
            source_line=node.lineno,
            permissions=permissions,
            parameters=parameters,
            has_input_validation=self._check_input_validation(node),
        )
        tool.update_capability_flags()
        tool.risk_level = tool.infer_risk_level()

        return tool

    def _extract_tool_from_class(self, node: ast.ClassDef) -> Optional[ToolDefinition]:
        """Extract a ToolDefinition from a BaseTool subclass."""
        description = ast.get_docstring(node) or ""

        # Find the run methods to analyze permissions
        permissions: Set[PermissionType] = set()
        has_validation = False

        for item in node.body:
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                if item.name in ('_run', '_arun', 'run', 'arun'):
                    permissions.update(self._analyze_function_permissions(item))
                    has_validation = has_validation or self._check_input_validation(item)

        tool = ToolDefinition(
            name=node.name,
            description=description,
            source_file=str(self.file_path),
            source_line=node.lineno,
            permissions=permissions,
            has_input_validation=has_validation,
        )
        tool.update_capability_flags()
        tool.risk_level = tool.infer_risk_level()

        return tool

    def _analyze_function_permissions(self, node: ast.FunctionDef) -> Set[PermissionType]:
        """Analyze a function body to infer required permissions."""
        permissions: Set[PermissionType] = set()

        for child in ast.walk(node):
            if isinstance(child, ast.Call):
                func_name = self._get_call_name(child)
                if func_name and func_name in PythonScanner.DANGEROUS_FUNCTIONS:
                    permissions.add(PythonScanner.DANGEROUS_FUNCTIONS[func_name])

        return permissions

    def _check_tainted_input(self, node: ast.Call) -> bool:
        """
        Check if a function call uses tainted (user-controlled) input.

        This is a simplified taint analysis that checks if any argument
        refers to a parameter of the enclosing function.
        """
        if not self._current_function_params:
            return False

        if any(self._contains_tainted_var(arg) for arg in node.args):
            return True
        return any(self._contains_tainted_var(kw.value) for kw in node.keywords)

    def _contains_tainted_var(self, node: ast.expr) -> bool:
        """
        Check if an expression contains a tainted variable.

        The whole expression tree is searched, so a parameter is found inside
        lists/tuples/dicts (``['sh', '-c', cmd]``), attribute and subscript
        access (``req.url``, ``opts['cmd']``), method receivers
        (``cmd.strip()``), f-strings, concatenation, and nested calls
        including their keyword arguments.
        """
        params = self._current_function_params
        return any(
            isinstance(sub, ast.Name) and sub.id in params
            for sub in ast.walk(node)
        )

    def _has_shell_true(self, node: ast.Call) -> bool:
        """
        Check if a subprocess call has shell=True.

        Only literal values are decided; any truthy literal (``True``, ``1``)
        counts. A non-literal value such as ``shell=flag`` cannot be resolved
        statically and is not reported.
        """
        for keyword in node.keywords:
            if keyword.arg != 'shell':
                continue
            value = keyword.value
            # Compare by class name rather than isinstance(ast.NameConstant):
            # that alias is deprecated and no longer exists on Python 3.14,
            # where touching it raises AttributeError. Python 3.7 still
            # produces NameConstant nodes, which this check also covers.
            if type(value).__name__ in ('Constant', 'NameConstant'):
                return bool(value.value)
        return False

    def _check_input_validation(self, node: ast.FunctionDef) -> bool:
        """
        Check if a function has input validation.

        Heuristic: returns True when the function (including nested
        definitions) contains an assert statement, a raise statement, or a
        call whose name contains 'validate', 'check', 'verify' or 'sanitize'.
        """
        for child in ast.walk(node):
            if isinstance(child, (ast.Assert, ast.Raise)):
                return True
            if isinstance(child, ast.Call):
                func_name = self._get_call_name(child)
                if func_name:
                    lowered = func_name.lower()
                    if any(hint in lowered for hint in self._VALIDATION_HINTS):
                        return True

        return False

    def _get_call_name(self, node: ast.Call) -> Optional[str]:
        """
        Get the full name of a function being called.

        Returns:
            The dotted name with the leading import alias resolved
            (``sp.run`` -> ``subprocess.run``), the attribute chain alone when
            the call is made on an arbitrary expression (``f().g()`` -> ``g``),
            or None when the callee is neither a name nor an attribute.
        """
        func = node.func
        if isinstance(func, ast.Name):
            # Resolve import alias
            return self._imported_names.get(func.id, func.id)

        if not isinstance(func, ast.Attribute):
            return None

        parts = []
        current = func
        while isinstance(current, ast.Attribute):
            parts.append(current.attr)
            current = current.value

        rooted_at_name = isinstance(current, ast.Name)
        if rooted_at_name:
            parts.append(current.id)
        parts.reverse()

        # Only a chain that really starts at a plain name can be an imported
        # module/alias. For "obj().run()" the first part is a method name and
        # must not be rewritten to e.g. "subprocess.run".
        if rooted_at_name and parts[0] in self._imported_names:
            parts[0] = self._imported_names[parts[0]]

        return '.'.join(parts)

    def _get_name(self, node: ast.expr) -> Optional[str]:
        """Get the name from a Name or Attribute node (last segment only)."""
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            return node.attr
        return None

    def _get_decorator_name(self, decorator: ast.expr) -> Optional[str]:
        """Get the name of a decorator (last segment; calls are unwrapped)."""
        if isinstance(decorator, ast.Name):
            return decorator.id
        if isinstance(decorator, ast.Attribute):
            return decorator.attr
        if isinstance(decorator, ast.Call):
            return self._get_decorator_name(decorator.func)
        return None

    def _get_annotation_type(self, annotation: Optional[ast.expr]) -> str:
        """
        Convert an annotation to a type string.

        Returns "Any" for a missing annotation. Names and constants are
        returned directly; every other form (``List[str]``, ``typing.List``,
        ``int | None``) is rendered with ast.unparse where available.
        """
        if annotation is None:
            return "Any"
        if isinstance(annotation, ast.Name):
            return annotation.id
        if isinstance(annotation, ast.Constant):
            return str(annotation.value)
        if hasattr(ast, 'unparse'):
            return ast.unparse(annotation)
        # Python < 3.9 has no ast.unparse: keep the coarse fallback labels
        return "Generic" if isinstance(annotation, ast.Subscript) else "Any"

    def _get_line(self, lineno: int) -> str:
        """Get a source line by its 1-based number, stripped; "" if out of range."""
        if 0 < lineno <= len(self.source_lines):
            return self.source_lines[lineno - 1].strip()
        return ""