securitybench 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sb/__init__.py ADDED
@@ -0,0 +1,38 @@
1
+ """Security Bench - Security testing for LLM pipelines.
2
+
3
+ A comprehensive CLI tool for testing LLM applications against security
4
+ vulnerabilities including prompt injection, jailbreaks, and data leakage.
5
+ """
6
+ from .config import PipelineConfig, EndpointConfig, InputConfig, OutputConfig, ConfigError
7
+ from .bench import SecurityBench, SecurityTest, ScanResults, TestResult
8
+ from .runner import TestRunner, RunResult
9
+ from .loader import TestLoader, LoaderError
10
+ from .evaluators import PatternEvaluator, EvaluationResult
11
+ from .cli import main
12
+
13
__version__ = "0.1.0"

# Names re-exported at the package root, grouped by the submodule they are
# imported from at the top of this file.
__all__ = [
    # .config
    "PipelineConfig", "EndpointConfig", "InputConfig", "OutputConfig", "ConfigError",
    # .bench
    "SecurityBench", "SecurityTest", "ScanResults", "TestResult",
    # .runner
    "TestRunner", "RunResult",
    # .loader
    "TestLoader", "LoaderError",
    # .evaluators
    "PatternEvaluator", "EvaluationResult",
    # .cli
    "main",
]
File without changes
sb/api_client.py ADDED
File without changes
sb/auditor.py ADDED
@@ -0,0 +1,355 @@
1
+ """Security Auditor for local code, config, and infrastructure checks.
2
+
3
+ Lynis-inspired security scanner that fetches checks from the API
4
+ and runs them against local files and system.
5
+ """
6
+ import asyncio
7
+ import fnmatch
8
+ import os
9
+ import re
10
+ import subprocess
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime
13
+ from pathlib import Path
14
+ from typing import Optional
15
+
16
+ import httpx
17
+
18
+ from .loader import TestLoader
19
+
20
+
21
@dataclass
class SecurityCheck:
    """Definition of one security check as delivered by the API.

    The first eight fields are required and positional; the remaining ones
    are optional metadata with defaults.
    """

    # --- identity and classification -------------------------------------
    id: str
    command: str  # "code", "config", or "infra"
    category: str
    name: str
    description: str
    severity: str  # "critical", "high", "medium", "low"

    # --- how the check is detected ---------------------------------------
    detection_type: str  # "regex" or "command"
    pattern: str  # Regex pattern or shell command
    file_patterns: list[str] = field(default_factory=list)

    # --- optional metadata -----------------------------------------------
    cwe: Optional[str] = None
    owasp_llm: Optional[str] = None
    weight: int = 5
    remediation: Optional[str] = None
38
+
39
@dataclass
class Finding:
    """One issue reported by a security check.

    The first five fields identify the originating check; the optional
    fields carry the location and matched text when they are available.
    """

    # --- copied from the check that produced the finding -----------------
    check_id: str
    check_name: str
    severity: str
    category: str
    description: str

    # --- where / what was matched (optional) -----------------------------
    file_path: Optional[str] = None
    line_number: Optional[int] = None
    matched_content: Optional[str] = None

    # --- guidance metadata (optional) ------------------------------------
    remediation: Optional[str] = None
    cwe: Optional[str] = None
52
+
53
+
54
@dataclass
class AuditResults:
    """Results from running an audit.

    Holds the run's timestamps and pass/fail counters together with every
    finding produced, and derives a 0-100 hardening score and letter grade
    from the counters.
    """

    scan_path: str
    started_at: str
    completed_at: str
    checks_run: int
    checks_passed: int
    checks_failed: int
    findings: list[Finding] = field(default_factory=list)

    @property
    def hardening_score(self) -> int:
        """Percentage of checks that passed, rounded down (0-100).

        Returns 100 when no checks were run.
        """
        if self.checks_run == 0:
            return 100
        # Integer arithmetic on purpose: int((29 / 100) * 100) evaluates to
        # 28 because of binary floating-point error, under-reporting the
        # score by one point.
        return (self.checks_passed * 100) // self.checks_run

    @property
    def grade(self) -> str:
        """Letter grade (A, B, C, D or F) based on the hardening score."""
        score = self.hardening_score
        for threshold, letter in ((90, "A"), (80, "B"), (70, "C"), (60, "D")):
            if score >= threshold:
                return letter
        return "F"

    def findings_by_severity(self) -> dict[str, list[Finding]]:
        """Group findings by severity.

        Returns:
            A dict with exactly the keys "critical", "high", "medium" and
            "low" (in that order). Severity is matched case-insensitively
            and ignoring surrounding whitespace, so "High" lands in "high".
            Findings whose severity is not one of the four levels are not
            included in the result.
        """
        result = {"critical": [], "high": [], "medium": [], "low": []}
        for f in self.findings:
            severity = f.severity
            # Severity strings come from API data; normalise them so a
            # differently-cased value is not silently dropped.
            if isinstance(severity, str):
                severity = severity.strip().lower()
            if severity in result:
                result[severity].append(f)
        return result
94
+
95
+
96
class CheckLoader:
    """Loads security checks from the API."""

    API_BASE = TestLoader.API_BASE

    async def load_checks(
        self,
        command: Optional[str] = None,
        category: Optional[str] = None,
        limit: int = 500,
    ) -> list[SecurityCheck]:
        """Load checks from the Security Bench API.

        Args:
            command: Filter by command type (code, config, infra).
            category: Filter by category.
            limit: Maximum number of checks to load.

        Returns:
            List of SecurityCheck objects.

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status
                (raised by ``response.raise_for_status()``).
            KeyError: If a check entry has no "id".
        """
        params = {"limit": limit}
        if command:
            params["command"] = command
        if category:
            params["category"] = category

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(
                f"{self.API_BASE}/api/checks",
                params=params,
            )
            response.raise_for_status()
            data = response.json()

        return [self._build_check(entry) for entry in self._extract_entries(data)]

    @staticmethod
    def _extract_entries(data) -> list:
        """Return the list of raw check dicts from a decoded API payload.

        Accepts either a bare JSON list or an object with a "checks" key.
        The list case must be tested first: calling ``.get`` on a list
        raises AttributeError.
        """
        if isinstance(data, list):
            return data
        if isinstance(data, dict):
            return data.get("checks") or []
        return []

    @staticmethod
    def _build_check(check_data: dict) -> SecurityCheck:
        """Convert one raw check dict into a SecurityCheck, filling defaults."""
        return SecurityCheck(
            id=check_data["id"],
            command=check_data.get("command", "code"),
            category=check_data.get("category", "misc"),
            name=check_data.get("name", check_data["id"]),
            description=check_data.get("description", ""),
            severity=check_data.get("severity", "medium"),
            detection_type=check_data.get("detection_type", "regex"),
            pattern=check_data.get("pattern", ""),
            # An explicit null in the payload is treated like a missing key.
            file_patterns=check_data.get("file_patterns") or [],
            cwe=check_data.get("cwe"),
            owasp_llm=check_data.get("owasp_llm"),
            weight=check_data.get("weight", 5),
            remediation=check_data.get("remediation"),
        )
152
+
153
+
154
class Auditor:
    """Security auditor that runs checks against local files and system.

    Checks are fetched through a CheckLoader. "regex" checks are matched
    line by line against files under ``scan_path``; "command" checks run a
    shell command with ``scan_path`` as the working directory.
    """

    def __init__(
        self,
        scan_path: Path = Path("."),
        exclude_patterns: Optional[list[str]] = None,
    ):
        """Initialize the auditor.

        Args:
            scan_path: Directory to scan.
            exclude_patterns: Glob patterns to exclude. When omitted (or
                empty), a default list of dependency, VCS, cache and build
                directories is used.
        """
        self.scan_path = scan_path.resolve()
        self.exclude_patterns = exclude_patterns or [
            "node_modules/*",
            ".git/*",
            "__pycache__/*",
            "*.pyc",
            ".venv/*",
            "venv/*",
            "dist/*",
            "build/*",
        ]
        self.loader = CheckLoader()
        # Keyed by the sorted tuple of glob patterns passed to _find_files.
        self._file_cache: dict[tuple[str, ...], list[Path]] = {}

    def _should_exclude(self, path: Path) -> bool:
        """Check if a path should be excluded.

        Each exclude pattern is tested against the path relative to
        ``scan_path`` and against every trailing sub-path of it, so that
        "node_modules/*" also excludes "pkg/node_modules/x.js" and not only
        a node_modules directory directly under the scan root.
        """
        parts = path.relative_to(self.scan_path).parts
        candidates = ["/".join(parts[i:]) for i in range(len(parts))]
        return any(
            fnmatch.fnmatch(candidate, pattern)
            for pattern in self.exclude_patterns
            for candidate in candidates
        )

    @staticmethod
    def _normalize_glob(pattern: str) -> str:
        """Strip leading "./", "**/" and "/" prefixes from a glob pattern.

        ``Path.rglob`` already searches recursively and rejects absolute
        patterns, so these prefixes are redundant or invalid. Whole prefixes
        are removed rather than using ``str.lstrip("*./")``, which strips a
        *set of characters* and would turn "*.py" into "py".
        """
        cleaned = pattern.strip()
        changed = True
        while changed:
            changed = False
            for prefix in ("./", "**/", "/"):
                if cleaned.startswith(prefix):
                    cleaned = cleaned[len(prefix):]
                    changed = True
        return cleaned

    def _find_files(self, patterns: list[str]) -> list[Path]:
        """Find files matching glob patterns.

        Args:
            patterns: Glob patterns, matched recursively under ``scan_path``.

        Returns:
            Sorted list of matching, non-excluded files. Results are cached
            per set of patterns for the lifetime of the auditor.
        """
        cache_key = tuple(sorted(patterns))
        if cache_key in self._file_cache:
            return self._file_cache[cache_key]

        files = set()
        for pattern in patterns:
            clean_pattern = self._normalize_glob(pattern)
            if not clean_pattern:
                continue

            try:
                for path in self.scan_path.rglob(clean_pattern):
                    if path.is_file() and not self._should_exclude(path):
                        files.add(path)
            except (NotImplementedError, ValueError):
                # Fall back to fnmatch on the file name for patterns that
                # pathlib refuses to glob.
                for path in self.scan_path.rglob("*"):
                    if path.is_file() and fnmatch.fnmatch(path.name, pattern):
                        if not self._should_exclude(path):
                            files.add(path)

        # Sort so findings are reported in a stable order across runs.
        result = sorted(files)
        self._file_cache[cache_key] = result
        return result

    def _run_regex_check(self, check: SecurityCheck) -> list[Finding]:
        """Run a regex-based check against matching files.

        Returns one Finding per matching line. Returns an empty list when
        the check has no file patterns, an empty pattern, or a pattern that
        does not compile.
        """
        findings = []

        # An empty regex matches every line of every file; treat a check
        # without a pattern as not applicable instead.
        if not check.file_patterns or not check.pattern:
            return findings

        try:
            pattern = re.compile(check.pattern, re.IGNORECASE | re.MULTILINE)
        except re.error:
            # NOTE(review): an invalid pattern makes the check count as
            # passed; consider surfacing this to the user.
            return findings

        for file_path in self._find_files(check.file_patterns):
            try:
                content = file_path.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                # Unreadable file (permissions, broken link, ...): skip it.
                continue

            rel_path = str(file_path.relative_to(self.scan_path))
            for line_num, line in enumerate(content.splitlines(), 1):
                if pattern.search(line):
                    findings.append(Finding(
                        check_id=check.id,
                        check_name=check.name,
                        severity=check.severity,
                        category=check.category,
                        description=check.description,
                        file_path=rel_path,
                        line_number=line_num,
                        matched_content=line.strip()[:200],
                        remediation=check.remediation,
                        cwe=check.cwe,
                    ))

        return findings

    def _run_command_check(self, check: SecurityCheck) -> list[Finding]:
        """Run a command-based check.

        The check's pattern is executed as a shell command in ``scan_path``
        with a 30 second timeout. A non-zero exit status or any stdout
        output produces a single Finding. Timeouts and launch errors yield
        no finding.
        """
        findings = []

        if not check.pattern:
            return findings

        try:
            # SECURITY: check.pattern is supplied by the remote checks API
            # and is executed through the shell. This is only as trustworthy
            # as that API and the transport to it.
            result = subprocess.run(
                check.pattern,
                shell=True,
                capture_output=True,
                text=True,
                timeout=30,
                cwd=str(self.scan_path),
            )
        except (subprocess.SubprocessError, OSError, ValueError):
            # Best effort: a command that cannot be run or times out is
            # treated as producing no finding.
            return findings

        output = result.stdout.strip() if result.stdout else ""
        # Command checks typically return non-zero or specific output on failure
        if result.returncode != 0 or output:
            findings.append(Finding(
                check_id=check.id,
                check_name=check.name,
                severity=check.severity,
                category=check.category,
                description=check.description,
                matched_content=output[:500] if result.stdout else None,
                remediation=check.remediation,
                cwe=check.cwe,
            ))

        return findings

    async def run_check(self, check: SecurityCheck) -> tuple[bool, list[Finding]]:
        """Run a single security check.

        Args:
            check: The check to run.

        Returns:
            Tuple of (passed, findings). A check passes when it produces no
            findings; an unknown detection type produces none.
        """
        if check.detection_type == "regex":
            findings = self._run_regex_check(check)
        elif check.detection_type == "command":
            findings = self._run_command_check(check)
        else:
            findings = []

        return not findings, findings

    async def audit(
        self,
        command: Optional[str] = None,
        categories: Optional[list[str]] = None,
        progress_callback=None,
    ) -> AuditResults:
        """Run a full audit.

        Args:
            command: Filter checks by command type (code, config, infra).
            categories: Filter checks by category.
            progress_callback: Called with (current, total, check) for progress updates.

        Returns:
            AuditResults with all findings.
        """
        started_at = datetime.now().isoformat()

        checks = await self.loader.load_checks(command=command)
        if categories:
            checks = [c for c in checks if c.category in categories]

        all_findings = []
        passed = 0
        failed = 0
        total = len(checks)

        for position, check in enumerate(checks, 1):
            if progress_callback:
                progress_callback(position, total, check)

            check_passed, findings = await self.run_check(check)
            if check_passed:
                passed += 1
            else:
                failed += 1
                all_findings.extend(findings)

        return AuditResults(
            scan_path=str(self.scan_path),
            started_at=started_at,
            completed_at=datetime.now().isoformat(),
            checks_run=total,
            checks_passed=passed,
            checks_failed=failed,
            findings=all_findings,
        )