securitybench 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sb/__init__.py +38 -0
- sb/adapters/__init__.py +0 -0
- sb/api_client.py +0 -0
- sb/auditor.py +355 -0
- sb/bench.py +359 -0
- sb/cli.py +592 -0
- sb/config.py +182 -0
- sb/evaluators/__init__.py +13 -0
- sb/evaluators/pattern.py +193 -0
- sb/loader.py +243 -0
- sb/output.py +291 -0
- sb/reports/__init__.py +422 -0
- sb/runner.py +181 -0
- securitybench-0.1.0.dist-info/METADATA +219 -0
- securitybench-0.1.0.dist-info/RECORD +18 -0
- securitybench-0.1.0.dist-info/WHEEL +4 -0
- securitybench-0.1.0.dist-info/entry_points.txt +2 -0
- securitybench-0.1.0.dist-info/licenses/LICENSE +59 -0
sb/__init__.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Security Bench - Security testing for LLM pipelines.
|
|
2
|
+
|
|
3
|
+
A comprehensive CLI tool for testing LLM applications against security
|
|
4
|
+
vulnerabilities including prompt injection, jailbreaks, and data leakage.
|
|
5
|
+
"""
|
|
6
|
+
from .config import PipelineConfig, EndpointConfig, InputConfig, OutputConfig, ConfigError
|
|
7
|
+
from .bench import SecurityBench, SecurityTest, ScanResults, TestResult
|
|
8
|
+
from .runner import TestRunner, RunResult
|
|
9
|
+
from .loader import TestLoader, LoaderError
|
|
10
|
+
from .evaluators import PatternEvaluator, EvaluationResult
|
|
11
|
+
from .cli import main
|
|
12
|
+
|
|
13
|
+
# Package version string.
__version__ = "0.1.0"

# Public API of the package. Every name listed here is imported at the top
# of this module from the corresponding submodule.
__all__ = [
    # Config (from .config)
    "PipelineConfig",
    "EndpointConfig",
    "InputConfig",
    "OutputConfig",
    "ConfigError",
    # Bench (from .bench)
    "SecurityBench",
    "SecurityTest",
    "ScanResults",
    "TestResult",
    # Runner (from .runner)
    "TestRunner",
    "RunResult",
    # Loader (from .loader)
    "TestLoader",
    "LoaderError",
    # Evaluator (from .evaluators)
    "PatternEvaluator",
    "EvaluationResult",
    # CLI (from .cli)
    "main",
]
|
sb/adapters/__init__.py
ADDED
|
File without changes
|
sb/api_client.py
ADDED
|
File without changes
|
sb/auditor.py
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
"""Security Auditor for local code, config, and infrastructure checks.
|
|
2
|
+
|
|
3
|
+
Lynis-inspired security scanner that fetches checks from the API
|
|
4
|
+
and runs them against local files and system.
|
|
5
|
+
"""
|
|
6
|
+
import asyncio
|
|
7
|
+
import fnmatch
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import subprocess
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
import httpx
|
|
17
|
+
|
|
18
|
+
from .loader import TestLoader
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class SecurityCheck:
    """One security check definition as described by the API.

    The first eight fields are required; the remaining ones are optional
    metadata with defaults.
    """

    # --- required -------------------------------------------------------
    id: str
    # Check family: "code", "config", or "infra".
    command: str
    category: str
    name: str
    description: str
    # One of "critical", "high", "medium", "low".
    severity: str
    # How the check is evaluated: "regex" or "command".
    detection_type: str
    # Regex pattern or shell command, depending on ``detection_type``.
    pattern: str

    # --- optional -------------------------------------------------------
    # Glob patterns selecting the files a check applies to.
    file_patterns: list[str] = field(default_factory=list)
    cwe: Optional[str] = None
    owasp_llm: Optional[str] = None
    weight: int = 5
    remediation: Optional[str] = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class Finding:
    """A single issue reported by a security check.

    The first five fields identify and describe the originating check;
    the optional fields carry location and remediation details and all
    default to ``None``.
    """

    # --- originating check ---------------------------------------------
    check_id: str
    check_name: str
    severity: str
    category: str
    description: str

    # --- optional details ----------------------------------------------
    file_path: Optional[str] = None
    line_number: Optional[int] = None
    matched_content: Optional[str] = None
    remediation: Optional[str] = None
    cwe: Optional[str] = None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class AuditResults:
    """Results from running an audit.

    Attributes:
        scan_path: Path that was scanned, as a string.
        started_at: Start timestamp string.
        completed_at: Completion timestamp string.
        checks_run: Total number of checks executed.
        checks_passed: Number of checks that produced no findings.
        checks_failed: Number of checks that produced at least one finding.
        findings: Every finding produced by the failed checks.
    """

    scan_path: str
    started_at: str
    completed_at: str
    checks_run: int
    checks_passed: int
    checks_failed: int
    findings: list[Finding] = field(default_factory=list)

    @property
    def hardening_score(self) -> int:
        """Percentage of checks that passed, as an integer from 0 to 100.

        Returns 100 when no checks were run. The percentage is rounded
        down and is computed with integer arithmetic: the float form
        ``int(passed / run * 100)`` under-reports some exact ratios
        (e.g. 29 of 100 evaluates to 28.999... and truncates to 28).
        """
        if self.checks_run == 0:
            return 100
        return (self.checks_passed * 100) // self.checks_run

    @property
    def grade(self) -> str:
        """Letter grade ("A" to "F") derived from ``hardening_score``."""
        score = self.hardening_score
        # Ordered from the highest threshold down; first match wins.
        for threshold, letter in ((90, "A"), (80, "B"), (70, "C"), (60, "D")):
            if score >= threshold:
                return letter
        return "F"

    def findings_by_severity(self) -> dict[str, list[Finding]]:
        """Group findings by severity.

        Returns:
            A dict with exactly the keys "critical", "high", "medium" and
            "low" (each possibly empty). Findings whose severity is not
            one of those four strings are left out of the result.
        """
        result: dict[str, list[Finding]] = {
            "critical": [],
            "high": [],
            "medium": [],
            "low": [],
        }
        for finding in self.findings:
            bucket = result.get(finding.severity)
            if bucket is not None:
                bucket.append(finding)
        return result
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class CheckLoader:
    """Loads security checks from the API."""

    # Same base URL as the test loader.
    API_BASE = TestLoader.API_BASE

    async def load_checks(
        self,
        command: Optional[str] = None,
        category: Optional[str] = None,
        limit: int = 500,
    ) -> list[SecurityCheck]:
        """Load checks from the Security Bench API.

        Args:
            command: Filter by command type (code, config, infra).
            category: Filter by category.
            limit: Maximum number of checks to load.

        Returns:
            List of SecurityCheck objects.

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status
                (via ``response.raise_for_status()``).
            KeyError: If a check entry has no ``"id"``.
        """
        params: dict[str, object] = {"limit": limit}
        if command:
            params["command"] = command
        if category:
            params["category"] = category

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(
                f"{self.API_BASE}/api/checks",
                params=params,
            )
            response.raise_for_status()
            data = response.json()

        return [self._build_check(item) for item in self._extract_items(data)]

    @staticmethod
    def _extract_items(data) -> list:
        """Return the list of raw check dicts contained in a decoded response.

        Accepts either a bare JSON list or an object with a ``"checks"``
        list. The previous one-liner called ``data.get(...)`` even when
        ``data`` was a list, which raised AttributeError instead of using
        the list.
        """
        if isinstance(data, list):
            return data
        if isinstance(data, dict):
            items = data.get("checks", [])
            return items if isinstance(items, list) else []
        return []

    @staticmethod
    def _build_check(check_data: dict) -> SecurityCheck:
        """Convert one raw check dict into a SecurityCheck, applying defaults."""
        return SecurityCheck(
            id=check_data["id"],
            command=check_data.get("command", "code"),
            category=check_data.get("category", "misc"),
            name=check_data.get("name", check_data["id"]),
            description=check_data.get("description", ""),
            severity=check_data.get("severity", "medium"),
            detection_type=check_data.get("detection_type", "regex"),
            pattern=check_data.get("pattern", ""),
            # An explicit JSON null is treated like a missing key.
            file_patterns=check_data.get("file_patterns") or [],
            cwe=check_data.get("cwe"),
            owasp_llm=check_data.get("owasp_llm"),
            weight=check_data.get("weight", 5),
            remediation=check_data.get("remediation"),
        )
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class Auditor:
    """Security auditor that runs checks against local files and system.

    Checks are fetched through a ``CheckLoader`` and evaluated either by
    matching a regex against files under ``scan_path`` or by running a
    shell command with ``scan_path`` as the working directory.
    """

    def __init__(
        self,
        scan_path: Path = Path("."),
        exclude_patterns: Optional[list[str]] = None,
    ):
        """Initialize the auditor.

        Args:
            scan_path: Directory to scan.
            exclude_patterns: Glob patterns to exclude. They are matched
                with ``fnmatch`` against each path relative to
                ``scan_path``. ``None`` or an empty list selects the
                built-in defaults.
        """
        self.scan_path = scan_path.resolve()
        self.exclude_patterns = exclude_patterns or [
            "node_modules/*",
            ".git/*",
            "__pycache__/*",
            "*.pyc",
            ".venv/*",
            "venv/*",
            "dist/*",
            "build/*",
        ]
        self.loader = CheckLoader()
        # Keyed by the sorted tuple of glob patterns passed to _find_files.
        self._file_cache: dict[tuple[str, ...], list[Path]] = {}

    def _should_exclude(self, path: Path) -> bool:
        """Check if a path should be excluded.

        The path relative to ``scan_path`` is matched as a whole, so a
        pattern such as ``"node_modules/*"`` only covers a directory of
        that name directly under ``scan_path``.
        """
        rel_path = str(path.relative_to(self.scan_path))
        return any(
            fnmatch.fnmatch(rel_path, pattern)
            for pattern in self.exclude_patterns
        )

    @staticmethod
    def _normalize_glob(pattern: str) -> str:
        """Strip leading ``./``, ``/`` and ``**/`` prefixes from a glob.

        ``rglob`` already searches recursively from ``scan_path``, so these
        prefixes are redundant. Only whole prefixes are removed: the
        earlier ``pattern.lstrip("*./")`` stripped a *character set* and
        turned ``"*.py"`` into ``"py"`` and ``".env"`` into ``"env"``.
        """
        cleaned = pattern
        while True:
            if cleaned.startswith("./"):
                cleaned = cleaned[2:]
            elif cleaned.startswith("**/"):
                cleaned = cleaned[3:]
            elif cleaned.startswith("/"):
                cleaned = cleaned[1:]
            else:
                return cleaned

    def _find_files(self, patterns: list[str]) -> list[Path]:
        """Find files matching glob patterns.

        Args:
            patterns: Glob patterns, searched recursively below
                ``scan_path``.

        Returns:
            Sorted list of matching, non-excluded files. Results are cached
            per set of patterns for the lifetime of this auditor.
        """
        cache_key = tuple(sorted(patterns))
        cached = self._file_cache.get(cache_key)
        if cached is not None:
            return cached

        files: set[Path] = set()
        for pattern in patterns:
            clean_pattern = self._normalize_glob(pattern)
            if not clean_pattern:
                continue

            try:
                for path in self.scan_path.rglob(clean_pattern):
                    if path.is_file() and not self._should_exclude(path):
                        files.add(path)
            except (NotImplementedError, ValueError):
                # rglob rejected the pattern: fall back to matching the
                # original pattern against bare file names.
                for path in self.scan_path.rglob("*"):
                    if (
                        path.is_file()
                        and fnmatch.fnmatch(path.name, pattern)
                        and not self._should_exclude(path)
                    ):
                        files.add(path)

        # Sorted so findings come out in a reproducible order.
        result = sorted(files)
        self._file_cache[cache_key] = result
        return result

    def _run_regex_check(self, check: SecurityCheck) -> list[Finding]:
        """Run a regex-based check against matching files.

        Each file is scanned line by line; every line matching the check's
        pattern (case-insensitively) yields one finding. Checks without
        file patterns, with an empty pattern (which would match every
        line), or with an invalid regex yield no findings. Unreadable
        files are skipped.
        """
        findings: list[Finding] = []

        if not check.file_patterns or not check.pattern:
            return findings

        files = self._find_files(check.file_patterns)

        try:
            pattern = re.compile(check.pattern, re.IGNORECASE | re.MULTILINE)
        except re.error:
            return findings

        for file_path in files:
            try:
                # Undecodable bytes are dropped rather than aborting the scan.
                content = file_path.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                continue

            rel_path = str(file_path.relative_to(self.scan_path))
            for line_num, line in enumerate(content.splitlines(), 1):
                if pattern.search(line):
                    findings.append(Finding(
                        check_id=check.id,
                        check_name=check.name,
                        severity=check.severity,
                        category=check.category,
                        description=check.description,
                        file_path=rel_path,
                        line_number=line_num,
                        # Keep reports compact: at most 200 characters.
                        matched_content=line.strip()[:200],
                        remediation=check.remediation,
                        cwe=check.cwe,
                    ))

        return findings

    def _run_command_check(self, check: SecurityCheck) -> list[Finding]:
        """Run a command-based check.

        The check's ``pattern`` is executed as a shell command in
        ``scan_path`` with a 30 second timeout. A non-zero exit status or
        any non-blank stdout produces one finding. A timeout or a failure
        to launch the command produces no finding.
        """
        findings: list[Finding] = []

        if not check.pattern:
            return findings

        # SECURITY(review): check.pattern is supplied by the check
        # definition (loaded from the remote API in audit()) and is run
        # through the shell unmodified. Anyone who controls the API
        # response can execute arbitrary commands on this machine. The
        # commands are shell strings by design, so this is flagged rather
        # than rewritten.
        try:
            result = subprocess.run(
                check.pattern,
                shell=True,
                capture_output=True,
                text=True,
                timeout=30,
                cwd=str(self.scan_path),
            )
        except (subprocess.SubprocessError, OSError, ValueError):
            # Covers timeouts, a missing working directory, and output
            # that cannot be decoded as text.
            return findings

        # Command checks typically return non-zero or specific output on failure
        if result.returncode != 0 or result.stdout.strip():
            findings.append(Finding(
                check_id=check.id,
                check_name=check.name,
                severity=check.severity,
                category=check.category,
                description=check.description,
                matched_content=result.stdout.strip()[:500] if result.stdout else None,
                remediation=check.remediation,
                cwe=check.cwe,
            ))

        return findings

    async def run_check(self, check: SecurityCheck) -> tuple[bool, list[Finding]]:
        """Run a single security check.

        Args:
            check: The check to run.

        Returns:
            Tuple of (passed, findings). A check passes when it yields no
            findings; an unknown ``detection_type`` yields none.
        """
        if check.detection_type == "regex":
            findings = self._run_regex_check(check)
        elif check.detection_type == "command":
            findings = self._run_command_check(check)
        else:
            findings = []

        return not findings, findings

    async def audit(
        self,
        command: Optional[str] = None,
        categories: Optional[list[str]] = None,
        progress_callback=None,
    ) -> AuditResults:
        """Run a full audit.

        Args:
            command: Filter checks by command type (code, config, infra).
            categories: Filter checks by category.
            progress_callback: Called with (current, total, check) for
                progress updates, before each check runs; ``current`` is
                1-based.

        Returns:
            AuditResults with all findings.
        """
        started_at = datetime.now().isoformat()

        # Load checks
        checks = await self.loader.load_checks(command=command)

        # Filter by categories if specified
        if categories:
            checks = [c for c in checks if c.category in categories]

        total = len(checks)
        all_findings: list[Finding] = []
        passed = 0
        failed = 0

        for position, check in enumerate(checks, 1):
            if progress_callback:
                progress_callback(position, total, check)

            check_passed, findings = await self.run_check(check)

            if check_passed:
                passed += 1
            else:
                failed += 1
                all_findings.extend(findings)

        return AuditResults(
            scan_path=str(self.scan_path),
            started_at=started_at,
            completed_at=datetime.now().isoformat(),
            checks_run=total,
            checks_passed=passed,
            checks_failed=failed,
            findings=all_findings,
        )
|