offsec-ai 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- offsec_ai/__init__.py +91 -0
- offsec_ai/__main__.py +12 -0
- offsec_ai/cli.py +2764 -0
- offsec_ai/core/__init__.py +1 -0
- offsec_ai/core/ai_owasp_scanner.py +389 -0
- offsec_ai/core/cert_analyzer.py +721 -0
- offsec_ai/core/hybrid_identity_checker.py +585 -0
- offsec_ai/core/l7_detector.py +1628 -0
- offsec_ai/core/llm_judge.py +183 -0
- offsec_ai/core/mcp_attacker.py +384 -0
- offsec_ai/core/mcp_scanner.py +506 -0
- offsec_ai/core/mtls_checker.py +990 -0
- offsec_ai/core/owasp_scanner.py +653 -0
- offsec_ai/core/port_scanner.py +277 -0
- offsec_ai/core/security_headers.py +472 -0
- offsec_ai/models/__init__.py +1 -0
- offsec_ai/models/ai_owasp_result.py +161 -0
- offsec_ai/models/l7_result.py +231 -0
- offsec_ai/models/mcp_result.py +148 -0
- offsec_ai/models/mtls_result.py +95 -0
- offsec_ai/models/owasp_result.py +282 -0
- offsec_ai/models/scan_result.py +143 -0
- offsec_ai/py.typed +0 -0
- offsec_ai/utils/__init__.py +1 -0
- offsec_ai/utils/ai_owasp_payloads.py +283 -0
- offsec_ai/utils/ai_owasp_remediation.py +248 -0
- offsec_ai/utils/common_ports.py +316 -0
- offsec_ai/utils/exporters.py +441 -0
- offsec_ai/utils/l7_signatures.py +460 -0
- offsec_ai/utils/mcp_cve_db.py +263 -0
- offsec_ai/utils/mcp_payloads.py +121 -0
- offsec_ai/utils/owasp_remediation.py +787 -0
- offsec_ai-2.0.0.dist-info/METADATA +601 -0
- offsec_ai-2.0.0.dist-info/RECORD +37 -0
- offsec_ai-2.0.0.dist-info/WHEEL +4 -0
- offsec_ai-2.0.0.dist-info/entry_points.txt +2 -0
- offsec_ai-2.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core functionality for the simple port checker package."""
|
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AI/LLM OWASP Top 10 (2025) black-box scanner.
|
|
3
|
+
|
|
4
|
+
Sends adversarial probes to a live LLM chat endpoint and evaluates
|
|
5
|
+
responses for security misconfigurations. Supports OpenAI-compatible
|
|
6
|
+
chat APIs as well as a generic JSON request/response format.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
scanner = LLMOwaspScanner(
|
|
10
|
+
endpoint="https://api.example.com/v1/chat/completions",
|
|
11
|
+
mode="safe",
|
|
12
|
+
api_format="openai",
|
|
13
|
+
headers={"Authorization": "Bearer sk-..."},
|
|
14
|
+
)
|
|
15
|
+
result = await scanner.scan()
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import asyncio
|
|
21
|
+
import time
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
import httpx
|
|
25
|
+
|
|
26
|
+
from ..models.ai_owasp_result import (
|
|
27
|
+
BatchLLMScanResult,
|
|
28
|
+
LLMCategoryResult,
|
|
29
|
+
LLMFinding,
|
|
30
|
+
LLMScanMode,
|
|
31
|
+
LLMScanResult,
|
|
32
|
+
LLMSeverity,
|
|
33
|
+
)
|
|
34
|
+
from ..utils.ai_owasp_payloads import (
|
|
35
|
+
ALL_PAYLOADS,
|
|
36
|
+
NOT_TESTABLE_CATEGORIES,
|
|
37
|
+
SAFE_MODE_CATEGORIES,
|
|
38
|
+
get_payloads,
|
|
39
|
+
)
|
|
40
|
+
from ..utils.ai_owasp_remediation import LLM_CATEGORIES
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Category IDs probed by default for each scan mode. These are used only
# when the caller does not pass an explicit ``categories`` override.
_SAFE_CATEGORIES = ["LLM02", "LLM07", "LLM09"]
_DEEP_CATEGORIES = [
    "LLM01",
    "LLM02",
    "LLM05",
    "LLM06",
    "LLM07",
    "LLM09",
    "LLM10",
]
# Every category ID known to LLM_CATEGORIES, including the ones that cannot
# be tested black-box (those are reported as N/A by the scanner).
_ALL_CATEGORIES = list(LLM_CATEGORIES)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class LLMOwaspScanner:
    """Black-box scanner for LLM OWASP Top 10 (2025) against a live endpoint.

    One instance targets a single chat endpoint. ``scan()`` probes that
    endpoint; ``batch_scan()`` reuses this instance's settings (mode,
    categories, API format, timeout, judge) to scan several endpoints
    concurrently.
    """

    def __init__(
        self,
        endpoint: str,
        mode: str = "safe",
        categories: list[str] | None = None,
        api_format: str = "openai",
        headers: dict[str, str] | None = None,
        timeout: float = 30.0,
        judge: Any | None = None,
        model: str = "gpt-3.5-turbo",
    ) -> None:
        """
        Args:
            endpoint: Full URL to the chat completions endpoint.
            mode: "safe" (passive/benign probes) or "deep" (full adversarial).
            categories: Override list of LLM categories to test (e.g. ["LLM01", "LLM02"]).
            api_format: "openai" (default) or "generic" (custom JSON body/response path).
            headers: Extra HTTP headers — e.g. Authorization.
            timeout: Per-request timeout in seconds.
            judge: Optional LLMJudge instance for AI-assisted evaluation.
            model: Model name forwarded in OpenAI-format requests.
        """
        self.endpoint = endpoint.rstrip("/")
        # LLMScanMode is defined elsewhere; presumably an Enum that rejects
        # unknown mode strings — confirm against the model module.
        self.mode = LLMScanMode(mode)
        self.api_format = api_format
        # Copy so later changes to the caller's dict do not leak into requests.
        self.headers = dict(headers) if headers else {}
        self.timeout = timeout
        self.judge = judge
        self.model = model

        # Always store a fresh list: aliasing the module-level defaults would
        # let a mutation of ``self.categories`` (or of a result's
        # ``enabled_categories``) change the defaults for every later scanner.
        if categories:
            self.categories = [c.upper() for c in categories]
        elif self.mode == LLMScanMode.DEEP:
            self.categories = list(_DEEP_CATEGORIES)
        else:
            self.categories = list(_SAFE_CATEGORIES)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def scan(self) -> LLMScanResult:
        """Run all category checks and return a consolidated result.

        Every ID in ``_ALL_CATEGORIES`` produces one ``LLMCategoryResult``;
        categories that are non-testable or not enabled for this scanner are
        reported with ``testable=False`` instead of being omitted.

        Returns:
            An ``LLMScanResult`` with per-category scores/grades and the
            overall score/grade filled in.
        """
        start = time.monotonic()

        # Caller-supplied headers come last so they can override the defaults.
        request_headers = {
            "Content-Type": "application/json",
            "User-Agent": "offsec-ai/2.0.0",
            **self.headers,
        }
        async with httpx.AsyncClient(
            headers=request_headers,
            timeout=self.timeout,
        ) as client:
            # Categories run concurrently; probes within one category run
            # sequentially (see _scan_category).
            category_results: list[LLMCategoryResult] = list(
                await asyncio.gather(
                    *(self._scan_category(cat_id, client) for cat_id in _ALL_CATEGORIES)
                )
            )

        # Score each category
        for cat in category_results:
            cat.category_score = cat.calculate_score()
            cat.grade = cat.calculate_grade()

        scan_duration = time.monotonic() - start

        result = LLMScanResult(
            target=self.endpoint,
            scan_mode=self.mode,
            api_format=self.api_format,
            # Hand out a copy so result consumers cannot mutate scanner state.
            enabled_categories=list(self.categories),
            categories=category_results,
            scan_duration=scan_duration,
            judge_used=self.judge is not None,
        )
        result.overall_score = result.calculate_overall_score()
        result.overall_grade = result.calculate_overall_grade()
        return result

    async def batch_scan(
        self,
        targets: list[dict],
        max_concurrent: int = 3,
    ) -> BatchLLMScanResult:
        """
        Scan multiple endpoints concurrently.

        A failure on one target (including a target dict without an
        "endpoint") is recorded as an errored ``LLMScanResult`` for that
        target; it does not abort the rest of the batch.

        Args:
            targets: List of dicts with at least {"endpoint": "..."},
                optionally {"headers": {...}, "model": "..."}.
            max_concurrent: Maximum parallel scans (must be >= 1).

        Raises:
            ValueError: If ``max_concurrent`` is less than 1.
        """
        # Semaphore(0) would make every scan wait forever; fail fast instead.
        if max_concurrent < 1:
            raise ValueError("max_concurrent must be at least 1")

        sem = asyncio.Semaphore(max_concurrent)
        start = time.monotonic()

        async def _guarded_scan(target_cfg: dict) -> LLMScanResult:
            endpoint = target_cfg.get("endpoint")
            if not endpoint:
                return LLMScanResult(
                    target="",
                    scan_mode=self.mode,
                    error="Target configuration is missing 'endpoint'.",
                )
            async with sem:
                try:
                    scanner = LLMOwaspScanner(
                        endpoint=endpoint,
                        mode=self.mode.value,
                        categories=self.categories,
                        api_format=self.api_format,
                        headers=target_cfg.get("headers", self.headers),
                        timeout=self.timeout,
                        judge=self.judge,
                        model=target_cfg.get("model", self.model),
                    )
                    return await scanner.scan()
                except Exception as exc:
                    # Some exceptions stringify to "", which would make this
                    # result look successful below; fall back to the type name.
                    return LLMScanResult(
                        target=endpoint,
                        scan_mode=self.mode,
                        error=str(exc) or type(exc).__name__,
                    )

        results = await asyncio.gather(*(_guarded_scan(t) for t in targets))
        scan_duration = time.monotonic() - start

        successful = [r for r in results if not r.error]
        failed = [r for r in results if r.error]
        all_findings = [f for r in successful for f in r.all_findings]

        return BatchLLMScanResult(
            results=list(results),
            total_targets=len(targets),
            successful_scans=len(successful),
            failed_scans=len(failed),
            total_findings=len(all_findings),
            critical_count=sum(1 for f in all_findings if f.severity == LLMSeverity.CRITICAL),
            high_count=sum(1 for f in all_findings if f.severity == LLMSeverity.HIGH),
            scan_duration=scan_duration,
        )

    # ------------------------------------------------------------------
    # Category dispatch
    # ------------------------------------------------------------------

    async def _scan_category(
        self,
        category_id: str,
        client: httpx.AsyncClient,
    ) -> LLMCategoryResult:
        """Run every probe for one category and collect the findings.

        Returns a ``testable=False`` result (with a reason) when the category
        is listed in ``NOT_TESTABLE_CATEGORIES``, is not enabled on this
        scanner, or has no payloads for the current mode.
        """
        cat_meta = LLM_CATEGORIES.get(category_id, {})
        cat_name = cat_meta.get("name", category_id)

        # Non-testable categories
        if category_id in NOT_TESTABLE_CATEGORIES:
            return LLMCategoryResult(
                category_id=category_id,
                category_name=cat_name,
                testable=False,
                not_testable_reason=NOT_TESTABLE_CATEGORIES[category_id],
            )

        # Categories not in the enabled list for this mode
        if category_id not in self.categories:
            return LLMCategoryResult(
                category_id=category_id,
                category_name=cat_name,
                testable=False,
                not_testable_reason=f"Not enabled for {self.mode.value} mode scan.",
            )

        payloads = get_payloads(category_id, self.mode.value)
        if not payloads:
            return LLMCategoryResult(
                category_id=category_id,
                category_name=cat_name,
                testable=False,
                not_testable_reason="No payloads available for the current scan mode.",
            )

        # Probes are awaited one at a time, so a category never has more than
        # one request in flight.
        findings: list[LLMFinding] = []
        for probe_def in payloads:
            finding = await self._run_probe(category_id, probe_def, client)
            if finding:
                findings.append(finding)

        return LLMCategoryResult(
            category_id=category_id,
            category_name=cat_name,
            findings=findings,
        )

    # ------------------------------------------------------------------
    # Probe execution
    # ------------------------------------------------------------------

    async def _run_probe(
        self,
        category_id: str,
        probe_def: dict,
        client: httpx.AsyncClient,
    ) -> LLMFinding | None:
        """Send a single probe and evaluate the response.

        Detection runs in order and stops at the first hit: substring
        indicators (``detect_in_response``), then the probe's ``detect_fn``
        callable, then the optional LLM judge.

        Returns:
            An ``LLMFinding`` when any detector fires, otherwise ``None``.
            ``None`` is also returned when the request itself fails.
        """
        probe_text: str = probe_def.get("probe", "")
        probe_id: str = probe_def.get("id", "")
        description: str = probe_def.get("description", "")
        severity_str: str = probe_def.get("severity", "medium")
        detect_in: list[str] = probe_def.get("detect_in_response", [])
        detect_fn = probe_def.get("detect_fn")

        try:
            response_text = await self._send_probe(probe_text, client)
        except Exception:
            # Best-effort: network, HTTP-status and response-shape errors are
            # not findings, so the probe is silently skipped.
            return None

        # Rule-based detection
        triggered = False
        evidence = ""
        response_lower = response_text.lower()

        if detect_in:
            hit = next((s for s in detect_in if s.lower() in response_lower), None)
            if hit is not None:
                triggered = True
                evidence = f"Response contained indicator '{hit}'"

        if not triggered and detect_fn:
            try:
                if detect_fn(response_text):
                    triggered = True
                    # NOTE(review): this wording assumes detect_fn is a length
                    # check — confirm against the payload definitions.
                    evidence = f"Response length {len(response_text)} chars exceeded threshold"
            except Exception:
                # A faulty detector must not abort the scan.
                pass

        # Optional LLM judge pass
        if not triggered and self.judge:
            try:
                # judge.evaluate is called synchronously, so run it in a thread.
                verdict = await asyncio.to_thread(
                    self.judge.evaluate,
                    category_id,
                    probe_text,
                    response_text,
                )
                if verdict.get("vulnerable"):
                    triggered = True
                    # Coerce to str: the evidence is sliced below, and a
                    # missing or None reason would otherwise raise there.
                    evidence = str(verdict.get("reason") or "LLM judge flagged response")
            except Exception:
                # The judge is advisory; its failures are ignored.
                pass

        if not triggered:
            return None

        # Map category to remediation key
        remediation_key = _REMEDIATION_KEY_MAP.get(probe_id, category_id.lower())
        cwe_id = _CWE_MAP.get(category_id)
        cat_meta = LLM_CATEGORIES.get(category_id, {})

        return LLMFinding(
            category=category_id,
            severity=LLMSeverity(severity_str),
            title=description,
            description=(
                f"Probe '{probe_id}' triggered a positive response indicating a potential "
                f"{cat_meta.get('name', category_id)} vulnerability."
            ),
            remediation_key=remediation_key,
            cwe_id=cwe_id,
            owasp_llm_ref=cat_meta.get("ref", ""),
            evidence=evidence[:500],  # truncate
            probe_used=probe_text[:200],  # truncate
        )

    async def _send_probe(self, probe_text: str, client: httpx.AsyncClient) -> str:
        """Send probe to the endpoint and return the response text.

        Uses the OpenAI chat format when ``api_format`` is "openai"; any other
        value falls through to the generic JSON format.
        """
        if self.api_format == "openai":
            return await self._send_openai_format(probe_text, client)
        return await self._send_generic_format(probe_text, client)

    async def _send_openai_format(self, probe_text: str, client: httpx.AsyncClient) -> str:
        """POST an OpenAI-style chat request and return the first choice's text.

        Always returns a ``str``: null or missing content becomes ``""`` and
        non-string content is stringified, so callers can safely call string
        methods on the result.

        Raises:
            Whatever ``raise_for_status()`` raises on an error status, plus
            KeyError/IndexError/TypeError when the body lacks the
            ``choices[0].message`` structure.
        """
        payload = {
            "model": self.model,
            "messages": [{"role": "user", "content": probe_text}],
            "max_tokens": 1024,
            "temperature": 0.0,
        }
        resp = await client.post(self.endpoint, json=payload)
        resp.raise_for_status()
        data = resp.json()
        # Standard OpenAI response shape
        content = data["choices"][0]["message"].get("content")
        if content is None:
            return ""
        return content if isinstance(content, str) else str(content)

    async def _send_generic_format(self, probe_text: str, client: httpx.AsyncClient) -> str:
        """Fallback for non-OpenAI APIs: POST {"prompt": ...}, expect {"response": ...}.

        The first of several common field names present in a JSON-object body
        is returned as a string. Any other body (unknown fields, or JSON that
        is not an object) is stringified whole.
        """
        payload = {"prompt": probe_text, "max_tokens": 1024}
        resp = await client.post(self.endpoint, json=payload)
        resp.raise_for_status()
        data = resp.json()
        # Only look up keys on JSON objects: on a str body ``key in data``
        # would be a substring test and ``data[key]`` would raise.
        if isinstance(data, dict):
            # Try common response field names
            for key in ("response", "text", "output", "content", "generated_text", "answer"):
                if key in data:
                    return str(data[key])
        # Fallback: stringify the whole body
        return str(data)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
# ---------------------------------------------------------------------------
|
|
353
|
+
# Lookup tables
|
|
354
|
+
# ---------------------------------------------------------------------------
|
|
355
|
+
|
|
356
|
+
# Probe ID -> remediation key. Built from (remediation key, probe IDs) groups;
# group order is kept so the resulting dict iterates in probe-ID order.
# Probe IDs absent from this table fall back to the lower-cased category ID
# at the lookup site.
_REMEDIATION_KEY_MAP: dict[str, str] = {
    probe_id: remediation_key
    for remediation_key, probe_ids in (
        (
            "prompt_injection_direct",
            (
                "LLM01-PI-001",
                "LLM01-PI-002",
                "LLM01-PI-003",
                "LLM01-PI-004",
                "LLM01-PI-005",
            ),
        ),
        # The first LLM02 probe shares the system-prompt-leakage remediation.
        ("system_prompt_leakage", ("LLM02-SD-001",)),
        (
            "sensitive_info_disclosure",
            ("LLM02-SD-002", "LLM02-SD-003", "LLM02-SD-004"),
        ),
        (
            "improper_output_xss",
            ("LLM05-OH-001", "LLM05-OH-002", "LLM05-OH-003"),
        ),
        (
            "excessive_agency",
            ("LLM06-EA-001", "LLM06-EA-002", "LLM06-EA-003"),
        ),
        (
            "system_prompt_leakage",
            ("LLM07-SPL-001", "LLM07-SPL-002", "LLM07-SPL-003"),
        ),
        ("misinformation_no_guardrail", ("LLM09-MI-001", "LLM09-MI-002")),
        ("unbounded_consumption", ("LLM10-UC-001", "LLM10-UC-002")),
    )
    for probe_id in probe_ids
}
|
|
380
|
+
|
|
381
|
+
# Category ID -> numeric CWE identifier attached to findings of that category.
# Categories missing from this table yield ``None`` at the lookup site.
_CWE_MAP: dict[str, int] = dict(
    (
        ("LLM01", 77),    # CWE-77: command injection
        ("LLM02", 200),   # CWE-200: exposure of sensitive information
        ("LLM05", 79),    # CWE-79: cross-site scripting
        ("LLM06", 272),   # CWE-272: least privilege violation
        ("LLM07", 200),   # CWE-200: exposure of sensitive information
        # NOTE(review): CWE-1009 appears to be the "Audit" category rather
        # than a weakness entry — confirm this is the intended mapping.
        ("LLM09", 1009),
        ("LLM10", 400),   # CWE-400: uncontrolled resource consumption
    )
)
|