aiptx 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiptx might be problematic. Click here for more details.
- aipt_v2/__init__.py +110 -0
- aipt_v2/__main__.py +24 -0
- aipt_v2/agents/AIPTxAgent/__init__.py +10 -0
- aipt_v2/agents/AIPTxAgent/aiptx_agent.py +211 -0
- aipt_v2/agents/__init__.py +24 -0
- aipt_v2/agents/base.py +520 -0
- aipt_v2/agents/ptt.py +406 -0
- aipt_v2/agents/state.py +168 -0
- aipt_v2/app.py +960 -0
- aipt_v2/browser/__init__.py +31 -0
- aipt_v2/browser/automation.py +458 -0
- aipt_v2/browser/crawler.py +453 -0
- aipt_v2/cli.py +321 -0
- aipt_v2/compliance/__init__.py +71 -0
- aipt_v2/compliance/compliance_report.py +449 -0
- aipt_v2/compliance/framework_mapper.py +424 -0
- aipt_v2/compliance/nist_mapping.py +345 -0
- aipt_v2/compliance/owasp_mapping.py +330 -0
- aipt_v2/compliance/pci_mapping.py +297 -0
- aipt_v2/config.py +288 -0
- aipt_v2/core/__init__.py +43 -0
- aipt_v2/core/agent.py +630 -0
- aipt_v2/core/llm.py +395 -0
- aipt_v2/core/memory.py +305 -0
- aipt_v2/core/ptt.py +329 -0
- aipt_v2/database/__init__.py +14 -0
- aipt_v2/database/models.py +232 -0
- aipt_v2/database/repository.py +384 -0
- aipt_v2/docker/__init__.py +23 -0
- aipt_v2/docker/builder.py +260 -0
- aipt_v2/docker/manager.py +222 -0
- aipt_v2/docker/sandbox.py +371 -0
- aipt_v2/evasion/__init__.py +58 -0
- aipt_v2/evasion/request_obfuscator.py +272 -0
- aipt_v2/evasion/tls_fingerprint.py +285 -0
- aipt_v2/evasion/ua_rotator.py +301 -0
- aipt_v2/evasion/waf_bypass.py +439 -0
- aipt_v2/execution/__init__.py +23 -0
- aipt_v2/execution/executor.py +302 -0
- aipt_v2/execution/parser.py +544 -0
- aipt_v2/execution/terminal.py +337 -0
- aipt_v2/health.py +437 -0
- aipt_v2/intelligence/__init__.py +85 -0
- aipt_v2/intelligence/auth.py +520 -0
- aipt_v2/intelligence/chaining.py +775 -0
- aipt_v2/intelligence/cve_aipt.py +334 -0
- aipt_v2/intelligence/cve_info.py +1111 -0
- aipt_v2/intelligence/rag.py +239 -0
- aipt_v2/intelligence/scope.py +442 -0
- aipt_v2/intelligence/searchers/__init__.py +5 -0
- aipt_v2/intelligence/searchers/exploitdb_searcher.py +523 -0
- aipt_v2/intelligence/searchers/github_searcher.py +467 -0
- aipt_v2/intelligence/searchers/google_searcher.py +281 -0
- aipt_v2/intelligence/tools.json +443 -0
- aipt_v2/intelligence/triage.py +670 -0
- aipt_v2/interface/__init__.py +5 -0
- aipt_v2/interface/cli.py +230 -0
- aipt_v2/interface/main.py +501 -0
- aipt_v2/interface/tui.py +1276 -0
- aipt_v2/interface/utils.py +583 -0
- aipt_v2/llm/__init__.py +39 -0
- aipt_v2/llm/config.py +26 -0
- aipt_v2/llm/llm.py +514 -0
- aipt_v2/llm/memory.py +214 -0
- aipt_v2/llm/request_queue.py +89 -0
- aipt_v2/llm/utils.py +89 -0
- aipt_v2/models/__init__.py +15 -0
- aipt_v2/models/findings.py +295 -0
- aipt_v2/models/phase_result.py +224 -0
- aipt_v2/models/scan_config.py +207 -0
- aipt_v2/monitoring/grafana/dashboards/aipt-dashboard.json +355 -0
- aipt_v2/monitoring/grafana/dashboards/default.yml +17 -0
- aipt_v2/monitoring/grafana/datasources/prometheus.yml +17 -0
- aipt_v2/monitoring/prometheus.yml +60 -0
- aipt_v2/orchestration/__init__.py +52 -0
- aipt_v2/orchestration/pipeline.py +398 -0
- aipt_v2/orchestration/progress.py +300 -0
- aipt_v2/orchestration/scheduler.py +296 -0
- aipt_v2/orchestrator.py +2284 -0
- aipt_v2/payloads/__init__.py +27 -0
- aipt_v2/payloads/cmdi.py +150 -0
- aipt_v2/payloads/sqli.py +263 -0
- aipt_v2/payloads/ssrf.py +204 -0
- aipt_v2/payloads/templates.py +222 -0
- aipt_v2/payloads/traversal.py +166 -0
- aipt_v2/payloads/xss.py +204 -0
- aipt_v2/prompts/__init__.py +60 -0
- aipt_v2/proxy/__init__.py +29 -0
- aipt_v2/proxy/history.py +352 -0
- aipt_v2/proxy/interceptor.py +452 -0
- aipt_v2/recon/__init__.py +44 -0
- aipt_v2/recon/dns.py +241 -0
- aipt_v2/recon/osint.py +367 -0
- aipt_v2/recon/subdomain.py +372 -0
- aipt_v2/recon/tech_detect.py +311 -0
- aipt_v2/reports/__init__.py +17 -0
- aipt_v2/reports/generator.py +313 -0
- aipt_v2/reports/html_report.py +378 -0
- aipt_v2/runtime/__init__.py +44 -0
- aipt_v2/runtime/base.py +30 -0
- aipt_v2/runtime/docker.py +401 -0
- aipt_v2/runtime/local.py +346 -0
- aipt_v2/runtime/tool_server.py +205 -0
- aipt_v2/scanners/__init__.py +28 -0
- aipt_v2/scanners/base.py +273 -0
- aipt_v2/scanners/nikto.py +244 -0
- aipt_v2/scanners/nmap.py +402 -0
- aipt_v2/scanners/nuclei.py +273 -0
- aipt_v2/scanners/web.py +454 -0
- aipt_v2/scripts/security_audit.py +366 -0
- aipt_v2/telemetry/__init__.py +7 -0
- aipt_v2/telemetry/tracer.py +347 -0
- aipt_v2/terminal/__init__.py +28 -0
- aipt_v2/terminal/executor.py +400 -0
- aipt_v2/terminal/sandbox.py +350 -0
- aipt_v2/tools/__init__.py +44 -0
- aipt_v2/tools/active_directory/__init__.py +78 -0
- aipt_v2/tools/active_directory/ad_config.py +238 -0
- aipt_v2/tools/active_directory/bloodhound_wrapper.py +447 -0
- aipt_v2/tools/active_directory/kerberos_attacks.py +430 -0
- aipt_v2/tools/active_directory/ldap_enum.py +533 -0
- aipt_v2/tools/active_directory/smb_attacks.py +505 -0
- aipt_v2/tools/agents_graph/__init__.py +19 -0
- aipt_v2/tools/agents_graph/agents_graph_actions.py +69 -0
- aipt_v2/tools/api_security/__init__.py +76 -0
- aipt_v2/tools/api_security/api_discovery.py +608 -0
- aipt_v2/tools/api_security/graphql_scanner.py +622 -0
- aipt_v2/tools/api_security/jwt_analyzer.py +577 -0
- aipt_v2/tools/api_security/openapi_fuzzer.py +761 -0
- aipt_v2/tools/browser/__init__.py +5 -0
- aipt_v2/tools/browser/browser_actions.py +238 -0
- aipt_v2/tools/browser/browser_instance.py +535 -0
- aipt_v2/tools/browser/tab_manager.py +344 -0
- aipt_v2/tools/cloud/__init__.py +70 -0
- aipt_v2/tools/cloud/cloud_config.py +273 -0
- aipt_v2/tools/cloud/cloud_scanner.py +639 -0
- aipt_v2/tools/cloud/prowler_tool.py +571 -0
- aipt_v2/tools/cloud/scoutsuite_tool.py +359 -0
- aipt_v2/tools/executor.py +307 -0
- aipt_v2/tools/parser.py +408 -0
- aipt_v2/tools/proxy/__init__.py +5 -0
- aipt_v2/tools/proxy/proxy_actions.py +103 -0
- aipt_v2/tools/proxy/proxy_manager.py +789 -0
- aipt_v2/tools/registry.py +196 -0
- aipt_v2/tools/scanners/__init__.py +343 -0
- aipt_v2/tools/scanners/acunetix_tool.py +712 -0
- aipt_v2/tools/scanners/burp_tool.py +631 -0
- aipt_v2/tools/scanners/config.py +156 -0
- aipt_v2/tools/scanners/nessus_tool.py +588 -0
- aipt_v2/tools/scanners/zap_tool.py +612 -0
- aipt_v2/tools/terminal/__init__.py +5 -0
- aipt_v2/tools/terminal/terminal_actions.py +37 -0
- aipt_v2/tools/terminal/terminal_manager.py +153 -0
- aipt_v2/tools/terminal/terminal_session.py +449 -0
- aipt_v2/tools/tool_processing.py +108 -0
- aipt_v2/utils/__init__.py +17 -0
- aipt_v2/utils/logging.py +201 -0
- aipt_v2/utils/model_manager.py +187 -0
- aipt_v2/utils/searchers/__init__.py +269 -0
- aiptx-2.0.2.dist-info/METADATA +324 -0
- aiptx-2.0.2.dist-info/RECORD +165 -0
- aiptx-2.0.2.dist-info/WHEEL +5 -0
- aiptx-2.0.2.dist-info/entry_points.txt +7 -0
- aiptx-2.0.2.dist-info/licenses/LICENSE +21 -0
- aiptx-2.0.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AIPT RAG Tool Selection - BGE-based tool retrieval and ranking
|
|
3
|
+
Selects the optimal security tool for each objective.
|
|
4
|
+
|
|
5
|
+
Inspired by: PentestAssistant's proven scoring formula
|
|
6
|
+
Score = 0.5 * description_similarity + 0.5 * sample_similarity + 2.0 * keyword_match
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ToolMatch:
    """Pairs one tool definition with the relevance score computed for it."""

    # Tool name as taken from the tool definition.
    name: str
    # Combined relevance score; a larger value means a better match.
    score: float
    # The complete tool definition dictionary that was scored.
    tool: dict
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ToolRAG:
    """
    RAG-based tool selection using BGE embeddings.

    Features:
    - Semantic search via sentence-transformers
    - Keyword boosting for exact matches
    - Phase filtering for context-aware selection
    - Lazy loading of embeddings for fast startup

    Each tool is a dict. The keys read by this class are "name",
    "description", "samples" (list of str), "keywords" (list of str)
    and "phase".
    """

    # Scoring weights (from PentestAssistant)
    WEIGHT_DESCRIPTION = 0.5
    WEIGHT_SAMPLES = 0.5
    WEIGHT_KEYWORDS = 2.0  # Keyword matches are heavily weighted

    def __init__(
        self,
        tools_path: Optional[str] = None,
        embedding_model: str = "BAAI/bge-large-en-v1.5",
        lazy_load: bool = True,
    ):
        """
        Load the tool definitions and optionally warm the embedding cache.

        Args:
            tools_path: Path to the tools JSON file; defaults to the
                tools.json next to this module.
            embedding_model: Model name handed to SentenceTransformer.
            lazy_load: When False, the model is loaded and every tool is
                embedded immediately instead of on first search.
        """
        self.tools_path = tools_path or self._default_tools_path()
        self.embedding_model_name = embedding_model
        self.tools: list[dict] = []
        self._embedder = None
        # Maps tool name -> {"desc": vector, "samples": vector or None}
        self._embeddings_cache: dict = {}

        # Load tools
        self._load_tools()

        # Optionally pre-compute embeddings
        if not lazy_load:
            self._ensure_embedder()
            self._precompute_embeddings()

    def _default_tools_path(self) -> str:
        """Get default tools.json path"""
        return str(Path(__file__).parent / "tools.json")

    def _load_tools(self) -> None:
        """Load tool definitions from JSON"""
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(self.tools_path, "r", encoding="utf-8") as f:
                self.tools = json.load(f)
        except FileNotFoundError:
            # Initialize with empty list if file doesn't exist yet
            self.tools = []

    def _ensure_embedder(self) -> None:
        """
        Lazy-load the embedding model.

        Raises:
            ImportError: If sentence-transformers is not installed.
        """
        if self._embedder is None:
            try:
                from sentence_transformers import SentenceTransformer
            except ImportError as exc:
                raise ImportError(
                    "sentence-transformers required. Install with: pip install sentence-transformers"
                ) from exc
            self._embedder = SentenceTransformer(self.embedding_model_name)

    def _tool_embeddings(self, tool: dict) -> dict:
        """
        Return {"desc": ..., "samples": ...} embeddings for a tool.

        Results are cached by tool name. Tools without a name are never
        cached, because they would all share the empty-string key and
        silently reuse each other's vectors.
        """
        name = tool.get("name", "")
        if name and name in self._embeddings_cache:
            return self._embeddings_cache[name]

        self._ensure_embedder()
        desc = tool.get("description", "")
        samples = " ".join(tool.get("samples", []))
        entry = {
            "desc": self._embedder.encode(desc, normalize_embeddings=True),
            "samples": self._embedder.encode(samples, normalize_embeddings=True) if samples else None,
        }
        if name:
            self._embeddings_cache[name] = entry
        return entry

    def _precompute_embeddings(self) -> None:
        """Pre-compute embeddings for all tools"""
        self._ensure_embedder()

        for tool in self.tools:
            self._tool_embeddings(tool)

    def search(
        self,
        query: str,
        phase: Optional[str] = None,
        top_k: int = 5,
    ) -> list[dict]:
        """
        Search for tools matching the query.

        Args:
            query: Natural language description of what to do
            phase: Optional phase filter (recon, enum, exploit, post).
                Tools that declare no phase are always kept.
            top_k: Number of results to return; values <= 0 yield []

        Returns:
            List of tool dictionaries, sorted by relevance
        """
        # Bail out before loading the model when nothing can be returned.
        if not self.tools or top_k <= 0:
            return []

        self._ensure_embedder()

        # Encode query
        query_embedding = self._embedder.encode(query, normalize_embeddings=True)

        # Filter by phase if specified
        candidates = self.tools
        if phase:
            candidates = [t for t in self.tools if t.get("phase") == phase or not t.get("phase")]

        # Score all candidates
        scored_tools: list[ToolMatch] = [
            ToolMatch(
                name=tool.get("name", "unknown"),
                score=self._score_tool(query, query_embedding, tool),
                tool=tool,
            )
            for tool in candidates
        ]

        # Sort by score (descending); the sort is stable, so ties keep file order
        scored_tools.sort(key=lambda x: x.score, reverse=True)

        # Return top_k
        return [match.tool for match in scored_tools[:top_k]]

    def _score_tool(
        self,
        query: str,
        query_embedding: np.ndarray,
        tool: dict,
    ) -> float:
        """
        Score a tool against the query using the proven formula:
        score = 0.5 * desc_sim + 0.5 * sample_sim + 2.0 * keyword_match
        """
        # Get or compute embeddings
        embeddings = self._tool_embeddings(tool)

        # Compute similarities (a missing samples vector scores 0.0)
        desc_score = self._cosine_similarity(query_embedding, embeddings["desc"])
        sample_score = self._cosine_similarity(query_embedding, embeddings.get("samples"))

        # Keyword matching
        keyword_score = self._keyword_match(query, tool)

        # Combined score (the magic formula from PentestAssistant)
        score = (
            self.WEIGHT_DESCRIPTION * desc_score +
            self.WEIGHT_SAMPLES * sample_score +
            self.WEIGHT_KEYWORDS * keyword_score
        )

        return float(score)

    def _cosine_similarity(self, a: np.ndarray, b: np.ndarray) -> float:
        """Compute cosine similarity between two vectors (0.0 if either is None)"""
        if a is None or b is None:
            return 0.0
        return float(np.dot(a, b))  # Already normalized

    def _keyword_match(self, query: str, tool: dict) -> float:
        """
        Compute keyword match score.
        Higher weight for exact keyword matches.

        Returns:
            Fraction of the tool's keywords found in the query (case
            insensitive substring match), plus a bonus of 2 matches when
            the tool name appears in the query, capped at 1.0. Tools
            without keywords score 0.0.
        """
        query_lower = query.lower()
        keywords = tool.get("keywords", [])

        if not keywords:
            return 0.0

        # Count matches. Empty keywords are skipped: "" is a substring of
        # every query and would count as a hit.
        matches = sum(1 for kw in keywords if kw and kw.lower() in query_lower)

        # Also check tool name. The emptiness guard matters for the same
        # reason: an unnamed tool must not receive the name bonus.
        tool_name = tool.get("name", "").lower()
        if tool_name and tool_name in query_lower:
            matches += 2  # Bonus for mentioning tool by name

        # Normalize to 0-1
        return min(matches / max(len(keywords), 1), 1.0)

    def get_tool_by_name(self, name: str) -> Optional[dict]:
        """Get a specific tool by name (case insensitive); None if absent"""
        wanted = name.lower()
        for tool in self.tools:
            if tool.get("name", "").lower() == wanted:
                return tool
        return None

    def get_tools_by_phase(self, phase: str) -> list[dict]:
        """Get all tools for a specific phase"""
        return [t for t in self.tools if t.get("phase") == phase]

    def add_tool(self, tool: dict) -> None:
        """Add a custom tool to the registry"""
        self.tools.append(tool)
        # Clear cache for re-computation
        self._embeddings_cache.pop(tool.get("name", ""), None)

    def list_tools(self) -> list[str]:
        """List all available tool names"""
        return [t.get("name", "unknown") for t in self.tools]
|
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AIPT Scope Enforcement Module
|
|
3
|
+
|
|
4
|
+
Ensures all testing activities stay within authorized scope.
|
|
5
|
+
This is CRITICAL for legitimate penetration testing.
|
|
6
|
+
|
|
7
|
+
Features:
|
|
8
|
+
- Domain/IP allowlist enforcement
|
|
9
|
+
- Path exclusion patterns
|
|
10
|
+
- Rate limiting
|
|
11
|
+
- Out-of-scope detection and alerting
|
|
12
|
+
- Audit logging for compliance
|
|
13
|
+
"""
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import ipaddress
|
|
17
|
+
import logging
|
|
18
|
+
import re
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from datetime import datetime
|
|
21
|
+
from enum import Enum
|
|
22
|
+
from typing import Any
|
|
23
|
+
from urllib.parse import urlparse
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ScopeDecision(Enum):
    """Possible outcomes of evaluating a target against the scope."""

    # Target matched an allowlist entry.
    IN_SCOPE = "in_scope"
    # Target matched nothing on the allowlist.
    OUT_OF_SCOPE = "out_of_scope"
    # Target matched a denylist entry (keyword, path or domain).
    EXCLUDED = "excluded"
    RATE_LIMITED = "rate_limited"
    # Returned when the scope check itself raised an error.
    UNKNOWN = "unknown"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ScopeViolation:
    """One recorded attempt to reach a target outside the authorized scope."""

    timestamp: datetime
    url: str
    reason: str
    decision: ScopeDecision
    tool: str
    # Whether the enforcer was configured to block the request.
    blocked: bool

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of the record.

        The timestamp is rendered with ``isoformat()`` and the decision
        as its enum value; every other field is copied through as is.
        """
        payload: dict[str, Any] = {}
        payload["timestamp"] = self.timestamp.isoformat()
        payload["url"] = self.url
        payload["reason"] = self.reason
        payload["decision"] = self.decision.value
        payload["tool"] = self.tool
        payload["blocked"] = self.blocked
        return payload
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class ScopeConfig:
    """Configuration defining authorized scope"""

    # Included targets (allowlist)
    included_domains: list[str] = field(default_factory=list)
    included_ips: list[str] = field(default_factory=list)  # CIDR notation supported
    included_urls: list[str] = field(default_factory=list)  # Specific URL patterns

    # Excluded targets (denylist - always blocked even if in allowlist)
    excluded_domains: list[str] = field(default_factory=list)
    excluded_paths: list[str] = field(default_factory=list)  # Regex patterns
    excluded_keywords: list[str] = field(default_factory=list)  # e.g., "production", "prod"

    # Rate limiting
    max_requests_per_second: int = 10
    max_requests_per_minute: int = 300

    # Safety settings
    block_out_of_scope: bool = True  # If False, just log but don't block
    allow_subdomains: bool = True  # Allow *.example.com if example.com is in scope

    # Audit settings
    log_all_requests: bool = True
    alert_on_violation: bool = True

    # Authorization metadata
    engagement_id: str = ""
    client_name: str = ""
    authorized_by: str = ""
    start_date: str = ""
    end_date: str = ""

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ScopeConfig":
        """
        Create config from dictionary.

        Missing keys fall back to the dataclass defaults and unknown keys
        are ignored. List values are copied so that later mutation of
        ``data`` does not change the config; a list key that is present
        but ``None`` is treated as empty.
        """

        def _as_list(key: str) -> list[str]:
            return list(data.get(key) or [])

        return cls(
            included_domains=_as_list("included_domains"),
            included_ips=_as_list("included_ips"),
            included_urls=_as_list("included_urls"),
            excluded_domains=_as_list("excluded_domains"),
            excluded_paths=_as_list("excluded_paths"),
            excluded_keywords=_as_list("excluded_keywords"),
            max_requests_per_second=data.get("max_requests_per_second", 10),
            max_requests_per_minute=data.get("max_requests_per_minute", 300),
            block_out_of_scope=data.get("block_out_of_scope", True),
            allow_subdomains=data.get("allow_subdomains", True),
            # The audit switches used to be dropped here, so a config that
            # disabled them came back enabled after a round trip.
            log_all_requests=data.get("log_all_requests", True),
            alert_on_violation=data.get("alert_on_violation", True),
            engagement_id=data.get("engagement_id", ""),
            client_name=data.get("client_name", ""),
            authorized_by=data.get("authorized_by", ""),
            start_date=data.get("start_date", ""),
            end_date=data.get("end_date", ""),
        )

    def to_dict(self) -> dict[str, Any]:
        """Return every field as a plain dictionary (inverse of from_dict)."""
        return {
            "included_domains": self.included_domains,
            "included_ips": self.included_ips,
            "included_urls": self.included_urls,
            "excluded_domains": self.excluded_domains,
            "excluded_paths": self.excluded_paths,
            "excluded_keywords": self.excluded_keywords,
            "max_requests_per_second": self.max_requests_per_second,
            "max_requests_per_minute": self.max_requests_per_minute,
            "block_out_of_scope": self.block_out_of_scope,
            "allow_subdomains": self.allow_subdomains,
            "log_all_requests": self.log_all_requests,
            "alert_on_violation": self.alert_on_violation,
            "engagement_id": self.engagement_id,
            "client_name": self.client_name,
            "authorized_by": self.authorized_by,
            "start_date": self.start_date,
            "end_date": self.end_date,
        }
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class ScopeEnforcer:
    """
    Enforces authorized testing scope.

    This class is essential for legitimate penetration testing.
    It ensures all requests stay within the authorized scope
    and logs all activity for compliance and audit purposes.

    Example:
        config = ScopeConfig(
            included_domains=["example.com", "api.example.com"],
            excluded_paths=["/admin/delete", "^/production/"],  # regexes
            client_name="ACME Corp",
            engagement_id="PT-2024-001",
        )
        enforcer = ScopeEnforcer(config)

        # Check before making requests
        if enforcer.is_in_scope("https://example.com/api/users"):
            # Safe to test
            pass
        else:
            # Do not test - out of scope
            pass
    """

    def __init__(self, config: ScopeConfig):
        """
        Prepare the enforcer for ``config``.

        Excluded path patterns are compiled as regular expressions (an
        invalid pattern raises ``re.error``). Entries of ``included_ips``
        that are not valid addresses/CIDR blocks are logged and skipped.
        """
        self.config = config
        self._violations: list[ScopeViolation] = []
        self._request_timestamps: list[datetime] = []

        # Compile regex patterns for performance
        self._excluded_path_patterns = [
            re.compile(p) for p in config.excluded_paths
        ]

        # Parse IP networks
        self._included_networks: list[ipaddress.IPv4Network | ipaddress.IPv6Network] = []
        for ip_str in config.included_ips:
            try:
                network = ipaddress.ip_network(ip_str, strict=False)
            except ValueError as e:
                logger.warning("Invalid IP/CIDR in scope: %s: %s", ip_str, e)
            else:
                self._included_networks.append(network)

        logger.info(
            "Scope enforcer initialized: %d domains, %d IP ranges",
            len(config.included_domains),
            len(self._included_networks),
        )

    def is_in_scope(
        self,
        url: str,
        tool: str = "unknown",
    ) -> bool:
        """
        Check if a URL is within authorized scope.

        Out-of-scope and excluded targets are recorded as violations.
        When ``block_out_of_scope`` is False such targets are still
        recorded but the method returns True (log-only mode).

        Args:
            url: URL to check
            tool: Name of tool making the request (for logging)

        Returns:
            True if in scope, False otherwise
        """
        decision, reason = self._check_scope(url)

        # Log the check
        if self.config.log_all_requests:
            logger.debug("Scope check: %s -> %s (%s)", url, decision.value, reason)

        # Record violation if out of scope
        if decision in (ScopeDecision.OUT_OF_SCOPE, ScopeDecision.EXCLUDED):
            violation = ScopeViolation(
                timestamp=datetime.utcnow(),
                url=url,
                reason=reason,
                decision=decision,
                tool=tool,
                blocked=self.config.block_out_of_scope,
            )
            self._violations.append(violation)

            if self.config.alert_on_violation:
                logger.warning(
                    "SCOPE VIOLATION: %s attempted to access %s - %s", tool, url, reason
                )

            return not self.config.block_out_of_scope

        # UNKNOWN (check failed) is treated as not in scope.
        return decision == ScopeDecision.IN_SCOPE

    @staticmethod
    def _split_target(url: str) -> tuple[str, str]:
        """
        Return ``(host, path)`` for ``url``.

        The host comes from ``urlparse(...).hostname``, which strips any
        ``user:pass@`` prefix, the port and IPv6 brackets and lower-cases
        the result. Splitting the raw netloc on ":" instead would report
        "example.com" for ``https://example.com:x@evil.com/`` and approve
        a request that really goes to evil.com. A trailing root dot
        ("example.com.") is removed so the FQDN form matches scope rules.
        """
        parsed = urlparse(url)
        host = (parsed.hostname or "").rstrip(".")
        return host, parsed.path

    def _check_scope(self, url: str) -> tuple[ScopeDecision, str]:
        """
        Internal scope checking logic.

        Exclusions (keywords, paths, domains) are evaluated before any
        allowlist entry, so they always win. Any error while checking
        yields ``ScopeDecision.UNKNOWN``.

        Returns:
            (decision, human-readable reason)
        """
        try:
            host, path = self._split_target(url)

            # Check excluded keywords first (highest priority)
            url_lower = url.lower()
            for keyword in self.config.excluded_keywords:
                if keyword.lower() in url_lower:
                    return ScopeDecision.EXCLUDED, f"Contains excluded keyword: {keyword}"

            # Check excluded paths
            for pattern in self._excluded_path_patterns:
                if pattern.search(path):
                    return ScopeDecision.EXCLUDED, "Matches excluded path pattern"

            # Check excluded domains
            for domain in self.config.excluded_domains:
                if self._domain_matches(host, domain):
                    return ScopeDecision.EXCLUDED, f"Domain explicitly excluded: {domain}"

            # Check if IP is in scope
            try:
                ip = ipaddress.ip_address(host)
            except ValueError:
                pass  # Not an IP address, check domain
            else:
                for network in self._included_networks:
                    if ip in network:
                        return ScopeDecision.IN_SCOPE, f"IP in authorized range: {network}"

            # Check included domains
            for domain in self.config.included_domains:
                if self._domain_matches(host, domain):
                    return ScopeDecision.IN_SCOPE, f"Domain in scope: {domain}"

            # Check included URL patterns (literal prefix or regex match)
            for url_pattern in self.config.included_urls:
                if url.startswith(url_pattern) or re.match(url_pattern, url):
                    return ScopeDecision.IN_SCOPE, f"URL matches pattern: {url_pattern}"

            return ScopeDecision.OUT_OF_SCOPE, "Not in authorized scope"

        except Exception as e:
            # Fail closed: callers treat UNKNOWN as not in scope.
            logger.error("Error checking scope for %s: %s", url, e)
            return ScopeDecision.UNKNOWN, f"Error: {str(e)}"

    def _domain_matches(self, host: str, scope_domain: str) -> bool:
        """
        Check if a host matches a scope domain.

        ``host`` is expected to be lower-cased already. Matching is on
        whole DNS labels, so "evilexample.com" does not match
        "example.com". A "*.example.com" entry matches example.com and
        all of its subdomains regardless of ``allow_subdomains``.
        """
        scope_domain = scope_domain.lower()

        # Exact match
        if host == scope_domain:
            return True

        # Subdomain match (if allowed)
        if self.config.allow_subdomains and host.endswith("." + scope_domain):
            return True

        # Wildcard match
        if scope_domain.startswith("*."):
            base = scope_domain[2:]
            if host == base or host.endswith("." + base):
                return True

        return False

    def check_rate_limit(self) -> bool:
        """
        Check if request rate is within limits.

        When the request is allowed it is also recorded, so callers
        should not additionally call ``record_request`` for it.

        Returns:
            True if within limits, False if rate limited
        """
        # Local import: the module only imports ``datetime`` itself.
        from datetime import timedelta

        now = datetime.utcnow()

        # Clean old timestamps. Naive datetimes are compared directly;
        # going through .timestamp() would reinterpret these UTC values
        # as local time and can jump by an hour across DST transitions.
        one_minute_ago = now - timedelta(seconds=60)
        self._request_timestamps = [
            ts for ts in self._request_timestamps
            if ts > one_minute_ago
        ]

        # Check per-minute limit
        if len(self._request_timestamps) >= self.config.max_requests_per_minute:
            logger.warning("Rate limit exceeded (per minute)")
            return False

        # Check per-second limit
        one_second_ago = now - timedelta(seconds=1)
        recent = sum(1 for ts in self._request_timestamps if ts > one_second_ago)
        if recent >= self.config.max_requests_per_second:
            logger.warning("Rate limit exceeded (per second)")
            return False

        # Record this request
        self._request_timestamps.append(now)
        return True

    def record_request(self, url: str, tool: str = "unknown") -> None:
        """
        Record a request for rate limiting and audit.

        Only the time of the request is stored; ``url`` and ``tool`` are
        accepted but currently unused.
        """
        self._request_timestamps.append(datetime.utcnow())

    def get_violations(self) -> list[ScopeViolation]:
        """Get all recorded scope violations (as a copy of the list)"""
        return self._violations.copy()

    def get_violation_count(self) -> int:
        """Get count of scope violations"""
        return len(self._violations)

    def get_audit_log(self) -> dict[str, Any]:
        """Get audit log for compliance reporting"""
        return {
            "engagement_id": self.config.engagement_id,
            "client_name": self.config.client_name,
            "authorized_by": self.config.authorized_by,
            "start_date": self.config.start_date,
            "end_date": self.config.end_date,
            "scope_config": self.config.to_dict(),
            "violations": [v.to_dict() for v in self._violations],
            "violation_count": len(self._violations),
            "generated_at": datetime.utcnow().isoformat(),
        }

    def validate_scope_config(self) -> list[str]:
        """
        Validate scope configuration for common issues.

        Returns:
            List of validation warnings/errors
        """
        issues = []

        # Check for empty scope. URL patterns are allowlist entries too,
        # so a config that only sets included_urls is not empty.
        if not (
            self.config.included_domains
            or self.config.included_ips
            or self.config.included_urls
        ):
            issues.append("WARNING: No targets in scope - nothing will be tested")

        # Check for overly broad scope
        for domain in self.config.included_domains:
            if domain in ["*", "*.com", "*.net", "*.org"]:
                issues.append(f"DANGER: Overly broad scope: {domain}")

        # Check for missing engagement metadata
        if not self.config.engagement_id:
            issues.append("INFO: No engagement ID set - recommended for tracking")

        if not self.config.authorized_by:
            issues.append("WARNING: No authorizer specified - document authorization")

        # Check for common sensitive paths not excluded
        sensitive_paths = ["/admin", "/backup", "/production", "/prod"]
        for path in sensitive_paths:
            excluded = any(
                path in ep for ep in self.config.excluded_paths
            )
            if not excluded:
                issues.append(f"INFO: Consider excluding {path} if not in scope")

        return issues

    def generate_scope_summary(self) -> str:
        """Generate human-readable scope summary"""
        lines = [
            "=" * 60,
            "AUTHORIZED TESTING SCOPE",
            "=" * 60,
            f"Engagement ID: {self.config.engagement_id or 'Not specified'}",
            f"Client: {self.config.client_name or 'Not specified'}",
            f"Authorized by: {self.config.authorized_by or 'Not specified'}",
            f"Period: {self.config.start_date} to {self.config.end_date}",
            "",
            "IN-SCOPE TARGETS:",
        ]

        subdomain_note = " (including subdomains)" if self.config.allow_subdomains else ""
        for domain in self.config.included_domains:
            lines.append(f"  • {domain}{subdomain_note}")

        for ip in self.config.included_ips:
            lines.append(f"  • {ip}")

        # URL patterns grant scope as well, so list them with the targets.
        for url_pattern in self.config.included_urls:
            lines.append(f"  • {url_pattern}")

        if self.config.excluded_domains:
            lines.append("")
            lines.append("EXCLUDED DOMAINS:")
            for domain in self.config.excluded_domains:
                lines.append(f"  ✗ {domain}")

        if self.config.excluded_paths:
            lines.append("")
            lines.append("EXCLUDED PATHS:")
            for path in self.config.excluded_paths:
                lines.append(f"  ✗ {path}")

        if self.config.excluded_keywords:
            lines.append("")
            lines.append("EXCLUDED KEYWORDS:")
            for keyword in self.config.excluded_keywords:
                lines.append(f"  ✗ {keyword}")

        lines.append("")
        lines.append(f"Rate Limit: {self.config.max_requests_per_second}/sec, {self.config.max_requests_per_minute}/min")
        lines.append(f"Block Out-of-Scope: {'Yes' if self.config.block_out_of_scope else 'No (log only)'}")
        lines.append("=" * 60)

        return "\n".join(lines)
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def create_scope_from_target(target: str) -> ScopeConfig:
    """
    Create a basic scope config from a single target URL.

    This is a convenience function for simple scans where
    the scope is just the target domain.

    Args:
        target: Target URL ("https://example.com/app") or bare host
            ("example.com", "example.com:8443").

    Returns:
        A ScopeConfig that allows the target host and its subdomains and
        excludes URLs containing "production", "prod" or "live". If no
        host can be extracted, the allowlist is left empty so that
        nothing is in scope.
    """
    candidate = target.strip()
    parsed = urlparse(candidate)
    if not parsed.netloc and "://" not in candidate:
        # A bare host has no "//" authority marker, so urlparse puts it in
        # the path (or scheme) and leaves netloc empty; re-parse with one.
        parsed = urlparse("//" + candidate)

    # .hostname drops credentials, port and IPv6 brackets and lower-cases;
    # splitting netloc on ":" would return "user" for "user:pw@host".
    host = (parsed.hostname or "").rstrip(".")

    return ScopeConfig(
        # Never allowlist the empty string: with subdomain matching it
        # would match every host that ends in ".".
        included_domains=[host] if host else [],
        allow_subdomains=True,
        excluded_keywords=["production", "prod", "live"],
    )
|