aiptx 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiptx might be problematic. Click here for more details.

Files changed (165) hide show
  1. aipt_v2/__init__.py +110 -0
  2. aipt_v2/__main__.py +24 -0
  3. aipt_v2/agents/AIPTxAgent/__init__.py +10 -0
  4. aipt_v2/agents/AIPTxAgent/aiptx_agent.py +211 -0
  5. aipt_v2/agents/__init__.py +24 -0
  6. aipt_v2/agents/base.py +520 -0
  7. aipt_v2/agents/ptt.py +406 -0
  8. aipt_v2/agents/state.py +168 -0
  9. aipt_v2/app.py +960 -0
  10. aipt_v2/browser/__init__.py +31 -0
  11. aipt_v2/browser/automation.py +458 -0
  12. aipt_v2/browser/crawler.py +453 -0
  13. aipt_v2/cli.py +321 -0
  14. aipt_v2/compliance/__init__.py +71 -0
  15. aipt_v2/compliance/compliance_report.py +449 -0
  16. aipt_v2/compliance/framework_mapper.py +424 -0
  17. aipt_v2/compliance/nist_mapping.py +345 -0
  18. aipt_v2/compliance/owasp_mapping.py +330 -0
  19. aipt_v2/compliance/pci_mapping.py +297 -0
  20. aipt_v2/config.py +288 -0
  21. aipt_v2/core/__init__.py +43 -0
  22. aipt_v2/core/agent.py +630 -0
  23. aipt_v2/core/llm.py +395 -0
  24. aipt_v2/core/memory.py +305 -0
  25. aipt_v2/core/ptt.py +329 -0
  26. aipt_v2/database/__init__.py +14 -0
  27. aipt_v2/database/models.py +232 -0
  28. aipt_v2/database/repository.py +384 -0
  29. aipt_v2/docker/__init__.py +23 -0
  30. aipt_v2/docker/builder.py +260 -0
  31. aipt_v2/docker/manager.py +222 -0
  32. aipt_v2/docker/sandbox.py +371 -0
  33. aipt_v2/evasion/__init__.py +58 -0
  34. aipt_v2/evasion/request_obfuscator.py +272 -0
  35. aipt_v2/evasion/tls_fingerprint.py +285 -0
  36. aipt_v2/evasion/ua_rotator.py +301 -0
  37. aipt_v2/evasion/waf_bypass.py +439 -0
  38. aipt_v2/execution/__init__.py +23 -0
  39. aipt_v2/execution/executor.py +302 -0
  40. aipt_v2/execution/parser.py +544 -0
  41. aipt_v2/execution/terminal.py +337 -0
  42. aipt_v2/health.py +437 -0
  43. aipt_v2/intelligence/__init__.py +85 -0
  44. aipt_v2/intelligence/auth.py +520 -0
  45. aipt_v2/intelligence/chaining.py +775 -0
  46. aipt_v2/intelligence/cve_aipt.py +334 -0
  47. aipt_v2/intelligence/cve_info.py +1111 -0
  48. aipt_v2/intelligence/rag.py +239 -0
  49. aipt_v2/intelligence/scope.py +442 -0
  50. aipt_v2/intelligence/searchers/__init__.py +5 -0
  51. aipt_v2/intelligence/searchers/exploitdb_searcher.py +523 -0
  52. aipt_v2/intelligence/searchers/github_searcher.py +467 -0
  53. aipt_v2/intelligence/searchers/google_searcher.py +281 -0
  54. aipt_v2/intelligence/tools.json +443 -0
  55. aipt_v2/intelligence/triage.py +670 -0
  56. aipt_v2/interface/__init__.py +5 -0
  57. aipt_v2/interface/cli.py +230 -0
  58. aipt_v2/interface/main.py +501 -0
  59. aipt_v2/interface/tui.py +1276 -0
  60. aipt_v2/interface/utils.py +583 -0
  61. aipt_v2/llm/__init__.py +39 -0
  62. aipt_v2/llm/config.py +26 -0
  63. aipt_v2/llm/llm.py +514 -0
  64. aipt_v2/llm/memory.py +214 -0
  65. aipt_v2/llm/request_queue.py +89 -0
  66. aipt_v2/llm/utils.py +89 -0
  67. aipt_v2/models/__init__.py +15 -0
  68. aipt_v2/models/findings.py +295 -0
  69. aipt_v2/models/phase_result.py +224 -0
  70. aipt_v2/models/scan_config.py +207 -0
  71. aipt_v2/monitoring/grafana/dashboards/aipt-dashboard.json +355 -0
  72. aipt_v2/monitoring/grafana/dashboards/default.yml +17 -0
  73. aipt_v2/monitoring/grafana/datasources/prometheus.yml +17 -0
  74. aipt_v2/monitoring/prometheus.yml +60 -0
  75. aipt_v2/orchestration/__init__.py +52 -0
  76. aipt_v2/orchestration/pipeline.py +398 -0
  77. aipt_v2/orchestration/progress.py +300 -0
  78. aipt_v2/orchestration/scheduler.py +296 -0
  79. aipt_v2/orchestrator.py +2284 -0
  80. aipt_v2/payloads/__init__.py +27 -0
  81. aipt_v2/payloads/cmdi.py +150 -0
  82. aipt_v2/payloads/sqli.py +263 -0
  83. aipt_v2/payloads/ssrf.py +204 -0
  84. aipt_v2/payloads/templates.py +222 -0
  85. aipt_v2/payloads/traversal.py +166 -0
  86. aipt_v2/payloads/xss.py +204 -0
  87. aipt_v2/prompts/__init__.py +60 -0
  88. aipt_v2/proxy/__init__.py +29 -0
  89. aipt_v2/proxy/history.py +352 -0
  90. aipt_v2/proxy/interceptor.py +452 -0
  91. aipt_v2/recon/__init__.py +44 -0
  92. aipt_v2/recon/dns.py +241 -0
  93. aipt_v2/recon/osint.py +367 -0
  94. aipt_v2/recon/subdomain.py +372 -0
  95. aipt_v2/recon/tech_detect.py +311 -0
  96. aipt_v2/reports/__init__.py +17 -0
  97. aipt_v2/reports/generator.py +313 -0
  98. aipt_v2/reports/html_report.py +378 -0
  99. aipt_v2/runtime/__init__.py +44 -0
  100. aipt_v2/runtime/base.py +30 -0
  101. aipt_v2/runtime/docker.py +401 -0
  102. aipt_v2/runtime/local.py +346 -0
  103. aipt_v2/runtime/tool_server.py +205 -0
  104. aipt_v2/scanners/__init__.py +28 -0
  105. aipt_v2/scanners/base.py +273 -0
  106. aipt_v2/scanners/nikto.py +244 -0
  107. aipt_v2/scanners/nmap.py +402 -0
  108. aipt_v2/scanners/nuclei.py +273 -0
  109. aipt_v2/scanners/web.py +454 -0
  110. aipt_v2/scripts/security_audit.py +366 -0
  111. aipt_v2/telemetry/__init__.py +7 -0
  112. aipt_v2/telemetry/tracer.py +347 -0
  113. aipt_v2/terminal/__init__.py +28 -0
  114. aipt_v2/terminal/executor.py +400 -0
  115. aipt_v2/terminal/sandbox.py +350 -0
  116. aipt_v2/tools/__init__.py +44 -0
  117. aipt_v2/tools/active_directory/__init__.py +78 -0
  118. aipt_v2/tools/active_directory/ad_config.py +238 -0
  119. aipt_v2/tools/active_directory/bloodhound_wrapper.py +447 -0
  120. aipt_v2/tools/active_directory/kerberos_attacks.py +430 -0
  121. aipt_v2/tools/active_directory/ldap_enum.py +533 -0
  122. aipt_v2/tools/active_directory/smb_attacks.py +505 -0
  123. aipt_v2/tools/agents_graph/__init__.py +19 -0
  124. aipt_v2/tools/agents_graph/agents_graph_actions.py +69 -0
  125. aipt_v2/tools/api_security/__init__.py +76 -0
  126. aipt_v2/tools/api_security/api_discovery.py +608 -0
  127. aipt_v2/tools/api_security/graphql_scanner.py +622 -0
  128. aipt_v2/tools/api_security/jwt_analyzer.py +577 -0
  129. aipt_v2/tools/api_security/openapi_fuzzer.py +761 -0
  130. aipt_v2/tools/browser/__init__.py +5 -0
  131. aipt_v2/tools/browser/browser_actions.py +238 -0
  132. aipt_v2/tools/browser/browser_instance.py +535 -0
  133. aipt_v2/tools/browser/tab_manager.py +344 -0
  134. aipt_v2/tools/cloud/__init__.py +70 -0
  135. aipt_v2/tools/cloud/cloud_config.py +273 -0
  136. aipt_v2/tools/cloud/cloud_scanner.py +639 -0
  137. aipt_v2/tools/cloud/prowler_tool.py +571 -0
  138. aipt_v2/tools/cloud/scoutsuite_tool.py +359 -0
  139. aipt_v2/tools/executor.py +307 -0
  140. aipt_v2/tools/parser.py +408 -0
  141. aipt_v2/tools/proxy/__init__.py +5 -0
  142. aipt_v2/tools/proxy/proxy_actions.py +103 -0
  143. aipt_v2/tools/proxy/proxy_manager.py +789 -0
  144. aipt_v2/tools/registry.py +196 -0
  145. aipt_v2/tools/scanners/__init__.py +343 -0
  146. aipt_v2/tools/scanners/acunetix_tool.py +712 -0
  147. aipt_v2/tools/scanners/burp_tool.py +631 -0
  148. aipt_v2/tools/scanners/config.py +156 -0
  149. aipt_v2/tools/scanners/nessus_tool.py +588 -0
  150. aipt_v2/tools/scanners/zap_tool.py +612 -0
  151. aipt_v2/tools/terminal/__init__.py +5 -0
  152. aipt_v2/tools/terminal/terminal_actions.py +37 -0
  153. aipt_v2/tools/terminal/terminal_manager.py +153 -0
  154. aipt_v2/tools/terminal/terminal_session.py +449 -0
  155. aipt_v2/tools/tool_processing.py +108 -0
  156. aipt_v2/utils/__init__.py +17 -0
  157. aipt_v2/utils/logging.py +201 -0
  158. aipt_v2/utils/model_manager.py +187 -0
  159. aipt_v2/utils/searchers/__init__.py +269 -0
  160. aiptx-2.0.2.dist-info/METADATA +324 -0
  161. aiptx-2.0.2.dist-info/RECORD +165 -0
  162. aiptx-2.0.2.dist-info/WHEEL +5 -0
  163. aiptx-2.0.2.dist-info/entry_points.txt +7 -0
  164. aiptx-2.0.2.dist-info/licenses/LICENSE +21 -0
  165. aiptx-2.0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,239 @@
1
+ """
2
+ AIPT RAG Tool Selection - BGE-based tool retrieval and ranking
3
+ Selects the optimal security tool for each objective.
4
+
5
+ Inspired by: PentestAssistant's proven scoring formula
6
+ Score = 0.5 * description_similarity + 0.5 * sample_similarity + 2.0 * keyword_match
7
+ """
8
+
9
+ import json
10
+ import os
11
+ from pathlib import Path
12
+ from typing import Optional
13
+ from dataclasses import dataclass
14
+
15
+ import numpy as np
16
+
17
+
18
@dataclass
class ToolMatch:
    """Pairing of a tool definition with the relevance score it earned."""

    name: str  # value of the tool's "name" entry
    score: float  # combined relevance score for the query
    tool: dict  # the complete tool definition
24
+
25
+
26
class ToolRAG:
    """
    RAG-based tool selection using BGE embeddings.

    Every tool is scored against a natural-language query as

        0.5 * description_similarity + 0.5 * sample_similarity + 2.0 * keyword_match

    Features:
    - Semantic search via sentence-transformers
    - Keyword boosting for exact matches
    - Phase filtering for context-aware selection
    - Lazy loading of embeddings for fast startup
    """

    # Scoring weights (from PentestAssistant)
    WEIGHT_DESCRIPTION = 0.5
    WEIGHT_SAMPLES = 0.5
    WEIGHT_KEYWORDS = 2.0  # Keyword matches are heavily weighted

    def __init__(
        self,
        tools_path: Optional[str] = None,
        embedding_model: str = "BAAI/bge-large-en-v1.5",
        lazy_load: bool = True,
    ):
        """
        Load the tool definitions and optionally pre-compute their embeddings.

        Args:
            tools_path: Path to the JSON file with tool definitions. Defaults
                to ``tools.json`` next to this module.
            embedding_model: Name handed to ``SentenceTransformer``.
            lazy_load: When False, the embedding model is loaded and all tool
                embeddings are computed immediately.
        """
        self.tools_path = tools_path or self._default_tools_path()
        self.embedding_model_name = embedding_model
        self.tools: list[dict] = []
        self._embedder = None
        self._embeddings_cache: dict = {}

        # Load tools
        self._load_tools()

        # Optionally pre-compute embeddings (this also loads the embedder)
        if not lazy_load:
            self._precompute_embeddings()

    def _default_tools_path(self) -> str:
        """Get default tools.json path"""
        return str(Path(__file__).parent / "tools.json")

    def _load_tools(self) -> None:
        """Load tool definitions from JSON; a missing file yields an empty registry."""
        try:
            # Explicit encoding so the result does not depend on the platform locale.
            with open(self.tools_path, "r", encoding="utf-8") as f:
                self.tools = json.load(f)
        except FileNotFoundError:
            # Initialize with empty list if file doesn't exist yet
            self.tools = []

    def _ensure_embedder(self) -> None:
        """
        Lazy-load the embedding model.

        Raises:
            ImportError: If sentence-transformers is not installed.
        """
        if self._embedder is not None:
            return
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError as exc:
            raise ImportError(
                "sentence-transformers required. Install with: pip install sentence-transformers"
            ) from exc
        self._embedder = SentenceTransformer(self.embedding_model_name)

    def _tool_embeddings(self, tool: dict) -> dict:
        """
        Return ``{"desc": ..., "samples": ...}`` embeddings for a tool.

        Results are cached under the tool's name. Tools without a name are
        never cached, because they would all collide on the same empty key
        and silently reuse each other's vectors.
        """
        name = tool.get("name", "")
        if name and name in self._embeddings_cache:
            return self._embeddings_cache[name]

        self._ensure_embedder()
        desc = tool.get("description", "")
        samples = " ".join(tool.get("samples", []))
        entry = {
            "desc": self._embedder.encode(desc, normalize_embeddings=True),
            # A tool without samples contributes no sample similarity.
            "samples": self._embedder.encode(samples, normalize_embeddings=True) if samples else None,
        }
        if name:
            self._embeddings_cache[name] = entry
        return entry

    def _precompute_embeddings(self) -> None:
        """Pre-compute embeddings for all tools"""
        self._ensure_embedder()
        for tool in self.tools:
            self._tool_embeddings(tool)

    def search(
        self,
        query: str,
        phase: Optional[str] = None,
        top_k: int = 5,
    ) -> list[dict]:
        """
        Search for tools matching the query.

        Args:
            query: Natural language description of what to do
            phase: Optional phase filter (recon, enum, exploit, post). Tools
                that declare no phase always pass the filter.
            top_k: Number of results to return; values <= 0 return nothing

        Returns:
            List of tool dictionaries, sorted by relevance
        """
        # A negative top_k would otherwise slice off the tail of the ranking
        # instead of limiting it.
        if not self.tools or top_k <= 0:
            return []

        self._ensure_embedder()

        # Encode query
        query_embedding = self._embedder.encode(query, normalize_embeddings=True)

        # Filter by phase if specified
        candidates = self.tools
        if phase:
            candidates = [t for t in self.tools if t.get("phase") == phase or not t.get("phase")]

        # Score all candidates
        scored_tools = [
            ToolMatch(
                name=tool.get("name", "unknown"),
                score=self._score_tool(query, query_embedding, tool),
                tool=tool,
            )
            for tool in candidates
        ]

        # Sort by score (descending)
        scored_tools.sort(key=lambda x: x.score, reverse=True)

        # Return top_k
        return [match.tool for match in scored_tools[:top_k]]

    def _score_tool(
        self,
        query: str,
        query_embedding: np.ndarray,
        tool: dict,
    ) -> float:
        """
        Score a tool against the query using the proven formula:
        score = 0.5 * desc_sim + 0.5 * sample_sim + 2.0 * keyword_match
        """
        embeddings = self._tool_embeddings(tool)
        desc_emb = embeddings["desc"]
        sample_emb = embeddings.get("samples")

        # Compute similarities
        desc_score = self._cosine_similarity(query_embedding, desc_emb)
        sample_score = self._cosine_similarity(query_embedding, sample_emb) if sample_emb is not None else 0.0

        # Keyword matching
        keyword_score = self._keyword_match(query, tool)

        # Combined score (the magic formula from PentestAssistant)
        score = (
            self.WEIGHT_DESCRIPTION * desc_score +
            self.WEIGHT_SAMPLES * sample_score +
            self.WEIGHT_KEYWORDS * keyword_score
        )

        return float(score)

    def _cosine_similarity(self, a: np.ndarray, b: np.ndarray) -> float:
        """Compute cosine similarity between two vectors (0.0 if either is None)."""
        if a is None or b is None:
            return 0.0
        # The embeddings are requested with normalize_embeddings=True, so the
        # dot product is the cosine similarity.
        return float(np.dot(a, b))

    def _keyword_match(self, query: str, tool: dict) -> float:
        """
        Compute keyword match score in the range 0-1.

        The score is the fraction of the tool's keywords that occur in the
        query (case-insensitive substring match), with a bonus of two matches
        when the query mentions the tool by name. Tools without keywords
        score 0.0.
        """
        query_lower = query.lower()
        keywords = tool.get("keywords", [])

        if not keywords:
            return 0.0

        # Count matches; an empty keyword is a substring of everything and
        # must not count as a hit.
        matches = sum(1 for kw in keywords if kw and kw.lower() in query_lower)

        # Also check tool name. The emptiness check matters: "" is contained
        # in every query, which used to hand unnamed tools the name bonus.
        name = tool.get("name", "").lower()
        if name and name in query_lower:
            matches += 2  # Bonus for mentioning tool by name

        # Normalize to 0-1
        return min(matches / len(keywords), 1.0)

    def get_tool_by_name(self, name: str) -> Optional[dict]:
        """Get a specific tool by name (case-insensitive); None if absent."""
        wanted = name.lower()
        for tool in self.tools:
            if tool.get("name", "").lower() == wanted:
                return tool
        return None

    def get_tools_by_phase(self, phase: str) -> list[dict]:
        """Get all tools for a specific phase"""
        return [t for t in self.tools if t.get("phase") == phase]

    def add_tool(self, tool: dict) -> None:
        """Add a custom tool to the registry"""
        self.tools.append(tool)
        # Drop any cached embeddings under this name so they are re-computed
        # from the new definition.
        self._embeddings_cache.pop(tool.get("name", ""), None)

    def list_tools(self) -> list[str]:
        """List all available tool names"""
        return [t.get("name", "unknown") for t in self.tools]
@@ -0,0 +1,442 @@
1
+ """
2
+ AIPT Scope Enforcement Module
3
+
4
+ Ensures all testing activities stay within authorized scope.
5
+ This is CRITICAL for legitimate penetration testing.
6
+
7
+ Features:
8
+ - Domain/IP allowlist enforcement
9
+ - Path exclusion patterns
10
+ - Rate limiting
11
+ - Out-of-scope detection and alerting
12
+ - Audit logging for compliance
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import ipaddress
17
+ import logging
18
+ import re
19
+ from dataclasses import dataclass, field
20
+ from datetime import datetime
21
+ from enum import Enum
22
+ from typing import Any
23
+ from urllib.parse import urlparse
24
+
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class ScopeDecision(Enum):
    """Possible outcomes of checking a target against the authorized scope."""

    # Target matched an included IP range, domain or URL pattern.
    IN_SCOPE = "in_scope"
    # Target matched nothing on the allowlist.
    OUT_OF_SCOPE = "out_of_scope"
    # Target hit an excluded keyword, path pattern or domain.
    EXCLUDED = "excluded"
    RATE_LIMITED = "rate_limited"
    # The scope check itself failed with an error.
    UNKNOWN = "unknown"
36
+
37
+
38
@dataclass
class ScopeViolation:
    """One recorded attempt to reach a target outside the authorized scope."""

    timestamp: datetime
    url: str
    reason: str
    decision: ScopeDecision
    tool: str
    blocked: bool

    def to_dict(self) -> dict[str, Any]:
        """Serialize the record; the timestamp becomes ISO-8601 text and the decision its string value."""
        when = self.timestamp.isoformat()
        outcome = self.decision.value
        return dict(
            timestamp=when,
            url=self.url,
            reason=self.reason,
            decision=outcome,
            tool=self.tool,
            blocked=self.blocked,
        )
57
+
58
+
59
@dataclass
class ScopeConfig:
    """Configuration defining authorized scope"""

    # Included targets (allowlist)
    included_domains: list[str] = field(default_factory=list)
    included_ips: list[str] = field(default_factory=list)  # CIDR notation supported
    included_urls: list[str] = field(default_factory=list)  # Specific URL patterns

    # Excluded targets (denylist - always blocked even if in allowlist)
    excluded_domains: list[str] = field(default_factory=list)
    excluded_paths: list[str] = field(default_factory=list)  # Regex patterns
    excluded_keywords: list[str] = field(default_factory=list)  # e.g., "production", "prod"

    # Rate limiting
    max_requests_per_second: int = 10
    max_requests_per_minute: int = 300

    # Safety settings
    block_out_of_scope: bool = True  # If False, just log but don't block
    allow_subdomains: bool = True  # Allow *.example.com if example.com is in scope

    # Audit settings
    log_all_requests: bool = True
    alert_on_violation: bool = True

    # Authorization metadata
    engagement_id: str = ""
    client_name: str = ""
    authorized_by: str = ""
    start_date: str = ""
    end_date: str = ""

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ScopeConfig":
        """
        Create config from dictionary.

        Missing keys fall back to the field defaults. The audit settings
        (``log_all_requests``, ``alert_on_violation``) are read as well, so a
        config written by ``to_dict`` is restored unchanged.
        """
        return cls(
            included_domains=data.get("included_domains", []),
            included_ips=data.get("included_ips", []),
            included_urls=data.get("included_urls", []),
            excluded_domains=data.get("excluded_domains", []),
            excluded_paths=data.get("excluded_paths", []),
            excluded_keywords=data.get("excluded_keywords", []),
            max_requests_per_second=data.get("max_requests_per_second", 10),
            max_requests_per_minute=data.get("max_requests_per_minute", 300),
            block_out_of_scope=data.get("block_out_of_scope", True),
            allow_subdomains=data.get("allow_subdomains", True),
            log_all_requests=data.get("log_all_requests", True),
            alert_on_violation=data.get("alert_on_violation", True),
            engagement_id=data.get("engagement_id", ""),
            client_name=data.get("client_name", ""),
            authorized_by=data.get("authorized_by", ""),
            start_date=data.get("start_date", ""),
            end_date=data.get("end_date", ""),
        )

    def to_dict(self) -> dict[str, Any]:
        """Serialize every field to a plain dictionary (inverse of ``from_dict``)."""
        return {
            "included_domains": self.included_domains,
            "included_ips": self.included_ips,
            "included_urls": self.included_urls,
            "excluded_domains": self.excluded_domains,
            "excluded_paths": self.excluded_paths,
            "excluded_keywords": self.excluded_keywords,
            "max_requests_per_second": self.max_requests_per_second,
            "max_requests_per_minute": self.max_requests_per_minute,
            "block_out_of_scope": self.block_out_of_scope,
            "allow_subdomains": self.allow_subdomains,
            "log_all_requests": self.log_all_requests,
            "alert_on_violation": self.alert_on_violation,
            "engagement_id": self.engagement_id,
            "client_name": self.client_name,
            "authorized_by": self.authorized_by,
            "start_date": self.start_date,
            "end_date": self.end_date,
        }
130
+
131
+
132
class ScopeEnforcer:
    """
    Enforces authorized testing scope.

    This class is essential for legitimate penetration testing.
    It ensures all requests stay within the authorized scope
    and logs all activity for compliance and audit purposes.

    Example:
        config = ScopeConfig(
            included_domains=["example.com", "api.example.com"],
            excluded_paths=["/admin/delete", "/production/*"],
            client_name="ACME Corp",
            engagement_id="PT-2024-001",
        )
        enforcer = ScopeEnforcer(config)

        # Check before making requests
        if enforcer.is_in_scope("https://example.com/api/users"):
            # Safe to test
            pass
        else:
            # Do not test - out of scope
            pass
    """

    def __init__(self, config: ScopeConfig):
        """
        Prepare the enforcer for the given scope configuration.

        Invalid entries in ``config.included_ips`` are logged and skipped.
        An invalid regular expression in ``config.excluded_paths`` raises
        ``re.error``: silently dropping an exclusion would widen the scope.
        """
        self.config = config
        self._violations: list[ScopeViolation] = []
        self._request_timestamps: list[datetime] = []

        # Compile regex patterns for performance
        self._excluded_path_patterns = [
            re.compile(p) for p in config.excluded_paths
        ]

        # Parse IP networks
        self._included_networks: list[ipaddress.IPv4Network | ipaddress.IPv6Network] = []
        for ip_str in config.included_ips:
            try:
                network = ipaddress.ip_network(ip_str, strict=False)
            except ValueError as e:
                logger.warning(f"Invalid IP/CIDR in scope: {ip_str}: {e}")
            else:
                self._included_networks.append(network)

        logger.info(
            f"Scope enforcer initialized: {len(config.included_domains)} domains, "
            f"{len(self._included_networks)} IP ranges"
        )

    def is_in_scope(
        self,
        url: str,
        tool: str = "unknown",
    ) -> bool:
        """
        Check if a URL is within authorized scope.

        Out-of-scope and excluded targets are recorded as violations. When
        ``block_out_of_scope`` is False such targets are still reported as
        allowed (log-only mode).

        Args:
            url: URL to check
            tool: Name of tool making the request (for logging)

        Returns:
            True if in scope, False otherwise
        """
        decision, reason = self._check_scope(url)

        # Log the check
        if self.config.log_all_requests:
            logger.debug(f"Scope check: {url} -> {decision.value} ({reason})")

        # Record violation if out of scope
        if decision in [ScopeDecision.OUT_OF_SCOPE, ScopeDecision.EXCLUDED]:
            violation = ScopeViolation(
                timestamp=datetime.utcnow(),
                url=url,
                reason=reason,
                decision=decision,
                tool=tool,
                blocked=self.config.block_out_of_scope,
            )
            self._violations.append(violation)

            if self.config.alert_on_violation:
                logger.warning(
                    f"SCOPE VIOLATION: {tool} attempted to access {url} - {reason}"
                )

            return not self.config.block_out_of_scope

        # Any other decision (including UNKNOWN after an error) is only
        # allowed when it is an explicit IN_SCOPE.
        return decision == ScopeDecision.IN_SCOPE

    @staticmethod
    def _extract_host(url: str) -> str:
        """
        Return the lower-cased host of ``url`` ("" when it has none).

        The URL parser's ``hostname`` is used instead of splitting the netloc
        on ":", because the netloc may carry credentials
        (``https://example.com:pw@evil.com/`` targets ``evil.com``) or a
        bracketed IPv6 literal. A trailing root dot is removed so that
        ``example.com.`` matches ``example.com``.
        """
        host = urlparse(url).hostname or ""
        return host.rstrip(".")

    def _check_scope(self, url: str) -> tuple[ScopeDecision, str]:
        """
        Internal scope checking logic.

        Exclusions (keywords, paths, domains) are evaluated before the
        allowlist (IP ranges, domains, URL patterns). Any error while
        checking yields ``ScopeDecision.UNKNOWN``.
        """
        try:
            host = self._extract_host(url)
            path = urlparse(url).path

            # Check excluded keywords first (highest priority)
            url_lower = url.lower()
            for keyword in self.config.excluded_keywords:
                if keyword.lower() in url_lower:
                    return ScopeDecision.EXCLUDED, f"Contains excluded keyword: {keyword}"

            # Check excluded paths
            for pattern in self._excluded_path_patterns:
                if pattern.search(path):
                    return ScopeDecision.EXCLUDED, "Matches excluded path pattern"

            # Check excluded domains
            for domain in self.config.excluded_domains:
                if self._domain_matches(host, domain):
                    return ScopeDecision.EXCLUDED, f"Domain explicitly excluded: {domain}"

            # Check if IP is in scope
            try:
                ip = ipaddress.ip_address(host)
            except ValueError:
                pass  # Not an IP address, check domain
            else:
                for network in self._included_networks:
                    if ip in network:
                        return ScopeDecision.IN_SCOPE, f"IP in authorized range: {network}"

            # Check included domains
            for domain in self.config.included_domains:
                if self._domain_matches(host, domain):
                    return ScopeDecision.IN_SCOPE, f"Domain in scope: {domain}"

            # Check included URL patterns (literal prefix or regex)
            for url_pattern in self.config.included_urls:
                if url.startswith(url_pattern) or re.match(url_pattern, url):
                    return ScopeDecision.IN_SCOPE, f"URL matches pattern: {url_pattern}"

            return ScopeDecision.OUT_OF_SCOPE, "Not in authorized scope"

        except Exception as e:
            # Fail closed: an unparseable URL or bad pattern is never in scope.
            logger.error(f"Error checking scope for {url}: {e}")
            return ScopeDecision.UNKNOWN, f"Error: {str(e)}"

    def _domain_matches(self, host: str, scope_domain: str) -> bool:
        """
        Check if a host matches a scope domain.

        Matches are exact, by subdomain (when ``allow_subdomains`` is set) or
        via a leading ``*.`` wildcard in the scope entry. An empty host or an
        empty scope entry never matches; otherwise a blank entry would put
        every URL without a host in scope.
        """
        scope_domain = scope_domain.lower()
        if not host or not scope_domain:
            return False

        # Exact match
        if host == scope_domain:
            return True

        # Subdomain match (if allowed)
        if self.config.allow_subdomains and host.endswith("." + scope_domain):
            return True

        # Wildcard match
        if scope_domain.startswith("*."):
            base = scope_domain[2:]
            if host == base or host.endswith("." + base):
                return True

        return False

    def check_rate_limit(self) -> bool:
        """
        Check if request rate is within limits.

        When the request is allowed it is also recorded, so callers that use
        this method should not additionally call ``record_request`` for the
        same request.

        Returns:
            True if within limits, False if rate limited
        """
        now = datetime.utcnow()

        # Ages are computed by subtracting datetimes directly. Converting the
        # naive UTC values with .timestamp() would reinterpret them as local
        # time and can be off by an hour around DST changes.
        def age_seconds(ts: datetime) -> float:
            return (now - ts).total_seconds()

        # Clean old timestamps
        self._request_timestamps = [
            ts for ts in self._request_timestamps
            if age_seconds(ts) < 60
        ]

        # Check per-minute limit
        if len(self._request_timestamps) >= self.config.max_requests_per_minute:
            logger.warning("Rate limit exceeded (per minute)")
            return False

        # Check per-second limit
        recent = sum(1 for ts in self._request_timestamps if age_seconds(ts) < 1)
        if recent >= self.config.max_requests_per_second:
            logger.warning("Rate limit exceeded (per second)")
            return False

        # Record this request
        self._request_timestamps.append(now)
        return True

    def record_request(self, url: str, tool: str = "unknown") -> None:
        """Record a request for rate limiting (``url`` and ``tool`` are currently not stored)."""
        self._request_timestamps.append(datetime.utcnow())

    def get_violations(self) -> list[ScopeViolation]:
        """Get all recorded scope violations"""
        return self._violations.copy()

    def get_violation_count(self) -> int:
        """Get count of scope violations"""
        return len(self._violations)

    def get_audit_log(self) -> dict[str, Any]:
        """Get audit log for compliance reporting"""
        return {
            "engagement_id": self.config.engagement_id,
            "client_name": self.config.client_name,
            "authorized_by": self.config.authorized_by,
            "start_date": self.config.start_date,
            "end_date": self.config.end_date,
            "scope_config": self.config.to_dict(),
            "violations": [v.to_dict() for v in self._violations],
            "violation_count": len(self._violations),
            "generated_at": datetime.utcnow().isoformat(),
        }

    def validate_scope_config(self) -> list[str]:
        """
        Validate scope configuration for common issues.

        Returns:
            List of validation warnings/errors
        """
        issues = []

        # Check for empty scope; URL patterns count as targets too
        if not (
            self.config.included_domains
            or self.config.included_ips
            or self.config.included_urls
        ):
            issues.append("WARNING: No targets in scope - nothing will be tested")

        # Check for overly broad scope
        for domain in self.config.included_domains:
            if domain in ["*", "*.com", "*.net", "*.org"]:
                issues.append(f"DANGER: Overly broad scope: {domain}")

        # Check for missing engagement metadata
        if not self.config.engagement_id:
            issues.append("INFO: No engagement ID set - recommended for tracking")

        if not self.config.authorized_by:
            issues.append("WARNING: No authorizer specified - document authorization")

        # Check for common sensitive paths not excluded
        sensitive_paths = ["/admin", "/backup", "/production", "/prod"]
        for path in sensitive_paths:
            excluded = any(
                path in ep for ep in self.config.excluded_paths
            )
            if not excluded:
                issues.append(f"INFO: Consider excluding {path} if not in scope")

        return issues

    def generate_scope_summary(self) -> str:
        """Generate human-readable scope summary"""
        lines = [
            "=" * 60,
            "AUTHORIZED TESTING SCOPE",
            "=" * 60,
            f"Engagement ID: {self.config.engagement_id or 'Not specified'}",
            f"Client: {self.config.client_name or 'Not specified'}",
            f"Authorized by: {self.config.authorized_by or 'Not specified'}",
            f"Period: {self.config.start_date} to {self.config.end_date}",
            "",
            "IN-SCOPE TARGETS:",
        ]

        for domain in self.config.included_domains:
            subdomain_note = " (including subdomains)" if self.config.allow_subdomains else ""
            lines.append(f" • {domain}{subdomain_note}")

        for ip in self.config.included_ips:
            lines.append(f" • {ip}")

        if self.config.excluded_paths:
            lines.append("")
            lines.append("EXCLUDED PATHS:")
            for path in self.config.excluded_paths:
                lines.append(f" ✗ {path}")

        if self.config.excluded_keywords:
            lines.append("")
            lines.append("EXCLUDED KEYWORDS:")
            for keyword in self.config.excluded_keywords:
                lines.append(f" ✗ {keyword}")

        lines.append("")
        lines.append(f"Rate Limit: {self.config.max_requests_per_second}/sec, {self.config.max_requests_per_minute}/min")
        lines.append(f"Block Out-of-Scope: {'Yes' if self.config.block_out_of_scope else 'No (log only)'}")
        lines.append("=" * 60)

        return "\n".join(lines)
426
+
427
+
428
def create_scope_from_target(target: str) -> ScopeConfig:
    """
    Create a basic scope config from a single target URL.

    This is a convenience function for simple scans where
    the scope is just the target domain.

    Args:
        target: Target URL, or a bare host such as ``example.com`` or
            ``example.com:8080``.

    Returns:
        A ScopeConfig whose only included domain is the target's host
        (subdomains allowed). If no host can be determined the allowlist is
        left empty, so nothing is in scope.
    """
    candidate = target.strip()
    parsed = urlparse(candidate)
    if not parsed.netloc and "://" not in candidate:
        # Without a scheme the parser puts the host into the path (or takes
        # it for the scheme); re-parse it as a network location.
        parsed = urlparse("//" + candidate)

    # ``hostname`` drops any credentials, port and IPv6 brackets.
    host = (parsed.hostname or "").rstrip(".")

    return ScopeConfig(
        # An empty entry must never reach the allowlist.
        included_domains=[host] if host else [],
        allow_subdomains=True,
        excluded_keywords=["production", "prod", "live"],
    )
@@ -0,0 +1,5 @@
1
+ """
2
+ AIPT Intelligence Searchers - Exploit search from various sources
3
+ """
4
+
5
+ __all__ = []