aiptx-2.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiptx might be problematic.

Files changed (165)
  1. aipt_v2/__init__.py +110 -0
  2. aipt_v2/__main__.py +24 -0
  3. aipt_v2/agents/AIPTxAgent/__init__.py +10 -0
  4. aipt_v2/agents/AIPTxAgent/aiptx_agent.py +211 -0
  5. aipt_v2/agents/__init__.py +24 -0
  6. aipt_v2/agents/base.py +520 -0
  7. aipt_v2/agents/ptt.py +406 -0
  8. aipt_v2/agents/state.py +168 -0
  9. aipt_v2/app.py +960 -0
  10. aipt_v2/browser/__init__.py +31 -0
  11. aipt_v2/browser/automation.py +458 -0
  12. aipt_v2/browser/crawler.py +453 -0
  13. aipt_v2/cli.py +321 -0
  14. aipt_v2/compliance/__init__.py +71 -0
  15. aipt_v2/compliance/compliance_report.py +449 -0
  16. aipt_v2/compliance/framework_mapper.py +424 -0
  17. aipt_v2/compliance/nist_mapping.py +345 -0
  18. aipt_v2/compliance/owasp_mapping.py +330 -0
  19. aipt_v2/compliance/pci_mapping.py +297 -0
  20. aipt_v2/config.py +288 -0
  21. aipt_v2/core/__init__.py +43 -0
  22. aipt_v2/core/agent.py +630 -0
  23. aipt_v2/core/llm.py +395 -0
  24. aipt_v2/core/memory.py +305 -0
  25. aipt_v2/core/ptt.py +329 -0
  26. aipt_v2/database/__init__.py +14 -0
  27. aipt_v2/database/models.py +232 -0
  28. aipt_v2/database/repository.py +384 -0
  29. aipt_v2/docker/__init__.py +23 -0
  30. aipt_v2/docker/builder.py +260 -0
  31. aipt_v2/docker/manager.py +222 -0
  32. aipt_v2/docker/sandbox.py +371 -0
  33. aipt_v2/evasion/__init__.py +58 -0
  34. aipt_v2/evasion/request_obfuscator.py +272 -0
  35. aipt_v2/evasion/tls_fingerprint.py +285 -0
  36. aipt_v2/evasion/ua_rotator.py +301 -0
  37. aipt_v2/evasion/waf_bypass.py +439 -0
  38. aipt_v2/execution/__init__.py +23 -0
  39. aipt_v2/execution/executor.py +302 -0
  40. aipt_v2/execution/parser.py +544 -0
  41. aipt_v2/execution/terminal.py +337 -0
  42. aipt_v2/health.py +437 -0
  43. aipt_v2/intelligence/__init__.py +85 -0
  44. aipt_v2/intelligence/auth.py +520 -0
  45. aipt_v2/intelligence/chaining.py +775 -0
  46. aipt_v2/intelligence/cve_aipt.py +334 -0
  47. aipt_v2/intelligence/cve_info.py +1111 -0
  48. aipt_v2/intelligence/rag.py +239 -0
  49. aipt_v2/intelligence/scope.py +442 -0
  50. aipt_v2/intelligence/searchers/__init__.py +5 -0
  51. aipt_v2/intelligence/searchers/exploitdb_searcher.py +523 -0
  52. aipt_v2/intelligence/searchers/github_searcher.py +467 -0
  53. aipt_v2/intelligence/searchers/google_searcher.py +281 -0
  54. aipt_v2/intelligence/tools.json +443 -0
  55. aipt_v2/intelligence/triage.py +670 -0
  56. aipt_v2/interface/__init__.py +5 -0
  57. aipt_v2/interface/cli.py +230 -0
  58. aipt_v2/interface/main.py +501 -0
  59. aipt_v2/interface/tui.py +1276 -0
  60. aipt_v2/interface/utils.py +583 -0
  61. aipt_v2/llm/__init__.py +39 -0
  62. aipt_v2/llm/config.py +26 -0
  63. aipt_v2/llm/llm.py +514 -0
  64. aipt_v2/llm/memory.py +214 -0
  65. aipt_v2/llm/request_queue.py +89 -0
  66. aipt_v2/llm/utils.py +89 -0
  67. aipt_v2/models/__init__.py +15 -0
  68. aipt_v2/models/findings.py +295 -0
  69. aipt_v2/models/phase_result.py +224 -0
  70. aipt_v2/models/scan_config.py +207 -0
  71. aipt_v2/monitoring/grafana/dashboards/aipt-dashboard.json +355 -0
  72. aipt_v2/monitoring/grafana/dashboards/default.yml +17 -0
  73. aipt_v2/monitoring/grafana/datasources/prometheus.yml +17 -0
  74. aipt_v2/monitoring/prometheus.yml +60 -0
  75. aipt_v2/orchestration/__init__.py +52 -0
  76. aipt_v2/orchestration/pipeline.py +398 -0
  77. aipt_v2/orchestration/progress.py +300 -0
  78. aipt_v2/orchestration/scheduler.py +296 -0
  79. aipt_v2/orchestrator.py +2284 -0
  80. aipt_v2/payloads/__init__.py +27 -0
  81. aipt_v2/payloads/cmdi.py +150 -0
  82. aipt_v2/payloads/sqli.py +263 -0
  83. aipt_v2/payloads/ssrf.py +204 -0
  84. aipt_v2/payloads/templates.py +222 -0
  85. aipt_v2/payloads/traversal.py +166 -0
  86. aipt_v2/payloads/xss.py +204 -0
  87. aipt_v2/prompts/__init__.py +60 -0
  88. aipt_v2/proxy/__init__.py +29 -0
  89. aipt_v2/proxy/history.py +352 -0
  90. aipt_v2/proxy/interceptor.py +452 -0
  91. aipt_v2/recon/__init__.py +44 -0
  92. aipt_v2/recon/dns.py +241 -0
  93. aipt_v2/recon/osint.py +367 -0
  94. aipt_v2/recon/subdomain.py +372 -0
  95. aipt_v2/recon/tech_detect.py +311 -0
  96. aipt_v2/reports/__init__.py +17 -0
  97. aipt_v2/reports/generator.py +313 -0
  98. aipt_v2/reports/html_report.py +378 -0
  99. aipt_v2/runtime/__init__.py +44 -0
  100. aipt_v2/runtime/base.py +30 -0
  101. aipt_v2/runtime/docker.py +401 -0
  102. aipt_v2/runtime/local.py +346 -0
  103. aipt_v2/runtime/tool_server.py +205 -0
  104. aipt_v2/scanners/__init__.py +28 -0
  105. aipt_v2/scanners/base.py +273 -0
  106. aipt_v2/scanners/nikto.py +244 -0
  107. aipt_v2/scanners/nmap.py +402 -0
  108. aipt_v2/scanners/nuclei.py +273 -0
  109. aipt_v2/scanners/web.py +454 -0
  110. aipt_v2/scripts/security_audit.py +366 -0
  111. aipt_v2/telemetry/__init__.py +7 -0
  112. aipt_v2/telemetry/tracer.py +347 -0
  113. aipt_v2/terminal/__init__.py +28 -0
  114. aipt_v2/terminal/executor.py +400 -0
  115. aipt_v2/terminal/sandbox.py +350 -0
  116. aipt_v2/tools/__init__.py +44 -0
  117. aipt_v2/tools/active_directory/__init__.py +78 -0
  118. aipt_v2/tools/active_directory/ad_config.py +238 -0
  119. aipt_v2/tools/active_directory/bloodhound_wrapper.py +447 -0
  120. aipt_v2/tools/active_directory/kerberos_attacks.py +430 -0
  121. aipt_v2/tools/active_directory/ldap_enum.py +533 -0
  122. aipt_v2/tools/active_directory/smb_attacks.py +505 -0
  123. aipt_v2/tools/agents_graph/__init__.py +19 -0
  124. aipt_v2/tools/agents_graph/agents_graph_actions.py +69 -0
  125. aipt_v2/tools/api_security/__init__.py +76 -0
  126. aipt_v2/tools/api_security/api_discovery.py +608 -0
  127. aipt_v2/tools/api_security/graphql_scanner.py +622 -0
  128. aipt_v2/tools/api_security/jwt_analyzer.py +577 -0
  129. aipt_v2/tools/api_security/openapi_fuzzer.py +761 -0
  130. aipt_v2/tools/browser/__init__.py +5 -0
  131. aipt_v2/tools/browser/browser_actions.py +238 -0
  132. aipt_v2/tools/browser/browser_instance.py +535 -0
  133. aipt_v2/tools/browser/tab_manager.py +344 -0
  134. aipt_v2/tools/cloud/__init__.py +70 -0
  135. aipt_v2/tools/cloud/cloud_config.py +273 -0
  136. aipt_v2/tools/cloud/cloud_scanner.py +639 -0
  137. aipt_v2/tools/cloud/prowler_tool.py +571 -0
  138. aipt_v2/tools/cloud/scoutsuite_tool.py +359 -0
  139. aipt_v2/tools/executor.py +307 -0
  140. aipt_v2/tools/parser.py +408 -0
  141. aipt_v2/tools/proxy/__init__.py +5 -0
  142. aipt_v2/tools/proxy/proxy_actions.py +103 -0
  143. aipt_v2/tools/proxy/proxy_manager.py +789 -0
  144. aipt_v2/tools/registry.py +196 -0
  145. aipt_v2/tools/scanners/__init__.py +343 -0
  146. aipt_v2/tools/scanners/acunetix_tool.py +712 -0
  147. aipt_v2/tools/scanners/burp_tool.py +631 -0
  148. aipt_v2/tools/scanners/config.py +156 -0
  149. aipt_v2/tools/scanners/nessus_tool.py +588 -0
  150. aipt_v2/tools/scanners/zap_tool.py +612 -0
  151. aipt_v2/tools/terminal/__init__.py +5 -0
  152. aipt_v2/tools/terminal/terminal_actions.py +37 -0
  153. aipt_v2/tools/terminal/terminal_manager.py +153 -0
  154. aipt_v2/tools/terminal/terminal_session.py +449 -0
  155. aipt_v2/tools/tool_processing.py +108 -0
  156. aipt_v2/utils/__init__.py +17 -0
  157. aipt_v2/utils/logging.py +201 -0
  158. aipt_v2/utils/model_manager.py +187 -0
  159. aipt_v2/utils/searchers/__init__.py +269 -0
  160. aiptx-2.0.2.dist-info/METADATA +324 -0
  161. aiptx-2.0.2.dist-info/RECORD +165 -0
  162. aiptx-2.0.2.dist-info/WHEEL +5 -0
  163. aiptx-2.0.2.dist-info/entry_points.txt +7 -0
  164. aiptx-2.0.2.dist-info/licenses/LICENSE +21 -0
  165. aiptx-2.0.2.dist-info/top_level.txt +1 -0
aipt_v2/llm/memory.py ADDED
@@ -0,0 +1,214 @@
+ from __future__ import annotations
+
+ import logging
+ import os
+ from typing import Any
+
+ import litellm
+
+
+ logger = logging.getLogger(__name__)
+
+
+ MAX_TOTAL_TOKENS = 100_000
+ MIN_RECENT_MESSAGES = 15
+
+ SUMMARY_PROMPT_TEMPLATE = """You are an agent performing context
+ condensation for a security agent. Your job is to compress scan data while preserving
+ ALL operationally critical information for continuing the security assessment.
+
+ CRITICAL ELEMENTS TO PRESERVE:
+ - Discovered vulnerabilities and potential attack vectors
+ - Scan results and tool outputs (compressed but maintaining key findings)
+ - Access credentials, tokens, or authentication details found
+ - System architecture insights and potential weak points
+ - Progress made in the assessment
+ - Failed attempts and dead ends (to avoid duplication)
+ - Any decisions made about the testing approach
+
+ COMPRESSION GUIDELINES:
+ - Preserve exact technical details (URLs, paths, parameters, payloads)
+ - Summarize verbose tool outputs while keeping critical findings
+ - Maintain version numbers, specific technologies identified
+ - Keep exact error messages that might indicate vulnerabilities
+ - Compress repetitive or similar findings into consolidated form
+
+ Remember: Another security agent will use this summary to continue the assessment.
+ They must be able to pick up exactly where you left off without losing any
+ operational advantage or context needed to find vulnerabilities.
+
+ CONVERSATION SEGMENT TO SUMMARIZE:
+ {conversation}
+
+ Provide a technically precise summary that preserves all operational security context while
+ keeping the summary concise and to the point."""
+
+
+ def _count_tokens(text: str, model: str) -> int:
+     try:
+         count = litellm.token_counter(model=model, text=text)
+         return int(count)
+     except Exception:
+         logger.exception("Failed to count tokens")
+         return len(text) // 4  # Rough estimate
+
+
+ def _get_message_tokens(msg: dict[str, Any], model: str) -> int:
+     content = msg.get("content", "")
+     if isinstance(content, str):
+         return _count_tokens(content, model)
+     if isinstance(content, list):
+         return sum(
+             _count_tokens(item.get("text", ""), model)
+             for item in content
+             if isinstance(item, dict) and item.get("type") == "text"
+         )
+     return 0
+
+
+ def _extract_message_text(msg: dict[str, Any]) -> str:
+     content = msg.get("content", "")
+     if isinstance(content, str):
+         return content
+
+     if isinstance(content, list):
+         parts = []
+         for item in content:
+             if isinstance(item, dict):
+                 if item.get("type") == "text":
+                     parts.append(item.get("text", ""))
+                 elif item.get("type") == "image_url":
+                     parts.append("[IMAGE]")
+         return " ".join(parts)
+
+     return str(content)
+
+
+ def _summarize_messages(
+     messages: list[dict[str, Any]],
+     model: str,
+     timeout: int = 600,
+ ) -> dict[str, Any]:
+     if not messages:
+         empty_summary = "<context_summary message_count='0'>{text}</context_summary>"
+         return {
+             "role": "assistant",
+             "content": empty_summary.format(text="No messages to summarize"),
+         }
+
+     formatted = []
+     for msg in messages:
+         role = msg.get("role", "unknown")
+         text = _extract_message_text(msg)
+         formatted.append(f"{role}: {text}")
+
+     conversation = "\n".join(formatted)
+     prompt = SUMMARY_PROMPT_TEMPLATE.format(conversation=conversation)
+
+     try:
+         completion_args = {
+             "model": model,
+             "messages": [{"role": "user", "content": prompt}],
+             "timeout": timeout,
+         }
+
+         response = litellm.completion(**completion_args)
+         summary = response.choices[0].message.content or ""
+         if not summary.strip():
+             return messages[0]
+         summary_msg = "<context_summary message_count='{count}'>{text}</context_summary>"
+         return {
+             "role": "assistant",
+             "content": summary_msg.format(count=len(messages), text=summary),
+         }
+     except Exception:
+         logger.exception("Failed to summarize messages")
+         return messages[0]
+
+
+ def _handle_images(messages: list[dict[str, Any]], max_images: int) -> None:
+     image_count = 0
+     for msg in reversed(messages):
+         content = msg.get("content", [])
+         if isinstance(content, list):
+             for item in content:
+                 if isinstance(item, dict) and item.get("type") == "image_url":
+                     if image_count >= max_images:
+                         item.update(
+                             {
+                                 "type": "text",
+                                 "text": "[Previously attached image removed to preserve context]",
+                             }
+                         )
+                     else:
+                         image_count += 1
+
+
+ class MemoryCompressor:
+     def __init__(
+         self,
+         max_images: int = 3,
+         model_name: str | None = None,
+         timeout: int = 600,
+     ):
+         self.max_images = max_images
+         self.model_name = model_name or os.getenv("AIPT_LLM", "openai/gpt-5")
+         self.timeout = timeout
+
+         if not self.model_name:
+             raise ValueError("AIPT_LLM environment variable must be set and not empty")
+
+     def compress_history(
+         self,
+         messages: list[dict[str, Any]],
+     ) -> list[dict[str, Any]]:
+         """Compress conversation history to stay within token limits.
+
+         Strategy:
+         1. Handle image limits first
+         2. Keep all system messages
+         3. Keep minimum recent messages
+         4. Summarize older messages when total tokens exceed limit
+
+         The compression preserves:
+         - All system messages unchanged
+         - Most recent messages intact
+         - Critical security context in summaries
+         - Recent images for visual context
+         - Technical details and findings
+         """
+         if not messages:
+             return messages
+
+         _handle_images(messages, self.max_images)
+
+         system_msgs = []
+         regular_msgs = []
+         for msg in messages:
+             if msg.get("role") == "system":
+                 system_msgs.append(msg)
+             else:
+                 regular_msgs.append(msg)
+
+         recent_msgs = regular_msgs[-MIN_RECENT_MESSAGES:]
+         old_msgs = regular_msgs[:-MIN_RECENT_MESSAGES]
+
+         # Type assertion since we ensure model_name is not None in __init__
+         model_name: str = self.model_name  # type: ignore[assignment]
+
+         total_tokens = sum(
+             _get_message_tokens(msg, model_name) for msg in system_msgs + regular_msgs
+         )
+
+         if total_tokens <= MAX_TOTAL_TOKENS * 0.9:
+             return messages
+
+         compressed = []
+         chunk_size = 10
+         for i in range(0, len(old_msgs), chunk_size):
+             chunk = old_msgs[i : i + chunk_size]
+             summary = _summarize_messages(chunk, model_name, self.timeout)
+             if summary:
+                 compressed.append(summary)
+
+         return system_msgs + compressed + recent_msgs
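
For orientation, a minimal usage sketch of the compressor above. It assumes litellm is installed and credentials are configured; the model name "openai/gpt-4o-mini" is an arbitrary example, not a package default. Below 90% of MAX_TOTAL_TOKENS, compress_history returns the history unchanged and makes no LLM call.

from aipt_v2.llm.memory import MemoryCompressor

compressor = MemoryCompressor(max_images=3, model_name="openai/gpt-4o-mini")
history = [
    {"role": "system", "content": "You are a security assessment agent."},
    {"role": "user", "content": "Assess https://example.com for common issues."},
    {"role": "assistant", "content": "Starting with service discovery..."},
]

# Short histories pass through untouched; once the total exceeds 90% of
# MAX_TOTAL_TOKENS (100,000), messages older than the last MIN_RECENT_MESSAGES
# (15) are summarized in chunks of 10, while system messages are kept verbatim.
trimmed = compressor.compress_history(history)
assert trimmed == history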
aipt_v2/llm/request_queue.py ADDED
@@ -0,0 +1,89 @@
+ from __future__ import annotations
+
+ import asyncio
+ import logging
+ import os
+ import threading
+ import time
+ from typing import Any
+
+ import litellm
+ from litellm import ModelResponse, completion
+ from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def should_retry_exception(exception: Exception) -> bool:
+     status_code = None
+
+     if hasattr(exception, "status_code"):
+         status_code = exception.status_code
+     elif hasattr(exception, "response") and hasattr(exception.response, "status_code"):
+         status_code = exception.response.status_code
+
+     if status_code is not None:
+         return bool(litellm._should_retry(status_code))
+     return True
+
+
+ class LLMRequestQueue:
+     def __init__(self, max_concurrent: int = 1, delay_between_requests: float = 4.0):
+         rate_limit_delay = os.getenv("LLM_RATE_LIMIT_DELAY")
+         if rate_limit_delay:
+             delay_between_requests = float(rate_limit_delay)
+
+         rate_limit_concurrent = os.getenv("LLM_RATE_LIMIT_CONCURRENT")
+         if rate_limit_concurrent:
+             max_concurrent = int(rate_limit_concurrent)
+
+         self.max_concurrent = max_concurrent
+         self.delay_between_requests = delay_between_requests
+         self._semaphore = threading.BoundedSemaphore(max_concurrent)
+         self._last_request_time = 0.0
+         self._lock = threading.Lock()
+
+     async def make_request(self, completion_args: dict[str, Any]) -> ModelResponse:
+         try:
+             while not self._semaphore.acquire(timeout=0.2):
+                 await asyncio.sleep(0.1)
+
+             with self._lock:
+                 now = time.time()
+                 time_since_last = now - self._last_request_time
+                 sleep_needed = max(0, self.delay_between_requests - time_since_last)
+                 self._last_request_time = now + sleep_needed
+
+             if sleep_needed > 0:
+                 await asyncio.sleep(sleep_needed)
+
+             return await self._reliable_request(completion_args)
+         finally:
+             self._semaphore.release()
+
+     @retry(  # type: ignore[misc]
+         stop=stop_after_attempt(3),
+         wait=wait_exponential(multiplier=8, min=8, max=64),
+         retry=retry_if_exception(should_retry_exception),
+         reraise=True,
+     )
+     async def _reliable_request(self, completion_args: dict[str, Any]) -> ModelResponse:
+         response = completion(**completion_args, stream=False)
+         if isinstance(response, ModelResponse):
+             return response
+         self._raise_unexpected_response()
+         raise RuntimeError("Unreachable code")
+
+     def _raise_unexpected_response(self) -> None:
+         raise RuntimeError("Unexpected response type")
+
+
+ _global_queue: LLMRequestQueue | None = None
+
+
+ def get_global_queue() -> LLMRequestQueue:
+     global _global_queue  # noqa: PLW0603
+     if _global_queue is None:
+         _global_queue = LLMRequestQueue()
+     return _global_queue
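
A sketch of driving the queue, assuming provider credentials are configured for litellm (e.g. OPENAI_API_KEY) and using a placeholder model name:

import asyncio

from aipt_v2.llm.request_queue import get_global_queue


async def main() -> None:
    # Process-wide singleton; pacing and concurrency can be overridden via the
    # LLM_RATE_LIMIT_DELAY and LLM_RATE_LIMIT_CONCURRENT environment variables.
    queue = get_global_queue()
    response = await queue.make_request(
        {
            "model": "openai/gpt-4o-mini",  # example model, not a package default
            "messages": [{"role": "user", "content": "ping"}],
        }
    )
    print(response.choices[0].message.content)


asyncio.run(main())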
aipt_v2/llm/utils.py ADDED
@@ -0,0 +1,89 @@
+ from __future__ import annotations
+
+ import html
+ import re
+ from typing import Any
+
+
+ def _truncate_to_first_function(content: str) -> str:
+     if not content:
+         return content
+
+     function_starts = [match.start() for match in re.finditer(r"<function=", content)]
+
+     if len(function_starts) >= 2:
+         second_function_start = function_starts[1]
+
+         return content[:second_function_start].rstrip()
+
+     return content
+
+
+ def parse_tool_invocations(content: str) -> list[dict[str, Any]] | None:
+     content = _fix_stopword(content)
+
+     tool_invocations: list[dict[str, Any]] = []
+
+     fn_regex_pattern = r"<function=([^>]+)>\n?(.*?)</function>"
+     fn_param_regex_pattern = r"<parameter=([^>]+)>(.*?)</parameter>"
+
+     fn_matches = re.finditer(fn_regex_pattern, content, re.DOTALL)
+
+     for fn_match in fn_matches:
+         fn_name = fn_match.group(1)
+         fn_body = fn_match.group(2)
+
+         param_matches = re.finditer(fn_param_regex_pattern, fn_body, re.DOTALL)
+
+         args = {}
+         for param_match in param_matches:
+             param_name = param_match.group(1)
+             param_value = param_match.group(2).strip()
+
+             param_value = html.unescape(param_value)
+             args[param_name] = param_value
+
+         tool_invocations.append({"toolName": fn_name, "args": args})
+
+     return tool_invocations if tool_invocations else None
+
+
+ def _fix_stopword(content: str) -> str:
+     if "<function=" in content and content.count("<function=") == 1:
+         if content.endswith("</"):
+             content = content.rstrip() + "function>"
+         elif not content.rstrip().endswith("</function>"):
+             content = content + "\n</function>"
+     return content
+
+
+ def format_tool_call(tool_name: str, args: dict[str, Any]) -> str:
+     xml_parts = [f"<function={tool_name}>"]
+
+     for key, value in args.items():
+         xml_parts.append(f"<parameter={key}>{value}</parameter>")
+
+     xml_parts.append("</function>")
+
+     return "\n".join(xml_parts)
+
+
+ def clean_content(content: str) -> str:
+     if not content:
+         return ""
+
+     content = _fix_stopword(content)
+
+     tool_pattern = r"<function=[^>]+>.*?</function>"
+     cleaned = re.sub(tool_pattern, "", content, flags=re.DOTALL)
+
+     hidden_xml_patterns = [
+         r"<inter_agent_message>.*?</inter_agent_message>",
+         r"<agent_completion_report>.*?</agent_completion_report>",
+     ]
+     for pattern in hidden_xml_patterns:
+         cleaned = re.sub(pattern, "", cleaned, flags=re.DOTALL | re.IGNORECASE)
+
+     cleaned = re.sub(r"\n\s*\n", "\n\n", cleaned)
+
+     return cleaned.strip()
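
These helpers round-trip the package's pseudo-XML tool-call markup. A short sketch; the tool name "terminal_execute" is an illustrative placeholder, not necessarily a registered tool:

from aipt_v2.llm.utils import clean_content, format_tool_call, parse_tool_invocations

raw = format_tool_call("terminal_execute", {"command": "nmap -sV 10.0.0.5"})
# <function=terminal_execute>
# <parameter=command>nmap -sV 10.0.0.5</parameter>
# </function>

calls = parse_tool_invocations(raw)
assert calls == [{"toolName": "terminal_execute", "args": {"command": "nmap -sV 10.0.0.5"}}]

# clean_content strips tool markup and inter-agent XML, leaving only the prose.
print(clean_content("Running the scan now.\n" + raw))  # -> "Running the scan now."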
aipt_v2/models/__init__.py ADDED
@@ -0,0 +1,15 @@
+ """AIPT Data Models"""
+
+ from .findings import Finding, Severity, VulnerabilityType
+ from .scan_config import ScanConfig, ScanMode
+ from .phase_result import PhaseResult, Phase
+
+ __all__ = [
+     "Finding",
+     "Severity",
+     "VulnerabilityType",
+     "ScanConfig",
+     "ScanMode",
+     "PhaseResult",
+     "Phase",
+ ]
aipt_v2/models/findings.py ADDED
@@ -0,0 +1,295 @@
+ """
+ AIPT Finding Model - Unified vulnerability representation
+
+ This model represents vulnerabilities discovered by ANY tool in the pipeline:
+ - Traditional scanners (Acunetix, Burp, Nuclei, ZAP)
+ - AI-autonomous agents (Strix)
+ - Manual exploitation attempts
+ """
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from enum import Enum
+ from typing import Any
+ import hashlib
+ import json
+
+
+ class Severity(Enum):
+     """CVSS-aligned severity levels"""
+     CRITICAL = "critical"  # CVSS 9.0-10.0
+     HIGH = "high"  # CVSS 7.0-8.9
+     MEDIUM = "medium"  # CVSS 4.0-6.9
+     LOW = "low"  # CVSS 0.1-3.9
+     INFO = "info"  # CVSS 0.0 / Informational
+
+     @classmethod
+     def from_cvss(cls, score: float) -> "Severity":
+         """Convert CVSS score to severity level"""
+         if score >= 9.0:
+             return cls.CRITICAL
+         elif score >= 7.0:
+             return cls.HIGH
+         elif score >= 4.0:
+             return cls.MEDIUM
+         elif score > 0:
+             return cls.LOW
+         return cls.INFO
+
+     def __lt__(self, other: "Severity") -> bool:
+         order = [self.INFO, self.LOW, self.MEDIUM, self.HIGH, self.CRITICAL]
+         return order.index(self) < order.index(other)
+
+
+ class VulnerabilityType(Enum):
+     """OWASP Top 10 aligned vulnerability categories"""
+     # A01:2021 - Broken Access Control
+     IDOR = "idor"
+     BROKEN_ACCESS_CONTROL = "broken_access_control"
+     PRIVILEGE_ESCALATION = "privilege_escalation"
+
+     # A02:2021 - Cryptographic Failures
+     WEAK_CRYPTO = "weak_crypto"
+     SENSITIVE_DATA_EXPOSURE = "sensitive_data_exposure"
+
+     # A03:2021 - Injection
+     SQL_INJECTION = "sql_injection"
+     COMMAND_INJECTION = "command_injection"
+     LDAP_INJECTION = "ldap_injection"
+     XPATH_INJECTION = "xpath_injection"
+     NOSQL_INJECTION = "nosql_injection"
+
+     # A04:2021 - Insecure Design
+     BUSINESS_LOGIC_FLAW = "business_logic_flaw"
+
+     # A05:2021 - Security Misconfiguration
+     MISCONFIGURATION = "misconfiguration"
+     DEFAULT_CREDENTIALS = "default_credentials"
+     DIRECTORY_LISTING = "directory_listing"
+
+     # A06:2021 - Vulnerable Components
+     OUTDATED_COMPONENT = "outdated_component"
+     KNOWN_CVE = "known_cve"
+
+     # A07:2021 - Authentication Failures
+     AUTH_BYPASS = "auth_bypass"
+     WEAK_PASSWORD = "weak_password"
+     SESSION_FIXATION = "session_fixation"
+
+     # A08:2021 - Software Integrity Failures
+     INSECURE_DESERIALIZATION = "insecure_deserialization"
+
+     # A09:2021 - Logging & Monitoring Failures
+     INSUFFICIENT_LOGGING = "insufficient_logging"
+
+     # A10:2021 - SSRF
+     SSRF = "ssrf"
+
+     # Cross-Site Scripting (separate category)
+     XSS_REFLECTED = "xss_reflected"
+     XSS_STORED = "xss_stored"
+     XSS_DOM = "xss_dom"
+
+     # Other
+     OPEN_REDIRECT = "open_redirect"
+     FILE_INCLUSION = "file_inclusion"
+     FILE_UPLOAD = "file_upload"
+     XXE = "xxe"
+     CORS_MISCONFIGURATION = "cors_misconfiguration"
+     CSRF = "csrf"
+     INFORMATION_DISCLOSURE = "information_disclosure"
+     RCE = "rce"
+
+     # Catch-all
+     OTHER = "other"
+
+
+ @dataclass
+ class Finding:
+     """
+     Unified vulnerability finding from any source
+
+     This is the core data structure that normalizes findings from:
+     - Acunetix (JSON API responses)
+     - Burp Suite (XML/JSON exports)
+     - Nuclei (JSON output)
+     - ZAP (JSON API responses)
+     - Strix (AI agent reports)
+     """
+
+     # Core identification
+     title: str
+     severity: Severity
+     vuln_type: VulnerabilityType
+
+     # Location
+     url: str
+     parameter: str | None = None
+     method: str = "GET"
+
+     # Evidence
+     description: str = ""
+     evidence: str = ""
+     request: str | None = None
+     response: str | None = None
+
+     # Source tracking
+     source: str = "unknown"  # acunetix, burp, nuclei, zap, aipt, manual
+     source_id: str | None = None  # Original ID from source scanner
+
+     # Validation
+     confirmed: bool = False
+     exploited: bool = False
+     poc_command: str | None = None
+
+     # Metadata
+     cvss_score: float | None = None
+     cwe_id: str | None = None
+     cve_ids: list[str] = field(default_factory=list)
+     references: list[str] = field(default_factory=list)
+
+     # Remediation
+     remediation: str = ""
+
+     # Timestamps
+     discovered_at: datetime = field(default_factory=datetime.utcnow)
+
+     # AI-specific fields (for Strix findings)
+     ai_reasoning: str | None = None
+     ai_confidence: float | None = None  # 0.0 to 1.0
+
+     def __post_init__(self):
+         """Generate unique fingerprint for deduplication"""
+         self._fingerprint = self._generate_fingerprint()
+
+     def _generate_fingerprint(self) -> str:
+         """
+         Generate a unique fingerprint for finding deduplication.
+
+         Two findings are considered duplicates if they have the same:
+         - URL (normalized)
+         - Parameter
+         - Vulnerability type
+         """
+         normalized_url = self.url.rstrip("/").lower()
+         data = f"{normalized_url}:{self.parameter}:{self.vuln_type.value}"
+         return hashlib.sha256(data.encode()).hexdigest()[:16]
+
+     @property
+     def fingerprint(self) -> str:
+         return self._fingerprint
+
+     def is_duplicate_of(self, other: "Finding") -> bool:
+         """Check if this finding is a duplicate of another"""
+         return self.fingerprint == other.fingerprint
+
+     def merge_with(self, other: "Finding") -> "Finding":
+         """
+         Merge two duplicate findings, keeping the best evidence from both.
+         Prefers confirmed/exploited findings, higher confidence, more details.
+         """
+         # Prefer the confirmed/exploited finding
+         if other.confirmed and not self.confirmed:
+             base, supplement = other, self
+         elif other.exploited and not self.exploited:
+             base, supplement = other, self
+         else:
+             base, supplement = self, other
+
+         # Merge evidence
+         merged_evidence = base.evidence
+         if supplement.evidence and supplement.evidence not in merged_evidence:
+             merged_evidence = f"{merged_evidence}\n\n--- Additional Evidence ---\n{supplement.evidence}"
+
+         # Merge sources
+         sources = set([base.source, supplement.source])
+         merged_source = ", ".join(sorted(sources))
+
+         # Take highest confidence
+         confidence = max(
+             base.ai_confidence or 0,
+             supplement.ai_confidence or 0
+         ) or None
+
+         return Finding(
+             title=base.title,
217
+ severity=max(base.severity, other.severity), # Take highest severity
218
+ vuln_type=base.vuln_type,
219
+ url=base.url,
220
+ parameter=base.parameter,
221
+ method=base.method,
222
+ description=base.description or supplement.description,
223
+ evidence=merged_evidence,
224
+ request=base.request or supplement.request,
225
+ response=base.response or supplement.response,
226
+ source=merged_source,
227
+ confirmed=base.confirmed or supplement.confirmed,
228
+ exploited=base.exploited or supplement.exploited,
229
+ poc_command=base.poc_command or supplement.poc_command,
230
+ cvss_score=base.cvss_score or supplement.cvss_score,
231
+ cwe_id=base.cwe_id or supplement.cwe_id,
232
+ cve_ids=list(set(base.cve_ids + supplement.cve_ids)),
233
+ references=list(set(base.references + supplement.references)),
234
+ remediation=base.remediation or supplement.remediation,
235
+ ai_reasoning=base.ai_reasoning or supplement.ai_reasoning,
236
+ ai_confidence=confidence,
237
+ )
238
+
239
+ def to_dict(self) -> dict[str, Any]:
240
+ """Convert to dictionary for JSON serialization"""
241
+ return {
242
+ "fingerprint": self.fingerprint,
243
+ "title": self.title,
244
+ "severity": self.severity.value,
245
+ "vuln_type": self.vuln_type.value,
246
+ "url": self.url,
247
+ "parameter": self.parameter,
248
+ "method": self.method,
249
+ "description": self.description,
250
+ "evidence": self.evidence,
251
+ "request": self.request,
252
+ "response": self.response,
253
+ "source": self.source,
254
+ "source_id": self.source_id,
255
+ "confirmed": self.confirmed,
256
+ "exploited": self.exploited,
257
+ "poc_command": self.poc_command,
258
+ "cvss_score": self.cvss_score,
259
+ "cwe_id": self.cwe_id,
260
+ "cve_ids": self.cve_ids,
261
+ "references": self.references,
262
+ "remediation": self.remediation,
263
+ "discovered_at": self.discovered_at.isoformat(),
264
+ "ai_reasoning": self.ai_reasoning,
265
+ "ai_confidence": self.ai_confidence,
266
+ }
267
+
268
+ @classmethod
269
+ def from_dict(cls, data: dict[str, Any]) -> "Finding":
270
+ """Create Finding from dictionary"""
271
+ return cls(
272
+ title=data["title"],
273
+ severity=Severity(data["severity"]),
274
+ vuln_type=VulnerabilityType(data.get("vuln_type", "other")),
275
+ url=data["url"],
276
+ parameter=data.get("parameter"),
277
+ method=data.get("method", "GET"),
278
+ description=data.get("description", ""),
279
+ evidence=data.get("evidence", ""),
280
+ request=data.get("request"),
281
+ response=data.get("response"),
282
+ source=data.get("source", "unknown"),
283
+ source_id=data.get("source_id"),
284
+ confirmed=data.get("confirmed", False),
285
+ exploited=data.get("exploited", False),
286
+ poc_command=data.get("poc_command"),
287
+ cvss_score=data.get("cvss_score"),
288
+ cwe_id=data.get("cwe_id"),
289
+ cve_ids=data.get("cve_ids", []),
290
+ references=data.get("references", []),
291
+ remediation=data.get("remediation", ""),
292
+ discovered_at=datetime.fromisoformat(data["discovered_at"]) if "discovered_at" in data else datetime.utcnow(),
293
+ ai_reasoning=data.get("ai_reasoning"),
294
+ ai_confidence=data.get("ai_confidence"),
295
+ )
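
To illustrate the fingerprint-based deduplication and merge behaviour, a small sketch (the target URL and scanner sources are invented for the example):

from aipt_v2.models.findings import Finding, Severity, VulnerabilityType

a = Finding(
    title="SQL injection in login form",
    severity=Severity.HIGH,
    vuln_type=VulnerabilityType.SQL_INJECTION,
    url="https://example.com/login/",
    parameter="username",
    source="nuclei",
)
b = Finding(
    title="SQL injection in login form",
    severity=Severity.CRITICAL,
    vuln_type=VulnerabilityType.SQL_INJECTION,
    url="https://example.com/login",  # trailing slash is normalized away
    parameter="username",
    source="zap",
    confirmed=True,
)

assert a.is_duplicate_of(b)  # same normalized URL + parameter + vuln type
merged = a.merge_with(b)
print(merged.source)    # "nuclei, zap"
print(merged.severity)  # Severity.CRITICAL (the confirmed finding wins as base)

# to_dict/from_dict round-trips cleanly; the fingerprint is recomputed.
assert Finding.from_dict(merged.to_dict()).fingerprint == merged.fingerprint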