aiptx-2.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of aiptx might be problematic.

Files changed (165)
  1. aipt_v2/__init__.py +110 -0
  2. aipt_v2/__main__.py +24 -0
  3. aipt_v2/agents/AIPTxAgent/__init__.py +10 -0
  4. aipt_v2/agents/AIPTxAgent/aiptx_agent.py +211 -0
  5. aipt_v2/agents/__init__.py +24 -0
  6. aipt_v2/agents/base.py +520 -0
  7. aipt_v2/agents/ptt.py +406 -0
  8. aipt_v2/agents/state.py +168 -0
  9. aipt_v2/app.py +960 -0
  10. aipt_v2/browser/__init__.py +31 -0
  11. aipt_v2/browser/automation.py +458 -0
  12. aipt_v2/browser/crawler.py +453 -0
  13. aipt_v2/cli.py +321 -0
  14. aipt_v2/compliance/__init__.py +71 -0
  15. aipt_v2/compliance/compliance_report.py +449 -0
  16. aipt_v2/compliance/framework_mapper.py +424 -0
  17. aipt_v2/compliance/nist_mapping.py +345 -0
  18. aipt_v2/compliance/owasp_mapping.py +330 -0
  19. aipt_v2/compliance/pci_mapping.py +297 -0
  20. aipt_v2/config.py +288 -0
  21. aipt_v2/core/__init__.py +43 -0
  22. aipt_v2/core/agent.py +630 -0
  23. aipt_v2/core/llm.py +395 -0
  24. aipt_v2/core/memory.py +305 -0
  25. aipt_v2/core/ptt.py +329 -0
  26. aipt_v2/database/__init__.py +14 -0
  27. aipt_v2/database/models.py +232 -0
  28. aipt_v2/database/repository.py +384 -0
  29. aipt_v2/docker/__init__.py +23 -0
  30. aipt_v2/docker/builder.py +260 -0
  31. aipt_v2/docker/manager.py +222 -0
  32. aipt_v2/docker/sandbox.py +371 -0
  33. aipt_v2/evasion/__init__.py +58 -0
  34. aipt_v2/evasion/request_obfuscator.py +272 -0
  35. aipt_v2/evasion/tls_fingerprint.py +285 -0
  36. aipt_v2/evasion/ua_rotator.py +301 -0
  37. aipt_v2/evasion/waf_bypass.py +439 -0
  38. aipt_v2/execution/__init__.py +23 -0
  39. aipt_v2/execution/executor.py +302 -0
  40. aipt_v2/execution/parser.py +544 -0
  41. aipt_v2/execution/terminal.py +337 -0
  42. aipt_v2/health.py +437 -0
  43. aipt_v2/intelligence/__init__.py +85 -0
  44. aipt_v2/intelligence/auth.py +520 -0
  45. aipt_v2/intelligence/chaining.py +775 -0
  46. aipt_v2/intelligence/cve_aipt.py +334 -0
  47. aipt_v2/intelligence/cve_info.py +1111 -0
  48. aipt_v2/intelligence/rag.py +239 -0
  49. aipt_v2/intelligence/scope.py +442 -0
  50. aipt_v2/intelligence/searchers/__init__.py +5 -0
  51. aipt_v2/intelligence/searchers/exploitdb_searcher.py +523 -0
  52. aipt_v2/intelligence/searchers/github_searcher.py +467 -0
  53. aipt_v2/intelligence/searchers/google_searcher.py +281 -0
  54. aipt_v2/intelligence/tools.json +443 -0
  55. aipt_v2/intelligence/triage.py +670 -0
  56. aipt_v2/interface/__init__.py +5 -0
  57. aipt_v2/interface/cli.py +230 -0
  58. aipt_v2/interface/main.py +501 -0
  59. aipt_v2/interface/tui.py +1276 -0
  60. aipt_v2/interface/utils.py +583 -0
  61. aipt_v2/llm/__init__.py +39 -0
  62. aipt_v2/llm/config.py +26 -0
  63. aipt_v2/llm/llm.py +514 -0
  64. aipt_v2/llm/memory.py +214 -0
  65. aipt_v2/llm/request_queue.py +89 -0
  66. aipt_v2/llm/utils.py +89 -0
  67. aipt_v2/models/__init__.py +15 -0
  68. aipt_v2/models/findings.py +295 -0
  69. aipt_v2/models/phase_result.py +224 -0
  70. aipt_v2/models/scan_config.py +207 -0
  71. aipt_v2/monitoring/grafana/dashboards/aipt-dashboard.json +355 -0
  72. aipt_v2/monitoring/grafana/dashboards/default.yml +17 -0
  73. aipt_v2/monitoring/grafana/datasources/prometheus.yml +17 -0
  74. aipt_v2/monitoring/prometheus.yml +60 -0
  75. aipt_v2/orchestration/__init__.py +52 -0
  76. aipt_v2/orchestration/pipeline.py +398 -0
  77. aipt_v2/orchestration/progress.py +300 -0
  78. aipt_v2/orchestration/scheduler.py +296 -0
  79. aipt_v2/orchestrator.py +2284 -0
  80. aipt_v2/payloads/__init__.py +27 -0
  81. aipt_v2/payloads/cmdi.py +150 -0
  82. aipt_v2/payloads/sqli.py +263 -0
  83. aipt_v2/payloads/ssrf.py +204 -0
  84. aipt_v2/payloads/templates.py +222 -0
  85. aipt_v2/payloads/traversal.py +166 -0
  86. aipt_v2/payloads/xss.py +204 -0
  87. aipt_v2/prompts/__init__.py +60 -0
  88. aipt_v2/proxy/__init__.py +29 -0
  89. aipt_v2/proxy/history.py +352 -0
  90. aipt_v2/proxy/interceptor.py +452 -0
  91. aipt_v2/recon/__init__.py +44 -0
  92. aipt_v2/recon/dns.py +241 -0
  93. aipt_v2/recon/osint.py +367 -0
  94. aipt_v2/recon/subdomain.py +372 -0
  95. aipt_v2/recon/tech_detect.py +311 -0
  96. aipt_v2/reports/__init__.py +17 -0
  97. aipt_v2/reports/generator.py +313 -0
  98. aipt_v2/reports/html_report.py +378 -0
  99. aipt_v2/runtime/__init__.py +44 -0
  100. aipt_v2/runtime/base.py +30 -0
  101. aipt_v2/runtime/docker.py +401 -0
  102. aipt_v2/runtime/local.py +346 -0
  103. aipt_v2/runtime/tool_server.py +205 -0
  104. aipt_v2/scanners/__init__.py +28 -0
  105. aipt_v2/scanners/base.py +273 -0
  106. aipt_v2/scanners/nikto.py +244 -0
  107. aipt_v2/scanners/nmap.py +402 -0
  108. aipt_v2/scanners/nuclei.py +273 -0
  109. aipt_v2/scanners/web.py +454 -0
  110. aipt_v2/scripts/security_audit.py +366 -0
  111. aipt_v2/telemetry/__init__.py +7 -0
  112. aipt_v2/telemetry/tracer.py +347 -0
  113. aipt_v2/terminal/__init__.py +28 -0
  114. aipt_v2/terminal/executor.py +400 -0
  115. aipt_v2/terminal/sandbox.py +350 -0
  116. aipt_v2/tools/__init__.py +44 -0
  117. aipt_v2/tools/active_directory/__init__.py +78 -0
  118. aipt_v2/tools/active_directory/ad_config.py +238 -0
  119. aipt_v2/tools/active_directory/bloodhound_wrapper.py +447 -0
  120. aipt_v2/tools/active_directory/kerberos_attacks.py +430 -0
  121. aipt_v2/tools/active_directory/ldap_enum.py +533 -0
  122. aipt_v2/tools/active_directory/smb_attacks.py +505 -0
  123. aipt_v2/tools/agents_graph/__init__.py +19 -0
  124. aipt_v2/tools/agents_graph/agents_graph_actions.py +69 -0
  125. aipt_v2/tools/api_security/__init__.py +76 -0
  126. aipt_v2/tools/api_security/api_discovery.py +608 -0
  127. aipt_v2/tools/api_security/graphql_scanner.py +622 -0
  128. aipt_v2/tools/api_security/jwt_analyzer.py +577 -0
  129. aipt_v2/tools/api_security/openapi_fuzzer.py +761 -0
  130. aipt_v2/tools/browser/__init__.py +5 -0
  131. aipt_v2/tools/browser/browser_actions.py +238 -0
  132. aipt_v2/tools/browser/browser_instance.py +535 -0
  133. aipt_v2/tools/browser/tab_manager.py +344 -0
  134. aipt_v2/tools/cloud/__init__.py +70 -0
  135. aipt_v2/tools/cloud/cloud_config.py +273 -0
  136. aipt_v2/tools/cloud/cloud_scanner.py +639 -0
  137. aipt_v2/tools/cloud/prowler_tool.py +571 -0
  138. aipt_v2/tools/cloud/scoutsuite_tool.py +359 -0
  139. aipt_v2/tools/executor.py +307 -0
  140. aipt_v2/tools/parser.py +408 -0
  141. aipt_v2/tools/proxy/__init__.py +5 -0
  142. aipt_v2/tools/proxy/proxy_actions.py +103 -0
  143. aipt_v2/tools/proxy/proxy_manager.py +789 -0
  144. aipt_v2/tools/registry.py +196 -0
  145. aipt_v2/tools/scanners/__init__.py +343 -0
  146. aipt_v2/tools/scanners/acunetix_tool.py +712 -0
  147. aipt_v2/tools/scanners/burp_tool.py +631 -0
  148. aipt_v2/tools/scanners/config.py +156 -0
  149. aipt_v2/tools/scanners/nessus_tool.py +588 -0
  150. aipt_v2/tools/scanners/zap_tool.py +612 -0
  151. aipt_v2/tools/terminal/__init__.py +5 -0
  152. aipt_v2/tools/terminal/terminal_actions.py +37 -0
  153. aipt_v2/tools/terminal/terminal_manager.py +153 -0
  154. aipt_v2/tools/terminal/terminal_session.py +449 -0
  155. aipt_v2/tools/tool_processing.py +108 -0
  156. aipt_v2/utils/__init__.py +17 -0
  157. aipt_v2/utils/logging.py +201 -0
  158. aipt_v2/utils/model_manager.py +187 -0
  159. aipt_v2/utils/searchers/__init__.py +269 -0
  160. aiptx-2.0.2.dist-info/METADATA +324 -0
  161. aiptx-2.0.2.dist-info/RECORD +165 -0
  162. aiptx-2.0.2.dist-info/WHEEL +5 -0
  163. aiptx-2.0.2.dist-info/entry_points.txt +7 -0
  164. aiptx-2.0.2.dist-info/licenses/LICENSE +21 -0
  165. aiptx-2.0.2.dist-info/top_level.txt +1 -0
aipt_v2/intelligence/searchers/google_searcher.py
@@ -0,0 +1,281 @@
+ import os
+ # from googlesearch import search
+ import re
+ from tqdm import tqdm
+ # import csv
+ from deep_translator import GoogleTranslator
+ import spacy
+ from bs4 import BeautifulSoup
+ import requests
+ from aipt_v2.utils.searchers.Domain_Filter import domain_filter
+ from aipt_v2.utils.searchers.Extension_Filter import for_google_webpage
+ from aipt_v2.utils.searchers.util import *
+ from langdetect import detect
+ import time
+ from readability import Document
+ from inscriptis import get_text
+ import dotenv
+ from aipt_v2.utils.model_manager import get_model
+
+ dotenv.load_dotenv()
+
+ nlp = spacy.load("en_core_web_sm")
+
+ def google_search(query, api_key, cse_id, num=10, **kwargs):
+     """
+     Use the Google Custom Search JSON API to execute a search.
+
+     Args:
+         query (str): search keyword
+         api_key (str): your Google API key
+         cse_id (str): your self-defined search engine ID (cx parameter)
+         num (int, optional): number of search results
+         **kwargs: other query parameters
+
+     Returns:
+         dict: the JSON response from the API
+     """
+     url = "https://www.googleapis.com/customsearch/v1"
+     params = {
+         'q': query,
+         'key': api_key,
+         'cx': cse_id,
+         'num': num
+     }
+     # merge in any other parameters
+     params.update(kwargs)
+
+     # Security: add a timeout to prevent indefinite hangs (CWE-400)
+     response = requests.get(url, params=params, timeout=30)
+     response.raise_for_status()  # raise if the request failed
+     return response.json()
+
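As a usage note: a minimal sketch of calling `google_search` directly, assuming the module path from the file list above and that `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` are set in the environment (the same variables `search_keyword` reads later in this file):

    import os
    from aipt_v2.intelligence.searchers.google_searcher import google_search

    results = google_search(
        "CVE-2021-41773 exploit",          # illustrative query
        api_key=os.environ["GOOGLE_API_KEY"],
        cse_id=os.environ["GOOGLE_CSE_ID"],
        num=5,
    )
    for item in results.get("items", []):  # Custom Search returns matches under "items"
        print(item["link"])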
+ class GoogleSearcher:
+     '''Given a query with an exploit name and app number, search on Google, take 10 hyperlinks, and create a folder holding one file per link'''
+     def __init__(self, model_name: str = "openai"):
+         self.llm = get_model(model_name)
+
+     def fetch_webpage_content(self, url, timeout=180):
+         try:
+             response = requests.get(url, timeout=timeout)  # enforce a timeout
+             response.raise_for_status()  # check that the request succeeded
+             return response.text
+         except requests.RequestException as e:
+             print(f"Request Error: {e}")
+             return None
+
+     def translate_text(self, text, source='auto', target='en', max_length=5000):
+         doc = nlp(text)
+         parts = []
+         current_part = ""
+
+         for sent in doc.sents:
+             if len(current_part) + len(sent.text) <= max_length:
+                 current_part += sent.text + " "
+             else:
+                 parts.append(current_part)
+                 current_part = sent.text + " "
+         if current_part:
+             parts.append(current_part)
+
+         translated_parts = [GoogleTranslator(source=source, target=target).translate(part.strip()) for part in parts if len(part.strip()) > 0]
+         return ' '.join(translated_parts)
+
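`translate_text` packs whole sentences into chunks of at most `max_length` characters before handing them to deep-translator, since web translation backends reject very long inputs (the 5000-character default is the method's own cap; the exact backend limit is an assumption). A small illustrative call:

    searcher = GoogleSearcher()  # note: __init__ initializes the LLM via get_model("openai")
    # Illustrative input; in the crawl pipeline this text comes from readability + inscriptis.
    sample = "Cette faille permet l'exécution de code à distance."
    print(searcher.translate_text(sample))  # sentences are chunked, translated, and rejoined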
+     def extract_content(self, result, start_marker, end_marker):
+         # define the regular expression pattern
+         pattern = f'{start_marker}(.*?){end_marker}'
+         # re.DOTALL lets . match any character, including newlines
+         matches = re.findall(pattern, result, re.DOTALL)
+         return matches
+
+     def filter_full_file(self, file_content):
+         # use the configured LLM to filter files
+         if not self.llm:
+             print("LLM not initialized, skipping file filtering")
+             return "<<@#%_tag useless >>@#%_tag"
+
+         from langchain_core.messages import HumanMessage, SystemMessage
+
+         system_message = SystemMessage(content="""You're an excellent cybersecurity penetration tester assistant.
+ You need to help the tester filter a file's content, and your commitment is essential to the task.
+ You will be provided a file's content; you need to read it and then use your knowledge to determine whether the file is directly useful for an actual penetration.
+ "DIRECTLY USEFUL" means you can directly follow the instructions provided by the file to penetrate.
+ If so, you are supposed to label it, and then it will be passed to your co-workers.
+
+ To help you understand what is "DIRECTLY USEFUL", here are some examples:
+ \t\t"code(s):\n <?php
+ // PHP script to test file upload vulnerability
+ echo "File upload vulnerability test successful";
+ ?>"
+ \t\t"plain instruction(s):\n Open the login page of the target website.
+ Enter the following code in the username or password field: <script>alert('XSS')</script>.
+ Submit the form.
+ If there is an XSS vulnerability, a warning box with the word "XSS" will pop up in your browser."
+ \t\t"command line operation(s):\n curl -d "username=whoami; cat /etc/passwd" http://example.com/login\n
+ or\n
+ netsh>rpc
+ netsh rpc>filter
+ netsh rpc filter>add rule layer=um actiontype=block
+ netsh rpc filter>add condition field=if_uuid matchtype=equal data=4b324fc8-1670-01d3-1278-5a47bf6ee188
+ netsh rpc filter>add filter
+ netsh rpc filter>quit"
+ \t\t"payload(s):\n ' OR 1=1 -- "
+
+ To make the process more efficient, we predefine a pair of unique strings ("<<@#%_tag", ">>@#%_tag") for your answer so that others will recognize them more easily.
+ So the format of your answer should look like: "<<@#%_tag useful >>@#%_tag" or "<<@#%_tag useless >>@#%_tag". The tag should be unique.
+
+ Remember, you should be careful. A common misleading scenario is a file that actually contains something helpful but is so long, or hides the information so deeply, that it gets excluded. So you need to carefully read the whole text.
+ Meanwhile, to reduce your co-workers' burden, you need to be strict. It is okay to find that a file is not actually useful for executing a penetration. If so, feel free to skip those parts.
+ You do not need to assume that a strange URL or link may contain something useful. We can access them through other approaches. So if these things appear in the file, make sure they do not affect your judgement.
+ You can summarize but do not conclude or make assumptions, and your answer should be your most confident one. Keep the answer concise.
+ """)
+
+         user_message = HumanMessage(content=f"""Please make a judgement and filter the following content: \n\n{file_content}
+ \n\n\n\n\n
+
+ Please make sure that you have used the unique string pair ("<<@#%_tag", ">>@#%_tag"); especially, do not forget to add ">>@#%_tag" at the end.""")
+
+         try:
+             response = self.llm.invoke([system_message, user_message])
+             summary = response.content.strip()
+             return summary
+         except Exception as e:
+             print(f"Error calling LLM: {e}")
+             return "<<@#%_tag useless >>@#%_tag"
+
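The odd-looking `<<@#%_tag ... >>@#%_tag` pair exists so the verdict survives free-form model output and can be recovered with `extract_content`. A sketch of that round trip (the `raw` string stands in for an LLM response):

    searcher = GoogleSearcher()
    raw = "The page gives a working curl command. <<@#%_tag useful >>@#%_tag"
    verdicts = searcher.extract_content(raw, '<<@#%_tag', '>>@#%_tag')
    # extract_content returns a list of captures; guard against a missing end tag
    if verdicts and verdicts[0].strip() == "useful":
        print("keep this page")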
+     def search_keyword(self, keyword: str, output_dir: str):
+         # print("google called")
+         links = []
+         search_keyword = keyword
+         # domain_filter = ["github", "suibian"]
+         # + " exploit"
+         search_results = []
+         try:
+             time.sleep(5)
+             search_results = google_search(search_keyword, os.environ.get("GOOGLE_API_KEY"), os.environ.get("GOOGLE_CSE_ID"))
+         except Exception:
+             print("Error occurred during google search. Continuing...")
+
+         # search_results = list(search_results)[:10]
+
+         # create a list with all links
+         # for result in search_results:
+         #     print(result)
+         #     if all(domain not in result.lower() for domain in domain_filter):
+         #         web_name = result.split("//")[1]
+         #         web_name = web_name.replace("/", "-")
+         #         web_name = web_name[:30] if len(web_name) > 30 else web_name
+         #         links.append((web_name, result))
+         # if not os.path.exists(output_dir):
+
+         if 'items' in search_results:
+             for i, item in enumerate(search_results['items'], 1):
+
+                 # process links - filter by domain name
+                 result_link = item['link']
+                 if domain_filter:
+                     if all(domain not in result_link.lower() for domain in domain_filter):
+                         web_name = result_link.split("//")[-1].split("/")[0]
+                         web_name = web_name.replace("/", "-")
+                         web_name = web_name[:30] if len(web_name) > 30 else web_name
+                         links.append((web_name, result_link))
+                 else:
+                     web_name = result_link.split("//")[-1].split("/")[0]
+                     web_name = web_name.replace("/", "-")
+                     web_name = web_name[:30] if len(web_name) > 30 else web_name
+                     links.append((web_name, result_link))
+         else:
+             print("Cannot find related info.")
+         self.create_directories(output_dir, links)
+
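Putting it together, a sketch of driving the searcher end to end; the query and output path are illustrative, and the Google credentials must be available via the environment or a `.env` file picked up by `dotenv.load_dotenv()`:

    g = GoogleSearcher(model_name="openai")
    # Fetches up to 10 results, crawls each page, translates non-English text,
    # and writes an R_DOC.md for every page the LLM labels "useful".
    g.search_keyword("CVE-2021-41773 exploit", "./data/CVE-2021-41773/temp")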
+     def create_directories(self, output_dir, links):
+         # create the folder if it doesn't exist already
+         # print("called!")
+         os.makedirs(output_dir, exist_ok=True)
+         for (name, link) in tqdm(links, desc="Crawling Google pages"):
+             try:
+                 # if it already exists, skip
+                 if os.path.exists(os.path.join(output_dir, name)):
+                     continue
+
+                 time.sleep(5)
+                 # document = SimpleWebPageReader(html_to_text=True).load_data([link])[0].text
+                 html = self.fetch_webpage_content(link)
+                 if html is None:
+                     continue  # fetch failed; the error has already been printed
+                 soup = BeautifulSoup(html, "html.parser")
+
+                 # extension list
+                 extensions_to_remove = for_google_webpage
+
+                 # remove images
+                 for img_tag in soup.find_all('img'):
+                     src = img_tag.get('src')
+                     if src and any(src.lower().endswith(ext) for ext in extensions_to_remove):
+                         img_tag.decompose()
+
+                 # remove links, may not be necessary
+                 # for a_tag in soup.find_all('a'):
+                 #     href = a_tag.get('href')
+                 #     if href and any(href.lower().endswith(ext) for ext in extensions_to_remove):
+                 #         a_tag.decompose()
+
+                 doc = Document(soup.prettify())  # back to an HTML string readability can process
+                 content = doc.summary()  # readability strips unrelated content, returns HTML
+                 full_text = get_text(content)  # clean text, with relative layout preserved
+
+                 # not empty -> create the directory, else skip
+                 if full_text.strip():
+                     os.mkdir(os.path.join(output_dir, name))
+                 else:
+                     continue
+
+                 # translate non-English webpages
+                 if full_text.strip():
+                     lan = detect(full_text)
+                     # print(lan)
+                     if lan != "en":
+                         time.sleep(5)
+                         full_doc = self.translate_text(full_text)
+                     else:
+                         full_doc = full_text
+
+             except Exception as e:
+                 print("Error occurred while downloading web page:", e)
+                 continue
+
+             if len(full_doc) > 1000000:
+                 continue
+
+             # use the LLM to filter, then create the md doc
+             full_doc_judgement = self.filter_full_file(full_doc)
+             # print(full_doc_judgement)
+             full_doc_judgement = self.extract_content(full_doc_judgement, '<<@#%_tag', '>>@#%_tag')
+             # print(full_doc_judgement)
+             if full_doc_judgement and full_doc_judgement[0].strip() == "useful":
+                 with open(os.path.join(output_dir, name, "R_DOC.md"), "w") as f:
+                     f.write(f"link to this page is {link}\n\n")
+                     f.write(full_doc)
+
+         remove_empty_directories(output_dir)
+
+
+ # def main():
+ #     g = GoogleSearcher()
+ #     base_dir = "/root/crawl_classify/data/"
+ #     level = "WPNZ"
+ #     # with open(os.path.join(base_dir, "index", f"{level}.csv"), mode='r', newline='', encoding='utf-8') as file:
+ #     with open(os.path.join(base_dir, "index", "example1.csv"), mode='r', newline='', encoding='utf-8') as file:
+ #         reader = csv.reader(file, delimiter='\t')
+
+ #         for row in reader:
+ #             avd_id, cve_id, zh_keyword, avd_url = row
+ #             if cve_id != "N/A":
+ #                 g.search_keyword(f"{cve_id} exploit", os.path.join(base_dir, level, avd_id, "temp"))
+ #                 time.sleep(30)
+
+
+ # if __name__ == "__main__":
+ #     main()