aiptx-2.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of aiptx might be problematic.

Files changed (165)
  1. aipt_v2/__init__.py +110 -0
  2. aipt_v2/__main__.py +24 -0
  3. aipt_v2/agents/AIPTxAgent/__init__.py +10 -0
  4. aipt_v2/agents/AIPTxAgent/aiptx_agent.py +211 -0
  5. aipt_v2/agents/__init__.py +24 -0
  6. aipt_v2/agents/base.py +520 -0
  7. aipt_v2/agents/ptt.py +406 -0
  8. aipt_v2/agents/state.py +168 -0
  9. aipt_v2/app.py +960 -0
  10. aipt_v2/browser/__init__.py +31 -0
  11. aipt_v2/browser/automation.py +458 -0
  12. aipt_v2/browser/crawler.py +453 -0
  13. aipt_v2/cli.py +321 -0
  14. aipt_v2/compliance/__init__.py +71 -0
  15. aipt_v2/compliance/compliance_report.py +449 -0
  16. aipt_v2/compliance/framework_mapper.py +424 -0
  17. aipt_v2/compliance/nist_mapping.py +345 -0
  18. aipt_v2/compliance/owasp_mapping.py +330 -0
  19. aipt_v2/compliance/pci_mapping.py +297 -0
  20. aipt_v2/config.py +288 -0
  21. aipt_v2/core/__init__.py +43 -0
  22. aipt_v2/core/agent.py +630 -0
  23. aipt_v2/core/llm.py +395 -0
  24. aipt_v2/core/memory.py +305 -0
  25. aipt_v2/core/ptt.py +329 -0
  26. aipt_v2/database/__init__.py +14 -0
  27. aipt_v2/database/models.py +232 -0
  28. aipt_v2/database/repository.py +384 -0
  29. aipt_v2/docker/__init__.py +23 -0
  30. aipt_v2/docker/builder.py +260 -0
  31. aipt_v2/docker/manager.py +222 -0
  32. aipt_v2/docker/sandbox.py +371 -0
  33. aipt_v2/evasion/__init__.py +58 -0
  34. aipt_v2/evasion/request_obfuscator.py +272 -0
  35. aipt_v2/evasion/tls_fingerprint.py +285 -0
  36. aipt_v2/evasion/ua_rotator.py +301 -0
  37. aipt_v2/evasion/waf_bypass.py +439 -0
  38. aipt_v2/execution/__init__.py +23 -0
  39. aipt_v2/execution/executor.py +302 -0
  40. aipt_v2/execution/parser.py +544 -0
  41. aipt_v2/execution/terminal.py +337 -0
  42. aipt_v2/health.py +437 -0
  43. aipt_v2/intelligence/__init__.py +85 -0
  44. aipt_v2/intelligence/auth.py +520 -0
  45. aipt_v2/intelligence/chaining.py +775 -0
  46. aipt_v2/intelligence/cve_aipt.py +334 -0
  47. aipt_v2/intelligence/cve_info.py +1111 -0
  48. aipt_v2/intelligence/rag.py +239 -0
  49. aipt_v2/intelligence/scope.py +442 -0
  50. aipt_v2/intelligence/searchers/__init__.py +5 -0
  51. aipt_v2/intelligence/searchers/exploitdb_searcher.py +523 -0
  52. aipt_v2/intelligence/searchers/github_searcher.py +467 -0
  53. aipt_v2/intelligence/searchers/google_searcher.py +281 -0
  54. aipt_v2/intelligence/tools.json +443 -0
  55. aipt_v2/intelligence/triage.py +670 -0
  56. aipt_v2/interface/__init__.py +5 -0
  57. aipt_v2/interface/cli.py +230 -0
  58. aipt_v2/interface/main.py +501 -0
  59. aipt_v2/interface/tui.py +1276 -0
  60. aipt_v2/interface/utils.py +583 -0
  61. aipt_v2/llm/__init__.py +39 -0
  62. aipt_v2/llm/config.py +26 -0
  63. aipt_v2/llm/llm.py +514 -0
  64. aipt_v2/llm/memory.py +214 -0
  65. aipt_v2/llm/request_queue.py +89 -0
  66. aipt_v2/llm/utils.py +89 -0
  67. aipt_v2/models/__init__.py +15 -0
  68. aipt_v2/models/findings.py +295 -0
  69. aipt_v2/models/phase_result.py +224 -0
  70. aipt_v2/models/scan_config.py +207 -0
  71. aipt_v2/monitoring/grafana/dashboards/aipt-dashboard.json +355 -0
  72. aipt_v2/monitoring/grafana/dashboards/default.yml +17 -0
  73. aipt_v2/monitoring/grafana/datasources/prometheus.yml +17 -0
  74. aipt_v2/monitoring/prometheus.yml +60 -0
  75. aipt_v2/orchestration/__init__.py +52 -0
  76. aipt_v2/orchestration/pipeline.py +398 -0
  77. aipt_v2/orchestration/progress.py +300 -0
  78. aipt_v2/orchestration/scheduler.py +296 -0
  79. aipt_v2/orchestrator.py +2284 -0
  80. aipt_v2/payloads/__init__.py +27 -0
  81. aipt_v2/payloads/cmdi.py +150 -0
  82. aipt_v2/payloads/sqli.py +263 -0
  83. aipt_v2/payloads/ssrf.py +204 -0
  84. aipt_v2/payloads/templates.py +222 -0
  85. aipt_v2/payloads/traversal.py +166 -0
  86. aipt_v2/payloads/xss.py +204 -0
  87. aipt_v2/prompts/__init__.py +60 -0
  88. aipt_v2/proxy/__init__.py +29 -0
  89. aipt_v2/proxy/history.py +352 -0
  90. aipt_v2/proxy/interceptor.py +452 -0
  91. aipt_v2/recon/__init__.py +44 -0
  92. aipt_v2/recon/dns.py +241 -0
  93. aipt_v2/recon/osint.py +367 -0
  94. aipt_v2/recon/subdomain.py +372 -0
  95. aipt_v2/recon/tech_detect.py +311 -0
  96. aipt_v2/reports/__init__.py +17 -0
  97. aipt_v2/reports/generator.py +313 -0
  98. aipt_v2/reports/html_report.py +378 -0
  99. aipt_v2/runtime/__init__.py +44 -0
  100. aipt_v2/runtime/base.py +30 -0
  101. aipt_v2/runtime/docker.py +401 -0
  102. aipt_v2/runtime/local.py +346 -0
  103. aipt_v2/runtime/tool_server.py +205 -0
  104. aipt_v2/scanners/__init__.py +28 -0
  105. aipt_v2/scanners/base.py +273 -0
  106. aipt_v2/scanners/nikto.py +244 -0
  107. aipt_v2/scanners/nmap.py +402 -0
  108. aipt_v2/scanners/nuclei.py +273 -0
  109. aipt_v2/scanners/web.py +454 -0
  110. aipt_v2/scripts/security_audit.py +366 -0
  111. aipt_v2/telemetry/__init__.py +7 -0
  112. aipt_v2/telemetry/tracer.py +347 -0
  113. aipt_v2/terminal/__init__.py +28 -0
  114. aipt_v2/terminal/executor.py +400 -0
  115. aipt_v2/terminal/sandbox.py +350 -0
  116. aipt_v2/tools/__init__.py +44 -0
  117. aipt_v2/tools/active_directory/__init__.py +78 -0
  118. aipt_v2/tools/active_directory/ad_config.py +238 -0
  119. aipt_v2/tools/active_directory/bloodhound_wrapper.py +447 -0
  120. aipt_v2/tools/active_directory/kerberos_attacks.py +430 -0
  121. aipt_v2/tools/active_directory/ldap_enum.py +533 -0
  122. aipt_v2/tools/active_directory/smb_attacks.py +505 -0
  123. aipt_v2/tools/agents_graph/__init__.py +19 -0
  124. aipt_v2/tools/agents_graph/agents_graph_actions.py +69 -0
  125. aipt_v2/tools/api_security/__init__.py +76 -0
  126. aipt_v2/tools/api_security/api_discovery.py +608 -0
  127. aipt_v2/tools/api_security/graphql_scanner.py +622 -0
  128. aipt_v2/tools/api_security/jwt_analyzer.py +577 -0
  129. aipt_v2/tools/api_security/openapi_fuzzer.py +761 -0
  130. aipt_v2/tools/browser/__init__.py +5 -0
  131. aipt_v2/tools/browser/browser_actions.py +238 -0
  132. aipt_v2/tools/browser/browser_instance.py +535 -0
  133. aipt_v2/tools/browser/tab_manager.py +344 -0
  134. aipt_v2/tools/cloud/__init__.py +70 -0
  135. aipt_v2/tools/cloud/cloud_config.py +273 -0
  136. aipt_v2/tools/cloud/cloud_scanner.py +639 -0
  137. aipt_v2/tools/cloud/prowler_tool.py +571 -0
  138. aipt_v2/tools/cloud/scoutsuite_tool.py +359 -0
  139. aipt_v2/tools/executor.py +307 -0
  140. aipt_v2/tools/parser.py +408 -0
  141. aipt_v2/tools/proxy/__init__.py +5 -0
  142. aipt_v2/tools/proxy/proxy_actions.py +103 -0
  143. aipt_v2/tools/proxy/proxy_manager.py +789 -0
  144. aipt_v2/tools/registry.py +196 -0
  145. aipt_v2/tools/scanners/__init__.py +343 -0
  146. aipt_v2/tools/scanners/acunetix_tool.py +712 -0
  147. aipt_v2/tools/scanners/burp_tool.py +631 -0
  148. aipt_v2/tools/scanners/config.py +156 -0
  149. aipt_v2/tools/scanners/nessus_tool.py +588 -0
  150. aipt_v2/tools/scanners/zap_tool.py +612 -0
  151. aipt_v2/tools/terminal/__init__.py +5 -0
  152. aipt_v2/tools/terminal/terminal_actions.py +37 -0
  153. aipt_v2/tools/terminal/terminal_manager.py +153 -0
  154. aipt_v2/tools/terminal/terminal_session.py +449 -0
  155. aipt_v2/tools/tool_processing.py +108 -0
  156. aipt_v2/utils/__init__.py +17 -0
  157. aipt_v2/utils/logging.py +201 -0
  158. aipt_v2/utils/model_manager.py +187 -0
  159. aipt_v2/utils/searchers/__init__.py +269 -0
  160. aiptx-2.0.2.dist-info/METADATA +324 -0
  161. aiptx-2.0.2.dist-info/RECORD +165 -0
  162. aiptx-2.0.2.dist-info/WHEEL +5 -0
  163. aiptx-2.0.2.dist-info/entry_points.txt +7 -0
  164. aiptx-2.0.2.dist-info/licenses/LICENSE +21 -0
  165. aiptx-2.0.2.dist-info/top_level.txt +1 -0
aipt_v2/intelligence/searchers/google_searcher.py
@@ -0,0 +1,281 @@
+ import os
+ # from googlesearch import search
+ import re
+ from tqdm import tqdm
+ # import csv
+ from deep_translator import GoogleTranslator
+ import spacy
+ from bs4 import BeautifulSoup
+ import requests
+ from aipt_v2.utils.searchers.Domain_Filter import domain_filter
+ from aipt_v2.utils.searchers.Extension_Filter import for_google_webpage
+ from aipt_v2.utils.searchers.util import *
+ from langdetect import detect
+ import time
+ from readability import Document
+ from inscriptis import get_text
+ import dotenv
+ from aipt_v2.utils.model_manager import get_model
+
+ dotenv.load_dotenv()
+
+ nlp = spacy.load("en_core_web_sm")
+
+ def google_search(query, api_key, cse_id, num=10, **kwargs):
+     """
+     Use the Google Custom Search JSON API to execute a search.
+
+     Args:
+         query (str): search keyword
+         api_key (str): your Google API key
+         cse_id (str): your self-defined search engine ID (cx parameter)
+         num (int, optional): number of search results
+         **kwargs: other query parameters
+
+     Returns:
+         dict: the JSON response from the API
+     """
+     url = "https://www.googleapis.com/customsearch/v1"
+     params = {
+         'q': query,
+         'key': api_key,
+         'cx': cse_id,
+         'num': num
+     }
+     # merge in any other parameters
+     params.update(kwargs)
+
+     # Security: add a timeout to prevent indefinite hangs (CWE-400)
+     response = requests.get(url, params=params, timeout=30)
+     response.raise_for_status()  # raise if the request failed
+     return response.json()
+
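As a usage note: a minimal sketch of calling `google_search` directly, assuming the module path from the file list above and that `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` are set in the environment (the same variables `search_keyword` reads later in this file):

    import os
    from aipt_v2.intelligence.searchers.google_searcher import google_search

    results = google_search(
        "CVE-2021-41773 exploit",          # illustrative query
        api_key=os.environ["GOOGLE_API_KEY"],
        cse_id=os.environ["GOOGLE_CSE_ID"],
        num=5,
    )
    for item in results.get("items", []):  # Custom Search returns matches under "items"
        print(item["link"])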
+ class GoogleSearcher:
+     '''Given a query with an exploit name and app number, search on Google, take 10 hyperlinks, and create a folder holding one file per link'''
+     def __init__(self, model_name: str = "openai"):
+         self.llm = get_model(model_name)
+
+     def fetch_webpage_content(self, url, timeout=180):
+         try:
+             response = requests.get(url, timeout=timeout)  # enforce a timeout
+             response.raise_for_status()  # check that the request succeeded
+             return response.text
+         except requests.RequestException as e:
+             print(f"Request Error: {e}")
+             return None
+
+     def translate_text(self, text, source='auto', target='en', max_length=5000):
+         doc = nlp(text)
+         parts = []
+         current_part = ""
+
+         for sent in doc.sents:
+             if len(current_part) + len(sent.text) <= max_length:
+                 current_part += sent.text + " "
+             else:
+                 parts.append(current_part)
+                 current_part = sent.text + " "
+         if current_part:
+             parts.append(current_part)
+
+         translated_parts = [GoogleTranslator(source=source, target=target).translate(part.strip()) for part in parts if len(part.strip()) > 0]
+         return ' '.join(translated_parts)
+
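`translate_text` packs whole sentences into chunks of at most `max_length` characters before handing them to deep-translator, since web translation backends reject very long inputs (the 5000-character default is the method's own cap; the exact backend limit is an assumption). A small illustrative call:

    searcher = GoogleSearcher()  # note: __init__ initializes the LLM via get_model("openai")
    # Illustrative input; in the crawl pipeline this text comes from readability + inscriptis.
    sample = "Cette faille permet l'exécution de code à distance."
    print(searcher.translate_text(sample))  # sentences are chunked, translated, and rejoined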
+     def extract_content(self, result, start_marker, end_marker):
+         # define the regular expression pattern
+         pattern = f'{start_marker}(.*?){end_marker}'
+         # re.DOTALL lets . match any character, including newlines
+         matches = re.findall(pattern, result, re.DOTALL)
+         return matches
+
+     def filter_full_file(self, file_content):
+         # use the configured LLM to filter files
+         if not self.llm:
+             print("LLM not initialized, skipping file filtering")
+             return "<<@#%_tag useless >>@#%_tag"
+
+         from langchain_core.messages import HumanMessage, SystemMessage
+
+         system_message = SystemMessage(content="""You're an excellent cybersecurity penetration tester assistant.
+ You need to help the tester filter a file's content, and your commitment is essential to the task.
+ You will be provided a file's content; you need to read it and then use your knowledge to determine whether the file is directly useful for an actual penetration.
+ "DIRECTLY USEFUL" means you can directly follow the instructions provided by the file to penetrate.
+ If so, you are supposed to label it, and then it will be passed to your co-workers.
+
+ To help you understand what is "DIRECTLY USEFUL", here are some examples:
+ \t\t"code(s):\n <?php
+ // PHP script to test file upload vulnerability
+ echo "File upload vulnerability test successful";
+ ?>"
+ \t\t"plain instruction(s):\n Open the login page of the target website.
+ Enter the following code in the username or password field: <script>alert('XSS')</script>.
+ Submit the form.
+ If there is an XSS vulnerability, a warning box with the word "XSS" will pop up in your browser."
+ \t\t"command line operation(s):\n curl -d "username=whoami; cat /etc/passwd" http://example.com/login\n
+ or\n
+ netsh>rpc
+ netsh rpc>filter
+ netsh rpc filter>add rule layer=um actiontype=block
+ netsh rpc filter>add condition field=if_uuid matchtype=equal data=4b324fc8-1670-01d3-1278-5a47bf6ee188
+ netsh rpc filter>add filter
+ netsh rpc filter>quit"
+ \t\t"payload(s):\n ' OR 1=1 -- "
+
+ To make the process more efficient, we predefine a pair of unique strings ("<<@#%_tag", ">>@#%_tag") for your answer so that others will recognize them more easily.
+ So the format of your answer should look like: "<<@#%_tag useful >>@#%_tag" or "<<@#%_tag useless >>@#%_tag". The tag should be unique.
+
+ Remember, you should be careful. A common misleading scenario is a file that actually contains something helpful but is so long, or hides the information so deeply, that it gets excluded. So you need to carefully read the whole text.
+ Meanwhile, to reduce your co-workers' burden, you need to be strict. It is okay to find that a file is not actually useful for executing a penetration. If so, feel free to skip those parts.
+ You do not need to assume that a strange URL or link may contain something useful. We can access them through other approaches. So if these things appear in the file, make sure they do not affect your judgement.
+ You can summarize but do not conclude or make assumptions, and your answer should be your most confident one. Keep the answer concise.
+ """)
+
+         user_message = HumanMessage(content=f"""Please make a judgement and filter the following content: \n\n{file_content}
+ \n\n\n\n\n
+
+ Please make sure that you have used the unique string pair ("<<@#%_tag", ">>@#%_tag"); especially, do not forget to add ">>@#%_tag" at the end.""")
+
+         try:
+             response = self.llm.invoke([system_message, user_message])
+             summary = response.content.strip()
+             return summary
+         except Exception as e:
+             print(f"Error calling LLM: {e}")
+             return "<<@#%_tag useless >>@#%_tag"
+
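The odd-looking `<<@#%_tag ... >>@#%_tag` pair exists so the verdict survives free-form model output and can be recovered with `extract_content`. A sketch of that round trip (the `raw` string stands in for an LLM response):

    searcher = GoogleSearcher()
    raw = "The page gives a working curl command. <<@#%_tag useful >>@#%_tag"
    verdicts = searcher.extract_content(raw, '<<@#%_tag', '>>@#%_tag')
    # extract_content returns a list of captures; guard against a missing end tag
    if verdicts and verdicts[0].strip() == "useful":
        print("keep this page")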
+     def search_keyword(self, keyword: str, output_dir: str):
+         # print("google called")
+         links = []
+         search_keyword = keyword
+         # domain_filter = ["github", "suibian"]
+         # + " exploit"
+         search_results = []
+         try:
+             time.sleep(5)
+             search_results = google_search(search_keyword, os.environ.get("GOOGLE_API_KEY"), os.environ.get("GOOGLE_CSE_ID"))
+         except Exception:
+             print("Error occurred during google search. Continuing...")
+
+         # search_results = list(search_results)[:10]
+
+         # create a list with all links
+         # for result in search_results:
+         #     print(result)
+         #     if all(domain not in result.lower() for domain in domain_filter):
+         #         web_name = result.split("//")[1]
+         #         web_name = web_name.replace("/", "-")
+         #         web_name = web_name[:30] if len(web_name) > 30 else web_name
+         #         links.append((web_name, result))
+         # if not os.path.exists(output_dir):
+
+         if 'items' in search_results:
+             for i, item in enumerate(search_results['items'], 1):
+
+                 # process links - filter by domain name
+                 result_link = item['link']
+                 if domain_filter:
+                     if all(domain not in result_link.lower() for domain in domain_filter):
+                         web_name = result_link.split("//")[-1].split("/")[0]
+                         web_name = web_name.replace("/", "-")
+                         web_name = web_name[:30] if len(web_name) > 30 else web_name
+                         links.append((web_name, result_link))
+                 else:
+                     web_name = result_link.split("//")[-1].split("/")[0]
+                     web_name = web_name.replace("/", "-")
+                     web_name = web_name[:30] if len(web_name) > 30 else web_name
+                     links.append((web_name, result_link))
+         else:
+             print("Cannot find related info.")
+         self.create_directories(output_dir, links)
+
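Putting it together, a sketch of driving the searcher end to end; the query and output path are illustrative, and the Google credentials must be available via the environment or a `.env` file picked up by `dotenv.load_dotenv()`:

    g = GoogleSearcher(model_name="openai")
    # Fetches up to 10 results, crawls each page, translates non-English text,
    # and writes an R_DOC.md for every page the LLM labels "useful".
    g.search_keyword("CVE-2021-41773 exploit", "./data/CVE-2021-41773/temp")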
+     def create_directories(self, output_dir, links):
+         # create the folder if it doesn't exist already
+         # print("called!")
+         os.makedirs(output_dir, exist_ok=True)
+         for (name, link) in tqdm(links, desc="Crawling Google pages"):
+             try:
+                 # if it already exists, skip
+                 if os.path.exists(os.path.join(output_dir, name)):
+                     continue
+
+                 time.sleep(5)
+                 # document = SimpleWebPageReader(html_to_text=True).load_data([link])[0].text
+                 html = self.fetch_webpage_content(link)
+                 if html is None:
+                     continue  # fetch failed; the error has already been printed
+                 soup = BeautifulSoup(html, "html.parser")
+
+                 # extension list
+                 extensions_to_remove = for_google_webpage
+
+                 # remove images
+                 for img_tag in soup.find_all('img'):
+                     src = img_tag.get('src')
+                     if src and any(src.lower().endswith(ext) for ext in extensions_to_remove):
+                         img_tag.decompose()
+
+                 # remove links, may not be necessary
+                 # for a_tag in soup.find_all('a'):
+                 #     href = a_tag.get('href')
+                 #     if href and any(href.lower().endswith(ext) for ext in extensions_to_remove):
+                 #         a_tag.decompose()
+
+                 doc = Document(soup.prettify())  # back to an HTML string readability can process
+                 content = doc.summary()  # readability strips unrelated content, returns HTML
+                 full_text = get_text(content)  # clean text, with relative layout preserved
+
+                 # not empty -> create the directory, else skip
+                 if full_text.strip():
+                     os.mkdir(os.path.join(output_dir, name))
+                 else:
+                     continue
+
+                 # translate non-English webpages
+                 if full_text.strip():
+                     lan = detect(full_text)
+                     # print(lan)
+                     if lan != "en":
+                         time.sleep(5)
+                         full_doc = self.translate_text(full_text)
+                     else:
+                         full_doc = full_text
+
+             except Exception as e:
+                 print("Error occurred while downloading web page:", e)
+                 continue
+
+             if len(full_doc) > 1000000:
+                 continue
+
+             # use the LLM to filter, then create the md doc
+             full_doc_judgement = self.filter_full_file(full_doc)
+             # print(full_doc_judgement)
+             full_doc_judgement = self.extract_content(full_doc_judgement, '<<@#%_tag', '>>@#%_tag')
+             # print(full_doc_judgement)
+             if full_doc_judgement and full_doc_judgement[0].strip() == "useful":
+                 with open(os.path.join(output_dir, name, "R_DOC.md"), "w") as f:
+                     f.write(f"link to this page is {link}\n\n")
+                     f.write(full_doc)
+
+         remove_empty_directories(output_dir)
+
+
+ # def main():
+ #     g = GoogleSearcher()
+ #     base_dir = "/root/crawl_classify/data/"
+ #     level = "WPNZ"
+ #     # with open(os.path.join(base_dir, "index", f"{level}.csv"), mode='r', newline='', encoding='utf-8') as file:
+ #     with open(os.path.join(base_dir, "index", "example1.csv"), mode='r', newline='', encoding='utf-8') as file:
+ #         reader = csv.reader(file, delimiter='\t')
+
+ #         for row in reader:
+ #             avd_id, cve_id, zh_keyword, avd_url = row
+ #             if cve_id != "N/A":
+ #                 g.search_keyword(f"{cve_id} exploit", os.path.join(base_dir, level, avd_id, "temp"))
+ #                 time.sleep(30)
+
+
+ # if __name__ == "__main__":
+ #     main()