aiptx 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiptx might be problematic. Click here for more details.
- aipt_v2/__init__.py +110 -0
- aipt_v2/__main__.py +24 -0
- aipt_v2/agents/AIPTxAgent/__init__.py +10 -0
- aipt_v2/agents/AIPTxAgent/aiptx_agent.py +211 -0
- aipt_v2/agents/__init__.py +24 -0
- aipt_v2/agents/base.py +520 -0
- aipt_v2/agents/ptt.py +406 -0
- aipt_v2/agents/state.py +168 -0
- aipt_v2/app.py +960 -0
- aipt_v2/browser/__init__.py +31 -0
- aipt_v2/browser/automation.py +458 -0
- aipt_v2/browser/crawler.py +453 -0
- aipt_v2/cli.py +321 -0
- aipt_v2/compliance/__init__.py +71 -0
- aipt_v2/compliance/compliance_report.py +449 -0
- aipt_v2/compliance/framework_mapper.py +424 -0
- aipt_v2/compliance/nist_mapping.py +345 -0
- aipt_v2/compliance/owasp_mapping.py +330 -0
- aipt_v2/compliance/pci_mapping.py +297 -0
- aipt_v2/config.py +288 -0
- aipt_v2/core/__init__.py +43 -0
- aipt_v2/core/agent.py +630 -0
- aipt_v2/core/llm.py +395 -0
- aipt_v2/core/memory.py +305 -0
- aipt_v2/core/ptt.py +329 -0
- aipt_v2/database/__init__.py +14 -0
- aipt_v2/database/models.py +232 -0
- aipt_v2/database/repository.py +384 -0
- aipt_v2/docker/__init__.py +23 -0
- aipt_v2/docker/builder.py +260 -0
- aipt_v2/docker/manager.py +222 -0
- aipt_v2/docker/sandbox.py +371 -0
- aipt_v2/evasion/__init__.py +58 -0
- aipt_v2/evasion/request_obfuscator.py +272 -0
- aipt_v2/evasion/tls_fingerprint.py +285 -0
- aipt_v2/evasion/ua_rotator.py +301 -0
- aipt_v2/evasion/waf_bypass.py +439 -0
- aipt_v2/execution/__init__.py +23 -0
- aipt_v2/execution/executor.py +302 -0
- aipt_v2/execution/parser.py +544 -0
- aipt_v2/execution/terminal.py +337 -0
- aipt_v2/health.py +437 -0
- aipt_v2/intelligence/__init__.py +85 -0
- aipt_v2/intelligence/auth.py +520 -0
- aipt_v2/intelligence/chaining.py +775 -0
- aipt_v2/intelligence/cve_aipt.py +334 -0
- aipt_v2/intelligence/cve_info.py +1111 -0
- aipt_v2/intelligence/rag.py +239 -0
- aipt_v2/intelligence/scope.py +442 -0
- aipt_v2/intelligence/searchers/__init__.py +5 -0
- aipt_v2/intelligence/searchers/exploitdb_searcher.py +523 -0
- aipt_v2/intelligence/searchers/github_searcher.py +467 -0
- aipt_v2/intelligence/searchers/google_searcher.py +281 -0
- aipt_v2/intelligence/tools.json +443 -0
- aipt_v2/intelligence/triage.py +670 -0
- aipt_v2/interface/__init__.py +5 -0
- aipt_v2/interface/cli.py +230 -0
- aipt_v2/interface/main.py +501 -0
- aipt_v2/interface/tui.py +1276 -0
- aipt_v2/interface/utils.py +583 -0
- aipt_v2/llm/__init__.py +39 -0
- aipt_v2/llm/config.py +26 -0
- aipt_v2/llm/llm.py +514 -0
- aipt_v2/llm/memory.py +214 -0
- aipt_v2/llm/request_queue.py +89 -0
- aipt_v2/llm/utils.py +89 -0
- aipt_v2/models/__init__.py +15 -0
- aipt_v2/models/findings.py +295 -0
- aipt_v2/models/phase_result.py +224 -0
- aipt_v2/models/scan_config.py +207 -0
- aipt_v2/monitoring/grafana/dashboards/aipt-dashboard.json +355 -0
- aipt_v2/monitoring/grafana/dashboards/default.yml +17 -0
- aipt_v2/monitoring/grafana/datasources/prometheus.yml +17 -0
- aipt_v2/monitoring/prometheus.yml +60 -0
- aipt_v2/orchestration/__init__.py +52 -0
- aipt_v2/orchestration/pipeline.py +398 -0
- aipt_v2/orchestration/progress.py +300 -0
- aipt_v2/orchestration/scheduler.py +296 -0
- aipt_v2/orchestrator.py +2284 -0
- aipt_v2/payloads/__init__.py +27 -0
- aipt_v2/payloads/cmdi.py +150 -0
- aipt_v2/payloads/sqli.py +263 -0
- aipt_v2/payloads/ssrf.py +204 -0
- aipt_v2/payloads/templates.py +222 -0
- aipt_v2/payloads/traversal.py +166 -0
- aipt_v2/payloads/xss.py +204 -0
- aipt_v2/prompts/__init__.py +60 -0
- aipt_v2/proxy/__init__.py +29 -0
- aipt_v2/proxy/history.py +352 -0
- aipt_v2/proxy/interceptor.py +452 -0
- aipt_v2/recon/__init__.py +44 -0
- aipt_v2/recon/dns.py +241 -0
- aipt_v2/recon/osint.py +367 -0
- aipt_v2/recon/subdomain.py +372 -0
- aipt_v2/recon/tech_detect.py +311 -0
- aipt_v2/reports/__init__.py +17 -0
- aipt_v2/reports/generator.py +313 -0
- aipt_v2/reports/html_report.py +378 -0
- aipt_v2/runtime/__init__.py +44 -0
- aipt_v2/runtime/base.py +30 -0
- aipt_v2/runtime/docker.py +401 -0
- aipt_v2/runtime/local.py +346 -0
- aipt_v2/runtime/tool_server.py +205 -0
- aipt_v2/scanners/__init__.py +28 -0
- aipt_v2/scanners/base.py +273 -0
- aipt_v2/scanners/nikto.py +244 -0
- aipt_v2/scanners/nmap.py +402 -0
- aipt_v2/scanners/nuclei.py +273 -0
- aipt_v2/scanners/web.py +454 -0
- aipt_v2/scripts/security_audit.py +366 -0
- aipt_v2/telemetry/__init__.py +7 -0
- aipt_v2/telemetry/tracer.py +347 -0
- aipt_v2/terminal/__init__.py +28 -0
- aipt_v2/terminal/executor.py +400 -0
- aipt_v2/terminal/sandbox.py +350 -0
- aipt_v2/tools/__init__.py +44 -0
- aipt_v2/tools/active_directory/__init__.py +78 -0
- aipt_v2/tools/active_directory/ad_config.py +238 -0
- aipt_v2/tools/active_directory/bloodhound_wrapper.py +447 -0
- aipt_v2/tools/active_directory/kerberos_attacks.py +430 -0
- aipt_v2/tools/active_directory/ldap_enum.py +533 -0
- aipt_v2/tools/active_directory/smb_attacks.py +505 -0
- aipt_v2/tools/agents_graph/__init__.py +19 -0
- aipt_v2/tools/agents_graph/agents_graph_actions.py +69 -0
- aipt_v2/tools/api_security/__init__.py +76 -0
- aipt_v2/tools/api_security/api_discovery.py +608 -0
- aipt_v2/tools/api_security/graphql_scanner.py +622 -0
- aipt_v2/tools/api_security/jwt_analyzer.py +577 -0
- aipt_v2/tools/api_security/openapi_fuzzer.py +761 -0
- aipt_v2/tools/browser/__init__.py +5 -0
- aipt_v2/tools/browser/browser_actions.py +238 -0
- aipt_v2/tools/browser/browser_instance.py +535 -0
- aipt_v2/tools/browser/tab_manager.py +344 -0
- aipt_v2/tools/cloud/__init__.py +70 -0
- aipt_v2/tools/cloud/cloud_config.py +273 -0
- aipt_v2/tools/cloud/cloud_scanner.py +639 -0
- aipt_v2/tools/cloud/prowler_tool.py +571 -0
- aipt_v2/tools/cloud/scoutsuite_tool.py +359 -0
- aipt_v2/tools/executor.py +307 -0
- aipt_v2/tools/parser.py +408 -0
- aipt_v2/tools/proxy/__init__.py +5 -0
- aipt_v2/tools/proxy/proxy_actions.py +103 -0
- aipt_v2/tools/proxy/proxy_manager.py +789 -0
- aipt_v2/tools/registry.py +196 -0
- aipt_v2/tools/scanners/__init__.py +343 -0
- aipt_v2/tools/scanners/acunetix_tool.py +712 -0
- aipt_v2/tools/scanners/burp_tool.py +631 -0
- aipt_v2/tools/scanners/config.py +156 -0
- aipt_v2/tools/scanners/nessus_tool.py +588 -0
- aipt_v2/tools/scanners/zap_tool.py +612 -0
- aipt_v2/tools/terminal/__init__.py +5 -0
- aipt_v2/tools/terminal/terminal_actions.py +37 -0
- aipt_v2/tools/terminal/terminal_manager.py +153 -0
- aipt_v2/tools/terminal/terminal_session.py +449 -0
- aipt_v2/tools/tool_processing.py +108 -0
- aipt_v2/utils/__init__.py +17 -0
- aipt_v2/utils/logging.py +201 -0
- aipt_v2/utils/model_manager.py +187 -0
- aipt_v2/utils/searchers/__init__.py +269 -0
- aiptx-2.0.2.dist-info/METADATA +324 -0
- aiptx-2.0.2.dist-info/RECORD +165 -0
- aiptx-2.0.2.dist-info/WHEEL +5 -0
- aiptx-2.0.2.dist-info/entry_points.txt +7 -0
- aiptx-2.0.2.dist-info/licenses/LICENSE +21 -0
- aiptx-2.0.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1111 @@
|
|
|
1
|
+
# from github_searcher import GithubSearcher
|
|
2
|
+
# from google_searcher import GoogleSearcher
|
|
3
|
+
import tiktoken
|
|
4
|
+
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler
|
|
5
|
+
from aipt_v2.utils.doc_handler import DocHandler
|
|
6
|
+
from llama_index.llms.openai import OpenAI
|
|
7
|
+
from llama_index.llms.langchain import LangChainLLM
|
|
8
|
+
from llama_index.llms.huggingface import HuggingFaceLLM
|
|
9
|
+
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
|
|
10
|
+
from llama_index.core import Settings
|
|
11
|
+
import subprocess
|
|
12
|
+
import shlex
|
|
13
|
+
import os
|
|
14
|
+
import openai
|
|
15
|
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
16
|
+
import torch
|
|
17
|
+
import pandas as pd
|
|
18
|
+
|
|
19
|
+
# Security: CVE ID validation pattern (CWE-78 prevention)
|
|
20
|
+
CVE_PATTERN = re.compile(r"^CVE-\d{4}-\d{4,7}$", re.IGNORECASE)
|
|
21
|
+
|
|
22
|
+
def _validate_cve_id(cve: str) -> str:
|
|
23
|
+
"""
|
|
24
|
+
Validate CVE ID format to prevent command injection.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
cve: CVE identifier string
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
Validated CVE ID
|
|
31
|
+
|
|
32
|
+
Raises:
|
|
33
|
+
ValueError: If CVE ID format is invalid
|
|
34
|
+
"""
|
|
35
|
+
cve = cve.strip()
|
|
36
|
+
if not CVE_PATTERN.match(cve):
|
|
37
|
+
raise ValueError(f"Invalid CVE ID format: {cve}. Expected format: CVE-YYYY-NNNNN")
|
|
38
|
+
return cve.upper()
|
|
39
|
+
|
|
40
|
+
def _sanitize_product_name(product: str) -> str:
|
|
41
|
+
"""
|
|
42
|
+
Sanitize product name to prevent command injection.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
product: Product name string
|
|
46
|
+
|
|
47
|
+
Returns:
|
|
48
|
+
Sanitized product name
|
|
49
|
+
"""
|
|
50
|
+
# Remove dangerous characters that could enable shell injection
|
|
51
|
+
dangerous_chars = [";", "&", "|", "$", "`", "\n", "\r", "\\", "'", '"', "(", ")", "{", "}", "[", "]", "<", ">"]
|
|
52
|
+
sanitized = product
|
|
53
|
+
for char in dangerous_chars:
|
|
54
|
+
sanitized = sanitized.replace(char, "")
|
|
55
|
+
return sanitized.strip()[:200] # Limit length
|
|
56
|
+
|
|
57
|
+
from sklearn.metrics import confusion_matrix, cohen_kappa_score
|
|
58
|
+
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
|
|
59
|
+
import seaborn as sns
|
|
60
|
+
import matplotlib.pyplot as plt
|
|
61
|
+
from scipy.stats import pearsonr, spearmanr
|
|
62
|
+
from itertools import combinations
|
|
63
|
+
import numpy as np
|
|
64
|
+
import json
|
|
65
|
+
import logging
|
|
66
|
+
import time
|
|
67
|
+
from tqdm import tqdm
|
|
68
|
+
from aipt_v2.utils.searchers.search_once import compose
|
|
69
|
+
import re
|
|
70
|
+
import yaml
|
|
71
|
+
from aipt_v2.utils.model_manager import get_model
|
|
72
|
+
# Module-level logger (child of the package root logger).
logger = logging.getLogger(__name__)

# Load the runtime configuration once at import time from
# <package_dir>/configs/config.yaml, resolved relative to this file.
# NOTE(review): a missing or malformed config file raises here, at import
# time — presumably intentional fail-fast behavior; confirm.
config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'configs', 'config.yaml')
with open(config_path, 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)
# Sub-sections consumed elsewhere in this module.
planning_config = config['runtime']['planning']
model_name_for_token = config['models']['openai']['model']
|
|
79
|
+
|
|
80
|
+
def cvemap_search(cve, info_dir):
    """
    Search CVE information using the `cvemap` CLI tool.

    Args:
        cve: CVE identifier; validated before use.
        info_dir: Directory where the raw cvemap JSON output is written
            (as ``cvemap.json``).

    Returns:
        The first record of cvemap's JSON output (a dict), or ``None`` on any
        failure: validation-passing but unknown CVE, non-zero exit, timeout,
        malformed JSON, or missing `cvemap` binary.

    Security: Uses validated CVE ID and subprocess without shell=True (CWE-78 fix).
    """
    # Validate CVE ID to prevent command injection.
    validated_cve = _validate_cve_id(cve)
    cvemap_json_path = f"{info_dir}/cvemap.json"

    try:
        # SECURE: argv list, no shell=True; use -id instead of piping via echo.
        result = subprocess.run(
            ["cvemap", "-id", validated_cve, "-json"],
            capture_output=True,
            text=True,
            timeout=50,
            check=False,  # handle non-zero exit codes ourselves
        )

        if result.returncode != 0:
            # Use the module-level logger (consistent with the rest of the module)
            # rather than the root logging module.
            logger.error(f"cvemap failed for {validated_cve}: {result.stderr}")
            return None

        # Persist the raw output for later offline analysis.
        with open(cvemap_json_path, 'w') as f:
            f.write(result.stdout)

        json_data = json.loads(result.stdout)
        if len(json_data) == 0:
            logger.error(f"Error in cvemap search: {validated_cve}")
            return None
        return json_data[0]

    except subprocess.TimeoutExpired:
        logger.error(f"cvemap search timed out for {validated_cve}")
        return None
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON from cvemap for {validated_cve}: {e}")
        return None
    except FileNotFoundError:
        logger.error("cvemap command not found. Please install cvemap.")
        return None
|
|
124
|
+
|
|
125
|
+
def searchsploit_search(cve):
    """
    Search ExploitDB using the `searchsploit` tool.

    Writes the raw JSON output to ``resources/<CVE>/exploitdb.json`` and
    returns the parsed JSON, or ``None`` on timeout, malformed output, or a
    missing `searchsploit` binary.

    Args:
        cve: CVE identifier; validated before use.

    Returns:
        Parsed searchsploit JSON (dict), or ``None`` on failure.

    Security: Uses validated CVE ID and subprocess without shell=True (CWE-78 fix).
    """
    # Validate CVE ID to prevent command injection.
    validated_cve = _validate_cve_id(cve)
    searchsploit_json_path = f"resources/{validated_cve}/exploitdb.json"

    # Ensure directory exists
    os.makedirs(os.path.dirname(searchsploit_json_path), exist_ok=True)

    try:
        # SECURE: argv list, no shell=True.
        result = subprocess.run(
            ["searchsploit", validated_cve, "-j"],
            capture_output=True,
            text=True,
            timeout=50,
            check=False,
        )

        # Persist the raw output (even on a non-zero exit, matching the
        # original behavior; the JSON parse below is the effective gate).
        with open(searchsploit_json_path, 'w') as f:
            f.write(result.stdout)

        json_data = json.loads(result.stdout)
        return json_data

    except subprocess.TimeoutExpired:
        # Module-level logger for consistency with the rest of this module.
        logger.error(f"searchsploit search timed out for {validated_cve}")
        return None
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON from searchsploit for {validated_cve}: {e}")
        return None
    except FileNotFoundError:
        logger.error("searchsploit command not found. Please install exploitdb.")
        return None
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def categorize_cvss(cvss_score):
    """Bucket a CVSS score (0.0-10.0) into an exploitation-difficulty label.

    Returns "easy" (>= 7.0), "medium" (>= 4.0), "hard" (< 4.0), or
    "value not in range" for scores outside [0.0, 10.0].
    """
    if cvss_score < 0.0 or cvss_score > 10.0:
        return "value not in range"

    # Guard clauses from the highest band down.
    if cvss_score >= 7.0:
        return "easy"
    if cvss_score >= 4.0:
        return "medium"
    return "hard"
|
|
176
|
+
|
|
177
|
+
def categorize_epss(epss_score):
    """Bucket an EPSS score/percentile (0.0-1.0) into a difficulty label.

    Returns "easy" (>= 0.94), "medium" (>= 0.4), "hard" (< 0.4), or
    "value not in range" for scores outside [0.0, 1.0].
    """
    if epss_score < 0.0 or epss_score > 1.0:
        return "value not in range"

    # Guard clauses from the highest band down.
    if epss_score >= 0.94:
        return "easy"
    if epss_score >= 0.4:
        return "medium"
    return "hard"
|
|
187
|
+
|
|
188
|
+
def count_cwe(cve_lst):
    """Tally CWE occurrences across cached cvemap records.

    Reads ``resources/<CVE>/info/cvemap.json`` for each CVE in *cve_lst* and
    counts each ``cwe_id`` found under the record's ``weaknesses`` list.

    Returns:
        dict mapping CWE id -> occurrence count.
    """
    counts = {}
    for cve in cve_lst:
        cvemap_json_path = f"resources/{cve}/info/cvemap.json"
        with open(cvemap_json_path) as f:
            record = json.load(f)[0]
        # Records without a 'weaknesses' key contribute nothing.
        for weakness in record.get('weaknesses', []):
            cwe_id = weakness['cwe_id']
            counts[cwe_id] = counts.get(cwe_id, 0) + 1
    return counts
|
|
204
|
+
|
|
205
|
+
def calculate_score(features, trending_score):
    """Compute an exploitability score for a CVE from extracted features.

    Combines per-repo scores from code sources (GitHub / ExploitDB) with an
    optional document-based ('doc') score, using the module-level
    ``config['cve_scoring']['weights']`` table.

    Args:
        features: Feature dict with a 'code' section (per-source, per-repo
            fields: vul_type, isRemote, attack_complexity, exp_maturity,
            lang_class) and a 'doc' section.
            NOTE(review): schema inferred from the field accesses below —
            confirm against the feature extractor that produces features.json.
        trending_score: GitHub trending score; the sentinel value 999 means
            "no GitHub data" (set by cve_classifier).

    Returns:
        Tuple ``(scores, max_score_repo, max_score, final_score, has_code)``.
        ``scores`` is a list of (repo, score) pairs sorted descending when
        code (and possibly doc) results exist, the raw scores dict when only
        doc results exist, or None when neither source yielded anything.
    """
    weights = config['cve_scoring']['weights']

    scores = {}
    max_score_github = 0
    max_score_repo_github = None
    max_score_expdb = 0
    max_score_repo_expdb = None
    max_score = 0
    max_score_repo = None
    final_score = 0
    has_code = False

    code_sources = ["GitHub", "ExploitDB"]

    code_results = None
    doc_results = None

    if features['code'].get('GitHub') or features['code'].get('ExploitDB'):
        for source in code_sources:
            if not features['code'].get(source):
                continue # if not have data from source, skip

            has_code = True
            for repo, vul_type in features['code'][source]["vul_type"].items():
                score = 0

                # Calculate scores for each field
                score += weights["vul_type"].get(vul_type, 0)
                score += weights["isRemote"].get(features['code'][source]["isRemote"].get(repo, ""), 0)

                # Attack complexity fields
                attack_complexity = features['code'][source]["attack_complexity"].get(repo, {})
                for field, value in attack_complexity.items():
                    score += 1 * weights["attack_complexity"].get(field, {}).get(value, 0)

                # "Code_File" entries are down-weighted by half relative to repos.
                if repo == "Code_File":
                    score = score / 2

                # Multiplicative factors: missing exp_maturity zeroes the score,
                # missing lang_class leaves it unchanged (default 1).
                score *= weights["exp_maturity"].get(features['code'][source]["exp_maturity"].get(repo, ""), 0)

                score *= weights["lang_class"].get(features['code'][source]["lang_class"].get(repo, ""), 1)

                if source == "GitHub":
                    score *= weights["source_weights"]["gthb"]
                elif source == "ExploitDB":
                    score *= weights["source_weights"]["expdb"]

                # Store the score
                scores[repo] = score

                # Check for max score. ExploitDB repo keys are numeric IDs,
                # hence the isdigit() discriminator.
                if repo.isdigit(): # ExploitDB
                    if score >= max_score_expdb:
                        max_score_expdb = score
                        max_score_repo_expdb = repo
                else: # GitHub
                    if score >= max_score_github:
                        max_score_github = score
                        max_score_repo_github = repo

        # Ties favor GitHub (strict > for ExploitDB).
        if max_score_expdb > max_score_github:
            max_score_repo = max_score_repo_expdb
            max_score = max_score_expdb
        else:
            max_score_repo = max_score_repo_github
            max_score = max_score_github

    if has_code:
        sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)

        # 999 is the "no GitHub data" sentinel from the caller.
        if trending_score == 999:
            trending_score_github = 0
            trending_score_expdb = weights["expdb_default_score"]
        else:
            trending_score_github = trending_score
            if features['code'].get('ExploitDB'):
                trending_score_expdb = weights["expdb_default_score"]
            else: trending_score_expdb = 0

        # Add weighted trending score to the max score
        trend_score_weighted_expdb = trending_score_expdb * weights["trending_score"]
        trend_score_weighted_github = trending_score_github * weights["trending_score"]

        # Handle cases where one source has no scores
        final_expdb = (max_score_expdb + trend_score_weighted_expdb) if max_score_expdb > 0 else 0
        final_github = (max_score_github + trend_score_weighted_github) if max_score_github > 0 else 0

        final_score = max(final_expdb, final_github)

        code_results = (sorted_scores, max_score_repo, max_score, final_score, has_code)

    if features['doc']:
        doc_score = 0
        doc_score += weights["vul_type"].get(features["doc"]["vul_type"], 0)
        doc_score += weights["isRemote"].get(features["doc"]["isRemote"], 0)

        # Attack complexity fields for 'doc'
        for field, value in features["doc"]["attack_complexity"].items():
            doc_score += weights["attack_complexity"].get(field, {}).get(value, 0)

        doc_score *= weights["source_weights"]["gg"]

        scores["doc"] = doc_score

        # Check if 'doc' has the max score
        if doc_score > max_score:
            max_score = doc_score
            max_score_repo = "doc"
            final_score = doc_score

        # NOTE(review): max_score is packed twice here — the doc branch uses
        # max_score as its final_score as well.
        doc_results = (scores, max_score_repo, max_score, max_score, has_code)

    if code_results and doc_results:
        code_sorted_scores, code_max_repo, code_max_score, code_final_score, code_has_code = code_results
        doc_scores, doc_max_repo, doc_max_score, doc_final_score, doc_has_code = doc_results

        # merge scores
        all_scores = dict(code_sorted_scores)
        all_scores.update(doc_scores)
        sorted_all_scores = sorted(all_scores.items(), key=lambda item: item[1], reverse=True)

        # final max score and response repo
        if doc_max_score > code_max_score:
            final_max_score = doc_max_score
            final_max_repo = doc_max_repo
            final_score = doc_max_score
        else:
            final_max_score = code_max_score
            final_max_repo = code_max_repo
            final_score = code_final_score

        # NOTE(review): `has_code or True` is always True — flag for review.
        return sorted_all_scores, final_max_repo, final_max_score, final_score, has_code or True

    elif code_results:
        return code_results
    elif doc_results:
        return doc_results
    else:
        return None, None, max_score, final_score, has_code
|
|
345
|
+
|
|
346
|
+
def calculate_match_stats(values1, values2):
    """Percentage breakdown of agreement between two category sequences.

    Categories are ordered ['easy', 'medium', 'hard']; an "exact" match is
    equality, a "near" match is an adjacent category, anything else is a
    mismatch.  Returns a dict of the three percentages.
    """
    order = ['easy', 'medium', 'hard']
    total = len(values1)

    # Single pass classifying each aligned pair.
    exact_matches = 0
    near_matches = 0
    for left, right in zip(values1, values2):
        if left == right:
            exact_matches += 1
        elif abs(order.index(left) - order.index(right)) == 1:
            near_matches += 1
    mismatches = total - exact_matches - near_matches

    return {
        "Exact Match (%)": (exact_matches / total) * 100,
        "Near Match (%)": (near_matches / total) * 100,
        "Mismatch (%)": (mismatches / total) * 100,
    }
|
|
359
|
+
|
|
360
|
+
def create_df(cvss_data, epss_data, pentestasst_data):
    """Align three score dicts on the union of their keys into a DataFrame.

    Columns are CVSS / EPSS / EEAS; missing entries become NaN; the index is
    the sorted union of all CVE keys.
    """
    all_keys = sorted(set(cvss_data) | set(epss_data) | set(pentestasst_data))
    sources = {
        'CVSS': cvss_data,
        'EPSS': epss_data,
        'EEAS': pentestasst_data,
    }
    # Fill each column from its source dict, NaN where a key is absent.
    columns = {name: [src.get(key, np.nan) for key in all_keys]
               for name, src in sources.items()}
    return pd.DataFrame(columns, index=all_keys)
|
|
369
|
+
|
|
370
|
+
def normalize_data(df):
    """Normalize the EEAS column of *df* in place and return a fresh frame.

    Applies z-score standardization followed by min-max scaling to EEAS;
    other columns are untouched.  A single-row frame cannot be scaled, so
    its EEAS value is pinned to the midpoint 0.5.
    """
    if len(df) == 1:
        # for the case of a single value, directly return 0.5 or keep the original value
        df[['EEAS']] = 0.5
    else:
        standardized = StandardScaler().fit_transform(df[['EEAS']])
        df[['EEAS']] = MinMaxScaler().fit_transform(standardized)
    # Rebuild the frame with the same index/columns (mirrors the original).
    return pd.DataFrame(df, index=df.index, columns=df.columns)
|
|
380
|
+
|
|
381
|
+
def bin_agreement_analysis(df):
    """Pairwise categorical agreement analysis between the columns of *df*.

    For every pair of columns, computes a confusion matrix, Cohen's kappa,
    and exact/near/mismatch percentages, prints a report, and returns *df*
    unchanged.
    """
    labels = ['easy', 'medium', 'hard']
    pairwise_stats = {}
    for (name1, col1), (name2, col2) in combinations(df.items(), 2):
        # Agreement metrics for this column pair.
        cm = confusion_matrix(col1, col2, labels=labels)
        kappa = cohen_kappa_score(col1, col2, labels=labels)

        entry = {
            "Confusion Matrix": cm.tolist(),
            "CohenS Kappa": kappa,
        }
        entry.update(calculate_match_stats(col1, col2))
        pairwise_stats[f"{name1} vs {name2}"] = entry

    # Display results
    for pair, results in pairwise_stats.items():
        print(f"\nPairwise comparison: {pair}")
        print(f"Confusion Matrix:\n{np.array(results['Confusion Matrix'])}")
        print(f"Cohen's Kappa: {results['CohenS Kappa']:.4f}")
        print(f"Exact Match (%): {results['Exact Match (%)']:.2f}")
        print(f"Near Match (%): {results['Near Match (%)']:.2f}")
        print(f"Mismatch (%): {results['Mismatch (%)']:.2f}")

    return df
|
|
405
|
+
|
|
406
|
+
def num_agreement_analysis(df):
    """Pairwise numeric agreement analysis between the score columns of *df*.

    For every pair of columns, prints the mean/std of their differences, the
    rows whose difference exceeds the Bland-Altman 1.96*SD threshold, and
    correlation / error metrics (Pearson, Spearman, MAE, RMSE).

    Args:
        df: DataFrame of numeric score columns (e.g. CVSS / EPSS / EEAS).

    Returns:
        The same DataFrame, unchanged (returned for call chaining).
    """
    stats = {}
    for col1, col2 in combinations(df.columns, 2):
        diff_values = df[col1] - df[col2]

        # Compute once and reuse (original recomputed np.mean/np.std in the
        # prints and also built an unused mean_values series).
        mean_diff = np.mean(diff_values)
        std_diff = np.std(diff_values)

        print(f"diff mean: {mean_diff}")
        print(f"diff std: {std_diff}")

        # Rows outside the 95% limits of agreement (±1.96 SD).
        threshold = 1.96 * std_diff
        diff_condition = abs(diff_values) > threshold

        rows_large_diff = df[diff_condition]
        print(f"Rows where the difference between {col1} and {col2} is larger than {threshold}:")
        print(rows_large_diff)
        print(f"Total rows: {len(df)}, Rows with large difference: {len(rows_large_diff)}, percentage: {len(rows_large_diff)/len(df) * 100}%")

        values1, values2 = df[col1], df[col2]
        pearson_corr, _ = pearsonr(values1, values2)
        spearman_corr, _ = spearmanr(values1, values2)
        mae = np.mean(np.abs(values1 - values2))
        rmse = np.sqrt(np.mean((values1 - values2) ** 2))
        stats[f"{col1} vs {col2}"] = {
            "Pearson Correlation": pearson_corr,
            "Spearman Correlation": spearman_corr,
            "Mean Absolute Error (MAE)": mae,
            "Root Mean Squared Error (RMSE)": rmse,
        }

    for pair, metrics in stats.items():
        print(f"\nAgreement Analysis for {pair}:")
        for metric, value in metrics.items():
            print(f"{metric}: {value:.4f}")

    return df
|
|
444
|
+
|
|
445
|
+
def visualize_num_results(df, output_dir="plots/"):
    """Save numeric-agreement plots for *df* to PDF files in *output_dir*.

    Produces: pairwise regression scatter plots, a correlation heatmap, and
    a Bland-Altman plot for every pair of columns.
    NOTE(review): assumes *output_dir* already exists — no makedirs here.
    """

    plt.rc('font', size=14)

    # Pairwise scatter plots (seaborn figure-level plot; saved via its own
    # savefig rather than plt.savefig)
    pairplot = sns.pairplot(df, kind="reg", diag_kind="kde")
    pairplot.savefig(os.path.join(output_dir, "pairwise_scatter_plots.pdf"))
    plt.close()

    # Heatmap for correlation
    correlation_matrix = df.corr()
    plt.figure(figsize=(8, 6))
    sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f")
    plt.title("Heatmap of Correlations")
    plt.savefig(os.path.join(output_dir, "correlation_heatmap.pdf"))
    plt.close()

    # Bland-Altman plots: difference vs. mean of each measurement pair, with
    # mean-difference and ±1.96 SD limit lines.
    for col1, col2 in combinations(df.columns, 2):
        mean_values = (df[col1] + df[col2]) / 2
        diff_values = df[col1] - df[col2]

        plt.figure(figsize=(8, 6))
        plt.scatter(mean_values, diff_values, alpha=0.7)
        plt.axhline(np.mean(diff_values), color='red', linestyle='--', label='Mean Difference')
        plt.axhline(np.mean(diff_values) + 1.96 * np.std(diff_values), color='blue', linestyle='--', label='±1.96SD')
        plt.axhline(np.mean(diff_values) - 1.96 * np.std(diff_values), color='blue', linestyle='--', label=None)
        plt.title(f"Bland-Altman Plot: {col1} vs {col2}", weight='bold')
        plt.xlabel('Mean of Two Measurements', weight='bold')
        plt.ylabel('Difference', weight='bold')
        plt.legend()
        filename = f"bland_altman_{col1}_vs_{col2}.pdf"
        plt.savefig(os.path.join(output_dir, filename))
        plt.close()
|
|
479
|
+
|
|
480
|
+
def visualize_bin_results(cvss_data, epss_data, pentestasst_data, output_dir):
    """Save categorical-agreement plots for the three score dicts as PDFs.

    Produces pairwise confusion-matrix heatmaps, a class-distribution bar
    chart, and a stacked pairwise-agreement bar chart in *output_dir*.
    NOTE(review): the pairwise loops index data2 by data1's keys — assumes
    all three dicts share the same key set; confirm with callers.
    """
    # Confusion Matrix Heatmap
    for (name1, data1), (name2, data2) in combinations([("CVSS", cvss_data), ("EPSS", epss_data), ("EEAS", pentestasst_data)], 2):
        labels = ['easy', 'medium', 'hard']
        confusion = confusion_matrix(list(data1.values()), list(data2.values()), labels=labels)

        plt.figure(figsize=(6, 5))
        sns.heatmap(confusion, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels)
        plt.title(f"Confusion Matrix: {name1} vs {name2}")
        plt.xlabel(f"{name2} Predictions")
        plt.ylabel(f"{name1} Predictions")
        plt.savefig(f"{output_dir}/confusion_matrix_{name1}_vs_{name2}.pdf")
        plt.close()

    # Bar Chart of Class Distributions
    data = {
        "Category": ['easy', 'medium', 'hard'],
        "CVSS": [list(cvss_data.values()).count(c) for c in ['easy', 'medium', 'hard']],
        "EPSS": [list(epss_data.values()).count(c) for c in ['easy', 'medium', 'hard']],
        "EEAS": [list(pentestasst_data.values()).count(c) for c in ['easy', 'medium', 'hard']],
    }
    df = pd.DataFrame(data).set_index("Category")

    df.plot(kind="bar", figsize=(8, 6))
    plt.title("Class Distributions Across Dictionaries")
    plt.ylabel("Frequency")
    plt.xlabel("Category")
    plt.xticks(rotation=0)
    plt.savefig(f"{output_dir}/class_distributions.pdf")
    plt.close()

    # Pairwise Agreement Bar Chart (exact / adjacent-category / mismatch
    # percentages per dictionary pair)
    pairwise_agreements = []
    for (name1, data1), (name2, data2) in combinations([("CVSS", cvss_data), ("EPSS", epss_data), ("EEAS", pentestasst_data)], 2):
        total = len(data1)
        exact_matches = sum(1 for k in data1 if data1[k] == data2[k])
        near_matches = sum(1 for k in data1 if abs(['easy', 'medium', 'hard'].index(data1[k]) -
                                                   ['easy', 'medium', 'hard'].index(data2[k])) == 1)
        mismatches = total - exact_matches - near_matches

        pairwise_agreements.append({
            "Pair": f"{name1} vs {name2}",
            "Exact Matches (%)": exact_matches / total * 100,
            "Near Matches (%)": near_matches / total * 100,
            "Mismatches (%)": mismatches / total * 100
        })

    pairwise_df = pd.DataFrame(pairwise_agreements).set_index("Pair")
    pairwise_df.plot(kind="bar", stacked=True, figsize=(10, 6))
    plt.title("Pairwise Agreement Percentages")
    plt.ylabel("Percentage")
    plt.xlabel("Pairwise Comparison")
    plt.xticks(rotation=0)
    plt.legend(loc="upper right")
    plt.savefig(f"{output_dir}/pairwise_agreements.pdf")
    plt.close()
|
|
536
|
+
|
|
537
|
+
def cve_classifier(cve, output_dir="resources/", mode = "specific"):
    """Classify a CVE's exploitability from previously extracted features.

    Reads ``features.json`` (and, for GitHub-backed CVEs, ``Trend_Score.json``)
    under ``<output_dir>/<cve>/``, scores it via :func:`calculate_score`, and
    writes the resulting classification to ``classification.json``.

    Args:
        cve: CVE identifier (used as a directory name under *output_dir*).
        output_dir: Root of the per-CVE resource tree.
        mode: "specific" pulls CVSS/EPSS from the cached cvemap record;
            "general" zeroes them out.

    Returns:
        Tuple ``(cvss_score, cvss_category, epss_percentile, epss_category,
        final_score, exploitability, has_code)``.

    Raises:
        ValueError: If *mode* is neither "specific" nor "general"
            (previously this fell through to a NameError).
    """
    trending_score_path = f"{output_dir}/{cve}/Trend_Score.json"
    feature_path = f"{output_dir}/{cve}/features.json"
    cvemap_path = f"{output_dir}/{cve}/info/cvemap.json"
    result = None
    with open(feature_path, 'r') as f:
        result = json.load(f)
    trending_score = 0
    if result['code'].get('GitHub'):
        try:
            with open(trending_score_path, 'r') as f:
                _trending_score = json.load(f)
            # Cap the trending contribution at 50.
            trending_score = min(_trending_score.get("trend_score", 0), 50)
        # Narrowed from a bare `except:` — only file/parse/value problems are
        # an expected "score unavailable"; anything else should surface.
        except (OSError, ValueError, TypeError):
            logger.warning(f"Trending score not found for {cve}")
    else:
        # Sentinel understood by calculate_score: no GitHub data.
        trending_score = 999

    if mode == "specific":
        cvemap_json = None
        with open(cvemap_path, 'r') as f:
            cvemap_json = json.load(f)[0]
        cvss_score = cvemap_json.get('cvss_score', 0)
        epss_score = cvemap_json['epss']['epss_score']
        epss_percentile = cvemap_json['epss']['epss_percentile']
    elif mode == "general":
        cvss_score = 0
        epss_score = 0
        epss_percentile = 0
    else:
        raise ValueError(f"Unknown mode: {mode!r}. Expected 'specific' or 'general'.")
    cvss_category = categorize_cvss(cvss_score)
    # NOTE: categorization deliberately uses the percentile, not the raw score.
    epss_category = categorize_epss(epss_percentile)

    scores, max_score_repo, max_score, final_score, has_code = calculate_score(result, trending_score)

    # Map the numeric final score onto difficulty buckets.
    exploitability = "hard"
    if final_score is not None and final_score > 0:
        if final_score > 50:
            exploitability = "easy"
        elif final_score > 35:
            exploitability = "medium"
        else:
            exploitability = "hard"

    with open(f"{output_dir}/{cve}/classification.json", 'w') as f:
        classification = {
            "cvss_category": cvss_category,
            "epss_category": epss_category,
            "exploitability": exploitability,
            "final_score": final_score,
            "max_score_repo": max_score_repo,
            "max_score": max_score,
            "scores": scores,
            "has_code": has_code
        }
        json.dump(classification, f, indent=4)

    return cvss_score, cvss_category, epss_percentile, epss_category, final_score, exploitability, has_code
|
|
596
|
+
|
|
597
|
+
def cve_analysis(cve, output_dir="resources/"):
    """Search for and analyze a single CVE.

    Composes search results, looks the CVE up via cvemap, runs the document
    analysis, and writes ``{output_dir}/{cve}/features.json``.

    Args:
        cve: CVE identifier, e.g. "CVE-2021-21345".
        output_dir: Root directory for per-CVE artifacts.

    Returns:
        Tuple ``(searching_time, analysis_time)`` in seconds, or ``(0, 0)``
        when cvemap has no record for the CVE.
    """
    info_dir = f"{output_dir}/{cve}/info"
    searching_start_time = time.time()
    compose(output_dir, cve)
    # exist_ok avoids the check-then-mkdir race of the original mkdir call.
    os.makedirs(info_dir, exist_ok=True)
    cvemap_json = cvemap_search(cve, info_dir)
    if cvemap_json is None:
        # Bug fix: callers unpack two values
        # (``searching_time, analysis_time = cve_analysis(...)``); the
        # original bare ``return 0`` raised TypeError at the call site.
        return 0, 0
    cve_description = cvemap_json['cve_description']

    logging.info(f"Analyzing {cve}")
    doc_handler = DocHandler()

    analysis_start_time = time.time()
    result = doc_handler.vul_analysis(cve, output_dir, cve_description)
    analysis_end_time = time.time()
    searching_time = analysis_start_time - searching_start_time
    analysis_time = analysis_end_time - analysis_start_time
    logging.info(f"Analysis time: {analysis_time} seconds")

    with open(f"{output_dir}/{cve}/features.json", 'w') as f:
        json.dump(result, f, indent=4)

    return searching_time, analysis_time
|
625
|
+
def general_analysis(keyword, output_dir="resources/"):
    """Run a loose, keyword-driven analysis (no specific CVE).

    Composes search results for ``keyword`` in loose mode, runs the document
    analysis with an empty CVE description, writes ``features.json`` directly
    into ``output_dir``, and returns ``(searching_time, analysis_time)`` in
    seconds.
    """
    t_search_start = time.time()
    compose(output_dir, keyword, loose_mode=True)

    # The keyword plays the role of the CVE id here; there is no description.
    logging.info(f"Analyzing {keyword}")
    handler = DocHandler()

    t_analysis_start = time.time()
    analysis_result = handler.vul_analysis(keyword, output_dir, "")
    t_analysis_end = time.time()

    elapsed_search = t_analysis_start - t_search_start
    elapsed_analysis = t_analysis_end - t_analysis_start
    logging.info(f"Analysis time: {elapsed_analysis} seconds")

    with open(f"{output_dir}/features.json", 'w') as f:
        json.dump(analysis_result, f, indent=4)

    return elapsed_search, elapsed_analysis
|
649
|
+
def cve_analysis_from_epss_csv(csv_path):
    """Run ``cve_analysis`` for every CVE listed in an EPSS CSV.

    The CSV must have a ``cve`` column (format ``CVE-YYYY-NNNN``). Only CVEs
    from years 2017-2022 are analyzed; failures are logged and skipped so a
    single bad CVE cannot abort the batch.

    Args:
        csv_path: Path to an EPSS scores CSV file.
    """
    df = pd.read_csv(csv_path)
    with tqdm(total=len(df.index), desc='Analyzing CVEs') as pbar:
        for index, row in df.iterrows():
            cve = row['cve']
            # Parse the year once (the original split the string twice).
            year = int(cve.split('-')[1])
            if year < 2017 or year > 2022:
                # Bug fix: this branch used an undefined ``logger`` name;
                # the rest of the module logs via ``logging``.
                logging.info(f"Skipping {cve}")
                pbar.update()
                continue
            try:
                cve_analysis(cve)
                time.sleep(1)  # throttle external lookups between CVEs
                pbar.update()
                logging.info(f"Finished {index}: {cve}")
            except Exception as e:
                logging.error(f"Error in {cve}: {e}")
                pbar.update()
                continue
|
|
670
|
+
def product_to_cve(product, output_dir):
    """
    Search for CVEs related to a product using cvemap.

    Results are cached under ``{output_dir}/{slug}/cve_lst.json``; alternative
    product names are generated via an LLM to broaden the search.

    Security: Uses sanitized product name and subprocess without shell=True (CWE-78 fix).

    Returns:
        List of CVE id strings (possibly empty).
    """
    # One translation table replacing the original ten chained .replace()
    # calls: path-hostile separators become "_", quote/ampersand/newline
    # characters are dropped entirely.
    slug_table = str.maketrans(
        {" ": "_", "/": "_", ":": "_", "\\": "_", "(": "_", ")": "_",
         '"': None, "'": None, "\n": None, "&": None}
    )
    cve_lst = []
    product_dir_name = product.lower().translate(slug_table)
    cache_path = f"{output_dir}/{product_dir_name}/cve_lst.json"

    # Bug fix: the original tested only the *directory* and then opened the
    # JSON file, so a directory left behind by a failed run raised
    # FileNotFoundError. Test the file itself.
    if os.path.exists(cache_path):
        with open(cache_path) as f:
            return json.load(f)

    os.makedirs(f"{output_dir}/{product_dir_name}", exist_ok=True)
    cleaned_product = product_keyword_gen_openai(product, 3)
    cleaned_product.append(product_dir_name)

    for p in cleaned_product:
        # Security: Sanitize product name to prevent command injection
        sanitized_product = _sanitize_product_name(p)
        if not sanitized_product:
            logging.warning(f"Skipping empty product name after sanitization: {p}")
            continue

        p_dir_name = p.lower().translate(slug_table)
        cvemap_json_path = f"{output_dir}/{product_dir_name}/cvemap_{p_dir_name}.json"

        try:
            # SECURE: Use subprocess without shell=True; output is captured
            # directly instead of via shell redirection.
            result = subprocess.run(
                ["cvemap", "-p", sanitized_product, "-json"],
                capture_output=True,
                text=True,
                timeout=50,
                check=False  # Handle errors ourselves
            )

            if result.returncode != 0:
                logging.warning(f"cvemap failed for product {sanitized_product}: {result.stderr}")
                continue

            # Persist the raw cvemap output for later inspection.
            with open(cvemap_json_path, 'w') as f:
                f.write(result.stdout)

            json_data = json.loads(result.stdout)
            for cve in json_data:
                cve_lst.append(cve['cve_id'])

        except subprocess.TimeoutExpired:
            logging.error(f"cvemap search timed out for product {sanitized_product}")
            continue
        except json.JSONDecodeError as e:
            logging.warning(f"Invalid JSON from cvemap for product {sanitized_product}: {e}")
            continue
        except FileNotFoundError:
            # The binary is missing entirely — no point trying the remaining
            # alternative names.
            logging.error("cvemap command not found. Please install cvemap.")
            break

    with open(cache_path, 'w') as f:
        json.dump(cve_lst, f, indent=4)

    return cve_lst
|
|
736
|
+
def product_keyword_gen_openai(product, num):
    """Generate up to ``num`` alternative product names via an LLM.

    Tries the configured planning model first; on any failure, falls back to
    calling the OpenAI API directly with gpt-4o-mini.

    Args:
        product: Raw product name (possibly non-standard format).
        num: Maximum number of alternatives to request.

    Returns:
        List of slugified (lowercase, underscore-separated) product names.
    """
    prompt = (
        "You're an excellent system administrator. You will be given a product name, which may not be in a standard format. "
        "Your task is to generate alternative product names to broaden the search scope.\n"
        f"The given product name is {product}, please generate at most {num} alternative product names in the following format.\n"
        "[Alternative product name 1, Alternative product name 2, Alternative product name 3, ...]"
    )
    try:
        model_name = planning_config['model']
        llm = get_model(model_name)
        from langchain_core.messages import HumanMessage
        response = llm.invoke([HumanMessage(content=prompt)])
        product_lst_str = response.content.strip() if hasattr(response, 'content') else str(response)
    except Exception:
        # Fallback: call OpenAI directly when the configured model is
        # unavailable (unused exception binding removed).
        import openai
        client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": prompt},
            ],
        )
        product_lst_str = str(response.choices[0].message.content)

    product_lst = product_lst_str.strip("[]").replace("'", "").split(", ")
    # Slugify each candidate. Bug fix: the original comprehension variable
    # shadowed the ``product`` parameter; the replace chain is also collapsed
    # into a single translation table.
    slug_table = str.maketrans(
        {" ": "_", "/": "_", ":": "_", "\\": "_", "(": "_", ")": "_",
         '"': None, "'": None, "\n": None, "&": None}
    )
    return [name.lower().translate(slug_table) for name in product_lst]
|
|
765
|
+
def product_keyword_gen_huggingface(product, num):
    """Generate alternative product names using a local Hugging Face model."""
    system_msg = {
        "role": "system",
        "content": "You're an excellent system administrator. You will be given a product name, which may not be in a standard format. Your task is to generate alternative product names to broaden the search scope.",
    }
    user_msg = {
        "role": "user",
        "content": f"The given product name is {product}, please generate {num} alternative product names in the following format: [Alternative product name 1, Alternative product name 2, Alternative product name 3, ...]",
    }

    # Run with Local LLM
    return chat_completion_huggingface(
        model_name="meta-llama/Meta-Llama-3.1-8B-Instruct",
        messages=[system_msg, user_msg],
        max_tokens=200,
        temperature=0.0,
    )
|
|
782
|
+
def chat_completion_huggingface(
    model_name="meta-llama/Meta-Llama-3.1-8B-Instruct",
    messages=None,
    max_tokens=100,
    temperature=0.0,
    top_p=0.0,
    revision="5206a32e0bd3067aef1ce90f5528ade7d866253f"  # Security: Pinned commit hash (CWE-494)
):
    """
    Generates a chat completion using a specified Hugging Face model.

    Args:
        model_name (str): The Hugging Face model name.
        messages (list | None): Chat messages in OpenAI format
            [{"role": "user", "content": "Hello"}]. ``None`` is treated as
            an empty conversation.
        max_tokens (int): Maximum number of tokens to generate.
        temperature (float): Sampling temperature (higher = more creative).
        top_p (float): Nucleus sampling parameter (controls randomness).
        revision (str): Model revision to use (commit hash or branch name).

    Returns:
        str: The model's response.

    Security:
        Using 'revision' parameter pins the model to a specific version,
        preventing supply chain attacks where a malicious model update
        could be downloaded automatically.
    """
    # Bug fix: the ``messages=None`` default was iterated directly below,
    # raising TypeError when the argument was omitted.
    if messages is None:
        messages = []

    # Load the model and tokenizer dynamically
    # Security: Pin model revision to prevent supply chain attacks (CWE-494)
    # Bandit requires string literals for revision detection
    pinned_revision = revision if revision != "main" else "5206a32e0bd3067aef1ce90f5528ade7d866253f"
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        revision=pinned_revision,  # nosec B615 - revision is pinned via function default
        trust_remote_code=False  # Security: Never execute remote code
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",  # Uses GPU if available
        revision=pinned_revision,  # nosec B615 - revision is pinned via function default
        trust_remote_code=False  # Security: Never execute remote code
    )

    # Convert messages into a formatted prompt
    prompt = ""
    for message in messages:
        if message["role"] == "system":
            prompt += f"<s>[SYSTEM]: {message['content']}\n"
        elif message["role"] == "user":
            prompt += f"[USER]: {message['content']}\n"
        elif message["role"] == "assistant":
            prompt += f"[ASSISTANT]: {message['content']}\n"
    prompt += "[ASSISTANT]:"

    # Tokenize input
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate response (do_sample=False => greedy decoding; temperature and
    # top_p are forwarded unchanged for interface compatibility).
    output = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=False
    )

    # Decode and return only the text after the final assistant marker.
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response.split("[ASSISTANT]:")[-1].strip()
|
|
853
|
+
def analyze_cve_lst(cve_lst, output_dir, app_name):
    """Analyze every CVE in ``cve_lst``, classify them, and write reports.

    Side effects: per-CVE ``features.json``/``classification.json`` under
    ``output_dir``, plus ``analysis_time.json`` and three score CSVs. When
    ``app_name`` is truthy, an extra keyword-based "general exploit" analysis
    is run for the application itself.

    Returns:
        Tuple ``(total_searching_time, total_analysis_time)`` in seconds.
    """
    cvss_results = {}
    cvss_scores = {}
    epss_results = {}
    epss_scores = {}
    pentestasst_results = {}
    pentestasst_scores = {}
    analysis_time_dict = {}
    has_code_lst = []
    total_searching_time = 0
    total_analysis_time = 0

    for cve in cve_lst:
        logging.info(f"Analyzing {cve}")
        if not os.path.exists(f"{output_dir}/{cve}/features.json"):
            searching_time, analysis_time = cve_analysis(cve, output_dir)
            if analysis_time == 0:
                # cve_analysis found no cvemap record; nothing to classify.
                continue
            total_searching_time += searching_time
            total_analysis_time += analysis_time
            analysis_time_dict[cve] = analysis_time
        cvss_score, cvss_category, epss_score, epss_category, final_score, exploitability, has_code = cve_classifier(cve, output_dir, mode="specific")
        cvss_results[cve] = cvss_category
        cvss_scores[cve] = cvss_score
        epss_results[cve] = epss_category
        epss_scores[cve] = epss_score
        pentestasst_results[cve] = exploitability
        pentestasst_scores[cve] = final_score
        if has_code:
            has_code_lst.append(cve)

    with open(f"{output_dir}/analysis_time.json", 'w') as f:
        json.dump(analysis_time_dict, f, indent=4)

    # bin_df = create_df(cvss_results, epss_results, pentestasst_results)
    num_df = create_df(cvss_scores, epss_scores, pentestasst_scores)
    normalized_num_df = normalize_data(num_df)
    normalized_has_code_num_df = normalized_num_df.loc[has_code_lst]
    num_df.to_csv(f'{output_dir}/score_results.csv')
    normalized_num_df.to_csv(f'{output_dir}/normalized_score_results.csv')
    normalized_has_code_num_df.to_csv(f'{output_dir}/normalized_has_code_score_results.csv')

    if app_name:
        logging.info(f"Searching for general exp info for {app_name}")
        print("Start searching general exp info...")
        app_exp_path = app_name + "_exp"
        if not os.path.exists(f"{output_dir}/{app_exp_path}/features.json"):
            keyword = app_name + " exploit"
            searching_time, analysis_time = general_analysis(keyword, os.path.join(output_dir, app_exp_path))
            total_searching_time += searching_time
            total_analysis_time += analysis_time
        cve_classifier(app_exp_path, output_dir, mode="general")

    # Bug fix: guard against ZeroDivisionError when cve_lst is empty and
    # app_name is falsy (the denominator was len(cve_lst) == 0).
    denom = (len(cve_lst) + 1) if app_name else len(cve_lst)
    if denom:
        avg_searching_time = total_searching_time / denom
        avg_analysis_time = total_analysis_time / denom
        logging.info(f"Total searching time: {total_searching_time} seconds, average time: {avg_searching_time} seconds")
        logging.info(f"Total analysis time: {total_analysis_time} seconds, average time: {avg_analysis_time} seconds")

    return total_searching_time, total_analysis_time
|
|
915
|
+
def get_exp_info(cve_lst=None, output_dir="", app_name=""):
    """Entry point: analyze a list of CVEs (plus optional app-level exploit info).

    Args:
        cve_lst: CVE identifiers to analyze. ``None``/empty aborts with a message.
        output_dir: Directory for artifacts (defaults to "resources_cve_lst_try").
        app_name: Optional application name for the extra "general exploit" pass.

    Returns:
        ``(total_searching_time, total_analysis_time)`` on success, ``None``
        when no LLM or no CVE list is available.

    Bug fixes vs. the original:
    - ``model_name_for_token`` was referenced but never defined (NameError);
      the configured ``model_name`` is used instead.
    - ``logger.warning`` was used where the rest of this code uses ``logging``.
    - the token-count report no longer dereferences ``token_counter`` when its
      setup failed and it stayed ``None``.
    - the mutable default argument ``cve_lst=[]`` is replaced by ``None``
      (backward compatible: the list was never mutated).
    """
    if cve_lst is None:
        cve_lst = []
    logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        filename='cve_info.log',
                        level=logging.INFO)

    track_tokens = True
    token_counter = None
    model_name = planning_config['model']
    if track_tokens:
        try:
            token_counter = TokenCountingHandler(tokenizer=tiktoken.encoding_for_model(model_name).encode)
            Settings.callback_manager = CallbackManager([token_counter])
        except Exception as e:
            logging.warning(f"Tokenizer for model {model_name} not found, token counting disabled: {e}")
            Settings.callback_manager = CallbackManager([])
            token_counter = None

    ### Step 1: Select the LLM

    ## Initialize LLM from config via model manager (provider-agnostic)
    try:
        model_name = planning_config['model']
    except Exception:
        model_name = "openai"

    llm = get_model(model_name)
    if llm is None:
        # Keep previous behavior when no API key/config: abort gracefully
        print("LLM_API_KEY not set")
        return

    try:
        Settings.llm = LangChainLLM(llm=llm)
    except Exception:
        # Fallback to OpenAI config if available to preserve behavior
        cve_config = config['cve']
        print(f"Model: {cve_config['model']}")
        Settings.llm = OpenAI(temperature=cve_config['temperature'], model=cve_config['model'])

    ### Step 2: Input the CVEs to analyze

    if not cve_lst:
        print("CVE to be searched not set! ")
        return

    ### Step 3: Analyze the CVEs
    if not output_dir:
        print("Directory to store not set! will use default settings.")
        output_dir = "resources_cve_lst_try"

    os.makedirs(output_dir, exist_ok=True)

    ## Scenario 2: Given a list of CVEs, analyze the CVEs

    total_searching_time, total_analysis_time = analyze_cve_lst(cve_lst, output_dir, app_name)

    # Only report token usage when the counter was actually installed.
    if track_tokens and token_counter is not None:
        print(
            "Embedding Tokens: ",
            token_counter.total_embedding_token_count,
            "\n",
            "LLM Prompt Tokens: ",
            token_counter.prompt_llm_token_count,
            "\n",
            "LLM Completion Tokens: ",
            token_counter.completion_llm_token_count,
            "\n",
            "Total LLM Token Count: ",
            token_counter.total_llm_token_count,
            "\n",
            "Average LLM Token Count: ",
            token_counter.total_llm_token_count / len(cve_lst),
            "\n",
        )

        logging.info(
            "Embedding Tokens: "
            + str(token_counter.total_embedding_token_count)
            + "\n"
            + "LLM Prompt Tokens: "
            + str(token_counter.prompt_llm_token_count)
            + "\n"
            + "LLM Completion Tokens: "
            + str(token_counter.completion_llm_token_count)
            + "\n"
            + "Total LLM Token Count: "
            + str(token_counter.total_llm_token_count)
            + "\n"
            + "Average LLM Token Count: "
            + str(token_counter.total_llm_token_count / len(cve_lst))
            + "\n"
        )

        token_counter.reset_counts()

    return total_searching_time, total_analysis_time
|
|
1015
|
+
def main():
    """Standalone driver: analyze a hard-coded CVE list for one application.

    NOTE(review): the CVE list path, output directory, and app name are all
    hard-coded below — this reads like a development/test harness rather
    than the production entry point (``get_exp_info`` above takes arguments).
    """
    logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        filename='cve_info.log',
                        level=logging.INFO)

    # Token accounting is disabled by default; flip track_tokens to enable.
    track_tokens = False
    token_counter = None
    if track_tokens:
        token_counter = TokenCountingHandler(tokenizer=tiktoken.encoding_for_model("gpt-4o-mini").encode)
        Settings.callback_manager = CallbackManager([token_counter])

    ### Step 1: Select the LLM

    ## OpenAI API
    api_key = os.environ.get("OPENAI_API_KEY")
    if api_key is None or api_key == "":
        print("OPENAI_API_KEY not set")
        return
    # NOTE(review): the model is chosen by matching literal substrings of the
    # API key — presumably distinguishing specific team keys. This is fragile
    # and undocumented; confirm intent before relying on it.
    elif "f4Ph3uIoqGLC9" in api_key:
        print("Model: o3-mini")
        Settings.llm = OpenAI(temperature=0, model="o3-mini")
    elif "klO8n1OFxLWBoPIeDycM" in api_key:
        print("Model: gpt-4o-mini")
        Settings.llm = OpenAI(temperature=0, model="gpt-4o-mini")
    else:
        print("Model: gpt-4o-mini (default)")
        Settings.llm = OpenAI(temperature=0, model="gpt-4o-mini")
    # Settings.llm = OpenAI(temperature=0, model="o3-mini")

    ## Option 2: Read CVE list from file
    # NOTE(review): relative path assumes a specific working directory; a
    # missing file raises FileNotFoundError here.
    cve_lst = []
    with open("../../target_lst/vulhub_test/xstream.txt", 'r') as f:
        cve_lst = f.read().splitlines()

    ## Option 3: Hard-coded CVE list
    # cve_lst = ['CVE-XXX-XXXX', 'CVE-XXX-XXXX', 'CVE-XXX-XXXX', 'CVE-XXX-XXXX', 'CVE-XXX-XXXX']


    ### Step 3: Analyze the CVEs
    output_dir = "resources_test/xstream"
    os.makedirs(output_dir, exist_ok=True)

    ## Scenario 1: Given a list of CVEs, analyze the CVEs

    # analyze_cve_lst(cve_lst, output_dir)
    analyze_cve_lst(cve_lst, output_dir, "xstream") # assume app's name is xstream


    if track_tokens:
        print(
            "Embedding Tokens: ",
            token_counter.total_embedding_token_count,
            "\n",
            "LLM Prompt Tokens: ",
            token_counter.prompt_llm_token_count,
            "\n",
            "LLM Completion Tokens: ",
            token_counter.completion_llm_token_count,
            "\n",
            "Total LLM Token Count: ",
            token_counter.total_llm_token_count,
            "\n",
            "Average LLM Token Count: ",
            token_counter.total_llm_token_count / len(cve_lst),
            "\n",
        )

        logging.info(
            "Embedding Tokens: "
            + str(token_counter.total_embedding_token_count)
            + "\n"
            + "LLM Prompt Tokens: "
            + str(token_counter.prompt_llm_token_count)
            + "\n"
            + "LLM Completion Tokens: "
            + str(token_counter.completion_llm_token_count)
            + "\n"
            + "Total LLM Token Count: "
            + str(token_counter.total_llm_token_count)
            + "\n"
            + "Average LLM Token Count: "
            + str(token_counter.total_llm_token_count / len(cve_lst))
            + "\n"
        )

        token_counter.reset_counts()


    # epss_csv_path = 'epss_scores-2024-08-06.csv'
    # cve_analysis_from_epss_csv(epss_csv_path)



if __name__ == "__main__":
    main()