PyPI - pyexploitdb - Versions diffs - 0.3.10__py3-none-any.whl - Mend

pyexploitdb 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

pyexploitdb/__init__.py +447 -0
pyexploitdb/cveToEdbid.json +78181 -0
pyexploitdb/edbidToCve.json +102192 -0
pyexploitdb-0.3.10.dist-info/METADATA +110 -0
pyexploitdb-0.3.10.dist-info/RECORD +8 -0
pyexploitdb-0.3.10.dist-info/WHEEL +5 -0
pyexploitdb-0.3.10.dist-info/licenses/LICENSE +674 -0
pyexploitdb-0.3.10.dist-info/top_level.txt +1 -0

pyexploitdb/__init__.py ADDED Viewed

@@ -0,0 +1,447 @@
+from __future__ import annotations
+import csv
+import json
+import os
+import re
+import shutil
+import time
+import warnings
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Sequence, Set
+import git
+import requests
+# Names exported by ``from pyexploitdb import *``.
+__all__ = ["PyExploitDb", "PyExploitDbError", "InvalidDataError"]
+# One exploit record as returned to callers: field name -> CSV cell text.
+CveDetails = Dict[str, str]
+# Generic "key -> list of IDs" mapping; used for both exploit ID -> CVE IDs and CVE ID -> exploit IDs.
+ExploitMap = Dict[str, List[str]]
+# CVE identifier: "CVE-", a 4-digit year, then 4 to 7 digits; matched case-insensitively.
+_CVE_PATTERN = re.compile(r"CVE-\d{4}-\d{4,7}", re.IGNORECASE)
+# Same pattern compiled for bytes so raw HTTP response bodies can be scanned without decoding.
+_CVE_PATTERN_BYTES = re.compile(rb"CVE-\d{4}-\d{4,7}", re.IGNORECASE)
+class PyExploitDbError(Exception):
+    """Base exception for pyexploitdb related failures.
+    Raised directly for unreadable/unwritable files, missing data and
+    exhausted HTTP retries; catch this type to handle every error the
+    package raises on purpose.
+    """
+class InvalidDataError(PyExploitDbError):
+    """Raised when on-disk data cannot be parsed or validated.
+    Used for corrupt JSON, JSON whose top level is not a mapping, and
+    ``files_exploits.csv`` rows with too few columns.
+    """
+class PyExploitDb:
+    """High level interface for working with exploit-db offline data."""
+    EXPLOIT_DB_REPO = "https://gitlab.com/exploit-database/exploitdb.git"
+    FILES_EXPLOITS = "files_exploits.csv"
+    USER_AGENT = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) Chrome/39.0 Safari/537.36"}
+    def __init__(
+        self,
+        *,
+        exploit_db_path: Optional[Path] = None,
+        request_cool_off_time: float = 1.0,
+        max_retries: int = 3,
+        session: Optional[requests.Session] = None,
+    ) -> None:
+        self.current_path = Path(__file__).resolve().parent
+        self.exploit_db_path = exploit_db_path or self.current_path / "exploit-database"
+        self.edbid_to_cve_file = self.current_path / "edbidToCve.json"
+        self.cve_to_edbid_file = self.current_path / "cveToEdbid.json"
+        self.request_cool_off_time = max(0.0, float(request_cool_off_time))
+        self.max_retries = max(1, int(max_retries))
+        self.cve_to_exploit_map: ExploitMap = {}
+        self._exploit_cache: Dict[str, Sequence[str]] = {}
+        self._session = session or requests.Session()
+        self._owns_session = session is None
+        self._session.headers.setdefault("User-Agent", self.USER_AGENT["User-Agent"])
+        self.debug = False
+    def log_debug(self, message: str) -> None:
+        if self.debug:
+            print(message)
+    def clone_or_update_repo(self) -> None:
+        if not self.exploit_db_path.exists():
+            self._clone_repo()
+            return
+        try:
+            repo = git.Repo(self.exploit_db_path)
+        except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
+            self.log_debug("Existing exploit-database directory invalid, re-cloning...")
+            self.delete_and_reclone_repo()
+            return
+        try:
+            self.log_debug("Pulling exploit-database updates...")
+            repo.remotes.origin.pull("main")
+        except git.exc.GitCommandError as exc:
+            self.log_debug(f"Pull failed ({exc}); attempting to re-clone repository.")
+            self.delete_and_reclone_repo()
+    def pull_latest_updates(self) -> None:
+        self.clone_or_update_repo()
+    def delete_and_reclone_repo(self) -> None:
+        if self.exploit_db_path.exists():
+            shutil.rmtree(self.exploit_db_path, ignore_errors=True)
+        self._clone_repo()
+        self._exploit_cache.clear()
+    def open_file(self, exploit_map: str = "cveToEdbid.json", encoding: str = "utf-8") -> None:
+        self.clone_or_update_repo()
+        data = self._load_json_file(self.current_path / exploit_map, encoding)
+        self._set_cve_map(data)
+    def get_cve_details(self, cve_search: str) -> List[CveDetails]:
+        if not self.cve_to_exploit_map:
+            raise PyExploitDbError("Exploit data is not loaded; call open_file() or update_db() first.")
+        if not cve_search:
+            return []
+        edb_ids = self.cve_to_exploit_map.get(cve_search.upper(), [])
+        if not edb_ids:
+            return []
+        self._ensure_exploit_cache()
+        results: List[CveDetails] = []
+        for edb_id in edb_ids:
+            row = self._exploit_cache.get(edb_id)
+            if row:
+                results.append(self.extract_cve_details(row))
+            else:
+                self.log_debug(f"EDB {edb_id} referenced by CVE {cve_search} not present in CSV data.")
+        return results
+    def extract_cve_details(self, row: Sequence[str]) -> CveDetails:
+        if len(row) < 16:
+            raise InvalidDataError("files_exploits.csv row is malformed; expected at least 16 columns.")
+        details = {
+            "id": row[0],
+            "file": row[1],
+            "description": row[2],
+            "date": row[3],
+            "author": row[4],
+            "type": row[5],
+            "platform": row[6],
+            "port": row[7],
+            "date_updated": row[9],
+            "verified": row[10],
+            "codes": row[11],
+            "tags": row[12],
+            "aliases": row[13],
+            "app_url": row[14],
+            "src_url": row[15],
+        }
+        self.log_debug(f"CVE details extracted: {details}")
+        return details
+    def search_cve(self, cve_search: str) -> List[CveDetails]:
+        return self.get_cve_details(cve_search)
+    def update_db(self, *, workers: Optional[int] = None) -> None:
+        self.clone_or_update_repo()
+        data = self.load_existing_data(self.edbid_to_cve_file)
+        exploits = self.load_exploit_csv()
+        missing_ids: List[str] = []
+        for row in exploits:
+            if not row or not row[0]:
+                continue
+            edb_id = row[0]
+            if edb_id in data:
+                continue
+            cves = self._extract_cves_from_row(row)
+            if cves:
+                data[edb_id] = sorted(cves)
+            else:
+                missing_ids.append(edb_id)
+        if missing_ids:
+            unique_ids = list(dict.fromkeys(missing_ids))
+            fetched = self._fetch_missing_cves(unique_ids, workers=workers)
+            data.update(fetched)
+        self.write_json(self.edbid_to_cve_file, data)
+        self.create_cve_to_exploit_map(data)
+        self.write_json(self.cve_to_edbid_file, self.cve_to_exploit_map)
+    def load_existing_data(self, file_path: Path | str) -> ExploitMap:
+        path = Path(file_path)
+        if not path.exists():
+            return {}
+        try:
+            with path.open(encoding="utf-8") as file_data:
+                raw = json.load(file_data)
+        except json.JSONDecodeError as exc:
+            raise InvalidDataError(f"Corrupt JSON detected at {path}; delete file and retry.") from exc
+        except OSError as exc:
+            raise PyExploitDbError(f"Unable to read {path}") from exc
+        return self._normalise_edbid_mapping(raw)
+    def load_exploit_csv(self) -> List[List[str]]:
+        path = self.exploit_db_path / self.FILES_EXPLOITS
+        try:
+            with path.open(encoding="utf-8", newline="") as file:
+                reader = csv.reader(file)
+                next(reader, None)  # skip header
+                rows = [row for row in reader if row]
+        except FileNotFoundError as exc:
+            raise PyExploitDbError(f"{self.FILES_EXPLOITS} not found at {path}.") from exc
+        except OSError as exc:
+            raise PyExploitDbError(f"Unable to read {path}") from exc
+        self._exploit_cache = {row[0]: row for row in rows if row and row[0]}
+        return rows
+    def fetch_cves_for_exploit(self, edb_id: str, session: Optional[requests.Session] = None) -> Set[str]:
+        request_uri = f"https://www.exploit-db.com/exploits/{edb_id}"
+        active_session = session or self._session
+        for attempt in range(1, self.max_retries + 1):
+            try:
+                response = active_session.get(request_uri, timeout=10)
+            except requests.RequestException as exc:
+                self.log_debug(f"Request error for {request_uri} (attempt {attempt}/{self.max_retries}): {exc}")
+            else:
+                if response.status_code == 404:
+                    self.log_debug(f"Exploit {edb_id} returned 404; no CVEs available.")
+                    return set()
+                if response.ok:
+                    return self.parse_cves_from_content(response.content)
+                self.log_debug(
+                    f"Unexpected status {response.status_code} for {request_uri} "
+                    f"(attempt {attempt}/{self.max_retries})."
+                )
+            if attempt < self.max_retries:
+                time.sleep(self.request_cool_off_time)
+        raise PyExploitDbError(f"Failed to fetch CVEs for exploit {edb_id} after {self.max_retries} attempts.")
+    def parse_cves_from_content(self, content: bytes) -> Set[str]:
+        return {match.group(0).decode("ascii").upper() for match in _CVE_PATTERN_BYTES.finditer(content)}
+    def write_json(self, file_path: Path | str, data: ExploitMap) -> None:
+        path = Path(file_path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            with path.open("w", encoding="utf-8") as file_data:
+                json.dump(data, file_data, indent=2, sort_keys=True)
+        except OSError as exc:
+            raise PyExploitDbError(f"Unable to write {path}") from exc
+    def create_cve_to_exploit_map(self, data: Dict[str, Iterable[str]]) -> None:
+        mapping: ExploitMap = {}
+        for edb_id, cves in data.items():
+            if not edb_id:
+                continue
+            for cve in cves:
+                cve_key = str(cve).upper()
+                if not cve_key:
+                    continue
+                mapping.setdefault(cve_key, []).append(str(edb_id))
+        for ids in mapping.values():
+            ids.sort()
+        self.cve_to_exploit_map = mapping
+        self._exploit_cache.clear()
+    def _fetch_missing_cves(self, missing_ids: Sequence[str], *, workers: Optional[int]) -> ExploitMap:
+        max_workers = workers or min(8, max(1, (os.cpu_count() or 4) // 2))
+        results: ExploitMap = {}
+        def task(edb_id: str) -> tuple[str, List[str]]:
+            with requests.Session() as session:
+                session.headers["User-Agent"] = self.USER_AGENT["User-Agent"]
+                try:
+                    cves = sorted(self.fetch_cves_for_exploit(edb_id, session=session))
+                except PyExploitDbError as exc:
+                    self.log_debug(f"Failed to hydrate CVEs for {edb_id}: {exc}")
+                    return edb_id, []
+                return edb_id, cves
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            future_map = {executor.submit(task, edb_id): edb_id for edb_id in missing_ids}
+            for future in as_completed(future_map):
+                edb_id = future_map[future]
+                try:
+                    resolved_id, cves = future.result()
+                except Exception as exc:  # noqa: BLE001
+                    self.log_debug(f"Unhandled exception while hydrating CVEs for {edb_id}: {exc}")
+                    results[edb_id] = []
+                    continue
+                results[resolved_id] = cves
+        return results
+    def _extract_cves_from_row(self, row: Sequence[str]) -> Set[str]:
+        cve_fields = [
+            row[11] if len(row) > 11 else "",
+            row[12] if len(row) > 12 else "",
+            row[13] if len(row) > 13 else "",
+            row[2] if len(row) > 2 else "",
+        ]
+        cves: Set[str] = set()
+        for field in cve_fields:
+            if not field:
+                continue
+            cves.update(match.group(0).upper() for match in _CVE_PATTERN.finditer(field))
+        return cves
+    def close(self) -> None:
+        if self._owns_session:
+            self._session.close()
+    def __enter__(self) -> "PyExploitDb":
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        self.close()
+    def _clone_repo(self) -> None:
+        self.log_debug("Cloning exploit-database repository...")
+        git.Repo.clone_from(self.EXPLOIT_DB_REPO, str(self.exploit_db_path))
+    def _ensure_exploit_cache(self) -> None:
+        if not self._exploit_cache:
+            self.load_exploit_csv()
+    def _set_cve_map(self, raw: object) -> None:
+        self.cve_to_exploit_map = self._normalise_cve_mapping(raw)
+    def _load_json_file(self, path: Path, encoding: str) -> object:
+        try:
+            with path.open(encoding=encoding) as file_data:
+                return json.load(file_data)
+        except FileNotFoundError as exc:
+            raise PyExploitDbError(f"Required JSON file not found: {path}") from exc
+        except json.JSONDecodeError as exc:
+            raise InvalidDataError(f"Corrupt JSON detected in {path}") from exc
+        except OSError as exc:
+            raise PyExploitDbError(f"Unable to read {path}") from exc
+    def _normalise_edbid_mapping(self, raw: object) -> ExploitMap:
+        if not isinstance(raw, dict):
+            raise InvalidDataError("Expected mapping of exploit IDs to CVE lists.")
+        result: ExploitMap = {}
+        for edb_id, cves in raw.items():
+            if not isinstance(edb_id, str):
+                self.log_debug(f"Skipping non-string exploit ID key {edb_id!r}")
+                continue
+            if not isinstance(cves, (list, tuple, set)):
+                self.log_debug(f"Skipping malformed CVE container for exploit {edb_id}")
+                continue
+            normalised = sorted({str(cve).upper() for cve in cves if str(cve).strip()})
+            if normalised:
+                result[edb_id] = normalised
+        return result
+    def _normalise_cve_mapping(self, raw: object) -> ExploitMap:
+        if not isinstance(raw, dict):
+            raise InvalidDataError("Expected mapping of CVE IDs to exploit lists.")
+        mapping: ExploitMap = {}
+        for cve, exploits in raw.items():
+            if not isinstance(cve, str):
+                continue
+            if not isinstance(exploits, (list, tuple, set)):
+                continue
+            cleaned = [str(edb).strip() for edb in exploits if str(edb).strip()]
+            if cleaned:
+                mapping[cve.upper()] = sorted(cleaned)
+        return mapping
+    def __del__(self) -> None:
+        self.close()
+    def logDebug(self, message: str) -> None:
+        warnings.warn("logDebug is deprecated; use log_debug instead.", DeprecationWarning, stacklevel=2)
+        self.log_debug(message)
+    def cloneOrUpdateRepo(self) -> None:
+        warnings.warn("cloneOrUpdateRepo is deprecated; use clone_or_update_repo instead.", DeprecationWarning, stacklevel=2)
+        self.clone_or_update_repo()
+    def pullLatestUpdates(self) -> None:
+        warnings.warn("pullLatestUpdates is deprecated; use pull_latest_updates instead.", DeprecationWarning, stacklevel=2)
+        self.pull_latest_updates()
+    def deleteAndRecloneRepo(self) -> None:
+        warnings.warn("deleteAndRecloneRepo is deprecated; use delete_and_reclone_repo instead.", DeprecationWarning, stacklevel=2)
+        self.delete_and_reclone_repo()
+    def openFile(self, exploitMap: str = "cveToEdbid.json", encoding: str = "utf-8") -> None:
+        warnings.warn("openFile is deprecated; use open_file instead.", DeprecationWarning, stacklevel=2)
+        self.open_file(exploit_map=exploitMap, encoding=encoding)
+    def getCveDetails(self, cveSearch: str) -> List[CveDetails]:
+        warnings.warn("getCveDetails is deprecated; use get_cve_details instead.", DeprecationWarning, stacklevel=2)
+        return self.get_cve_details(cveSearch)
+    def extractCveDetails(self, row: Sequence[str]) -> CveDetails:
+        warnings.warn("extractCveDetails is deprecated; use extract_cve_details instead.", DeprecationWarning, stacklevel=2)
+        return self.extract_cve_details(row)
+    def searchCve(self, cveSearch: str) -> List[CveDetails]:
+        warnings.warn("searchCve is deprecated; use search_cve instead.", DeprecationWarning, stacklevel=2)
+        return self.search_cve(cveSearch)
+    def updateDb(self, *, workers: Optional[int] = None) -> None:
+        warnings.warn("updateDb is deprecated; use update_db instead.", DeprecationWarning, stacklevel=2)
+        self.update_db(workers=workers)
+    def loadExistingData(self, file_path: Path | str) -> ExploitMap:
+        warnings.warn("loadExistingData is deprecated; use load_existing_data instead.", DeprecationWarning, stacklevel=2)
+        return self.load_existing_data(file_path)
+    def loadExploitCsv(self) -> List[List[str]]:
+        warnings.warn("loadExploitCsv is deprecated; use load_exploit_csv instead.", DeprecationWarning, stacklevel=2)
+        return self.load_exploit_csv()
+    def fetchCvesForExploit(self, edb_id: str) -> Set[str]:
+        warnings.warn(
+            "fetchCvesForExploit is deprecated; use fetch_cves_for_exploit instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.fetch_cves_for_exploit(edb_id)
+    def parseCvesFromContent(self, content: bytes) -> Set[str]:
+        warnings.warn(
+            "parseCvesFromContent is deprecated; use parse_cves_from_content instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.parse_cves_from_content(content)
+    def writeJson(self, file_path: Path | str, data: ExploitMap) -> None:
+        warnings.warn("writeJson is deprecated; use write_json instead.", DeprecationWarning, stacklevel=2)
+        self.write_json(file_path, data)
+    def createCveToExploitMap(self, data: Dict[str, Iterable[str]]) -> None:
+        warnings.warn(
+            "createCveToExploitMap is deprecated; use create_cve_to_exploit_map instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        self.create_cve_to_exploit_map(data)
+def test() -> None:
+    exploit_db = PyExploitDb()
+    exploit_db.debug = False
+    exploit_db.open_file()
+    results = exploit_db.search_cve("CVE-2018-14592")
+    print("PASS" if results else "FAIL")
+# Run the smoke test only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    test()