pyexploitdb 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,447 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ import json
5
+ import os
6
+ import re
7
+ import shutil
8
+ import time
9
+ import warnings
10
+ from concurrent.futures import ThreadPoolExecutor, as_completed
11
+ from pathlib import Path
12
+ from typing import Dict, Iterable, List, Optional, Sequence, Set
13
+
14
+ import git
15
+ import requests
16
+
17
# Public names exported by ``from <module> import *``.
__all__ = ["PyExploitDb", "PyExploitDbError", "InvalidDataError"]

# One exploit row rendered as a field-name -> value mapping.
CveDetails = Dict[str, str]
# Identifier -> list of related identifiers (EDB id -> CVE ids, or CVE id -> EDB ids).
ExploitMap = Dict[str, List[str]]
# CVE identifiers ("CVE-" + 4-digit year + 4 to 7 digit sequence), matched
# case-insensitively. The bytes variant is applied to raw HTTP response bodies.
_CVE_PATTERN = re.compile(r"CVE-\d{4}-\d{4,7}", re.IGNORECASE)
_CVE_PATTERN_BYTES = re.compile(rb"CVE-\d{4}-\d{4,7}", re.IGNORECASE)
23
+
24
+
25
class PyExploitDbError(Exception):
    """Root of the pyexploitdb exception hierarchy.

    Catch this type to handle any failure raised by the package itself.
    """
27
+
28
+
29
class InvalidDataError(PyExploitDbError):
    """Signals that data read from disk could not be parsed or failed validation."""
31
+
32
+
33
class PyExploitDb:
    """High level interface for working with exploit-db offline data.

    An instance works with three pieces of state:

    * a local clone of the exploit-database git repository,
    * two JSON maps stored next to this module (``edbidToCve.json`` and
      ``cveToEdbid.json``),
    * an in-memory cache of ``files_exploits.csv`` rows keyed by exploit ID.

    The class is a context manager; leaving the ``with`` block closes the
    HTTP session when this instance created it.
    """

    EXPLOIT_DB_REPO = "https://gitlab.com/exploit-database/exploitdb.git"
    FILES_EXPLOITS = "files_exploits.csv"
    USER_AGENT = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) Chrome/39.0 Safari/537.36"}

    def __init__(
        self,
        *,
        exploit_db_path: Optional[Path] = None,
        request_cool_off_time: float = 1.0,
        max_retries: int = 3,
        session: Optional[requests.Session] = None,
    ) -> None:
        """Configure paths, retry policy and the HTTP session.

        Args:
            exploit_db_path: Location of the exploit-database clone. Strings
                are accepted and converted to ``Path``. Defaults to an
                ``exploit-database`` directory next to this module.
            request_cool_off_time: Seconds to sleep between retries; negative
                values are clamped to ``0.0``.
            max_retries: Attempts per HTTP request; values below 1 become 1.
            session: Optional caller-owned ``requests.Session``. It is never
                closed by this class and keeps any User-Agent it already has.
        """
        # Set session bookkeeping first so close()/__del__ stay safe even if
        # one of the conversions below raises.
        self._session = None
        self._owns_session = False
        self.debug = False

        self.current_path = Path(__file__).resolve().parent
        self.exploit_db_path = (
            Path(exploit_db_path) if exploit_db_path else self.current_path / "exploit-database"
        )
        self.edbid_to_cve_file = self.current_path / "edbidToCve.json"
        self.cve_to_edbid_file = self.current_path / "cveToEdbid.json"
        self.request_cool_off_time = max(0.0, float(request_cool_off_time))
        self.max_retries = max(1, int(max_retries))

        self.cve_to_exploit_map: ExploitMap = {}
        self._exploit_cache: Dict[str, Sequence[str]] = {}

        if session is None:
            self._session = requests.Session()
            self._owns_session = True
            # A fresh Session already carries a default User-Agent, so
            # setdefault() would leave it in place; assign ours explicitly,
            # exactly as the worker sessions in _fetch_missing_cves do.
            self._session.headers["User-Agent"] = self.USER_AGENT["User-Agent"]
        else:
            self._session = session
            # Respect a User-Agent the caller configured on their own session.
            self._session.headers.setdefault("User-Agent", self.USER_AGENT["User-Agent"])

    def log_debug(self, message: str) -> None:
        """Print *message* when ``self.debug`` is truthy."""
        if self.debug:
            print(message)

    def clone_or_update_repo(self) -> None:
        """Clone the exploit-database repository, or pull ``main`` if it exists.

        An invalid repository directory or a failed pull triggers a delete and
        fresh clone. After a successful pull the cached CSV rows are dropped
        so the next lookup re-reads the updated file.
        """
        if not self.exploit_db_path.exists():
            self._clone_repo()
            return

        try:
            repo = git.Repo(self.exploit_db_path)
        except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
            self.log_debug("Existing exploit-database directory invalid, re-cloning...")
            self.delete_and_reclone_repo()
            return

        try:
            self.log_debug("Pulling exploit-database updates...")
            repo.remotes.origin.pull("main")
        except git.exc.GitCommandError as exc:
            self.log_debug(f"Pull failed ({exc}); attempting to re-clone repository.")
            pull_failed = True
        else:
            pull_failed = False
        finally:
            # Release git's handles before the directory is possibly removed.
            repo.close()

        if pull_failed:
            self.delete_and_reclone_repo()
        else:
            # The CSV on disk may have changed; do not serve stale rows.
            self._exploit_cache.clear()

    def pull_latest_updates(self) -> None:
        """Alias of :meth:`clone_or_update_repo`."""
        self.clone_or_update_repo()

    def delete_and_reclone_repo(self) -> None:
        """Remove the local clone (best effort), clone again and drop cached rows."""
        if self.exploit_db_path.exists():
            shutil.rmtree(self.exploit_db_path, ignore_errors=True)
        self._clone_repo()
        self._exploit_cache.clear()

    def open_file(self, exploit_map: str = "cveToEdbid.json", encoding: str = "utf-8") -> None:
        """Refresh the repository and load the CVE -> exploit-ID map.

        Args:
            exploit_map: File name, resolved relative to this module's directory.
            encoding: Text encoding used to read the file.

        Raises:
            PyExploitDbError: The file is missing or unreadable.
            InvalidDataError: The file is not valid JSON or not a mapping.
        """
        self.clone_or_update_repo()
        data = self._load_json_file(self.current_path / exploit_map, encoding)
        self._set_cve_map(data)

    def get_cve_details(self, cve_search: str) -> List[CveDetails]:
        """Return details of every exploit mapped to *cve_search*.

        The lookup is case-insensitive and ignores surrounding whitespace.
        Exploit IDs that are mapped but absent from the CSV are skipped.

        Raises:
            PyExploitDbError: No CVE map has been loaded yet.
        """
        if not self.cve_to_exploit_map:
            raise PyExploitDbError("Exploit data is not loaded; call open_file() or update_db() first.")

        query = cve_search.strip().upper() if cve_search else ""
        if not query:
            return []

        edb_ids = self.cve_to_exploit_map.get(query, [])
        if not edb_ids:
            return []

        self._ensure_exploit_cache()
        results: List[CveDetails] = []
        for edb_id in edb_ids:
            row = self._exploit_cache.get(edb_id)
            if row:
                results.append(self.extract_cve_details(row))
            else:
                self.log_debug(f"EDB {edb_id} referenced by CVE {cve_search} not present in CSV data.")
        return results

    def extract_cve_details(self, row: Sequence[str]) -> CveDetails:
        """Convert one ``files_exploits.csv`` row into a field-name mapping.

        Raises:
            InvalidDataError: The row has fewer than 16 columns.
        """
        if len(row) < 16:
            raise InvalidDataError("files_exploits.csv row is malformed; expected at least 16 columns.")
        # NOTE(review): column 8 is not exported and "app_url"/"src_url" read
        # columns 14/15 -- confirm this still matches the CSV header upstream.
        details = {
            "id": row[0],
            "file": row[1],
            "description": row[2],
            "date": row[3],
            "author": row[4],
            "type": row[5],
            "platform": row[6],
            "port": row[7],
            "date_updated": row[9],
            "verified": row[10],
            "codes": row[11],
            "tags": row[12],
            "aliases": row[13],
            "app_url": row[14],
            "src_url": row[15],
        }
        self.log_debug(f"CVE details extracted: {details}")
        return details

    def search_cve(self, cve_search: str) -> List[CveDetails]:
        """Alias of :meth:`get_cve_details`."""
        return self.get_cve_details(cve_search)

    def update_db(self, *, workers: Optional[int] = None) -> None:
        """Rebuild both JSON maps from the repository and, where needed, the website.

        Exploit IDs already present in ``edbidToCve.json`` are kept. For new
        IDs, CVEs are taken from the CSV row when it mentions any; the rest
        are fetched from exploit-db.com using up to *workers* threads.
        """
        self.clone_or_update_repo()
        data = self.load_existing_data(self.edbid_to_cve_file)
        exploits = self.load_exploit_csv()

        missing_ids: List[str] = []
        for row in exploits:
            if not row or not row[0]:
                continue
            edb_id = row[0]
            if edb_id in data:
                continue

            cves = self._extract_cves_from_row(row)
            if cves:
                data[edb_id] = sorted(cves)
            else:
                missing_ids.append(edb_id)

        if missing_ids:
            # dict.fromkeys de-duplicates while keeping first-seen order.
            unique_ids = list(dict.fromkeys(missing_ids))
            fetched = self._fetch_missing_cves(unique_ids, workers=workers)
            # NOTE(review): IDs without CVEs are written as empty lists, but
            # load_existing_data() drops empty entries, so they are fetched
            # again on the next run -- confirm this is intended.
            data.update(fetched)

        self.write_json(self.edbid_to_cve_file, data)
        self.create_cve_to_exploit_map(data)
        self.write_json(self.cve_to_edbid_file, self.cve_to_exploit_map)

    def load_existing_data(self, file_path: Path | str) -> ExploitMap:
        """Load and normalise an exploit-ID -> CVE-list JSON file.

        Returns:
            The normalised mapping, or ``{}`` when the file does not exist.

        Raises:
            InvalidDataError: The file is not decodable JSON or not a mapping.
            PyExploitDbError: The file cannot be read.
        """
        path = Path(file_path)
        if not path.exists():
            return {}

        try:
            with path.open(encoding="utf-8") as file_data:
                raw = json.load(file_data)
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            raise InvalidDataError(f"Corrupt JSON detected at {path}; delete file and retry.") from exc
        except OSError as exc:
            raise PyExploitDbError(f"Unable to read {path}") from exc

        return self._normalise_edbid_mapping(raw)

    def load_exploit_csv(self) -> List[List[str]]:
        """Read ``files_exploits.csv`` and refresh the exploit-ID row cache.

        Returns:
            All non-empty data rows (the header line is skipped).

        Raises:
            PyExploitDbError: The file is missing or cannot be read.
            InvalidDataError: The file cannot be decoded or parsed as CSV.
        """
        path = self.exploit_db_path / self.FILES_EXPLOITS
        try:
            with path.open(encoding="utf-8", newline="") as file:
                reader = csv.reader(file)
                next(reader, None)  # skip header
                rows = [row for row in reader if row]
        except FileNotFoundError as exc:
            raise PyExploitDbError(f"{self.FILES_EXPLOITS} not found at {path}.") from exc
        except OSError as exc:
            raise PyExploitDbError(f"Unable to read {path}") from exc
        except (UnicodeDecodeError, csv.Error) as exc:
            raise InvalidDataError(f"Unable to parse {path}") from exc

        self._exploit_cache = {row[0]: row for row in rows if row[0]}
        return rows

    def fetch_cves_for_exploit(self, edb_id: str, session: Optional[requests.Session] = None) -> Set[str]:
        """Fetch the exploit page for *edb_id* and return the CVE IDs it mentions.

        A 404 yields an empty set. Request errors and other non-OK statuses
        are retried up to ``max_retries`` times with a cool-off sleep between
        attempts.

        Raises:
            PyExploitDbError: Every attempt failed.
        """
        request_uri = f"https://www.exploit-db.com/exploits/{edb_id}"
        active_session = session or self._session
        for attempt in range(1, self.max_retries + 1):
            try:
                response = active_session.get(request_uri, timeout=10)
            except requests.RequestException as exc:
                self.log_debug(f"Request error for {request_uri} (attempt {attempt}/{self.max_retries}): {exc}")
            else:
                if response.status_code == 404:
                    self.log_debug(f"Exploit {edb_id} returned 404; no CVEs available.")
                    return set()
                if response.ok:
                    return self.parse_cves_from_content(response.content)
                self.log_debug(
                    f"Unexpected status {response.status_code} for {request_uri} "
                    f"(attempt {attempt}/{self.max_retries})."
                )

            if attempt < self.max_retries:
                time.sleep(self.request_cool_off_time)

        raise PyExploitDbError(f"Failed to fetch CVEs for exploit {edb_id} after {self.max_retries} attempts.")

    def parse_cves_from_content(self, content: bytes) -> Set[str]:
        """Return the upper-cased CVE IDs found in raw *content* bytes."""
        return {match.group(0).decode("ascii").upper() for match in _CVE_PATTERN_BYTES.finditer(content)}

    def write_json(self, file_path: Path | str, data: ExploitMap) -> None:
        """Serialise *data* to *file_path* as indented, key-sorted JSON.

        The payload is written to a sibling ``.tmp`` file and then renamed
        over the target, so an interrupted write cannot leave a truncated
        JSON file behind. Missing parent directories are created.

        Raises:
            PyExploitDbError: The directory or file cannot be written.
        """
        path = Path(file_path)
        tmp_path = path.parent / f"{path.name}.tmp"
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            with tmp_path.open("w", encoding="utf-8") as file_data:
                json.dump(data, file_data, indent=2, sort_keys=True)
            os.replace(tmp_path, path)
        except OSError as exc:
            raise PyExploitDbError(f"Unable to write {path}") from exc
        finally:
            # After a successful rename the temp file is gone; after a
            # failure this removes the partial file. Best effort only.
            try:
                tmp_path.unlink()
            except OSError:
                pass

    def create_cve_to_exploit_map(self, data: Dict[str, Iterable[str]]) -> None:
        """Invert an exploit-ID -> CVEs mapping into ``self.cve_to_exploit_map``.

        CVE keys are upper-cased, exploit-ID lists are sorted as strings, and
        the cached CSV rows are cleared.
        """
        mapping: ExploitMap = {}
        for edb_id, cves in data.items():
            if not edb_id:
                continue
            for cve in cves:
                cve_key = str(cve).upper()
                if not cve_key:
                    continue
                mapping.setdefault(cve_key, []).append(str(edb_id))
        for ids in mapping.values():
            ids.sort()
        self.cve_to_exploit_map = mapping
        self._exploit_cache.clear()

    def _fetch_missing_cves(self, missing_ids: Sequence[str], *, workers: Optional[int]) -> ExploitMap:
        """Fetch CVEs for *missing_ids* concurrently.

        Each task uses its own ``requests.Session``. IDs whose fetch fails map
        to an empty list. *workers* below 1 is clamped to 1; a falsy value
        selects a default derived from the CPU count (at most 8).
        """
        if not missing_ids:
            return {}
        if workers:
            max_workers = max(1, int(workers))
        else:
            max_workers = min(8, max(1, (os.cpu_count() or 4) // 2))
        results: ExploitMap = {}

        def task(edb_id: str) -> tuple[str, List[str]]:
            with requests.Session() as session:
                session.headers["User-Agent"] = self.USER_AGENT["User-Agent"]
                try:
                    cves = sorted(self.fetch_cves_for_exploit(edb_id, session=session))
                except PyExploitDbError as exc:
                    self.log_debug(f"Failed to hydrate CVEs for {edb_id}: {exc}")
                    return edb_id, []
                return edb_id, cves

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_map = {executor.submit(task, edb_id): edb_id for edb_id in missing_ids}
            for future in as_completed(future_map):
                edb_id = future_map[future]
                try:
                    resolved_id, cves = future.result()
                except Exception as exc:  # noqa: BLE001
                    # One failing worker must not abort the whole batch.
                    self.log_debug(f"Unhandled exception while hydrating CVEs for {edb_id}: {exc}")
                    results[edb_id] = []
                    continue
                results[resolved_id] = cves
        return results

    def _extract_cves_from_row(self, row: Sequence[str]) -> Set[str]:
        """Collect upper-cased CVE IDs from columns 11, 12, 13 and 2 of *row*."""
        found: Set[str] = set()
        for index in (11, 12, 13, 2):
            # Short rows simply contribute nothing for the missing columns.
            if len(row) > index and row[index]:
                found.update(match.group(0).upper() for match in _CVE_PATTERN.finditer(row[index]))
        return found

    def close(self) -> None:
        """Close the HTTP session if this instance created it.

        Safe to call repeatedly and on an instance whose ``__init__`` did not
        complete.
        """
        session = getattr(self, "_session", None)
        if session is not None and getattr(self, "_owns_session", False):
            session.close()

    def __enter__(self) -> "PyExploitDb":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.close()

    def _clone_repo(self) -> None:
        """Clone ``EXPLOIT_DB_REPO`` into ``self.exploit_db_path``."""
        self.log_debug("Cloning exploit-database repository...")
        git.Repo.clone_from(self.EXPLOIT_DB_REPO, str(self.exploit_db_path))

    def _ensure_exploit_cache(self) -> None:
        """Load the CSV rows when the cache is empty."""
        if not self._exploit_cache:
            self.load_exploit_csv()

    def _set_cve_map(self, raw: object) -> None:
        """Validate *raw* and install it as the CVE -> exploit-ID map."""
        self.cve_to_exploit_map = self._normalise_cve_mapping(raw)

    def _load_json_file(self, path: Path, encoding: str) -> object:
        """Parse the JSON file at *path*.

        Raises:
            PyExploitDbError: The file is missing or unreadable.
            InvalidDataError: The content cannot be decoded or is not valid JSON.
        """
        try:
            with path.open(encoding=encoding) as file_data:
                return json.load(file_data)
        except FileNotFoundError as exc:
            raise PyExploitDbError(f"Required JSON file not found: {path}") from exc
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            raise InvalidDataError(f"Corrupt JSON detected in {path}") from exc
        except OSError as exc:
            raise PyExploitDbError(f"Unable to read {path}") from exc

    def _normalise_edbid_mapping(self, raw: object) -> ExploitMap:
        """Validate an exploit-ID -> CVEs mapping.

        Entries with a non-string key, a non list/tuple/set value, or no
        non-blank CVE are dropped. CVEs are upper-cased, de-duplicated and
        sorted.

        Raises:
            InvalidDataError: *raw* is not a dict.
        """
        if not isinstance(raw, dict):
            raise InvalidDataError("Expected mapping of exploit IDs to CVE lists.")

        result: ExploitMap = {}
        for edb_id, cves in raw.items():
            if not isinstance(edb_id, str):
                self.log_debug(f"Skipping non-string exploit ID key {edb_id!r}")
                continue
            if not isinstance(cves, (list, tuple, set)):
                self.log_debug(f"Skipping malformed CVE container for exploit {edb_id}")
                continue
            normalised = sorted({str(cve).upper() for cve in cves if str(cve).strip()})
            if normalised:
                result[edb_id] = normalised
        return result

    def _normalise_cve_mapping(self, raw: object) -> ExploitMap:
        """Validate a CVE -> exploit-IDs mapping.

        Malformed entries are skipped silently. CVE keys are upper-cased;
        exploit IDs are stripped, blank ones dropped, and the list sorted.

        Raises:
            InvalidDataError: *raw* is not a dict.
        """
        if not isinstance(raw, dict):
            raise InvalidDataError("Expected mapping of CVE IDs to exploit lists.")

        mapping: ExploitMap = {}
        for cve, exploits in raw.items():
            if not isinstance(cve, str):
                continue
            if not isinstance(exploits, (list, tuple, set)):
                continue
            cleaned = [str(edb).strip() for edb in exploits if str(edb).strip()]
            if cleaned:
                mapping[cve.upper()] = sorted(cleaned)
        return mapping

    def __del__(self) -> None:
        # A finaliser can run on a half-built object or during interpreter
        # shutdown; it must never propagate an exception.
        try:
            self.close()
        except Exception:  # noqa: BLE001
            pass

    # ------------------------------------------------------------------
    # Deprecated camelCase aliases kept for backwards compatibility.
    # ------------------------------------------------------------------

    @staticmethod
    def _warn_deprecated(old: str, new: str) -> None:
        """Emit the standard deprecation warning for a camelCase alias."""
        # stacklevel=3 skips this helper and the alias itself, so the warning
        # is attributed to the code that called the alias.
        warnings.warn(f"{old} is deprecated; use {new} instead.", DeprecationWarning, stacklevel=3)

    def logDebug(self, message: str) -> None:
        """Deprecated alias of :meth:`log_debug`."""
        self._warn_deprecated("logDebug", "log_debug")
        self.log_debug(message)

    def cloneOrUpdateRepo(self) -> None:
        """Deprecated alias of :meth:`clone_or_update_repo`."""
        self._warn_deprecated("cloneOrUpdateRepo", "clone_or_update_repo")
        self.clone_or_update_repo()

    def pullLatestUpdates(self) -> None:
        """Deprecated alias of :meth:`pull_latest_updates`."""
        self._warn_deprecated("pullLatestUpdates", "pull_latest_updates")
        self.pull_latest_updates()

    def deleteAndRecloneRepo(self) -> None:
        """Deprecated alias of :meth:`delete_and_reclone_repo`."""
        self._warn_deprecated("deleteAndRecloneRepo", "delete_and_reclone_repo")
        self.delete_and_reclone_repo()

    def openFile(self, exploitMap: str = "cveToEdbid.json", encoding: str = "utf-8") -> None:
        """Deprecated alias of :meth:`open_file`."""
        self._warn_deprecated("openFile", "open_file")
        self.open_file(exploit_map=exploitMap, encoding=encoding)

    def getCveDetails(self, cveSearch: str) -> List[CveDetails]:
        """Deprecated alias of :meth:`get_cve_details`."""
        self._warn_deprecated("getCveDetails", "get_cve_details")
        return self.get_cve_details(cveSearch)

    def extractCveDetails(self, row: Sequence[str]) -> CveDetails:
        """Deprecated alias of :meth:`extract_cve_details`."""
        self._warn_deprecated("extractCveDetails", "extract_cve_details")
        return self.extract_cve_details(row)

    def searchCve(self, cveSearch: str) -> List[CveDetails]:
        """Deprecated alias of :meth:`search_cve`."""
        self._warn_deprecated("searchCve", "search_cve")
        return self.search_cve(cveSearch)

    def updateDb(self, *, workers: Optional[int] = None) -> None:
        """Deprecated alias of :meth:`update_db`."""
        self._warn_deprecated("updateDb", "update_db")
        self.update_db(workers=workers)

    def loadExistingData(self, file_path: Path | str) -> ExploitMap:
        """Deprecated alias of :meth:`load_existing_data`."""
        self._warn_deprecated("loadExistingData", "load_existing_data")
        return self.load_existing_data(file_path)

    def loadExploitCsv(self) -> List[List[str]]:
        """Deprecated alias of :meth:`load_exploit_csv`."""
        self._warn_deprecated("loadExploitCsv", "load_exploit_csv")
        return self.load_exploit_csv()

    def fetchCvesForExploit(self, edb_id: str) -> Set[str]:
        """Deprecated alias of :meth:`fetch_cves_for_exploit`."""
        self._warn_deprecated("fetchCvesForExploit", "fetch_cves_for_exploit")
        return self.fetch_cves_for_exploit(edb_id)

    def parseCvesFromContent(self, content: bytes) -> Set[str]:
        """Deprecated alias of :meth:`parse_cves_from_content`."""
        self._warn_deprecated("parseCvesFromContent", "parse_cves_from_content")
        return self.parse_cves_from_content(content)

    def writeJson(self, file_path: Path | str, data: ExploitMap) -> None:
        """Deprecated alias of :meth:`write_json`."""
        self._warn_deprecated("writeJson", "write_json")
        self.write_json(file_path, data)

    def createCveToExploitMap(self, data: Dict[str, Iterable[str]]) -> None:
        """Deprecated alias of :meth:`create_cve_to_exploit_map`."""
        self._warn_deprecated("createCveToExploitMap", "create_cve_to_exploit_map")
        self.create_cve_to_exploit_map(data)
436
+
437
+
438
def test() -> None:
    """Smoke test: load the CVE map and look up one known CVE.

    Prints ``PASS`` when at least one exploit is found for CVE-2018-14592,
    otherwise ``FAIL``. The instance is used as a context manager so its HTTP
    session is closed deterministically instead of at garbage collection.
    """
    # ``debug`` already defaults to False in __init__, so it is not reset here.
    with PyExploitDb() as exploit_db:
        exploit_db.open_file()
        results = exploit_db.search_cve("CVE-2018-14592")
    print("PASS" if results else "FAIL")
444
+
445
+
446
# Run the smoke test only when this module is executed as a script.
if __name__ == "__main__":
    test()