cve-sentinel 0.1.2__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,719 @@
1
+ """OSV (Open Source Vulnerabilities) API client."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from dataclasses import dataclass, field
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ import requests
12
+
13
+ from cve_sentinel.utils.cache import Cache
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ class OSVAPIError(Exception):
19
+ """Exception raised for OSV API errors."""
20
+
21
+ def __init__(self, message: str, status_code: Optional[int] = None) -> None:
22
+ super().__init__(message)
23
+ self.status_code = status_code
24
+
25
+
26
+ @dataclass
27
+ class OSVVulnerability:
28
+ """Vulnerability data from OSV.
29
+
30
+ Attributes:
31
+ id: The OSV identifier (e.g., GHSA-xxxx-xxxx-xxxx).
32
+ aliases: List of aliases including CVE IDs.
33
+ summary: Short description of the vulnerability.
34
+ details: Detailed description of the vulnerability.
35
+ severity: List of severity information.
36
+ affected: List of affected packages and versions.
37
+ fixed_versions: List of fixed versions.
38
+ references: List of reference URLs.
39
+ published: Date the vulnerability was published.
40
+ modified: Date the vulnerability was last modified.
41
+ """
42
+
43
+ id: str
44
+ aliases: List[str] = field(default_factory=list)
45
+ summary: str = ""
46
+ details: str = ""
47
+ severity: List[Dict[str, Any]] = field(default_factory=list)
48
+ affected: List[Dict[str, Any]] = field(default_factory=list)
49
+ fixed_versions: List[str] = field(default_factory=list)
50
+ references: List[str] = field(default_factory=list)
51
+ published: Optional[datetime] = None
52
+ modified: Optional[datetime] = None
53
+
54
+ def get_cve_ids(self) -> List[str]:
55
+ """Extract CVE IDs from aliases.
56
+
57
+ Returns:
58
+ List of CVE IDs found in aliases.
59
+ """
60
+ return [alias for alias in self.aliases if alias.startswith("CVE-")]
61
+
62
+ def get_cvss_score(self) -> Optional[float]:
63
+ """Extract CVSS score from severity data.
64
+
65
+ Returns:
66
+             Numeric CVSS score if one is present, None otherwise (CVSS vector strings are not parsed).
67
+ """
68
+ for sev in self.severity:
69
+ if sev.get("type") == "CVSS_V3":
70
+ score = sev.get("score")
71
+ if score is not None:
72
+ # OSV API may return CVSS vector string instead of numeric score
73
+ if isinstance(score, (int, float)):
74
+ return float(score)
75
+ elif isinstance(score, str):
76
+ # Skip CVSS vector strings (e.g., "CVSS:3.1/AV:N/AC:L/...")
77
+ if score.startswith("CVSS:"):
78
+ continue
79
+ # Try to parse numeric string
80
+ try:
81
+ return float(score)
82
+ except ValueError:
83
+ continue
84
+ return None
85
+
86
+ def get_cvss_severity(self) -> Optional[str]:
87
+ """Get CVSS severity level based on score.
88
+
89
+ Returns:
90
+ Severity level (CRITICAL, HIGH, MEDIUM, LOW, NONE) or None.
91
+ """
92
+ score = self.get_cvss_score()
93
+ if score is None:
94
+ return None
95
+
96
+ if score >= 9.0:
97
+ return "CRITICAL"
98
+ elif score >= 7.0:
99
+ return "HIGH"
100
+ elif score >= 4.0:
101
+ return "MEDIUM"
102
+ elif score >= 0.1:
103
+ return "LOW"
104
+ else:
105
+ return "NONE"
106
+
107
+ def to_dict(self) -> Dict[str, Any]:
108
+ """Convert to dictionary for serialization."""
109
+ return {
110
+ "id": self.id,
111
+ "aliases": self.aliases,
112
+ "summary": self.summary,
113
+ "details": self.details,
114
+ "severity": self.severity,
115
+ "affected": self.affected,
116
+ "fixed_versions": self.fixed_versions,
117
+ "references": self.references,
118
+ "published": self.published.isoformat() if self.published else None,
119
+ "modified": self.modified.isoformat() if self.modified else None,
120
+ }
121
+
122
+ @classmethod
123
+ def from_dict(cls, data: Dict[str, Any]) -> OSVVulnerability:
124
+ """Create OSVVulnerability from dictionary."""
125
+ published = None
126
+ modified = None
127
+
128
+ if data.get("published"):
129
+ published = datetime.fromisoformat(data["published"].replace("Z", "+00:00"))
130
+ if data.get("modified"):
131
+ modified = datetime.fromisoformat(data["modified"].replace("Z", "+00:00"))
132
+
133
+ return cls(
134
+ id=data.get("id", ""),
135
+ aliases=data.get("aliases", []),
136
+ summary=data.get("summary", ""),
137
+ details=data.get("details", ""),
138
+ severity=data.get("severity", []),
139
+ affected=data.get("affected", []),
140
+ fixed_versions=data.get("fixed_versions", []),
141
+ references=data.get("references", []),
142
+ published=published,
143
+ modified=modified,
144
+ )
145
+
146
+
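For orientation, here is a minimal usage sketch of the dataclass above. The record and the import path are illustrative assumptions (adjust the import to wherever this module actually sits inside cve_sentinel):

# Hypothetical import path -- adjust to the module's actual location in the package.
from cve_sentinel.api.osv import OSVVulnerability

record = {
    "id": "GHSA-xxxx-xxxx-xxxx",
    "aliases": ["CVE-2024-0001"],
    "summary": "Example vulnerability",
    "severity": [{"type": "CVSS_V3", "score": "7.5"}],
    "published": "2024-01-01T00:00:00Z",
}
vuln = OSVVulnerability.from_dict(record)
print(vuln.get_cve_ids())        # ['CVE-2024-0001']
print(vuln.get_cvss_score())     # 7.5 (numeric strings parse; CVSS vector strings are skipped)
print(vuln.get_cvss_severity())  # 'HIGH'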
147
+ class OSVClient:
148
+     """Client for the Google OSV API.
149
+
150
+ This client provides methods to query vulnerability data from the
151
+ Open Source Vulnerabilities (OSV) database.
152
+
153
+ Attributes:
154
+ cache: Optional cache for storing API responses.
155
+ """
156
+
157
+ BASE_URL = "https://api.osv.dev/v1"
158
+     DEFAULT_TIMEOUT = 30  # seconds
159
+     MAX_RETRIES = 5
160
+     RETRY_DELAY = 2  # base delay in seconds; scaled per attempt in _make_request
161
+ MAX_BATCH_SIZE = 100 # Limit batch size to avoid rate limits
162
+
163
+ # Ecosystem mapping from internal names to OSV ecosystem names
164
+ ECOSYSTEM_MAP: Dict[str, str] = {
165
+ "npm": "npm",
166
+ "pypi": "PyPI",
167
+ "go": "Go",
168
+ "maven": "Maven",
169
+ "rubygems": "RubyGems",
170
+ "crates.io": "crates.io",
171
+ "packagist": "Packagist",
172
+ "nuget": "NuGet",
173
+ "hex": "Hex",
174
+ "pub": "Pub",
175
+ "cocoapods": "CocoaPods",
176
+ "swift": "SwiftURL",
177
+ }
178
+
179
+ def __init__(
180
+ self,
181
+ cache_dir: Optional[Path] = None,
182
+ cache_ttl_hours: int = 24,
183
+ ) -> None:
184
+ """Initialize OSV client.
185
+
186
+ Args:
187
+ cache_dir: Directory for caching responses. If None, caching is disabled.
188
+ cache_ttl_hours: Cache time-to-live in hours.
189
+ """
190
+ self.cache: Optional[Cache] = None
191
+ if cache_dir:
192
+ self.cache = Cache(cache_dir, ttl_hours=cache_ttl_hours)
193
+ self._session = requests.Session()
194
+ self._session.headers.update(
195
+ {
196
+ "Content-Type": "application/json",
197
+ "Accept": "application/json",
198
+ }
199
+ )
200
+
201
+ def _get_osv_ecosystem(self, ecosystem: str) -> str:
202
+ """Convert internal ecosystem name to OSV ecosystem name.
203
+
204
+ Args:
205
+ ecosystem: Internal ecosystem name.
206
+
207
+ Returns:
208
+ OSV ecosystem name.
209
+
210
+ Raises:
211
+ ValueError: If ecosystem is not supported.
212
+ """
213
+ ecosystem_lower = ecosystem.lower()
214
+ if ecosystem_lower in self.ECOSYSTEM_MAP:
215
+ return self.ECOSYSTEM_MAP[ecosystem_lower]
216
+ # If already in OSV format, return as-is
217
+ if ecosystem in self.ECOSYSTEM_MAP.values():
218
+ return ecosystem
219
+ raise ValueError(f"Unsupported ecosystem: {ecosystem}")
220
+
221
+ def _make_request(
222
+ self,
223
+ endpoint: str,
224
+ data: Dict[str, Any],
225
+ use_cache: bool = True,
226
+ ) -> Dict[str, Any]:
227
+ """Make a POST request to the OSV API with retry logic.
228
+
229
+ Args:
230
+ endpoint: API endpoint path.
231
+ data: JSON data to send in the request body.
232
+ use_cache: Whether to use caching for this request.
233
+
234
+ Returns:
235
+ JSON response from the API.
236
+
237
+ Raises:
238
+ OSVAPIError: If the API request fails after retries.
239
+ """
240
+         import hashlib
241
+         import json
242
+         import time
243
+ url = f"{self.BASE_URL}{endpoint}"
244
+
245
+         # Check cache first; use a SHA-256 digest because the built-in hash() is salted per process
246
+         cache_key = f"osv_{endpoint}_{hashlib.sha256(json.dumps(data, sort_keys=True).encode()).hexdigest()}"
247
+ if use_cache and self.cache:
248
+ cached = self.cache.get(cache_key)
249
+ if cached:
250
+ logger.debug(f"Cache hit for OSV query: {data}")
251
+ return cached
252
+
253
+ last_error: Optional[Exception] = None
254
+ for attempt in range(self.MAX_RETRIES):
255
+ try:
256
+ response = self._session.post(
257
+ url,
258
+ json=data,
259
+ timeout=self.DEFAULT_TIMEOUT,
260
+ )
261
+
262
+ if response.status_code == 200:
263
+ result = response.json()
264
+ # Cache successful response
265
+ if use_cache and self.cache:
266
+ self.cache.set(cache_key, result)
267
+ return result
268
+
269
+ elif response.status_code == 400:
270
+ # Check if it's a rate limit error
271
+ if "Too many queries" in response.text:
272
+ last_error = OSVAPIError(
273
+ f"OSV API rate limit: {response.text}",
274
+ status_code=400,
275
+ )
276
+ logger.warning(
277
+ f"OSV API rate limited (attempt {attempt + 1}/{self.MAX_RETRIES})"
278
+ )
279
+ # Wait longer for rate limit
280
+ if attempt < self.MAX_RETRIES - 1:
281
+ time.sleep(self.RETRY_DELAY * (attempt + 2) * 2)
282
+ continue
283
+ # Bad request - don't retry
284
+ raise OSVAPIError(
285
+ f"OSV API bad request: {response.text}",
286
+ status_code=400,
287
+ )
288
+
289
+ elif response.status_code == 429:
290
+ # Rate limit - retry with backoff
291
+ last_error = OSVAPIError(
292
+ f"OSV API rate limit: {response.text}",
293
+ status_code=429,
294
+ )
295
+ logger.warning(
296
+ f"OSV API rate limited (attempt {attempt + 1}/{self.MAX_RETRIES})"
297
+ )
298
+ if attempt < self.MAX_RETRIES - 1:
299
+ time.sleep(self.RETRY_DELAY * (attempt + 2) * 2)
300
+ continue
301
+
302
+ elif response.status_code == 404:
303
+ # No results found - return empty response
304
+ return {"vulns": []}
305
+
306
+ else:
307
+ raise OSVAPIError(
308
+ f"OSV API error: {response.status_code} - {response.text}",
309
+ status_code=response.status_code,
310
+ )
311
+
312
+ except requests.exceptions.Timeout as e:
313
+ last_error = OSVAPIError(f"Request timeout: {e}")
314
+ logger.warning(f"OSV API timeout (attempt {attempt + 1}/{self.MAX_RETRIES})")
315
+
316
+ except requests.exceptions.ConnectionError as e:
317
+ last_error = OSVAPIError(f"Connection error: {e}")
318
+ logger.warning(
319
+ f"OSV API connection error (attempt {attempt + 1}/{self.MAX_RETRIES})"
320
+ )
321
+
322
+ except OSVAPIError:
323
+ raise
324
+
325
+ # Wait before retry
326
+ if attempt < self.MAX_RETRIES - 1:
327
+ time.sleep(self.RETRY_DELAY * (attempt + 1))
328
+
329
+ raise last_error or OSVAPIError("Unknown error after retries")
330
+
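For reference, the retry schedule implied by the constants above is linear rather than exponential; the arithmetic below simply spells out the sleeps _make_request performs between attempts:

RETRY_DELAY, MAX_RETRIES = 2, 5

# Sleep after each failed attempt (timeouts, connection errors): 2, 4, 6, 8 seconds.
generic_delays = [RETRY_DELAY * (attempt + 1) for attempt in range(MAX_RETRIES - 1)]

# Sleep after each rate-limited attempt (HTTP 429, or 400 with "Too many queries"): 8, 12, 16, 20 seconds.
rate_limit_delays = [RETRY_DELAY * (attempt + 2) * 2 for attempt in range(MAX_RETRIES - 1)]

print(generic_delays, rate_limit_delays)  # [2, 4, 6, 8] [8, 12, 16, 20]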
331
+ def _parse_vulnerability(self, vuln_data: Dict[str, Any]) -> OSVVulnerability:
332
+ """Parse a vulnerability from OSV API response.
333
+
334
+ Args:
335
+ vuln_data: Raw vulnerability data from API response.
336
+
337
+ Returns:
338
+ Parsed OSVVulnerability object.
339
+ """
340
+ # Extract fixed versions from affected packages
341
+ fixed_versions: List[str] = []
342
+ affected = vuln_data.get("affected", [])
343
+ for aff in affected:
344
+ ranges = aff.get("ranges", [])
345
+ for r in ranges:
346
+ events = r.get("events", [])
347
+ for event in events:
348
+ if "fixed" in event:
349
+ fixed_versions.append(event["fixed"])
350
+
351
+ # Extract references
352
+ references: List[str] = []
353
+ refs = vuln_data.get("references", [])
354
+ for ref in refs:
355
+ url = ref.get("url", "")
356
+ if url:
357
+ references.append(url)
358
+
359
+ # Parse dates
360
+ published = None
361
+ modified = None
362
+ try:
363
+ if vuln_data.get("published"):
364
+ pub_str = vuln_data["published"].replace("Z", "+00:00")
365
+ published = datetime.fromisoformat(pub_str)
366
+ except (ValueError, TypeError):
367
+ pass
368
+
369
+ try:
370
+ if vuln_data.get("modified"):
371
+ mod_str = vuln_data["modified"].replace("Z", "+00:00")
372
+ modified = datetime.fromisoformat(mod_str)
373
+ except (ValueError, TypeError):
374
+ pass
375
+
376
+ return OSVVulnerability(
377
+ id=vuln_data.get("id", ""),
378
+ aliases=vuln_data.get("aliases", []),
379
+ summary=vuln_data.get("summary", ""),
380
+ details=vuln_data.get("details", ""),
381
+ severity=vuln_data.get("severity", []),
382
+ affected=affected,
383
+ fixed_versions=fixed_versions,
384
+ references=references,
385
+ published=published,
386
+ modified=modified,
387
+ )
388
+
389
+ def query(
390
+ self,
391
+ package_name: str,
392
+ ecosystem: str,
393
+ version: Optional[str] = None,
394
+ ) -> List[OSVVulnerability]:
395
+ """Query vulnerabilities for a package.
396
+
397
+ Args:
398
+ package_name: Name of the package.
399
+ ecosystem: Package ecosystem (e.g., 'npm', 'pypi').
400
+ version: Optional specific version to query.
401
+
402
+ Returns:
403
+ List of vulnerabilities affecting the package.
404
+ """
405
+ try:
406
+ osv_ecosystem = self._get_osv_ecosystem(ecosystem)
407
+ except ValueError as e:
408
+ logger.warning(f"Skipping unsupported ecosystem: {e}")
409
+ return []
410
+
411
+ data: Dict[str, Any] = {
412
+ "package": {
413
+ "name": package_name,
414
+ "ecosystem": osv_ecosystem,
415
+ }
416
+ }
417
+
418
+ if version:
419
+ data["version"] = version
420
+
421
+ response = self._make_request("/query", data)
422
+ vulns = response.get("vulns", [])
423
+
424
+ results: List[OSVVulnerability] = []
425
+ for vuln_data in vulns:
426
+ try:
427
+ vuln = self._parse_vulnerability(vuln_data)
428
+ results.append(vuln)
429
+ except (KeyError, ValueError) as e:
430
+ logger.warning(f"Failed to parse OSV vulnerability: {e}")
431
+ continue
432
+
433
+ return results
434
+
435
+ def query_batch(
436
+ self,
437
+ packages: List[Dict[str, Any]],
438
+ ) -> Dict[str, List[OSVVulnerability]]:
439
+ """Query vulnerabilities for multiple packages.
440
+
441
+ Args:
442
+ packages: List of package dictionaries with 'name', 'ecosystem',
443
+ and optional 'version' keys.
444
+
445
+ Returns:
446
+             Dictionary keyed by "ecosystem:name", mapping each queried package to its vulnerabilities.
447
+ """
448
+ if not packages:
449
+ return {}
450
+
451
+ # Build batch query
452
+ queries: List[Dict[str, Any]] = []
453
+ package_keys: List[str] = []
454
+
455
+ for pkg in packages:
456
+ name = pkg.get("name", "")
457
+ ecosystem = pkg.get("ecosystem", "")
458
+ version = pkg.get("version")
459
+
460
+ try:
461
+ osv_ecosystem = self._get_osv_ecosystem(ecosystem)
462
+ except ValueError as e:
463
+ logger.warning(f"Skipping unsupported ecosystem: {e}")
464
+ continue
465
+
466
+ query: Dict[str, Any] = {
467
+ "package": {
468
+ "name": name,
469
+ "ecosystem": osv_ecosystem,
470
+ }
471
+ }
472
+
473
+ if version:
474
+ query["version"] = version
475
+
476
+ queries.append(query)
477
+ package_keys.append(f"{ecosystem}:{name}")
478
+
479
+ if not queries:
480
+ return {}
481
+
482
+ results: Dict[str, List[OSVVulnerability]] = {}
483
+
484
+ # Split into chunks to avoid rate limits
485
+ import time
486
+
487
+ for chunk_start in range(0, len(queries), self.MAX_BATCH_SIZE):
488
+ chunk_end = min(chunk_start + self.MAX_BATCH_SIZE, len(queries))
489
+ chunk_queries = queries[chunk_start:chunk_end]
490
+ chunk_keys = package_keys[chunk_start:chunk_end]
491
+
492
+ # Add delay between chunks (except for first chunk)
493
+ if chunk_start > 0:
494
+ time.sleep(1)
495
+
496
+ data = {"queries": chunk_queries}
497
+ response = self._make_request("/querybatch", data, use_cache=False)
498
+
499
+ batch_results = response.get("results", [])
500
+
501
+ for i, result in enumerate(batch_results):
502
+ if i >= len(chunk_keys):
503
+ break
504
+
505
+ pkg_key = chunk_keys[i]
506
+ vulns = result.get("vulns", [])
507
+
508
+ pkg_vulns: List[OSVVulnerability] = []
509
+ for vuln_data in vulns:
510
+ vuln_id = vuln_data.get("id")
511
+ if not vuln_id:
512
+ continue
513
+
514
+ # querybatch only returns ID and modified date, not full details
515
+ # We need to fetch full vulnerability data for affected version info
516
+ try:
517
+ full_vuln = self.get_vulnerability(vuln_id)
518
+ if full_vuln:
519
+ pkg_vulns.append(full_vuln)
520
+ except (OSVAPIError, KeyError, ValueError) as e:
521
+ logger.warning(f"Failed to fetch vulnerability {vuln_id}: {e}")
522
+ continue
523
+
524
+ results[pkg_key] = pkg_vulns
525
+
526
+ return results
527
+
528
+ def get_vulnerability(self, vuln_id: str) -> Optional[OSVVulnerability]:
529
+ """Get a specific vulnerability by ID.
530
+
531
+ Args:
532
+ vuln_id: Vulnerability ID (e.g., GHSA-xxxx-xxxx-xxxx).
533
+
534
+ Returns:
535
+ OSVVulnerability object if found, None otherwise.
536
+ """
537
+ # Check cache first
538
+ cache_key = f"osv_vuln_{vuln_id}"
539
+ if self.cache:
540
+ cached = self.cache.get(cache_key)
541
+ if cached:
542
+ logger.debug(f"Cache hit for vulnerability: {vuln_id}")
543
+ return self._parse_vulnerability(cached)
544
+
545
+ url = f"{self.BASE_URL}/vulns/{vuln_id}"
546
+
547
+ try:
548
+ response = self._session.get(url, timeout=self.DEFAULT_TIMEOUT)
549
+
550
+ if response.status_code == 200:
551
+ vuln_data = response.json()
552
+ # Cache the result
553
+ if self.cache:
554
+ self.cache.set(cache_key, vuln_data)
555
+ return self._parse_vulnerability(vuln_data)
556
+ elif response.status_code == 404:
557
+ return None
558
+ else:
559
+ raise OSVAPIError(
560
+ f"OSV API error: {response.status_code}",
561
+ status_code=response.status_code,
562
+ )
563
+
564
+ except requests.exceptions.RequestException as e:
565
+ raise OSVAPIError(f"Request failed: {e}")
566
+
567
+ @staticmethod
568
+ def get_supported_ecosystems() -> List[str]:
569
+ """Get list of supported ecosystems.
570
+
571
+ Returns:
572
+ List of supported ecosystem names.
573
+ """
574
+ return list(OSVClient.ECOSYSTEM_MAP.keys())
575
+
576
+
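A minimal sketch of how the client above might be driven. The import path, cache directory, and package names are assumptions for illustration; real results require network access to api.osv.dev:

from pathlib import Path

# Hypothetical import path -- adjust to the module's actual location in the package.
from cve_sentinel.api.osv import OSVClient

client = OSVClient(cache_dir=Path.home() / ".cache" / "cve-sentinel", cache_ttl_hours=24)

# Single-package query (POST /v1/query).
for vuln in client.query("lodash", ecosystem="npm", version="4.17.20"):
    print(vuln.id, vuln.get_cve_ids(), vuln.get_cvss_severity())

# Batch query (POST /v1/querybatch); results are keyed by "ecosystem:name".
results = client.query_batch(
    [
        {"name": "lodash", "ecosystem": "npm", "version": "4.17.20"},
        {"name": "requests", "ecosystem": "pypi", "version": "2.19.0"},
    ]
)
for key, vulns in results.items():
    print(key, [v.id for v in vulns])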
577
+ @dataclass
578
+ class MergedVulnerability:
579
+ """Merged vulnerability data from NVD and OSV.
580
+
581
+ Attributes:
582
+         cve_id: CVE identifier used as the primary key (falls back to the OSV ID when no CVE alias exists).
583
+ osv_ids: List of OSV identifiers.
584
+ description: Description (OSV summary preferred, NVD fallback).
585
+ cvss_score: CVSS score (NVD preferred).
586
+ cvss_severity: Severity level.
587
+ affected_packages: List of affected package info.
588
+ fixed_versions: List of fixed versions (from OSV).
589
+ references: Combined reference URLs.
590
+ published_date: Publication date.
591
+ last_modified: Last modification date.
592
+         source: Primary data source ('osv', or 'merged' once NVD enrichment is applied).
593
+ """
594
+
595
+ cve_id: str
596
+ osv_ids: List[str] = field(default_factory=list)
597
+ description: str = ""
598
+ cvss_score: Optional[float] = None
599
+ cvss_severity: Optional[str] = None
600
+ affected_packages: List[Dict[str, Any]] = field(default_factory=list)
601
+ fixed_versions: List[str] = field(default_factory=list)
602
+ references: List[str] = field(default_factory=list)
603
+ published_date: Optional[datetime] = None
604
+ last_modified: Optional[datetime] = None
605
+ source: str = "osv"
606
+
607
+ def to_dict(self) -> Dict[str, Any]:
608
+ """Convert to dictionary for serialization."""
609
+ return {
610
+ "cve_id": self.cve_id,
611
+ "osv_ids": self.osv_ids,
612
+ "description": self.description,
613
+ "cvss_score": self.cvss_score,
614
+ "cvss_severity": self.cvss_severity,
615
+ "affected_packages": self.affected_packages,
616
+ "fixed_versions": self.fixed_versions,
617
+ "references": self.references,
618
+ "published_date": self.published_date.isoformat() if self.published_date else None,
619
+ "last_modified": self.last_modified.isoformat() if self.last_modified else None,
620
+ "source": self.source,
621
+ }
622
+
623
+
624
+ def merge_nvd_osv_data(
625
+ osv_vulns: List[OSVVulnerability],
626
+ nvd_client: Optional[Any] = None,
627
+ ) -> List[MergedVulnerability]:
628
+ """Merge OSV vulnerabilities with NVD data.
629
+
630
+ OSV data is preferred for:
631
+ - Fixed versions
632
+ - Package-specific affected info
633
+
634
+     NVD data, when available, is used to:
635
+     - Override CVSS scores (treated as more authoritative)
636
+     - Fill in descriptions that OSV leaves empty
637
+
638
+ Args:
639
+ osv_vulns: List of OSV vulnerabilities.
640
+ nvd_client: Optional NVD client for fetching additional data.
641
+
642
+ Returns:
643
+ List of merged vulnerability records.
644
+ """
645
+ merged: Dict[str, MergedVulnerability] = {}
646
+
647
+ for osv_vuln in osv_vulns:
648
+ cve_ids = osv_vuln.get_cve_ids()
649
+
650
+ if cve_ids:
651
+ # Has CVE ID(s) - use first CVE as primary key
652
+ primary_cve = cve_ids[0]
653
+
654
+ if primary_cve in merged:
655
+ # Merge with existing entry
656
+ existing = merged[primary_cve]
657
+ if osv_vuln.id not in existing.osv_ids:
658
+ existing.osv_ids.append(osv_vuln.id)
659
+ existing.fixed_versions = list(
660
+ set(existing.fixed_versions + osv_vuln.fixed_versions)
661
+ )
662
+ existing.references = list(set(existing.references + osv_vuln.references))
663
+ else:
664
+ # Create new merged entry
665
+ merged[primary_cve] = MergedVulnerability(
666
+ cve_id=primary_cve,
667
+ osv_ids=[osv_vuln.id],
668
+ description=osv_vuln.summary or osv_vuln.details,
669
+ cvss_score=osv_vuln.get_cvss_score(),
670
+ cvss_severity=osv_vuln.get_cvss_severity(),
671
+ affected_packages=osv_vuln.affected,
672
+ fixed_versions=osv_vuln.fixed_versions,
673
+ references=osv_vuln.references,
674
+ published_date=osv_vuln.published,
675
+ last_modified=osv_vuln.modified,
676
+ source="osv",
677
+ )
678
+ else:
679
+ # No CVE ID - use OSV ID as key
680
+ merged[osv_vuln.id] = MergedVulnerability(
681
+ cve_id=osv_vuln.id, # Use OSV ID as identifier
682
+ osv_ids=[osv_vuln.id],
683
+ description=osv_vuln.summary or osv_vuln.details,
684
+ cvss_score=osv_vuln.get_cvss_score(),
685
+ cvss_severity=osv_vuln.get_cvss_severity(),
686
+ affected_packages=osv_vuln.affected,
687
+ fixed_versions=osv_vuln.fixed_versions,
688
+ references=osv_vuln.references,
689
+ published_date=osv_vuln.published,
690
+ last_modified=osv_vuln.modified,
691
+ source="osv",
692
+ )
693
+
694
+ # Optionally enrich with NVD data
695
+ if nvd_client:
696
+ for cve_id, merged_vuln in merged.items():
697
+ if cve_id.startswith("CVE-"):
698
+ try:
699
+ nvd_data = nvd_client.get_cve(cve_id)
700
+ if nvd_data:
701
+ # Prefer NVD CVSS score
702
+ if nvd_data.cvss_score is not None:
703
+ merged_vuln.cvss_score = nvd_data.cvss_score
704
+ merged_vuln.cvss_severity = nvd_data.cvss_severity
705
+
706
+ # Use NVD description if OSV is empty
707
+ if not merged_vuln.description and nvd_data.description:
708
+ merged_vuln.description = nvd_data.description
709
+
710
+ # Merge references
711
+ merged_vuln.references = list(
712
+ set(merged_vuln.references + nvd_data.references)
713
+ )
714
+
715
+ merged_vuln.source = "merged"
716
+ except Exception as e:
717
+ logger.warning(f"Failed to fetch NVD data for {cve_id}: {e}")
718
+
719
+ return list(merged.values())
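Finally, a sketch of the merge step. merge_nvd_osv_data duck-types the NVD client: any object whose get_cve(cve_id) returns a record with cvss_score, cvss_severity, description, and references attributes will do. The stub below is purely illustrative and stands in for whatever NVD client cve_sentinel actually ships:

from dataclasses import dataclass, field
from typing import List, Optional

# Hypothetical import path -- adjust to the module's actual location in the package.
from cve_sentinel.api.osv import OSVClient, merge_nvd_osv_data


@dataclass
class _StubNVDRecord:
    cvss_score: Optional[float] = 9.8
    cvss_severity: Optional[str] = "CRITICAL"
    description: str = "Description sourced from NVD."
    references: List[str] = field(default_factory=list)


class _StubNVDClient:
    """Stand-in exposing only the interface merge_nvd_osv_data relies on."""

    def get_cve(self, cve_id: str) -> Optional[_StubNVDRecord]:
        return _StubNVDRecord()


client = OSVClient()
osv_vulns = client.query("lodash", ecosystem="npm", version="4.17.20")
merged = merge_nvd_osv_data(osv_vulns, nvd_client=_StubNVDClient())
for item in merged:
    # source flips from "osv" to "merged" once NVD data has been applied to a CVE-keyed entry.
    print(item.cve_id, item.cvss_score, item.cvss_severity, item.source)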