kekkai-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. kekkai/__init__.py +7 -0
  2. kekkai/cli.py +1038 -0
  3. kekkai/config.py +403 -0
  4. kekkai/dojo.py +419 -0
  5. kekkai/dojo_import.py +213 -0
  6. kekkai/github/__init__.py +16 -0
  7. kekkai/github/commenter.py +198 -0
  8. kekkai/github/models.py +56 -0
  9. kekkai/github/sanitizer.py +112 -0
  10. kekkai/installer/__init__.py +39 -0
  11. kekkai/installer/errors.py +23 -0
  12. kekkai/installer/extract.py +161 -0
  13. kekkai/installer/manager.py +252 -0
  14. kekkai/installer/manifest.py +189 -0
  15. kekkai/installer/verify.py +86 -0
  16. kekkai/manifest.py +77 -0
  17. kekkai/output.py +218 -0
  18. kekkai/paths.py +46 -0
  19. kekkai/policy.py +326 -0
  20. kekkai/runner.py +70 -0
  21. kekkai/scanners/__init__.py +67 -0
  22. kekkai/scanners/backends/__init__.py +14 -0
  23. kekkai/scanners/backends/base.py +73 -0
  24. kekkai/scanners/backends/docker.py +178 -0
  25. kekkai/scanners/backends/native.py +240 -0
  26. kekkai/scanners/base.py +110 -0
  27. kekkai/scanners/container.py +144 -0
  28. kekkai/scanners/falco.py +237 -0
  29. kekkai/scanners/gitleaks.py +237 -0
  30. kekkai/scanners/semgrep.py +227 -0
  31. kekkai/scanners/trivy.py +246 -0
  32. kekkai/scanners/url_policy.py +163 -0
  33. kekkai/scanners/zap.py +340 -0
  34. kekkai/threatflow/__init__.py +94 -0
  35. kekkai/threatflow/artifacts.py +476 -0
  36. kekkai/threatflow/chunking.py +361 -0
  37. kekkai/threatflow/core.py +438 -0
  38. kekkai/threatflow/mermaid.py +374 -0
  39. kekkai/threatflow/model_adapter.py +491 -0
  40. kekkai/threatflow/prompts.py +277 -0
  41. kekkai/threatflow/redaction.py +228 -0
  42. kekkai/threatflow/sanitizer.py +643 -0
  43. kekkai/triage/__init__.py +33 -0
  44. kekkai/triage/app.py +168 -0
  45. kekkai/triage/audit.py +203 -0
  46. kekkai/triage/ignore.py +269 -0
  47. kekkai/triage/models.py +185 -0
  48. kekkai/triage/screens.py +341 -0
  49. kekkai/triage/widgets.py +169 -0
  50. kekkai_cli-1.0.0.dist-info/METADATA +135 -0
  51. kekkai_cli-1.0.0.dist-info/RECORD +90 -0
  52. kekkai_cli-1.0.0.dist-info/WHEEL +5 -0
  53. kekkai_cli-1.0.0.dist-info/entry_points.txt +3 -0
  54. kekkai_cli-1.0.0.dist-info/top_level.txt +3 -0
  55. kekkai_core/__init__.py +3 -0
  56. kekkai_core/ci/__init__.py +11 -0
  57. kekkai_core/ci/benchmarks.py +354 -0
  58. kekkai_core/ci/metadata.py +104 -0
  59. kekkai_core/ci/validators.py +92 -0
  60. kekkai_core/docker/__init__.py +17 -0
  61. kekkai_core/docker/metadata.py +153 -0
  62. kekkai_core/docker/sbom.py +173 -0
  63. kekkai_core/docker/security.py +158 -0
  64. kekkai_core/docker/signing.py +135 -0
  65. kekkai_core/redaction.py +84 -0
  66. kekkai_core/slsa/__init__.py +13 -0
  67. kekkai_core/slsa/verify.py +121 -0
  68. kekkai_core/windows/__init__.py +29 -0
  69. kekkai_core/windows/chocolatey.py +335 -0
  70. kekkai_core/windows/installer.py +256 -0
  71. kekkai_core/windows/scoop.py +165 -0
  72. kekkai_core/windows/validators.py +220 -0
  73. portal/__init__.py +19 -0
  74. portal/api.py +155 -0
  75. portal/auth.py +103 -0
  76. portal/enterprise/__init__.py +32 -0
  77. portal/enterprise/audit.py +435 -0
  78. portal/enterprise/licensing.py +342 -0
  79. portal/enterprise/rbac.py +276 -0
  80. portal/enterprise/saml.py +595 -0
  81. portal/ops/__init__.py +53 -0
  82. portal/ops/backup.py +553 -0
  83. portal/ops/log_shipper.py +469 -0
  84. portal/ops/monitoring.py +517 -0
  85. portal/ops/restore.py +469 -0
  86. portal/ops/secrets.py +408 -0
  87. portal/ops/upgrade.py +591 -0
  88. portal/tenants.py +340 -0
  89. portal/uploads.py +259 -0
  90. portal/web.py +384 -0
@@ -0,0 +1,227 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from .backends import (
8
+ BackendType,
9
+ NativeBackend,
10
+ ToolNotFoundError,
11
+ ToolVersionError,
12
+ detect_tool,
13
+ docker_available,
14
+ )
15
+ from .base import Finding, ScanContext, ScanResult, Severity
16
+ from .container import ContainerConfig, run_container
17
+
18
# Docker image used when scanning via the Docker backend.
SEMGREP_IMAGE = "returntocorp/semgrep"
# Pinned image digest for supply-chain safety.
# NOTE(review): the hex tail looks like a placeholder (sequential digits) —
# verify this matches a real published image digest before release.
SEMGREP_DIGEST = "sha256:a5a71b85df0c65c58f13e94c0d0ce7d8e7c8d123456789abcdef0123456789ab"
# Scan-type label reported for this scanner.
SCAN_TYPE = "Semgrep JSON Report"
21
+
22
+
23
class SemgrepScanner:
    """Static-analysis scanner that drives Semgrep via Docker or a native binary.

    The backend is chosen per run: an explicit ``backend`` argument wins,
    otherwise Docker is preferred, with a local ``semgrep`` binary as fallback.
    """

    def __init__(
        self,
        image: str = SEMGREP_IMAGE,
        digest: str | None = SEMGREP_DIGEST,
        timeout_seconds: int = 600,
        config: str = "auto",
        backend: BackendType | None = None,
    ) -> None:
        self._image = image
        self._digest = digest
        self._timeout = timeout_seconds
        self._config = config
        # None means "auto-detect on each run".
        self._backend = backend
        self._resolved_backend: BackendType | None = None

    @property
    def name(self) -> str:
        """Scanner identifier attached to findings and results."""
        return "semgrep"

    @property
    def scan_type(self) -> str:
        """Human-readable scan-type label."""
        return SCAN_TYPE

    @property
    def backend_used(self) -> BackendType | None:
        """Backend used by the most recent run (None before the first run)."""
        return self._resolved_backend

    def _select_backend(self) -> BackendType:
        """Choose the execution backend: explicit setting, else Docker, else native."""
        if self._backend is not None:
            return self._backend

        docker_ok, _reason = docker_available()
        if docker_ok:
            return BackendType.DOCKER

        try:
            detect_tool("semgrep")
        except (ToolNotFoundError, ToolVersionError):
            # No usable native install either; fall back to Docker so the
            # failure is reported by the container layer.
            return BackendType.DOCKER
        return BackendType.NATIVE

    def run(self, ctx: ScanContext) -> ScanResult:
        """Scan ctx.repo_path, writing semgrep-results.json under ctx.output_dir."""
        chosen = self._select_backend()
        self._resolved_backend = chosen
        if chosen == BackendType.NATIVE:
            return self._run_native(ctx)
        return self._run_docker(ctx)

    def _run_docker(self, ctx: ScanContext) -> ScanResult:
        """Run Semgrep inside a hardened container with repo/output mounts."""
        report_path = ctx.output_dir / "semgrep-results.json"
        container = ContainerConfig(
            image=self._image,
            image_digest=self._digest,
            read_only=True,
            # Network stays enabled — presumably for rule fetching with
            # ``--config auto``; confirm before tightening.
            network_disabled=False,
            no_new_privileges=True,
        )
        cmd = [
            "semgrep",
            "scan",
            "--config",
            self._config,
            "--json",
            "--output",
            "/output/semgrep-results.json",
            "/repo",
        ]
        outcome = run_container(
            config=container,
            repo_path=ctx.repo_path,
            output_path=ctx.output_dir,
            command=cmd,
            timeout_seconds=self._timeout,
        )
        return self._process_result(
            outcome.timed_out, outcome.duration_ms, outcome.stderr, report_path
        )

    def _run_native(self, ctx: ScanContext) -> ScanResult:
        """Run a locally installed Semgrep binary."""
        try:
            tool = detect_tool("semgrep")
        except (ToolNotFoundError, ToolVersionError) as exc:
            return ScanResult(
                scanner=self.name,
                success=False,
                findings=[],
                error=str(exc),
                duration_ms=0,
            )

        report_path = ctx.output_dir / "semgrep-results.json"
        outcome = NativeBackend().execute(
            tool=tool.path,
            args=[
                "scan",
                "--config",
                self._config,
                "--json",
                "--output",
                str(report_path),
                str(ctx.repo_path),
            ],
            repo_path=ctx.repo_path,
            output_path=ctx.output_dir,
            timeout_seconds=self._timeout,
            network_required=True,
        )
        return self._process_result(
            outcome.timed_out, outcome.duration_ms, outcome.stderr, report_path
        )

    def _process_result(
        self, timed_out: bool, duration_ms: int, stderr: str, output_file: Path
    ) -> ScanResult:
        """Convert backend output into a ScanResult (timeout / missing / bad JSON)."""
        if timed_out:
            return ScanResult(
                scanner=self.name,
                success=False,
                findings=[],
                error="Scan timed out",
                duration_ms=duration_ms,
            )

        if not output_file.exists():
            return ScanResult(
                scanner=self.name,
                success=False,
                findings=[],
                error=stderr or "No output file produced",
                duration_ms=duration_ms,
            )

        try:
            parsed = self.parse(output_file.read_text())
        except (json.JSONDecodeError, KeyError) as exc:
            return ScanResult(
                scanner=self.name,
                success=False,
                findings=[],
                raw_output_path=output_file,
                error=f"Parse error: {exc}",
                duration_ms=duration_ms,
            )

        return ScanResult(
            scanner=self.name,
            success=True,
            findings=parsed,
            raw_output_path=output_file,
            duration_ms=duration_ms,
        )

    def parse(self, raw_output: str) -> list[Finding]:
        """Parse Semgrep's JSON report into Finding objects."""
        report = json.loads(raw_output)
        return [self._parse_result(entry) for entry in report.get("results", [])]

    def _parse_result(self, result: dict[str, Any]) -> Finding:
        """Map a single Semgrep result entry onto a Finding."""
        extra_data = result.get("extra", {})
        metadata = extra_data.get("metadata", {})

        # Semgrep reports ERROR/WARNING/INFO; anything else goes through the
        # project's generic severity parser.
        severity_str = extra_data.get("severity", "warning")
        mapped = {"ERROR": Severity.HIGH, "WARNING": Severity.MEDIUM}.get(severity_str)
        severity = Severity.from_string(severity_str) if mapped is None else mapped

        cwe_list = metadata.get("cwe", [])
        first_cwe = cwe_list[0] if cwe_list else None

        return Finding(
            scanner=self.name,
            title=metadata.get("message") or result.get("check_id", "Semgrep finding"),
            severity=severity,
            description=extra_data.get("message", ""),
            file_path=result.get("path"),
            line=result.get("start", {}).get("line"),
            rule_id=result.get("check_id"),
            cwe=first_cwe,
            extra={
                "fingerprint": extra_data.get("fingerprint", ""),
                "fix": extra_data.get("fix", ""),
            },
        )
@@ -0,0 +1,246 @@
1
from __future__ import annotations

import json
from pathlib import Path
from typing import Any

from .backends import (
    BackendType,
    NativeBackend,
    ToolNotFoundError,
    ToolVersionError,
    detect_tool,
    docker_available,
)
from .base import Finding, ScanContext, ScanResult, Severity
from .container import ContainerConfig, run_container
16
+
17
# Docker image used when scanning via the Docker backend.
TRIVY_IMAGE = "aquasec/trivy"
# Pinned image digest for supply-chain safety.
# NOTE(review): verify this digest matches a real published image before release.
TRIVY_DIGEST = "sha256:e9d62d670b10c9f78bb7c61d5c1f6e0bb32fc8bd0f6e1a7dd0c4e6b7f5df0a30"
# Scan-type label reported for this scanner.
SCAN_TYPE = "Trivy Scan"
20
+
21
+
22
class TrivyScanner:
    """Filesystem scanner driving Trivy (vulnerabilities, secrets, misconfigs).

    Runs either in a pinned Docker container or via a locally installed
    ``trivy`` binary, mirroring the behavior of the other scanners in this
    package.
    """

    def __init__(
        self,
        image: str = TRIVY_IMAGE,
        digest: str | None = TRIVY_DIGEST,
        timeout_seconds: int = 600,
        backend: BackendType | None = None,
    ) -> None:
        self._image = image
        self._digest = digest
        self._timeout = timeout_seconds
        # None requests auto-detection in _select_backend (Docker preferred).
        self._backend = backend
        self._resolved_backend: BackendType | None = None

    @property
    def name(self) -> str:
        """Scanner identifier attached to findings and results."""
        return "trivy"

    @property
    def scan_type(self) -> str:
        """Human-readable scan-type label."""
        return SCAN_TYPE

    @property
    def backend_used(self) -> BackendType | None:
        """Return the backend used for the last scan (None before the first run)."""
        return self._resolved_backend

    def _select_backend(self) -> BackendType:
        """Select backend: explicit choice, or auto-detect (Docker preferred)."""
        if self._backend is not None:
            return self._backend

        available, _ = docker_available()
        if available:
            return BackendType.DOCKER

        try:
            detect_tool("trivy")
            return BackendType.NATIVE
        except (ToolNotFoundError, ToolVersionError):
            # No usable native binary either; let the Docker path surface the error.
            return BackendType.DOCKER

    def run(self, ctx: ScanContext) -> ScanResult:
        """Scan ctx.repo_path, collecting findings from trivy-results.json."""
        backend = self._select_backend()
        self._resolved_backend = backend

        if backend == BackendType.NATIVE:
            return self._run_native(ctx)
        return self._run_docker(ctx)

    def _run_docker(self, ctx: ScanContext) -> ScanResult:
        """Run Trivy in a Docker container with repo/output mounts."""
        output_file = ctx.output_dir / "trivy-results.json"
        config = ContainerConfig(
            image=self._image,
            image_digest=self._digest,
            read_only=True,
            # Network stays enabled — presumably for vulnerability DB
            # downloads; confirm before tightening.
            network_disabled=False,
            no_new_privileges=True,
        )

        command = [
            "fs",
            "--format",
            "json",
            "--output",
            "/output/trivy-results.json",
            "--severity",
            "CRITICAL,HIGH,MEDIUM,LOW",
            "--scanners",
            "vuln,secret,misconfig",
            "/repo",
        ]

        result = run_container(
            config=config,
            repo_path=ctx.repo_path,
            output_path=ctx.output_dir,
            command=command,
            timeout_seconds=self._timeout,
        )

        return self._process_result(
            result.timed_out, result.duration_ms, result.stderr, output_file
        )

    def _run_native(self, ctx: ScanContext) -> ScanResult:
        """Run a locally installed Trivy binary."""
        try:
            tool_info = detect_tool("trivy")
        except (ToolNotFoundError, ToolVersionError) as e:
            return ScanResult(
                scanner=self.name,
                success=False,
                findings=[],
                error=str(e),
                duration_ms=0,
            )

        output_file = ctx.output_dir / "trivy-results.json"
        backend = NativeBackend()

        args = [
            "fs",
            "--format",
            "json",
            "--output",
            str(output_file),
            "--severity",
            "CRITICAL,HIGH,MEDIUM,LOW",
            "--scanners",
            "vuln,secret,misconfig",
            str(ctx.repo_path),
        ]

        result = backend.execute(
            tool=tool_info.path,
            args=args,
            repo_path=ctx.repo_path,
            output_path=ctx.output_dir,
            timeout_seconds=self._timeout,
            network_required=True,
        )

        return self._process_result(
            result.timed_out, result.duration_ms, result.stderr, output_file
        )

    def _process_result(
        self, timed_out: bool, duration_ms: int, stderr: str, output_file: Path
    ) -> ScanResult:
        """Process scan result from either backend.

        ``output_file`` is always a ``pathlib.Path`` (both callers build it
        from ``ctx.output_dir``); annotated as such for consistency with the
        sibling scanners instead of ``Any``.
        """
        if timed_out:
            return ScanResult(
                scanner=self.name,
                success=False,
                findings=[],
                error="Scan timed out",
                duration_ms=duration_ms,
            )

        if not output_file.exists():
            return ScanResult(
                scanner=self.name,
                success=False,
                findings=[],
                error=stderr or "No output file produced",
                duration_ms=duration_ms,
            )

        try:
            findings = self.parse(output_file.read_text())
        except (json.JSONDecodeError, KeyError) as exc:
            return ScanResult(
                scanner=self.name,
                success=False,
                findings=[],
                raw_output_path=output_file,
                error=f"Parse error: {exc}",
                duration_ms=duration_ms,
            )

        return ScanResult(
            scanner=self.name,
            success=True,
            findings=findings,
            raw_output_path=output_file,
            duration_ms=duration_ms,
        )

    def parse(self, raw_output: str) -> list[Finding]:
        """Parse Trivy's JSON report into findings (vulns, secrets, misconfigs)."""
        data = json.loads(raw_output)
        findings: list[Finding] = []

        # ``or []`` (rather than ``.get(key, [])``) also guards against the
        # report containing explicit JSON nulls for absent sections.
        for target in data.get("Results") or []:
            target_name = target.get("Target", "")
            target_type = target.get("Type", "")

            for vuln in target.get("Vulnerabilities") or []:
                findings.append(self._parse_vulnerability(vuln, target_name, target_type))

            for secret in target.get("Secrets") or []:
                findings.append(self._parse_secret(secret, target_name))

            for misconfig in target.get("Misconfigurations") or []:
                findings.append(self._parse_misconfig(misconfig, target_name))

        return findings

    def _parse_vulnerability(self, vuln: dict[str, Any], target: str, target_type: str) -> Finding:
        """Map one Trivy vulnerability entry onto a Finding."""
        return Finding(
            scanner=self.name,
            title=vuln.get("Title") or vuln.get("VulnerabilityID", "Unknown"),
            severity=Severity.from_string(vuln.get("Severity", "unknown")),
            description=vuln.get("Description", ""),
            file_path=target,
            cve=vuln.get("VulnerabilityID"),
            package_name=vuln.get("PkgName"),
            package_version=vuln.get("InstalledVersion"),
            fixed_version=vuln.get("FixedVersion"),
            extra={"target_type": target_type},
        )

    def _parse_secret(self, secret: dict[str, Any], target: str) -> Finding:
        """Map one Trivy secret entry onto a Finding (defaults to high severity)."""
        return Finding(
            scanner=self.name,
            title=secret.get("Title", "Secret detected"),
            severity=Severity.from_string(secret.get("Severity", "high")),
            description=secret.get("Match", ""),
            file_path=target,
            line=secret.get("StartLine"),
            rule_id=secret.get("RuleID"),
        )

    def _parse_misconfig(self, misconfig: dict[str, Any], target: str) -> Finding:
        """Map one Trivy misconfiguration entry onto a Finding."""
        return Finding(
            scanner=self.name,
            title=misconfig.get("Title", "Misconfiguration"),
            severity=Severity.from_string(misconfig.get("Severity", "medium")),
            description=misconfig.get("Description", ""),
            file_path=target,
            rule_id=misconfig.get("ID"),
            extra={"resolution": misconfig.get("Resolution", "")},
        )
@@ -0,0 +1,163 @@
1
+ from __future__ import annotations
2
+
3
+ import ipaddress
4
+ import socket
5
+ import urllib.parse
6
+ from dataclasses import dataclass, field
7
+
8
+
9
class UrlPolicyError(ValueError):
    """Raised when a URL fails policy validation.

    Subclasses ValueError so callers can catch it as an ordinary value error.
    """
11
+
12
+
13
@dataclass(frozen=True)
class UrlPolicy:
    """URL validation policy for DAST scanning targets.

    By default, blocks all private/internal IP ranges (SSRF protection).
    Can be configured with explicit allowlist patterns.
    """

    # When True, skip the private/internal IP checks entirely.
    allow_private_ips: bool = False
    # If non-empty, acts as an allowlist: the host must equal one of these
    # domains or be a subdomain of one.
    allowed_domains: frozenset[str] = field(default_factory=frozenset)
    # Hosts equal to (or subdomains of) these domains are always rejected.
    blocked_domains: frozenset[str] = field(default_factory=frozenset)
    # Maximum redirects to follow; not enforced in this module — presumably
    # consumed by the scanner that uses the validated URL. TODO confirm.
    max_redirects: int = 2
    # URL schemes accepted by validate_target_url.
    allowed_schemes: frozenset[str] = field(default_factory=lambda: frozenset({"http", "https"}))
26
+
27
+
28
def validate_target_url(url: str, policy: UrlPolicy | None = None) -> str:
    """Validate and normalize a DAST target URL.

    Checks are applied in order: scheme, host presence, embedded credentials,
    blocked domains, local hostnames, optional domain allowlist, and finally
    private-IP screening of the literal or DNS-resolved address(es).

    Args:
        url: The URL to validate.
        policy: Optional URL policy (a default restrictive policy if None).

    Returns:
        The normalized URL (path defaults to "/", fragment stripped).

    Raises:
        UrlPolicyError: If the URL violates any policy rule.
    """
    active = UrlPolicy() if policy is None else policy

    parts = urllib.parse.urlsplit(url)
    scheme = parts.scheme.lower()
    if scheme not in active.allowed_schemes:
        raise UrlPolicyError(f"unsupported scheme: {scheme}")
    if not parts.netloc:
        raise UrlPolicyError("missing host")
    if parts.username or parts.password:
        raise UrlPolicyError("credentials in URL not allowed")

    host = parts.hostname
    if not host:
        raise UrlPolicyError("missing hostname")
    host_lower = host.lower()

    def _matches(domain: str) -> bool:
        # Exact match or subdomain match.
        return host_lower == domain or host_lower.endswith(f".{domain}")

    if any(_matches(blocked) for blocked in active.blocked_domains):
        raise UrlPolicyError(f"blocked domain: {host}")

    # Reject localhost and mDNS-style .local names outright.
    if host_lower == "localhost" or host_lower.endswith(".local"):
        raise UrlPolicyError("local hostnames are blocked")

    # A non-empty allowed_domains set acts as a strict allowlist.
    if active.allowed_domains and not any(_matches(d) for d in active.allowed_domains):
        raise UrlPolicyError(f"domain not in allowlist: {host}")

    if _is_ip_literal(host):
        if not active.allow_private_ips and _is_blocked_ip(ipaddress.ip_address(host)):
            raise UrlPolicyError(f"private/internal IP blocked: {host}")
    else:
        # Resolve the hostname and screen every returned address.
        addresses = _resolve_host(host)
        if not addresses:
            raise UrlPolicyError(f"hostname resolution failed: {host}")
        if not active.allow_private_ips:
            for ip in addresses:
                if _is_blocked_ip(ip):
                    raise UrlPolicyError(f"hostname resolves to blocked IP: {host} -> {ip}")

    # Normalize: keep scheme/netloc/query, default the path, drop the fragment.
    return urllib.parse.urlunsplit((scheme, parts.netloc, parts.path or "/", parts.query, ""))
101
+
102
+
103
def _is_ip_literal(hostname: str) -> bool:
    """Return True when *hostname* parses as an IPv4 or IPv6 address literal."""
    try:
        ipaddress.ip_address(hostname)
    except ValueError:
        return False
    return True
110
+
111
+
112
def _resolve_host(hostname: str) -> list[ipaddress.IPv4Address | ipaddress.IPv6Address]:
    """Resolve *hostname* via getaddrinfo; return parsed addresses, empty on failure."""
    try:
        records = socket.getaddrinfo(hostname, None)
    except socket.gaierror:
        return []

    addresses: list[ipaddress.IPv4Address | ipaddress.IPv6Address] = []
    # Each getaddrinfo record is a 5-tuple; the sockaddr is the last element
    # and its first field is the textual address.
    for *_ignored, sockaddr in records:
        if sockaddr:
            try:
                addresses.append(ipaddress.ip_address(sockaddr[0]))
            except ValueError:
                pass
    return addresses
130
+
131
+
132
def _is_blocked_ip(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
    """Return True for any address that is not globally routable.

    Covers private, loopback, link-local, reserved and similar ranges —
    anything for which ``is_global`` is False.
    """
    if ip.is_global:
        return False
    return True
136
+
137
+
138
def is_private_ip_range(cidr: str) -> bool:
    """Return True when *cidr* is a private/internal range (or invalid).

    Both endpoints of the range (network and broadcast address) must be
    globally routable for the range to count as public.  Sampling only the
    first few addresses would mis-classify large ranges whose non-global
    portion lies later in the range — e.g. 126.0.0.0/7 contains the whole
    127.0.0.0/8 loopback block even though its first addresses are global.

    Args:
        cidr: A CIDR string such as "10.0.0.0/8"; host bits are tolerated
            (strict=False).

    Returns:
        True when the range touches private/internal space or the CIDR is
        invalid (fail closed); False for fully global ranges.
    """
    try:
        network = ipaddress.ip_network(cidr, strict=False)
    except ValueError:
        return True  # Invalid CIDR = blocked (fail closed)
    endpoints = (network.network_address, network.broadcast_address)
    return not all(addr.is_global for addr in endpoints)
149
+
150
+
151
# Common private/internal CIDR ranges for reference
# NOTE: not consumed by the checks in this module (blocking derives from
# ip.is_global in _is_blocked_ip); presumably exported for callers — confirm.
PRIVATE_CIDRS = frozenset(
    {
        "10.0.0.0/8",  # RFC 1918
        "172.16.0.0/12",  # RFC 1918
        "192.168.0.0/16",  # RFC 1918
        "127.0.0.0/8",  # Loopback
        "169.254.0.0/16",  # Link-local
        "::1/128",  # IPv6 loopback
        "fe80::/10",  # IPv6 link-local
        "fc00::/7",  # IPv6 ULA
    }
)