kekkai_cli-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kekkai/__init__.py +7 -0
- kekkai/cli.py +1038 -0
- kekkai/config.py +403 -0
- kekkai/dojo.py +419 -0
- kekkai/dojo_import.py +213 -0
- kekkai/github/__init__.py +16 -0
- kekkai/github/commenter.py +198 -0
- kekkai/github/models.py +56 -0
- kekkai/github/sanitizer.py +112 -0
- kekkai/installer/__init__.py +39 -0
- kekkai/installer/errors.py +23 -0
- kekkai/installer/extract.py +161 -0
- kekkai/installer/manager.py +252 -0
- kekkai/installer/manifest.py +189 -0
- kekkai/installer/verify.py +86 -0
- kekkai/manifest.py +77 -0
- kekkai/output.py +218 -0
- kekkai/paths.py +46 -0
- kekkai/policy.py +326 -0
- kekkai/runner.py +70 -0
- kekkai/scanners/__init__.py +67 -0
- kekkai/scanners/backends/__init__.py +14 -0
- kekkai/scanners/backends/base.py +73 -0
- kekkai/scanners/backends/docker.py +178 -0
- kekkai/scanners/backends/native.py +240 -0
- kekkai/scanners/base.py +110 -0
- kekkai/scanners/container.py +144 -0
- kekkai/scanners/falco.py +237 -0
- kekkai/scanners/gitleaks.py +237 -0
- kekkai/scanners/semgrep.py +227 -0
- kekkai/scanners/trivy.py +246 -0
- kekkai/scanners/url_policy.py +163 -0
- kekkai/scanners/zap.py +340 -0
- kekkai/threatflow/__init__.py +94 -0
- kekkai/threatflow/artifacts.py +476 -0
- kekkai/threatflow/chunking.py +361 -0
- kekkai/threatflow/core.py +438 -0
- kekkai/threatflow/mermaid.py +374 -0
- kekkai/threatflow/model_adapter.py +491 -0
- kekkai/threatflow/prompts.py +277 -0
- kekkai/threatflow/redaction.py +228 -0
- kekkai/threatflow/sanitizer.py +643 -0
- kekkai/triage/__init__.py +33 -0
- kekkai/triage/app.py +168 -0
- kekkai/triage/audit.py +203 -0
- kekkai/triage/ignore.py +269 -0
- kekkai/triage/models.py +185 -0
- kekkai/triage/screens.py +341 -0
- kekkai/triage/widgets.py +169 -0
- kekkai_cli-1.0.0.dist-info/METADATA +135 -0
- kekkai_cli-1.0.0.dist-info/RECORD +90 -0
- kekkai_cli-1.0.0.dist-info/WHEEL +5 -0
- kekkai_cli-1.0.0.dist-info/entry_points.txt +3 -0
- kekkai_cli-1.0.0.dist-info/top_level.txt +3 -0
- kekkai_core/__init__.py +3 -0
- kekkai_core/ci/__init__.py +11 -0
- kekkai_core/ci/benchmarks.py +354 -0
- kekkai_core/ci/metadata.py +104 -0
- kekkai_core/ci/validators.py +92 -0
- kekkai_core/docker/__init__.py +17 -0
- kekkai_core/docker/metadata.py +153 -0
- kekkai_core/docker/sbom.py +173 -0
- kekkai_core/docker/security.py +158 -0
- kekkai_core/docker/signing.py +135 -0
- kekkai_core/redaction.py +84 -0
- kekkai_core/slsa/__init__.py +13 -0
- kekkai_core/slsa/verify.py +121 -0
- kekkai_core/windows/__init__.py +29 -0
- kekkai_core/windows/chocolatey.py +335 -0
- kekkai_core/windows/installer.py +256 -0
- kekkai_core/windows/scoop.py +165 -0
- kekkai_core/windows/validators.py +220 -0
- portal/__init__.py +19 -0
- portal/api.py +155 -0
- portal/auth.py +103 -0
- portal/enterprise/__init__.py +32 -0
- portal/enterprise/audit.py +435 -0
- portal/enterprise/licensing.py +342 -0
- portal/enterprise/rbac.py +276 -0
- portal/enterprise/saml.py +595 -0
- portal/ops/__init__.py +53 -0
- portal/ops/backup.py +553 -0
- portal/ops/log_shipper.py +469 -0
- portal/ops/monitoring.py +517 -0
- portal/ops/restore.py +469 -0
- portal/ops/secrets.py +408 -0
- portal/ops/upgrade.py +591 -0
- portal/tenants.py +340 -0
- portal/uploads.py +259 -0
- portal/web.py +384 -0
kekkai/github/commenter.py
ADDED

@@ -0,0 +1,198 @@

"""GitHub PR commenter for posting scan findings."""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING

import httpx  # type: ignore[import-not-found,unused-ignore]

from ..scanners.base import Finding, Severity
from .models import GitHubConfig, PRComment, PRCommentResult
from .sanitizer import escape_markdown, redact_secrets

if TYPE_CHECKING:
    from collections.abc import Sequence

logger = logging.getLogger(__name__)

MAX_COMMENTS_PER_PR = 50
SEVERITY_EMOJI = {
    Severity.CRITICAL: "🔴",
    Severity.HIGH: "🟠",
    Severity.MEDIUM: "🟡",
    Severity.LOW: "🟢",
    Severity.INFO: "🔵",
    Severity.UNKNOWN: "⚪",
}
SEVERITY_ORDER = ["critical", "high", "medium", "low", "info", "unknown"]


def post_pr_comments(
    findings: Sequence[Finding],
    config: GitHubConfig,
    max_comments: int = MAX_COMMENTS_PER_PR,
    min_severity: str = "medium",
    timeout: float = 60.0,
) -> PRCommentResult:
    """Post findings as PR review comments.

    Args:
        findings: List of findings to post.
        config: GitHub API configuration.
        max_comments: Maximum number of comments to post.
        min_severity: Minimum severity level to include.
        timeout: HTTP request timeout in seconds.

    Returns:
        Result containing success status and counts.
    """
    # Filter and prepare comments
    filtered = _filter_findings(findings, min_severity)
    deduped = _dedupe_by_location(filtered)
    limited = deduped[:max_comments]

    if not limited:
        return PRCommentResult(
            success=True,
            comments_posted=0,
            comments_skipped=len(findings) - len(limited),
        )

    # Build comments for findings with file paths
    comments = []
    skipped = 0
    for finding in limited:
        if not finding.file_path:
            skipped += 1
            continue
        comment = PRComment(
            path=finding.file_path,
            line=finding.line or 1,
            body=_format_comment(finding),
        )
        comments.append(comment)

    if not comments:
        return PRCommentResult(
            success=True,
            comments_posted=0,
            comments_skipped=len(findings),
        )

    # Post review with comments
    try:
        review_url = _create_review(config, comments, timeout)
        return PRCommentResult(
            success=True,
            comments_posted=len(comments),
            comments_skipped=len(findings) - len(comments),
            review_url=review_url,
        )
    except httpx.HTTPStatusError as e:
        logger.error("GitHub API error: %s", e.response.text)
        return PRCommentResult(
            success=False,
            errors=[f"GitHub API error: {e.response.status_code}"],
        )
    except httpx.RequestError as e:
        logger.error("Request error: %s", e)
        return PRCommentResult(
            success=False,
            errors=[f"Request failed: {e!s}"],
        )


def _create_review(
    config: GitHubConfig,
    comments: list[PRComment],
    timeout: float,
) -> str | None:
    """Create a PR review with inline comments."""
    url = f"{config.api_base}/repos/{config.owner}/{config.repo}/pulls/{config.pr_number}/reviews"

    headers = {
        "Authorization": f"Bearer {config.token}",
        "Accept": "application/vnd.github.v3+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }

    payload = {
        "event": "COMMENT",
        "body": _format_summary(len(comments)),
        "comments": [c.to_dict() for c in comments],
    }

    with httpx.Client(timeout=timeout) as client:
        response = client.post(url, headers=headers, json=payload)
        response.raise_for_status()
        data: dict[str, str] = response.json()
        return data.get("html_url")


def _format_comment(finding: Finding) -> str:
    """Format a finding as a safe markdown comment."""
    emoji = SEVERITY_EMOJI.get(finding.severity, "⚪")
    severity_text = finding.severity.value.upper()

    # Sanitize user-controlled content
    title = escape_markdown(finding.title)
    description = redact_secrets(finding.description)
    description = escape_markdown(description[:500])

    lines = [
        f"### {emoji} {severity_text}: {title}",
        "",
        f"**Scanner:** {escape_markdown(finding.scanner)}",
    ]

    if finding.rule_id:
        # Rule IDs in code blocks don't need escaping, but sanitize backticks
        rule_id = finding.rule_id.replace("`", "'")
        lines.append(f"**Rule:** `{rule_id}`")

    if finding.cve:
        cve = finding.cve.replace("`", "'")
        lines.append(f"**CVE:** `{cve}`")

    if finding.cwe:
        cwe = finding.cwe.replace("`", "'")
        lines.append(f"**CWE:** `{cwe}`")

    lines.extend(["", description, ""])
    lines.append("---")
    lines.append("<sub>Posted by [Kekkai](https://github.com/kademoslabs/kekkai)</sub>")

    return "\n".join(lines)


def _format_summary(count: int) -> str:
    """Format the review summary body."""
    return f"🛡️ **Kekkai Security Scan** found {count} finding(s) in this PR."


def _filter_findings(
    findings: Sequence[Finding],
    min_severity: str,
) -> list[Finding]:
    """Filter findings by minimum severity level."""
    try:
        min_idx = SEVERITY_ORDER.index(min_severity.lower())
    except ValueError:
        min_idx = 2  # Default to medium

    return [f for f in findings if SEVERITY_ORDER.index(f.severity.value.lower()) <= min_idx]


def _dedupe_by_location(findings: Sequence[Finding]) -> list[Finding]:
    """Deduplicate findings by file path and line number."""
    seen: set[tuple[str | None, int | None]] = set()
    result: list[Finding] = []

    for finding in findings:
        key = (finding.file_path, finding.line)
        if key not in seen:
            seen.add(key)
            result.append(finding)

    return result
kekkai/github/models.py
ADDED

@@ -0,0 +1,56 @@

"""Data models for GitHub PR commenter."""

from __future__ import annotations

from dataclasses import dataclass, field


@dataclass(frozen=True)
class GitHubConfig:
    """Configuration for GitHub API access."""

    token: str
    owner: str
    repo: str
    pr_number: int
    api_base: str = "https://api.github.com"

    def __post_init__(self) -> None:
        if not self.token:
            raise ValueError("GitHub token is required")
        if not self.owner:
            raise ValueError("Repository owner is required")
        if not self.repo:
            raise ValueError("Repository name is required")
        if self.pr_number < 1:
            raise ValueError("PR number must be positive")


@dataclass(frozen=True)
class PRComment:
    """A comment to post on a PR."""

    path: str
    line: int
    body: str
    side: str = "RIGHT"

    def to_dict(self) -> dict[str, str | int]:
        """Convert to GitHub API format."""
        return {
            "path": self.path,
            "line": self.line,
            "body": self.body,
            "side": self.side,
        }


@dataclass
class PRCommentResult:
    """Result of posting PR comments."""

    success: bool
    comments_posted: int = 0
    comments_skipped: int = 0
    errors: list[str] = field(default_factory=list)
    review_url: str | None = None
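
A short sketch of how these models behave, with illustrative values: GitHubConfig is frozen and validates once in __post_init__, and PRComment.to_dict() emits the shape the reviews API payload uses.

from kekkai.github.models import GitHubConfig, PRComment

# Invalid configs fail fast at construction time.
try:
    GitHubConfig(token="", owner="octocat", repo="hello-world", pr_number=7)
except ValueError as exc:
    print(exc)  # -> GitHub token is required

comment = PRComment(path="src/app.py", line=10, body="Example finding")
print(comment.to_dict())
# -> {'path': 'src/app.py', 'line': 10, 'body': 'Example finding', 'side': 'RIGHT'}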
kekkai/github/sanitizer.py
ADDED

@@ -0,0 +1,112 @@

"""Sanitization utilities for GitHub PR comments."""

from __future__ import annotations

import re

# Markdown special characters that need escaping
MARKDOWN_SPECIAL_CHARS = r"[\`*_{}\[\]()#+\-.!|>~]"

# Patterns for potential secrets (conservative to avoid false positives)
_AWS_KEY_PATTERN = re.compile(r"AKIA[0-9A-Z]{16}", re.IGNORECASE)
_API_KEY_PATTERN = re.compile(
    r"(?:api[_-]?key|apikey|secret|password|token)\s*[:=]\s*['\"]?" r"([A-Za-z0-9_\-]{20,})['\"]?",
    re.IGNORECASE,
)
_BEARER_PATTERN = re.compile(r"Bearer\s+[A-Za-z0-9_\-\.]{20,}", re.IGNORECASE)
_JWT_PATTERN = re.compile(r"eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*")
_PRIVATE_KEY_PATTERN = re.compile(
    r"-----BEGIN\s+(?:RSA\s+)?PRIVATE\s+KEY-----.*?" r"-----END\s+(?:RSA\s+)?PRIVATE\s+KEY-----",
    re.DOTALL,
)
_GITHUB_TOKEN_PATTERN = re.compile(r"gh[pousr]_[A-Za-z0-9_]{36,}")
_HEX_SECRET_PATTERN = re.compile(r"(?<![A-Fa-f0-9])[A-Fa-f0-9]{40,}(?![A-Fa-f0-9])")

# Order matters: specific patterns first, generic last
SECRET_PATTERNS = [
    # Specific patterns first
    (_JWT_PATTERN, "[JWT_REDACTED]"),
    (_GITHUB_TOKEN_PATTERN, "[GITHUB_TOKEN_REDACTED]"),
    (_AWS_KEY_PATTERN, "[AWS_KEY_REDACTED]"),
    (_PRIVATE_KEY_PATTERN, "[PRIVATE_KEY_REDACTED]"),
    (_BEARER_PATTERN, "Bearer [REDACTED]"),
    # Generic patterns last
    (_API_KEY_PATTERN, "[REDACTED]"),
    (_HEX_SECRET_PATTERN, "[HEX_SECRET_REDACTED]"),
]

# Patterns for redacting common sensitive values (applied last)
SENSITIVE_VALUE_PATTERN = re.compile(
    r"(api[_-]?key|apikey|secret|password|credential|auth)\s*[:=]\s*['\"]?([^\s'\"]{8,})['\"]?",
    re.IGNORECASE,
)


def escape_markdown(text: str) -> str:
    """Escape markdown special characters to prevent injection.

    Args:
        text: Raw text that may contain markdown special characters.

    Returns:
        Text with markdown characters escaped.
    """
    if not text:
        return ""

    # Escape backslashes first to avoid double-escaping
    result = text.replace("\\", "\\\\")

    # Escape markdown special characters
    for char in "`*_{}[]()#+-.!|>~":
        result = result.replace(char, f"\\{char}")

    # Remove potential HTML tags
    result = re.sub(r"<[^>]+>", "", result)

    # Truncate to reasonable length
    max_length = 2000
    if len(result) > max_length:
        result = result[: max_length - 3] + "..."

    return result


def redact_secrets(text: str) -> str:
    """Redact potential secrets from text.

    Args:
        text: Text that may contain secrets.

    Returns:
        Text with secrets redacted.
    """
    if not text:
        return ""

    result = text

    # Apply secret patterns
    for pattern, replacement in SECRET_PATTERNS:
        result = pattern.sub(replacement, result)

    # Redact sensitive key=value pairs
    result = SENSITIVE_VALUE_PATTERN.sub(r"\1=[REDACTED]", result)

    return result


def sanitize_for_comment(text: str) -> str:
    """Full sanitization pipeline for PR comments.

    Args:
        text: Raw text to sanitize.

    Returns:
        Text safe for PR comments.
    """
    # First redact secrets
    text = redact_secrets(text)
    # Then escape markdown
    text = escape_markdown(text)
    return text
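
A quick sketch of the pipeline on made-up input; the exact escaped output is shown approximately.

from kekkai.github.sanitizer import sanitize_for_comment

raw = "password=hunter2hunter2 <script>alert(1)</script> *bold*"
print(sanitize_for_comment(raw))
# Secrets are redacted first, then HTML tags are stripped and markdown is
# escaped, yielding roughly: password=\[REDACTED\] alert\(1\) \*bold\*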
kekkai/installer/__init__.py
ADDED

@@ -0,0 +1,39 @@

"""Tool installer module for automatic binary management."""

from .errors import (
    DownloadError,
    ExtractionError,
    InstallerError,
    SecurityError,
    UnsupportedPlatformError,
)
from .manager import ToolInstaller, get_installer
from .manifest import (
    TOOL_MANIFESTS,
    ToolManifest,
    get_download_url,
    get_expected_hash,
    get_manifest,
    get_platform_key,
    validate_manifest_url,
)
from .verify import compute_sha256, verify_checksum

__all__ = [
    "DownloadError",
    "ExtractionError",
    "InstallerError",
    "SecurityError",
    "TOOL_MANIFESTS",
    "ToolInstaller",
    "ToolManifest",
    "UnsupportedPlatformError",
    "compute_sha256",
    "get_download_url",
    "get_expected_hash",
    "get_installer",
    "get_manifest",
    "get_platform_key",
    "validate_manifest_url",
    "verify_checksum",
]
kekkai/installer/errors.py
ADDED

@@ -0,0 +1,23 @@

"""Custom exceptions for the installer module."""

from __future__ import annotations


class InstallerError(Exception):
    """Base exception for installer errors."""


class SecurityError(InstallerError):
    """Raised on security verification failure (checksum mismatch, etc.)."""


class DownloadError(InstallerError):
    """Raised when download fails."""


class ExtractionError(InstallerError):
    """Raised when archive extraction fails."""


class UnsupportedPlatformError(InstallerError):
    """Raised when the current platform is not supported."""
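
Because every failure derives from InstallerError, callers can catch the base class to handle any installer problem in one place; a small illustrative sketch (the raised error is a stand-in, not a real failure path):

from kekkai.installer.errors import InstallerError, SecurityError

try:
    raise SecurityError("checksum mismatch for downloaded binary")  # stand-in
except InstallerError as exc:
    # SecurityError, DownloadError, ExtractionError, and
    # UnsupportedPlatformError all land here.
    print(f"install aborted: {exc}")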
kekkai/installer/extract.py
ADDED

@@ -0,0 +1,161 @@

"""Archive extraction utilities."""

from __future__ import annotations

import logging
import os
import tarfile
import zipfile
from pathlib import Path

from .errors import ExtractionError, SecurityError

logger = logging.getLogger(__name__)


def _is_safe_path(base_path: Path, target_path: Path) -> bool:
    """Check if target path is safe (no path traversal).

    Args:
        base_path: Base extraction directory.
        target_path: Target path to check.

    Returns:
        True if path is safe.
    """
    try:
        target_path.resolve().relative_to(base_path.resolve())
        return True
    except ValueError:
        return False


def extract_tar_gz(archive_path: Path, dest_dir: Path, binary_name: str) -> Path:
    """Extract a tar.gz archive and return path to binary.

    Args:
        archive_path: Path to the archive.
        dest_dir: Destination directory.
        binary_name: Name of the binary to extract.

    Returns:
        Path to extracted binary.

    Raises:
        ExtractionError: If extraction fails.
        SecurityError: If archive contains path traversal.
    """
    try:
        with tarfile.open(archive_path, "r:gz") as tar:
            # Security: Check for path traversal
            for member in tar.getmembers():
                member_path = dest_dir / member.name
                if not _is_safe_path(dest_dir, member_path):
                    raise SecurityError(f"Path traversal detected in archive: {member.name}")

            # Find and extract the binary
            binary_member = None
            for member in tar.getmembers():
                if member.name == binary_name or member.name.endswith(f"/{binary_name}"):
                    binary_member = member
                    break

            if not binary_member:
                raise ExtractionError(f"Binary '{binary_name}' not found in archive")

            # Extract just the binary
            tar.extract(binary_member, dest_dir, filter="data")

            extracted_path = dest_dir / binary_member.name
            if not extracted_path.exists():
                raise ExtractionError(f"Extraction failed: {extracted_path} not found")

            # Move to final location if nested
            final_path = dest_dir / binary_name
            if extracted_path != final_path:
                extracted_path.rename(final_path)

            return final_path

    except tarfile.TarError as e:
        raise ExtractionError(f"Failed to extract tar.gz: {e}") from e


def extract_zip(archive_path: Path, dest_dir: Path, binary_name: str) -> Path:
    """Extract a zip archive and return path to binary.

    Args:
        archive_path: Path to the archive.
        dest_dir: Destination directory.
        binary_name: Name of the binary to extract.

    Returns:
        Path to extracted binary.

    Raises:
        ExtractionError: If extraction fails.
        SecurityError: If archive contains path traversal.
    """
    try:
        with zipfile.ZipFile(archive_path, "r") as zf:
            # Security: Check for path traversal
            for name in zf.namelist():
                member_path = dest_dir / name
                if not _is_safe_path(dest_dir, member_path):
                    raise SecurityError(f"Path traversal detected in archive: {name}")

            # Find and extract the binary
            binary_name_variants = [binary_name, f"{binary_name}.exe"]
            binary_member = None

            for name in zf.namelist():
                base_name = os.path.basename(name)
                if base_name in binary_name_variants:
                    binary_member = name
                    break

            if not binary_member:
                raise ExtractionError(f"Binary '{binary_name}' not found in archive")

            # Extract just the binary
            zf.extract(binary_member, dest_dir)

            extracted_path = dest_dir / binary_member
            if not extracted_path.exists():
                raise ExtractionError(f"Extraction failed: {extracted_path} not found")

            # Move to final location if nested
            final_name = binary_name
            if extracted_path.suffix == ".exe":
                final_name = f"{binary_name}.exe"

            final_path = dest_dir / final_name
            if extracted_path != final_path:
                extracted_path.rename(final_path)

            return final_path

    except zipfile.BadZipFile as e:
        raise ExtractionError(f"Failed to extract zip: {e}") from e


def extract_archive(
    archive_path: Path, dest_dir: Path, binary_name: str, archive_type: str
) -> Path:
    """Extract an archive and return path to binary.

    Args:
        archive_path: Path to the archive.
        dest_dir: Destination directory.
        binary_name: Name of the binary to extract.
        archive_type: Type of archive ("tar.gz" or "zip").

    Returns:
        Path to extracted binary.
    """
    if archive_type == "tar.gz":
        return extract_tar_gz(archive_path, dest_dir, binary_name)
    elif archive_type == "zip":
        return extract_zip(archive_path, dest_dir, binary_name)
    else:
        raise ExtractionError(f"Unsupported archive type: {archive_type}")
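
A hedged usage sketch of extract_archive; the archive filename, binary name, and destination directory are hypothetical, and the archive is assumed to have been downloaded and checksum-verified already:

from pathlib import Path

from kekkai.installer.errors import ExtractionError, SecurityError
from kekkai.installer.extract import extract_archive

archive = Path("/tmp/tool_linux_amd64.tar.gz")  # hypothetical download
dest = Path.home() / ".kekkai" / "bin"          # hypothetical install dir
dest.mkdir(parents=True, exist_ok=True)

try:
    binary = extract_archive(archive, dest, binary_name="tool", archive_type="tar.gz")
    print(f"binary ready at {binary}")
except SecurityError as exc:
    print(f"refusing unsafe archive: {exc}")  # path traversal detected
except ExtractionError as exc:
    print(f"extraction failed: {exc}")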