sentro 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sentro/__init__.py +3 -0
- sentro/__main__.py +4 -0
- sentro/_version.py +1 -0
- sentro/cli.py +195 -0
- sentro/config.py +133 -0
- sentro/data/popular_packages.txt +259 -0
- sentro/extraction/__init__.py +0 -0
- sentro/extraction/extractor.py +27 -0
- sentro/extraction/sdist_extractor.py +91 -0
- sentro/extraction/wheel_extractor.py +54 -0
- sentro/installer.py +169 -0
- sentro/models.py +70 -0
- sentro/orchestrator.py +140 -0
- sentro/pypi/__init__.py +0 -0
- sentro/pypi/client.py +84 -0
- sentro/pypi/downloader.py +70 -0
- sentro/reporting/__init__.py +0 -0
- sentro/reporting/json_reporter.py +41 -0
- sentro/reporting/reporter.py +22 -0
- sentro/reporting/text_reporter.py +95 -0
- sentro/scanners/__init__.py +0 -0
- sentro/scanners/base.py +48 -0
- sentro/scanners/dependency_confusion.py +56 -0
- sentro/scanners/malicious_code.py +332 -0
- sentro/scanners/metadata.py +133 -0
- sentro/scanners/obfuscation.py +173 -0
- sentro/scanners/setup_hooks.py +158 -0
- sentro/scanners/typosquatting.py +112 -0
- sentro-0.1.0.dist-info/METADATA +230 -0
- sentro-0.1.0.dist-info/RECORD +32 -0
- sentro-0.1.0.dist-info/WHEEL +4 -0
- sentro-0.1.0.dist-info/entry_points.txt +2 -0
sentro/__init__.py
ADDED
sentro/__main__.py
ADDED
sentro/_version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
sentro/cli.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""CLI entry point."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
|
|
12
|
+
from ._version import __version__
|
|
13
|
+
from .config import load_config
|
|
14
|
+
from .installer import InstallerType, detect_installer
|
|
15
|
+
from .models import RiskLevel
|
|
16
|
+
from .orchestrator import ScanOrchestrator
|
|
17
|
+
from .reporting.reporter import render_report
|
|
18
|
+
|
|
19
|
+
_console = Console(stderr=True)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Root command group. click uses the docstring below as the text shown by
# `sentro --help`, so wording changes there are user-visible.
@click.group()
@click.version_option(__version__, prog_name="sentro")
def cli() -> None:
    """sentro: pip with a security conscience.

    Scans Python packages for malicious code before installing them.
    Detects typosquatting, obfuscated payloads, malicious install hooks,
    dependency confusion, and more.
    """
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _split_package_spec(package_spec: str) -> tuple[str, Optional[str]]:
    """Split a requirement string into ``(project_name, exact_version_or_None)``.

    Handles ``name``, ``name==1.2``, ``name[extra]==1.2`` and range
    specifiers such as ``name>=1.0,<2`` (which yield no exact version).
    Anything that does not look like a plain named requirement (paths,
    URLs, direct references) falls back to splitting on ``==``.
    """
    import re  # local import so the module-level import block stays untouched

    match = re.match(
        r"\s*([A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?)\s*(?:\[[^\]]*\])?\s*(.*)",
        package_spec,
        re.DOTALL,
    )
    # Drop an environment marker ("; python_version < '3.9'") before inspecting
    # the version specifier.
    remainder = match.group(2).split(";", 1)[0].strip() if match else ""
    if match is None or (remainder and remainder[0] not in "=<>!~"):
        name, _, version = package_spec.partition("==")
        return name.strip(), version.strip() or None

    # Only a single, wildcard-free "==X" clause pins an exact version.
    pinned = re.fullmatch(r"==\s*([^\s,*=]+)", remainder)
    return match.group(1), (pinned.group(1) if pinned else None)


@cli.command(
    name="install",
    context_settings={"ignore_unknown_options": True, "allow_extra_args": True},
)
@click.argument("packages", nargs=-1, required=True)
@click.option(
    "--strict",
    is_flag=True,
    envvar="SENTRO_STRICT",
    help="Block installation if any package scores DANGER.",
)
@click.option(
    "--no-install",
    is_flag=True,
    help="Scan only — do not invoke the package installer.",
)
@click.option(
    "--skip-scan",
    is_flag=True,
    help="Skip all scanning and forward directly to the installer.",
)
@click.option(
    "--output-format",
    type=click.Choice(["text", "json"]),
    default=None,
    envvar="SENTRO_OUTPUT_FORMAT",
    help="Output format for the scan report.",
)
@click.option(
    "--installer",
    type=click.Choice(["pip", "uv", "conda", "mamba", "poetry", "pipenv", "pdm", "auto"]),
    default="auto",
    envvar="SENTRO_INSTALLER",
    help="Package installer to use after scanning. Defaults to auto-detect.",
    show_default=True,
)
@click.option(
    "--config",
    "config_file",
    type=click.Path(exists=True, path_type=Path),
    default=None,
    help="Path to a TOML config file.",
)
@click.pass_context
def install_cmd(
    ctx: click.Context,
    packages: tuple[str, ...],
    strict: bool,
    no_install: bool,
    skip_scan: bool,
    output_format: Optional[str],
    installer: str,
    config_file: Optional[Path],
) -> None:
    """Install PACKAGES after scanning them for malicious code.

    Unknown options (e.g. --index-url, --constraint) are forwarded to the
    package installer verbatim.

    Examples:

      sentro install requests

      sentro install requests==2.28.0 --strict

      sentro install numpy --no-install --output-format json

      sentro install mypackage --installer uv
    """
    # Only flags the user actually set become overrides, so config files and
    # environment variables still apply otherwise.
    cli_overrides = {}
    if strict:
        cli_overrides["strict"] = True
    if output_format:
        cli_overrides["output_format"] = output_format

    config = load_config(cli_overrides=cli_overrides, config_file=config_file)

    # --skip-scan: hand everything straight to the installer and mirror its
    # exit status.
    if skip_scan:
        resolved_installer = _resolve_installer(installer)
        rc = _forward_to_installer(resolved_installer, list(packages), ctx.args)
        sys.exit(rc)

    orchestrator = ScanOrchestrator(config=config)
    out_console = Console()  # reports go to stdout; progress/errors use stderr

    blocked = False

    for package_spec in packages:
        # With ignore_unknown_options, click may route unknown installer
        # options into PACKAGES. They are not project names, so do not scan
        # them; they are still forwarded to the installer below.
        if package_spec.startswith("-"):
            continue

        name, version = _split_package_spec(package_spec)

        _console.print(f"[dim]Scanning[/dim] [bold]{package_spec}[/bold]...", highlight=False)

        try:
            report = orchestrator.scan_package(name, version)
        except Exception as exc:
            _console.print(f"[red]Error scanning {package_spec}: {exc}[/red]")
            if config.strict:
                sys.exit(2)
            # NOTE(review): in non-strict mode a package whose scan failed is
            # still forwarded to the installer (fail-open) — confirm intended.
            continue

        if config.output_format == "json":
            from .reporting.json_reporter import render_json_report
            click.echo(render_json_report(report, config.thresholds))
        else:
            from .reporting.text_reporter import render_text_report
            render_text_report(report, config.thresholds, console=out_console)

        level = report.risk_level(config.thresholds)
        if level == RiskLevel.DANGER and config.strict:
            blocked = True

    if blocked:
        _console.print(
            "[bold red]Installation BLOCKED[/bold red] — one or more packages scored DANGER "
            "and --strict mode is enabled."
        )
        sys.exit(1)

    if no_install:
        sys.exit(0)

    # Determine which installer to use
    resolved_installer = _resolve_installer(installer)
    _console.print(
        f"[dim]Installing via[/dim] [bold]{resolved_installer.value}[/bold]..."
    )

    packages_to_install = list(packages)  # install all, including warned ones
    rc = _forward_to_installer(resolved_installer, packages_to_install, ctx.args)
    sys.exit(rc)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@cli.command(name="detect-installer")
def detect_installer_cmd() -> None:
    """Show which package installer would be used automatically."""
    # Print the value of whatever detect_installer() picks.
    click.echo(f"Detected installer: {detect_installer().value}")
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _resolve_installer(installer_str: str) -> InstallerType:
    """Map the ``--installer`` option value to an ``InstallerType``.

    ``"auto"`` defers to ``detect_installer()``. Any other value is matched
    against the enum members' ``.value``; an unmatched value yields
    ``InstallerType.PIP``.
    """
    if installer_str == "auto":
        return detect_installer()
    candidates = (member for member in InstallerType if member.value == installer_str)
    return next(candidates, InstallerType.PIP)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _forward_to_installer(
    installer: InstallerType,
    packages: list[str],
    extra_args: list[str],
) -> int:
    """Run the chosen installer and return its exit status.

    The argv comes from ``build_install_command`` and is executed with
    ``subprocess.run``.
    """
    import subprocess

    from .installer import build_install_command

    argv = build_install_command(installer, packages, extra_args)
    completed = subprocess.run(argv)
    return completed.returncode
|
sentro/config.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Configuration loading with multi-source merge chain."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Optional
|
|
10
|
+
|
|
11
|
+
if sys.version_info >= (3, 11):
|
|
12
|
+
import tomllib
|
|
13
|
+
else:
|
|
14
|
+
try:
|
|
15
|
+
import tomllib
|
|
16
|
+
except ImportError:
|
|
17
|
+
import tomli as tomllib # type: ignore[no-redef]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class Config:
    """Effective sentro settings produced by ``load_config``."""

    # When true, the CLI blocks installation of DANGER packages and aborts on scan errors.
    strict: bool = False
    # Cut-offs keyed by "warning" and "danger"; presumably risk-score limits — confirm in models.
    thresholds: dict = field(default_factory=lambda: {"warning": 30, "danger": 70})
    # Package names from config / SENTRO_WHITELIST; presumably exempt from scanning — confirm in orchestrator.
    whitelist_packages: list[str] = field(default_factory=list)
    scanners_enabled: list[str] = field(default_factory=list)  # empty = all enabled
    # Scanner names to turn off; how this interacts with scanners_enabled is decided elsewhere.
    scanners_disabled: list[str] = field(default_factory=list)
    # Timeout for PyPI requests; presumably in seconds — confirm in the PyPI client.
    pypi_timeout: int = 10
    # Presumably prefers a wheel over an sdist when both are available — confirm in the downloader.
    prefer_wheel: bool = True
    # Report format; the CLI renders JSON when this equals "json" and text otherwise.
    output_format: str = "text"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def load_config(
    cli_overrides: Optional[dict[str, Any]] = None,
    config_file: Optional[Path] = None,
) -> Config:
    """
    Build the effective configuration from every supported source.

    Merge chain (lowest → highest priority):
      1. Built-in defaults
      2. ~/.config/sentro/config.toml
      3. pyproject.toml [tool.sentro] in cwd
      4. .sentro.toml in cwd
      5. Explicit --config path
      6. SENTRO_* environment variables
      7. CLI flags

    Dict-valued settings (currently ``thresholds``) are merged key by key, so
    a higher-priority source that sets only one threshold does not discard
    the other one from a lower-priority source.

    Args:
        cli_overrides: Settings given on the command line; ``None`` values
            are ignored.
        config_file: Optional explicit TOML file (the ``--config`` option).

    Returns:
        The merged ``Config``.
    """
    merged: dict[str, Any] = {}

    def _merge(layer: dict[str, Any]) -> None:
        # Later layers win, but nested tables are combined one level deep
        # instead of being replaced wholesale.
        for key, value in layer.items():
            existing = merged.get(key)
            if isinstance(existing, dict) and isinstance(value, dict):
                merged[key] = {**existing, **value}
            else:
                merged[key] = value

    # 2. User-level config
    user_cfg = Path.home() / ".config" / "sentro" / "config.toml"
    if user_cfg.exists():
        _merge(_load_toml_section(user_cfg))

    # 3. pyproject.toml in cwd
    cwd_pyproject = Path.cwd() / "pyproject.toml"
    if cwd_pyproject.exists():
        _merge(_load_toml_section(cwd_pyproject, "sentro"))

    # 4. .sentro.toml in cwd
    cwd_sentro = Path.cwd() / ".sentro.toml"
    if cwd_sentro.exists():
        _merge(_load_toml_section(cwd_sentro))

    # 5. Explicit config file
    if config_file is not None:
        _merge(_load_toml_section(Path(config_file)))

    # 6. Environment variables
    _merge(_apply_env_overrides())

    # 7. CLI flags (only non-None values)
    if cli_overrides:
        _merge({k: v for k, v in cli_overrides.items() if v is not None})

    return _build_config(merged)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _load_toml_section(path: Path, section: str = "sentro") -> dict[str, Any]:
|
|
80
|
+
try:
|
|
81
|
+
with open(path, "rb") as f:
|
|
82
|
+
data = tomllib.load(f)
|
|
83
|
+
# Support [tool.sentro] and [sentro]
|
|
84
|
+
tool_section = data.get("tool", {}).get(section, {})
|
|
85
|
+
top_section = data.get(section, {})
|
|
86
|
+
result = {}
|
|
87
|
+
result.update(top_section)
|
|
88
|
+
result.update(tool_section)
|
|
89
|
+
return result
|
|
90
|
+
except Exception:
|
|
91
|
+
return {}
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _apply_env_overrides() -> dict[str, Any]:
    """Collect settings from ``SENTRO_*`` environment variables.

    Only variables that are set (and non-empty) contribute a key. Threshold
    values that are not valid integers are ignored.
    """
    env = os.environ
    overrides: dict[str, Any] = {}

    strict_flag = env.get("SENTRO_STRICT", "").lower()
    if strict_flag in ("1", "true", "yes"):
        overrides["strict"] = True

    threshold_vars = (
        ("SENTRO_DANGER_THRESHOLD", "danger"),
        ("SENTRO_WARNING_THRESHOLD", "warning"),
    )
    for variable, key in threshold_vars:
        raw = env.get(variable)
        if not raw:
            continue
        try:
            parsed = int(raw)
        except ValueError:
            continue
        overrides.setdefault("thresholds", {})[key] = parsed

    whitelist_raw = env.get("SENTRO_WHITELIST")
    if whitelist_raw:
        stripped = (entry.strip() for entry in whitelist_raw.split(","))
        overrides["whitelist_packages"] = [entry for entry in stripped if entry]

    format_raw = env.get("SENTRO_OUTPUT_FORMAT")
    if format_raw:
        overrides["output_format"] = format_raw

    return overrides
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _build_config(data: dict[str, Any]) -> Config:
    """Turn a merged settings mapping into a ``Config``.

    Keys absent from ``data`` keep their dataclass defaults. ``thresholds``
    is merged into the default thresholds; every other key is coerced to the
    field's type and assigned.
    """
    cfg = Config()
    # Settings in application order; a ``None`` converter marks the one
    # field that is merged instead of replaced.
    plan = (
        ("strict", bool),
        ("thresholds", None),
        ("whitelist_packages", list),
        ("scanners_enabled", list),
        ("scanners_disabled", list),
        ("pypi_timeout", int),
        ("prefer_wheel", bool),
        ("output_format", str),
    )
    for key, convert in plan:
        if key not in data:
            continue
        if convert is None:
            cfg.thresholds.update(data[key])
        else:
            setattr(cfg, key, convert(data[key]))
    return cfg
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# Top PyPI packages used for typosquatting detection
|
|
2
|
+
# Source: https://hugovk.github.io/top-pypi-packages/
|
|
3
|
+
numpy
|
|
4
|
+
pandas
|
|
5
|
+
requests
|
|
6
|
+
scipy
|
|
7
|
+
matplotlib
|
|
8
|
+
scikit-learn
|
|
9
|
+
tensorflow
|
|
10
|
+
torch
|
|
11
|
+
flask
|
|
12
|
+
django
|
|
13
|
+
fastapi
|
|
14
|
+
sqlalchemy
|
|
15
|
+
boto3
|
|
16
|
+
pytest
|
|
17
|
+
click
|
|
18
|
+
rich
|
|
19
|
+
pydantic
|
|
20
|
+
httpx
|
|
21
|
+
aiohttp
|
|
22
|
+
pillow
|
|
23
|
+
cryptography
|
|
24
|
+
paramiko
|
|
25
|
+
celery
|
|
26
|
+
redis
|
|
27
|
+
pymongo
|
|
28
|
+
setuptools
|
|
29
|
+
wheel
|
|
30
|
+
twine
|
|
31
|
+
black
|
|
32
|
+
mypy
|
|
33
|
+
flake8
|
|
34
|
+
urllib3
|
|
35
|
+
charset-normalizer
|
|
36
|
+
certifi
|
|
37
|
+
idna
|
|
38
|
+
six
|
|
39
|
+
attrs
|
|
40
|
+
packaging
|
|
41
|
+
typing-extensions
|
|
42
|
+
colorama
|
|
43
|
+
tqdm
|
|
44
|
+
pyyaml
|
|
45
|
+
python-dotenv
|
|
46
|
+
jinja2
|
|
47
|
+
werkzeug
|
|
48
|
+
itsdangerous
|
|
49
|
+
markupsafe
|
|
50
|
+
psutil
|
|
51
|
+
lxml
|
|
52
|
+
beautifulsoup4
|
|
53
|
+
pyarrow
|
|
54
|
+
grpcio
|
|
55
|
+
protobuf
|
|
56
|
+
google-cloud-storage
|
|
57
|
+
azure-storage-blob
|
|
58
|
+
cffi
|
|
59
|
+
pyopenssl
|
|
60
|
+
bcrypt
|
|
61
|
+
passlib
|
|
62
|
+
pyjwt
|
|
63
|
+
oauthlib
|
|
64
|
+
stripe
|
|
65
|
+
twilio
|
|
66
|
+
sendgrid
|
|
67
|
+
boto
|
|
68
|
+
botocore
|
|
69
|
+
s3transfer
|
|
70
|
+
awscli
|
|
71
|
+
azure-core
|
|
72
|
+
google-auth
|
|
73
|
+
google-api-python-client
|
|
74
|
+
kubernetes
|
|
75
|
+
docker
|
|
76
|
+
ansible
|
|
77
|
+
fabric
|
|
78
|
+
invoke
|
|
79
|
+
poetry
|
|
80
|
+
pipenv
|
|
81
|
+
virtualenv
|
|
82
|
+
tox
|
|
83
|
+
coverage
|
|
84
|
+
pytest-cov
|
|
85
|
+
mock
|
|
86
|
+
responses
|
|
87
|
+
factory-boy
|
|
88
|
+
hypothesis
|
|
89
|
+
locust
|
|
90
|
+
gunicorn
|
|
91
|
+
uvicorn
|
|
92
|
+
starlette
|
|
93
|
+
aiofiles
|
|
94
|
+
asyncpg
|
|
95
|
+
databases
|
|
96
|
+
alembic
|
|
97
|
+
psycopg2
|
|
98
|
+
pymysql
|
|
99
|
+
motor
|
|
100
|
+
elasticsearch
|
|
101
|
+
opensearch-py
|
|
102
|
+
kafka-python
|
|
103
|
+
pika
|
|
104
|
+
kombu
|
|
105
|
+
dramatiq
|
|
106
|
+
rq
|
|
107
|
+
apscheduler
|
|
108
|
+
arrow
|
|
109
|
+
pendulum
|
|
110
|
+
python-dateutil
|
|
111
|
+
pytz
|
|
112
|
+
tzdata
|
|
113
|
+
humanize
|
|
114
|
+
babel
|
|
115
|
+
babel
|
|
116
|
+
simplejson
|
|
117
|
+
ujson
|
|
118
|
+
orjson
|
|
119
|
+
msgpack
|
|
120
|
+
protobuf
|
|
121
|
+
avro-python3
|
|
122
|
+
pyzmq
|
|
123
|
+
websockets
|
|
124
|
+
socketio
|
|
125
|
+
python-socketio
|
|
126
|
+
channels
|
|
127
|
+
daphne
|
|
128
|
+
twisted
|
|
129
|
+
scrapy
|
|
130
|
+
selenium
|
|
131
|
+
playwright
|
|
132
|
+
httpcore
|
|
133
|
+
h11
|
|
134
|
+
h2
|
|
135
|
+
trio
|
|
136
|
+
anyio
|
|
137
|
+
tenacity
|
|
138
|
+
retry
|
|
139
|
+
backoff
|
|
140
|
+
loguru
|
|
141
|
+
structlog
|
|
142
|
+
sentry-sdk
|
|
143
|
+
prometheus-client
|
|
144
|
+
opentelemetry-sdk
|
|
145
|
+
datadog
|
|
146
|
+
newrelic
|
|
147
|
+
nltk
|
|
148
|
+
spacy
|
|
149
|
+
gensim
|
|
150
|
+
transformers
|
|
151
|
+
huggingface-hub
|
|
152
|
+
datasets
|
|
153
|
+
tokenizers
|
|
154
|
+
accelerate
|
|
155
|
+
diffusers
|
|
156
|
+
langchain
|
|
157
|
+
openai
|
|
158
|
+
anthropic
|
|
159
|
+
cohere
|
|
160
|
+
tiktoken
|
|
161
|
+
faiss-cpu
|
|
162
|
+
pinecone-client
|
|
163
|
+
chromadb
|
|
164
|
+
weaviate-client
|
|
165
|
+
qdrant-client
|
|
166
|
+
Pillow
|
|
167
|
+
opencv-python
|
|
168
|
+
imageio
|
|
169
|
+
scikit-image
|
|
170
|
+
albumentations
|
|
171
|
+
torchvision
|
|
172
|
+
tensorflow-datasets
|
|
173
|
+
keras
|
|
174
|
+
xgboost
|
|
175
|
+
lightgbm
|
|
176
|
+
catboost
|
|
177
|
+
statsmodels
|
|
178
|
+
sympy
|
|
179
|
+
networkx
|
|
180
|
+
igraph
|
|
181
|
+
graph-tool
|
|
182
|
+
dask
|
|
183
|
+
ray
|
|
184
|
+
joblib
|
|
185
|
+
multiprocess
|
|
186
|
+
concurrent-futures
|
|
187
|
+
cloudpickle
|
|
188
|
+
pyzmq
|
|
189
|
+
zarr
|
|
190
|
+
h5py
|
|
191
|
+
netcdf4
|
|
192
|
+
xarray
|
|
193
|
+
geopandas
|
|
194
|
+
shapely
|
|
195
|
+
fiona
|
|
196
|
+
pyproj
|
|
197
|
+
folium
|
|
198
|
+
plotly
|
|
199
|
+
bokeh
|
|
200
|
+
altair
|
|
201
|
+
seaborn
|
|
202
|
+
dash
|
|
203
|
+
streamlit
|
|
204
|
+
gradio
|
|
205
|
+
panel
|
|
206
|
+
hvplot
|
|
207
|
+
pydeck
|
|
208
|
+
kepler.gl
|
|
209
|
+
pyvis
|
|
210
|
+
wordcloud
|
|
211
|
+
reportlab
|
|
212
|
+
fpdf2
|
|
213
|
+
weasyprint
|
|
214
|
+
openpyxl
|
|
215
|
+
xlrd
|
|
216
|
+
xlwt
|
|
217
|
+
xlsxwriter
|
|
218
|
+
python-docx
|
|
219
|
+
python-pptx
|
|
220
|
+
pdfminer.six
|
|
221
|
+
pypdf
|
|
222
|
+
tabula-py
|
|
223
|
+
camelot-py
|
|
224
|
+
pytesseract
|
|
225
|
+
pdf2image
|
|
226
|
+
docx2txt
|
|
227
|
+
chardet
|
|
228
|
+
ftfy
|
|
229
|
+
unidecode
|
|
230
|
+
regex
|
|
231
|
+
parse
|
|
232
|
+
pyparsing
|
|
233
|
+
lark
|
|
234
|
+
antlr4-python3-runtime
|
|
235
|
+
pygments
|
|
236
|
+
mistune
|
|
237
|
+
markdown
|
|
238
|
+
sphinx
|
|
239
|
+
mkdocs
|
|
240
|
+
pdoc
|
|
241
|
+
numpydoc
|
|
242
|
+
rsa
|
|
243
|
+
ecdsa
|
|
244
|
+
pynacl
|
|
245
|
+
paramiko
|
|
246
|
+
fabric
|
|
247
|
+
invoke
|
|
248
|
+
plumbum
|
|
249
|
+
sh
|
|
250
|
+
cmd2
|
|
251
|
+
prompt-toolkit
|
|
252
|
+
blessed
|
|
253
|
+
curtsies
|
|
254
|
+
urwid
|
|
255
|
+
textual
|
|
256
|
+
blessed
|
|
257
|
+
rich
|
|
258
|
+
typer
|
|
259
|
+
argparse
|
|
File without changes
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Dispatcher: detect archive type and delegate to the correct extractor."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from ..models import PackageFiles
|
|
8
|
+
from .sdist_extractor import SDistExtractor
|
|
9
|
+
from .wheel_extractor import WheelExtractor
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def extract_package(archive_path: Path, dest_dir: Path) -> PackageFiles:
    """
    Extract ``archive_path`` into ``dest_dir`` with the matching extractor.

    ``.whl`` files go to ``WheelExtractor``; ``.tar.gz``, ``.tgz`` and
    ``.zip`` files go to ``SDistExtractor``. For any other file name the
    sdist extractor is tried first and the wheel extractor is the fallback.
    Returns the ``PackageFiles`` produced by the extractor used.
    """
    filename = archive_path.name

    if filename.endswith(".whl"):
        return WheelExtractor().extract(archive_path, dest_dir)

    if filename.endswith((".tar.gz", ".tgz", ".zip")):
        return SDistExtractor().extract(archive_path, dest_dir)

    # Unknown extension: best effort, tarball first, then zip.
    try:
        return SDistExtractor().extract(archive_path, dest_dir)
    except Exception:
        return WheelExtractor().extract(archive_path, dest_dir)
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Extract .tar.gz / .zip sdist files into a temp directory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
import tarfile
|
|
7
|
+
import zipfile
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from ..models import PackageFiles
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PathTraversalError(Exception):
    """Raised when an archive entry attempts path traversal.

    In this module it is raised while validating zip entries, before any
    file is extracted.
    """
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SDistExtractor:
    """Unpack a source distribution and describe the extracted files."""

    def extract(self, sdist_path: Path, dest_dir: Path) -> PackageFiles:
        """Extract ``sdist_path`` into ``dest_dir`` and return its ``PackageFiles``.

        ``.zip`` archives are unpacked as zip files; every other file name
        is treated as a gzip-compressed tarball. The package name and
        version are derived from the archive's file name.
        """
        dest_dir.mkdir(parents=True, exist_ok=True)
        filename = sdist_path.name

        # Parsing the name does not depend on the archive type, so do it once.
        name, version = _parse_sdist_name(filename)

        unpack = _extract_zip if filename.endswith(".zip") else _extract_tarball
        unpack(sdist_path, dest_dir)

        return _build_package_files(name, version, dest_dir)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _parse_sdist_name(filename: str) -> tuple[str, str]:
    """Derive ``(name, version)`` from an sdist file name.

    A trailing ``.tar.gz``, ``.tgz`` or ``.zip`` is removed, then the rest is
    split at its last ``-``. Without a ``-`` the version is ``"unknown"``.
    """
    stem = filename
    for extension in (".tar.gz", ".tgz", ".zip"):
        if filename.endswith(extension):
            stem = filename[: -len(extension)]
            break

    project, dash, version = stem.rpartition("-")
    if dash:
        return project, version
    return stem, "unknown"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _guard_tar_member(member: tarfile.TarInfo) -> bool:
    """Return True if safe, False to skip.

    A member is rejected when:
      * its name is absolute, carries a Windows drive prefix, or contains a
        ``..`` component (``/`` and ``\\`` both count as separators);
      * it is a symlink or hard link whose target is absolute or contains
        ``..`` — such a link can redirect later members outside the
        extraction directory;
      * it is a character/block device or a FIFO.
    """

    def _escapes(raw: str) -> bool:
        # Normalise separators so backslash paths are inspected the same way
        # on every platform.
        posix = raw.replace("\\", "/")
        if posix.startswith("/"):
            return True
        if len(posix) >= 2 and posix[0].isalpha() and posix[1] == ":":
            return True  # Windows drive prefix such as "C:"
        return ".." in posix.split("/")

    if _escapes(member.name):
        return False
    if member.issym() or member.islnk():
        return not _escapes(member.linkname)
    if member.isdev():
        return False
    return True
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _extract_tarball(path: Path, dest: Path) -> None:
|
|
60
|
+
if sys.version_info >= (3, 12):
|
|
61
|
+
with tarfile.open(path, "r:gz") as tf:
|
|
62
|
+
tf.extractall(dest, filter="data")
|
|
63
|
+
else:
|
|
64
|
+
with tarfile.open(path, "r:gz") as tf:
|
|
65
|
+
safe_members = [m for m in tf.getmembers() if _guard_tar_member(m)]
|
|
66
|
+
tf.extractall(dest, members=safe_members)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _extract_zip(path: Path, dest: Path) -> None:
    """Validate every entry of a zip archive, then extract it into ``dest``.

    Raises:
        PathTraversalError: if an entry's path is absolute or has a ``..``
            (or empty) component. Nothing is extracted in that case.
    """
    with zipfile.ZipFile(path, "r") as archive:
        # Check all entries first so a bad one aborts before any write.
        for entry in archive.infolist():
            entry_path = Path(entry.filename)
            has_bad_component = any(part in ("..", "") for part in entry_path.parts)
            if has_bad_component or entry_path.is_absolute():
                raise PathTraversalError(f"Unsafe path in zip: {entry.filename!r}")
        archive.extractall(dest)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _build_package_files(name: str, version: str, root: Path) -> PackageFiles:
    """Describe the extracted tree under ``root`` as a ``PackageFiles``.

    ``python_files`` holds every ``*.py`` file except those named
    ``setup.py``. ``setup_py`` and ``pyproject_toml`` are the first match of
    a recursive search, or ``None`` when no such file exists.
    """
    modules = [candidate for candidate in root.rglob("*.py") if candidate.name != "setup.py"]
    first_setup = next(iter(root.rglob("setup.py")), None)
    first_pyproject = next(iter(root.rglob("pyproject.toml")), None)
    return PackageFiles(
        name=name,
        version=version,
        source_dir=root,
        python_files=modules,
        setup_py=first_setup,
        pyproject_toml=first_pyproject,
    )
|