purl2src 1.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
purl2src/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ """Semantic Copycat Purl2Src - Translate PURLs to download URLs."""
2
+
3
+ import warnings
4
+
5
+ # Suppress urllib3 OpenSSL warning on macOS
6
+ warnings.filterwarnings("ignore", message="urllib3 v2 only supports OpenSSL 1.1.1+")
7
+
8
+ from .parser import parse_purl
9
+ from .handlers import get_download_url
10
+
11
# Ask the installed distribution metadata for the package version. Any
# failure (metadata lookup fails, or importlib.metadata itself cannot be
# imported) lands in the fallback branch instead of breaking the import.
try:
    from importlib.metadata import version

    __version__ = version("purl2src")
except Exception:
    # Fallback for development installations
    __version__ = "0.0.0+unknown"
18
+
19
+ __all__ = ["parse_purl", "get_download_url"]
purl2src/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Entry point for CLI execution."""
2
+
3
+ from .cli import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
purl2src/cli.py ADDED
@@ -0,0 +1,151 @@
1
+ """Command-line interface for purl2src."""
2
+
3
+ import json
4
+ import sys
5
+ from pathlib import Path
6
+ from typing import List, Optional
7
+
8
+ import click
9
+
10
+ from . import __version__
11
+ from .handlers import get_download_url
12
+
13
+
14
def _resolve_purl(purl_str: str, validate: bool) -> dict:
    """Resolve one PURL into a result dict without ever raising.

    Any exception from the handler layer is converted into a record with
    ``status == "failed"`` so that one bad PURL does not abort the batch.
    """
    try:
        return get_download_url(purl_str, validate=validate).to_dict()
    except Exception as exc:
        return {
            "purl": purl_str,
            "download_url": None,
            "status": "failed",
            "error": str(exc),
        }


def _render_results(results: List[dict], fmt: str) -> str:
    """Serialise result dicts as ``json``, ``csv`` or (default) plain text."""
    if fmt == "json":
        return json.dumps(results, indent=2)

    if fmt == "csv":
        # Local imports: this block must not rely on edits to the file's
        # import section.
        import csv
        import io

        buffer = io.StringIO()
        # csv.writer quotes fields containing commas/quotes/newlines;
        # lineterminator="\n" keeps the original newline-joined layout.
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(["purl", "download_url", "status", "method"])
        for r in results:
            writer.writerow(
                [
                    r["purl"],
                    # Exception records carry an explicit None here; emit an
                    # empty cell rather than the literal text "None".
                    r.get("download_url") or "",
                    r.get("status", "failed"),
                    r.get("method", ""),
                ]
            )
        # The original output had no trailing newline.
        return buffer.getvalue().rstrip("\n")

    lines = []
    for r in results:
        if r.get("download_url"):
            lines.append(f"{r['purl']} -> {r['download_url']}")
        else:
            error_msg = r.get("error", "Failed to resolve")
            lines.append(f"{r['purl']} -> ERROR: {error_msg}")
    return "\n".join(lines)


@click.command()
@click.version_option(version=__version__, prog_name="purl2src")
@click.argument("purl", required=False)
@click.option(
    "-f",
    "--file",
    type=click.Path(exists=True, path_type=Path),
    help="Read PURLs from file (one per line)",
)
@click.option(
    "-o", "--output", type=click.Path(path_type=Path), help="Write results to file (JSON format)"
)
@click.option(
    "--validate/--no-validate", default=True, help="Validate that download URLs are accessible"
)
@click.option(
    "--format", type=click.Choice(["json", "csv", "plain"]), default="plain", help="Output format"
)
@click.option("-v", "--verbose", is_flag=True, help="Verbose output")
def main(
    purl: Optional[str],
    file: Optional[Path],
    output: Optional[Path],
    validate: bool,
    format: str,
    verbose: bool,
) -> None:
    """
    Translate Package URLs (PURLs) to download URLs.

    Examples:

        purl2src "pkg:npm/express@4.17.1"

        purl2src "pkg:pypi/requests@2.28.0" --validate

        purl2src -f purls.txt --output results.json
    """
    # NOTE: the docstring above is the text click shows for --help, so it is
    # kept as user-facing text; implementation notes live in comments.

    # Collect PURLs: the positional argument first, then every non-blank,
    # non-comment ("#") line of the optional input file.
    purls: List[str] = []
    if purl:
        purls.append(purl)

    if file:
        with open(file, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith("#"):
                    purls.append(line)

    if not purls:
        click.echo("Error: No PURLs provided. Use --help for usage.", err=True)
        sys.exit(1)

    # Only show progress bar for multiple PURLs in verbose mode
    if len(purls) > 1 and verbose:
        with click.progressbar(purls, label="Processing PURLs", show_pos=True) as purl_iter:
            results = [_resolve_purl(purl_str, validate) for purl_str in purl_iter]
    else:
        results = [_resolve_purl(purl_str, validate) for purl_str in purls]

    # A result counts as an error when its status is "failed", whether the
    # handler reported that or an exception was converted by _resolve_purl.
    errors = sum(1 for r in results if r.get("status") == "failed")

    output_data = _render_results(results, format)

    # Write output
    if output:
        with open(output, "w", encoding="utf-8") as f:
            f.write(output_data)
        if verbose:
            click.echo(f"Results written to {output}")
    else:
        click.echo(output_data)

    # Exit with error code if any failures
    if errors > 0:
        if verbose:
            click.echo(f"\nCompleted with {errors} error(s)", err=True)
        sys.exit(1)
148
+
149
+
150
+ if __name__ == "__main__":
151
+ main()
@@ -0,0 +1,85 @@
1
+ """Package ecosystem handlers."""
2
+
3
+ from typing import Dict, Optional, Type
4
+ from .base import BaseHandler, HandlerResult
5
+ from .npm import NpmHandler
6
+ from .pypi import PyPiHandler
7
+ from .cargo import CargoHandler
8
+ from .nuget import NuGetHandler
9
+ from .github import GitHubHandler
10
+ from .generic import GenericHandler
11
+ from .conda import CondaHandler
12
+ from .golang import GoLangHandler
13
+ from .rubygems import RubyGemsHandler
14
+ from .maven import MavenHandler
15
+
16
+ # Registry of all available handlers
17
# Maps the ecosystem name of a parsed PURL to the handler class that resolves
# it; get_download_url() looks the class up here by ``parsed.ecosystem``.
HANDLERS: Dict[str, Type[BaseHandler]] = {
    "npm": NpmHandler,
    "pypi": PyPiHandler,
    "cargo": CargoHandler,
    "nuget": NuGetHandler,
    "github": GitHubHandler,
    "generic": GenericHandler,
    "conda": CondaHandler,
    "golang": GoLangHandler,
    # Both spellings resolve to the same RubyGems handler.
    "gem": RubyGemsHandler,
    "rubygems": RubyGemsHandler,
    "maven": MavenHandler,
}
30
+
31
+
32
def get_download_url(purl: str, validate: bool = True) -> HandlerResult:
    """
    Resolve a Package URL string to a download URL.

    The URL cache is consulted first. On a miss the PURL is parsed, handed to
    the handler registered for its ecosystem, and the result is stored in the
    cache when it carries a download URL and is marked validated.

    Args:
        purl: Package URL string
        validate: Whether to validate the URL is accessible

    Returns:
        HandlerResult with download URL and metadata; an unknown ecosystem
        yields a result with status "failed" and method "unsupported"
    """
    # Imported here rather than at module level, as in the original.
    from ..parser import parse_purl
    from ..utils import HttpClient, URLCache

    url_cache = URLCache()
    hit = url_cache.get(purl)
    if hit:
        # Cached entries are stored as to_dict() output, so they can be
        # expanded straight back into a HandlerResult.
        return HandlerResult(**hit)

    parsed_purl = parse_purl(purl)

    handler_cls = HANDLERS.get(parsed_purl.ecosystem)
    if not handler_cls:
        return HandlerResult(
            purl=purl,
            download_url=None,
            validated=False,
            method="unsupported",
            error=f"Unsupported ecosystem: {parsed_purl.ecosystem}",
            status="failed",
            fallback_available=False,
        )

    with HttpClient() as client:
        outcome = handler_cls(client).get_download_url(parsed_purl, validate=validate)

    # Only results that have a URL and are marked validated are cached.
    if outcome.download_url and outcome.validated:
        url_cache.set(purl, outcome.to_dict())

    return outcome
78
+
79
+
80
+ __all__ = [
81
+ "BaseHandler",
82
+ "HandlerResult",
83
+ "get_download_url",
84
+ "HANDLERS",
85
+ ]
@@ -0,0 +1,226 @@
1
+ """Base handler class for all package ecosystems."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import dataclass, asdict
5
+ from typing import Optional, Dict, Any, List
6
+ import shutil
7
+ import subprocess
8
+ import shlex
9
+
10
+ from ..parser import Purl
11
+ from ..utils.http import HttpClient
12
+
13
+
14
@dataclass
class HandlerResult:
    """Outcome of resolving one PURL to a download URL."""

    purl: str
    download_url: Optional[str]
    validated: bool
    # "direct", "api", or "fallback"; failure results elsewhere in this
    # package also use "none" and "unsupported".
    method: str
    fallback_command: Optional[str] = None
    error: Optional[str] = None
    status: str = "success"  # "success" or "failed"
    fallback_available: bool = True

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a dict, omitting every field that is None."""
        serialized: Dict[str, Any] = {}
        for field_name, field_value in asdict(self).items():
            if field_value is None:
                continue
            serialized[field_name] = field_value
        return serialized
30
+
31
+
32
class HandlerError(Exception):
    """Error type raised by ecosystem handlers."""
36
+
37
+
38
class BaseHandler(ABC):
    """Base class for all ecosystem handlers.

    Subclasses supply the ecosystem-specific pieces (direct URL construction,
    registry API lookup, package-manager fallback command); this class
    sequences them in :meth:`get_download_url`.
    """

    def __init__(self, http_client: HttpClient):
        self.http_client = http_client

    def get_download_url(self, purl: Purl, validate: bool = True) -> HandlerResult:
        """
        Get download URL for a package.

        This method implements the three-level resolution strategy:
        1. Try direct URL construction
        2. Try API query
        3. Try package manager fallback (only when a fallback command exists
           and one of the package manager binaries is on PATH)

        The first level that yields a URL (and, when ``validate`` is true,
        passes ``http_client.validate_url``) wins. A level that raises is
        treated as "no result" and the next level is tried.

        Args:
            purl: Parsed PURL object
            validate: Whether to validate the URL is accessible

        Returns:
            HandlerResult with download URL and metadata; when every level
            fails, a result with status "failed" and method "none"
        """
        import logging

        logger = logging.getLogger(__name__)

        # Check fallback availability once
        fallback_cmd = self.get_fallback_cmd(purl)
        fallback_available = bool(fallback_cmd and self.is_package_manager_available())

        # (method label, resolver) pairs in priority order.
        strategies = [
            ("direct", self.build_download_url),
            ("api", self.get_download_url_from_api),
        ]
        if fallback_available:
            strategies.append(("fallback", self.execute_fallback_command))

        for method, resolve in strategies:
            try:
                url = resolve(purl)
                if url and (not validate or self.http_client.validate_url(url)):
                    return HandlerResult(
                        purl=str(purl),
                        download_url=url,
                        validated=validate,
                        method=method,
                        fallback_command=fallback_cmd,
                        fallback_available=fallback_available,
                    )
            except Exception as exc:
                # Best effort: a failing level must not stop the next one,
                # but leave a trace instead of discarding the reason.
                logger.debug("%s resolution failed for %s: %s", method, purl, exc)

        # All methods failed
        return HandlerResult(
            purl=str(purl),
            download_url=None,
            validated=False,
            method="none",
            fallback_command=fallback_cmd,
            error="Failed to resolve download URL",
            status="failed",
            fallback_available=fallback_available,
        )

    @abstractmethod
    def build_download_url(self, purl: Purl) -> Optional[str]:
        """
        Build direct download URL from PURL components.

        Args:
            purl: Parsed PURL object

        Returns:
            Download URL or None if not applicable
        """

    @abstractmethod
    def get_download_url_from_api(self, purl: Purl) -> Optional[str]:
        """
        Query package registry API for download URL.

        Args:
            purl: Parsed PURL object

        Returns:
            Download URL or None if not found
        """

    @abstractmethod
    def get_fallback_cmd(self, purl: Purl) -> Optional[str]:
        """
        Get package manager command for fallback.

        The returned string is tokenised with ``shlex.split`` and executed
        without a shell by :meth:`execute_fallback_command`.

        Args:
            purl: Parsed PURL object

        Returns:
            Command string or None if not available
        """

    @abstractmethod
    def get_package_manager_cmd(self) -> List[str]:
        """
        Get package manager command name(s) to check.

        Returns:
            List of command names (e.g., ["npm", "yarn"])
        """

    def is_package_manager_available(self) -> bool:
        """Return True if any of the package manager commands is on PATH."""
        return any(shutil.which(cmd) for cmd in self.get_package_manager_cmd())

    def execute_fallback_command(self, purl: Purl) -> Optional[str]:
        """
        Execute package manager command and parse output.

        Args:
            purl: Parsed PURL object

        Returns:
            Download URL extracted from command output, or None when there
            is no fallback command or the output yields no URL

        Raises:
            HandlerError: If the command times out, exits non-zero, cannot
                be tokenised, or cannot be started
        """
        cmd = self.get_fallback_cmd(purl)
        if not cmd:
            return None

        try:
            # Argument list, no shell: the command string is split here.
            result = subprocess.run(
                shlex.split(cmd),
                capture_output=True,
                text=True,
                timeout=30,
                check=True,
            )
        except subprocess.TimeoutExpired as exc:
            raise HandlerError(f"Command timed out: {cmd}") from exc
        except subprocess.CalledProcessError as exc:
            raise HandlerError(f"Command failed: {cmd}\n{exc.stderr}") from exc
        except (OSError, ValueError) as exc:
            # OSError: the executable is missing or cannot be started.
            # ValueError: shlex.split rejected the string (e.g. an unclosed
            # quote) or subprocess rejected the arguments.
            raise HandlerError(f"Command could not be run: {cmd}\n{exc}") from exc

        # Parse output - this should be overridden by subclasses
        return self.parse_fallback_output(result.stdout)

    def parse_fallback_output(self, output: str) -> Optional[str]:
        """
        Parse package manager command output.

        This should be overridden by subclasses to extract the download URL
        from the specific package manager's output format. The base
        implementation recognises nothing and always returns None.

        Args:
            output: Command stdout

        Returns:
            Download URL or None if not found
        """
        return None
@@ -0,0 +1,40 @@
1
+ """Cargo (Rust) handler."""
2
+
3
+ from typing import Optional, List
4
+ from urllib.parse import quote
5
+
6
+ from ..parser import Purl
7
+ from .base import BaseHandler
8
+
9
+
10
class CargoHandler(BaseHandler):
    """Handler for Cargo/Rust packages."""

    def build_download_url(self, purl: Purl) -> Optional[str]:
        """
        Build Cargo download URL.

        Format: https://crates.io/api/v1/crates/{name}/{version}/download

        Returns:
            The download URL, or None when the PURL has no version
        """
        if not purl.version:
            return None

        return f"https://crates.io/api/v1/crates/{purl.name}/{purl.version}/download"

    def get_download_url_from_api(self, purl: Purl) -> Optional[str]:
        """Cargo doesn't need API query - direct URL works."""
        return None

    def get_fallback_cmd(self, purl: Purl) -> Optional[str]:
        """Get the ``cargo search`` command for this crate.

        The base class tokenises this string with ``shlex.split``, so the
        crate name is escaped with shell quoting. URL percent-encoding would
        not be undone by the split and would change the search term.
        """
        import shlex

        return f"cargo search {shlex.quote(purl.name)} --limit 1"

    def get_package_manager_cmd(self) -> List[str]:
        """Cargo command name."""
        return ["cargo"]

    def parse_fallback_output(self, output: str) -> Optional[str]:
        """Parse cargo search output.

        Always returns None: cargo search does not directly provide
        download URLs, so extracting one would need more complex handling.
        """
        return None
@@ -0,0 +1,75 @@
1
+ """Conda handler."""
2
+
3
+ from typing import Optional, List
4
+
5
+ from ..parser import Purl
6
+ from .base import BaseHandler, HandlerError
7
+
8
+
9
class CondaHandler(BaseHandler):
    """Handler for Conda packages."""

    def build_download_url(self, purl: Purl) -> Optional[str]:
        """
        Build Conda download URL.

        Requires qualifiers: build, channel, subdir
        Format depends on channel:
        - main/defaults: repo.anaconda.com/pkgs/main/{subdir}/{pkg}-{ver}-{build}.tar.bz2
        - others: anaconda.org/{ch}/{pkg}/{ver}/download/{subdir}/{pkg}-{ver}-{build}.tar.bz2

        Returns:
            The download URL, or None when the PURL has no version

        Raises:
            HandlerError: If one of the required qualifiers is missing
        """
        if not purl.version:
            return None

        # Check required qualifiers; the first missing one is reported.
        for qual in ("build", "channel", "subdir"):
            if qual not in purl.qualifiers:
                raise HandlerError(f"Missing required qualifier: {qual}")

        build = purl.qualifiers["build"]
        channel = purl.qualifiers["channel"]
        subdir = purl.qualifiers["subdir"]
        filename = f"{purl.name}-{purl.version}-{build}.tar.bz2"

        if channel in ("main", "defaults"):
            # Main/defaults channel uses repo.anaconda.com
            return f"https://repo.anaconda.com/pkgs/main/{subdir}/{filename}"

        # Community channels (conda-forge, bioconda, etc.) use anaconda.org
        return (
            f"https://anaconda.org/{channel}/{purl.name}/{purl.version}/"
            f"download/{subdir}/{filename}"
        )

    def get_download_url_from_api(self, purl: Purl) -> Optional[str]:
        """Query Anaconda API (not implemented; always returns None)."""
        # Could implement Anaconda API query here
        return None

    def get_fallback_cmd(self, purl: Purl) -> Optional[str]:
        """Get the ``conda search`` command, or None without a version.

        The channel defaults to "conda-forge" when no channel qualifier is
        given. Channel and package spec come from the PURL and the command is
        later tokenised with ``shlex.split`` by the base class, so both are
        shell-quoted to stay single arguments. Ordinary values are
        unchanged by the quoting.
        """
        if not purl.version:
            return None

        import shlex

        channel = purl.qualifiers.get("channel", "conda-forge")
        spec = f"{purl.name}={purl.version}"
        return f"conda search -c {shlex.quote(channel)} {shlex.quote(spec)} --info"

    def get_package_manager_cmd(self) -> List[str]:
        """Conda command names."""
        return ["conda", "mamba", "micromamba"]

    def parse_fallback_output(self, output: str) -> Optional[str]:
        """Parse conda search output.

        Returns the first http(s) URL found on a line of the form
        ``url : <address>`` (case-insensitive), or None if no line matches.
        """
        import re

        # Pattern: url (spaces) : (spaces) http...
        url_line = re.compile(r"url\s*:\s*(https?://\S+)", re.IGNORECASE)
        for line in output.split("\n"):
            match = url_line.search(line)
            if match:
                return match.group(1)
        return None
@@ -0,0 +1,94 @@
1
+ """Generic handler using qualifiers."""
2
+
3
+ import re
4
+ from typing import Optional, List
5
+
6
+ from ..parser import Purl
7
+ from .base import BaseHandler, HandlerResult
8
+
9
+
10
class GenericHandler(BaseHandler):
    """Handler for generic packages using qualifiers."""

    @staticmethod
    def _split_vcs_url(vcs_url: str):
        """Split a ``vcs_url`` qualifier into ``(repo_url, commit_or_None)``.

        A leading ``git+`` is removed, and a trailing ``@<lowercase hex>`` is
        split off as the commit when present.
        """
        # Handle git+https:// prefix
        if vcs_url.startswith("git+"):
            vcs_url = vcs_url[4:]

        # Extract commit hash if present
        match = re.match(r"(.+)@([a-f0-9]+)$", vcs_url)
        if match:
            return match.group(1), match.group(2)
        return vcs_url, None

    def build_download_url(self, purl: Purl) -> Optional[str]:
        """Build URL from qualifiers.

        ``download_url`` wins when present; otherwise ``vcs_url`` is reduced
        to the repository URL. Returns None when neither qualifier exists.
        """
        # Check for direct download_url qualifier
        if "download_url" in purl.qualifiers:
            return purl.qualifiers["download_url"]

        # Check for vcs_url qualifier
        if "vcs_url" in purl.qualifiers:
            repo_url, commit = self._split_vcs_url(purl.qualifiers["vcs_url"])
            if commit is not None:
                # Store commit for later checkout
                self._commit = commit
            return repo_url

        return None

    def get_download_url_from_api(self, purl: Purl) -> Optional[str]:
        """Generic packages don't have a registry API."""
        return None

    def get_fallback_cmd(self, purl: Purl) -> Optional[str]:
        """Get git command for VCS URLs, or None without a ``vcs_url``.

        The repository URL comes from the PURL, so it is shell-quoted to
        remain a single argument. Ordinary URLs are unchanged by the quoting.
        """
        if "vcs_url" not in purl.qualifiers:
            return None

        import shlex

        repo_url, commit = self._split_vcs_url(purl.qualifiers["vcs_url"])
        clone_cmd = f"git clone {shlex.quote(repo_url)}"
        if commit is not None:
            # NOTE(review): "&&" is shell syntax, but the base class runs the
            # command via shlex.split without a shell, so the chained
            # checkout cannot run as written. The string is still reported
            # to users as fallback_command; left as is pending a decision.
            return f"{clone_cmd} && git checkout {commit}"
        return clone_cmd

    def get_package_manager_cmd(self) -> List[str]:
        """Git is used for VCS URLs."""
        return ["git"]

    def parse_fallback_output(self, output: str) -> Optional[str]:
        """Parse git output (always None: git clone prints no download URL)."""
        return None

    def get_download_url(self, purl: Purl, validate: bool = True) -> "HandlerResult":
        """Resolve as the base class does, then verify a ``checksum`` qualifier.

        The checksum is checked only when the base result has a URL and is
        marked validated. A ``ValueError`` from
        ``http_client.download_and_verify`` turns the result into a failure
        (error text set, status "failed", validated False).

        Args:
            purl: Parsed PURL object
            validate: Whether to validate the URL is accessible

        Returns:
            The (possibly downgraded) HandlerResult
        """
        result = super().get_download_url(purl, validate)

        if not (result.download_url and result.validated and "checksum" in purl.qualifiers):
            return result

        try:
            checksum = purl.qualifiers["checksum"]
            # Extract algorithm if specified (e.g., sha256:abc123);
            # a bare value is treated as sha256.
            algo, sep, value = checksum.partition(":")
            if not sep:
                algo, value = "sha256", checksum

            self.http_client.download_and_verify(
                result.download_url, expected_checksum=value, algorithm=algo
            )
        except ValueError as e:
            result.error = str(e)
            result.status = "failed"
            result.validated = False

        return result