pysfi 0.1.7-py3-none-any.whl → 0.1.10-py3-none-any.whl
- {pysfi-0.1.7.dist-info → pysfi-0.1.10.dist-info}/METADATA +5 -3
- pysfi-0.1.10.dist-info/RECORD +39 -0
- {pysfi-0.1.7.dist-info → pysfi-0.1.10.dist-info}/entry_points.txt +4 -1
- sfi/__init__.py +1 -1
- sfi/bumpversion/__init__.py +1 -1
- sfi/docscan/__init__.py +1 -1
- sfi/docscan/docscan.py +407 -103
- sfi/docscan/docscan_gui.py +1282 -596
- sfi/docscan/lang/eng.py +152 -0
- sfi/docscan/lang/zhcn.py +170 -0
- sfi/embedinstall/embedinstall.py +77 -17
- sfi/makepython/makepython.py +29 -28
- sfi/pdfsplit/pdfsplit.py +173 -173
- sfi/pylibpack/__init__.py +0 -0
- sfi/pylibpack/pylibpack.py +913 -0
- sfi/pyloadergen/pyloadergen.py +697 -111
- sfi/pypack/__init__.py +0 -0
- sfi/pypack/pypack.py +791 -0
- sfi/pysourcepack/pysourcepack.py +369 -0
- sfi/workflowengine/__init__.py +0 -0
- sfi/workflowengine/workflowengine.py +444 -0
- pysfi-0.1.7.dist-info/RECORD +0 -31
- sfi/pypacker/fspacker.py +0 -91
- {pysfi-0.1.7.dist-info → pysfi-0.1.10.dist-info}/WHEEL +0 -0
- /sfi/{pypacker → docscan/lang}/__init__.py +0 -0
sfi/pylibpack/pylibpack.py
@@ -0,0 +1,913 @@
"""Python Library Packager - Download and pack Python dependencies with caching support.

This module provides functionality to:
1. Read project information from projects.json or run projectparse if needed
2. Download dependencies to local .cache directory
3. Pack dependencies into a distributable format
4. Support batch processing multiple projects recursively
"""

from __future__ import annotations

import argparse
import json
import logging
import platform
import shutil
import subprocess
import sys
import tempfile
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

__version__ = "1.0.0"
__build__ = "20260120"

DEFAULT_CACHE_DIR = Path.home() / ".pysfi" / ".cache" / "python-libs"

PYPI_MIRRORS = {
    "pypi": "https://pypi.org/simple",
    "tsinghua": "https://pypi.tuna.tsinghua.edu.cn/simple",
    "aliyun": "https://mirrors.aliyun.com/pypi/simple/",
    "ustc": "https://pypi.mirrors.ustc.edu.cn/simple/",
    "douban": "https://pypi.douban.com/simple/",
    "tencent": "https://mirrors.cloud.tencent.com/pypi/simple",
}


@dataclass(frozen=True)
class Dependency:
    """Represents a Python package dependency."""

    name: str
    version: str | None = None
    extras: set[str] = field(default_factory=set)
    requires: set[str] = field(default_factory=set)

    def __post_init__(self):
        """Normalize package name after initialization."""
        object.__setattr__(self, "name", normalize_package_name(self.name))

    def __str__(self) -> str:
        """String representation of dependency."""
        if self.extras:
            return f"{self.name}[{','.join(sorted(self.extras))}]{self.version or ''}"
        return f"{self.name}{self.version or ''}"


@dataclass
class DownloadResult:
    """Result of downloading packages."""

    results: dict[str, bool] = field(default_factory=dict)
    total: int = 0
    successful: int = 0
    cached: int = 0
    downloaded: int = 0


@dataclass
class PackResult:
    """Result of packing project dependencies."""

    success: bool
    project: str
    total: int
    successful: int
    failed: int
    packages_dir: str
    extracted_packages: list[str] = field(default_factory=list)
    message: str = ""


@dataclass
class BatchPackResult:
    """Result of packing multiple projects."""

    success: bool
    total: int
    successful: int
    failed: int
    failed_projects: list[str] = field(default_factory=list)
    output_dir: str = ""
    total_time: float = 0.0


@dataclass
class CacheMetadata:
    """Metadata for cached package."""

    name: str
    version: str | None
    path: str
    timestamp: float


@dataclass
class ProjectInfo:
    """Project information for packing dependencies."""

    name: str
    dir: Path
    info: dict


DEV_TOOLS = frozenset({
    "sphinx",
    "sphinx_rtd_theme",
    "watchdog",
    "pytest",
    "coverage",
    "black",
    "mypy",
    "flake8",
    "pylint",
    "isort",
    "pre-commit",
    "tox",
    "nose",
    "unittest",
    "mock",
})
DEV_PATTERNS = frozenset({"dev", "test", "docs", "lint", "example"})
TYPING_PATTERNS = frozenset({"stubs", "typing", "types"})


def normalize_package_name(name: str) -> str:
    """Normalize package name to lowercase with underscores.

    Args:
        name: Package name to normalize

    Returns:
        Normalized package name
    """
    return name.lower().replace("-", "_")


def should_skip_dependency(req_name: str, has_extras: bool = False) -> bool:
    """Check if a dependency should be skipped based on common patterns.

    Args:
        req_name: Package name
        has_extras: Whether the requirement has extras

    Returns:
        True if should skip, False otherwise
    """
    req_lower = req_name.lower()

    # Skip extras
    if has_extras:
        return True

    # Skip dev/test/docs/lint/example patterns
    if any(keyword in req_lower for keyword in DEV_PATTERNS):
        return True

    # Skip typing/stubs dependencies
    if any(keyword in req_lower for keyword in TYPING_PATTERNS):
        return True

    # Skip common dev tools
    return req_lower.replace("-", "_") in DEV_TOOLS


class ProjectParser:
    """Parse project information from projects.json or run projectparse."""

    @staticmethod
    def ensure_projects_json(directory: Path) -> Path | None:
        """Ensure projects.json exists by running projectparse if needed.

        Args:
            directory: Directory to check for projects.json

        Returns:
            Path to projects.json if successful, None otherwise
        """
        projects_json = directory / "projects.json"
        if projects_json.exists():
            logger.debug(f"Found existing projects.json at {projects_json}")
            return projects_json

        logger.info("projects.json not found, running projectparse...")
        try:
            from sfi.projectparse import projectparse as pp

            # Save and restore original argv
            original_argv = sys.argv.copy()
            sys.argv = ["projectparse", "--directory", str(directory), "--output", "projects.json", "--recursive"]
            try:
                pp.main()
            finally:
                sys.argv = original_argv

            if projects_json.exists():
                logger.info("projectparse completed successfully")
                return projects_json
            else:
                logger.error("projectparse failed to generate projects.json")
                return None
        except ImportError:
            # Fallback: run projectparse as script
            sfi_dir = Path(__file__).parent.parent.parent
            projectparse_script = sfi_dir / "projectparse" / "projectparse.py"

            if not projectparse_script.exists():
                logger.error(f"Cannot find projectparse script at {projectparse_script}")
                return None

            result = subprocess.run(
                [
                    sys.executable,
                    str(projectparse_script),
                    "--directory",
                    str(directory),
                    "--output",
                    "projects.json",
                    "--recursive",
                ],
                capture_output=True,
                text=True,
                cwd=directory,
            )

            if result.returncode == 0 and projects_json.exists():
                logger.info("projectparse completed successfully")
                return projects_json

            logger.error(f"projectparse failed: {result.stderr}")
            return None
        except Exception as e:
            logger.error(f"Failed to run projectparse: {e}")
            return None

    @staticmethod
    def load_projects_json(projects_json: Path) -> dict | None:
        """Load project information from projects.json.

        Args:
            projects_json: Path to projects.json file

        Returns:
            Dictionary of project information, None if failed
        """
        try:
            with open(projects_json, encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            logger.error(f"Failed to load projects.json: {e}")
            return None

    @staticmethod
    def parse_requirements_from_project(project_info: dict) -> list[Dependency]:
        """Parse dependencies from project info.

        Args:
            project_info: Project information dictionary from projects.json

        Returns:
            List of Dependency objects
        """
        from packaging.requirements import Requirement

        dependencies = []
        dep_list = project_info.get("dependencies", [])

        for dep_str in dep_list:
            try:
                req = Requirement(dep_str)

                if should_skip_dependency(req.name, bool(req.extras)):
                    logger.info(f"Skipping: {dep_str}")
                    continue

                dep = Dependency(name=req.name, version=str(req.specifier) if req.specifier else None)
                dependencies.append(dep)
                logger.debug(f"Parsed dependency: {dep}")
            except Exception as e:
                logger.warning(f"Failed to parse requirement '{dep_str}': {e}")

        logger.info(f"Parsed {len(dependencies)} dependencies for project")
        return dependencies


class LibraryCache:
    """Manage local cache for Python packages."""

    def __init__(self, cache_dir: Path | None = None):
        """Initialize cache manager.

        Args:
            cache_dir: Cache directory path (default: ~/.pysfi/.cache/python-libs)
        """
        self.cache_dir = cache_dir or DEFAULT_CACHE_DIR
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.metadata_file = self.cache_dir / "metadata.json"

    def get_package_path(self, package_name: str, version: str | None = None) -> Path | None:
        """Get cached package path if available.

        Args:
            package_name: Name of the package
            version: Version (optional)

        Returns:
            Path to cached package or None
        """
        # First try filesystem lookup (works even if metadata is missing)
        for whl_file in self.cache_dir.glob("*.whl"):
            parsed_name = self._extract_package_name_from_wheel(whl_file)
            if parsed_name == package_name:
                logger.debug(f"Cache hit (filesystem): {package_name}")
                return whl_file

        # Fallback to metadata lookup
        metadata = self._load_metadata()
        for info in metadata.values():
            if info["name"] == package_name and (version is None or info.get("version") == version):
                path = self.cache_dir / info["path"]
                if path.exists():
                    logger.debug(f"Cache hit (metadata): {package_name}")
                    return path

        logger.debug(f"Cache miss: {package_name}")
        return None

    @staticmethod
    def _extract_package_name_from_wheel(wheel_file: Path) -> str | None:
        """Extract package name from wheel file.

        Args:
            wheel_file: Path to wheel file

        Returns:
            Package name or None
        """
        try:
            filename = wheel_file.stem  # Remove .whl extension
            parts = filename.split("-")
            if parts:
                return normalize_package_name(parts[0])
        except Exception:
            pass
        return None

    @staticmethod
    def _extract_dependencies_from_wheel(wheel_file: Path) -> set[str]:
        """Extract dependencies from wheel METADATA file.

        Args:
            wheel_file: Path to wheel file

        Returns:
            Set of package names (normalized)
        """
        try:
            import re
            import zipfile

            dependencies: set[str] = set()
            with zipfile.ZipFile(wheel_file, "r") as zf:
                metadata_files = [name for name in zf.namelist() if name.endswith("METADATA")]
                if not metadata_files:
                    return dependencies

                metadata_content = zf.read(metadata_files[0]).decode("utf-8", errors="ignore")

                # Parse dependencies from METADATA
                for line in metadata_content.splitlines():
                    if line.startswith("Requires-Dist:"):
                        dep_str = line.split(":", 1)[1].strip()

                        # Skip extras dependencies
                        if re.search(r'extra\s*==\s*["\']?([^"\';\s]+)["\']?', dep_str, re.IGNORECASE):
                            logger.debug(f"Skipping extra dependency: {dep_str}")
                            continue

                        try:
                            from packaging.requirements import Requirement

                            req = Requirement(dep_str)
                            if not should_skip_dependency(req.name, bool(req.extras)):
                                dep_name = normalize_package_name(req.name)
                                dependencies.add(dep_name)
                                logger.debug(f"Found core dependency: {dep_name}")
                        except Exception:
                            pass

            return dependencies
        except Exception as e:
            logger.warning(f"Failed to extract dependencies from {wheel_file.name}: {e}")
            return set()

    def add_package(self, package_name: str, package_path: Path, version: str | None = None) -> None:
        """Add package to cache.

        Args:
            package_name: Name of the package
            package_path: Path to package files
            version: Package version
        """
        # Copy package files to cache (flat structure for wheels, nested for dirs)
        if package_path.is_dir():
            dest_dir = self.cache_dir / package_name
            if dest_dir.exists():
                shutil.rmtree(dest_dir)
            shutil.copytree(package_path, dest_dir)
            relative_path = package_name
        else:
            dest_file = self.cache_dir / package_path.name
            shutil.copy2(package_path, dest_file)
            relative_path = package_path.name

        # Update metadata using CacheMetadata dataclass
        metadata = self._load_metadata()
        metadata[str(package_path)] = CacheMetadata(
            name=package_name,
            version=version,
            path=relative_path,
            timestamp=time.time(),
        ).__dict__
        self._save_metadata(metadata)

        logger.info(f"Cached package: {package_name}")

    def _load_metadata(self) -> dict[str, Any]:
        """Load cache metadata.

        Returns:
            Metadata dictionary
        """
        if self.metadata_file.exists():
            try:
                with open(self.metadata_file, encoding="utf-8") as f:
                    return json.load(f)
            except Exception as e:
                logger.warning(f"Failed to load cache metadata: {e}")

        return {}

    def _save_metadata(self, metadata: dict[str, Any]) -> None:
        """Save cache metadata.

        Args:
            metadata: Metadata dictionary
        """
        with open(self.metadata_file, "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)

    def clear_cache(self) -> None:
        """Clear all cached packages."""
        if self.cache_dir.exists():
            shutil.rmtree(self.cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        logger.info("Cache cleared")


class LibraryDownloader:
    """Download Python packages from PyPI."""

    def __init__(self, cache: LibraryCache, python_version: str | None = None, mirror: str = "pypi"):
        """Initialize downloader.

        Args:
            cache: Cache manager
            python_version: Target Python version for platform-specific packages
            mirror: PyPI mirror source (pypi, tsinghua, aliyun, ustc, douban, tencent)
        """
        self.cache = cache
        self.python_version = python_version or f"{sys.version_info.major}.{sys.version_info.minor}"
        self.platform_name = platform.system().lower() + "_" + platform.machine().lower()
        self.mirror_url = PYPI_MIRRORS.get(mirror, PYPI_MIRRORS["pypi"])
        self.pip_executable = self._find_pip_executable()

    @staticmethod
    def _find_pip_executable() -> str | None:
        """Find pip executable in the system.

        Returns:
            Path to pip executable or None
        """
        # Return the first of "pip"/"pip3" actually found on PATH (None if neither is available)
        return next((path for cmd in ("pip", "pip3") if (path := shutil.which(cmd))), None)

    def _download_package(self, dep: Dependency, dest_dir: Path) -> Path | None:
        """Download a single package without dependencies.

        Args:
            dep: Dependency to download
            dest_dir: Destination directory

        Returns:
            Path to downloaded wheel file or None
        """
        if not self.pip_executable:
            logger.error("pip not found. Please install pip: python -m ensurepip --upgrade")
            return None

        logger.info(f"Downloading: {dep}")

        with tempfile.TemporaryDirectory() as temp_dir:
            result = subprocess.run(
                [
                    self.pip_executable,
                    "download",
                    "--no-deps",
                    "--index-url",
                    self.mirror_url,
                    "--dest",
                    temp_dir,
                    str(dep),
                ],
                capture_output=True,
                text=True,
                check=False,
            )

            if result.returncode != 0:
                logger.warning(f"pip download failed for {dep}: {result.stderr}")
                return None

            # Find and copy the downloaded wheel file
            for file_path in Path(temp_dir).glob("*.whl"):
                self.cache.add_package(dep.name, file_path, dep.version)
                shutil.copy2(file_path, dest_dir / file_path.name)
                logger.info(f"Downloaded: {file_path.name}")
                return dest_dir / file_path.name

        return None

    def download_packages(
        self,
        dependencies: list[Dependency],
        dest_dir: Path,
        max_workers: int = 4,
    ) -> DownloadResult:
        """Download multiple packages concurrently.

        Args:
            dependencies: List of dependencies to download
            dest_dir: Destination directory
            max_workers: Maximum concurrent downloads

        Returns:
            DownloadResult containing download statistics
        """
        dest_dir.mkdir(parents=True, exist_ok=True)

        results: dict[str, bool] = {}
        cached_count = 0

        logger.info(f"Total direct dependencies: {len(dependencies)}")
        logger.info(f"Using mirror: {self.mirror_url}")

        # Check cache and mark cached packages
        for dep in dependencies:
            if self.cache.get_package_path(dep.name, dep.version):
                results[dep.name] = True
                cached_count += 1
                logger.info(f"Using cached package: {dep}")

        # Download remaining packages concurrently
        remaining_deps = [dep for dep in dependencies if dep.name not in results or not results[dep.name]]
        downloaded_count = 0

        if remaining_deps:
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                future_to_dep = {executor.submit(self._download_package, dep, dest_dir): dep for dep in remaining_deps}

                for future in as_completed(future_to_dep):
                    dep = future_to_dep[future]
                    try:
                        wheel_file = future.result()
                        results[dep.name] = wheel_file is not None
                        if wheel_file:
                            downloaded_count += 1
                    except Exception as e:
                        logger.error(f"Error processing {dep.name}: {e}")
                        results[dep.name] = False

        successful = sum(1 for v in results.values() if v)
        logger.info(
            f"Processed {successful}/{len(dependencies)} ({cached_count} cached, {downloaded_count} downloaded)"
        )

        return DownloadResult(
            results=results,
            total=len(dependencies),
            successful=successful,
            cached=cached_count,
            downloaded=downloaded_count,
        )


class PyLibPack:
    """Main library packer class."""

    def __init__(
        self,
        cache_dir: Path | None = None,
        python_version: str | None = None,
        mirror: str = "pypi",
    ):
        """Initialize library packer.

        Args:
            cache_dir: Custom cache directory
            python_version: Target Python version
            mirror: PyPI mirror source (pypi, tsinghua, aliyun, ustc, douban, tencent)
        """
        self.cache = LibraryCache(cache_dir)
        self.downloader = LibraryDownloader(self.cache, python_version, mirror)

    def pack_project(self, project: ProjectInfo, output_dir: Path, max_workers: int = 4) -> PackResult:
        """Pack dependencies for a single project.

        Args:
            project: Project information
            output_dir: Output directory
            max_workers: Maximum concurrent downloads

        Returns:
            PackResult containing packing statistics
        """
        logger.info(f"\n{'=' * 60}")
        logger.info(f"Packing dependencies for project: {project.name}")
        logger.info(f"{'=' * 60}")

        dependencies = ProjectParser.parse_requirements_from_project(project.info)

        if not dependencies:
            logger.warning(f"No dependencies found for {project.name}")
            return PackResult(
                success=False,
                message="No dependencies found",
                project=project.name,
                total=0,
                successful=0,
                failed=0,
                packages_dir=str(output_dir),
            )

        logger.info(f"Found {len(dependencies)} dependencies")

        # Download direct dependencies
        download_result = self.downloader.download_packages(
            dependencies,
            self.cache.cache_dir,
            max_workers=max_workers,
        )

        # Build wheel map and collect all required packages recursively
        wheel_map: dict[str, Path] = {
            pkg_name: wheel_file
            for wheel_file in self.cache.cache_dir.glob("*.whl")
            if (pkg_name := self.cache._extract_package_name_from_wheel(wheel_file))
        }

        # Recursively collect all dependencies
        all_packages = self._collect_all_dependencies(wheel_map, list(download_result.results))

        # Extract all required packages (keep order of dependency resolution)
        extracted_packages = []
        for pkg_name in all_packages:
            if pkg_name in wheel_map:
                # Skip if output directory already exists
                output_pkg_dir = output_dir / pkg_name
                if output_pkg_dir.exists():
                    logger.warning(f"Output directory already exists: {output_pkg_dir}")
                    continue

                wheel_file = wheel_map[pkg_name]
                logger.info(f"Extracting {wheel_file.name}...")
                self._extract_wheel(wheel_file, output_dir)
                extracted_packages.append(pkg_name)
                logger.info(f"Extracted {pkg_name}")

        logger.info(f"Pack complete for {project.name}: {download_result.successful}/{download_result.total}")

        return PackResult(
            success=download_result.successful > 0,
            project=project.name,
            total=download_result.total,
            successful=download_result.successful,
            failed=download_result.total - download_result.successful,
            packages_dir=str(output_dir),
            extracted_packages=extracted_packages,
        )

    @staticmethod
    def _collect_all_dependencies(wheel_map: dict[str, Path], root_packages: list[str]) -> set[str]:
        """Recursively collect all dependencies from wheel files.

        Args:
            wheel_map: Mapping of package names to wheel files
            root_packages: List of root package names to start from

        Returns:
            Set of all required package names
        """
        all_packages: set[str] = set()
        visited: set[str] = set()

        def visit(pkg_name: str, level: int = 0) -> None:
            """Visit a package and collect its dependencies."""
            if pkg_name in visited:
                return

            visited.add(pkg_name)
            all_packages.add(pkg_name)

            if pkg_name in wheel_map:
                deps = LibraryCache._extract_dependencies_from_wheel(wheel_map[pkg_name])
                logger.debug(f"{' ' * level}{pkg_name} -> {deps}")
                for dep in deps:
                    visit(dep, level + 1)

        for pkg_name in root_packages:
            visit(pkg_name)

        logger.info(f"Collected {len(all_packages)} packages (including recursive dependencies)")
        logger.info(f"Packages: {all_packages}")
        return all_packages

    @staticmethod
    def _extract_wheel(wheel_file: Path, dest_dir: Path) -> None:
        """Extract wheel file to destination directory.

        Args:
            wheel_file: Path to wheel file
            dest_dir: Destination directory
        """
        import zipfile

        with zipfile.ZipFile(wheel_file, "r") as zf:
            zf.extractall(dest_dir)

    def pack(
        self,
        base_dir: Path,
        output_dir: Path | None = None,
        max_workers: int = 4,
    ) -> BatchPackResult:
        """Pack project dependencies from base directory.

        Args:
            base_dir: Base directory containing projects or a single project
            output_dir: Output directory (default: base_dir/dist/site-packages)
            max_workers: Maximum concurrent downloads

        Returns:
            BatchPackResult containing batch packing statistics
        """
        output_dir = output_dir or base_dir / "dist" / "site-packages"
        logger.info(f"Starting dependency pack for: {base_dir}")

        # Ensure projects.json exists
        projects_json = ProjectParser.ensure_projects_json(base_dir)
        if not projects_json:
            logger.error("Failed to create projects.json")
            return BatchPackResult(
                success=False,
                total=0,
                successful=0,
                failed=0,
                output_dir=str(output_dir),
                total_time=0.0,
            )

        # Load project information
        projects = ProjectParser.load_projects_json(projects_json)
        if not projects:
            logger.error("Failed to load project information")
            return BatchPackResult(
                success=False,
                total=0,
                successful=0,
                failed=0,
                output_dir=str(output_dir),
                total_time=0.0,
            )

        logger.info(f"Found {len(projects)} project(s) to process")

        # Process each project
        total_start = time.perf_counter()
        success_count = 0
        failed_projects: list[str] = []
        use_current_dir = len(projects) == 1

        for project_name, project_info in projects.items():
            project_dir = base_dir if use_current_dir else base_dir / project_name

            if not project_dir.is_dir():
                logger.warning(f"Project directory not found: {project_dir}, skipping")
                failed_projects.append(project_name)
                continue

            project = ProjectInfo(name=project_name, dir=project_dir, info=project_info)
            result = self.pack_project(project, output_dir, max_workers)

            if result.success:
                success_count += 1
            else:
                failed_projects.append(project_name)

        total_time = time.perf_counter() - total_start

        # Summary
        logger.info(f"\n{'=' * 60}")
        logger.info("Summary")
        logger.info(f"{'=' * 60}")
        logger.info(f"Total projects: {len(projects)}")
        logger.info(f"Successfully packed: {success_count}")
        logger.info(f"Failed: {len(failed_projects)}")
        if failed_projects:
            logger.info(f"Failed projects: {', '.join(failed_projects)}")
        logger.info(f"Total time: {total_time:.2f}s")

        return BatchPackResult(
            success=len(failed_projects) == 0,
            total=len(projects),
            successful=success_count,
            failed=len(failed_projects),
            failed_projects=failed_projects,
            output_dir=str(output_dir),
            total_time=total_time,
        )

    def clear_cache(self) -> None:
        """Clear the package cache."""
        self.cache.clear_cache()


def main() -> None:
    """Main entry point for pylibpack tool."""
    parser = argparse.ArgumentParser(
        prog="pylibpack",
        description="Python library packer with caching support",
    )

    parser.add_argument(
        "directory", type=str, nargs="?", default=str(Path.cwd()), help="Base directory containing projects"
    )
    parser.add_argument("--cache-dir", type=str, default=None, help="Custom cache directory")
    parser.add_argument("--python-version", type=str, default=None, help="Target Python version")
    parser.add_argument("-j", "--jobs", type=int, default=4, help="Maximum concurrent downloads")
    parser.add_argument(
        "--mirror",
        type=str,
        default="aliyun",
        choices=("pypi", "tsinghua", "aliyun", "ustc", "douban", "tencent"),
        help="PyPI mirror source for faster downloads in China",
    )
    parser.add_argument("--debug", "-d", action="store_true", help="Debug mode")
    args = parser.parse_args()

    # Setup logging
    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Initialize packer
    cache_dir = Path(args.cache_dir) if args.cache_dir else None
    packer = PyLibPack(cache_dir=cache_dir, python_version=args.python_version, mirror=args.mirror)

    # Pack command
    base_dir = Path(args.directory)
    output_dir = base_dir / "dist" / "site-packages"
    output_dir.mkdir(parents=True, exist_ok=True)

    try:
        result = packer.pack(
            base_dir=base_dir,
            output_dir=output_dir,
            max_workers=args.jobs,
        )

        if result.success:
            logger.info("=" * 50)
            logger.info("Packing summary:")
            logger.info(f"  Total: {result.total}")
            logger.info(f"  Successful: {result.successful}")
            logger.info(f"  Failed: {result.failed}")
            logger.info(f"  Output directory: {result.output_dir}")
            logger.info(f"  Total time: {result.total_time:.2f}s")
            logger.info("=" * 50)
        else:
            logger.error("Packing failed!")
            sys.exit(1)

    except Exception as e:
        logger.error(f"Packing failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
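
For orientation, below is a minimal usage sketch of the new pylibpack module shown in the hunk above. It is not part of the diff itself: the project path is a placeholder, and the import path simply follows the file layout listed at the top (sfi/pylibpack/pylibpack.py with an accompanying __init__.py).

```python
# Hypothetical example, not part of the published package contents.
from pathlib import Path

from sfi.pylibpack.pylibpack import PyLibPack

# "./myproject" is a placeholder; pack() defaults output_dir to <base_dir>/dist/site-packages
# and runs projectparse first if projects.json is missing.
packer = PyLibPack(mirror="pypi")
result = packer.pack(base_dir=Path("./myproject"), max_workers=4)
print(f"{result.successful}/{result.total} project(s) packed into {result.output_dir}")
```

Based on the argparse setup in main(), the equivalent command line would be roughly `pylibpack ./myproject --mirror pypi -j 4`, assuming the updated entry_points.txt exposes main() as a console script.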