pysfi 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
@@ -10,23 +10,23 @@ This module provides functionality to:
10
10
  from __future__ import annotations
11
11
 
12
12
  import argparse
13
+ import atexit
13
14
  import json
14
15
  import logging
15
- import platform
16
16
  import re
17
17
  import shutil
18
18
  import subprocess
19
- import sys
20
19
  import tarfile
21
20
  import tempfile
22
21
  import time
23
22
  import zipfile
24
23
  from concurrent.futures import ThreadPoolExecutor, as_completed
25
24
  from dataclasses import dataclass, field
25
+ from functools import cached_property
26
26
  from pathlib import Path
27
- from typing import Any, Pattern
27
+ from typing import Any, ClassVar, Final, Pattern
28
28
 
29
- from sfi.pyprojectparse.pyprojectparse import Project, Solution
29
+ from sfi.pyprojectparse.pyprojectparse import Dependency, Project, Solution
30
30
 
31
31
  logging.basicConfig(
32
32
  level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -38,7 +38,21 @@ __build__ = "20260120"
38
38
 
39
39
  DEFAULT_CACHE_DIR = Path.home() / ".pysfi" / ".cache" / "python-libs"
40
40
 
41
- MAX_DEPTH = 50 # Maximum recursion depth to prevent infinite loops
41
+ MAX_DEPTH: Final[int] = 50 # Maximum recursion depth to prevent infinite loops
42
+
43
+ # Archive format constants
44
+ SUPPORTED_ARCHIVE_FORMATS: Final[tuple[str, ...]] = (
45
+ "zip",
46
+ "tar",
47
+ "gztar",
48
+ "bztar",
49
+ "xztar",
50
+ )
51
+
52
+ # Default configuration constants
53
+ DEFAULT_MAX_WORKERS: Final[int] = 4
54
+ DEFAULT_MIRROR: Final[str] = "aliyun"
55
+ DEFAULT_OPTIMIZE: Final[bool] = True
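
SUPPORTED_ARCHIVE_FORMATS mirrors the archive format names that Python's shutil understands ("bztar" and "xztar" additionally require the bz2/lzma modules). A quick sanity check along these lines (a sketch, not part of the package) would be:

    import shutil

    SUPPORTED_ARCHIVE_FORMATS = ("zip", "tar", "gztar", "bztar", "xztar")

    # shutil.get_archive_formats() lists the formats this interpreter can actually produce.
    available = {name for name, _ in shutil.get_archive_formats()}
    missing = set(SUPPORTED_ARCHIVE_FORMATS) - available
    if missing:
        print(f"Archive formats not available here: {sorted(missing)}")
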
42
56
 
43
57
  PYPI_MIRRORS = {
44
58
  "pypi": "https://pypi.org/simple",
@@ -50,24 +64,74 @@ PYPI_MIRRORS = {
50
64
  }
51
65
 
52
66
 
53
- @dataclass(frozen=True)
54
- class Dependency:
55
- """Represents a Python package dependency."""
67
+ CONFIG_FILE = Path.home() / ".pysfi" / "pylibpack.json"
56
68
 
57
- name: str
58
- version: str | None = None
59
- extras: set[str] = field(default_factory=set)
60
- requires: set[str] = field(default_factory=set)
61
69
 
62
- def __post_init__(self):
63
- """Normalize package name after initialization."""
64
- object.__setattr__(self, "name", normalize_package_name(self.name))
70
+ @dataclass
71
+ class PyLibPackerConfig:
72
+ """Configuration for PyLibPack with persistent settings."""
65
73
 
66
- def __str__(self) -> str:
67
- """String representation of dependency."""
68
- if self.extras:
69
- return f"{self.name}[{','.join(sorted(self.extras))}]{self.version or ''}"
70
- return f"{self.name}{self.version or ''}"
74
+ cache_dir: Path | None = None
75
+ mirror: str = DEFAULT_MIRROR
76
+ optimize: bool = DEFAULT_OPTIMIZE
77
+ max_workers: int = DEFAULT_MAX_WORKERS
78
+
79
+ def __init__(
80
+ self,
81
+ cache_dir: Path | None = None,
82
+ mirror: str = DEFAULT_MIRROR,
83
+ optimize: bool = DEFAULT_OPTIMIZE,
84
+ max_workers: int = DEFAULT_MAX_WORKERS,
85
+ ):
86
+ # Track which parameters were explicitly provided
87
+ self._explicitly_set = {}
88
+
89
+ if cache_dir is not None:
90
+ self._explicitly_set["cache_dir"] = True
91
+ if mirror != DEFAULT_MIRROR:
92
+ self._explicitly_set["mirror"] = True
93
+ if optimize != DEFAULT_OPTIMIZE:
94
+ self._explicitly_set["optimize"] = True
95
+ if max_workers != DEFAULT_MAX_WORKERS:
96
+ self._explicitly_set["max_workers"] = True
97
+
98
+ # Set the values
99
+ self.cache_dir = cache_dir
100
+ self.mirror = mirror
101
+ self.optimize = optimize
102
+ self.max_workers = max_workers
103
+
104
+ # Apply defaults for unset values
105
+ if self.cache_dir is None:
106
+ self.cache_dir = DEFAULT_CACHE_DIR
107
+
108
+ self.cache_dir.mkdir(parents=True, exist_ok=True)
109
+
110
+ # Load configuration from file if it exists
111
+ if CONFIG_FILE.exists():
112
+ try:
113
+ config_data = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
114
+ # Update configuration items, but only for those not explicitly set
115
+ for key, value in config_data.items():
116
+ if (
117
+ hasattr(self, key)
118
+ and isinstance(value, type(getattr(self, key)))
119
+ and key not in self._explicitly_set
120
+ ):
121
+ setattr(self, key, value)
122
+ except (json.JSONDecodeError, TypeError, AttributeError) as e:
123
+ logger.warning(f"Could not load config from {CONFIG_FILE}: {e}")
124
+
125
+ def save(self) -> None:
126
+ """Save current configuration to file."""
127
+ CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
128
+ config_dict = {
129
+ "cache_dir": str(self.cache_dir),
130
+ "mirror": self.mirror,
131
+ "optimize": self.optimize,
132
+ "max_workers": self.max_workers,
133
+ }
134
+ CONFIG_FILE.write_text(json.dumps(config_dict, indent=4), encoding="utf-8")
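
A minimal usage sketch of the PyLibPackerConfig class above (the import path is hypothetical; adjust it to wherever this module is installed). Explicitly passed arguments win over values read from ~/.pysfi/pylibpack.json, which in turn win over the defaults when the stored type matches:

    from sfi.pylibpack import PyLibPackerConfig  # hypothetical import path

    # mirror and max_workers differ from the defaults, so they count as explicitly set
    # and are not overridden by values loaded from ~/.pysfi/pylibpack.json.
    config = PyLibPackerConfig(mirror="pypi", max_workers=8)

    print(config.cache_dir, config.mirror, config.max_workers)
    config.save()  # write the effective settings back to ~/.pysfi/pylibpack.json
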
71
135
 
72
136
 
73
137
  @dataclass
@@ -92,20 +156,6 @@ class PackResult:
92
156
  failed: int
93
157
  packages_dir: str
94
158
  extracted_packages: list[str] = field(default_factory=list)
95
- message: str = ""
96
-
97
-
98
- @dataclass
99
- class BatchPackResult:
100
- """Result of packing multiple projects."""
101
-
102
- success: bool
103
- total: int
104
- successful: int
105
- failed: int
106
- failed_projects: list[str] = field(default_factory=list)
107
- output_dir: str = ""
108
- total_time: float = 0.0
109
159
 
110
160
 
111
161
  @dataclass
@@ -154,7 +204,7 @@ class OptimizationRule:
154
204
  exclude_patterns: list[str] = field(default_factory=list)
155
205
  include_patterns: list[str] = field(default_factory=list)
156
206
 
157
- def __post_init__(self):
207
+ def __post_init__(self) -> None:
158
208
  """Compile regex patterns after initialization."""
159
209
  self.exclude_compiled: list[Pattern] = [
160
210
  re.compile(p) for p in self.exclude_patterns
@@ -164,6 +214,7 @@ class OptimizationRule:
164
214
  ]
165
215
 
166
216
 
217
+ @dataclass(frozen=False)
167
218
  class SelectiveExtractionStrategy:
168
219
  """Optimization strategy that applies inclusion/exclusion rules to specific libraries.
169
220
 
@@ -174,7 +225,7 @@ class SelectiveExtractionStrategy:
174
225
  """
175
226
 
176
227
  # Universal exclusion patterns - applied to all libraries
177
- UNIVERSAL_EXCLUDE_PATTERNS = frozenset({
228
+ UNIVERSAL_EXCLUDE_PATTERNS: ClassVar[frozenset[str]] = frozenset({
178
229
  "doc",
179
230
  "docs",
180
231
  "test",
@@ -223,8 +274,12 @@ class SelectiveExtractionStrategy:
223
274
  for pattern in self.UNIVERSAL_EXCLUDE_PATTERNS
224
275
  ]
225
276
 
226
- def _setup_default_rules(self):
227
- """Setup default optimization rules for common libraries."""
277
+ def _setup_default_rules(self) -> None:
278
+ """Setup default optimization rules for common libraries.
279
+
280
+ This method loads JSON rule files from the rules directory and
281
+ creates OptimizationRule objects for common libraries.
282
+ """
228
283
  # Get the rules directory
229
284
  rules_dir = Path(__file__).parent / "rules"
230
285
 
@@ -359,12 +414,13 @@ def should_skip_dependency(req_name: str, has_extras: bool = False) -> bool:
359
414
  Returns:
360
415
  True if should skip, False otherwise
361
416
  """
362
- req_lower = req_name.lower()
363
-
364
417
  # Skip extras
365
418
  if has_extras:
366
419
  return True
367
420
 
421
+ req_lower = req_name.lower()
422
+ normalized_req = req_lower.replace("-", "_")
423
+
368
424
  # Skip dev/test/docs/lint/example patterns
369
425
  if any(keyword in req_lower for keyword in DEV_PATTERNS):
370
426
  return True
@@ -374,23 +430,141 @@ def should_skip_dependency(req_name: str, has_extras: bool = False) -> bool:
374
430
  return True
375
431
 
376
432
  # Skip common dev tools
377
- return req_lower.replace("-", "_") in DEV_TOOLS
433
+ return normalized_req in DEV_TOOLS
378
434
 
379
435
 
436
+ @dataclass(frozen=False)
380
437
  class LibraryCache:
381
438
  """Manage local cache for Python packages."""
382
439
 
383
- def __init__(self, cache_dir: Path | None = None):
384
- """Initialize cache manager.
440
+ cache_dir: Path = field(default_factory=lambda: DEFAULT_CACHE_DIR)
441
+ _dependencies_cache: dict[Path, set[str]] = field(default_factory=dict)
442
+
443
+ def __post_init__(self) -> None:
444
+ self.cache_dir.mkdir(parents=True, exist_ok=True)
445
+
446
+ @cached_property
447
+ def metadata_file(self) -> Path:
448
+ return self.cache_dir / "metadata.json"
449
+
450
+ @cached_property
451
+ def wheel_files(self) -> list[Path]:
452
+ return list(self.cache_dir.glob("*.whl"))
453
+
454
+ @cached_property
455
+ def sdist_files(self) -> list[Path]:
456
+ return list(self.cache_dir.glob("*.tar.gz")) + list(
457
+ self.cache_dir.glob("*.zip")
458
+ )
459
+
460
+ def collect_dependencies_from_list(self, dependency_list: list[str]) -> set[str]:
461
+ """Recursively collect all dependencies from package files (wheel or sdist).
385
462
 
386
463
  Args:
387
- cache_dir: Cache directory path (default: ~/.pysfi/.cache/pylibpack)
464
+ dependency_list: List of root package names to start from
465
+
466
+ Returns:
467
+ Set of all required package names (normalized)
388
468
  """
389
- self.cache_dir = cache_dir or DEFAULT_CACHE_DIR
390
- self.cache_dir.mkdir(parents=True, exist_ok=True)
391
- self.metadata_file = self.cache_dir / "metadata.json"
392
- # In-memory cache for extracted dependencies to avoid repeated IO
393
- self._dependencies_cache: dict[Path, set[str]] = {}
469
+ all_packages: set[str] = set()
470
+ visited: set[str] = set()
471
+ visit_stack: dict[str, int] = {} # Track visit depth for cycle detection
472
+
473
+ def visit(pkg_name: str, level: int = 0) -> None:
474
+ """Visit a package and collect its dependencies."""
475
+ # Normalize package name for consistency
476
+ normalized_pkg_name = normalize_package_name(pkg_name)
477
+
478
+ # Check for cycles
479
+ if normalized_pkg_name in visit_stack:
480
+ logger.warning(
481
+ f"Potential circular dependency detected: {normalized_pkg_name} (current depth: {level}, "
482
+ f"previous depth: {visit_stack[normalized_pkg_name]})"
483
+ )
484
+ return
485
+
486
+ # Check depth limit
487
+ if level > MAX_DEPTH:
488
+ logger.warning(
489
+ f"Maximum dependency depth ({MAX_DEPTH}) reached for {normalized_pkg_name}, stopping recursion"
490
+ )
491
+ return
492
+
493
+ # Skip if already visited
494
+ if normalized_pkg_name in visited:
495
+ return
496
+
497
+ # Mark as visited and track depth
498
+ visited.add(normalized_pkg_name)
499
+ visit_stack[normalized_pkg_name] = level
500
+ all_packages.add(normalized_pkg_name)
501
+
502
+ # Process dependencies if package exists in map
503
+ package_path = self.package_map.get(normalized_pkg_name)
504
+ if package_path:
505
+ deps = self._extract_dependencies_from_wheel(package_path)
506
+ logger.debug(f"{' ' * level}{normalized_pkg_name} -> {deps}")
507
+ for dep in deps:
508
+ visit(dep, level + 1)
509
+
510
+ # Remove from stack when done
511
+ visit_stack.pop(normalized_pkg_name, None)
512
+
513
+ for pkg_name in dependency_list:
514
+ visit(pkg_name)
515
+
516
+ logger.info(f"Collected {len(all_packages)} packages: {all_packages}")
517
+ return all_packages
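
The traversal above is a depth-first walk guarded by a visited set, a depth cap, and an in-progress map used only to report cycles. Reduced to a toy in-memory graph (a standalone sketch, independent of LibraryCache), the same pattern looks like this:

    # Toy dependency graph; names stand in for normalized package names.
    GRAPH = {"a": ["b", "c"], "b": ["c"], "c": ["a"]}  # note the a -> c -> a cycle
    MAX_DEPTH = 50

    def collect(roots: list[str]) -> set[str]:
        collected: set[str] = set()
        visited: set[str] = set()
        in_progress: dict[str, int] = {}  # name -> depth, for cycle reporting

        def visit(name: str, depth: int = 0) -> None:
            if name in in_progress:          # back-edge: report the cycle and stop
                print(f"cycle via {name} at depth {depth}")
                return
            if depth > MAX_DEPTH or name in visited:
                return
            visited.add(name)
            in_progress[name] = depth
            collected.add(name)
            for dep in GRAPH.get(name, []):
                visit(dep, depth + 1)
            in_progress.pop(name, None)

        for root in roots:
            visit(root)
        return collected

    print(collect(["a"]))  # {'a', 'b', 'c'}, with a cycle warning for 'a'
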
518
+
519
+ @cached_property
520
+ def package_map(self) -> dict[str, Path]:
521
+ """Create a mapping of package names to their file paths with improved efficiency."""
522
+ packages: dict[str, Path] = {}
523
+
524
+ # Process wheel files first (they take precedence)
525
+ for wheel_file in self.wheel_files:
526
+ pkg_name = self._extract_package_name_from_wheel(wheel_file)
527
+ if pkg_name:
528
+ normalized_pkg_name = normalize_package_name(pkg_name)
529
+ packages[normalized_pkg_name] = wheel_file
530
+
531
+ # Add sdist files only if the package isn't already in the map
532
+ for sdist_file in self.sdist_files:
533
+ pkg_name = self._extract_package_name_from_sdist(sdist_file)
534
+ if pkg_name:
535
+ normalized_pkg_name = normalize_package_name(pkg_name)
536
+ if normalized_pkg_name not in packages:
537
+ packages[normalized_pkg_name] = sdist_file
538
+
539
+ return packages
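
Note that wheel_files, sdist_files and package_map above (and the cache_size/cache_stats properties that follow) are functools.cached_property attributes: each is computed on first access and reused for the lifetime of the LibraryCache instance, so packages added to the cache directory afterwards are not picked up until the cached values are dropped. A small sketch of the invalidation idiom (cached_property stores its result in the instance __dict__):

    import tempfile
    from pathlib import Path

    with tempfile.TemporaryDirectory() as tmp:
        cache = LibraryCache(cache_dir=Path(tmp))  # LibraryCache as defined above
        print(cache.cache_stats)                   # computed once, then reused

        # Force recomputation after the directory contents have changed.
        for attr in ("wheel_files", "sdist_files", "package_map",
                     "package_count", "cache_size", "cache_stats"):
            cache.__dict__.pop(attr, None)
        print(cache.cache_stats)                   # rebuilt from the directory
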
540
+
541
+ @cached_property
542
+ def cache_size(self) -> int:
543
+ """Calculate total size of cache in bytes."""
544
+ if not self.cache_dir.exists():
545
+ return 0
546
+
547
+ # Use generator expression for memory efficiency
548
+ return sum(
549
+ file_path.stat().st_size
550
+ for file_path in self.cache_dir.rglob("*")
551
+ if file_path.is_file()
552
+ )
553
+
554
+ @cached_property
555
+ def package_count(self) -> int:
556
+ """Get the count of packages in cache."""
557
+ return len(self.package_map)
558
+
559
+ @cached_property
560
+ def cache_stats(self) -> dict[str, int]:
561
+ """Get detailed cache statistics."""
562
+ return {
563
+ "total_packages": self.package_count,
564
+ "wheel_count": len(self.wheel_files),
565
+ "sdist_count": len(self.sdist_files),
566
+ "cache_size_bytes": self.cache_size,
567
+ }
394
568
 
395
569
  def get_package_path(
396
570
  self, package_name: str, version: str | None = None
@@ -404,29 +578,31 @@ class LibraryCache:
404
578
  Returns:
405
579
  Path to cached package or None
406
580
  """
581
+ normalized_name = normalize_package_name(package_name)
582
+
407
583
  # First try filesystem lookup for wheel files (works even if metadata is missing)
408
584
  for whl_file in self.cache_dir.glob("*.whl"):
409
585
  parsed_name = self._extract_package_name_from_wheel(whl_file)
410
- if parsed_name == package_name:
586
+ if parsed_name == normalized_name:
411
587
  logger.debug(f"Cache hit (filesystem wheel): {package_name}")
412
588
  return whl_file
413
589
 
414
590
  # Try filesystem lookup for sdist files (.tar.gz, .zip)
415
591
  for sdist_file in self.cache_dir.glob("*.tar.gz"):
416
592
  parsed_name = self._extract_package_name_from_sdist(sdist_file)
417
- if parsed_name == package_name:
593
+ if parsed_name == normalized_name:
418
594
  logger.debug(f"Cache hit (filesystem sdist): {package_name}")
419
595
  return sdist_file
420
596
  for sdist_file in self.cache_dir.glob("*.zip"):
421
597
  parsed_name = self._extract_package_name_from_sdist(sdist_file)
422
- if parsed_name == package_name:
598
+ if parsed_name == normalized_name:
423
599
  logger.debug(f"Cache hit (filesystem sdist): {package_name}")
424
600
  return sdist_file
425
601
 
426
602
  # Fallback to metadata lookup
427
603
  metadata = self._load_metadata()
428
604
  for info in metadata.values():
429
- if info["name"] == package_name and (
605
+ if info["name"] == normalized_name and (
430
606
  version is None or info.get("version") == version
431
607
  ):
432
608
  path = self.cache_dir / info["path"]
@@ -499,7 +675,7 @@ class LibraryCache:
499
675
  """Extract dependencies from wheel METADATA file with caching.
500
676
 
501
677
  Args:
502
- wheel_file: Path to wheel file
678
+ wheel_file: Path to wheel or sdist file
503
679
 
504
680
  Returns:
505
681
  Set of package names (normalized)
@@ -514,47 +690,26 @@ class LibraryCache:
514
690
  self._dependencies_cache[wheel_file] = dependencies
515
691
  return dependencies
516
692
 
517
- try:
518
- import re
519
- import zipfile
693
+ # Early return if wheel file doesn't exist
694
+ if not wheel_file.exists():
695
+ logger.warning(f"Wheel file does not exist: {wheel_file}")
696
+ self._dependencies_cache[wheel_file] = set()
697
+ return set()
520
698
 
521
- dependencies: set[str] = set()
699
+ try:
522
700
  with zipfile.ZipFile(wheel_file, "r") as zf:
701
+ # Find metadata file
523
702
  metadata_files = [
524
703
  name for name in zf.namelist() if name.endswith("METADATA")
525
704
  ]
526
- if not metadata_files:
527
- self._dependencies_cache[wheel_file] = dependencies
528
- return dependencies
529
-
530
- metadata_content = zf.read(metadata_files[0]).decode(
531
- "utf-8", errors="ignore"
532
- )
533
705
 
534
- # Parse dependencies from METADATA
535
- for line in metadata_content.splitlines():
536
- if line.startswith("Requires-Dist:"):
537
- dep_str = line.split(":", 1)[1].strip()
538
-
539
- # Skip extras dependencies
540
- if re.search(
541
- r'extra\s*==\s*["\']?([^"\';\s]+)["\']?',
542
- dep_str,
543
- re.IGNORECASE,
544
- ):
545
- logger.debug(f"Skipping extra dependency: {dep_str}")
546
- continue
547
-
548
- try:
549
- from packaging.requirements import Requirement
550
-
551
- req = Requirement(dep_str)
552
- if not should_skip_dependency(req.name, bool(req.extras)):
553
- dep_name = normalize_package_name(req.name)
554
- dependencies.add(dep_name)
555
- logger.debug(f"Found core dependency: {dep_name}")
556
- except Exception:
557
- pass
706
+ if not metadata_files:
707
+ dependencies = set()
708
+ else:
709
+ metadata_content = zf.read(metadata_files[0]).decode(
710
+ "utf-8", errors="ignore"
711
+ )
712
+ dependencies = self._parse_metadata_content(metadata_content)
558
713
 
559
714
  # Cache the result
560
715
  self._dependencies_cache[wheel_file] = dependencies
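
Only unconditional Requires-Dist entries survive this parsing; anything guarded by an `extra == ...` marker is skipped before the requirement is even parsed. With the packaging library (which the module already relies on), that filtering boils down to roughly the following (a sketch, not the module's actual _parse_metadata_content):

    import re
    from packaging.requirements import Requirement

    metadata = (
        "Requires-Dist: charset-normalizer<4,>=2\n"
        "Requires-Dist: idna<4,>=2.5\n"
        'Requires-Dist: PySocks!=1.5.7,>=1.5.6; extra == "socks"\n'
    )

    deps: set[str] = set()
    for line in metadata.splitlines():
        if not line.startswith("Requires-Dist:"):
            continue
        dep_str = line.split(":", 1)[1].strip()
        # Drop requirements that only apply when an extra is requested.
        if re.search(r'extra\s*==\s*["\']?([^"\';\s]+)["\']?', dep_str, re.IGNORECASE):
            continue
        deps.add(Requirement(dep_str).name.lower().replace("-", "_"))

    print(deps)  # {'charset_normalizer', 'idna'}
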
@@ -583,9 +738,7 @@ class LibraryCache:
583
738
  with tarfile.open(sdist_file, "r:gz") as tf:
584
739
  for member in tf.getmembers():
585
740
  # Look for PKG-INFO or METADATA file in the root of the package
586
- if member.name.endswith("PKG-INFO") or member.name.endswith(
587
- "METADATA"
588
- ):
741
+ if member.name.endswith(("PKG-INFO", "METADATA")):
589
742
  # Only use PKG-INFO/METADATA files in the root directory
590
743
  # Count the number of slashes in the path
591
744
  path_parts = member.name.split("/")
@@ -610,7 +763,7 @@ class LibraryCache:
610
763
  with zipfile.ZipFile(sdist_file, "r") as zf:
611
764
  for name in zf.namelist():
612
765
  # Look for PKG-INFO or METADATA file in the root of the package
613
- if name.endswith("PKG-INFO") or name.endswith("METADATA"):
766
+ if name.endswith(("PKG-INFO", "METADATA")):
614
767
  path_parts = name.split("/")
615
768
  if len(path_parts) == 2 or (
616
769
  len(path_parts) == 3
@@ -643,8 +796,6 @@ class LibraryCache:
643
796
  Returns:
644
797
  Set of package names (normalized)
645
798
  """
646
- import re
647
-
648
799
  dependencies: set[str] = set()
649
800
  try:
650
801
  for line in metadata_content.splitlines():
@@ -680,8 +831,6 @@ class LibraryCache:
680
831
  Returns:
681
832
  Set containing the normalized package name, or empty set if should skip
682
833
  """
683
- import re
684
-
685
834
  try:
686
835
  # Skip extras dependencies
687
836
  if re.search(
@@ -763,6 +912,18 @@ class LibraryCache:
763
912
  with open(self.metadata_file, "w", encoding="utf-8") as f:
764
913
  json.dump(metadata, f, indent=2)
765
914
 
915
+ @staticmethod
916
+ def _should_skip_dist_info(file_path: Path) -> bool:
917
+ """Check if the file path should be skipped because it's a dist-info directory."""
918
+ name = file_path.name
919
+ if name.endswith(".dist-info"):
920
+ return True
921
+ # Check if any parent directory ends with .dist-info
922
+ for part in file_path.parts:
923
+ if part.endswith(".dist-info"):
924
+ return True
925
+ return False
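
For illustration, with LibraryCache in scope the helper above flags anything that is, or lives inside, a *.dist-info directory and keeps everything else:

    from pathlib import Path

    print(LibraryCache._should_skip_dist_info(Path("requests-2.31.0.dist-info/METADATA")))  # True
    print(LibraryCache._should_skip_dist_info(Path("requests/__init__.py")))                # False
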
926
+
766
927
  def clear_cache(self) -> None:
767
928
  """Clear all cached packages."""
768
929
  if self.cache_dir.exists():
@@ -772,31 +933,21 @@ class LibraryCache:
772
933
  logger.info("Cache cleared")
773
934
 
774
935
 
936
+ @dataclass(frozen=True)
775
937
  class LibraryDownloader:
776
938
  """Download Python packages from PyPI."""
777
939
 
778
- def __init__(
779
- self,
780
- cache: LibraryCache,
781
- python_version: str | None = None,
782
- mirror: str = "pypi",
783
- ):
784
- """Initialize downloader.
940
+ parent: PyLibPacker
941
+ cache: LibraryCache
942
+ _mirror: str = "pypi"
785
943
 
786
- Args:
787
- cache: Cache manager
788
- python_version: Target Python version for platform-specific packages
789
- mirror: PyPI mirror source (pypi, tsinghua, aliyun, ustc, douban, tencent)
790
- """
791
- self.cache = cache
792
- self.python_version = (
793
- python_version or f"{sys.version_info.major}.{sys.version_info.minor}"
794
- )
795
- self.platform_name = (
796
- platform.system().lower() + "_" + platform.machine().lower()
797
- )
798
- self.mirror_url = PYPI_MIRRORS.get(mirror, PYPI_MIRRORS["pypi"])
799
- self.pip_executable = self._find_pip_executable()
944
+ @cached_property
945
+ def mirror_url(self) -> str:
946
+ return PYPI_MIRRORS.get(self._mirror, PYPI_MIRRORS["pypi"])
947
+
948
+ @cached_property
949
+ def pip_executable(self) -> str | None:
950
+ return self._find_pip_executable()
800
951
 
801
952
  @staticmethod
802
953
  def _find_pip_executable() -> str | None:
@@ -808,7 +959,7 @@ class LibraryDownloader:
808
959
 
809
960
  Args:
810
961
  dep: Dependency to download
811
- dest_dir: Destination directory
962
+ dest_dir: Destination directory (typically cache_dir)
812
963
 
813
964
  Returns:
814
965
  Path to downloaded package file (wheel or sdist) or None
@@ -865,54 +1016,45 @@ class LibraryDownloader:
865
1016
 
866
1017
  return None
867
1018
 
868
- def download_packages(
869
- self,
870
- dependencies: list[Dependency],
871
- dest_dir: Path,
872
- max_workers: int = 4,
873
- ) -> DownloadResult:
1019
+ def download_packages(self, project: Project) -> DownloadResult:
874
1020
  """Download multiple packages concurrently.
875
1021
 
876
1022
  Args:
877
- dependencies: List of dependencies to download
878
- dest_dir: Destination directory
879
- max_workers: Maximum concurrent downloads
1023
+ project: Project containing dependencies to download
880
1024
 
881
1025
  Returns:
882
1026
  DownloadResult containing download statistics
883
1027
  """
884
- dest_dir.mkdir(parents=True, exist_ok=True)
885
-
886
- # Use list of tuples for thread-safe result collection
887
- # Tuple format: (package_name, success_flag)
888
1028
  results_list: list[tuple[str, bool]] = []
889
1029
  cached_count = 0
890
1030
  cached_packages: set[str] = set() # Track cached package names efficiently
891
1031
 
1032
+ dependencies = project.converted_dependencies
892
1033
  logger.info(f"Total direct dependencies: {len(dependencies)}")
893
1034
  logger.info(f"Using mirror: {self.mirror_url}")
894
1035
 
895
1036
  # Check cache and mark cached packages (single-threaded, safe)
896
1037
  for dep in dependencies:
897
1038
  if self.cache.get_package_path(dep.name, dep.version):
898
- normalized_dep_name = normalize_package_name(dep.name)
899
- results_list.append((normalized_dep_name, True))
900
- cached_packages.add(normalized_dep_name)
1039
+ results_list.append((dep.name, True))
1040
+ cached_packages.add(dep.name)
901
1041
  cached_count += 1
902
1042
  logger.info(f"Using cached package: {dep}")
903
1043
 
904
1044
  # Download remaining packages concurrently
905
1045
  remaining_deps = [
906
- dep
907
- for dep in dependencies
908
- if normalize_package_name(dep.name) not in cached_packages
1046
+ dep for dep in dependencies if dep.name not in cached_packages
909
1047
  ]
910
1048
  downloaded_count = 0
911
1049
 
912
1050
  if remaining_deps:
913
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
1051
+ with ThreadPoolExecutor(
1052
+ max_workers=self.parent.config.max_workers
1053
+ ) as executor:
914
1054
  future_to_dep = {
915
- executor.submit(self._download_package, dep, dest_dir): dep
1055
+ executor.submit(
1056
+ self._download_package, dep, self.cache.cache_dir
1057
+ ): dep
916
1058
  for dep in remaining_deps
917
1059
  }
918
1060
 
@@ -920,17 +1062,12 @@ class LibraryDownloader:
920
1062
  dep = future_to_dep[future]
921
1063
  try:
922
1064
  wheel_file = future.result()
923
- normalized_dep_name = normalize_package_name(dep.name)
924
- results_list.append((
925
- normalized_dep_name,
926
- wheel_file is not None,
927
- ))
1065
+ results_list.append((dep.name, wheel_file is not None))
928
1066
  if wheel_file:
929
1067
  downloaded_count += 1
930
1068
  except Exception as e:
931
- normalized_dep_name = normalize_package_name(dep.name)
932
- logger.error(f"Error processing {normalized_dep_name}: {e}")
933
- results_list.append((normalized_dep_name, False))
1069
+ logger.error(f"Error processing {dep.name}: {e}")
1070
+ results_list.append((dep.name, False))
934
1071
 
935
1072
  # Convert to dictionary for final result
936
1073
  results = dict(results_list)
@@ -948,36 +1085,58 @@ class LibraryDownloader:
948
1085
  )
949
1086
 
950
1087
 
951
- class PyLibPack:
1088
+ @dataclass(frozen=True)
1089
+ class PyLibPacker:
952
1090
  """Main library packer class."""
953
1091
 
954
- def __init__(
955
- self,
956
- cache_dir: Path | None = None,
957
- python_version: str | None = None,
958
- mirror: str = "pypi",
959
- optimize: bool = True,
960
- optimization_strategy: SelectiveExtractionStrategy | None = None,
961
- ):
962
- """Initialize library packer.
1092
+ working_dir: Path
1093
+ config: PyLibPackerConfig
963
1094
 
964
- Args:
965
- cache_dir: Custom cache directory
966
- python_version: Target Python version
967
- mirror: PyPI mirror source (pypi, tsinghua, aliyun, ustc, douban, tencent)
968
- """
969
- self.cache = LibraryCache(cache_dir)
970
- self.downloader = LibraryDownloader(self.cache, python_version, mirror)
1095
+ @cached_property
1096
+ def cache(self) -> LibraryCache:
1097
+ return LibraryCache(cache_dir=self.config.cache_dir or DEFAULT_CACHE_DIR)
971
1098
 
972
- # Set up optimization strategy
973
- self.optimize = optimize
974
- self.optimization_strategy = (
975
- optimization_strategy or SelectiveExtractionStrategy() if optimize else None
1099
+ @cached_property
1100
+ def downloader(self) -> LibraryDownloader:
1101
+ return LibraryDownloader(
1102
+ parent=self,
1103
+ cache=self.cache,
1104
+ _mirror=self.config.mirror,
976
1105
  )
977
1106
 
978
- def pack_project(
979
- self, project: Project, output_dir: Path, max_workers: int = 4
980
- ) -> PackResult:
1107
+ @cached_property
1108
+ def optimization_strategy(self) -> SelectiveExtractionStrategy | None:
1109
+ return SelectiveExtractionStrategy() if self.config.optimize else None
1110
+
1111
+ @cached_property
1112
+ def solution(self) -> Solution:
1113
+ return Solution.from_directory(root_dir=self.working_dir)
1114
+
1115
+ @cached_property
1116
+ def projects(self) -> dict[str, Project]:
1117
+ # Return projects as a dictionary mapping project names to Project objects
1118
+ # This follows the Solution API correctly
1119
+ return {project.name: project for project in self.solution.projects.values()}
1120
+
1121
+ @cached_property
1122
+ def project_count(self) -> int:
1123
+ """Get the count of projects to avoid repeated computation."""
1124
+ return len(self.projects)
1125
+
1126
+ @cached_property
1127
+ def working_dir_size(self) -> int:
1128
+ """Calculate total size of the working directory in bytes."""
1129
+ if not self.working_dir.exists():
1130
+ return 0
1131
+
1132
+ # Use generator expression for memory efficiency
1133
+ return sum(
1134
+ file_path.stat().st_size
1135
+ for file_path in self.working_dir.rglob("*")
1136
+ if file_path.is_file()
1137
+ )
1138
+
1139
+ def pack_project(self, project: Project) -> PackResult:
981
1140
  """Pack dependencies for a single project.
982
1141
 
983
1142
  Args:
@@ -987,79 +1146,71 @@ class PyLibPack:
987
1146
 
988
1147
  Returns:
989
1148
  PackResult containing packing statistics
990
- """
991
- logger.info(f"\n{'=' * 60}")
992
- logger.info(f"Packing dependencies for project: {project.name}")
993
- logger.info(f"{'=' * 60}")
994
-
995
- if not project.dependencies:
996
- logger.warning(f"No dependencies found for {project.name}")
997
- return PackResult(
998
- success=False,
999
- message="No dependencies found",
1000
- project=project.name,
1001
- total=0,
1002
- successful=0,
1003
- failed=0,
1004
- packages_dir=str(output_dir),
1005
- )
1006
-
1007
- logger.info(f"Found {len(project.dependencies)} dependencies")
1008
1149
 
1009
- # Download direct dependencies
1010
- download_result = self.downloader.download_packages(
1011
- project.dependencies,
1012
- self.cache.cache_dir,
1013
- max_workers=max_workers,
1014
- )
1015
-
1016
- # Build package map (including both wheel and sdist files) and collect all required packages recursively
1017
- package_map: dict[str, Path] = {}
1018
-
1019
- # Add wheel files to package map
1020
- for wheel_file in self.cache.cache_dir.glob("*.whl"):
1021
- pkg_name = self.cache._extract_package_name_from_wheel(wheel_file)
1022
- if pkg_name and pkg_name not in package_map: # Prefer wheel files
1023
- normalized_pkg_name = normalize_package_name(pkg_name)
1024
- package_map[normalized_pkg_name] = wheel_file
1025
-
1026
- # Add sdist files to package map (only if not already present)
1027
- for sdist_file in self.cache.cache_dir.glob("*.tar.gz"):
1028
- pkg_name = self.cache._extract_package_name_from_sdist(sdist_file)
1029
- if pkg_name and normalize_package_name(pkg_name) not in package_map:
1030
- normalized_pkg_name = normalize_package_name(pkg_name)
1031
- package_map[normalized_pkg_name] = sdist_file
1150
+ Raises:
1151
+ ValueError: If project has invalid configuration
1152
+ RuntimeError: If packing fails due to system issues
1153
+ """
1154
+ logger.info(f"{120 * '='}")
1155
+ logger.info(f"Packing dependencies for project: `{project.name}`")
1032
1156
 
1033
- for sdist_file in self.cache.cache_dir.glob("*.zip"):
1034
- pkg_name = self.cache._extract_package_name_from_sdist(sdist_file)
1035
- if pkg_name and normalize_package_name(pkg_name) not in package_map:
1036
- normalized_pkg_name = normalize_package_name(pkg_name)
1037
- package_map[normalized_pkg_name] = sdist_file
1157
+ download_result = self.downloader.download_packages(project)
1038
1158
 
1039
1159
  # Recursively collect all dependencies (pass cache instance for dependency extraction)
1040
- all_packages = self._collect_all_dependencies(
1041
- package_map, list(download_result.results), self.cache
1160
+ all_packages = self.cache.collect_dependencies_from_list(
1161
+ list(download_result.results)
1042
1162
  )
1043
1163
 
1044
1164
  # Extract all required packages (keep order of dependency resolution)
1045
1165
  extracted_packages = []
1046
1166
  for pkg_name in all_packages:
1047
- if pkg_name in package_map:
1167
+ logger.info(f"Processing {pkg_name}")
1168
+ if pkg_name in self.cache.package_map:
1048
1169
  # Skip if output directory already exists
1049
- output_pkg_dir = output_dir / pkg_name
1170
+ output_pkg_dir = project.lib_dir / pkg_name
1050
1171
  if output_pkg_dir.exists():
1051
1172
  logger.warning(f"Output directory already exists: {output_pkg_dir}")
1052
1173
  continue
1053
1174
 
1054
- package_file = package_map[pkg_name]
1175
+ package_file = self.cache.package_map[pkg_name]
1055
1176
  logger.info(f"Extracting {package_file.name}...")
1056
- self._extract_package(package_file, output_dir, pkg_name)
1177
+ self._extract_package(package_file, project.lib_dir, pkg_name)
1057
1178
  extracted_packages.append(pkg_name)
1058
1179
  logger.info(f"Extracted {pkg_name}")
1180
+ else:
1181
+ logger.warning(f"Package not found in cache: {pkg_name}")
1182
+ # Attempt to download the missing package
1183
+ logger.info(f"Attempting to download missing package: {pkg_name}")
1184
+
1185
+ # Create a temporary dependency object for the missing package
1186
+ missing_dep = Dependency(
1187
+ name=pkg_name, version=None, extras=set(), requires=set()
1188
+ )
1189
+
1190
+ # Try to download the missing package
1191
+ downloaded_path = self.downloader._download_package(
1192
+ missing_dep, self.cache.cache_dir
1193
+ )
1194
+ if downloaded_path:
1195
+ logger.info(f"Successfully downloaded missing package: {pkg_name}")
1196
+ # Now check again if it's in the cache and extract if available
1197
+ if pkg_name in self.cache.package_map:
1198
+ package_file = self.cache.package_map[pkg_name]
1199
+ logger.info(f"Extracting {package_file.name}...")
1200
+ self._extract_package(package_file, project.lib_dir, pkg_name)
1201
+ extracted_packages.append(pkg_name)
1202
+ logger.info(f"Extracted {pkg_name}")
1203
+ else:
1204
+ logger.error(
1205
+ f"Package {pkg_name} still not found in cache after download attempt"
1206
+ )
1207
+ else:
1208
+ logger.error(f"Failed to download missing package: {pkg_name}")
1059
1209
 
1060
1210
  logger.info(
1061
1211
  f"Pack complete for {project.name}: {download_result.successful}/{download_result.total}"
1062
1212
  )
1213
+ logger.info(f"{120 * '='}")
1063
1214
 
1064
1215
  return PackResult(
1065
1216
  success=download_result.successful > 0,
@@ -1067,76 +1218,10 @@ class PyLibPack:
1067
1218
  total=download_result.total,
1068
1219
  successful=download_result.successful,
1069
1220
  failed=download_result.total - download_result.successful,
1070
- packages_dir=str(output_dir),
1221
+ packages_dir=str(project.lib_dir),
1071
1222
  extracted_packages=extracted_packages,
1072
1223
  )
1073
1224
 
1074
- @staticmethod
1075
- def _collect_all_dependencies(
1076
- package_map: dict[str, Path], root_packages: list[str], cache: LibraryCache
1077
- ) -> set[str]:
1078
- """Recursively collect all dependencies from package files (wheel or sdist).
1079
-
1080
- Args:
1081
- package_map: Mapping of package names to package files (wheel or sdist)
1082
- root_packages: List of root package names to start from
1083
- cache: LibraryCache instance for extracting dependencies
1084
-
1085
- Returns:
1086
- List of all required package names
1087
- """
1088
- all_packages: set[str] = set()
1089
- visited: set[str] = set()
1090
- visit_stack: dict[str, int] = {} # Track visit depth for cycle detection
1091
-
1092
- def visit(pkg_name: str, level: int = 0) -> None:
1093
- """Visit a package and collect its dependencies."""
1094
- # Normalize package name for consistency
1095
- normalized_pkg_name = normalize_package_name(pkg_name)
1096
-
1097
- # Check for cycles
1098
- if normalized_pkg_name in visit_stack:
1099
- logger.warning(
1100
- f"Potential circular dependency detected: {normalized_pkg_name} (current depth: {level}, "
1101
- f"previous depth: {visit_stack[normalized_pkg_name]})"
1102
- )
1103
- return
1104
-
1105
- # Check depth limit
1106
- if level > MAX_DEPTH:
1107
- logger.warning(
1108
- f"Maximum dependency depth ({MAX_DEPTH}) reached for {normalized_pkg_name}, stopping recursion"
1109
- )
1110
- return
1111
-
1112
- if normalized_pkg_name in visited:
1113
- return
1114
-
1115
- # Mark as visited and track depth
1116
- visited.add(normalized_pkg_name)
1117
- visit_stack[normalized_pkg_name] = level
1118
- all_packages.add(normalized_pkg_name)
1119
-
1120
- if normalized_pkg_name in package_map:
1121
- deps = cache._extract_dependencies_from_wheel(
1122
- package_map[normalized_pkg_name]
1123
- )
1124
- logger.debug(f"{' ' * level}{normalized_pkg_name} -> {deps}")
1125
- for dep in deps:
1126
- visit(dep, level + 1)
1127
-
1128
- # Remove from stack when done
1129
- visit_stack.pop(normalized_pkg_name, None)
1130
-
1131
- for pkg_name in root_packages:
1132
- visit(pkg_name)
1133
-
1134
- logger.info(
1135
- f"Collected {len(all_packages)} packages (including recursive dependencies)"
1136
- )
1137
- logger.info(f"Packages: {all_packages}")
1138
- return all_packages
1139
-
1140
1225
  def _build_and_cache_wheel(self, sdist_file: Path, package_name: str) -> None:
1141
1226
  """Build wheel from sdist file and cache it for faster future access.
1142
1227
 
@@ -1199,181 +1284,179 @@ class PyLibPack:
1199
1284
  )
1200
1285
 
1201
1286
  # Handle sdist files (.tar.gz or .zip) - install using pip, and build wheel for cache
1202
- if package_file.suffix == ".gz" or package_file.suffix == ".zip":
1203
- logger.info(f"Installing sdist file for {package_name} using pip...")
1204
-
1205
- # Use pip install --target to install sdist to temporary directory
1206
- with tempfile.TemporaryDirectory() as temp_install_dir:
1207
- result = subprocess.run(
1208
- [
1209
- self.downloader.pip_executable or "pip",
1210
- "install",
1211
- "--target",
1212
- temp_install_dir,
1213
- "--no-deps", # Don't install dependencies (we handle them separately)
1214
- "--no-cache-dir",
1215
- str(package_file),
1216
- ],
1217
- capture_output=True,
1218
- text=True,
1219
- check=False,
1220
- )
1221
-
1222
- if result.returncode != 0:
1223
- logger.error(
1224
- f"Failed to install sdist {package_file.name}: {result.stderr}"
1225
- )
1226
- return
1227
-
1228
- # Copy installed files to dest_dir, skipping *.dist-info directories
1229
- temp_install_path = Path(temp_install_dir)
1230
- for item in temp_install_path.iterdir():
1231
- # Skip dist-info directories
1232
- if item.name.endswith(".dist-info"):
1233
- logger.debug(f"Skipping dist-info directory: {item.name}")
1234
- continue
1235
- dest_path = dest_dir / item.name
1236
- if item.is_dir():
1237
- if dest_path.exists():
1238
- shutil.rmtree(dest_path)
1239
- shutil.copytree(item, dest_path)
1240
- else:
1241
- shutil.copy2(item, dest_path)
1242
-
1243
- logger.info(
1244
- f"Installed sdist file for {package_name} to site-packages structure"
1245
- )
1246
-
1247
- # Build wheel from sdist and cache it for faster future access
1248
- logger.info(f"Building wheel from sdist for {package_name}...")
1249
- self._build_and_cache_wheel(package_file, package_name)
1287
+ if package_file.suffix in (".gz", ".zip"):
1288
+ self._handle_sdist_extraction(package_file, dest_dir, package_name)
1250
1289
  return
1251
1290
 
1252
1291
  # Handle wheel files with optional optimization
1253
- with zipfile.ZipFile(package_file, "r") as zf:
1254
- if self.optimize and self.optimization_strategy:
1255
- # Apply optimization strategy - selectively extract files
1256
- extracted_count = 0
1257
- skipped_count = 0
1258
-
1259
- for file_info in zf.filelist:
1260
- file_path = Path(file_info.filename)
1261
- # Skip dist-info directories
1262
- if file_path.name.endswith(".dist-info") or any(
1263
- parent.endswith(".dist-info") for parent in file_path.parts
1264
- ):
1265
- logger.debug(f"Skipping dist-info: {file_info.filename}")
1266
- skipped_count += 1
1267
- continue
1268
- if self.optimization_strategy.should_extract_file(
1269
- package_name, file_path
1270
- ):
1271
- zf.extract(file_info, dest_dir)
1272
- extracted_count += 1
1273
- logger.debug(f"Extracted {file_path} from {package_name}")
1274
- else:
1275
- skipped_count += 1
1276
- logger.debug(
1277
- f"Skipped {file_path} from {package_name} (filtered by optimization strategy)"
1278
- )
1292
+ self._handle_wheel_extraction(package_file, dest_dir, package_name)
1279
1293
 
1280
- logger.info(
1281
- f"Extraction complete for {package_name}: {extracted_count} extracted, {skipped_count} skipped"
1282
- )
1283
- else:
1284
- # Extract all files without optimization, but skip dist-info directories
1285
- for file_info in zf.filelist:
1286
- file_path = Path(file_info.filename)
1287
- # Skip dist-info directories
1288
- if file_path.name.endswith(".dist-info") or any(
1289
- parent.endswith(".dist-info") for parent in file_path.parts
1290
- ):
1291
- logger.debug(f"Skipping dist-info: {file_info.filename}")
1292
- continue
1293
- zf.extract(file_info, dest_dir)
1294
- logger.info(
1295
- f"All files extracted for {package_name} (no optimization applied, dist-info skipped)"
1296
- )
1294
+ def _handle_sdist_extraction(
1295
+ self, package_file: Path, dest_dir: Path, package_name: str
1296
+ ) -> None:
1297
+ """Handle extraction of sdist files (.tar.gz or .zip)."""
1298
+ logger.info(f"Installing sdist file for {package_name} using pip...")
1297
1299
 
1298
- def pack(
1299
- self,
1300
- working_dir: Path,
1301
- max_workers: int = 4,
1302
- ) -> BatchPackResult:
1303
- """Pack project dependencies from base directory.
1300
+ # Use pip install --target to install sdist to temporary directory
1301
+ with tempfile.TemporaryDirectory() as temp_install_dir:
1302
+ result = subprocess.run(
1303
+ [
1304
+ self.downloader.pip_executable or "pip",
1305
+ "install",
1306
+ "--target",
1307
+ temp_install_dir,
1308
+ "--no-deps", # Don't install dependencies (we handle them separately)
1309
+ "--no-cache-dir",
1310
+ str(package_file),
1311
+ ],
1312
+ capture_output=True,
1313
+ text=True,
1314
+ check=False,
1315
+ )
1304
1316
 
1305
- Args:
1306
- base_dir: Base directory containing projects or a single project
1307
- output_dir: Output directory (default: base_dir/dist/site-packages)
1308
- max_workers: Maximum concurrent downloads
1317
+ if result.returncode != 0:
1318
+ logger.error(
1319
+ f"Failed to install sdist {package_file.name}: {result.stderr}"
1320
+ )
1321
+ return
1309
1322
 
1310
- Returns:
1311
- BatchPackResult containing batch packing statistics
1312
- """
1313
- output_dir = working_dir / "dist" / "site-packages"
1314
- logger.info(f"Starting dependency pack for: {working_dir}")
1315
-
1316
- projects = Solution.from_directory(root_dir=working_dir).projects
1317
- if not projects:
1318
- logger.error("Failed to load project information")
1319
- return BatchPackResult(
1320
- success=False,
1321
- total=0,
1322
- successful=0,
1323
- failed=0,
1324
- output_dir=str(output_dir),
1325
- total_time=0.0,
1326
- )
1323
+ # Copy installed files to dest_dir, skipping *.dist-info directories
1324
+ temp_install_path = Path(temp_install_dir)
1325
+ # Pre-compute dist-info suffix
1326
+ dist_info_suffix = ".dist-info"
1327
1327
 
1328
- logger.info(f"Found {len(projects)} project(s) to process")
1328
+ for item in temp_install_path.iterdir():
1329
+ # Skip dist-info directories
1330
+ if item.name.endswith(dist_info_suffix):
1331
+ logger.debug(f"Skipping dist-info directory: {item.name}")
1332
+ continue
1333
+ dest_path = dest_dir / item.name
1334
+ if item.is_dir():
1335
+ if dest_path.exists():
1336
+ shutil.rmtree(dest_path)
1337
+ shutil.copytree(item, dest_path)
1338
+ else:
1339
+ shutil.copy2(item, dest_path)
1340
+
1341
+ logger.info(
1342
+ f"Installed sdist file for {package_name} to site-packages structure"
1343
+ )
1329
1344
 
1330
- # Process each project
1331
- total_start = time.perf_counter()
1332
- success_count = 0
1333
- failed_projects: list[str] = []
1334
- use_current_dir = len(projects) == 1
1345
+ # Build wheel from sdist and cache it for faster future access
1346
+ logger.info(f"Building wheel from sdist for {package_name}...")
1347
+ self._build_and_cache_wheel(package_file, package_name)
1335
1348
 
1336
- for project in projects.values():
1337
- project_dir = working_dir if use_current_dir else working_dir / project.name
1349
+ def _handle_wheel_extraction(
1350
+ self, package_file: Path, dest_dir: Path, package_name: str
1351
+ ) -> None:
1352
+ """Handle extraction of wheel files with optional optimization."""
1353
+ with zipfile.ZipFile(package_file, "r") as zf:
1354
+ if self.config.optimize and self.optimization_strategy:
1355
+ self._extract_with_optimization(zf, dest_dir, package_name)
1356
+ else:
1357
+ self._extract_without_optimization(zf, dest_dir, package_name)
1338
1358
 
1339
- if not project_dir.is_dir():
1340
- logger.warning(f"Project directory not found: {project_dir}, skipping")
1341
- failed_projects.append(project.name)
1359
+ def _extract_with_optimization(
1360
+ self, zf: zipfile.ZipFile, dest_dir: Path, package_name: str
1361
+ ) -> None:
1362
+ """Extract wheel with optimization strategy applied."""
1363
+ extracted_count = 0
1364
+ skipped_count = 0
1365
+
1366
+ assert self.optimization_strategy is not None, "Optimization strategy is None"
1367
+ should_extract = self.optimization_strategy.should_extract_file
1368
+
1369
+ for file_info in zf.filelist:
1370
+ file_path = Path(file_info.filename)
1371
+
1372
+ # Skip dist-info directories
1373
+ if LibraryCache._should_skip_dist_info(
1374
+ file_path
1375
+ ): # Use LibraryCache method
1376
+ logger.debug(f"Skipping dist-info: {file_info.filename}")
1377
+ skipped_count += 1
1342
1378
  continue
1343
1379
 
1344
- result = self.pack_project(project, output_dir, max_workers)
1345
-
1346
- if result.success:
1347
- success_count += 1
1380
+ if should_extract(package_name, file_path):
1381
+ zf.extract(file_info, dest_dir)
1382
+ extracted_count += 1
1383
+ logger.debug(f"Extracted {file_path} from {package_name}")
1348
1384
  else:
1349
- failed_projects.append(project.name)
1350
-
1351
- total_time = time.perf_counter() - total_start
1352
-
1353
- # Summary
1354
- logger.info(f"\n{'=' * 60}")
1355
- logger.info("Summary")
1356
- logger.info(f"{'=' * 60}")
1357
- logger.info(f"Total projects: {len(projects)}")
1358
- logger.info(f"Successfully packed: {success_count}")
1359
- logger.info(f"Failed: {len(failed_projects)}")
1360
- if failed_projects:
1361
- logger.info(f"Failed projects: {', '.join(failed_projects)}")
1362
- logger.info(f"Total time: {total_time:.2f}s")
1363
-
1364
- return BatchPackResult(
1365
- success=len(failed_projects) == 0,
1366
- total=len(projects),
1367
- successful=success_count,
1368
- failed=len(failed_projects),
1369
- failed_projects=failed_projects,
1370
- output_dir=str(output_dir),
1371
- total_time=total_time,
1385
+ skipped_count += 1
1386
+ logger.debug(
1387
+ f"Skipped {file_path} from {package_name} (filtered by optimization strategy)"
1388
+ )
1389
+
1390
+ logger.info(
1391
+ f"Extraction complete for {package_name}: {extracted_count} extracted, {skipped_count} skipped"
1372
1392
  )
1373
1393
 
1374
- def clear_cache(self) -> None:
1375
- """Clear the package cache."""
1376
- self.cache.clear_cache()
1394
+ def _extract_without_optimization(
1395
+ self, zf: zipfile.ZipFile, dest_dir: Path, package_name: str
1396
+ ) -> None:
1397
+ """Extract wheel without optimization, but skip dist-info directories."""
1398
+ # Pre-compute the skip function
1399
+ should_skip = LibraryCache._should_skip_dist_info # Use LibraryCache method
1400
+
1401
+ for file_info in zf.filelist:
1402
+ file_path = Path(file_info.filename)
1403
+ # Skip dist-info directories
1404
+ if not should_skip(file_path):
1405
+ zf.extract(file_info, dest_dir)
1406
+ logger.info(
1407
+ f"All files extracted for {package_name} (no optimization applied, dist-info skipped)"
1408
+ )
1409
+
1410
+ @staticmethod
1411
+ def _should_skip_dist_info(file_path: Path) -> bool:
1412
+ """Check if the file path should be skipped because it's a dist-info directory."""
1413
+ name = file_path.name
1414
+ if name.endswith(".dist-info"):
1415
+ return True
1416
+ # Check if any parent directory ends with .dist-info
1417
+ for part in file_path.parts:
1418
+ if part.endswith(".dist-info"):
1419
+ return True
1420
+ return False
1421
+
1422
+ def run(self) -> None:
1423
+ """Pack project dependencies from base directory with concurrent processing."""
1424
+ t0 = time.perf_counter()
1425
+ project_count = self.project_count # Use cached property
1426
+
1427
+ logger.info(f"Starting to pack {project_count} projects")
1428
+
1429
+ if project_count == 1:
1430
+ # Single project: process directly
1431
+ project = next(iter(self.projects.values()))
1432
+ self.pack_project(project)
1433
+ else:
1434
+ # Multiple projects: process concurrently
1435
+ logger.info(f"Packing {project_count} projects concurrently...")
1436
+ with ThreadPoolExecutor(max_workers=self.config.max_workers) as executor:
1437
+ futures = [
1438
+ executor.submit(self.pack_project, project)
1439
+ for project in self.projects.values()
1440
+ ]
1441
+
1442
+ # Wait for all tasks to complete
1443
+ for future in as_completed(futures):
1444
+ try:
1445
+ future.result()
1446
+ except Exception as e:
1447
+ logger.error(f"Project packing failed: {e}")
1448
+
1449
+ elapsed_time = time.perf_counter() - t0
1450
+ logger.info(f"Packed {project_count} projects in {elapsed_time:.2f}s")
1451
+
1452
+ # Log cache statistics after packing
1453
+ logger.info(f"Cache statistics: {self.cache.cache_stats}")
1454
+
1455
+ def show_stats(self) -> None:
1456
+ logger.info(f"Project count: {self.project_count}")
1457
+ logger.info(f"Total dependencies: {len(self.solution.dependencies)}")
1458
+ logger.info(f"Working directory size: {self.working_dir_size} bytes")
1459
+ logger.info(f"Cache statistics: {self.cache.cache_stats}")
1377
1460
 
1378
1461
 
1379
1462
  def parse_args() -> argparse.Namespace:
@@ -1397,12 +1480,16 @@ def parse_args() -> argparse.Namespace:
1397
1480
  "--python-version", type=str, default=None, help="Target Python version"
1398
1481
  )
1399
1482
  parser.add_argument(
1400
- "-j", "--jobs", type=int, default=4, help="Maximum concurrent downloads"
1483
+ "-j",
1484
+ "--jobs",
1485
+ type=int,
1486
+ default=DEFAULT_MAX_WORKERS,
1487
+ help="Maximum concurrent downloads",
1401
1488
  )
1402
1489
  parser.add_argument(
1403
1490
  "--mirror",
1404
1491
  type=str,
1405
- default="aliyun",
1492
+ default=DEFAULT_MIRROR,
1406
1493
  choices=("pypi", "tsinghua", "aliyun", "ustc", "douban", "tencent"),
1407
1494
  help="PyPI mirror source for faster downloads in China",
1408
1495
  )
@@ -1413,6 +1500,18 @@ def parse_args() -> argparse.Namespace:
1413
1500
  action="store_true",
1414
1501
  help="Disable package optimization (extract all files)",
1415
1502
  )
1503
+
1504
+ # Add option to show package size
1505
+ parser.add_argument(
1506
+ "--show-size",
1507
+ action="store_true",
1508
+ help="Show packed package size",
1509
+ )
1510
+ parser.add_argument(
1511
+ "--show-stats",
1512
+ action="store_true",
1513
+ help="Show detailed project statistics",
1514
+ )
1416
1515
  parser.add_argument(
1417
1516
  "--list-optimizations",
1418
1517
  "-lo",
@@ -1426,6 +1525,10 @@ def main() -> None:
1426
1525
  """Main entry point for pylibpack tool."""
1427
1526
  args = parse_args()
1428
1527
 
1528
+ # Setup logging
1529
+ if args.debug:
1530
+ logger.setLevel(logging.DEBUG)
1531
+
1429
1532
  if args.list_optimizations:
1430
1533
  strategy = SelectiveExtractionStrategy()
1431
1534
  logging.info("Available optimization rules:")
@@ -1433,25 +1536,28 @@ def main() -> None:
1433
1536
  logging.info(f" - {lib_name}")
1434
1537
  return
1435
1538
 
1436
- # Setup logging
1437
- if args.debug:
1438
- logger.setLevel(logging.DEBUG)
1439
-
1440
- # Initialize packer
1441
- cache_dir = Path(args.cache_dir) if args.cache_dir else None
1442
- optimize = not args.no_optimize
1443
- packer = PyLibPack(
1444
- cache_dir=cache_dir,
1445
- python_version=args.python_version,
1539
+ # Create configuration from arguments
1540
+ config = PyLibPackerConfig(
1541
+ cache_dir=Path(args.cache_dir) if args.cache_dir else DEFAULT_CACHE_DIR,
1446
1542
  mirror=args.mirror,
1447
- optimize=optimize,
1543
+ optimize=not args.no_optimize,
1544
+ max_workers=args.jobs,
1448
1545
  )
1449
1546
 
1450
- packer.pack(
1547
+ # Register auto-save on exit
1548
+ atexit.register(config.save)
1549
+
1550
+ packer = PyLibPacker(
1451
1551
  working_dir=Path(args.directory),
1452
- max_workers=args.jobs,
1552
+ config=config,
1453
1553
  )
1454
1554
 
1555
+ # Execute pack operation
1556
+ packer.run()
1557
+
1558
+ if args.show_stats:
1559
+ packer.show_stats()
1560
+
1455
1561
 
1456
1562
  if __name__ == "__main__":
1457
1563
  main()