pysfi 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {pysfi-0.1.12.dist-info → pysfi-0.1.14.dist-info}/METADATA +1 -1
  2. pysfi-0.1.14.dist-info/RECORD +68 -0
  3. {pysfi-0.1.12.dist-info → pysfi-0.1.14.dist-info}/entry_points.txt +3 -0
  4. sfi/__init__.py +19 -2
  5. sfi/alarmclock/__init__.py +3 -0
  6. sfi/alarmclock/alarmclock.py +23 -40
  7. sfi/bumpversion/__init__.py +3 -1
  8. sfi/bumpversion/bumpversion.py +64 -15
  9. sfi/cleanbuild/__init__.py +3 -0
  10. sfi/cleanbuild/cleanbuild.py +5 -1
  11. sfi/cli.py +25 -4
  12. sfi/condasetup/__init__.py +1 -0
  13. sfi/condasetup/condasetup.py +91 -76
  14. sfi/docdiff/__init__.py +1 -0
  15. sfi/docdiff/docdiff.py +3 -2
  16. sfi/docscan/__init__.py +1 -1
  17. sfi/docscan/docscan.py +78 -23
  18. sfi/docscan/docscan_gui.py +152 -48
  19. sfi/filedate/filedate.py +12 -5
  20. sfi/img2pdf/img2pdf.py +453 -0
  21. sfi/llmclient/llmclient.py +31 -8
  22. sfi/llmquantize/llmquantize.py +76 -37
  23. sfi/llmserver/__init__.py +1 -0
  24. sfi/llmserver/llmserver.py +63 -13
  25. sfi/makepython/makepython.py +1145 -201
  26. sfi/pdfsplit/pdfsplit.py +45 -12
  27. sfi/pyarchive/__init__.py +1 -0
  28. sfi/pyarchive/pyarchive.py +908 -278
  29. sfi/pyembedinstall/pyembedinstall.py +88 -89
  30. sfi/pylibpack/pylibpack.py +561 -463
  31. sfi/pyloadergen/pyloadergen.py +372 -218
  32. sfi/pypack/pypack.py +510 -959
  33. sfi/pyprojectparse/pyprojectparse.py +337 -40
  34. sfi/pysourcepack/__init__.py +1 -0
  35. sfi/pysourcepack/pysourcepack.py +210 -131
  36. sfi/quizbase/quizbase_gui.py +2 -2
  37. sfi/taskkill/taskkill.py +168 -59
  38. sfi/which/which.py +11 -3
  39. pysfi-0.1.12.dist-info/RECORD +0 -62
  40. sfi/workflowengine/workflowengine.py +0 -444
  41. {pysfi-0.1.12.dist-info → pysfi-0.1.14.dist-info}/WHEEL +0 -0
  42. /sfi/{workflowengine → img2pdf}/__init__.py +0 -0
@@ -10,23 +10,23 @@ This module provides functionality to:
  from __future__ import annotations
 
  import argparse
+ import atexit
  import json
  import logging
- import platform
  import re
  import shutil
  import subprocess
- import sys
  import tarfile
  import tempfile
  import time
  import zipfile
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from dataclasses import dataclass, field
+ from functools import cached_property
  from pathlib import Path
- from typing import Any, Pattern
+ from typing import Any, ClassVar, Final, Pattern
 
- from sfi.pyprojectparse.pyprojectparse import Project, Solution
+ from sfi.pyprojectparse.pyprojectparse import Dependency, Project, Solution
 
  logging.basicConfig(
  level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -38,9 +38,23 @@ __build__ = "20260120"
 
  DEFAULT_CACHE_DIR = Path.home() / ".pysfi" / ".cache" / "python-libs"
 
- MAX_DEPTH = 50 # Maximum recursion depth to prevent infinite loops
+ MAX_DEPTH: Final[int] = 50 # Maximum recursion depth to prevent infinite loops
 
- PYPI_MIRRORS = {
+ # Archive format constants
+ SUPPORTED_ARCHIVE_FORMATS: Final[tuple[str, ...]] = (
+ "zip",
+ "tar",
+ "gztar",
+ "bztar",
+ "xztar",
+ )
+
+ # Default configuration constants
+ DEFAULT_MAX_WORKERS: Final[int] = 4
+ DEFAULT_MIRROR: Final[str] = "aliyun"
+ DEFAULT_OPTIMIZE: Final[bool] = True
+
+ PYPI_MIRRORS: Final[dict[str, str]] = {
  "pypi": "https://pypi.org/simple",
  "tsinghua": "https://pypi.tuna.tsinghua.edu.cn/simple",
  "aliyun": "https://mirrors.aliyun.com/pypi/simple/",
@@ -50,24 +64,74 @@ PYPI_MIRRORS = {
  }
 
 
- @dataclass(frozen=True)
- class Dependency:
- """Represents a Python package dependency."""
+ CONFIG_FILE = Path.home() / ".pysfi" / "pylibpack.json"
 
- name: str
- version: str | None = None
- extras: set[str] = field(default_factory=set)
- requires: set[str] = field(default_factory=set)
 
- def __post_init__(self):
- """Normalize package name after initialization."""
- object.__setattr__(self, "name", normalize_package_name(self.name))
+ @dataclass
+ class PyLibPackerConfig:
+ """Configuration for PyLibPack with persistent settings."""
+
+ cache_dir: Path | None = None
+ mirror: str = DEFAULT_MIRROR
+ optimize: bool = DEFAULT_OPTIMIZE
+ max_workers: int = DEFAULT_MAX_WORKERS
+
+ def __init__(
+ self,
+ cache_dir: Path | None = None,
+ mirror: str = DEFAULT_MIRROR,
+ optimize: bool = DEFAULT_OPTIMIZE,
+ max_workers: int = DEFAULT_MAX_WORKERS,
+ ):
+ # Track which parameters were explicitly provided
+ self._explicitly_set = {}
+
+ if cache_dir is not None:
+ self._explicitly_set["cache_dir"] = True
+ if mirror != DEFAULT_MIRROR:
+ self._explicitly_set["mirror"] = True
+ if optimize != DEFAULT_OPTIMIZE:
+ self._explicitly_set["optimize"] = True
+ if max_workers != DEFAULT_MAX_WORKERS:
+ self._explicitly_set["max_workers"] = True
+
+ # Set the values
+ self.cache_dir = cache_dir
+ self.mirror = mirror
+ self.optimize = optimize
+ self.max_workers = max_workers
+
+ # Apply defaults for unset values
+ if self.cache_dir is None:
+ self.cache_dir = DEFAULT_CACHE_DIR
 
- def __str__(self) -> str:
- """String representation of dependency."""
- if self.extras:
- return f"{self.name}[{','.join(sorted(self.extras))}]{self.version or ''}"
- return f"{self.name}{self.version or ''}"
+ self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+ # Load configuration from file if it exists
+ if CONFIG_FILE.exists():
+ try:
+ config_data = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
+ # Update configuration items, but only for those not explicitly set
+ for key, value in config_data.items():
+ if (
+ hasattr(self, key)
+ and isinstance(value, type(getattr(self, key)))
+ and key not in self._explicitly_set
+ ):
+ setattr(self, key, value)
+ except (json.JSONDecodeError, TypeError, AttributeError) as e:
+ logger.warning(f"Could not load config from {CONFIG_FILE}: {e}")
+
+ def save(self) -> None:
+ """Save current configuration to file."""
+ CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
+ config_dict = {
+ "cache_dir": str(self.cache_dir),
+ "mirror": self.mirror,
+ "optimize": self.optimize,
+ "max_workers": self.max_workers,
+ }
+ CONFIG_FILE.write_text(json.dumps(config_dict, indent=4), encoding="utf-8")
 
 
  @dataclass
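
The `PyLibPackerConfig` added above persists settings to `~/.pysfi/pylibpack.json`; on construction, values loaded from that file only fill in parameters that were not passed explicitly, and explicit arguments always win. A minimal sketch of that precedence (the values shown are illustrative, not taken from the diff):

```python
# Hedged sketch: explicit argument > saved JSON file > module default.
config = PyLibPackerConfig(mirror="pypi")  # "mirror" is recorded as explicitly set
# cache_dir, optimize and max_workers may still be filled from ~/.pysfi/pylibpack.json
config.save()  # writes cache_dir, mirror, optimize, max_workers back as JSON
```
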
@@ -92,20 +156,6 @@ class PackResult:
  failed: int
  packages_dir: str
  extracted_packages: list[str] = field(default_factory=list)
- message: str = ""
-
-
- @dataclass
- class BatchPackResult:
- """Result of packing multiple projects."""
-
- success: bool
- total: int
- successful: int
- failed: int
- failed_projects: list[str] = field(default_factory=list)
- output_dir: str = ""
- total_time: float = 0.0
 
 
  @dataclass
@@ -154,7 +204,7 @@ class OptimizationRule:
  exclude_patterns: list[str] = field(default_factory=list)
  include_patterns: list[str] = field(default_factory=list)
 
- def __post_init__(self):
+ def __post_init__(self) -> None:
  """Compile regex patterns after initialization."""
  self.exclude_compiled: list[Pattern] = [
  re.compile(p) for p in self.exclude_patterns
@@ -164,6 +214,7 @@ class OptimizationRule:
  ]
 
 
+ @dataclass(frozen=False)
  class SelectiveExtractionStrategy:
  """Optimization strategy that applies inclusion/exclusion rules to specific libraries.
 
@@ -174,7 +225,7 @@ class SelectiveExtractionStrategy:
  """
 
  # Universal exclusion patterns - applied to all libraries
- UNIVERSAL_EXCLUDE_PATTERNS = frozenset({
+ UNIVERSAL_EXCLUDE_PATTERNS: ClassVar[frozenset[str]] = frozenset({
  "doc",
  "docs",
  "test",
@@ -223,8 +274,12 @@ class SelectiveExtractionStrategy:
  for pattern in self.UNIVERSAL_EXCLUDE_PATTERNS
  ]
 
- def _setup_default_rules(self):
- """Setup default optimization rules for common libraries."""
+ def _setup_default_rules(self) -> None:
+ """Setup default optimization rules for common libraries.
+
+ This method loads JSON rule files from the rules directory and
+ creates OptimizationRule objects for common libraries.
+ """
  # Get the rules directory
  rules_dir = Path(__file__).parent / "rules"
 
@@ -359,12 +414,13 @@ def should_skip_dependency(req_name: str, has_extras: bool = False) -> bool:
  Returns:
  True if should skip, False otherwise
  """
- req_lower = req_name.lower()
-
  # Skip extras
  if has_extras:
  return True
 
+ req_lower = req_name.lower()
+ normalized_req = req_lower.replace("-", "_")
+
  # Skip dev/test/docs/lint/example patterns
  if any(keyword in req_lower for keyword in DEV_PATTERNS):
  return True
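
With the reordering above, `should_skip_dependency` short-circuits on extras before computing the lowered and underscore-normalized names used by the later checks. A hedged illustration, assuming `DEV_PATTERNS` includes keywords such as "test" and `DEV_TOOLS` holds normalized tool names (neither constant is shown in this hunk):

```python
should_skip_dependency("requests", has_extras=True)     # True: extras are skipped before any name checks
should_skip_dependency("pytest-cov", has_extras=False)  # likely True, if "test" is in DEV_PATTERNS
should_skip_dependency("requests", has_extras=False)    # False: kept as a runtime dependency
```
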
@@ -374,23 +430,141 @@ def should_skip_dependency(req_name: str, has_extras: bool = False) -> bool:
  return True
 
  # Skip common dev tools
- return req_lower.replace("-", "_") in DEV_TOOLS
+ return normalized_req in DEV_TOOLS
 
 
+ @dataclass(frozen=False)
  class LibraryCache:
  """Manage local cache for Python packages."""
 
- def __init__(self, cache_dir: Path | None = None):
- """Initialize cache manager.
+ cache_dir: Path = field(default_factory=lambda: DEFAULT_CACHE_DIR)
+ _dependencies_cache: dict[Path, set[str]] = field(default_factory=dict)
+
+ def __post_init__(self) -> None:
+ self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+ @cached_property
+ def metadata_file(self) -> Path:
+ return self.cache_dir / "metadata.json"
+
+ @cached_property
+ def wheel_files(self) -> list[Path]:
+ return list(self.cache_dir.glob("*.whl"))
+
+ @cached_property
+ def sdist_files(self) -> list[Path]:
+ return list(self.cache_dir.glob("*.tar.gz")) + list(
+ self.cache_dir.glob("*.zip")
+ )
+
+ def collect_dependencies_from_list(self, dependency_list: list[str]) -> set[str]:
+ """Recursively collect all dependencies from package files (wheel or sdist).
 
  Args:
- cache_dir: Cache directory path (default: ~/.pysfi/.cache/pylibpack)
+ dependency_list: List of root package names to start from
+
+ Returns:
+ Set of all required package names (normalized)
  """
- self.cache_dir = cache_dir or DEFAULT_CACHE_DIR
- self.cache_dir.mkdir(parents=True, exist_ok=True)
- self.metadata_file = self.cache_dir / "metadata.json"
- # In-memory cache for extracted dependencies to avoid repeated IO
- self._dependencies_cache: dict[Path, set[str]] = {}
+ all_packages: set[str] = set()
+ visited: set[str] = set()
+ visit_stack: dict[str, int] = {} # Track visit depth for cycle detection
+
+ def visit(pkg_name: str, level: int = 0) -> None:
+ """Visit a package and collect its dependencies."""
+ # Normalize package name for consistency
+ normalized_pkg_name = pkg_name.lower().replace("-", "_")
+
+ # Check for cycles
+ if normalized_pkg_name in visit_stack:
+ logger.warning(
+ f"Potential circular dependency detected: {normalized_pkg_name} (current depth: {level}, "
+ f"previous depth: {visit_stack[normalized_pkg_name]})"
+ )
+ return
+
+ # Check depth limit
+ if level > MAX_DEPTH:
+ logger.warning(
+ f"Maximum dependency depth ({MAX_DEPTH}) reached for {normalized_pkg_name}, stopping recursion"
+ )
+ return
+
+ # Skip if already visited
+ if normalized_pkg_name in visited:
+ return
+
+ # Mark as visited and track depth
+ visited.add(normalized_pkg_name)
+ visit_stack[normalized_pkg_name] = level
+ all_packages.add(normalized_pkg_name)
+
+ # Process dependencies if package exists in map
+ package_path = self.package_map.get(normalized_pkg_name)
+ if package_path:
+ deps = self._extract_dependencies_from_wheel(package_path)
+ logger.debug(f"{' ' * level}{normalized_pkg_name} -> {deps}")
+ for dep in deps:
+ visit(dep, level + 1)
+
+ # Remove from stack when done
+ visit_stack.pop(normalized_pkg_name, None)
+
+ for pkg_name in dependency_list:
+ visit(pkg_name)
+
+ logger.info(f"Collected {len(all_packages)} packages: {all_packages}")
+ return all_packages
+
+ @cached_property
+ def package_map(self) -> dict[str, Path]:
+ """Create a mapping of package names to their file paths with improved efficiency."""
+ packages: dict[str, Path] = {}
+
+ # Process wheel files first (they take precedence)
+ for wheel_file in self.wheel_files:
+ pkg_name = self._extract_package_name_from_wheel(wheel_file)
+ if pkg_name:
+ normalized_pkg_name = normalize_package_name(pkg_name)
+ packages[normalized_pkg_name] = wheel_file
+
+ # Add sdist files only if the package isn't already in the map
+ for sdist_file in self.sdist_files:
+ pkg_name = self._extract_package_name_from_sdist(sdist_file)
+ if pkg_name:
+ normalized_pkg_name = normalize_package_name(pkg_name)
+ if normalized_pkg_name not in packages:
+ packages[normalized_pkg_name] = sdist_file
+
+ return packages
+
+ @cached_property
+ def cache_size(self) -> int:
+ """Calculate total size of cache in bytes."""
+ if not self.cache_dir.exists():
+ return 0
+
+ # Use generator expression for memory efficiency
+ return sum(
+ file_path.stat().st_size
+ for file_path in self.cache_dir.rglob("*")
+ if file_path.is_file()
+ )
+
+ @cached_property
+ def package_count(self) -> int:
+ """Get the count of packages in cache."""
+ return len(self.package_map)
+
+ @cached_property
+ def cache_stats(self) -> dict[str, int]:
+ """Get detailed cache statistics."""
+ return {
+ "total_packages": self.package_count,
+ "wheel_count": len(self.wheel_files),
+ "sdist_count": len(self.sdist_files),
+ "cache_size_bytes": self.cache_size,
+ }
 
  def get_package_path(
  self, package_name: str, version: str | None = None
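
`LibraryCache` is now a dataclass whose `metadata_file`, `wheel_files`, `sdist_files`, `package_map`, and statistics are `cached_property` values, and `collect_dependencies_from_list` walks wheel/sdist metadata recursively with cycle detection and the `MAX_DEPTH` bound. A usage sketch follows; the cache directory and root package names are placeholders. Note that `cached_property` memoizes `package_map` on first access, so files added to the cache afterwards are only seen by a fresh `LibraryCache` instance:

```python
from pathlib import Path

cache = LibraryCache(cache_dir=Path.home() / ".pysfi" / ".cache" / "python-libs")
closure = cache.collect_dependencies_from_list(["requests", "rich"])  # normalized package names
print(cache.cache_stats)  # total_packages, wheel_count, sdist_count, cache_size_bytes
```
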
@@ -404,29 +578,31 @@ class LibraryCache:
  Returns:
  Path to cached package or None
  """
+ normalized_name = normalize_package_name(package_name)
+
  # First try filesystem lookup for wheel files (works even if metadata is missing)
  for whl_file in self.cache_dir.glob("*.whl"):
  parsed_name = self._extract_package_name_from_wheel(whl_file)
- if parsed_name == package_name:
+ if parsed_name == normalized_name:
  logger.debug(f"Cache hit (filesystem wheel): {package_name}")
  return whl_file
 
  # Try filesystem lookup for sdist files (.tar.gz, .zip)
  for sdist_file in self.cache_dir.glob("*.tar.gz"):
  parsed_name = self._extract_package_name_from_sdist(sdist_file)
- if parsed_name == package_name:
+ if parsed_name == normalized_name:
  logger.debug(f"Cache hit (filesystem sdist): {package_name}")
  return sdist_file
  for sdist_file in self.cache_dir.glob("*.zip"):
  parsed_name = self._extract_package_name_from_sdist(sdist_file)
- if parsed_name == package_name:
+ if parsed_name == normalized_name:
  logger.debug(f"Cache hit (filesystem sdist): {package_name}")
  return sdist_file
 
  # Fallback to metadata lookup
  metadata = self._load_metadata()
  for info in metadata.values():
- if info["name"] == package_name and (
+ if info["name"] == normalized_name and (
  version is None or info.get("version") == version
  ):
  path = self.cache_dir / info["path"]
@@ -499,7 +675,7 @@ class LibraryCache:
  """Extract dependencies from wheel METADATA file with caching.
 
  Args:
- wheel_file: Path to wheel file
+ wheel_file: Path to wheel or sdist file
 
  Returns:
  Set of package names (normalized)
@@ -514,47 +690,26 @@ class LibraryCache:
  self._dependencies_cache[wheel_file] = dependencies
  return dependencies
 
- try:
- import re
- import zipfile
+ # Early return if wheel file doesn't exist
+ if not wheel_file.exists():
+ logger.warning(f"Wheel file does not exist: {wheel_file}")
+ self._dependencies_cache[wheel_file] = set()
+ return set()
 
- dependencies: set[str] = set()
+ try:
  with zipfile.ZipFile(wheel_file, "r") as zf:
+ # Find metadata file
  metadata_files = [
  name for name in zf.namelist() if name.endswith("METADATA")
  ]
- if not metadata_files:
- self._dependencies_cache[wheel_file] = dependencies
- return dependencies
-
- metadata_content = zf.read(metadata_files[0]).decode(
- "utf-8", errors="ignore"
- )
-
- # Parse dependencies from METADATA
- for line in metadata_content.splitlines():
- if line.startswith("Requires-Dist:"):
- dep_str = line.split(":", 1)[1].strip()
 
- # Skip extras dependencies
- if re.search(
- r'extra\s*==\s*["\']?([^"\';\s]+)["\']?',
- dep_str,
- re.IGNORECASE,
- ):
- logger.debug(f"Skipping extra dependency: {dep_str}")
- continue
-
- try:
- from packaging.requirements import Requirement
-
- req = Requirement(dep_str)
- if not should_skip_dependency(req.name, bool(req.extras)):
- dep_name = normalize_package_name(req.name)
- dependencies.add(dep_name)
- logger.debug(f"Found core dependency: {dep_name}")
- except Exception:
- pass
+ if not metadata_files:
+ dependencies = set()
+ else:
+ metadata_content = zf.read(metadata_files[0]).decode(
+ "utf-8", errors="ignore"
+ )
+ dependencies = self._parse_metadata_content(metadata_content)
 
  # Cache the result
  self._dependencies_cache[wheel_file] = dependencies
@@ -583,9 +738,7 @@ class LibraryCache:
  with tarfile.open(sdist_file, "r:gz") as tf:
  for member in tf.getmembers():
  # Look for PKG-INFO or METADATA file in the root of the package
- if member.name.endswith("PKG-INFO") or member.name.endswith(
- "METADATA"
- ):
+ if member.name.endswith(("PKG-INFO", "METADATA")):
  # Only use PKG-INFO/METADATA files in the root directory
  # Count the number of slashes in the path
  path_parts = member.name.split("/")
@@ -610,7 +763,7 @@ class LibraryCache:
  with zipfile.ZipFile(sdist_file, "r") as zf:
  for name in zf.namelist():
  # Look for PKG-INFO or METADATA file in the root of the package
- if name.endswith("PKG-INFO") or name.endswith("METADATA"):
+ if name.endswith(("PKG-INFO", "METADATA")):
  path_parts = name.split("/")
  if len(path_parts) == 2 or (
  len(path_parts) == 3
@@ -643,8 +796,6 @@ class LibraryCache:
  Returns:
  Set of package names (normalized)
  """
- import re
-
  dependencies: set[str] = set()
  try:
  for line in metadata_content.splitlines():
@@ -680,8 +831,6 @@ class LibraryCache:
  Returns:
  Set containing the normalized package name, or empty set if should skip
  """
- import re
-
  try:
  # Skip extras dependencies
  if re.search(
@@ -763,6 +912,14 @@ class LibraryCache:
  with open(self.metadata_file, "w", encoding="utf-8") as f:
  json.dump(metadata, f, indent=2)
 
+ @staticmethod
+ def _should_skip_dist_info(file_path: Path) -> bool:
+ """Check if the file path should be skipped because it's a dist-info directory."""
+ if file_path.name.endswith(".dist-info"):
+ return True
+ # Check if any parent directory ends with .dist-info
+ return any(part.endswith(".dist-info") for part in file_path.parts)
+
  def clear_cache(self) -> None:
  """Clear all cached packages."""
  if self.cache_dir.exists():
@@ -772,31 +929,21 @@ class LibraryCache:
  logger.info("Cache cleared")
 
 
+ @dataclass(frozen=True)
  class LibraryDownloader:
  """Download Python packages from PyPI."""
 
- def __init__(
- self,
- cache: LibraryCache,
- python_version: str | None = None,
- mirror: str = "pypi",
- ):
- """Initialize downloader.
+ parent: PyLibPacker
+ cache: LibraryCache
+ _mirror: str = "pypi"
 
- Args:
- cache: Cache manager
- python_version: Target Python version for platform-specific packages
- mirror: PyPI mirror source (pypi, tsinghua, aliyun, ustc, douban, tencent)
- """
- self.cache = cache
- self.python_version = (
- python_version or f"{sys.version_info.major}.{sys.version_info.minor}"
- )
- self.platform_name = (
- platform.system().lower() + "_" + platform.machine().lower()
- )
- self.mirror_url = PYPI_MIRRORS.get(mirror, PYPI_MIRRORS["pypi"])
- self.pip_executable = self._find_pip_executable()
+ @cached_property
+ def mirror_url(self) -> str:
+ return PYPI_MIRRORS.get(self._mirror, PYPI_MIRRORS["pypi"])
+
+ @cached_property
+ def pip_executable(self) -> str | None:
+ return self._find_pip_executable()
 
  @staticmethod
  def _find_pip_executable() -> str | None:
@@ -808,7 +955,7 @@ class LibraryDownloader:
 
  Args:
  dep: Dependency to download
- dest_dir: Destination directory
+ dest_dir: Destination directory (typically cache_dir)
 
  Returns:
  Path to downloaded package file (wheel or sdist) or None
@@ -865,54 +1012,45 @@ class LibraryDownloader:
 
  return None
 
- def download_packages(
- self,
- dependencies: list[Dependency],
- dest_dir: Path,
- max_workers: int = 4,
- ) -> DownloadResult:
+ def download_packages(self, project: Project) -> DownloadResult:
  """Download multiple packages concurrently.
 
  Args:
- dependencies: List of dependencies to download
- dest_dir: Destination directory
- max_workers: Maximum concurrent downloads
+ project: Project containing dependencies to download
 
  Returns:
  DownloadResult containing download statistics
  """
- dest_dir.mkdir(parents=True, exist_ok=True)
-
- # Use list of tuples for thread-safe result collection
- # Tuple format: (package_name, success_flag)
  results_list: list[tuple[str, bool]] = []
  cached_count = 0
  cached_packages: set[str] = set() # Track cached package names efficiently
 
+ dependencies = project.converted_dependencies
  logger.info(f"Total direct dependencies: {len(dependencies)}")
  logger.info(f"Using mirror: {self.mirror_url}")
 
  # Check cache and mark cached packages (single-threaded, safe)
  for dep in dependencies:
  if self.cache.get_package_path(dep.name, dep.version):
- normalized_dep_name = normalize_package_name(dep.name)
- results_list.append((normalized_dep_name, True))
- cached_packages.add(normalized_dep_name)
+ results_list.append((dep.name, True))
+ cached_packages.add(dep.name)
  cached_count += 1
  logger.info(f"Using cached package: {dep}")
 
  # Download remaining packages concurrently
  remaining_deps = [
- dep
- for dep in dependencies
- if normalize_package_name(dep.name) not in cached_packages
+ dep for dep in dependencies if dep.name not in cached_packages
  ]
  downloaded_count = 0
 
  if remaining_deps:
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
+ with ThreadPoolExecutor(
+ max_workers=self.parent.config.max_workers
+ ) as executor:
  future_to_dep = {
- executor.submit(self._download_package, dep, dest_dir): dep
+ executor.submit(
+ self._download_package, dep, self.cache.cache_dir
+ ): dep
  for dep in remaining_deps
  }
 
@@ -920,17 +1058,12 @@ class LibraryDownloader:
  dep = future_to_dep[future]
  try:
  wheel_file = future.result()
- normalized_dep_name = normalize_package_name(dep.name)
- results_list.append((
- normalized_dep_name,
- wheel_file is not None,
- ))
+ results_list.append((dep.name, wheel_file is not None))
  if wheel_file:
  downloaded_count += 1
  except Exception as e:
- normalized_dep_name = normalize_package_name(dep.name)
- logger.error(f"Error processing {normalized_dep_name}: {e}")
- results_list.append((normalized_dep_name, False))
+ logger.error(f"Error processing {dep.name}: {e}")
+ results_list.append((dep.name, False))
 
  # Convert to dictionary for final result
  results = dict(results_list)
@@ -948,36 +1081,58 @@ class LibraryDownloader:
  )
 
 
- class PyLibPack:
+ @dataclass(frozen=True)
+ class PyLibPacker:
  """Main library packer class."""
 
- def __init__(
- self,
- cache_dir: Path | None = None,
- python_version: str | None = None,
- mirror: str = "pypi",
- optimize: bool = True,
- optimization_strategy: SelectiveExtractionStrategy | None = None,
- ):
- """Initialize library packer.
+ working_dir: Path
+ config: PyLibPackerConfig
 
- Args:
- cache_dir: Custom cache directory
- python_version: Target Python version
- mirror: PyPI mirror source (pypi, tsinghua, aliyun, ustc, douban, tencent)
- """
- self.cache = LibraryCache(cache_dir)
- self.downloader = LibraryDownloader(self.cache, python_version, mirror)
+ @cached_property
+ def cache(self) -> LibraryCache:
+ return LibraryCache(cache_dir=self.config.cache_dir or DEFAULT_CACHE_DIR)
 
- # Set up optimization strategy
- self.optimize = optimize
- self.optimization_strategy = (
- optimization_strategy or SelectiveExtractionStrategy() if optimize else None
+ @cached_property
+ def downloader(self) -> LibraryDownloader:
+ return LibraryDownloader(
+ parent=self,
+ cache=self.cache,
+ _mirror=self.config.mirror,
  )
 
- def pack_project(
- self, project: Project, output_dir: Path, max_workers: int = 4
- ) -> PackResult:
+ @cached_property
+ def optimization_strategy(self) -> SelectiveExtractionStrategy | None:
+ return SelectiveExtractionStrategy() if self.config.optimize else None
+
+ @cached_property
+ def solution(self) -> Solution:
+ return Solution.from_directory(root_dir=self.working_dir)
+
+ @cached_property
+ def projects(self) -> dict[str, Project]:
+ # Return projects as a dictionary mapping project names to Project objects
+ # This follows the Solution API correctly
+ return {project.name: project for project in self.solution.projects.values()}
+
+ @cached_property
+ def project_count(self) -> int:
+ """Get the count of projects to avoid repeated computation."""
+ return len(self.projects)
+
+ @cached_property
+ def working_dir_size(self) -> int:
+ """Calculate total size of the working directory in bytes."""
+ if not self.working_dir.exists():
+ return 0
+
+ # Use generator expression for memory efficiency
+ return sum(
+ file_path.stat().st_size
+ for file_path in self.working_dir.rglob("*")
+ if file_path.is_file()
+ )
+
+ def pack_project(self, project: Project) -> PackResult:
  """Pack dependencies for a single project.
 
  Args:
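
`PyLibPacker` replaces the old `PyLibPack` class: it is a frozen dataclass holding only `working_dir` and `config`, and derives its `cache`, `downloader`, `optimization_strategy`, `solution`, and `projects` lazily via `cached_property`. A hedged wiring sketch (paths and settings are placeholders):

```python
from pathlib import Path

config = PyLibPackerConfig(mirror="tsinghua", max_workers=8)
packer = PyLibPacker(working_dir=Path("./my_solution"), config=config)
packer.run()         # one project is packed directly, several are packed concurrently
packer.show_stats()  # project count, dependency total, working-dir size, cache stats
```
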
@@ -987,79 +1142,71 @@ class PyLibPack:
 
  Returns:
  PackResult containing packing statistics
- """
- logger.info(f"\n{'=' * 60}")
- logger.info(f"Packing dependencies for project: {project.name}")
- logger.info(f"{'=' * 60}")
-
- if not project.dependencies:
- logger.warning(f"No dependencies found for {project.name}")
- return PackResult(
- success=False,
- message="No dependencies found",
- project=project.name,
- total=0,
- successful=0,
- failed=0,
- packages_dir=str(output_dir),
- )
 
- logger.info(f"Found {len(project.dependencies)} dependencies")
-
- # Download direct dependencies
- download_result = self.downloader.download_packages(
- project.dependencies,
- self.cache.cache_dir,
- max_workers=max_workers,
- )
-
- # Build package map (including both wheel and sdist files) and collect all required packages recursively
- package_map: dict[str, Path] = {}
-
- # Add wheel files to package map
- for wheel_file in self.cache.cache_dir.glob("*.whl"):
- pkg_name = self.cache._extract_package_name_from_wheel(wheel_file)
- if pkg_name and pkg_name not in package_map: # Prefer wheel files
- normalized_pkg_name = normalize_package_name(pkg_name)
- package_map[normalized_pkg_name] = wheel_file
-
- # Add sdist files to package map (only if not already present)
- for sdist_file in self.cache.cache_dir.glob("*.tar.gz"):
- pkg_name = self.cache._extract_package_name_from_sdist(sdist_file)
- if pkg_name and normalize_package_name(pkg_name) not in package_map:
- normalized_pkg_name = normalize_package_name(pkg_name)
- package_map[normalized_pkg_name] = sdist_file
+ Raises:
+ ValueError: If project has invalid configuration
+ RuntimeError: If packing fails due to system issues
+ """
+ logger.info(f"{120 * '='}")
+ logger.info(f"Packing dependencies for project: `{project.name}`")
 
- for sdist_file in self.cache.cache_dir.glob("*.zip"):
- pkg_name = self.cache._extract_package_name_from_sdist(sdist_file)
- if pkg_name and normalize_package_name(pkg_name) not in package_map:
- normalized_pkg_name = normalize_package_name(pkg_name)
- package_map[normalized_pkg_name] = sdist_file
+ download_result = self.downloader.download_packages(project)
 
  # Recursively collect all dependencies (pass cache instance for dependency extraction)
- all_packages = self._collect_all_dependencies(
- package_map, list(download_result.results), self.cache
+ all_packages = self.cache.collect_dependencies_from_list(
+ list(download_result.results)
  )
 
  # Extract all required packages (keep order of dependency resolution)
  extracted_packages = []
  for pkg_name in all_packages:
- if pkg_name in package_map:
+ logger.info(f"Processing {pkg_name}")
+ if pkg_name in self.cache.package_map:
  # Skip if output directory already exists
- output_pkg_dir = output_dir / pkg_name
+ output_pkg_dir = project.lib_dir / pkg_name
  if output_pkg_dir.exists():
  logger.warning(f"Output directory already exists: {output_pkg_dir}")
  continue
 
- package_file = package_map[pkg_name]
+ package_file = self.cache.package_map[pkg_name]
  logger.info(f"Extracting {package_file.name}...")
- self._extract_package(package_file, output_dir, pkg_name)
+ self._extract_package(package_file, project.lib_dir, pkg_name)
  extracted_packages.append(pkg_name)
  logger.info(f"Extracted {pkg_name}")
+ else:
+ logger.warning(f"Package not found in cache: {pkg_name}")
+ # Attempt to download the missing package
+ logger.info(f"Attempting to download missing package: {pkg_name}")
+
+ # Create a temporary dependency object for the missing package
+ missing_dep = Dependency(
+ name=pkg_name, version=None, extras=set(), requires=set()
+ )
+
+ # Try to download the missing package
+ download_result = self.downloader._download_package(
+ missing_dep, self.cache.cache_dir
+ )
+ if download_result:
+ logger.info(f"Successfully downloaded missing package: {pkg_name}")
+ # Now check again if it's in the cache and extract if available
+ if pkg_name in self.cache.package_map:
+ package_file = self.cache.package_map[pkg_name]
+ logger.info(f"Extracting {package_file.name}...")
+ self._extract_package(package_file, project.lib_dir, pkg_name)
+ extracted_packages.append(pkg_name)
+ logger.info(f"Extracted {pkg_name}")
+ else:
+ logger.error(
+ f"Package {pkg_name} still not found in cache after download attempt"
+ )
+ else:
+ logger.error(f"Failed to download missing package: {pkg_name}")
 
  logger.info(
  f"Pack complete for {project.name}: {download_result.successful}/{download_result.total}"
  )
+ logger.info(f"{120 * '='}")
 
  return PackResult(
  success=download_result.successful > 0,
@@ -1067,76 +1214,10 @@ class PyLibPack:
  total=download_result.total,
  successful=download_result.successful,
  failed=download_result.total - download_result.successful,
- packages_dir=str(output_dir),
+ packages_dir=str(project.lib_dir),
  extracted_packages=extracted_packages,
  )
 
- @staticmethod
- def _collect_all_dependencies(
- package_map: dict[str, Path], root_packages: list[str], cache: LibraryCache
- ) -> set[str]:
- """Recursively collect all dependencies from package files (wheel or sdist).
-
- Args:
- package_map: Mapping of package names to package files (wheel or sdist)
- root_packages: List of root package names to start from
- cache: LibraryCache instance for extracting dependencies
-
- Returns:
- List of all required package names
- """
- all_packages: set[str] = set()
- visited: set[str] = set()
- visit_stack: dict[str, int] = {} # Track visit depth for cycle detection
-
- def visit(pkg_name: str, level: int = 0) -> None:
- """Visit a package and collect its dependencies."""
- # Normalize package name for consistency
- normalized_pkg_name = normalize_package_name(pkg_name)
-
- # Check for cycles
- if normalized_pkg_name in visit_stack:
- logger.warning(
- f"Potential circular dependency detected: {normalized_pkg_name} (current depth: {level}, "
- f"previous depth: {visit_stack[normalized_pkg_name]})"
- )
- return
-
- # Check depth limit
- if level > MAX_DEPTH:
- logger.warning(
- f"Maximum dependency depth ({MAX_DEPTH}) reached for {normalized_pkg_name}, stopping recursion"
- )
- return
-
- if normalized_pkg_name in visited:
- return
-
- # Mark as visited and track depth
- visited.add(normalized_pkg_name)
- visit_stack[normalized_pkg_name] = level
- all_packages.add(normalized_pkg_name)
-
- if normalized_pkg_name in package_map:
- deps = cache._extract_dependencies_from_wheel(
- package_map[normalized_pkg_name]
- )
- logger.debug(f"{' ' * level}{normalized_pkg_name} -> {deps}")
- for dep in deps:
- visit(dep, level + 1)
-
- # Remove from stack when done
- visit_stack.pop(normalized_pkg_name, None)
-
- for pkg_name in root_packages:
- visit(pkg_name)
-
- logger.info(
- f"Collected {len(all_packages)} packages (including recursive dependencies)"
- )
- logger.info(f"Packages: {all_packages}")
- return all_packages
-
  def _build_and_cache_wheel(self, sdist_file: Path, package_name: str) -> None:
  """Build wheel from sdist file and cache it for faster future access.
 
@@ -1199,181 +1280,175 @@ class PyLibPack:
  )
 
  # Handle sdist files (.tar.gz or .zip) - install using pip, and build wheel for cache
- if package_file.suffix == ".gz" or package_file.suffix == ".zip":
- logger.info(f"Installing sdist file for {package_name} using pip...")
-
- # Use pip install --target to install sdist to temporary directory
- with tempfile.TemporaryDirectory() as temp_install_dir:
- result = subprocess.run(
- [
- self.downloader.pip_executable or "pip",
- "install",
- "--target",
- temp_install_dir,
- "--no-deps", # Don't install dependencies (we handle them separately)
- "--no-cache-dir",
- str(package_file),
- ],
- capture_output=True,
- text=True,
- check=False,
- )
+ if package_file.suffix in (".gz", ".zip"):
+ self._handle_sdist_extraction(package_file, dest_dir, package_name)
+ return
 
- if result.returncode != 0:
- logger.error(
- f"Failed to install sdist {package_file.name}: {result.stderr}"
- )
- return
-
- # Copy installed files to dest_dir, skipping *.dist-info directories
- temp_install_path = Path(temp_install_dir)
- for item in temp_install_path.iterdir():
- # Skip dist-info directories
- if item.name.endswith(".dist-info"):
- logger.debug(f"Skipping dist-info directory: {item.name}")
- continue
- dest_path = dest_dir / item.name
- if item.is_dir():
- if dest_path.exists():
- shutil.rmtree(dest_path)
- shutil.copytree(item, dest_path)
- else:
- shutil.copy2(item, dest_path)
+ # Handle wheel files with optional optimization
+ self._handle_wheel_extraction(package_file, dest_dir, package_name)
 
- logger.info(
- f"Installed sdist file for {package_name} to site-packages structure"
+ def _handle_sdist_extraction(
+ self, package_file: Path, dest_dir: Path, package_name: str
+ ) -> None:
+ """Handle extraction of sdist files (.tar.gz or .zip)."""
+ logger.info(f"Installing sdist file for {package_name} using pip...")
+
+ # Use pip install --target to install sdist to temporary directory
+ with tempfile.TemporaryDirectory() as temp_install_dir:
+ result = subprocess.run(
+ [
+ self.downloader.pip_executable or "pip",
+ "install",
+ "--target",
+ temp_install_dir,
+ "--no-deps", # Don't install dependencies (we handle them separately)
+ "--no-cache-dir",
+ str(package_file),
+ ],
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ if result.returncode != 0:
+ logger.error(
+ f"Failed to install sdist {package_file.name}: {result.stderr}"
  )
+ return
 
- # Build wheel from sdist and cache it for faster future access
- logger.info(f"Building wheel from sdist for {package_name}...")
- self._build_and_cache_wheel(package_file, package_name)
- return
+ # Copy installed files to dest_dir, skipping *.dist-info directories
+ temp_install_path = Path(temp_install_dir)
+ # Pre-compute dist-info suffix
+ dist_info_suffix = ".dist-info"
 
- # Handle wheel files with optional optimization
+ for item in temp_install_path.iterdir():
+ # Skip dist-info directories
+ if item.name.endswith(dist_info_suffix):
+ logger.debug(f"Skipping dist-info directory: {item.name}")
+ continue
+ dest_path = dest_dir / item.name
+ if item.is_dir():
+ if dest_path.exists():
+ shutil.rmtree(dest_path)
+ shutil.copytree(item, dest_path)
+ else:
+ shutil.copy2(item, dest_path)
+
+ logger.info(
+ f"Installed sdist file for {package_name} to site-packages structure"
+ )
+
+ # Build wheel from sdist and cache it for faster future access
+ logger.info(f"Building wheel from sdist for {package_name}...")
+ self._build_and_cache_wheel(package_file, package_name)
+
+ def _handle_wheel_extraction(
+ self, package_file: Path, dest_dir: Path, package_name: str
+ ) -> None:
+ """Handle extraction of wheel files with optional optimization."""
  with zipfile.ZipFile(package_file, "r") as zf:
- if self.optimize and self.optimization_strategy:
- # Apply optimization strategy - selectively extract files
- extracted_count = 0
- skipped_count = 0
-
- for file_info in zf.filelist:
- file_path = Path(file_info.filename)
- # Skip dist-info directories
- if file_path.name.endswith(".dist-info") or any(
- parent.endswith(".dist-info") for parent in file_path.parts
- ):
- logger.debug(f"Skipping dist-info: {file_info.filename}")
- skipped_count += 1
- continue
- if self.optimization_strategy.should_extract_file(
- package_name, file_path
- ):
- zf.extract(file_info, dest_dir)
- extracted_count += 1
- logger.debug(f"Extracted {file_path} from {package_name}")
- else:
- skipped_count += 1
- logger.debug(
- f"Skipped {file_path} from {package_name} (filtered by optimization strategy)"
- )
+ if self.config.optimize and self.optimization_strategy:
+ self._extract_with_optimization(zf, dest_dir, package_name)
+ else:
+ self._extract_without_optimization(zf, dest_dir, package_name)
 
- logger.info(
- f"Extraction complete for {package_name}: {extracted_count} extracted, {skipped_count} skipped"
- )
+ def _extract_with_optimization(
+ self, zf: zipfile.ZipFile, dest_dir: Path, package_name: str
+ ) -> None:
+ """Extract wheel with optimization strategy applied."""
+ extracted_count = 0
+ skipped_count = 0
+
+ assert self.optimization_strategy is not None, "Optimization strategy is None"
+ should_extract = self.optimization_strategy.should_extract_file
+
+ for file_info in zf.filelist:
+ file_path = Path(file_info.filename)
+
+ # Skip dist-info directories
+ if LibraryCache._should_skip_dist_info(
+ file_path
+ ): # Use LibraryCache method
+ logger.debug(f"Skipping dist-info: {file_info.filename}")
+ skipped_count += 1
+ continue
+
+ if should_extract(package_name, file_path):
+ zf.extract(file_info, dest_dir)
+ extracted_count += 1
+ logger.debug(f"Extracted {file_path} from {package_name}")
  else:
- # Extract all files without optimization, but skip dist-info directories
- for file_info in zf.filelist:
- file_path = Path(file_info.filename)
- # Skip dist-info directories
- if file_path.name.endswith(".dist-info") or any(
- parent.endswith(".dist-info") for parent in file_path.parts
- ):
- logger.debug(f"Skipping dist-info: {file_info.filename}")
- continue
- zf.extract(file_info, dest_dir)
- logger.info(
- f"All files extracted for {package_name} (no optimization applied, dist-info skipped)"
+ skipped_count += 1
+ logger.debug(
+ f"Skipped {file_path} from {package_name} (filtered by optimization strategy)"
  )
 
- def pack(
- self,
- working_dir: Path,
- max_workers: int = 4,
- ) -> BatchPackResult:
- """Pack project dependencies from base directory.
+ logger.info(
+ f"Extraction complete for {package_name}: {extracted_count} extracted, {skipped_count} skipped"
+ )
 
- Args:
- base_dir: Base directory containing projects or a single project
- output_dir: Output directory (default: base_dir/dist/site-packages)
- max_workers: Maximum concurrent downloads
+ def _extract_without_optimization(
+ self, zf: zipfile.ZipFile, dest_dir: Path, package_name: str
+ ) -> None:
+ """Extract wheel without optimization, but skip dist-info directories."""
+ # Pre-compute the skip function
+ should_skip = LibraryCache._should_skip_dist_info # Use LibraryCache method
+
+ for file_info in zf.filelist:
+ file_path = Path(file_info.filename)
+ # Skip dist-info directories
+ if not should_skip(file_path):
+ zf.extract(file_info, dest_dir)
+ logger.info(
+ f"All files extracted for {package_name} (no optimization applied, dist-info skipped)"
+ )
 
- Returns:
- BatchPackResult containing batch packing statistics
- """
- output_dir = working_dir / "dist" / "site-packages"
- logger.info(f"Starting dependency pack for: {working_dir}")
-
- projects = Solution.from_directory(root_dir=working_dir).projects
- if not projects:
- logger.error("Failed to load project information")
- return BatchPackResult(
- success=False,
- total=0,
- successful=0,
- failed=0,
- output_dir=str(output_dir),
- total_time=0.0,
- )
+ @staticmethod
+ def _should_skip_dist_info(file_path: Path) -> bool:
+ """Check if the file path should be skipped because it's a dist-info directory."""
+ if file_path.name.endswith(".dist-info"):
+ return True
+ # Check if any parent directory ends with .dist-info
+ return any(part.endswith(".dist-info") for part in file_path.parts)
 
- logger.info(f"Found {len(projects)} project(s) to process")
+ def run(self) -> None:
+ """Pack project dependencies from base directory with concurrent processing."""
+ t0 = time.perf_counter()
+ project_count = self.project_count # Use cached property
 
- # Process each project
- total_start = time.perf_counter()
- success_count = 0
- failed_projects: list[str] = []
- use_current_dir = len(projects) == 1
+ logger.info(f"Starting to pack {project_count} projects")
 
- for project in projects.values():
- project_dir = working_dir if use_current_dir else working_dir / project.name
+ if project_count == 1:
+ # Single project: process directly
+ project = next(iter(self.projects.values()))
+ self.pack_project(project)
+ else:
+ # Multiple projects: process concurrently
+ logger.info(f"Packing {project_count} projects concurrently...")
+ with ThreadPoolExecutor(max_workers=self.config.max_workers) as executor:
+ futures = [
+ executor.submit(self.pack_project, project)
+ for project in self.projects.values()
+ ]
 
- if not project_dir.is_dir():
- logger.warning(f"Project directory not found: {project_dir}, skipping")
- failed_projects.append(project.name)
- continue
+ # Wait for all tasks to complete
+ for future in as_completed(futures):
+ try:
+ future.result()
+ except Exception as e:
+ logger.error(f"Project packing failed: {e}")
 
- result = self.pack_project(project, output_dir, max_workers)
+ elapsed_time = time.perf_counter() - t0
+ logger.info(f"Packed {project_count} projects in {elapsed_time:.2f}s")
 
- if result.success:
- success_count += 1
- else:
- failed_projects.append(project.name)
-
- total_time = time.perf_counter() - total_start
-
- # Summary
- logger.info(f"\n{'=' * 60}")
- logger.info("Summary")
- logger.info(f"{'=' * 60}")
- logger.info(f"Total projects: {len(projects)}")
- logger.info(f"Successfully packed: {success_count}")
- logger.info(f"Failed: {len(failed_projects)}")
- if failed_projects:
- logger.info(f"Failed projects: {', '.join(failed_projects)}")
- logger.info(f"Total time: {total_time:.2f}s")
-
- return BatchPackResult(
- success=len(failed_projects) == 0,
- total=len(projects),
- successful=success_count,
- failed=len(failed_projects),
- failed_projects=failed_projects,
- output_dir=str(output_dir),
- total_time=total_time,
- )
+ # Log cache statistics after packing
+ logger.info(f"Cache statistics: {self.cache.cache_stats}")
 
- def clear_cache(self) -> None:
- """Clear the package cache."""
- self.cache.clear_cache()
+ def show_stats(self):
+ logger.info(f"Project count: {self.project_count}")
+ logger.info(f"Total dependencies: {len(self.solution.dependencies)}")
+ logger.info(f"Working directory size: {self.working_dir_size} bytes")
+ logger.info(f"Cache statistics: {self.cache.cache_stats}")
 
 
  def parse_args() -> argparse.Namespace:
@@ -1397,12 +1472,16 @@ def parse_args() -> argparse.Namespace:
  "--python-version", type=str, default=None, help="Target Python version"
  )
  parser.add_argument(
- "-j", "--jobs", type=int, default=4, help="Maximum concurrent downloads"
+ "-j",
+ "--jobs",
+ type=int,
+ default=DEFAULT_MAX_WORKERS,
+ help="Maximum concurrent downloads",
  )
  parser.add_argument(
  "--mirror",
  type=str,
- default="aliyun",
+ default=DEFAULT_MIRROR,
  choices=("pypi", "tsinghua", "aliyun", "ustc", "douban", "tencent"),
  help="PyPI mirror source for faster downloads in China",
  )
@@ -1413,6 +1492,18 @@ def parse_args() -> argparse.Namespace:
  action="store_true",
  help="Disable package optimization (extract all files)",
  )
+
+ # Add option to show package size
+ parser.add_argument(
+ "--show-size",
+ action="store_true",
+ help="Show packed package size",
+ )
+ parser.add_argument(
+ "--show-stats",
+ action="store_true",
+ help="Show detailed project statistics",
+ )
  parser.add_argument(
  "--list-optimizations",
  "-lo",
@@ -1426,6 +1517,10 @@ def main() -> None:
  """Main entry point for pylibpack tool."""
  args = parse_args()
 
+ # Setup logging
+ if args.debug:
+ logger.setLevel(logging.DEBUG)
+
  if args.list_optimizations:
  strategy = SelectiveExtractionStrategy()
  logging.info("Available optimization rules:")
@@ -1433,25 +1528,28 @@ def main() -> None:
  logging.info(f" - {lib_name}")
  return
 
- # Setup logging
- if args.debug:
- logger.setLevel(logging.DEBUG)
-
- # Initialize packer
- cache_dir = Path(args.cache_dir) if args.cache_dir else None
- optimize = not args.no_optimize
- packer = PyLibPack(
- cache_dir=cache_dir,
- python_version=args.python_version,
+ # Create configuration from arguments
+ config = PyLibPackerConfig(
+ cache_dir=Path(args.cache_dir) if args.cache_dir else DEFAULT_CACHE_DIR,
  mirror=args.mirror,
- optimize=optimize,
+ optimize=not args.no_optimize,
+ max_workers=args.jobs,
  )
 
- packer.pack(
+ # Register auto-save on exit
+ atexit.register(config.save)
+
+ packer = PyLibPacker(
  working_dir=Path(args.directory),
- max_workers=args.jobs,
+ config=config,
  )
 
+ # Execute pack operation
+ packer.run()
+
+ if args.show_stats:
+ packer.show_stats()
+
 
  if __name__ == "__main__":
  main()
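
The reworked `main()` builds a `PyLibPackerConfig` from the CLI arguments and registers `config.save` with `atexit`, so the effective settings are written back to `~/.pysfi/pylibpack.json` when the process exits. A minimal sketch of that auto-save pattern (values are examples only):

```python
import atexit

config = PyLibPackerConfig(mirror="pypi", max_workers=8)
atexit.register(config.save)  # persists the final settings on normal interpreter exit
```
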