pysfi 0.1.7__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {pysfi-0.1.7.dist-info → pysfi-0.1.11.dist-info}/METADATA +11 -9
  2. pysfi-0.1.11.dist-info/RECORD +60 -0
  3. pysfi-0.1.11.dist-info/entry_points.txt +28 -0
  4. sfi/__init__.py +1 -1
  5. sfi/alarmclock/alarmclock.py +40 -40
  6. sfi/bumpversion/__init__.py +1 -1
  7. sfi/cleanbuild/cleanbuild.py +155 -0
  8. sfi/condasetup/condasetup.py +116 -0
  9. sfi/docscan/__init__.py +1 -1
  10. sfi/docscan/docscan.py +407 -103
  11. sfi/docscan/docscan_gui.py +1282 -596
  12. sfi/docscan/lang/eng.py +152 -0
  13. sfi/docscan/lang/zhcn.py +170 -0
  14. sfi/filedate/filedate.py +185 -112
  15. sfi/gittool/__init__.py +2 -0
  16. sfi/gittool/gittool.py +401 -0
  17. sfi/llmclient/llmclient.py +592 -0
  18. sfi/llmquantize/llmquantize.py +480 -0
  19. sfi/llmserver/llmserver.py +335 -0
  20. sfi/makepython/makepython.py +31 -30
  21. sfi/pdfsplit/pdfsplit.py +173 -173
  22. sfi/pyarchive/pyarchive.py +418 -0
  23. sfi/pyembedinstall/pyembedinstall.py +629 -0
  24. sfi/pylibpack/__init__.py +0 -0
  25. sfi/pylibpack/pylibpack.py +1457 -0
  26. sfi/pylibpack/rules/numpy.json +22 -0
  27. sfi/pylibpack/rules/pymupdf.json +10 -0
  28. sfi/pylibpack/rules/pyqt5.json +19 -0
  29. sfi/pylibpack/rules/pyside2.json +23 -0
  30. sfi/pylibpack/rules/scipy.json +23 -0
  31. sfi/pylibpack/rules/shiboken2.json +24 -0
  32. sfi/pyloadergen/pyloadergen.py +512 -227
  33. sfi/pypack/__init__.py +0 -0
  34. sfi/pypack/pypack.py +1142 -0
  35. sfi/pyprojectparse/__init__.py +0 -0
  36. sfi/pyprojectparse/pyprojectparse.py +500 -0
  37. sfi/pysourcepack/pysourcepack.py +308 -0
  38. sfi/quizbase/__init__.py +0 -0
  39. sfi/quizbase/quizbase.py +828 -0
  40. sfi/quizbase/quizbase_gui.py +987 -0
  41. sfi/regexvalidate/__init__.py +0 -0
  42. sfi/regexvalidate/regex_help.html +284 -0
  43. sfi/regexvalidate/regexvalidate.py +468 -0
  44. sfi/taskkill/taskkill.py +0 -2
  45. sfi/workflowengine/__init__.py +0 -0
  46. sfi/workflowengine/workflowengine.py +444 -0
  47. pysfi-0.1.7.dist-info/RECORD +0 -31
  48. pysfi-0.1.7.dist-info/entry_points.txt +0 -15
  49. sfi/embedinstall/embedinstall.py +0 -418
  50. sfi/projectparse/projectparse.py +0 -152
  51. sfi/pypacker/fspacker.py +0 -91
  52. {pysfi-0.1.7.dist-info → pysfi-0.1.11.dist-info}/WHEEL +0 -0
  53. /sfi/{embedinstall → docscan/lang}/__init__.py +0 -0
  54. /sfi/{projectparse → llmquantize}/__init__.py +0 -0
  55. /sfi/{pypacker → pyembedinstall}/__init__.py +0 -0
@@ -0,0 +1,1457 @@
1
+ """Python Library Packager - Download and pack Python dependencies with caching support.
2
+
3
+ This module provides functionality to:
4
+ 1. Read project information from projects.json or run pyprojectparse if needed
5
+ 2. Download dependencies to local .cache directory
6
+ 3. Pack dependencies into a distributable format
7
+ 4. Support batch processing multiple projects recursively
8
+ """
9
+
10
from __future__ import annotations

import argparse
import json
import logging
import platform
import re
import shutil
import subprocess
import sys
import tarfile
import tempfile
import time
import zipfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any, Pattern

from sfi.pyprojectparse.pyprojectparse import Project, Solution
30
+
31
# Module-wide logging: timestamped records, INFO level and above.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

__version__ = "1.0.0"
__build__ = "20260120"

# Default on-disk location for downloaded package artifacts.
DEFAULT_CACHE_DIR = Path.home() / ".pysfi" / ".cache" / "python-libs"

MAX_DEPTH = 50  # Maximum recursion depth to prevent infinite loops

# PyPI "simple" index URLs, selectable by short mirror name
# (see LibraryDownloader's ``mirror`` argument).
PYPI_MIRRORS = {
    "pypi": "https://pypi.org/simple",
    "tsinghua": "https://pypi.tuna.tsinghua.edu.cn/simple",
    "aliyun": "https://mirrors.aliyun.com/pypi/simple/",
    "ustc": "https://pypi.mirrors.ustc.edu.cn/simple/",
    "douban": "https://pypi.douban.com/simple/",
    "tencent": "https://mirrors.cloud.tencent.com/pypi/simple",
}
51
+
52
+
53
@dataclass(frozen=True)
class Dependency:
    """A single Python package requirement.

    ``version`` carries the raw version-specifier text (if any); ``extras``
    and ``requires`` hold optional extras and requirement names.
    """

    name: str
    version: str | None = None
    extras: set[str] = field(default_factory=set)
    requires: set[str] = field(default_factory=set)

    def __post_init__(self):
        """Canonicalize ``name`` (frozen dataclass, so bypass the setattr guard)."""
        object.__setattr__(self, "name", normalize_package_name(self.name))

    def __str__(self) -> str:
        """Render as a pip-style requirement string, e.g. ``pkg[a,b]>=1.0``."""
        specifier = self.version or ""
        if not self.extras:
            return f"{self.name}{specifier}"
        extras_text = ",".join(sorted(self.extras))
        return f"{self.name}[{extras_text}]{specifier}"
71
+
72
+
73
@dataclass
class DownloadResult:
    """Result of downloading packages."""

    # Per-package outcome keyed by normalized package name; True means the
    # package is now available (from cache or freshly downloaded).
    results: dict[str, bool] = field(default_factory=dict)
    # Number of dependencies that were requested.
    total: int = 0
    # Count of packages available after the run (cached + downloaded).
    successful: int = 0
    # How many requests were satisfied from the local cache.
    cached: int = 0
    # How many packages were freshly downloaded.
    downloaded: int = 0
82
+
83
+
84
@dataclass
class PackResult:
    """Result of packing project dependencies."""

    # Overall success flag for this project's pack step.
    success: bool
    # Project name the result belongs to.
    project: str
    # Dependency counts for the pack step.
    total: int
    successful: int
    failed: int
    # Directory where the packed packages were written.
    packages_dir: str
    # Names of packages that were extracted into the output.
    extracted_packages: list[str] = field(default_factory=list)
    # Optional human-readable status/error message.
    message: str = ""
96
+
97
+
98
@dataclass
class BatchPackResult:
    """Result of packing multiple projects."""

    # True when the batch as a whole succeeded.
    success: bool
    # Project counts for the batch run.
    total: int
    successful: int
    failed: int
    # Names of the projects whose pack step failed.
    failed_projects: list[str] = field(default_factory=list)
    # Root output directory used for the batch.
    output_dir: str = ""
    # Wall-clock duration of the whole batch, in seconds.
    total_time: float = 0.0
109
+
110
+
111
@dataclass
class CacheMetadata:
    """Metadata for cached package."""

    # Normalized package name.
    name: str
    # Recorded version string; callers pass Dependency.version, which may be
    # a raw specifier rather than a concrete version — treat accordingly.
    version: str | None
    # Path of the cached artifact, relative to the cache directory.
    path: str
    # Unix timestamp of when the package was cached.
    timestamp: float
119
+
120
+
121
# Package names treated as development-only tooling and skipped when
# collecting runtime dependencies (matched after folding "-" to "_").
# NOTE(review): "unittest" and "mock" are stdlib module names rather than
# PyPI distribution names — presumably listed defensively; confirm.
DEV_TOOLS = frozenset({
    "sphinx",
    "sphinx_rtd_theme",
    "watchdog",
    "pytest",
    "coverage",
    "black",
    "mypy",
    "flake8",
    "pylint",
    "isort",
    "pre-commit",
    "tox",
    "nose",
    "unittest",
    "mock",
})
# Substring markers for dev/test/docs/lint/example requirements
# (substring match against the lowercased requirement name).
DEV_PATTERNS = frozenset({"dev", "test", "docs", "lint", "example"})
# Substring markers for typing-stub style requirements.
TYPING_PATTERNS = frozenset({"stubs", "typing", "types"})
140
+
141
+
142
@dataclass
class OptimizationRule:
    """Per-library extraction rule: regex patterns that drop or keep files.

    Attributes:
        library_name: The name of the library to apply the rule to.
        exclude_patterns: A list of patterns to exclude from the library.
        include_patterns: A list of patterns to include in the library.

    """

    library_name: str = field(default_factory=str)
    exclude_patterns: list[str] = field(default_factory=list)
    include_patterns: list[str] = field(default_factory=list)

    def __post_init__(self):
        """Pre-compile both pattern lists so later matching is cheap."""
        self.exclude_compiled: list[Pattern] = list(
            map(re.compile, self.exclude_patterns)
        )
        self.include_compiled: list[Pattern] = list(
            map(re.compile, self.include_patterns)
        )
165
+
166
+
167
class SelectiveExtractionStrategy:
    """Optimization strategy that applies inclusion/exclusion rules to specific libraries.

    Filtering happens in three stages:
    1. Universal exclusion rules (doc, test, example, demo, etc.) for every library.
    2. Library-specific exclusion rules.
    3. Library-specific inclusion rules (when present, only matching files are kept).
    """

    # Universal exclusion patterns - applied to all libraries
    UNIVERSAL_EXCLUDE_PATTERNS = frozenset({
        "doc",
        "docs",
        "test",
        "tests",
        "example",
        "examples",
        "demo",
        "demos",
        "sample",
        "samples",
        "benchmark",
        "benchmarks",
        "tutorial",
        "tutorials",
        "notebook",
        "notebooks",
        "license",
        "licenses",
    })

    def __init__(
        self,
        rules: list[OptimizationRule] | None = None,
        apply_universal_rules: bool = True,
    ):
        """Initialize the strategy with optimization rules.

        Args:
            rules: List of optimization rules to apply; when omitted, the
                bundled JSON rule files under ``rules/`` are loaded instead.
            apply_universal_rules: Whether to apply universal exclusion rules
                (default: True)
        """
        self.rules: dict[str, OptimizationRule] = {}
        self.apply_universal_rules = apply_universal_rules

        if rules:
            for rule in rules:
                self.rules[rule.library_name.lower()] = rule
        else:
            # No explicit rules given: fall back to the bundled defaults.
            self._setup_default_rules()

        # Pre-compile universal patterns once; each matches the pattern word
        # as a whole path segment (at start/end or between slashes).
        self._universal_exclude_compiled = [
            re.compile(f"(^|/)({pattern})(/|$)", re.IGNORECASE)
            for pattern in self.UNIVERSAL_EXCLUDE_PATTERNS
        ]

    def _setup_default_rules(self):
        """Load default optimization rules from the bundled ``rules/*.json`` files.

        Each rule file must provide ``library_name``; ``exclude_patterns`` and
        ``include_patterns`` are optional and default to empty lists, so a
        rule file that specifies only one of the two lists still loads.
        """
        # Get the rules directory
        rules_dir = Path(__file__).parent / "rules"

        if not rules_dir.exists() or not rules_dir.is_dir():
            logger.warning(f"Rules directory not found: {rules_dir}")
            return

        # Load all JSON rule files
        for rule_file in rules_dir.glob("*.json"):
            try:
                with open(rule_file, encoding="utf-8") as f:
                    rule_data = json.load(f)

                # Convert JSON data to OptimizationRule. Missing pattern
                # lists default to [] instead of rejecting the whole file.
                rule = OptimizationRule(
                    library_name=rule_data["library_name"],
                    exclude_patterns=rule_data.get("exclude_patterns", []),
                    include_patterns=rule_data.get("include_patterns", []),
                )

                self.rules[rule.library_name.lower()] = rule
                logger.debug(
                    f"Loaded optimization rule for {rule.library_name} from {rule_file.name}"
                )

            except Exception as e:
                logger.warning(f"Failed to load rule from {rule_file.name}: {e}")

    def _matches_universal_exclude_pattern(self, relative_path: str) -> bool:
        """Check if file path matches any universal exclusion pattern.

        Args:
            relative_path: Relative (posix-style, lowercased) path to the file

        Returns:
            True if path should be excluded, False otherwise
        """
        return any(
            pattern.search(relative_path)
            for pattern in self._universal_exclude_compiled
        )

    def should_extract_file(self, library_name: str, file_path: Path) -> bool:
        """Determine if a file should be extracted based on library-specific rules.

        Args:
            library_name: Name of the library
            file_path: Path to the file to check

        Returns:
            True if the file should be extracted, False otherwise
        """
        lib_name_lower = library_name.lower()
        relative_path = file_path.as_posix().lower()

        # Stage 1: universal exclusion rules apply to every library.
        if self.apply_universal_rules and self._matches_universal_exclude_pattern(
            relative_path
        ):
            logger.debug(
                f"Excluding {file_path} from {library_name} (matches universal exclusion pattern)"
            )
            return False

        # No library-specific rule: keep everything that survived stage 1.
        if lib_name_lower not in self.rules:
            logger.debug(f"No specific rules for {library_name}, including {file_path}")
            return True

        rule = self.rules[lib_name_lower]

        logger.debug(
            f"Checking {file_path} for {library_name} with {len(rule.exclude_compiled)} exclude and {len(rule.include_compiled)} include patterns"
        )

        # Stage 2: library-specific exclusions — any match drops the file.
        for exclude_pattern in rule.exclude_compiled:
            if exclude_pattern.search(relative_path):
                logger.debug(
                    f"Excluding {file_path} from {library_name} (matches exclude pattern: {exclude_pattern.pattern})"
                )
                return False

        # Stage 3: when inclusion patterns exist, a file must match one.
        if rule.include_compiled:
            for include_pattern in rule.include_compiled:
                if include_pattern.search(relative_path):
                    logger.debug(
                        f"Including {file_path} from {library_name} (matches include pattern: {include_pattern.pattern})"
                    )
                    return True
            # Inclusion rules exist but none matched: exclude.
            logger.debug(
                f"Excluding {file_path} from {library_name} (doesn't match any include patterns)"
            )
            return False

        # No inclusion rules: keep the file (it passed all exclusions).
        logger.debug(
            f"Including {file_path} from {library_name} (passed exclusion filters)"
        )
        return True

    def get_library_names_with_rules(self) -> set[str]:
        """Get the names of libraries that have optimization rules defined.

        Returns:
            Set of (lower-cased) library names with optimization rules
        """
        return set(self.rules.keys())
338
+
339
+
340
def normalize_package_name(name: str) -> str:
    """Normalize package name to lowercase with underscores.

    Follows the PEP 503 convention that runs of ``-``, ``_`` and ``.`` are
    equivalent separators, folding them all to a single ``_`` (the form used
    in wheel file names). Previously only ``-`` was replaced, so names such
    as ``zope.interface`` never matched their cached wheel
    (``zope_interface-...``).

    Args:
        name: Package name to normalize

    Returns:
        Normalized package name
    """
    return re.sub(r"[-_.]+", "_", name.lower())
350
+
351
+
352
def should_skip_dependency(req_name: str, has_extras: bool = False) -> bool:
    """Decide whether a requirement is development-only noise to ignore.

    Args:
        req_name: Package name
        has_extras: Whether the requirement has extras

    Returns:
        True if should skip, False otherwise
    """
    # Requirements carrying extras are never followed.
    if has_extras:
        return True

    lowered = req_name.lower()

    # Substring markers: dev/test/docs/lint/example, then typing/stub names.
    for marker_set in (DEV_PATTERNS, TYPING_PATTERNS):
        if any(marker in lowered for marker in marker_set):
            return True

    # Finally, drop well-known development tools (DEV_TOOLS uses "_" form).
    return lowered.replace("-", "_") in DEV_TOOLS
378
+
379
+
380
class LibraryCache:
    """Manage local cache for Python packages.

    Cached artifacts (wheels, sdists, or extracted directories) live directly
    under ``cache_dir``; a ``metadata.json`` sidecar records name/version
    information per package.
    """

    def __init__(self, cache_dir: Path | None = None):
        """Initialize cache manager.

        Args:
            cache_dir: Cache directory path (default: ~/.pysfi/.cache/python-libs)
        """
        self.cache_dir = cache_dir or DEFAULT_CACHE_DIR
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.metadata_file = self.cache_dir / "metadata.json"
        # In-memory cache for extracted dependencies to avoid repeated IO
        self._dependencies_cache: dict[Path, set[str]] = {}

    def get_package_path(
        self, package_name: str, version: str | None = None
    ) -> Path | None:
        """Get cached package path if available.

        The filesystem scan matches by name only; ``version`` (which may be a
        raw specifier string rather than a concrete version) is only honored
        by the metadata fallback.

        Args:
            package_name: Name of the package
            version: Version (optional)

        Returns:
            Path to cached package or None
        """
        # Filesystem lookup for wheels works even if metadata.json is missing.
        for whl_file in self.cache_dir.glob("*.whl"):
            if self._extract_package_name_from_wheel(whl_file) == package_name:
                logger.debug(f"Cache hit (filesystem wheel): {package_name}")
                return whl_file

        # Filesystem lookup for sdists (.tar.gz, .zip).
        for pattern in ("*.tar.gz", "*.zip"):
            for sdist_file in self.cache_dir.glob(pattern):
                if self._extract_package_name_from_sdist(sdist_file) == package_name:
                    logger.debug(f"Cache hit (filesystem sdist): {package_name}")
                    return sdist_file

        # Fallback to metadata lookup (the only path that filters by version).
        metadata = self._load_metadata()
        for info in metadata.values():
            if info["name"] == package_name and (
                version is None or info.get("version") == version
            ):
                path = self.cache_dir / info["path"]
                if path.exists():
                    logger.debug(f"Cache hit (metadata): {package_name}")
                    return path

        logger.debug(f"Cache miss: {package_name}")
        return None

    @staticmethod
    def _extract_package_name_from_wheel(wheel_file: Path) -> str | None:
        """Extract package name from wheel file.

        Wheel file names are ``name-version-...`` with the name already in
        underscore form, so the first ``-`` segment is the package name.

        Args:
            wheel_file: Path to wheel file

        Returns:
            Package name or None
        """
        try:
            filename = wheel_file.stem  # Remove .whl extension
            parts = filename.split("-")
            if parts:
                return normalize_package_name(parts[0])
        except Exception:
            pass
        return None

    @staticmethod
    def _extract_package_name_from_sdist(sdist_file: Path) -> str | None:
        """Extract package name from source distribution file (.tar.gz or .zip).

        Args:
            sdist_file: Path to sdist file

        Returns:
            Package name or None
        """
        try:
            filename = sdist_file.name
            # Strip the archive extension: "pkg-1.0.0.tar.gz" -> "pkg-1.0.0".
            if filename.endswith(".tar.gz"):
                base = filename[: -len(".tar.gz")]
            elif filename.endswith(".zip"):
                base = filename[: -len(".zip")]
            else:
                return None
            # Split from the right so dashes in the name survive:
            # "pkg-name-1.0.0" -> ("pkg-name", "1.0.0").
            name_part = base.rsplit("-", 1)[0]
            if name_part:
                return normalize_package_name(name_part)
        except Exception as e:
            logger.debug(f"Failed to extract package name from {sdist_file}: {e}")
        return None

    def _extract_dependencies_from_wheel(self, wheel_file: Path) -> set[str]:
        """Extract dependencies from wheel METADATA file with caching.

        Sdist archives (.tar.gz/.zip) are transparently routed to the
        dedicated sdist parser.

        Args:
            wheel_file: Path to wheel (or sdist) file

        Returns:
            Set of package names (normalized)
        """
        # Check the in-memory memo first.
        if wheel_file in self._dependencies_cache:
            return self._dependencies_cache[wheel_file]

        # Check if it's an sdist file (.tar.gz or .zip)
        if wheel_file.suffix in (".gz", ".zip"):
            dependencies = self._extract_dependencies_from_sdist(wheel_file)
            self._dependencies_cache[wheel_file] = dependencies
            return dependencies

        dependencies: set[str] = set()
        try:
            with zipfile.ZipFile(wheel_file, "r") as zf:
                metadata_files = [
                    name for name in zf.namelist() if name.endswith("METADATA")
                ]
                if metadata_files:
                    metadata_content = zf.read(metadata_files[0]).decode(
                        "utf-8", errors="ignore"
                    )
                    # Parse Requires-Dist lines via the shared requirement
                    # parser (skips extras and dev-only dependencies), keeping
                    # wheel and sdist parsing consistent.
                    for line in metadata_content.splitlines():
                        if line.startswith("Requires-Dist:"):
                            dep_str = line.split(":", 1)[1].strip()
                            dependencies.update(
                                self._parse_single_requirement(dep_str)
                            )
        except Exception as e:
            logger.warning(
                f"Failed to extract dependencies from {wheel_file.name}: {e}"
            )
            return set()

        # Cache the result
        self._dependencies_cache[wheel_file] = dependencies
        return dependencies

    @staticmethod
    def _is_root_metadata(archive_path: str) -> bool:
        """Return True when *archive_path* is a top-level PKG-INFO/METADATA entry."""
        parts = archive_path.split("/")
        return len(parts) == 2 or (
            len(parts) == 3 and parts[2] in ("PKG-INFO", "METADATA")
        )

    def _extract_dependencies_from_sdist(self, sdist_file: Path) -> set[str]:
        """Extract dependencies from source distribution file with caching.

        Args:
            sdist_file: Path to sdist file (.tar.gz or .zip)

        Returns:
            Set of package names (normalized)
        """
        dependencies: set[str] = set()

        try:
            # Handle .tar.gz files
            if sdist_file.suffix == ".gz":
                with tarfile.open(sdist_file, "r:gz") as tf:
                    for member in tf.getmembers():
                        if not (
                            member.name.endswith("PKG-INFO")
                            or member.name.endswith("METADATA")
                        ):
                            continue
                        # Only use PKG-INFO/METADATA in the package root.
                        if not self._is_root_metadata(member.name):
                            continue
                        content = tf.extractfile(member)
                        if content:
                            metadata_content = content.read().decode(
                                "utf-8", errors="ignore"
                            )
                            dependencies = self._parse_metadata_content(
                                metadata_content
                            )
                            logger.debug(
                                f"Extracted dependencies from {member.name} in {sdist_file.name}"
                            )
                        break
            # Handle .zip files
            elif sdist_file.suffix == ".zip":
                with zipfile.ZipFile(sdist_file, "r") as zf:
                    for name in zf.namelist():
                        if not (
                            name.endswith("PKG-INFO") or name.endswith("METADATA")
                        ):
                            continue
                        if not self._is_root_metadata(name):
                            continue
                        metadata_content = zf.read(name).decode(
                            "utf-8", errors="ignore"
                        )
                        dependencies = self._parse_metadata_content(metadata_content)
                        logger.debug(
                            f"Extracted dependencies from {name} in {sdist_file.name}"
                        )
                        break
        except Exception as e:
            logger.warning(
                f"Failed to extract dependencies from sdist {sdist_file.name}: {e}"
            )

        return dependencies

    @staticmethod
    def _parse_metadata_content(metadata_content: str) -> set[str]:
        """Parse metadata content (PKG-INFO or METADATA) to extract dependencies.

        Args:
            metadata_content: Content of PKG-INFO or METADATA file

        Returns:
            Set of package names (normalized)
        """
        dependencies: set[str] = set()
        try:
            for line in metadata_content.splitlines():
                # Look for Requires-Dist or Requires field
                if line.startswith("Requires-Dist:") or line.startswith("Requires:"):
                    dep_str = line.split(":", 1)[1].strip()
                    if line.startswith("Requires:"):
                        # Legacy Requires field: comma-separated list.
                        for req_str in re.split(r",\s*", dep_str):
                            req_str = req_str.strip()
                            if req_str:
                                dependencies.update(
                                    LibraryCache._parse_single_requirement(req_str)
                                )
                    else:
                        # Requires-Dist field: one requirement per line.
                        dependencies.update(
                            LibraryCache._parse_single_requirement(dep_str)
                        )
        except Exception as e:
            logger.debug(f"Failed to parse metadata content: {e}")

        return dependencies

    @staticmethod
    def _parse_single_requirement(req_str: str) -> set[str]:
        """Parse a single requirement string and extract package name.

        Args:
            req_str: Requirement string (e.g., "numpy>=1.20.0", "package[extra]>=1.0")

        Returns:
            Set containing the normalized package name, or empty set if should skip
        """
        try:
            # Skip requirements guarded by an "extra == ..." marker.
            if re.search(
                r'extra\s*==\s*["\']?([^"\';\s]+)["\']?', req_str, re.IGNORECASE
            ):
                logger.debug(f"Skipping extra dependency: {req_str}")
                return set()

            # packaging is a third-party import; keep it local so the module
            # imports even when packaging is absent.
            from packaging.requirements import Requirement

            req = Requirement(req_str)
            if not should_skip_dependency(req.name, bool(req.extras)):
                dep_name = normalize_package_name(req.name)
                logger.debug(f"Found core dependency: {dep_name}")
                return {dep_name}
        except Exception:
            pass

        return set()

    def add_package(
        self, package_name: str, package_path: Path, version: str | None = None
    ) -> None:
        """Add package to cache.

        Args:
            package_name: Name of the package
            package_path: Path to package files
            version: Package version
        """
        # Normalize package name to ensure consistency
        normalized_name = normalize_package_name(package_name)

        # Copy package files to cache (flat structure for wheels, nested for dirs)
        if package_path.is_dir():
            dest_dir = self.cache_dir / normalized_name
            if dest_dir.exists():
                shutil.rmtree(dest_dir)
            shutil.copytree(package_path, dest_dir)
            relative_path = normalized_name
        else:
            shutil.copy2(package_path, self.cache_dir / package_path.name)
            relative_path = package_path.name

        # Key metadata by normalized name (stable across downloads) instead
        # of the transient source path: keying by temp-dir path duplicated an
        # entry every time the same package was re-added.
        metadata = self._load_metadata()
        metadata[normalized_name] = asdict(
            CacheMetadata(
                name=normalized_name,
                version=version,
                path=relative_path,
                timestamp=time.time(),
            )
        )
        self._save_metadata(metadata)

        logger.info(f"Cached package: {normalized_name}")

    def _load_metadata(self) -> dict[str, Any]:
        """Load cache metadata.

        Returns:
            Metadata dictionary (empty on missing or unreadable file)
        """
        if self.metadata_file.exists():
            try:
                with open(self.metadata_file, encoding="utf-8") as f:
                    return json.load(f)
            except Exception as e:
                logger.warning(f"Failed to load cache metadata: {e}")

        return {}

    def _save_metadata(self, metadata: dict[str, Any]) -> None:
        """Save cache metadata.

        Args:
            metadata: Metadata dictionary
        """
        with open(self.metadata_file, "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)

    def clear_cache(self) -> None:
        """Clear all cached packages."""
        if self.cache_dir.exists():
            shutil.rmtree(self.cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self._dependencies_cache.clear()  # Clear in-memory dependencies cache
        logger.info("Cache cleared")
773
+
774
+
775
class LibraryDownloader:
    """Download Python packages from PyPI (or a mirror) via ``pip download``."""

    def __init__(
        self,
        cache: LibraryCache,
        python_version: str | None = None,
        mirror: str = "pypi",
    ):
        """Initialize downloader.

        Args:
            cache: Cache manager
            python_version: Target Python version for platform-specific packages
            mirror: PyPI mirror source (pypi, tsinghua, aliyun, ustc, douban, tencent)
        """
        self.cache = cache
        self.python_version = (
            python_version or f"{sys.version_info.major}.{sys.version_info.minor}"
        )
        self.platform_name = (
            platform.system().lower() + "_" + platform.machine().lower()
        )
        # Unknown mirror names silently fall back to the official index.
        self.mirror_url = PYPI_MIRRORS.get(mirror, PYPI_MIRRORS["pypi"])
        self.pip_executable = self._find_pip_executable()

    @staticmethod
    def _find_pip_executable() -> str | None:
        """Find pip executable in the system (None when not installed)."""
        return shutil.which("pip") or shutil.which("pip3")

    def _download_package(self, dep: Dependency, dest_dir: Path) -> Path | None:
        """Download a single package without dependencies.

        Args:
            dep: Dependency to download
            dest_dir: Destination directory

        Returns:
            Path to downloaded package file (wheel or sdist) or None
        """
        if not self.pip_executable:
            logger.error(
                "pip not found. Please install pip: python -m ensurepip --upgrade"
            )
            return None

        logger.info(f"Downloading: {dep}")

        with tempfile.TemporaryDirectory() as temp_dir:
            result = subprocess.run(
                [
                    self.pip_executable,
                    "download",
                    "--no-deps",
                    "--index-url",
                    self.mirror_url,
                    "--dest",
                    temp_dir,
                    str(dep),
                ],
                capture_output=True,
                text=True,
                check=False,
            )

            if result.returncode != 0:
                logger.warning(f"pip download failed for {dep}: {result.stderr}")
                return None

            # Pick the artifact in preference order: wheel, then .tar.gz,
            # then .zip. (Previously the .zip scan ran even after a .tar.gz
            # was found, silently overriding the selection.)
            temp_path = Path(temp_dir)
            downloaded_file = None
            for pattern in ("*.whl", "*.tar.gz", "*.zip"):
                downloaded_file = next(iter(temp_path.glob(pattern)), None)
                if downloaded_file:
                    break

            if downloaded_file:
                self.cache.add_package(dep.name, downloaded_file, dep.version)
                shutil.copy2(downloaded_file, dest_dir / downloaded_file.name)
                logger.info(f"Downloaded: {downloaded_file.name}")
                return dest_dir / downloaded_file.name

        return None

    def download_packages(
        self,
        dependencies: list[Dependency],
        dest_dir: Path,
        max_workers: int = 4,
    ) -> DownloadResult:
        """Download multiple packages concurrently.

        NOTE(review): cache hits are counted as successful but are NOT copied
        into *dest_dir* — presumably later packing reads them from the cache
        path; confirm against the pack step.

        Args:
            dependencies: List of dependencies to download
            dest_dir: Destination directory
            max_workers: Maximum concurrent downloads

        Returns:
            DownloadResult containing download statistics
        """
        dest_dir.mkdir(parents=True, exist_ok=True)

        # Collect (package_name, success) tuples; appending from the
        # as_completed loop is single-threaded, so this is safe.
        results_list: list[tuple[str, bool]] = []
        cached_count = 0
        cached_packages: set[str] = set()  # Track cached package names efficiently

        logger.info(f"Total direct dependencies: {len(dependencies)}")
        logger.info(f"Using mirror: {self.mirror_url}")

        # Check cache and mark cached packages (single-threaded, safe)
        for dep in dependencies:
            if self.cache.get_package_path(dep.name, dep.version):
                normalized_dep_name = normalize_package_name(dep.name)
                results_list.append((normalized_dep_name, True))
                cached_packages.add(normalized_dep_name)
                cached_count += 1
                logger.info(f"Using cached package: {dep}")

        # Download remaining packages concurrently
        remaining_deps = [
            dep
            for dep in dependencies
            if normalize_package_name(dep.name) not in cached_packages
        ]
        downloaded_count = 0

        if remaining_deps:
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                future_to_dep = {
                    executor.submit(self._download_package, dep, dest_dir): dep
                    for dep in remaining_deps
                }

                for future in as_completed(future_to_dep):
                    dep = future_to_dep[future]
                    normalized_dep_name = normalize_package_name(dep.name)
                    try:
                        wheel_file = future.result()
                        results_list.append((
                            normalized_dep_name,
                            wheel_file is not None,
                        ))
                        if wheel_file:
                            downloaded_count += 1
                    except Exception as e:
                        logger.error(f"Error processing {normalized_dep_name}: {e}")
                        results_list.append((normalized_dep_name, False))

        # Convert to dictionary for final result
        results = dict(results_list)
        successful = sum(1 for v in results.values() if v)
        logger.info(
            f"Processed {successful}/{len(dependencies)} ({cached_count} cached, {downloaded_count} downloaded)"
        )

        return DownloadResult(
            results=results,
            total=len(dependencies),
            successful=successful,
            cached=cached_count,
            downloaded=downloaded_count,
        )
949
+
950
+
951
class PyLibPack:
    """Main library packer class."""

    def __init__(
        self,
        cache_dir: Path | None = None,
        python_version: str | None = None,
        mirror: str = "pypi",
        optimize: bool = True,
        optimization_strategy: SelectiveExtractionStrategy | None = None,
    ):
        """Initialize library packer.

        Args:
            cache_dir: Custom cache directory
            python_version: Target Python version
            mirror: PyPI mirror source (pypi, tsinghua, aliyun, ustc, douban, tencent)
            optimize: Whether to apply selective extraction when unpacking wheels
            optimization_strategy: Custom extraction strategy; when omitted and
                ``optimize`` is True, a default SelectiveExtractionStrategy is
                created. Ignored entirely when ``optimize`` is False.
        """
        self.cache = LibraryCache(cache_dir)
        self.downloader = LibraryDownloader(self.cache, python_version, mirror)

        # Set up optimization strategy. The conditional expression binds
        # looser than `or`, so parenthesize to make the grouping explicit:
        # no strategy is kept at all when optimize is False.
        self.optimize = optimize
        self.optimization_strategy = (
            (optimization_strategy or SelectiveExtractionStrategy())
            if optimize
            else None
        )
978
+ def pack_project(
979
+ self, project: Project, output_dir: Path, max_workers: int = 4
980
+ ) -> PackResult:
981
+ """Pack dependencies for a single project.
982
+
983
+ Args:
984
+ project: Project information
985
+ output_dir: Output directory
986
+ max_workers: Maximum concurrent downloads
987
+
988
+ Returns:
989
+ PackResult containing packing statistics
990
+ """
991
+ logger.info(f"\n{'=' * 60}")
992
+ logger.info(f"Packing dependencies for project: {project.name}")
993
+ logger.info(f"{'=' * 60}")
994
+
995
+ if not project.dependencies:
996
+ logger.warning(f"No dependencies found for {project.name}")
997
+ return PackResult(
998
+ success=False,
999
+ message="No dependencies found",
1000
+ project=project.name,
1001
+ total=0,
1002
+ successful=0,
1003
+ failed=0,
1004
+ packages_dir=str(output_dir),
1005
+ )
1006
+
1007
+ logger.info(f"Found {len(project.dependencies)} dependencies")
1008
+
1009
+ # Download direct dependencies
1010
+ download_result = self.downloader.download_packages(
1011
+ project.dependencies,
1012
+ self.cache.cache_dir,
1013
+ max_workers=max_workers,
1014
+ )
1015
+
1016
+ # Build package map (including both wheel and sdist files) and collect all required packages recursively
1017
+ package_map: dict[str, Path] = {}
1018
+
1019
+ # Add wheel files to package map
1020
+ for wheel_file in self.cache.cache_dir.glob("*.whl"):
1021
+ pkg_name = self.cache._extract_package_name_from_wheel(wheel_file)
1022
+ if pkg_name and pkg_name not in package_map: # Prefer wheel files
1023
+ normalized_pkg_name = normalize_package_name(pkg_name)
1024
+ package_map[normalized_pkg_name] = wheel_file
1025
+
1026
+ # Add sdist files to package map (only if not already present)
1027
+ for sdist_file in self.cache.cache_dir.glob("*.tar.gz"):
1028
+ pkg_name = self.cache._extract_package_name_from_sdist(sdist_file)
1029
+ if pkg_name and normalize_package_name(pkg_name) not in package_map:
1030
+ normalized_pkg_name = normalize_package_name(pkg_name)
1031
+ package_map[normalized_pkg_name] = sdist_file
1032
+
1033
+ for sdist_file in self.cache.cache_dir.glob("*.zip"):
1034
+ pkg_name = self.cache._extract_package_name_from_sdist(sdist_file)
1035
+ if pkg_name and normalize_package_name(pkg_name) not in package_map:
1036
+ normalized_pkg_name = normalize_package_name(pkg_name)
1037
+ package_map[normalized_pkg_name] = sdist_file
1038
+
1039
+ # Recursively collect all dependencies (pass cache instance for dependency extraction)
1040
+ all_packages = self._collect_all_dependencies(
1041
+ package_map, list(download_result.results), self.cache
1042
+ )
1043
+
1044
+ # Extract all required packages (keep order of dependency resolution)
1045
+ extracted_packages = []
1046
+ for pkg_name in all_packages:
1047
+ if pkg_name in package_map:
1048
+ # Skip if output directory already exists
1049
+ output_pkg_dir = output_dir / pkg_name
1050
+ if output_pkg_dir.exists():
1051
+ logger.warning(f"Output directory already exists: {output_pkg_dir}")
1052
+ continue
1053
+
1054
+ package_file = package_map[pkg_name]
1055
+ logger.info(f"Extracting {package_file.name}...")
1056
+ self._extract_package(package_file, output_dir, pkg_name)
1057
+ extracted_packages.append(pkg_name)
1058
+ logger.info(f"Extracted {pkg_name}")
1059
+
1060
+ logger.info(
1061
+ f"Pack complete for {project.name}: {download_result.successful}/{download_result.total}"
1062
+ )
1063
+
1064
+ return PackResult(
1065
+ success=download_result.successful > 0,
1066
+ project=project.name,
1067
+ total=download_result.total,
1068
+ successful=download_result.successful,
1069
+ failed=download_result.total - download_result.successful,
1070
+ packages_dir=str(output_dir),
1071
+ extracted_packages=extracted_packages,
1072
+ )
1073
+
1074
    @staticmethod
    def _collect_all_dependencies(
        package_map: dict[str, Path], root_packages: list[str], cache: LibraryCache
    ) -> set[str]:
        """Recursively collect all dependencies from package files (wheel or sdist).

        Performs a depth-first walk starting from ``root_packages``. Names are
        normalized before any bookkeeping so lookups stay consistent with the
        normalized keys of ``package_map``.

        Args:
            package_map: Mapping of normalized package names to package files
                (wheel or sdist)
            root_packages: List of root package names to start from
            cache: LibraryCache instance for extracting dependencies

        Returns:
            Set of all required (normalized) package names
        """
        all_packages: set[str] = set()
        visited: set[str] = set()  # every name ever processed; prevents re-walking
        visit_stack: dict[str, int] = {}  # Track visit depth for cycle detection (names on the active DFS path)

        def visit(pkg_name: str, level: int = 0) -> None:
            """Visit a package and collect its dependencies."""
            # Normalize package name for consistency
            normalized_pkg_name = normalize_package_name(pkg_name)

            # Check for cycles: the name is still on the current DFS path.
            if normalized_pkg_name in visit_stack:
                logger.warning(
                    f"Potential circular dependency detected: {normalized_pkg_name} (current depth: {level}, "
                    f"previous depth: {visit_stack[normalized_pkg_name]})"
                )
                return

            # Check depth limit
            if level > MAX_DEPTH:
                logger.warning(
                    f"Maximum dependency depth ({MAX_DEPTH}) reached for {normalized_pkg_name}, stopping recursion"
                )
                return

            if normalized_pkg_name in visited:
                return

            # Mark as visited and track depth
            visited.add(normalized_pkg_name)
            visit_stack[normalized_pkg_name] = level
            all_packages.add(normalized_pkg_name)

            if normalized_pkg_name in package_map:
                # NOTE(review): package_map may also hold sdist paths, which are
                # handed to a helper named "..._from_wheel" — presumably it
                # handles both formats; confirm against LibraryCache.
                deps = cache._extract_dependencies_from_wheel(
                    package_map[normalized_pkg_name]
                )
                logger.debug(f"{' ' * level}{normalized_pkg_name} -> {deps}")
                for dep in deps:
                    visit(dep, level + 1)

            # Remove from stack when done (the name stays in `visited`)
            visit_stack.pop(normalized_pkg_name, None)

        for pkg_name in root_packages:
            visit(pkg_name)

        logger.info(
            f"Collected {len(all_packages)} packages (including recursive dependencies)"
        )
        logger.info(f"Packages: {all_packages}")
        return all_packages
1140
+ def _build_and_cache_wheel(self, sdist_file: Path, package_name: str) -> None:
1141
+ """Build wheel from sdist file and cache it for faster future access.
1142
+
1143
+ Args:
1144
+ sdist_file: Path to sdist file (.tar.gz or .zip)
1145
+ package_name: Name of the package
1146
+ """
1147
+ with tempfile.TemporaryDirectory() as temp_wheel_dir:
1148
+ # Use pip wheel to build wheel from sdist
1149
+ result = subprocess.run(
1150
+ [
1151
+ self.downloader.pip_executable or "pip",
1152
+ "wheel",
1153
+ "--no-deps",
1154
+ "--wheel-dir",
1155
+ temp_wheel_dir,
1156
+ "--no-cache-dir",
1157
+ str(sdist_file),
1158
+ ],
1159
+ capture_output=True,
1160
+ text=True,
1161
+ check=False,
1162
+ )
1163
+
1164
+ if result.returncode != 0:
1165
+ logger.warning(
1166
+ f"Failed to build wheel from sdist for {package_name}: {result.stderr}"
1167
+ )
1168
+ return
1169
+
1170
+ # Find the built wheel file
1171
+ wheel_files = list(Path(temp_wheel_dir).glob("*.whl"))
1172
+ if wheel_files:
1173
+ wheel_file = wheel_files[0]
1174
+ # Copy wheel to cache directory
1175
+ cache_wheel_path = self.cache.cache_dir / wheel_file.name
1176
+ shutil.copy2(wheel_file, cache_wheel_path)
1177
+
1178
+ # Update cache metadata
1179
+ self.cache.add_package(package_name, wheel_file)
1180
+
1181
+ logger.info(
1182
+ f"Built and cached wheel: {wheel_file.name} for {package_name}"
1183
+ )
1184
+ else:
1185
+ logger.warning(f"No wheel file was built from sdist for {package_name}")
1186
+
1187
    def _extract_package(
        self, package_file: Path, dest_dir: Path, package_name: str
    ) -> None:
        """Extract package file (wheel or sdist) to destination directory with optional optimization.

        Sdists are installed via ``pip install --target`` into a temporary
        directory and their contents copied over (dist-info skipped), then a
        wheel is built and cached for faster future runs. Wheels are unzipped
        directly, optionally filtered by the optimization strategy; dist-info
        directories are always skipped.

        Args:
            package_file: Path to package file (wheel or sdist)
            dest_dir: Destination directory
            package_name: Name of the package being extracted
        """
        logger.info(
            f"Extracting {package_file.name} for package {package_name} to {dest_dir}"
        )

        # Handle sdist files (.tar.gz or .zip) - install using pip, and build wheel for cache
        # NOTE(review): Path(".tar.gz").suffix is ".gz", so this test matches
        # any gzip file, not just tarballs — presumably only sdists reach here.
        if package_file.suffix == ".gz" or package_file.suffix == ".zip":
            logger.info(f"Installing sdist file for {package_name} using pip...")

            # Use pip install --target to install sdist to temporary directory
            with tempfile.TemporaryDirectory() as temp_install_dir:
                result = subprocess.run(
                    [
                        self.downloader.pip_executable or "pip",
                        "install",
                        "--target",
                        temp_install_dir,
                        "--no-deps",  # Don't install dependencies (we handle them separately)
                        "--no-cache-dir",
                        str(package_file),
                    ],
                    capture_output=True,
                    text=True,
                    check=False,
                )

                if result.returncode != 0:
                    logger.error(
                        f"Failed to install sdist {package_file.name}: {result.stderr}"
                    )
                    return

                # Copy installed files to dest_dir, skipping *.dist-info directories
                temp_install_path = Path(temp_install_dir)
                for item in temp_install_path.iterdir():
                    # Skip dist-info directories
                    if item.name.endswith(".dist-info"):
                        logger.debug(f"Skipping dist-info directory: {item.name}")
                        continue
                    dest_path = dest_dir / item.name
                    if item.is_dir():
                        # Replace any pre-existing directory wholesale so stale
                        # files from a previous extraction don't linger.
                        if dest_path.exists():
                            shutil.rmtree(dest_path)
                        shutil.copytree(item, dest_path)
                    else:
                        shutil.copy2(item, dest_path)

                logger.info(
                    f"Installed sdist file for {package_name} to site-packages structure"
                )

            # Build wheel from sdist and cache it for faster future access
            logger.info(f"Building wheel from sdist for {package_name}...")
            self._build_and_cache_wheel(package_file, package_name)
            return

        # Handle wheel files with optional optimization
        with zipfile.ZipFile(package_file, "r") as zf:
            if self.optimize and self.optimization_strategy:
                # Apply optimization strategy - selectively extract files
                extracted_count = 0
                skipped_count = 0

                for file_info in zf.filelist:
                    file_path = Path(file_info.filename)
                    # Skip dist-info directories (any path component ending in
                    # ".dist-info"; `parts` also includes the file name itself)
                    if file_path.name.endswith(".dist-info") or any(
                        parent.endswith(".dist-info") for parent in file_path.parts
                    ):
                        logger.debug(f"Skipping dist-info: {file_info.filename}")
                        skipped_count += 1
                        continue
                    if self.optimization_strategy.should_extract_file(
                        package_name, file_path
                    ):
                        zf.extract(file_info, dest_dir)
                        extracted_count += 1
                        logger.debug(f"Extracted {file_path} from {package_name}")
                    else:
                        skipped_count += 1
                        logger.debug(
                            f"Skipped {file_path} from {package_name} (filtered by optimization strategy)"
                        )

                logger.info(
                    f"Extraction complete for {package_name}: {extracted_count} extracted, {skipped_count} skipped"
                )
            else:
                # Extract all files without optimization, but skip dist-info directories
                for file_info in zf.filelist:
                    file_path = Path(file_info.filename)
                    # Skip dist-info directories
                    if file_path.name.endswith(".dist-info") or any(
                        parent.endswith(".dist-info") for parent in file_path.parts
                    ):
                        logger.debug(f"Skipping dist-info: {file_info.filename}")
                        continue
                    zf.extract(file_info, dest_dir)
                logger.info(
                    f"All files extracted for {package_name} (no optimization applied, dist-info skipped)"
                )
1298
    def pack(
        self,
        working_dir: Path,
        max_workers: int = 4,
    ) -> BatchPackResult:
        """Pack project dependencies from base directory.

        Output always goes to ``working_dir/dist/site-packages``.

        Args:
            working_dir: Base directory containing projects or a single project
            max_workers: Maximum concurrent downloads

        Returns:
            BatchPackResult containing batch packing statistics
        """
        output_dir = working_dir / "dist" / "site-packages"
        logger.info(f"Starting dependency pack for: {working_dir}")

        projects = Solution.from_directory(root_dir=working_dir).projects
        if not projects:
            logger.error("Failed to load project information")
            return BatchPackResult(
                success=False,
                total=0,
                successful=0,
                failed=0,
                output_dir=str(output_dir),
                total_time=0.0,
            )

        logger.info(f"Found {len(projects)} project(s) to process")

        # Process each project
        total_start = time.perf_counter()
        success_count = 0
        failed_projects: list[str] = []
        # A single-project solution lives directly in working_dir; multi-project
        # solutions use one subdirectory per project name.
        use_current_dir = len(projects) == 1

        for project in projects.values():
            project_dir = working_dir if use_current_dir else working_dir / project.name

            # project_dir is only checked for existence here; pack_project
            # itself works from the project metadata and the shared cache.
            if not project_dir.is_dir():
                logger.warning(f"Project directory not found: {project_dir}, skipping")
                failed_projects.append(project.name)
                continue

            result = self.pack_project(project, output_dir, max_workers)

            if result.success:
                success_count += 1
            else:
                failed_projects.append(project.name)

        total_time = time.perf_counter() - total_start

        # Summary
        logger.info(f"\n{'=' * 60}")
        logger.info("Summary")
        logger.info(f"{'=' * 60}")
        logger.info(f"Total projects: {len(projects)}")
        logger.info(f"Successfully packed: {success_count}")
        logger.info(f"Failed: {len(failed_projects)}")
        if failed_projects:
            logger.info(f"Failed projects: {', '.join(failed_projects)}")
        logger.info(f"Total time: {total_time:.2f}s")

        return BatchPackResult(
            success=len(failed_projects) == 0,
            total=len(projects),
            successful=success_count,
            failed=len(failed_projects),
            failed_projects=failed_projects,
            output_dir=str(output_dir),
            total_time=total_time,
        )
1374
    def clear_cache(self) -> None:
        """Clear the package cache (delegates to the underlying LibraryCache)."""
        self.cache.clear_cache()
1377
+
1378
+
1379
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments.

    Returns:
        Parsed argparse namespace for the pylibpack CLI.
    """
    cli = argparse.ArgumentParser(
        prog="pylibpack",
        description="Python library packer with caching support",
    )

    # Positional: base directory (defaults to the current working directory).
    cli.add_argument(
        "directory",
        type=str,
        nargs="?",
        default=str(Path.cwd()),
        help="Base directory containing projects",
    )

    # Download/cache configuration.
    cli.add_argument("--cache-dir", type=str, default=None, help="Custom cache directory")
    cli.add_argument("--python-version", type=str, default=None, help="Target Python version")
    cli.add_argument("-j", "--jobs", type=int, default=4, help="Maximum concurrent downloads")
    cli.add_argument(
        "--mirror",
        type=str,
        default="aliyun",
        choices=("pypi", "tsinghua", "aliyun", "ustc", "douban", "tencent"),
        help="PyPI mirror source for faster downloads in China",
    )

    # Behavior flags.
    cli.add_argument("--debug", "-d", action="store_true", help="Debug mode")
    cli.add_argument(
        "--no-optimize",
        "-no",
        action="store_true",
        help="Disable package optimization (extract all files)",
    )
    cli.add_argument(
        "--list-optimizations",
        "-lo",
        action="store_true",
        help="List all available optimization rules",
    )

    return cli.parse_args()
1423
+
1424
+
1425
def main() -> None:
    """Main entry point for pylibpack tool."""
    args = parse_args()

    # Configure logging first so --debug also affects the
    # --list-optimizations path (previously it was applied after the early
    # return below and had no effect there).
    if args.debug:
        logger.setLevel(logging.DEBUG)

    if args.list_optimizations:
        strategy = SelectiveExtractionStrategy()
        # Use the module logger rather than bare logging.info(): the root
        # logger defaults to WARNING level, so logging.info() output was
        # silently discarded and the list never appeared.
        logger.info("Available optimization rules:")
        for lib_name in sorted(strategy.get_library_names_with_rules()):
            logger.info(f"  - {lib_name}")
        return

    # Initialize packer
    cache_dir = Path(args.cache_dir) if args.cache_dir else None
    packer = PyLibPack(
        cache_dir=cache_dir,
        python_version=args.python_version,
        mirror=args.mirror,
        optimize=not args.no_optimize,
    )

    packer.pack(
        working_dir=Path(args.directory),
        max_workers=args.jobs,
    )


if __name__ == "__main__":
    main()