pysfi 0.1.7__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {pysfi-0.1.7.dist-info → pysfi-0.1.11.dist-info}/METADATA +11 -9
  2. pysfi-0.1.11.dist-info/RECORD +60 -0
  3. pysfi-0.1.11.dist-info/entry_points.txt +28 -0
  4. sfi/__init__.py +1 -1
  5. sfi/alarmclock/alarmclock.py +40 -40
  6. sfi/bumpversion/__init__.py +1 -1
  7. sfi/cleanbuild/cleanbuild.py +155 -0
  8. sfi/condasetup/condasetup.py +116 -0
  9. sfi/docscan/__init__.py +1 -1
  10. sfi/docscan/docscan.py +407 -103
  11. sfi/docscan/docscan_gui.py +1282 -596
  12. sfi/docscan/lang/eng.py +152 -0
  13. sfi/docscan/lang/zhcn.py +170 -0
  14. sfi/filedate/filedate.py +185 -112
  15. sfi/gittool/__init__.py +2 -0
  16. sfi/gittool/gittool.py +401 -0
  17. sfi/llmclient/llmclient.py +592 -0
  18. sfi/llmquantize/llmquantize.py +480 -0
  19. sfi/llmserver/llmserver.py +335 -0
  20. sfi/makepython/makepython.py +31 -30
  21. sfi/pdfsplit/pdfsplit.py +173 -173
  22. sfi/pyarchive/pyarchive.py +418 -0
  23. sfi/pyembedinstall/pyembedinstall.py +629 -0
  24. sfi/pylibpack/__init__.py +0 -0
  25. sfi/pylibpack/pylibpack.py +1457 -0
  26. sfi/pylibpack/rules/numpy.json +22 -0
  27. sfi/pylibpack/rules/pymupdf.json +10 -0
  28. sfi/pylibpack/rules/pyqt5.json +19 -0
  29. sfi/pylibpack/rules/pyside2.json +23 -0
  30. sfi/pylibpack/rules/scipy.json +23 -0
  31. sfi/pylibpack/rules/shiboken2.json +24 -0
  32. sfi/pyloadergen/pyloadergen.py +512 -227
  33. sfi/pypack/__init__.py +0 -0
  34. sfi/pypack/pypack.py +1142 -0
  35. sfi/pyprojectparse/__init__.py +0 -0
  36. sfi/pyprojectparse/pyprojectparse.py +500 -0
  37. sfi/pysourcepack/pysourcepack.py +308 -0
  38. sfi/quizbase/__init__.py +0 -0
  39. sfi/quizbase/quizbase.py +828 -0
  40. sfi/quizbase/quizbase_gui.py +987 -0
  41. sfi/regexvalidate/__init__.py +0 -0
  42. sfi/regexvalidate/regex_help.html +284 -0
  43. sfi/regexvalidate/regexvalidate.py +468 -0
  44. sfi/taskkill/taskkill.py +0 -2
  45. sfi/workflowengine/__init__.py +0 -0
  46. sfi/workflowengine/workflowengine.py +444 -0
  47. pysfi-0.1.7.dist-info/RECORD +0 -31
  48. pysfi-0.1.7.dist-info/entry_points.txt +0 -15
  49. sfi/embedinstall/embedinstall.py +0 -418
  50. sfi/projectparse/projectparse.py +0 -152
  51. sfi/pypacker/fspacker.py +0 -91
  52. {pysfi-0.1.7.dist-info → pysfi-0.1.11.dist-info}/WHEEL +0 -0
  53. /sfi/{embedinstall → docscan/lang}/__init__.py +0 -0
  54. /sfi/{projectparse → llmquantize}/__init__.py +0 -0
  55. /sfi/{pypacker → pyembedinstall}/__init__.py +0 -0
@@ -0,0 +1,1457 @@
1
+ """Python Library Packager - Download and pack Python dependencies with caching support.
2
+
3
+ This module provides functionality to:
4
+ 1. Read project information from projects.json or run pyprojectparse if needed
5
+ 2. Download dependencies to local .cache directory
6
+ 3. Pack dependencies into a distributable format
7
+ 4. Support batch processing multiple projects recursively
8
+ """
9
+
10
from __future__ import annotations

import argparse
import json
import logging
import platform
import re
import shutil
import subprocess
import sys
import tarfile
import tempfile
import time
import zipfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any, Pattern

from sfi.pyprojectparse.pyprojectparse import Project, Solution
30
+
31
# Module-wide logging: timestamped records, INFO level and above.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

__version__ = "1.0.0"
__build__ = "20260120"

# Default on-disk location for downloaded package artifacts.
DEFAULT_CACHE_DIR = Path.home() / ".pysfi" / ".cache" / "python-libs"

MAX_DEPTH = 50  # Maximum recursion depth to prevent infinite loops

# PyPI "simple" index URLs, selectable by short mirror name
# (see LibraryDownloader's ``mirror`` argument).
PYPI_MIRRORS = {
    "pypi": "https://pypi.org/simple",
    "tsinghua": "https://pypi.tuna.tsinghua.edu.cn/simple",
    "aliyun": "https://mirrors.aliyun.com/pypi/simple/",
    "ustc": "https://pypi.mirrors.ustc.edu.cn/simple/",
    "douban": "https://pypi.douban.com/simple/",
    "tencent": "https://mirrors.cloud.tencent.com/pypi/simple",
}
51
+
52
+
53
@dataclass(frozen=True)
class Dependency:
    """A single Python package requirement.

    ``version`` carries the raw version-specifier text (if any); ``extras``
    and ``requires`` hold optional extras and requirement names.
    """

    name: str
    version: str | None = None
    extras: set[str] = field(default_factory=set)
    requires: set[str] = field(default_factory=set)

    def __post_init__(self):
        """Canonicalize ``name`` (frozen dataclass, so bypass the setattr guard)."""
        object.__setattr__(self, "name", normalize_package_name(self.name))

    def __str__(self) -> str:
        """Render as a pip-style requirement string, e.g. ``pkg[a,b]>=1.0``."""
        specifier = self.version or ""
        if not self.extras:
            return f"{self.name}{specifier}"
        extras_text = ",".join(sorted(self.extras))
        return f"{self.name}[{extras_text}]{specifier}"
71
+
72
+
73
@dataclass
class DownloadResult:
    """Result of downloading packages."""

    # Per-package outcome keyed by normalized package name; True means the
    # package is now available (from cache or freshly downloaded).
    results: dict[str, bool] = field(default_factory=dict)
    # Number of dependencies that were requested.
    total: int = 0
    # Count of packages available after the run (cached + downloaded).
    successful: int = 0
    # How many requests were satisfied from the local cache.
    cached: int = 0
    # How many packages were freshly downloaded.
    downloaded: int = 0
82
+
83
+
84
@dataclass
class PackResult:
    """Result of packing project dependencies."""

    # Overall success flag for this project's pack step.
    success: bool
    # Project name the result belongs to.
    project: str
    # Dependency counts for the pack step.
    total: int
    successful: int
    failed: int
    # Directory where the packed packages were written.
    packages_dir: str
    # Names of packages that were extracted into the output.
    extracted_packages: list[str] = field(default_factory=list)
    # Optional human-readable status/error message.
    message: str = ""
96
+
97
+
98
@dataclass
class BatchPackResult:
    """Result of packing multiple projects."""

    # True when the batch as a whole succeeded.
    success: bool
    # Project counts for the batch run.
    total: int
    successful: int
    failed: int
    # Names of the projects whose pack step failed.
    failed_projects: list[str] = field(default_factory=list)
    # Root output directory used for the batch.
    output_dir: str = ""
    # Wall-clock duration of the whole batch, in seconds.
    total_time: float = 0.0
109
+
110
+
111
@dataclass
class CacheMetadata:
    """Metadata for cached package."""

    # Normalized package name.
    name: str
    # Recorded version string; callers pass Dependency.version, which may be
    # a raw specifier rather than a concrete version — treat accordingly.
    version: str | None
    # Path of the cached artifact, relative to the cache directory.
    path: str
    # Unix timestamp of when the package was cached.
    timestamp: float
119
+
120
+
121
# Package names treated as development-only tooling and skipped when
# collecting runtime dependencies (matched after folding "-" to "_").
# NOTE(review): "unittest" and "mock" are stdlib module names rather than
# PyPI distribution names — presumably listed defensively; confirm.
DEV_TOOLS = frozenset({
    "sphinx",
    "sphinx_rtd_theme",
    "watchdog",
    "pytest",
    "coverage",
    "black",
    "mypy",
    "flake8",
    "pylint",
    "isort",
    "pre-commit",
    "tox",
    "nose",
    "unittest",
    "mock",
})
# Substring markers for dev/test/docs/lint/example requirements
# (substring match against the lowercased requirement name).
DEV_PATTERNS = frozenset({"dev", "test", "docs", "lint", "example"})
# Substring markers for typing-stub style requirements.
TYPING_PATTERNS = frozenset({"stubs", "typing", "types"})
140
+
141
+
142
@dataclass
class OptimizationRule:
    """Per-library extraction rule: regex patterns that drop or keep files.

    Attributes:
        library_name: The name of the library to apply the rule to.
        exclude_patterns: A list of patterns to exclude from the library.
        include_patterns: A list of patterns to include in the library.

    """

    library_name: str = field(default_factory=str)
    exclude_patterns: list[str] = field(default_factory=list)
    include_patterns: list[str] = field(default_factory=list)

    def __post_init__(self):
        """Pre-compile both pattern lists so later matching is cheap."""
        self.exclude_compiled: list[Pattern] = list(
            map(re.compile, self.exclude_patterns)
        )
        self.include_compiled: list[Pattern] = list(
            map(re.compile, self.include_patterns)
        )
165
+
166
+
167
class SelectiveExtractionStrategy:
    """Optimization strategy that applies inclusion/exclusion rules to specific libraries.

    Filtering happens in three stages:
    1. Universal exclusion rules (doc, test, example, demo, etc.) for every library.
    2. Library-specific exclusion rules.
    3. Library-specific inclusion rules (when present, only matching files are kept).
    """

    # Universal exclusion patterns - applied to all libraries
    UNIVERSAL_EXCLUDE_PATTERNS = frozenset({
        "doc",
        "docs",
        "test",
        "tests",
        "example",
        "examples",
        "demo",
        "demos",
        "sample",
        "samples",
        "benchmark",
        "benchmarks",
        "tutorial",
        "tutorials",
        "notebook",
        "notebooks",
        "license",
        "licenses",
    })

    def __init__(
        self,
        rules: list[OptimizationRule] | None = None,
        apply_universal_rules: bool = True,
    ):
        """Initialize the strategy with optimization rules.

        Args:
            rules: List of optimization rules to apply; when omitted, the
                bundled JSON rule files under ``rules/`` are loaded instead.
            apply_universal_rules: Whether to apply universal exclusion rules
                (default: True)
        """
        self.rules: dict[str, OptimizationRule] = {}
        self.apply_universal_rules = apply_universal_rules

        if rules:
            for rule in rules:
                self.rules[rule.library_name.lower()] = rule
        else:
            # No explicit rules given: fall back to the bundled defaults.
            self._setup_default_rules()

        # Pre-compile universal patterns once; each matches the pattern word
        # as a whole path segment (at start/end or between slashes).
        self._universal_exclude_compiled = [
            re.compile(f"(^|/)({pattern})(/|$)", re.IGNORECASE)
            for pattern in self.UNIVERSAL_EXCLUDE_PATTERNS
        ]

    def _setup_default_rules(self):
        """Load default optimization rules from the bundled ``rules/*.json`` files.

        Each rule file must provide ``library_name``; ``exclude_patterns`` and
        ``include_patterns`` are optional and default to empty lists, so a
        rule file that specifies only one of the two lists still loads.
        """
        # Get the rules directory
        rules_dir = Path(__file__).parent / "rules"

        if not rules_dir.exists() or not rules_dir.is_dir():
            logger.warning(f"Rules directory not found: {rules_dir}")
            return

        # Load all JSON rule files
        for rule_file in rules_dir.glob("*.json"):
            try:
                with open(rule_file, encoding="utf-8") as f:
                    rule_data = json.load(f)

                # Convert JSON data to OptimizationRule. Missing pattern
                # lists default to [] instead of rejecting the whole file.
                rule = OptimizationRule(
                    library_name=rule_data["library_name"],
                    exclude_patterns=rule_data.get("exclude_patterns", []),
                    include_patterns=rule_data.get("include_patterns", []),
                )

                self.rules[rule.library_name.lower()] = rule
                logger.debug(
                    f"Loaded optimization rule for {rule.library_name} from {rule_file.name}"
                )

            except Exception as e:
                logger.warning(f"Failed to load rule from {rule_file.name}: {e}")

    def _matches_universal_exclude_pattern(self, relative_path: str) -> bool:
        """Check if file path matches any universal exclusion pattern.

        Args:
            relative_path: Relative (posix-style, lowercased) path to the file

        Returns:
            True if path should be excluded, False otherwise
        """
        return any(
            pattern.search(relative_path)
            for pattern in self._universal_exclude_compiled
        )

    def should_extract_file(self, library_name: str, file_path: Path) -> bool:
        """Determine if a file should be extracted based on library-specific rules.

        Args:
            library_name: Name of the library
            file_path: Path to the file to check

        Returns:
            True if the file should be extracted, False otherwise
        """
        lib_name_lower = library_name.lower()
        relative_path = file_path.as_posix().lower()

        # Stage 1: universal exclusion rules apply to every library.
        if self.apply_universal_rules and self._matches_universal_exclude_pattern(
            relative_path
        ):
            logger.debug(
                f"Excluding {file_path} from {library_name} (matches universal exclusion pattern)"
            )
            return False

        # No library-specific rule: keep everything that survived stage 1.
        if lib_name_lower not in self.rules:
            logger.debug(f"No specific rules for {library_name}, including {file_path}")
            return True

        rule = self.rules[lib_name_lower]

        logger.debug(
            f"Checking {file_path} for {library_name} with {len(rule.exclude_compiled)} exclude and {len(rule.include_compiled)} include patterns"
        )

        # Stage 2: library-specific exclusions — any match drops the file.
        for exclude_pattern in rule.exclude_compiled:
            if exclude_pattern.search(relative_path):
                logger.debug(
                    f"Excluding {file_path} from {library_name} (matches exclude pattern: {exclude_pattern.pattern})"
                )
                return False

        # Stage 3: when inclusion patterns exist, a file must match one.
        if rule.include_compiled:
            for include_pattern in rule.include_compiled:
                if include_pattern.search(relative_path):
                    logger.debug(
                        f"Including {file_path} from {library_name} (matches include pattern: {include_pattern.pattern})"
                    )
                    return True
            # Inclusion rules exist but none matched: exclude.
            logger.debug(
                f"Excluding {file_path} from {library_name} (doesn't match any include patterns)"
            )
            return False

        # No inclusion rules: keep the file (it passed all exclusions).
        logger.debug(
            f"Including {file_path} from {library_name} (passed exclusion filters)"
        )
        return True

    def get_library_names_with_rules(self) -> set[str]:
        """Get the names of libraries that have optimization rules defined.

        Returns:
            Set of (lower-cased) library names with optimization rules
        """
        return set(self.rules.keys())
338
+
339
+
340
def normalize_package_name(name: str) -> str:
    """Normalize package name to lowercase with underscores.

    Follows the PEP 503 convention that runs of ``-``, ``_`` and ``.`` are
    equivalent separators, folding them all to a single ``_`` (the form used
    in wheel file names). Previously only ``-`` was replaced, so names such
    as ``zope.interface`` never matched their cached wheel
    (``zope_interface-...``).

    Args:
        name: Package name to normalize

    Returns:
        Normalized package name
    """
    return re.sub(r"[-_.]+", "_", name.lower())
350
+
351
+
352
def should_skip_dependency(req_name: str, has_extras: bool = False) -> bool:
    """Decide whether a requirement is development-only noise to ignore.

    Args:
        req_name: Package name
        has_extras: Whether the requirement has extras

    Returns:
        True if should skip, False otherwise
    """
    # Requirements carrying extras are never followed.
    if has_extras:
        return True

    lowered = req_name.lower()

    # Substring markers: dev/test/docs/lint/example, then typing/stub names.
    for marker_set in (DEV_PATTERNS, TYPING_PATTERNS):
        if any(marker in lowered for marker in marker_set):
            return True

    # Finally, drop well-known development tools (DEV_TOOLS uses "_" form).
    return lowered.replace("-", "_") in DEV_TOOLS
378
+
379
+
380
class LibraryCache:
    """Manage local cache for Python packages.

    Cached artifacts (wheels, sdists, or extracted directories) live directly
    under ``cache_dir``; a ``metadata.json`` sidecar records name/version
    information per package.
    """

    def __init__(self, cache_dir: Path | None = None):
        """Initialize cache manager.

        Args:
            cache_dir: Cache directory path (default: ~/.pysfi/.cache/python-libs)
        """
        self.cache_dir = cache_dir or DEFAULT_CACHE_DIR
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.metadata_file = self.cache_dir / "metadata.json"
        # In-memory cache for extracted dependencies to avoid repeated IO
        self._dependencies_cache: dict[Path, set[str]] = {}

    def get_package_path(
        self, package_name: str, version: str | None = None
    ) -> Path | None:
        """Get cached package path if available.

        The filesystem scan matches by name only; ``version`` (which may be a
        raw specifier string rather than a concrete version) is only honored
        by the metadata fallback.

        Args:
            package_name: Name of the package
            version: Version (optional)

        Returns:
            Path to cached package or None
        """
        # Filesystem lookup for wheels works even if metadata.json is missing.
        for whl_file in self.cache_dir.glob("*.whl"):
            if self._extract_package_name_from_wheel(whl_file) == package_name:
                logger.debug(f"Cache hit (filesystem wheel): {package_name}")
                return whl_file

        # Filesystem lookup for sdists (.tar.gz, .zip).
        for pattern in ("*.tar.gz", "*.zip"):
            for sdist_file in self.cache_dir.glob(pattern):
                if self._extract_package_name_from_sdist(sdist_file) == package_name:
                    logger.debug(f"Cache hit (filesystem sdist): {package_name}")
                    return sdist_file

        # Fallback to metadata lookup (the only path that filters by version).
        metadata = self._load_metadata()
        for info in metadata.values():
            if info["name"] == package_name and (
                version is None or info.get("version") == version
            ):
                path = self.cache_dir / info["path"]
                if path.exists():
                    logger.debug(f"Cache hit (metadata): {package_name}")
                    return path

        logger.debug(f"Cache miss: {package_name}")
        return None

    @staticmethod
    def _extract_package_name_from_wheel(wheel_file: Path) -> str | None:
        """Extract package name from wheel file.

        Wheel file names are ``name-version-...`` with the name already in
        underscore form, so the first ``-`` segment is the package name.

        Args:
            wheel_file: Path to wheel file

        Returns:
            Package name or None
        """
        try:
            filename = wheel_file.stem  # Remove .whl extension
            parts = filename.split("-")
            if parts:
                return normalize_package_name(parts[0])
        except Exception:
            pass
        return None

    @staticmethod
    def _extract_package_name_from_sdist(sdist_file: Path) -> str | None:
        """Extract package name from source distribution file (.tar.gz or .zip).

        Args:
            sdist_file: Path to sdist file

        Returns:
            Package name or None
        """
        try:
            filename = sdist_file.name
            # Strip the archive extension: "pkg-1.0.0.tar.gz" -> "pkg-1.0.0".
            if filename.endswith(".tar.gz"):
                base = filename[: -len(".tar.gz")]
            elif filename.endswith(".zip"):
                base = filename[: -len(".zip")]
            else:
                return None
            # Split from the right so dashes in the name survive:
            # "pkg-name-1.0.0" -> ("pkg-name", "1.0.0").
            name_part = base.rsplit("-", 1)[0]
            if name_part:
                return normalize_package_name(name_part)
        except Exception as e:
            logger.debug(f"Failed to extract package name from {sdist_file}: {e}")
        return None

    def _extract_dependencies_from_wheel(self, wheel_file: Path) -> set[str]:
        """Extract dependencies from wheel METADATA file with caching.

        Sdist archives (.tar.gz/.zip) are transparently routed to the
        dedicated sdist parser.

        Args:
            wheel_file: Path to wheel (or sdist) file

        Returns:
            Set of package names (normalized)
        """
        # Check the in-memory memo first.
        if wheel_file in self._dependencies_cache:
            return self._dependencies_cache[wheel_file]

        # Check if it's an sdist file (.tar.gz or .zip)
        if wheel_file.suffix in (".gz", ".zip"):
            dependencies = self._extract_dependencies_from_sdist(wheel_file)
            self._dependencies_cache[wheel_file] = dependencies
            return dependencies

        dependencies: set[str] = set()
        try:
            with zipfile.ZipFile(wheel_file, "r") as zf:
                metadata_files = [
                    name for name in zf.namelist() if name.endswith("METADATA")
                ]
                if metadata_files:
                    metadata_content = zf.read(metadata_files[0]).decode(
                        "utf-8", errors="ignore"
                    )
                    # Parse Requires-Dist lines via the shared requirement
                    # parser (skips extras and dev-only dependencies), keeping
                    # wheel and sdist parsing consistent.
                    for line in metadata_content.splitlines():
                        if line.startswith("Requires-Dist:"):
                            dep_str = line.split(":", 1)[1].strip()
                            dependencies.update(
                                self._parse_single_requirement(dep_str)
                            )
        except Exception as e:
            logger.warning(
                f"Failed to extract dependencies from {wheel_file.name}: {e}"
            )
            return set()

        # Cache the result
        self._dependencies_cache[wheel_file] = dependencies
        return dependencies

    @staticmethod
    def _is_root_metadata(archive_path: str) -> bool:
        """Return True when *archive_path* is a top-level PKG-INFO/METADATA entry."""
        parts = archive_path.split("/")
        return len(parts) == 2 or (
            len(parts) == 3 and parts[2] in ("PKG-INFO", "METADATA")
        )

    def _extract_dependencies_from_sdist(self, sdist_file: Path) -> set[str]:
        """Extract dependencies from source distribution file with caching.

        Args:
            sdist_file: Path to sdist file (.tar.gz or .zip)

        Returns:
            Set of package names (normalized)
        """
        dependencies: set[str] = set()

        try:
            # Handle .tar.gz files
            if sdist_file.suffix == ".gz":
                with tarfile.open(sdist_file, "r:gz") as tf:
                    for member in tf.getmembers():
                        if not (
                            member.name.endswith("PKG-INFO")
                            or member.name.endswith("METADATA")
                        ):
                            continue
                        # Only use PKG-INFO/METADATA in the package root.
                        if not self._is_root_metadata(member.name):
                            continue
                        content = tf.extractfile(member)
                        if content:
                            metadata_content = content.read().decode(
                                "utf-8", errors="ignore"
                            )
                            dependencies = self._parse_metadata_content(
                                metadata_content
                            )
                            logger.debug(
                                f"Extracted dependencies from {member.name} in {sdist_file.name}"
                            )
                        break
            # Handle .zip files
            elif sdist_file.suffix == ".zip":
                with zipfile.ZipFile(sdist_file, "r") as zf:
                    for name in zf.namelist():
                        if not (
                            name.endswith("PKG-INFO") or name.endswith("METADATA")
                        ):
                            continue
                        if not self._is_root_metadata(name):
                            continue
                        metadata_content = zf.read(name).decode(
                            "utf-8", errors="ignore"
                        )
                        dependencies = self._parse_metadata_content(metadata_content)
                        logger.debug(
                            f"Extracted dependencies from {name} in {sdist_file.name}"
                        )
                        break
        except Exception as e:
            logger.warning(
                f"Failed to extract dependencies from sdist {sdist_file.name}: {e}"
            )

        return dependencies

    @staticmethod
    def _parse_metadata_content(metadata_content: str) -> set[str]:
        """Parse metadata content (PKG-INFO or METADATA) to extract dependencies.

        Args:
            metadata_content: Content of PKG-INFO or METADATA file

        Returns:
            Set of package names (normalized)
        """
        dependencies: set[str] = set()
        try:
            for line in metadata_content.splitlines():
                # Look for Requires-Dist or Requires field
                if line.startswith("Requires-Dist:") or line.startswith("Requires:"):
                    dep_str = line.split(":", 1)[1].strip()
                    if line.startswith("Requires:"):
                        # Legacy Requires field: comma-separated list.
                        for req_str in re.split(r",\s*", dep_str):
                            req_str = req_str.strip()
                            if req_str:
                                dependencies.update(
                                    LibraryCache._parse_single_requirement(req_str)
                                )
                    else:
                        # Requires-Dist field: one requirement per line.
                        dependencies.update(
                            LibraryCache._parse_single_requirement(dep_str)
                        )
        except Exception as e:
            logger.debug(f"Failed to parse metadata content: {e}")

        return dependencies

    @staticmethod
    def _parse_single_requirement(req_str: str) -> set[str]:
        """Parse a single requirement string and extract package name.

        Args:
            req_str: Requirement string (e.g., "numpy>=1.20.0", "package[extra]>=1.0")

        Returns:
            Set containing the normalized package name, or empty set if should skip
        """
        try:
            # Skip requirements guarded by an "extra == ..." marker.
            if re.search(
                r'extra\s*==\s*["\']?([^"\';\s]+)["\']?', req_str, re.IGNORECASE
            ):
                logger.debug(f"Skipping extra dependency: {req_str}")
                return set()

            # packaging is a third-party import; keep it local so the module
            # imports even when packaging is absent.
            from packaging.requirements import Requirement

            req = Requirement(req_str)
            if not should_skip_dependency(req.name, bool(req.extras)):
                dep_name = normalize_package_name(req.name)
                logger.debug(f"Found core dependency: {dep_name}")
                return {dep_name}
        except Exception:
            pass

        return set()

    def add_package(
        self, package_name: str, package_path: Path, version: str | None = None
    ) -> None:
        """Add package to cache.

        Args:
            package_name: Name of the package
            package_path: Path to package files
            version: Package version
        """
        # Normalize package name to ensure consistency
        normalized_name = normalize_package_name(package_name)

        # Copy package files to cache (flat structure for wheels, nested for dirs)
        if package_path.is_dir():
            dest_dir = self.cache_dir / normalized_name
            if dest_dir.exists():
                shutil.rmtree(dest_dir)
            shutil.copytree(package_path, dest_dir)
            relative_path = normalized_name
        else:
            shutil.copy2(package_path, self.cache_dir / package_path.name)
            relative_path = package_path.name

        # Key metadata by normalized name (stable across downloads) instead
        # of the transient source path: keying by temp-dir path duplicated an
        # entry every time the same package was re-added.
        metadata = self._load_metadata()
        metadata[normalized_name] = asdict(
            CacheMetadata(
                name=normalized_name,
                version=version,
                path=relative_path,
                timestamp=time.time(),
            )
        )
        self._save_metadata(metadata)

        logger.info(f"Cached package: {normalized_name}")

    def _load_metadata(self) -> dict[str, Any]:
        """Load cache metadata.

        Returns:
            Metadata dictionary (empty on missing or unreadable file)
        """
        if self.metadata_file.exists():
            try:
                with open(self.metadata_file, encoding="utf-8") as f:
                    return json.load(f)
            except Exception as e:
                logger.warning(f"Failed to load cache metadata: {e}")

        return {}

    def _save_metadata(self, metadata: dict[str, Any]) -> None:
        """Save cache metadata.

        Args:
            metadata: Metadata dictionary
        """
        with open(self.metadata_file, "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)

    def clear_cache(self) -> None:
        """Clear all cached packages."""
        if self.cache_dir.exists():
            shutil.rmtree(self.cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self._dependencies_cache.clear()  # Clear in-memory dependencies cache
        logger.info("Cache cleared")
773
+
774
+
775
class LibraryDownloader:
    """Download Python packages from PyPI (or a mirror) via ``pip download``."""

    def __init__(
        self,
        cache: LibraryCache,
        python_version: str | None = None,
        mirror: str = "pypi",
    ):
        """Initialize downloader.

        Args:
            cache: Cache manager
            python_version: Target Python version for platform-specific packages
            mirror: PyPI mirror source (pypi, tsinghua, aliyun, ustc, douban, tencent)
        """
        self.cache = cache
        self.python_version = (
            python_version or f"{sys.version_info.major}.{sys.version_info.minor}"
        )
        self.platform_name = (
            platform.system().lower() + "_" + platform.machine().lower()
        )
        # Unknown mirror names silently fall back to the official index.
        self.mirror_url = PYPI_MIRRORS.get(mirror, PYPI_MIRRORS["pypi"])
        self.pip_executable = self._find_pip_executable()

    @staticmethod
    def _find_pip_executable() -> str | None:
        """Find pip executable in the system (None when not installed)."""
        return shutil.which("pip") or shutil.which("pip3")

    def _download_package(self, dep: Dependency, dest_dir: Path) -> Path | None:
        """Download a single package without dependencies.

        Args:
            dep: Dependency to download
            dest_dir: Destination directory

        Returns:
            Path to downloaded package file (wheel or sdist) or None
        """
        if not self.pip_executable:
            logger.error(
                "pip not found. Please install pip: python -m ensurepip --upgrade"
            )
            return None

        logger.info(f"Downloading: {dep}")

        with tempfile.TemporaryDirectory() as temp_dir:
            result = subprocess.run(
                [
                    self.pip_executable,
                    "download",
                    "--no-deps",
                    "--index-url",
                    self.mirror_url,
                    "--dest",
                    temp_dir,
                    str(dep),
                ],
                capture_output=True,
                text=True,
                check=False,
            )

            if result.returncode != 0:
                logger.warning(f"pip download failed for {dep}: {result.stderr}")
                return None

            # Pick the artifact in preference order: wheel, then .tar.gz,
            # then .zip. (Previously the .zip scan ran even after a .tar.gz
            # was found, silently overriding the selection.)
            temp_path = Path(temp_dir)
            downloaded_file = None
            for pattern in ("*.whl", "*.tar.gz", "*.zip"):
                downloaded_file = next(iter(temp_path.glob(pattern)), None)
                if downloaded_file:
                    break

            if downloaded_file:
                self.cache.add_package(dep.name, downloaded_file, dep.version)
                shutil.copy2(downloaded_file, dest_dir / downloaded_file.name)
                logger.info(f"Downloaded: {downloaded_file.name}")
                return dest_dir / downloaded_file.name

        return None

    def download_packages(
        self,
        dependencies: list[Dependency],
        dest_dir: Path,
        max_workers: int = 4,
    ) -> DownloadResult:
        """Download multiple packages concurrently.

        NOTE(review): cache hits are counted as successful but are NOT copied
        into *dest_dir* — presumably later packing reads them from the cache
        path; confirm against the pack step.

        Args:
            dependencies: List of dependencies to download
            dest_dir: Destination directory
            max_workers: Maximum concurrent downloads

        Returns:
            DownloadResult containing download statistics
        """
        dest_dir.mkdir(parents=True, exist_ok=True)

        # Collect (package_name, success) tuples; appending from the
        # as_completed loop is single-threaded, so this is safe.
        results_list: list[tuple[str, bool]] = []
        cached_count = 0
        cached_packages: set[str] = set()  # Track cached package names efficiently

        logger.info(f"Total direct dependencies: {len(dependencies)}")
        logger.info(f"Using mirror: {self.mirror_url}")

        # Check cache and mark cached packages (single-threaded, safe)
        for dep in dependencies:
            if self.cache.get_package_path(dep.name, dep.version):
                normalized_dep_name = normalize_package_name(dep.name)
                results_list.append((normalized_dep_name, True))
                cached_packages.add(normalized_dep_name)
                cached_count += 1
                logger.info(f"Using cached package: {dep}")

        # Download remaining packages concurrently
        remaining_deps = [
            dep
            for dep in dependencies
            if normalize_package_name(dep.name) not in cached_packages
        ]
        downloaded_count = 0

        if remaining_deps:
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                future_to_dep = {
                    executor.submit(self._download_package, dep, dest_dir): dep
                    for dep in remaining_deps
                }

                for future in as_completed(future_to_dep):
                    dep = future_to_dep[future]
                    normalized_dep_name = normalize_package_name(dep.name)
                    try:
                        wheel_file = future.result()
                        results_list.append((
                            normalized_dep_name,
                            wheel_file is not None,
                        ))
                        if wheel_file:
                            downloaded_count += 1
                    except Exception as e:
                        logger.error(f"Error processing {normalized_dep_name}: {e}")
                        results_list.append((normalized_dep_name, False))

        # Convert to dictionary for final result
        results = dict(results_list)
        successful = sum(1 for v in results.values() if v)
        logger.info(
            f"Processed {successful}/{len(dependencies)} ({cached_count} cached, {downloaded_count} downloaded)"
        )

        return DownloadResult(
            results=results,
            total=len(dependencies),
            successful=successful,
            cached=cached_count,
            downloaded=downloaded_count,
        )
949
+
950
+
951
class PyLibPack:
    """Main library packer class."""

    def __init__(
        self,
        cache_dir: Path | None = None,
        python_version: str | None = None,
        mirror: str = "pypi",
        optimize: bool = True,
        optimization_strategy: SelectiveExtractionStrategy | None = None,
    ):
        """Initialize library packer.

        Args:
            cache_dir: Custom cache directory
            python_version: Target Python version
            mirror: PyPI mirror source (pypi, tsinghua, aliyun, ustc, douban, tencent)
            optimize: Whether to apply selective extraction when unpacking wheels
            optimization_strategy: Custom extraction strategy; when omitted and
                ``optimize`` is True, a default SelectiveExtractionStrategy is
                created. Ignored entirely when ``optimize`` is False.
        """
        self.cache = LibraryCache(cache_dir)
        self.downloader = LibraryDownloader(self.cache, python_version, mirror)

        # Set up optimization strategy. The conditional expression binds
        # looser than `or`, so parenthesize to make the grouping explicit:
        # no strategy is kept at all when optimize is False.
        self.optimize = optimize
        self.optimization_strategy = (
            (optimization_strategy or SelectiveExtractionStrategy())
            if optimize
            else None
        )
978
+ def pack_project(
979
+ self, project: Project, output_dir: Path, max_workers: int = 4
980
+ ) -> PackResult:
981
+ """Pack dependencies for a single project.
982
+
983
+ Args:
984
+ project: Project information
985
+ output_dir: Output directory
986
+ max_workers: Maximum concurrent downloads
987
+
988
+ Returns:
989
+ PackResult containing packing statistics
990
+ """
991
+ logger.info(f"\n{'=' * 60}")
992
+ logger.info(f"Packing dependencies for project: {project.name}")
993
+ logger.info(f"{'=' * 60}")
994
+
995
+ if not project.dependencies:
996
+ logger.warning(f"No dependencies found for {project.name}")
997
+ return PackResult(
998
+ success=False,
999
+ message="No dependencies found",
1000
+ project=project.name,
1001
+ total=0,
1002
+ successful=0,
1003
+ failed=0,
1004
+ packages_dir=str(output_dir),
1005
+ )
1006
+
1007
+ logger.info(f"Found {len(project.dependencies)} dependencies")
1008
+
1009
+ # Download direct dependencies
1010
+ download_result = self.downloader.download_packages(
1011
+ project.dependencies,
1012
+ self.cache.cache_dir,
1013
+ max_workers=max_workers,
1014
+ )
1015
+
1016
+ # Build package map (including both wheel and sdist files) and collect all required packages recursively
1017
+ package_map: dict[str, Path] = {}
1018
+
1019
+ # Add wheel files to package map
1020
+ for wheel_file in self.cache.cache_dir.glob("*.whl"):
1021
+ pkg_name = self.cache._extract_package_name_from_wheel(wheel_file)
1022
+ if pkg_name and pkg_name not in package_map: # Prefer wheel files
1023
+ normalized_pkg_name = normalize_package_name(pkg_name)
1024
+ package_map[normalized_pkg_name] = wheel_file
1025
+
1026
+ # Add sdist files to package map (only if not already present)
1027
+ for sdist_file in self.cache.cache_dir.glob("*.tar.gz"):
1028
+ pkg_name = self.cache._extract_package_name_from_sdist(sdist_file)
1029
+ if pkg_name and normalize_package_name(pkg_name) not in package_map:
1030
+ normalized_pkg_name = normalize_package_name(pkg_name)
1031
+ package_map[normalized_pkg_name] = sdist_file
1032
+
1033
+ for sdist_file in self.cache.cache_dir.glob("*.zip"):
1034
+ pkg_name = self.cache._extract_package_name_from_sdist(sdist_file)
1035
+ if pkg_name and normalize_package_name(pkg_name) not in package_map:
1036
+ normalized_pkg_name = normalize_package_name(pkg_name)
1037
+ package_map[normalized_pkg_name] = sdist_file
1038
+
1039
+ # Recursively collect all dependencies (pass cache instance for dependency extraction)
1040
+ all_packages = self._collect_all_dependencies(
1041
+ package_map, list(download_result.results), self.cache
1042
+ )
1043
+
1044
+ # Extract all required packages (keep order of dependency resolution)
1045
+ extracted_packages = []
1046
+ for pkg_name in all_packages:
1047
+ if pkg_name in package_map:
1048
+ # Skip if output directory already exists
1049
+ output_pkg_dir = output_dir / pkg_name
1050
+ if output_pkg_dir.exists():
1051
+ logger.warning(f"Output directory already exists: {output_pkg_dir}")
1052
+ continue
1053
+
1054
+ package_file = package_map[pkg_name]
1055
+ logger.info(f"Extracting {package_file.name}...")
1056
+ self._extract_package(package_file, output_dir, pkg_name)
1057
+ extracted_packages.append(pkg_name)
1058
+ logger.info(f"Extracted {pkg_name}")
1059
+
1060
+ logger.info(
1061
+ f"Pack complete for {project.name}: {download_result.successful}/{download_result.total}"
1062
+ )
1063
+
1064
+ return PackResult(
1065
+ success=download_result.successful > 0,
1066
+ project=project.name,
1067
+ total=download_result.total,
1068
+ successful=download_result.successful,
1069
+ failed=download_result.total - download_result.successful,
1070
+ packages_dir=str(output_dir),
1071
+ extracted_packages=extracted_packages,
1072
+ )
1073
+
1074
    @staticmethod
    def _collect_all_dependencies(
        package_map: dict[str, Path], root_packages: list[str], cache: LibraryCache
    ) -> set[str]:
        """Recursively collect all dependencies from package files (wheel or sdist).

        Performs a depth-first walk starting from ``root_packages``. Names are
        normalized before any bookkeeping so lookups stay consistent with the
        normalized keys of ``package_map``.

        Args:
            package_map: Mapping of normalized package names to package files
                (wheel or sdist)
            root_packages: List of root package names to start from
            cache: LibraryCache instance for extracting dependencies

        Returns:
            Set of all required (normalized) package names
        """
        all_packages: set[str] = set()
        visited: set[str] = set()  # every name ever processed; prevents re-walking
        visit_stack: dict[str, int] = {}  # Track visit depth for cycle detection (names on the active DFS path)

        def visit(pkg_name: str, level: int = 0) -> None:
            """Visit a package and collect its dependencies."""
            # Normalize package name for consistency
            normalized_pkg_name = normalize_package_name(pkg_name)

            # Check for cycles: the name is still on the current DFS path.
            if normalized_pkg_name in visit_stack:
                logger.warning(
                    f"Potential circular dependency detected: {normalized_pkg_name} (current depth: {level}, "
                    f"previous depth: {visit_stack[normalized_pkg_name]})"
                )
                return

            # Check depth limit
            if level > MAX_DEPTH:
                logger.warning(
                    f"Maximum dependency depth ({MAX_DEPTH}) reached for {normalized_pkg_name}, stopping recursion"
                )
                return

            if normalized_pkg_name in visited:
                return

            # Mark as visited and track depth
            visited.add(normalized_pkg_name)
            visit_stack[normalized_pkg_name] = level
            all_packages.add(normalized_pkg_name)

            if normalized_pkg_name in package_map:
                # NOTE(review): package_map may also hold sdist paths, which are
                # handed to a helper named "..._from_wheel" — presumably it
                # handles both formats; confirm against LibraryCache.
                deps = cache._extract_dependencies_from_wheel(
                    package_map[normalized_pkg_name]
                )
                logger.debug(f"{' ' * level}{normalized_pkg_name} -> {deps}")
                for dep in deps:
                    visit(dep, level + 1)

            # Remove from stack when done (the name stays in `visited`)
            visit_stack.pop(normalized_pkg_name, None)

        for pkg_name in root_packages:
            visit(pkg_name)

        logger.info(
            f"Collected {len(all_packages)} packages (including recursive dependencies)"
        )
        logger.info(f"Packages: {all_packages}")
        return all_packages
1140
+ def _build_and_cache_wheel(self, sdist_file: Path, package_name: str) -> None:
1141
+ """Build wheel from sdist file and cache it for faster future access.
1142
+
1143
+ Args:
1144
+ sdist_file: Path to sdist file (.tar.gz or .zip)
1145
+ package_name: Name of the package
1146
+ """
1147
+ with tempfile.TemporaryDirectory() as temp_wheel_dir:
1148
+ # Use pip wheel to build wheel from sdist
1149
+ result = subprocess.run(
1150
+ [
1151
+ self.downloader.pip_executable or "pip",
1152
+ "wheel",
1153
+ "--no-deps",
1154
+ "--wheel-dir",
1155
+ temp_wheel_dir,
1156
+ "--no-cache-dir",
1157
+ str(sdist_file),
1158
+ ],
1159
+ capture_output=True,
1160
+ text=True,
1161
+ check=False,
1162
+ )
1163
+
1164
+ if result.returncode != 0:
1165
+ logger.warning(
1166
+ f"Failed to build wheel from sdist for {package_name}: {result.stderr}"
1167
+ )
1168
+ return
1169
+
1170
+ # Find the built wheel file
1171
+ wheel_files = list(Path(temp_wheel_dir).glob("*.whl"))
1172
+ if wheel_files:
1173
+ wheel_file = wheel_files[0]
1174
+ # Copy wheel to cache directory
1175
+ cache_wheel_path = self.cache.cache_dir / wheel_file.name
1176
+ shutil.copy2(wheel_file, cache_wheel_path)
1177
+
1178
+ # Update cache metadata
1179
+ self.cache.add_package(package_name, wheel_file)
1180
+
1181
+ logger.info(
1182
+ f"Built and cached wheel: {wheel_file.name} for {package_name}"
1183
+ )
1184
+ else:
1185
+ logger.warning(f"No wheel file was built from sdist for {package_name}")
1186
+
1187
    def _extract_package(
        self, package_file: Path, dest_dir: Path, package_name: str
    ) -> None:
        """Extract package file (wheel or sdist) to destination directory with optional optimization.

        Sdists are installed via ``pip install --target`` into a temporary
        directory and their contents copied over (dist-info skipped), then a
        wheel is built and cached for faster future runs. Wheels are unzipped
        directly, optionally filtered by the optimization strategy; dist-info
        directories are always skipped.

        Args:
            package_file: Path to package file (wheel or sdist)
            dest_dir: Destination directory
            package_name: Name of the package being extracted
        """
        logger.info(
            f"Extracting {package_file.name} for package {package_name} to {dest_dir}"
        )

        # Handle sdist files (.tar.gz or .zip) - install using pip, and build wheel for cache
        # NOTE(review): Path(".tar.gz").suffix is ".gz", so this test matches
        # any gzip file, not just tarballs — presumably only sdists reach here.
        if package_file.suffix == ".gz" or package_file.suffix == ".zip":
            logger.info(f"Installing sdist file for {package_name} using pip...")

            # Use pip install --target to install sdist to temporary directory
            with tempfile.TemporaryDirectory() as temp_install_dir:
                result = subprocess.run(
                    [
                        self.downloader.pip_executable or "pip",
                        "install",
                        "--target",
                        temp_install_dir,
                        "--no-deps",  # Don't install dependencies (we handle them separately)
                        "--no-cache-dir",
                        str(package_file),
                    ],
                    capture_output=True,
                    text=True,
                    check=False,
                )

                if result.returncode != 0:
                    logger.error(
                        f"Failed to install sdist {package_file.name}: {result.stderr}"
                    )
                    return

                # Copy installed files to dest_dir, skipping *.dist-info directories
                temp_install_path = Path(temp_install_dir)
                for item in temp_install_path.iterdir():
                    # Skip dist-info directories
                    if item.name.endswith(".dist-info"):
                        logger.debug(f"Skipping dist-info directory: {item.name}")
                        continue
                    dest_path = dest_dir / item.name
                    if item.is_dir():
                        # Replace any pre-existing directory wholesale so stale
                        # files from a previous extraction don't linger.
                        if dest_path.exists():
                            shutil.rmtree(dest_path)
                        shutil.copytree(item, dest_path)
                    else:
                        shutil.copy2(item, dest_path)

                logger.info(
                    f"Installed sdist file for {package_name} to site-packages structure"
                )

            # Build wheel from sdist and cache it for faster future access
            logger.info(f"Building wheel from sdist for {package_name}...")
            self._build_and_cache_wheel(package_file, package_name)
            return

        # Handle wheel files with optional optimization
        with zipfile.ZipFile(package_file, "r") as zf:
            if self.optimize and self.optimization_strategy:
                # Apply optimization strategy - selectively extract files
                extracted_count = 0
                skipped_count = 0

                for file_info in zf.filelist:
                    file_path = Path(file_info.filename)
                    # Skip dist-info directories (any path component ending in
                    # ".dist-info"; `parts` also includes the file name itself)
                    if file_path.name.endswith(".dist-info") or any(
                        parent.endswith(".dist-info") for parent in file_path.parts
                    ):
                        logger.debug(f"Skipping dist-info: {file_info.filename}")
                        skipped_count += 1
                        continue
                    if self.optimization_strategy.should_extract_file(
                        package_name, file_path
                    ):
                        zf.extract(file_info, dest_dir)
                        extracted_count += 1
                        logger.debug(f"Extracted {file_path} from {package_name}")
                    else:
                        skipped_count += 1
                        logger.debug(
                            f"Skipped {file_path} from {package_name} (filtered by optimization strategy)"
                        )

                logger.info(
                    f"Extraction complete for {package_name}: {extracted_count} extracted, {skipped_count} skipped"
                )
            else:
                # Extract all files without optimization, but skip dist-info directories
                for file_info in zf.filelist:
                    file_path = Path(file_info.filename)
                    # Skip dist-info directories
                    if file_path.name.endswith(".dist-info") or any(
                        parent.endswith(".dist-info") for parent in file_path.parts
                    ):
                        logger.debug(f"Skipping dist-info: {file_info.filename}")
                        continue
                    zf.extract(file_info, dest_dir)
                logger.info(
                    f"All files extracted for {package_name} (no optimization applied, dist-info skipped)"
                )
1298
    def pack(
        self,
        working_dir: Path,
        max_workers: int = 4,
    ) -> BatchPackResult:
        """Pack project dependencies from base directory.

        Output always goes to ``working_dir/dist/site-packages``.

        Args:
            working_dir: Base directory containing projects or a single project
            max_workers: Maximum concurrent downloads

        Returns:
            BatchPackResult containing batch packing statistics
        """
        output_dir = working_dir / "dist" / "site-packages"
        logger.info(f"Starting dependency pack for: {working_dir}")

        projects = Solution.from_directory(root_dir=working_dir).projects
        if not projects:
            logger.error("Failed to load project information")
            return BatchPackResult(
                success=False,
                total=0,
                successful=0,
                failed=0,
                output_dir=str(output_dir),
                total_time=0.0,
            )

        logger.info(f"Found {len(projects)} project(s) to process")

        # Process each project
        total_start = time.perf_counter()
        success_count = 0
        failed_projects: list[str] = []
        # A single-project solution lives directly in working_dir; multi-project
        # solutions use one subdirectory per project name.
        use_current_dir = len(projects) == 1

        for project in projects.values():
            project_dir = working_dir if use_current_dir else working_dir / project.name

            # project_dir is only checked for existence here; pack_project
            # itself works from the project metadata and the shared cache.
            if not project_dir.is_dir():
                logger.warning(f"Project directory not found: {project_dir}, skipping")
                failed_projects.append(project.name)
                continue

            result = self.pack_project(project, output_dir, max_workers)

            if result.success:
                success_count += 1
            else:
                failed_projects.append(project.name)

        total_time = time.perf_counter() - total_start

        # Summary
        logger.info(f"\n{'=' * 60}")
        logger.info("Summary")
        logger.info(f"{'=' * 60}")
        logger.info(f"Total projects: {len(projects)}")
        logger.info(f"Successfully packed: {success_count}")
        logger.info(f"Failed: {len(failed_projects)}")
        if failed_projects:
            logger.info(f"Failed projects: {', '.join(failed_projects)}")
        logger.info(f"Total time: {total_time:.2f}s")

        return BatchPackResult(
            success=len(failed_projects) == 0,
            total=len(projects),
            successful=success_count,
            failed=len(failed_projects),
            failed_projects=failed_projects,
            output_dir=str(output_dir),
            total_time=total_time,
        )
1374
    def clear_cache(self) -> None:
        """Clear the package cache (delegates to the underlying LibraryCache)."""
        self.cache.clear_cache()
1377
+
1378
+
1379
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments.

    Returns:
        Parsed argparse namespace for the pylibpack CLI.
    """
    cli = argparse.ArgumentParser(
        prog="pylibpack",
        description="Python library packer with caching support",
    )

    # Positional: base directory (defaults to the current working directory).
    cli.add_argument(
        "directory",
        type=str,
        nargs="?",
        default=str(Path.cwd()),
        help="Base directory containing projects",
    )

    # Download/cache configuration.
    cli.add_argument("--cache-dir", type=str, default=None, help="Custom cache directory")
    cli.add_argument("--python-version", type=str, default=None, help="Target Python version")
    cli.add_argument("-j", "--jobs", type=int, default=4, help="Maximum concurrent downloads")
    cli.add_argument(
        "--mirror",
        type=str,
        default="aliyun",
        choices=("pypi", "tsinghua", "aliyun", "ustc", "douban", "tencent"),
        help="PyPI mirror source for faster downloads in China",
    )

    # Behavior flags.
    cli.add_argument("--debug", "-d", action="store_true", help="Debug mode")
    cli.add_argument(
        "--no-optimize",
        "-no",
        action="store_true",
        help="Disable package optimization (extract all files)",
    )
    cli.add_argument(
        "--list-optimizations",
        "-lo",
        action="store_true",
        help="List all available optimization rules",
    )

    return cli.parse_args()
1423
+
1424
+
1425
def main() -> None:
    """Main entry point for pylibpack tool."""
    args = parse_args()

    # Configure logging first so --debug also affects the
    # --list-optimizations path (previously it was applied after the early
    # return below and had no effect there).
    if args.debug:
        logger.setLevel(logging.DEBUG)

    if args.list_optimizations:
        strategy = SelectiveExtractionStrategy()
        # Use the module logger rather than bare logging.info(): the root
        # logger defaults to WARNING level, so logging.info() output was
        # silently discarded and the list never appeared.
        logger.info("Available optimization rules:")
        for lib_name in sorted(strategy.get_library_names_with_rules()):
            logger.info(f"  - {lib_name}")
        return

    # Initialize packer
    cache_dir = Path(args.cache_dir) if args.cache_dir else None
    packer = PyLibPack(
        cache_dir=cache_dir,
        python_version=args.python_version,
        mirror=args.mirror,
        optimize=not args.no_optimize,
    )

    packer.pack(
        working_dir=Path(args.directory),
        max_workers=args.jobs,
    )


if __name__ == "__main__":
    main()