pysfi 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {pysfi-0.1.10.dist-info → pysfi-0.1.12.dist-info}/METADATA +9 -7
  2. pysfi-0.1.12.dist-info/RECORD +62 -0
  3. {pysfi-0.1.10.dist-info → pysfi-0.1.12.dist-info}/entry_points.txt +13 -2
  4. sfi/__init__.py +1 -1
  5. sfi/alarmclock/alarmclock.py +40 -40
  6. sfi/bumpversion/__init__.py +1 -1
  7. sfi/cleanbuild/cleanbuild.py +155 -0
  8. sfi/condasetup/condasetup.py +116 -0
  9. sfi/docdiff/docdiff.py +238 -0
  10. sfi/docscan/__init__.py +1 -1
  11. sfi/docscan/docscan_gui.py +1 -1
  12. sfi/docscan/lang/eng.py +152 -152
  13. sfi/docscan/lang/zhcn.py +170 -170
  14. sfi/filedate/filedate.py +185 -112
  15. sfi/gittool/__init__.py +2 -0
  16. sfi/gittool/gittool.py +401 -0
  17. sfi/llmclient/llmclient.py +592 -0
  18. sfi/llmquantize/llmquantize.py +480 -0
  19. sfi/llmserver/llmserver.py +335 -0
  20. sfi/makepython/makepython.py +2 -2
  21. sfi/pdfsplit/pdfsplit.py +4 -4
  22. sfi/pyarchive/pyarchive.py +418 -0
  23. sfi/pyembedinstall/__init__.py +0 -0
  24. sfi/pyembedinstall/pyembedinstall.py +629 -0
  25. sfi/pylibpack/pylibpack.py +813 -269
  26. sfi/pylibpack/rules/numpy.json +22 -0
  27. sfi/pylibpack/rules/pymupdf.json +10 -0
  28. sfi/pylibpack/rules/pyqt5.json +19 -0
  29. sfi/pylibpack/rules/pyside2.json +23 -0
  30. sfi/pylibpack/rules/scipy.json +23 -0
  31. sfi/pylibpack/rules/shiboken2.json +24 -0
  32. sfi/pyloadergen/pyloadergen.py +271 -572
  33. sfi/pypack/pypack.py +822 -471
  34. sfi/pyprojectparse/__init__.py +0 -0
  35. sfi/pyprojectparse/pyprojectparse.py +500 -0
  36. sfi/pysourcepack/pysourcepack.py +308 -369
  37. sfi/quizbase/__init__.py +0 -0
  38. sfi/quizbase/quizbase.py +828 -0
  39. sfi/quizbase/quizbase_gui.py +987 -0
  40. sfi/regexvalidate/__init__.py +0 -0
  41. sfi/regexvalidate/regex_help.html +284 -0
  42. sfi/regexvalidate/regexvalidate.py +468 -0
  43. sfi/taskkill/taskkill.py +0 -2
  44. pysfi-0.1.10.dist-info/RECORD +0 -39
  45. sfi/embedinstall/embedinstall.py +0 -478
  46. sfi/projectparse/projectparse.py +0 -152
  47. {pysfi-0.1.10.dist-info → pysfi-0.1.12.dist-info}/WHEEL +0 -0
  48. /sfi/{embedinstall → llmclient}/__init__.py +0 -0
  49. /sfi/{projectparse → llmquantize}/__init__.py +0 -0
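
The new sfi/pylibpack/rules/*.json files pair with the OptimizationRule and SelectiveExtractionStrategy classes added to pylibpack.py in the diff below. A minimal sketch of the rule shape those classes expect, assuming pysfi 0.1.12 is installed; the numpy patterns are illustrative and are not the actual contents of rules/numpy.json:

from pathlib import Path

from sfi.pylibpack.pylibpack import OptimizationRule, SelectiveExtractionStrategy

# Hypothetical rule: keep numpy but drop its f2py subtree (illustrative pattern only).
rule = OptimizationRule(
    library_name="numpy",
    exclude_patterns=[r"(^|/)f2py(/|$)"],  # regexes searched against the lowercased posix path
    include_patterns=[],                   # empty list: keep everything not excluded
)
strategy = SelectiveExtractionStrategy(rules=[rule], apply_universal_rules=True)

# Universal rules already filter doc/test/example/demo directories for every library.
print(strategy.should_extract_file("numpy", Path("numpy/tests/test_umath.py")))  # False
print(strategy.should_extract_file("numpy", Path("numpy/f2py/__init__.py")))     # False
print(strategy.should_extract_file("numpy", Path("numpy/core/__init__.py")))     # True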
@@ -1,7 +1,7 @@
  """Python Library Packager - Download and pack Python dependencies with caching support.

  This module provides functionality to:
- 1. Read project information from projects.json or run projectparse if needed
+ 1. Read project information from projects.json or run pyprojectparse if needed
  2. Download dependencies to local .cache directory
  3. Pack dependencies into a distributable format
  4. Support batch processing multiple projects recursively
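
The updated docstring above describes the packing workflow end to end; a short sketch of driving it programmatically through the PyLibPack class introduced later in this diff, assuming pysfi 0.1.12 is installed and ./myproject is a placeholder project directory:

from pathlib import Path

from sfi.pylibpack.pylibpack import PyLibPack

# Pack dependencies for the project(s) under ./myproject; mirror keys come from PYPI_MIRRORS.
packer = PyLibPack(mirror="tsinghua", optimize=True)
result = packer.pack(working_dir=Path("./myproject"), max_workers=4)

# Extracted packages land in ./myproject/dist/site-packages
print(f"{result.successful}/{result.total} projects packed in {result.total_time:.2f}s")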
@@ -13,17 +13,24 @@ import argparse
  import json
  import logging
  import platform
+ import re
  import shutil
  import subprocess
  import sys
+ import tarfile
  import tempfile
  import time
+ import zipfile
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from dataclasses import dataclass, field
  from pathlib import Path
- from typing import Any
+ from typing import Any, Pattern

- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+ from sfi.pyprojectparse.pyprojectparse import Project, Solution
+
+ logging.basicConfig(
+     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+ )
  logger = logging.getLogger(__name__)

  __version__ = "1.0.0"
@@ -31,6 +38,8 @@ __build__ = "20260120"

  DEFAULT_CACHE_DIR = Path.home() / ".pysfi" / ".cache" / "python-libs"

+ MAX_DEPTH = 50  # Maximum recursion depth to prevent infinite loops
+
  PYPI_MIRRORS = {
      "pypi": "https://pypi.org/simple",
      "tsinghua": "https://pypi.tuna.tsinghua.edu.cn/simple",
@@ -109,15 +118,6 @@ class CacheMetadata:
      timestamp: float


- @dataclass
- class ProjectInfo:
-     """Project information for packing dependencies."""
-
-     name: str
-     dir: Path
-     info: dict
-
-
  DEV_TOOLS = frozenset({
      "sphinx",
      "sphinx_rtd_theme",
@@ -139,164 +139,242 @@ DEV_PATTERNS = frozenset({"dev", "test", "docs", "lint", "example"})
139
139
  TYPING_PATTERNS = frozenset({"stubs", "typing", "types"})
140
140
 
141
141
 
142
- def normalize_package_name(name: str) -> str:
143
- """Normalize package name to lowercase with underscores.
142
+ @dataclass
143
+ class OptimizationRule:
144
+ """Defines an optimization rule for a specific library.
144
145
 
145
- Args:
146
- name: Package name to normalize
146
+ Attributes:
147
+ library_name: The name of the library to apply the rule to.
148
+ exclude_patterns: A list of patterns to exclude from the library.
149
+ include_patterns: A list of patterns to include in the library.
147
150
 
148
- Returns:
149
- Normalized package name
150
151
  """
151
- return name.lower().replace("-", "_")
152
-
153
152
 
154
- def should_skip_dependency(req_name: str, has_extras: bool = False) -> bool:
155
- """Check if a dependency should be skipped based on common patterns.
156
-
157
- Args:
158
- req_name: Package name
159
- has_extras: Whether the requirement has extras
153
+ library_name: str = field(default_factory=str)
154
+ exclude_patterns: list[str] = field(default_factory=list)
155
+ include_patterns: list[str] = field(default_factory=list)
160
156
 
161
- Returns:
162
- True if should skip, False otherwise
157
+ def __post_init__(self):
158
+ """Compile regex patterns after initialization."""
159
+ self.exclude_compiled: list[Pattern] = [
160
+ re.compile(p) for p in self.exclude_patterns
161
+ ]
162
+ self.include_compiled: list[Pattern] = [
163
+ re.compile(p) for p in self.include_patterns
164
+ ]
165
+
166
+
167
+ class SelectiveExtractionStrategy:
168
+ """Optimization strategy that applies inclusion/exclusion rules to specific libraries.
169
+
170
+ This strategy works as follows:
171
+ 1. First, apply universal exclusion rules (doc, test, example, demo, etc.)
172
+ 2. Then, apply library-specific exclusion rules
173
+ 3. Finally, apply inclusion rules (only files matching include patterns are kept)
163
174
  """
164
- req_lower = req_name.lower()
165
175
 
166
- # Skip extras
167
- if has_extras:
168
- return True
176
+ # Universal exclusion patterns - applied to all libraries
177
+ UNIVERSAL_EXCLUDE_PATTERNS = frozenset({
178
+ "doc",
179
+ "docs",
180
+ "test",
181
+ "tests",
182
+ "example",
183
+ "examples",
184
+ "demo",
185
+ "demos",
186
+ "sample",
187
+ "samples",
188
+ "benchmark",
189
+ "benchmarks",
190
+ "tutorial",
191
+ "tutorials",
192
+ "notebook",
193
+ "notebooks",
194
+ "license",
195
+ "licenses",
196
+ })
169
197
 
170
- # Skip dev/test/docs/lint/example patterns
171
- if any(keyword in req_lower for keyword in DEV_PATTERNS):
172
- return True
198
+ def __init__(
199
+ self,
200
+ rules: list[OptimizationRule] | None = None,
201
+ apply_universal_rules: bool = True,
202
+ ):
203
+ """Initialize the strategy with optimization rules.
173
204
 
174
- # Skip typing/stubs dependencies
175
- if any(keyword in req_lower for keyword in TYPING_PATTERNS):
176
- return True
205
+ Args:
206
+ rules: List of optimization rules to apply
207
+ apply_universal_rules: Whether to apply universal exclusion rules (default: True)
208
+ """
209
+ self.rules: dict[str, OptimizationRule] = {}
210
+ self.apply_universal_rules = apply_universal_rules
211
+
212
+ if rules:
213
+ for rule in rules:
214
+ self.rules[rule.library_name.lower()] = rule
215
+
216
+ # Default rules for common libraries
217
+ if not rules:
218
+ self._setup_default_rules()
219
+
220
+ # Compile universal exclusion patterns for faster matching
221
+ self._universal_exclude_compiled = [
222
+ re.compile(f"(^|/)({pattern})(/|$)", re.IGNORECASE)
223
+ for pattern in self.UNIVERSAL_EXCLUDE_PATTERNS
224
+ ]
225
+
226
+ def _setup_default_rules(self):
227
+ """Setup default optimization rules for common libraries."""
228
+ # Get the rules directory
229
+ rules_dir = Path(__file__).parent / "rules"
230
+
231
+ if not rules_dir.exists() or not rules_dir.is_dir():
232
+ logger.warning(f"Rules directory not found: {rules_dir}")
233
+ return
234
+
235
+ # Load all JSON rule files
236
+ for rule_file in rules_dir.glob("*.json"):
237
+ try:
238
+ with open(rule_file, encoding="utf-8") as f:
239
+ rule_data = json.load(f)
177
240
 
178
- # Skip common dev tools
179
- return req_lower.replace("-", "_") in DEV_TOOLS
241
+ # Convert JSON data to OptimizationRule
242
+ rule = OptimizationRule(
243
+ library_name=rule_data["library_name"],
244
+ exclude_patterns=rule_data["exclude_patterns"],
245
+ include_patterns=rule_data["include_patterns"],
246
+ )
180
247
 
248
+ self.rules[rule.library_name.lower()] = rule
249
+ logger.debug(
250
+ f"Loaded optimization rule for {rule.library_name} from {rule_file.name}"
251
+ )
181
252
 
182
- class ProjectParser:
183
- """Parse project information from projects.json or run projectparse."""
253
+ except Exception as e:
254
+ logger.warning(f"Failed to load rule from {rule_file.name}: {e}")
184
255
 
185
- @staticmethod
186
- def ensure_projects_json(directory: Path) -> Path | None:
187
- """Ensure projects.json exists by running projectparse if needed.
256
+ def _matches_universal_exclude_pattern(self, relative_path: str) -> bool:
257
+ """Check if file path matches any universal exclusion pattern.
188
258
 
189
259
  Args:
190
- directory: Directory to check for projects.json
260
+ relative_path: Relative path to the file
191
261
 
192
262
  Returns:
193
- Path to projects.json if successful, None otherwise
263
+ True if path should be excluded, False otherwise
194
264
  """
195
- projects_json = directory / "projects.json"
196
- if projects_json.exists():
197
- logger.debug(f"Found existing projects.json at {projects_json}")
198
- return projects_json
265
+ return any(
266
+ pattern.search(relative_path)
267
+ for pattern in self._universal_exclude_compiled
268
+ )
199
269
 
200
- logger.info("projects.json not found, running projectparse...")
201
- try:
202
- from sfi.projectparse import projectparse as pp
270
+ def should_extract_file(self, library_name: str, file_path: Path) -> bool:
271
+ """Determine if a file should be extracted based on library-specific rules.
203
272
 
204
- # Save and restore original argv
205
- original_argv = sys.argv.copy()
206
- sys.argv = ["projectparse", "--directory", str(directory), "--output", "projects.json", "--recursive"]
207
- try:
208
- pp.main()
209
- finally:
210
- sys.argv = original_argv
273
+ Args:
274
+ library_name: Name of the library
275
+ file_path: Path to the file to check
211
276
 
212
- if projects_json.exists():
213
- logger.info("projectparse completed successfully")
214
- return projects_json
215
- else:
216
- logger.error("projectparse failed to generate projects.json")
217
- return None
218
- except ImportError:
219
- # Fallback: run projectparse as script
220
- sfi_dir = Path(__file__).parent.parent.parent
221
- projectparse_script = sfi_dir / "projectparse" / "projectparse.py"
277
+ Returns:
278
+ True if the file should be extracted, False otherwise
279
+ """
280
+ lib_name_lower = library_name.lower()
281
+ relative_path = file_path.as_posix().lower()
282
+
283
+ # First, apply universal exclusion rules (applied to all libraries)
284
+ if self.apply_universal_rules and self._matches_universal_exclude_pattern(
285
+ relative_path
286
+ ):
287
+ logger.debug(
288
+ f"Excluding {file_path} from {library_name} (matches universal exclusion pattern)"
289
+ )
290
+ return False
222
291
 
223
- if not projectparse_script.exists():
224
- logger.error(f"Cannot find projectparse script at {projectparse_script}")
225
- return None
292
+ # If no specific rule exists for this library, extract everything
293
+ if lib_name_lower not in self.rules:
294
+ logger.debug(f"No specific rules for {library_name}, including {file_path}")
295
+ return True
226
296
 
227
- result = subprocess.run(
228
- [
229
- sys.executable,
230
- str(projectparse_script),
231
- "--directory",
232
- str(directory),
233
- "--output",
234
- "projects.json",
235
- "--recursive",
236
- ],
237
- capture_output=True,
238
- text=True,
239
- cwd=directory,
240
- )
297
+ rule = self.rules[lib_name_lower]
241
298
 
242
- if result.returncode == 0 and projects_json.exists():
243
- logger.info("projectparse completed successfully")
244
- return projects_json
299
+ logger.debug(
300
+ f"Checking {file_path} for {library_name} with {len(rule.exclude_compiled)} exclude and {len(rule.include_compiled)} include patterns"
301
+ )
245
302
 
246
- logger.error(f"projectparse failed: {result.stderr}")
247
- return None
248
- except Exception as e:
249
- logger.error(f"Failed to run projectparse: {e}")
250
- return None
303
+ # Then, apply library-specific exclusion rules - if file matches any exclude pattern, skip it
304
+ for exclude_pattern in rule.exclude_compiled:
305
+ if exclude_pattern.search(relative_path):
306
+ logger.debug(
307
+ f"Excluding {file_path} from {library_name} (matches exclude pattern: {exclude_pattern.pattern})"
308
+ )
309
+ return False
310
+
311
+ # If inclusion patterns are defined, only include files that match at least one
312
+ if rule.include_compiled:
313
+ for include_pattern in rule.include_compiled:
314
+ if include_pattern.search(relative_path):
315
+ logger.debug(
316
+ f"Including {file_path} from {library_name} (matches include pattern: {include_pattern.pattern})"
317
+ )
318
+ return True
319
+ # If we have inclusion rules but the file doesn't match any, exclude it
320
+ logger.debug(
321
+ f"Excluding {file_path} from {library_name} (doesn't match any include patterns)"
322
+ )
323
+ return False
251
324
 
252
- @staticmethod
253
- def load_projects_json(projects_json: Path) -> dict | None:
254
- """Load project information from projects.json.
325
+ # If no inclusion rules are defined, include the file (after exclusion check)
326
+ logger.debug(
327
+ f"Including {file_path} from {library_name} (passed exclusion filters)"
328
+ )
329
+ return True
255
330
 
256
- Args:
257
- projects_json: Path to projects.json file
331
+ def get_library_names_with_rules(self) -> set[str]:
332
+ """Get the names of libraries that have optimization rules defined.
258
333
 
259
334
  Returns:
260
- Dictionary of project information, None if failed
335
+ Set of library names with optimization rules
261
336
  """
262
- try:
263
- with open(projects_json, encoding="utf-8") as f:
264
- return json.load(f)
265
- except Exception as e:
266
- logger.error(f"Failed to load projects.json: {e}")
267
- return None
337
+ return set(self.rules.keys())
268
338
 
269
- @staticmethod
270
- def parse_requirements_from_project(project_info: dict) -> list[Dependency]:
271
- """Parse dependencies from project info.
272
339
 
273
- Args:
274
- project_info: Project information dictionary from projects.json
340
+ def normalize_package_name(name: str) -> str:
341
+ """Normalize package name to lowercase with underscores.
275
342
 
276
- Returns:
277
- List of Dependency objects
278
- """
279
- from packaging.requirements import Requirement
343
+ Args:
344
+ name: Package name to normalize
280
345
 
281
- dependencies = []
282
- dep_list = project_info.get("dependencies", [])
346
+ Returns:
347
+ Normalized package name
348
+ """
349
+ return name.lower().replace("-", "_")
283
350
 
284
- for dep_str in dep_list:
285
- try:
286
- req = Requirement(dep_str)
287
351
 
288
- if should_skip_dependency(req.name, bool(req.extras)):
289
- logger.info(f"Skipping: {dep_str}")
290
- continue
352
+ def should_skip_dependency(req_name: str, has_extras: bool = False) -> bool:
353
+ """Check if a dependency should be skipped based on common patterns.
291
354
 
292
- dep = Dependency(name=req.name, version=str(req.specifier) if req.specifier else None)
293
- dependencies.append(dep)
294
- logger.debug(f"Parsed dependency: {dep}")
295
- except Exception as e:
296
- logger.warning(f"Failed to parse requirement '{dep_str}': {e}")
355
+ Args:
356
+ req_name: Package name
357
+ has_extras: Whether the requirement has extras
297
358
 
298
- logger.info(f"Parsed {len(dependencies)} dependencies for project")
299
- return dependencies
359
+ Returns:
360
+ True if should skip, False otherwise
361
+ """
362
+ req_lower = req_name.lower()
363
+
364
+ # Skip extras
365
+ if has_extras:
366
+ return True
367
+
368
+ # Skip dev/test/docs/lint/example patterns
369
+ if any(keyword in req_lower for keyword in DEV_PATTERNS):
370
+ return True
371
+
372
+ # Skip typing/stubs dependencies
373
+ if any(keyword in req_lower for keyword in TYPING_PATTERNS):
374
+ return True
375
+
376
+ # Skip common dev tools
377
+ return req_lower.replace("-", "_") in DEV_TOOLS
300
378
 
301
379
 
302
380
  class LibraryCache:
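
A quick illustration of the dependency-filtering helpers defined above, as a sketch (DEV_TOOLS is only partially visible in this diff, so the exact skip list may differ):

from sfi.pylibpack.pylibpack import normalize_package_name, should_skip_dependency

print(normalize_package_name("Sphinx-RTD-Theme"))           # "sphinx_rtd_theme"
print(should_skip_dependency("pytest-cov"))                  # True: "test" matches DEV_PATTERNS
print(should_skip_dependency("requests", has_extras=True))   # True: requirements with extras are skipped
print(should_skip_dependency("requests"))                    # False: kept as a real dependency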
@@ -311,8 +389,12 @@ class LibraryCache:
311
389
  self.cache_dir = cache_dir or DEFAULT_CACHE_DIR
312
390
  self.cache_dir.mkdir(parents=True, exist_ok=True)
313
391
  self.metadata_file = self.cache_dir / "metadata.json"
392
+ # In-memory cache for extracted dependencies to avoid repeated IO
393
+ self._dependencies_cache: dict[Path, set[str]] = {}
314
394
 
315
- def get_package_path(self, package_name: str, version: str | None = None) -> Path | None:
395
+ def get_package_path(
396
+ self, package_name: str, version: str | None = None
397
+ ) -> Path | None:
316
398
  """Get cached package path if available.
317
399
 
318
400
  Args:
@@ -322,17 +404,31 @@ class LibraryCache:
322
404
  Returns:
323
405
  Path to cached package or None
324
406
  """
325
- # First try filesystem lookup (works even if metadata is missing)
407
+ # First try filesystem lookup for wheel files (works even if metadata is missing)
326
408
  for whl_file in self.cache_dir.glob("*.whl"):
327
409
  parsed_name = self._extract_package_name_from_wheel(whl_file)
328
410
  if parsed_name == package_name:
329
- logger.debug(f"Cache hit (filesystem): {package_name}")
411
+ logger.debug(f"Cache hit (filesystem wheel): {package_name}")
330
412
  return whl_file
331
413
 
414
+ # Try filesystem lookup for sdist files (.tar.gz, .zip)
415
+ for sdist_file in self.cache_dir.glob("*.tar.gz"):
416
+ parsed_name = self._extract_package_name_from_sdist(sdist_file)
417
+ if parsed_name == package_name:
418
+ logger.debug(f"Cache hit (filesystem sdist): {package_name}")
419
+ return sdist_file
420
+ for sdist_file in self.cache_dir.glob("*.zip"):
421
+ parsed_name = self._extract_package_name_from_sdist(sdist_file)
422
+ if parsed_name == package_name:
423
+ logger.debug(f"Cache hit (filesystem sdist): {package_name}")
424
+ return sdist_file
425
+
332
426
  # Fallback to metadata lookup
333
427
  metadata = self._load_metadata()
334
428
  for info in metadata.values():
335
- if info["name"] == package_name and (version is None or info.get("version") == version):
429
+ if info["name"] == package_name and (
430
+ version is None or info.get("version") == version
431
+ ):
336
432
  path = self.cache_dir / info["path"]
337
433
  if path.exists():
338
434
  logger.debug(f"Cache hit (metadata): {package_name}")
@@ -361,8 +457,46 @@ class LibraryCache:
361
457
  return None
362
458
 
363
459
  @staticmethod
364
- def _extract_dependencies_from_wheel(wheel_file: Path) -> set[str]:
365
- """Extract dependencies from wheel METADATA file.
460
+ def _extract_package_name_from_sdist(sdist_file: Path) -> str | None:
461
+ """Extract package name from source distribution file (.tar.gz or .zip).
462
+
463
+ Args:
464
+ sdist_file: Path to sdist file
465
+
466
+ Returns:
467
+ Package name or None
468
+ """
469
+ try:
470
+ # Handle .tar.gz files (e.g., package_name-1.0.0.tar.gz)
471
+ if (
472
+ sdist_file.suffixes
473
+ and ".tar" in sdist_file.suffixes
474
+ and ".gz" in sdist_file.suffixes
475
+ ):
476
+ # Remove both .tar.gz extensions by removing the last 7 characters (.tar.gz)
477
+ stem_without_ext = (
478
+ sdist_file.stem
479
+ ) # This removes .gz, leaving package-1.0.0.tar
480
+ # Now remove the remaining .tar
481
+ if stem_without_ext.endswith(".tar"):
482
+ stem_without_ext = stem_without_ext[:-4] # Remove .tar
483
+ parts = stem_without_ext.rsplit(
484
+ "-", 1
485
+ ) # Split from right: ["package_name", "1.0.0"]
486
+ if len(parts) >= 1 and parts[0]:
487
+ return normalize_package_name(parts[0])
488
+ # Handle .zip files
489
+ elif sdist_file.suffix == ".zip":
490
+ filename = sdist_file.stem # Remove .zip extension
491
+ parts = filename.rsplit("-", 1)
492
+ if len(parts) >= 1 and parts[0]:
493
+ return normalize_package_name(parts[0])
494
+ except Exception as e:
495
+ logger.debug(f"Failed to extract package name from {sdist_file}: {e}")
496
+ return None
497
+
498
+ def _extract_dependencies_from_wheel(self, wheel_file: Path) -> set[str]:
499
+ """Extract dependencies from wheel METADATA file with caching.
366
500
 
367
501
  Args:
368
502
  wheel_file: Path to wheel file
@@ -370,17 +504,32 @@ class LibraryCache:
370
504
  Returns:
371
505
  Set of package names (normalized)
372
506
  """
507
+ # Check cache first
508
+ if wheel_file in self._dependencies_cache:
509
+ return self._dependencies_cache[wheel_file]
510
+
511
+ # Check if it's an sdist file (.tar.gz or .zip)
512
+ if wheel_file.suffix in (".gz", ".zip"):
513
+ dependencies = self._extract_dependencies_from_sdist(wheel_file)
514
+ self._dependencies_cache[wheel_file] = dependencies
515
+ return dependencies
516
+
373
517
  try:
374
518
  import re
375
519
  import zipfile
376
520
 
377
521
  dependencies: set[str] = set()
378
522
  with zipfile.ZipFile(wheel_file, "r") as zf:
379
- metadata_files = [name for name in zf.namelist() if name.endswith("METADATA")]
523
+ metadata_files = [
524
+ name for name in zf.namelist() if name.endswith("METADATA")
525
+ ]
380
526
  if not metadata_files:
527
+ self._dependencies_cache[wheel_file] = dependencies
381
528
  return dependencies
382
529
 
383
- metadata_content = zf.read(metadata_files[0]).decode("utf-8", errors="ignore")
530
+ metadata_content = zf.read(metadata_files[0]).decode(
531
+ "utf-8", errors="ignore"
532
+ )
384
533
 
385
534
  # Parse dependencies from METADATA
386
535
  for line in metadata_content.splitlines():
@@ -388,7 +537,11 @@ class LibraryCache:
388
537
  dep_str = line.split(":", 1)[1].strip()
389
538
 
390
539
  # Skip extras dependencies
391
- if re.search(r'extra\s*==\s*["\']?([^"\';\s]+)["\']?', dep_str, re.IGNORECASE):
540
+ if re.search(
541
+ r'extra\s*==\s*["\']?([^"\';\s]+)["\']?',
542
+ dep_str,
543
+ re.IGNORECASE,
544
+ ):
392
545
  logger.debug(f"Skipping extra dependency: {dep_str}")
393
546
  continue
394
547
 
@@ -403,12 +556,155 @@ class LibraryCache:
403
556
  except Exception:
404
557
  pass
405
558
 
559
+ # Cache the result
560
+ self._dependencies_cache[wheel_file] = dependencies
406
561
  return dependencies
407
562
  except Exception as e:
408
- logger.warning(f"Failed to extract dependencies from {wheel_file.name}: {e}")
563
+ logger.warning(
564
+ f"Failed to extract dependencies from {wheel_file.name}: {e}"
565
+ )
409
566
  return set()
410
567
 
411
- def add_package(self, package_name: str, package_path: Path, version: str | None = None) -> None:
568
+ def _extract_dependencies_from_sdist(self, sdist_file: Path) -> set[str]:
569
+ """Extract dependencies from source distribution file with caching.
570
+
571
+ Args:
572
+ sdist_file: Path to sdist file (.tar.gz or .zip)
573
+
574
+ Returns:
575
+ Set of package names (normalized)
576
+ """
577
+
578
+ dependencies: set[str] = set()
579
+
580
+ try:
581
+ # Handle .tar.gz files
582
+ if sdist_file.suffix == ".gz":
583
+ with tarfile.open(sdist_file, "r:gz") as tf:
584
+ for member in tf.getmembers():
585
+ # Look for PKG-INFO or METADATA file in the root of the package
586
+ if member.name.endswith("PKG-INFO") or member.name.endswith(
587
+ "METADATA"
588
+ ):
589
+ # Only use PKG-INFO/METADATA files in the root directory
590
+ # Count the number of slashes in the path
591
+ path_parts = member.name.split("/")
592
+ if len(path_parts) == 2 or (
593
+ len(path_parts) == 3
594
+ and path_parts[2] in ("PKG-INFO", "METADATA")
595
+ ):
596
+ content = tf.extractfile(member)
597
+ if content:
598
+ metadata_content = content.read().decode(
599
+ "utf-8", errors="ignore"
600
+ )
601
+ dependencies = self._parse_metadata_content(
602
+ metadata_content
603
+ )
604
+ logger.debug(
605
+ f"Extracted dependencies from {member.name} in {sdist_file.name}"
606
+ )
607
+ break
608
+ # Handle .zip files
609
+ elif sdist_file.suffix == ".zip":
610
+ with zipfile.ZipFile(sdist_file, "r") as zf:
611
+ for name in zf.namelist():
612
+ # Look for PKG-INFO or METADATA file in the root of the package
613
+ if name.endswith("PKG-INFO") or name.endswith("METADATA"):
614
+ path_parts = name.split("/")
615
+ if len(path_parts) == 2 or (
616
+ len(path_parts) == 3
617
+ and path_parts[2] in ("PKG-INFO", "METADATA")
618
+ ):
619
+ metadata_content = zf.read(name).decode(
620
+ "utf-8", errors="ignore"
621
+ )
622
+ dependencies = self._parse_metadata_content(
623
+ metadata_content
624
+ )
625
+ logger.debug(
626
+ f"Extracted dependencies from {name} in {sdist_file.name}"
627
+ )
628
+ break
629
+ except Exception as e:
630
+ logger.warning(
631
+ f"Failed to extract dependencies from sdist {sdist_file.name}: {e}"
632
+ )
633
+
634
+ return dependencies
635
+
636
+ @staticmethod
637
+ def _parse_metadata_content(metadata_content: str) -> set[str]:
638
+ """Parse metadata content (PKG-INFO or METADATA) to extract dependencies.
639
+
640
+ Args:
641
+ metadata_content: Content of PKG-INFO or METADATA file
642
+
643
+ Returns:
644
+ Set of package names (normalized)
645
+ """
646
+ import re
647
+
648
+ dependencies: set[str] = set()
649
+ try:
650
+ for line in metadata_content.splitlines():
651
+ # Look for Requires-Dist or Requires field
652
+ if line.startswith("Requires-Dist:") or line.startswith("Requires:"):
653
+ if line.startswith("Requires:"):
654
+ # Requires field contains comma-separated list
655
+ dep_str = line.split(":", 1)[1].strip()
656
+ for req_str in re.split(r",\s*", dep_str):
657
+ req_str = req_str.strip()
658
+ if req_str:
659
+ dependencies.update(
660
+ LibraryCache._parse_single_requirement(req_str)
661
+ )
662
+ else:
663
+ # Requires-Dist field
664
+ dep_str = line.split(":", 1)[1].strip()
665
+ dependencies.update(
666
+ LibraryCache._parse_single_requirement(dep_str)
667
+ )
668
+ except Exception as e:
669
+ logger.debug(f"Failed to parse metadata content: {e}")
670
+
671
+ return dependencies
672
+
673
+ @staticmethod
674
+ def _parse_single_requirement(req_str: str) -> set[str]:
675
+ """Parse a single requirement string and extract package name.
676
+
677
+ Args:
678
+ req_str: Requirement string (e.g., "numpy>=1.20.0", "package[extra]>=1.0")
679
+
680
+ Returns:
681
+ Set containing the normalized package name, or empty set if should skip
682
+ """
683
+ import re
684
+
685
+ try:
686
+ # Skip extras dependencies
687
+ if re.search(
688
+ r'extra\s*==\s*["\']?([^"\';\s]+)["\']?', req_str, re.IGNORECASE
689
+ ):
690
+ logger.debug(f"Skipping extra dependency: {req_str}")
691
+ return set()
692
+
693
+ from packaging.requirements import Requirement
694
+
695
+ req = Requirement(req_str)
696
+ if not should_skip_dependency(req.name, bool(req.extras)):
697
+ dep_name = normalize_package_name(req.name)
698
+ logger.debug(f"Found core dependency: {dep_name}")
699
+ return {dep_name}
700
+ except Exception:
701
+ pass
702
+
703
+ return set()
704
+
705
+ def add_package(
706
+ self, package_name: str, package_path: Path, version: str | None = None
707
+ ) -> None:
412
708
  """Add package to cache.
413
709
 
414
710
  Args:
@@ -416,13 +712,16 @@ class LibraryCache:
416
712
  package_path: Path to package files
417
713
  version: Package version
418
714
  """
715
+ # Normalize package name to ensure consistency
716
+ normalized_name = normalize_package_name(package_name)
717
+
419
718
  # Copy package files to cache (flat structure for wheels, nested for dirs)
420
719
  if package_path.is_dir():
421
- dest_dir = self.cache_dir / package_name
720
+ dest_dir = self.cache_dir / normalized_name
422
721
  if dest_dir.exists():
423
722
  shutil.rmtree(dest_dir)
424
723
  shutil.copytree(package_path, dest_dir)
425
- relative_path = package_name
724
+ relative_path = normalized_name
426
725
  else:
427
726
  dest_file = self.cache_dir / package_path.name
428
727
  shutil.copy2(package_path, dest_file)
@@ -431,14 +730,14 @@ class LibraryCache:
431
730
  # Update metadata using CacheMetadata dataclass
432
731
  metadata = self._load_metadata()
433
732
  metadata[str(package_path)] = CacheMetadata(
434
- name=package_name,
733
+ name=normalized_name,
435
734
  version=version,
436
735
  path=relative_path,
437
736
  timestamp=time.time(),
438
737
  ).__dict__
439
738
  self._save_metadata(metadata)
440
739
 
441
- logger.info(f"Cached package: {package_name}")
740
+ logger.info(f"Cached package: {normalized_name}")
442
741
 
443
742
  def _load_metadata(self) -> dict[str, Any]:
444
743
  """Load cache metadata.
@@ -469,13 +768,19 @@ class LibraryCache:
469
768
  if self.cache_dir.exists():
470
769
  shutil.rmtree(self.cache_dir)
471
770
  self.cache_dir.mkdir(parents=True, exist_ok=True)
771
+ self._dependencies_cache.clear() # Clear in-memory dependencies cache
472
772
  logger.info("Cache cleared")
473
773
 
474
774
 
475
775
  class LibraryDownloader:
476
776
  """Download Python packages from PyPI."""
477
777
 
478
- def __init__(self, cache: LibraryCache, python_version: str | None = None, mirror: str = "pypi"):
778
+ def __init__(
779
+ self,
780
+ cache: LibraryCache,
781
+ python_version: str | None = None,
782
+ mirror: str = "pypi",
783
+ ):
479
784
  """Initialize downloader.
480
785
 
481
786
  Args:
@@ -484,19 +789,19 @@ class LibraryDownloader:
484
789
  mirror: PyPI mirror source (pypi, tsinghua, aliyun, ustc, douban, tencent)
485
790
  """
486
791
  self.cache = cache
487
- self.python_version = python_version or f"{sys.version_info.major}.{sys.version_info.minor}"
488
- self.platform_name = platform.system().lower() + "_" + platform.machine().lower()
792
+ self.python_version = (
793
+ python_version or f"{sys.version_info.major}.{sys.version_info.minor}"
794
+ )
795
+ self.platform_name = (
796
+ platform.system().lower() + "_" + platform.machine().lower()
797
+ )
489
798
  self.mirror_url = PYPI_MIRRORS.get(mirror, PYPI_MIRRORS["pypi"])
490
799
  self.pip_executable = self._find_pip_executable()
491
800
 
492
801
  @staticmethod
493
802
  def _find_pip_executable() -> str | None:
494
- """Find pip executable in the system.
495
-
496
- Returns:
497
- Path to pip executable or None
498
- """
499
- return next((shutil.which(cmd) for cmd in ("pip", "pip3")), None)
803
+ """Find pip executable in the system."""
804
+ return shutil.which("pip") or shutil.which("pip3")
500
805
 
501
806
  def _download_package(self, dep: Dependency, dest_dir: Path) -> Path | None:
502
807
  """Download a single package without dependencies.
@@ -506,10 +811,12 @@ class LibraryDownloader:
506
811
  dest_dir: Destination directory
507
812
 
508
813
  Returns:
509
- Path to downloaded wheel file or None
814
+ Path to downloaded package file (wheel or sdist) or None
510
815
  """
511
816
  if not self.pip_executable:
512
- logger.error("pip not found. Please install pip: python -m ensurepip --upgrade")
817
+ logger.error(
818
+ "pip not found. Please install pip: python -m ensurepip --upgrade"
819
+ )
513
820
  return None
514
821
 
515
822
  logger.info(f"Downloading: {dep}")
@@ -535,12 +842,26 @@ class LibraryDownloader:
535
842
  logger.warning(f"pip download failed for {dep}: {result.stderr}")
536
843
  return None
537
844
 
538
- # Find and copy the downloaded wheel file
845
+ # Prefer wheel files over sdist files
846
+ downloaded_file = None
539
847
  for file_path in Path(temp_dir).glob("*.whl"):
540
- self.cache.add_package(dep.name, file_path, dep.version)
541
- shutil.copy2(file_path, dest_dir / file_path.name)
542
- logger.info(f"Downloaded: {file_path.name}")
543
- return dest_dir / file_path.name
848
+ downloaded_file = file_path
849
+ break
850
+
851
+ # If no wheel file, look for sdist files (.tar.gz or .zip)
852
+ if not downloaded_file:
853
+ for file_path in Path(temp_dir).glob("*.tar.gz"):
854
+ downloaded_file = file_path
855
+ break
856
+ for file_path in Path(temp_dir).glob("*.zip"):
857
+ downloaded_file = file_path
858
+ break
859
+
860
+ if downloaded_file:
861
+ self.cache.add_package(dep.name, downloaded_file, dep.version)
862
+ shutil.copy2(downloaded_file, dest_dir / downloaded_file.name)
863
+ logger.info(f"Downloaded: {downloaded_file.name}")
864
+ return dest_dir / downloaded_file.name
544
865
 
545
866
  return None
546
867
 
@@ -562,38 +883,57 @@ class LibraryDownloader:
562
883
  """
563
884
  dest_dir.mkdir(parents=True, exist_ok=True)
564
885
 
565
- results: dict[str, bool] = {}
886
+ # Use list of tuples for thread-safe result collection
887
+ # Tuple format: (package_name, success_flag)
888
+ results_list: list[tuple[str, bool]] = []
566
889
  cached_count = 0
890
+ cached_packages: set[str] = set() # Track cached package names efficiently
567
891
 
568
892
  logger.info(f"Total direct dependencies: {len(dependencies)}")
569
893
  logger.info(f"Using mirror: {self.mirror_url}")
570
894
 
571
- # Check cache and mark cached packages
895
+ # Check cache and mark cached packages (single-threaded, safe)
572
896
  for dep in dependencies:
573
897
  if self.cache.get_package_path(dep.name, dep.version):
574
- results[dep.name] = True
898
+ normalized_dep_name = normalize_package_name(dep.name)
899
+ results_list.append((normalized_dep_name, True))
900
+ cached_packages.add(normalized_dep_name)
575
901
  cached_count += 1
576
902
  logger.info(f"Using cached package: {dep}")
577
903
 
578
904
  # Download remaining packages concurrently
579
- remaining_deps = [dep for dep in dependencies if dep.name not in results or not results[dep.name]]
905
+ remaining_deps = [
906
+ dep
907
+ for dep in dependencies
908
+ if normalize_package_name(dep.name) not in cached_packages
909
+ ]
580
910
  downloaded_count = 0
581
911
 
582
912
  if remaining_deps:
583
913
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
584
- future_to_dep = {executor.submit(self._download_package, dep, dest_dir): dep for dep in remaining_deps}
914
+ future_to_dep = {
915
+ executor.submit(self._download_package, dep, dest_dir): dep
916
+ for dep in remaining_deps
917
+ }
585
918
 
586
919
  for future in as_completed(future_to_dep):
587
920
  dep = future_to_dep[future]
588
921
  try:
589
922
  wheel_file = future.result()
590
- results[dep.name] = wheel_file is not None
923
+ normalized_dep_name = normalize_package_name(dep.name)
924
+ results_list.append((
925
+ normalized_dep_name,
926
+ wheel_file is not None,
927
+ ))
591
928
  if wheel_file:
592
929
  downloaded_count += 1
593
930
  except Exception as e:
594
- logger.error(f"Error processing {dep.name}: {e}")
595
- results[dep.name] = False
931
+ normalized_dep_name = normalize_package_name(dep.name)
932
+ logger.error(f"Error processing {normalized_dep_name}: {e}")
933
+ results_list.append((normalized_dep_name, False))
596
934
 
935
+ # Convert to dictionary for final result
936
+ results = dict(results_list)
597
937
  successful = sum(1 for v in results.values() if v)
598
938
  logger.info(
599
939
  f"Processed {successful}/{len(dependencies)} ({cached_count} cached, {downloaded_count} downloaded)"
@@ -616,6 +956,8 @@ class PyLibPack:
616
956
  cache_dir: Path | None = None,
617
957
  python_version: str | None = None,
618
958
  mirror: str = "pypi",
959
+ optimize: bool = True,
960
+ optimization_strategy: SelectiveExtractionStrategy | None = None,
619
961
  ):
620
962
  """Initialize library packer.
621
963
 
@@ -627,7 +969,15 @@ class PyLibPack:
627
969
  self.cache = LibraryCache(cache_dir)
628
970
  self.downloader = LibraryDownloader(self.cache, python_version, mirror)
629
971
 
630
- def pack_project(self, project: ProjectInfo, output_dir: Path, max_workers: int = 4) -> PackResult:
972
+ # Set up optimization strategy
973
+ self.optimize = optimize
974
+ self.optimization_strategy = (
975
+ optimization_strategy or SelectiveExtractionStrategy() if optimize else None
976
+ )
977
+
978
+ def pack_project(
979
+ self, project: Project, output_dir: Path, max_workers: int = 4
980
+ ) -> PackResult:
631
981
  """Pack dependencies for a single project.
632
982
 
633
983
  Args:
@@ -642,9 +992,7 @@ class PyLibPack:
642
992
  logger.info(f"Packing dependencies for project: {project.name}")
643
993
  logger.info(f"{'=' * 60}")
644
994
 
645
- dependencies = ProjectParser.parse_requirements_from_project(project.info)
646
-
647
- if not dependencies:
995
+ if not project.dependencies:
648
996
  logger.warning(f"No dependencies found for {project.name}")
649
997
  return PackResult(
650
998
  success=False,
@@ -656,42 +1004,62 @@ class PyLibPack:
656
1004
  packages_dir=str(output_dir),
657
1005
  )
658
1006
 
659
- logger.info(f"Found {len(dependencies)} dependencies")
1007
+ logger.info(f"Found {len(project.dependencies)} dependencies")
660
1008
 
661
1009
  # Download direct dependencies
662
1010
  download_result = self.downloader.download_packages(
663
- dependencies,
1011
+ project.dependencies,
664
1012
  self.cache.cache_dir,
665
1013
  max_workers=max_workers,
666
1014
  )
667
1015
 
668
- # Build wheel map and collect all required packages recursively
669
- wheel_map: dict[str, Path] = {
670
- pkg_name: wheel_file
671
- for wheel_file in self.cache.cache_dir.glob("*.whl")
672
- if (pkg_name := self.cache._extract_package_name_from_wheel(wheel_file))
673
- }
674
-
675
- # Recursively collect all dependencies
676
- all_packages = self._collect_all_dependencies(wheel_map, list(download_result.results))
1016
+ # Build package map (including both wheel and sdist files) and collect all required packages recursively
1017
+ package_map: dict[str, Path] = {}
1018
+
1019
+ # Add wheel files to package map
1020
+ for wheel_file in self.cache.cache_dir.glob("*.whl"):
1021
+ pkg_name = self.cache._extract_package_name_from_wheel(wheel_file)
1022
+ if pkg_name and pkg_name not in package_map: # Prefer wheel files
1023
+ normalized_pkg_name = normalize_package_name(pkg_name)
1024
+ package_map[normalized_pkg_name] = wheel_file
1025
+
1026
+ # Add sdist files to package map (only if not already present)
1027
+ for sdist_file in self.cache.cache_dir.glob("*.tar.gz"):
1028
+ pkg_name = self.cache._extract_package_name_from_sdist(sdist_file)
1029
+ if pkg_name and normalize_package_name(pkg_name) not in package_map:
1030
+ normalized_pkg_name = normalize_package_name(pkg_name)
1031
+ package_map[normalized_pkg_name] = sdist_file
1032
+
1033
+ for sdist_file in self.cache.cache_dir.glob("*.zip"):
1034
+ pkg_name = self.cache._extract_package_name_from_sdist(sdist_file)
1035
+ if pkg_name and normalize_package_name(pkg_name) not in package_map:
1036
+ normalized_pkg_name = normalize_package_name(pkg_name)
1037
+ package_map[normalized_pkg_name] = sdist_file
1038
+
1039
+ # Recursively collect all dependencies (pass cache instance for dependency extraction)
1040
+ all_packages = self._collect_all_dependencies(
1041
+ package_map, list(download_result.results), self.cache
1042
+ )
677
1043
 
678
1044
  # Extract all required packages (keep order of dependency resolution)
679
1045
  extracted_packages = []
680
1046
  for pkg_name in all_packages:
681
- if pkg_name in wheel_map:
1047
+ if pkg_name in package_map:
682
1048
  # Skip if output directory already exists
683
1049
  output_pkg_dir = output_dir / pkg_name
684
1050
  if output_pkg_dir.exists():
685
1051
  logger.warning(f"Output directory already exists: {output_pkg_dir}")
686
1052
  continue
687
1053
 
688
- wheel_file = wheel_map[pkg_name]
689
- logger.info(f"Extracting {wheel_file.name}...")
690
- self._extract_wheel(wheel_file, output_dir)
1054
+ package_file = package_map[pkg_name]
1055
+ logger.info(f"Extracting {package_file.name}...")
1056
+ self._extract_package(package_file, output_dir, pkg_name)
691
1057
  extracted_packages.append(pkg_name)
692
1058
  logger.info(f"Extracted {pkg_name}")
693
1059
 
694
- logger.info(f"Pack complete for {project.name}: {download_result.successful}/{download_result.total}")
1060
+ logger.info(
1061
+ f"Pack complete for {project.name}: {download_result.successful}/{download_result.total}"
1062
+ )
695
1063
 
696
1064
  return PackResult(
697
1065
  success=download_result.successful > 0,
@@ -704,57 +1072,232 @@ class PyLibPack:
704
1072
  )
705
1073
 
706
1074
  @staticmethod
707
- def _collect_all_dependencies(wheel_map: dict[str, Path], root_packages: list[str]) -> set[str]:
708
- """Recursively collect all dependencies from wheel files.
1075
+ def _collect_all_dependencies(
1076
+ package_map: dict[str, Path], root_packages: list[str], cache: LibraryCache
1077
+ ) -> set[str]:
1078
+ """Recursively collect all dependencies from package files (wheel or sdist).
709
1079
 
710
1080
  Args:
711
- wheel_map: Mapping of package names to wheel files
1081
+ package_map: Mapping of package names to package files (wheel or sdist)
712
1082
  root_packages: List of root package names to start from
1083
+ cache: LibraryCache instance for extracting dependencies
713
1084
 
714
1085
  Returns:
715
1086
  List of all required package names
716
1087
  """
717
1088
  all_packages: set[str] = set()
718
1089
  visited: set[str] = set()
1090
+ visit_stack: dict[str, int] = {} # Track visit depth for cycle detection
719
1091
 
720
1092
  def visit(pkg_name: str, level: int = 0) -> None:
721
1093
  """Visit a package and collect its dependencies."""
722
- if pkg_name in visited:
1094
+ # Normalize package name for consistency
1095
+ normalized_pkg_name = normalize_package_name(pkg_name)
1096
+
1097
+ # Check for cycles
1098
+ if normalized_pkg_name in visit_stack:
1099
+ logger.warning(
1100
+ f"Potential circular dependency detected: {normalized_pkg_name} (current depth: {level}, "
1101
+ f"previous depth: {visit_stack[normalized_pkg_name]})"
1102
+ )
1103
+ return
1104
+
1105
+ # Check depth limit
1106
+ if level > MAX_DEPTH:
1107
+ logger.warning(
1108
+ f"Maximum dependency depth ({MAX_DEPTH}) reached for {normalized_pkg_name}, stopping recursion"
1109
+ )
1110
+ return
1111
+
1112
+ if normalized_pkg_name in visited:
723
1113
  return
724
1114
 
725
- visited.add(pkg_name)
726
- all_packages.add(pkg_name)
1115
+ # Mark as visited and track depth
1116
+ visited.add(normalized_pkg_name)
1117
+ visit_stack[normalized_pkg_name] = level
1118
+ all_packages.add(normalized_pkg_name)
727
1119
 
728
- if pkg_name in wheel_map:
729
- deps = LibraryCache._extract_dependencies_from_wheel(wheel_map[pkg_name])
730
- logger.debug(f"{' ' * level}{pkg_name} -> {deps}")
1120
+ if normalized_pkg_name in package_map:
1121
+ deps = cache._extract_dependencies_from_wheel(
1122
+ package_map[normalized_pkg_name]
1123
+ )
1124
+ logger.debug(f"{' ' * level}{normalized_pkg_name} -> {deps}")
731
1125
  for dep in deps:
732
1126
  visit(dep, level + 1)
733
1127
 
1128
+ # Remove from stack when done
1129
+ visit_stack.pop(normalized_pkg_name, None)
1130
+
734
1131
  for pkg_name in root_packages:
735
1132
  visit(pkg_name)
736
1133
 
737
- logger.info(f"Collected {len(all_packages)} packages (including recursive dependencies)")
1134
+ logger.info(
1135
+ f"Collected {len(all_packages)} packages (including recursive dependencies)"
1136
+ )
738
1137
  logger.info(f"Packages: {all_packages}")
739
1138
  return all_packages
740
1139
 
741
- @staticmethod
742
- def _extract_wheel(wheel_file: Path, dest_dir: Path) -> None:
743
- """Extract wheel file to destination directory.
1140
+ def _build_and_cache_wheel(self, sdist_file: Path, package_name: str) -> None:
1141
+ """Build wheel from sdist file and cache it for faster future access.
744
1142
 
745
1143
  Args:
746
- wheel_file: Path to wheel file
1144
+ sdist_file: Path to sdist file (.tar.gz or .zip)
1145
+ package_name: Name of the package
1146
+ """
1147
+ with tempfile.TemporaryDirectory() as temp_wheel_dir:
1148
+ # Use pip wheel to build wheel from sdist
1149
+ result = subprocess.run(
1150
+ [
1151
+ self.downloader.pip_executable or "pip",
1152
+ "wheel",
1153
+ "--no-deps",
1154
+ "--wheel-dir",
1155
+ temp_wheel_dir,
1156
+ "--no-cache-dir",
1157
+ str(sdist_file),
1158
+ ],
1159
+ capture_output=True,
1160
+ text=True,
1161
+ check=False,
1162
+ )
1163
+
1164
+ if result.returncode != 0:
1165
+ logger.warning(
1166
+ f"Failed to build wheel from sdist for {package_name}: {result.stderr}"
1167
+ )
1168
+ return
1169
+
1170
+ # Find the built wheel file
1171
+ wheel_files = list(Path(temp_wheel_dir).glob("*.whl"))
1172
+ if wheel_files:
1173
+ wheel_file = wheel_files[0]
1174
+ # Copy wheel to cache directory
1175
+ cache_wheel_path = self.cache.cache_dir / wheel_file.name
1176
+ shutil.copy2(wheel_file, cache_wheel_path)
1177
+
1178
+ # Update cache metadata
1179
+ self.cache.add_package(package_name, wheel_file)
1180
+
1181
+ logger.info(
1182
+ f"Built and cached wheel: {wheel_file.name} for {package_name}"
1183
+ )
1184
+ else:
1185
+ logger.warning(f"No wheel file was built from sdist for {package_name}")
1186
+
1187
+ def _extract_package(
1188
+ self, package_file: Path, dest_dir: Path, package_name: str
1189
+ ) -> None:
1190
+ """Extract package file (wheel or sdist) to destination directory with optional optimization.
1191
+
1192
+ Args:
1193
+ package_file: Path to package file (wheel or sdist)
747
1194
  dest_dir: Destination directory
1195
+ package_name: Name of the package being extracted
748
1196
  """
749
- import zipfile
1197
+ logger.info(
1198
+ f"Extracting {package_file.name} for package {package_name} to {dest_dir}"
1199
+ )
750
1200
 
751
- with zipfile.ZipFile(wheel_file, "r") as zf:
752
- zf.extractall(dest_dir)
1201
+ # Handle sdist files (.tar.gz or .zip) - install using pip, and build wheel for cache
1202
+ if package_file.suffix == ".gz" or package_file.suffix == ".zip":
1203
+ logger.info(f"Installing sdist file for {package_name} using pip...")
1204
+
1205
+ # Use pip install --target to install sdist to temporary directory
1206
+ with tempfile.TemporaryDirectory() as temp_install_dir:
1207
+ result = subprocess.run(
1208
+ [
1209
+ self.downloader.pip_executable or "pip",
1210
+ "install",
1211
+ "--target",
1212
+ temp_install_dir,
1213
+ "--no-deps", # Don't install dependencies (we handle them separately)
1214
+ "--no-cache-dir",
1215
+ str(package_file),
1216
+ ],
1217
+ capture_output=True,
1218
+ text=True,
1219
+ check=False,
1220
+ )
1221
+
1222
+ if result.returncode != 0:
1223
+ logger.error(
1224
+ f"Failed to install sdist {package_file.name}: {result.stderr}"
1225
+ )
1226
+ return
1227
+
1228
+ # Copy installed files to dest_dir, skipping *.dist-info directories
1229
+ temp_install_path = Path(temp_install_dir)
1230
+ for item in temp_install_path.iterdir():
1231
+ # Skip dist-info directories
1232
+ if item.name.endswith(".dist-info"):
1233
+ logger.debug(f"Skipping dist-info directory: {item.name}")
1234
+ continue
1235
+ dest_path = dest_dir / item.name
1236
+ if item.is_dir():
1237
+ if dest_path.exists():
1238
+ shutil.rmtree(dest_path)
1239
+ shutil.copytree(item, dest_path)
1240
+ else:
1241
+ shutil.copy2(item, dest_path)
1242
+
1243
+ logger.info(
1244
+ f"Installed sdist file for {package_name} to site-packages structure"
1245
+ )
1246
+
1247
+ # Build wheel from sdist and cache it for faster future access
1248
+ logger.info(f"Building wheel from sdist for {package_name}...")
1249
+ self._build_and_cache_wheel(package_file, package_name)
1250
+ return
1251
+
1252
+ # Handle wheel files with optional optimization
1253
+ with zipfile.ZipFile(package_file, "r") as zf:
1254
+ if self.optimize and self.optimization_strategy:
1255
+ # Apply optimization strategy - selectively extract files
1256
+ extracted_count = 0
1257
+ skipped_count = 0
1258
+
1259
+ for file_info in zf.filelist:
1260
+ file_path = Path(file_info.filename)
1261
+ # Skip dist-info directories
1262
+ if file_path.name.endswith(".dist-info") or any(
1263
+ parent.endswith(".dist-info") for parent in file_path.parts
1264
+ ):
1265
+ logger.debug(f"Skipping dist-info: {file_info.filename}")
1266
+ skipped_count += 1
1267
+ continue
1268
+ if self.optimization_strategy.should_extract_file(
1269
+ package_name, file_path
1270
+ ):
1271
+ zf.extract(file_info, dest_dir)
1272
+ extracted_count += 1
1273
+ logger.debug(f"Extracted {file_path} from {package_name}")
1274
+ else:
1275
+ skipped_count += 1
1276
+ logger.debug(
1277
+ f"Skipped {file_path} from {package_name} (filtered by optimization strategy)"
1278
+ )
1279
+
1280
+ logger.info(
1281
+ f"Extraction complete for {package_name}: {extracted_count} extracted, {skipped_count} skipped"
1282
+ )
1283
+ else:
1284
+ # Extract all files without optimization, but skip dist-info directories
1285
+ for file_info in zf.filelist:
1286
+ file_path = Path(file_info.filename)
1287
+ # Skip dist-info directories
1288
+ if file_path.name.endswith(".dist-info") or any(
1289
+ parent.endswith(".dist-info") for parent in file_path.parts
1290
+ ):
1291
+ logger.debug(f"Skipping dist-info: {file_info.filename}")
1292
+ continue
1293
+ zf.extract(file_info, dest_dir)
1294
+ logger.info(
1295
+ f"All files extracted for {package_name} (no optimization applied, dist-info skipped)"
1296
+ )
753
1297
 
754
1298
  def pack(
755
1299
  self,
756
- base_dir: Path,
757
- output_dir: Path | None = None,
1300
+ working_dir: Path,
758
1301
  max_workers: int = 4,
759
1302
  ) -> BatchPackResult:
760
1303
  """Pack project dependencies from base directory.
@@ -767,24 +1310,10 @@ class PyLibPack:
767
1310
  Returns:
768
1311
  BatchPackResult containing batch packing statistics
769
1312
  """
770
- output_dir = output_dir or base_dir / "dist" / "site-packages"
771
- logger.info(f"Starting dependency pack for: {base_dir}")
1313
+ output_dir = working_dir / "dist" / "site-packages"
1314
+ logger.info(f"Starting dependency pack for: {working_dir}")
772
1315
 
773
- # Ensure projects.json exists
774
- projects_json = ProjectParser.ensure_projects_json(base_dir)
775
- if not projects_json:
776
- logger.error("Failed to create projects.json")
777
- return BatchPackResult(
778
- success=False,
779
- total=0,
780
- successful=0,
781
- failed=0,
782
- output_dir=str(output_dir),
783
- total_time=0.0,
784
- )
785
-
786
- # Load project information
787
- projects = ProjectParser.load_projects_json(projects_json)
1316
+ projects = Solution.from_directory(root_dir=working_dir).projects
788
1317
  if not projects:
789
1318
  logger.error("Failed to load project information")
790
1319
  return BatchPackResult(
@@ -804,21 +1333,20 @@ class PyLibPack:
804
1333
  failed_projects: list[str] = []
805
1334
  use_current_dir = len(projects) == 1
806
1335
 
807
- for project_name, project_info in projects.items():
808
- project_dir = base_dir if use_current_dir else base_dir / project_name
1336
+ for project in projects.values():
1337
+ project_dir = working_dir if use_current_dir else working_dir / project.name
809
1338
 
810
1339
  if not project_dir.is_dir():
811
1340
  logger.warning(f"Project directory not found: {project_dir}, skipping")
812
- failed_projects.append(project_name)
1341
+ failed_projects.append(project.name)
813
1342
  continue
814
1343
 
815
- project = ProjectInfo(name=project_name, dir=project_dir, info=project_info)
816
1344
  result = self.pack_project(project, output_dir, max_workers)
817
1345
 
818
1346
  if result.success:
819
1347
  success_count += 1
820
1348
  else:
821
- failed_projects.append(project_name)
1349
+ failed_projects.append(project.name)
822
1350
 
823
1351
  total_time = time.perf_counter() - total_start
824
1352
 
@@ -848,19 +1376,29 @@ class PyLibPack:
848
1376
  self.cache.clear_cache()
849
1377
 
850
1378
 
851
- def main() -> None:
852
- """Main entry point for pylibpack tool."""
1379
+ def parse_args() -> argparse.Namespace:
1380
+ """Parse command-line arguments."""
853
1381
  parser = argparse.ArgumentParser(
854
1382
  prog="pylibpack",
855
1383
  description="Python library packer with caching support",
856
1384
  )
857
1385
 
858
1386
  parser.add_argument(
859
- "directory", type=str, nargs="?", default=str(Path.cwd()), help="Base directory containing projects"
1387
+ "directory",
1388
+ type=str,
1389
+ nargs="?",
1390
+ default=str(Path.cwd()),
1391
+ help="Base directory containing projects",
1392
+ )
1393
+ parser.add_argument(
1394
+ "--cache-dir", type=str, default=None, help="Custom cache directory"
1395
+ )
1396
+ parser.add_argument(
1397
+ "--python-version", type=str, default=None, help="Target Python version"
1398
+ )
1399
+ parser.add_argument(
1400
+ "-j", "--jobs", type=int, default=4, help="Maximum concurrent downloads"
860
1401
  )
861
- parser.add_argument("--cache-dir", type=str, default=None, help="Custom cache directory")
862
- parser.add_argument("--python-version", type=str, default=None, help="Target Python version")
863
- parser.add_argument("-j", "--jobs", type=int, default=4, help="Maximum concurrent downloads")
864
1402
  parser.add_argument(
865
1403
  "--mirror",
866
1404
  type=str,
@@ -869,44 +1407,50 @@ def main() -> None:
869
1407
  help="PyPI mirror source for faster downloads in China",
870
1408
  )
871
1409
  parser.add_argument("--debug", "-d", action="store_true", help="Debug mode")
872
- args = parser.parse_args()
1410
+ parser.add_argument(
1411
+ "--no-optimize",
1412
+ "-no",
1413
+ action="store_true",
1414
+ help="Disable package optimization (extract all files)",
1415
+ )
1416
+ parser.add_argument(
1417
+ "--list-optimizations",
1418
+ "-lo",
1419
+ action="store_true",
1420
+ help="List all available optimization rules",
1421
+ )
1422
+ return parser.parse_args()
1423
+
1424
+
1425
+ def main() -> None:
1426
+ """Main entry point for pylibpack tool."""
1427
+ args = parse_args()
1428
+
1429
+ if args.list_optimizations:
1430
+ strategy = SelectiveExtractionStrategy()
1431
+ logging.info("Available optimization rules:")
1432
+ for lib_name in sorted(strategy.get_library_names_with_rules()):
1433
+ logging.info(f" - {lib_name}")
1434
+ return
873
1435
 
874
1436
  # Setup logging
875
1437
  if args.debug:
876
- logging.getLogger().setLevel(logging.DEBUG)
1438
+ logger.setLevel(logging.DEBUG)
877
1439
 
878
1440
  # Initialize packer
879
1441
  cache_dir = Path(args.cache_dir) if args.cache_dir else None
880
- packer = PyLibPack(cache_dir=cache_dir, python_version=args.python_version, mirror=args.mirror)
881
-
882
- # Pack command
883
- base_dir = Path(args.directory)
884
- output_dir = base_dir / "dist" / "site-packages"
885
- output_dir.mkdir(parents=True, exist_ok=True)
886
-
887
- try:
888
- result = packer.pack(
889
- base_dir=base_dir,
890
- output_dir=output_dir,
891
- max_workers=args.jobs,
892
- )
893
-
894
- if result.success:
895
- logger.info("=" * 50)
896
- logger.info("Packing summary:")
897
- logger.info(f" Total: {result.total}")
898
- logger.info(f" Successful: {result.successful}")
899
- logger.info(f" Failed: {result.failed}")
900
- logger.info(f" Output directory: {result.output_dir}")
901
- logger.info(f" Total time: {result.total_time:.2f}s")
902
- logger.info("=" * 50)
903
- else:
904
- logger.error("Packing failed!")
905
- sys.exit(1)
1442
+ optimize = not args.no_optimize
1443
+ packer = PyLibPack(
1444
+ cache_dir=cache_dir,
1445
+ python_version=args.python_version,
1446
+ mirror=args.mirror,
1447
+ optimize=optimize,
1448
+ )
906
1449
 
907
- except Exception as e:
908
- logger.error(f"Packing failed: {e}")
909
- sys.exit(1)
1450
+ packer.pack(
1451
+ working_dir=Path(args.directory),
1452
+ max_workers=args.jobs,
1453
+ )
910
1454
 
911
1455
 
912
1456
  if __name__ == "__main__":
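
A small sketch of exercising the new command-line options (--no-optimize, --list-optimizations, --mirror, -j) added in this version, assuming pysfi 0.1.12 is installed; the argv values are illustrative and the console-script wiring from entry_points.txt is not shown in this diff:

import sys

from sfi.pylibpack.pylibpack import main

# Equivalent to running: pylibpack ./myproject --mirror tsinghua -j 8 --no-optimize
sys.argv = ["pylibpack", "./myproject", "--mirror", "tsinghua", "-j", "8", "--no-optimize"]
main()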