comfygit-core 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. comfygit_core/analyzers/custom_node_scanner.py +109 -0
  2. comfygit_core/analyzers/git_change_parser.py +156 -0
  3. comfygit_core/analyzers/model_scanner.py +318 -0
  4. comfygit_core/analyzers/node_classifier.py +58 -0
  5. comfygit_core/analyzers/node_git_analyzer.py +77 -0
  6. comfygit_core/analyzers/status_scanner.py +362 -0
  7. comfygit_core/analyzers/workflow_dependency_parser.py +143 -0
  8. comfygit_core/caching/__init__.py +16 -0
  9. comfygit_core/caching/api_cache.py +210 -0
  10. comfygit_core/caching/base.py +212 -0
  11. comfygit_core/caching/comfyui_cache.py +100 -0
  12. comfygit_core/caching/custom_node_cache.py +320 -0
  13. comfygit_core/caching/workflow_cache.py +797 -0
  14. comfygit_core/clients/__init__.py +4 -0
  15. comfygit_core/clients/civitai_client.py +412 -0
  16. comfygit_core/clients/github_client.py +349 -0
  17. comfygit_core/clients/registry_client.py +230 -0
  18. comfygit_core/configs/comfyui_builtin_nodes.py +1614 -0
  19. comfygit_core/configs/comfyui_models.py +62 -0
  20. comfygit_core/configs/model_config.py +151 -0
  21. comfygit_core/constants.py +82 -0
  22. comfygit_core/core/environment.py +1635 -0
  23. comfygit_core/core/workspace.py +898 -0
  24. comfygit_core/factories/environment_factory.py +419 -0
  25. comfygit_core/factories/uv_factory.py +61 -0
  26. comfygit_core/factories/workspace_factory.py +109 -0
  27. comfygit_core/infrastructure/sqlite_manager.py +156 -0
  28. comfygit_core/integrations/__init__.py +7 -0
  29. comfygit_core/integrations/uv_command.py +318 -0
  30. comfygit_core/logging/logging_config.py +15 -0
  31. comfygit_core/managers/environment_git_orchestrator.py +316 -0
  32. comfygit_core/managers/environment_model_manager.py +296 -0
  33. comfygit_core/managers/export_import_manager.py +116 -0
  34. comfygit_core/managers/git_manager.py +667 -0
  35. comfygit_core/managers/model_download_manager.py +252 -0
  36. comfygit_core/managers/model_symlink_manager.py +166 -0
  37. comfygit_core/managers/node_manager.py +1378 -0
  38. comfygit_core/managers/pyproject_manager.py +1321 -0
  39. comfygit_core/managers/user_content_symlink_manager.py +436 -0
  40. comfygit_core/managers/uv_project_manager.py +569 -0
  41. comfygit_core/managers/workflow_manager.py +1944 -0
  42. comfygit_core/models/civitai.py +432 -0
  43. comfygit_core/models/commit.py +18 -0
  44. comfygit_core/models/environment.py +293 -0
  45. comfygit_core/models/exceptions.py +378 -0
  46. comfygit_core/models/manifest.py +132 -0
  47. comfygit_core/models/node_mapping.py +201 -0
  48. comfygit_core/models/protocols.py +248 -0
  49. comfygit_core/models/registry.py +63 -0
  50. comfygit_core/models/shared.py +356 -0
  51. comfygit_core/models/sync.py +42 -0
  52. comfygit_core/models/system.py +204 -0
  53. comfygit_core/models/workflow.py +914 -0
  54. comfygit_core/models/workspace_config.py +71 -0
  55. comfygit_core/py.typed +0 -0
  56. comfygit_core/repositories/migrate_paths.py +49 -0
  57. comfygit_core/repositories/model_repository.py +958 -0
  58. comfygit_core/repositories/node_mappings_repository.py +246 -0
  59. comfygit_core/repositories/workflow_repository.py +57 -0
  60. comfygit_core/repositories/workspace_config_repository.py +121 -0
  61. comfygit_core/resolvers/global_node_resolver.py +459 -0
  62. comfygit_core/resolvers/model_resolver.py +250 -0
  63. comfygit_core/services/import_analyzer.py +218 -0
  64. comfygit_core/services/model_downloader.py +422 -0
  65. comfygit_core/services/node_lookup_service.py +251 -0
  66. comfygit_core/services/registry_data_manager.py +161 -0
  67. comfygit_core/strategies/__init__.py +4 -0
  68. comfygit_core/strategies/auto.py +72 -0
  69. comfygit_core/strategies/confirmation.py +69 -0
  70. comfygit_core/utils/comfyui_ops.py +125 -0
  71. comfygit_core/utils/common.py +164 -0
  72. comfygit_core/utils/conflict_parser.py +232 -0
  73. comfygit_core/utils/dependency_parser.py +231 -0
  74. comfygit_core/utils/download.py +216 -0
  75. comfygit_core/utils/environment_cleanup.py +111 -0
  76. comfygit_core/utils/filesystem.py +178 -0
  77. comfygit_core/utils/git.py +1184 -0
  78. comfygit_core/utils/input_signature.py +145 -0
  79. comfygit_core/utils/model_categories.py +52 -0
  80. comfygit_core/utils/pytorch.py +71 -0
  81. comfygit_core/utils/requirements.py +211 -0
  82. comfygit_core/utils/retry.py +242 -0
  83. comfygit_core/utils/symlink_utils.py +119 -0
  84. comfygit_core/utils/system_detector.py +258 -0
  85. comfygit_core/utils/uuid.py +28 -0
  86. comfygit_core/utils/uv_error_handler.py +158 -0
  87. comfygit_core/utils/version.py +73 -0
  88. comfygit_core/utils/workflow_hash.py +90 -0
  89. comfygit_core/validation/resolution_tester.py +297 -0
  90. comfygit_core-0.2.0.dist-info/METADATA +939 -0
  91. comfygit_core-0.2.0.dist-info/RECORD +93 -0
  92. comfygit_core-0.2.0.dist-info/WHEEL +4 -0
  93. comfygit_core-0.2.0.dist-info/licenses/LICENSE.txt +661 -0
@@ -0,0 +1,232 @@
1
+ """UV dependency conflict parsing utilities.
2
+
3
+ This module provides utilities for parsing and simplifying UV (uv) package manager
4
+ error messages, particularly dependency conflicts.
5
+ """
6
+
7
+ import re
8
+
9
+ from ..logging.logging_config import get_logger
10
+
11
+ logger = get_logger(__name__)
12
+
13
+
14
def parse_uv_conflicts(error_text: str, max_lines: int = 3) -> list[str]:
    """Extract specific conflict messages from UV error output.

    Looks for patterns like:
    - "X and Y are incompatible"
    - Resolver conclusion statements ("we can conclude that ... incompatible")

    Args:
        error_text: Raw UV error output
        max_lines: Maximum number of conflict messages to return

    Returns:
        List of simplified conflict messages (at most max_lines entries)
    """
    conflicts: list[str] = []

    if not error_text:
        return conflicts

    # Pattern: "X and Y are incompatible"
    incompatible_pattern = r"(\S+) and (\S+) are incompatible"
    for match in re.finditer(incompatible_pattern, error_text):
        pkg1, pkg2 = match.groups()
        # Clean up package names (remove version specs)
        pkg1_clean = _clean_package_name(pkg1)
        pkg2_clean = _clean_package_name(pkg2)
        conflicts.append(f"{pkg1_clean} conflicts with {pkg2_clean}")

    # NOTE: a previous revision also collected "X depends on Y" matches
    # into a dict that was never read; that dead code has been removed.

    # If we found specific conflicts, use those
    if conflicts:
        return conflicts[:max_lines]

    # Otherwise, try to extract resolver conclusion lines
    for line in error_text.split('\n'):
        line = line.strip()
        # Skip empty lines and hints
        if not line or line.startswith('hint:'):
            continue
        # Look for conclusion lines and keep only the key part
        if 'conclude that' in line and 'incompatible' in line:
            if 'we can conclude that' in line:
                conclusion = line.split('we can conclude that')[1].strip()
                conflicts.append(conclusion)

    return conflicts[:max_lines]  # Limit output
71
+
72
+
73
def parse_uv_resolution(output: str | None) -> dict[str, str]:
    """Parse UV resolution output into a package -> version mapping.

    Args:
        output: UV resolution output text

    Returns:
        Dict mapping package names to resolved versions
    """
    packages: dict[str, str] = {}

    if not output:
        return packages

    # UV resolution output format varies; this simplified parser just
    # picks up lines of the form "package==version".
    for raw_line in output.split('\n'):
        stripped = raw_line.strip()
        if '==' not in stripped:
            continue
        pieces = stripped.split('==')
        if len(pieces) != 2:
            continue
        pkg_name = pieces[0].strip()
        pkg_version = pieces[1].strip()
        # Normalize the name (strip prefixes/extras)
        packages[_clean_package_name(pkg_name)] = pkg_version

    return packages
101
+
102
+
103
def simplify_conflict_message(full_error: str, max_lines: int = 3) -> list[str]:
    """Simplify a full UV conflict error into user-friendly messages.

    Args:
        full_error: Complete UV error message
        max_lines: Maximum number of simplified messages to return

    Returns:
        List of simplified, user-friendly conflict descriptions
    """
    # Prefer precisely parsed conflicts when available
    parsed = parse_uv_conflicts(full_error)
    if parsed:
        return parsed[:max_lines]

    # Fall back to scanning for lines containing conflict-related phrases
    key_phrases = (
        "incompatible",
        "conflict",
        "cannot satisfy",
        "no solution found",
        "requires",
        "depends on",
    )

    simplified: list[str] = []
    for raw in full_error.split('\n'):
        candidate = raw.strip()
        lowered = candidate.lower()
        if not any(phrase in lowered for phrase in key_phrases):
            continue
        # Strip UV-specific prefixes before displaying
        candidate = _clean_error_line(candidate)
        if candidate and candidate not in simplified:
            simplified.append(candidate)
            if len(simplified) >= max_lines:
                break

    return simplified
141
+
142
+
143
def extract_conflicting_packages(error_text: str) -> list[tuple[str, str]]:
    """Extract pairs of conflicting packages from UV error.

    Args:
        error_text: UV error output

    Returns:
        List of tuples (package1, package2) that conflict
    """
    found: list[tuple[str, str]] = []

    # Explicit incompatibility statements: "a==1.0 and b==2.0 are incompatible"
    pattern = r"(\S+)==[\d\.]+ and (\S+)==[\d\.]+ are incompatible"
    for m in re.finditer(pattern, error_text):
        found.append((_clean_package_name(m.group(1)),
                      _clean_package_name(m.group(2))))

    # Version conflict statements: two packages requiring clashing pins
    pattern = r"(\S+) requires (\S+)==[\d\.]+.*but.*(\S+) requires (\S+)==[\d\.]+"
    for m in re.finditer(pattern, error_text):
        # Groups 1 and 3 are the packages with conflicting requirements
        first = _clean_package_name(m.group(1))
        second = _clean_package_name(m.group(3))
        if first != second:
            found.append((first, second))

    # Deduplicate (order-insensitive) while keeping first-seen order
    seen: set[tuple[str, ...]] = set()
    unique_pairs: list[tuple[str, str]] = []
    for pair in found:
        key = tuple(sorted(pair))
        if key not in seen:
            seen.add(key)
            unique_pairs.append(pair)

    return unique_pairs
181
+
182
+
183
def _clean_package_name(name: str) -> str:
    """Clean up a package name by removing version specs and extras.

    Args:
        name: Raw package name (might include version specs)

    Returns:
        Clean package name
    """
    # Drop anything after a version specifier; operators are checked in
    # the same order as before (two-char operators ahead of one-char).
    for operator in ('==', '>=', '<=', '>', '<', '~=', '!='):
        name = name.partition(operator)[0]

    # Drop extras like "[extra1,extra2]"
    name = name.partition('[')[0]

    # Trim any surrounding whitespace
    return name.strip()
203
+
204
+
205
def _clean_error_line(line: str) -> str:
    """Clean up an error line for display.

    Args:
        line: Raw error line

    Returns:
        Cleaned error line
    """
    # Strip common UV prefixes (each checked once, in order)
    for marker in ("error:", "Error:", "ERROR:", " × ", " │ ", " ╰─▶ "):
        if line.startswith(marker):
            line = line.removeprefix(marker).strip()

    # Keep the line short enough for compact display
    if len(line) > 100:
        line = line[:97] + "..."

    return line
@@ -0,0 +1,231 @@
1
+ """Dependency parsing utilities for pyproject.toml files.
2
+
3
+ This module provides utilities for parsing and comparing Python package dependencies
4
+ from pyproject.toml files, including support for UV dependency groups.
5
+ """
6
+
7
+ import re
8
+
9
+ from ..logging.logging_config import get_logger
10
+
11
+ logger = get_logger(__name__)
12
+
13
+
14
+ def parse_dependency_string(dep_str: str) -> tuple[str, str | None]:
15
+ """Parse a dependency string like 'numpy>=1.21.0' into name and version.
16
+
17
+ Args:
18
+ dep_str: Dependency string (e.g., 'numpy>=1.21.0', 'numpy[extra]>=1.21.0')
19
+
20
+ Returns:
21
+ Tuple of (package_name, version_spec) where version_spec may be None
22
+ """
23
+ # Handle various formats: numpy, numpy>=1.21.0, numpy[extra]>=1.21.0
24
+ match = re.match(r'^([a-zA-Z0-9_-]+)(?:\[.*?\])?(.*)$', dep_str.strip())
25
+ if match:
26
+ name = match.group(1)
27
+ version_spec = match.group(2).strip() if match.group(2) else None
28
+ return name, version_spec
29
+
30
+ return dep_str.strip(), None
31
+
32
+
33
def extract_all_dependencies(pyproject_data: dict) -> dict[str, dict]:
    """Extract all dependencies from pyproject.toml including groups.

    When a package appears in multiple places, we track all occurrences
    and use the most restrictive constraint as the effective one.

    Args:
        pyproject_data: Parsed pyproject.toml data

    Returns:
        Dict mapping package names to their info (version, source)
    """
    deps: dict[str, dict] = {}

    # Track all sources and versions for each package:
    # name -> [(source, version), ...]
    package_occurrences: dict[str, list[tuple[str, str | None]]] = {}

    def _record(dep_str: str, source: str) -> None:
        # Register one occurrence of a dependency under the given source label.
        name, version = parse_dependency_string(dep_str)
        package_occurrences.setdefault(name, []).append((source, version))

    # Main dependencies
    if "project" in pyproject_data:
        for dep_str in pyproject_data["project"].get("dependencies", []):
            _record(dep_str, "main")

    # Dependency groups (UV format)
    if "dependency-groups" in pyproject_data:
        for group_name, group_deps in pyproject_data["dependency-groups"].items():
            for dep_str in group_deps:
                _record(dep_str, f"group:{group_name}")

    # Tool-specific dependencies (for custom nodes)
    if "tool" in pyproject_data and "uv" in pyproject_data["tool"]:
        uv_config = pyproject_data["tool"]["uv"]
        for dep_str in uv_config.get("dev-dependencies", []):
            _record(dep_str, "dev")

    # For each package, determine the effective constraint.
    # Priority: exact pins > longer (more specific) constraints > anything else.
    for name, occurrences in package_occurrences.items():
        most_restrictive: str | None = None
        all_sources: list[str] = []

        for source, version in occurrences:
            all_sources.append(source)
            if most_restrictive is None:
                most_restrictive = version
            elif version is not None:
                # Simplified comparison — proper resolution would need real
                # specifier parsing. (A previously-unreachable duplicate
                # None-check here has been removed.)
                if "==" in version:  # Exact pin always wins
                    most_restrictive = version
                elif "==" not in most_restrictive and len(version) > len(most_restrictive):
                    # Prefer the more specific (longer) constraint
                    most_restrictive = version

        deps[name] = {
            "version": most_restrictive,
            # Deduplicate sources deterministically (first-seen order);
            # joining a set produced a nondeterministic string.
            "source": ", ".join(dict.fromkeys(all_sources)),
        }

    return deps
107
+
108
+
109
+ def is_meaningful_version_change(old_version: str | None,
110
+ new_version: str | None) -> bool:
111
+ """Determine if a version change is meaningful.
112
+
113
+ Rules:
114
+ - No change if both are None or both are the same
115
+ - No change if one is None and the other is a lower bound (>=)
116
+ since unconstrained effectively means "any version"
117
+ - Change if going from unconstrained to pinned
118
+ - Change if version numbers actually differ
119
+
120
+ Args:
121
+ old_version: Previous version constraint
122
+ new_version: New version constraint
123
+
124
+ Returns:
125
+ True if the change is meaningful
126
+ """
127
+ # Both None or identical - no change
128
+ if old_version == new_version:
129
+ return False
130
+
131
+ # One is None - check if it's effectively the same
132
+ if old_version is None and new_version is not None:
133
+ # Going from unconstrained to constrained
134
+ # Only meaningful if it's not just a lower bound
135
+ if new_version.startswith(">="):
136
+ # Lower bound only - not really a meaningful constraint
137
+ return False
138
+ return True
139
+
140
+ if new_version is None and old_version is not None:
141
+ # Going from constrained to unconstrained
142
+ # Only meaningful if we had a real constraint before
143
+ if old_version.startswith(">="):
144
+ # Was just a lower bound - not meaningful
145
+ return False
146
+ return True
147
+
148
+ # Both have versions - check if they differ meaningfully
149
+ # This is simplified - a full implementation would parse version specs
150
+ return old_version != new_version
151
+
152
+
153
+ def find_most_restrictive_constraint(constraints: list[str]) -> str | None:
154
+ """Find the most restrictive version constraint from a list.
155
+
156
+ Priority: exact pins > upper bounds > ranges > lower bounds > unconstrained
157
+
158
+ Args:
159
+ constraints: List of version constraints
160
+
161
+ Returns:
162
+ The most restrictive constraint or None
163
+ """
164
+ if not constraints:
165
+ return None
166
+
167
+ # Filter out None values
168
+ valid_constraints = [c for c in constraints if c]
169
+ if not valid_constraints:
170
+ return None
171
+
172
+ # Look for exact pins first
173
+ for constraint in valid_constraints:
174
+ if "==" in constraint:
175
+ return constraint
176
+
177
+ # Look for upper bounds or ranges
178
+ for constraint in valid_constraints:
179
+ if "<" in constraint or "," in constraint:
180
+ return constraint
181
+
182
+ # Return any constraint (likely lower bounds)
183
+ return valid_constraints[0]
184
+
185
+
186
def compare_dependency_sets(before: dict[str, dict], after: dict[str, dict]) -> dict[str, list]:
    """Compare two sets of dependencies to find changes.

    Args:
        before: Previous dependencies (from git)
        after: Current dependencies (from file)

    Returns:
        Dict with 'added', 'removed', and 'updated' lists
    """
    added: list[dict] = []
    removed: list[dict] = []
    updated: list[dict] = []

    for pkg in set(before) | set(after):
        old_info = before.get(pkg)
        new_info = after.get(pkg)

        if old_info is None:
            # Package introduced
            added.append({
                "name": pkg,
                "version": new_info.get("version"),
                "source": new_info.get("source"),
            })
        elif new_info is None:
            # Package dropped
            removed.append({
                "name": pkg,
                "version": old_info.get("version"),
            })
        else:
            # Present on both sides: report only meaningful version changes
            old_version = old_info.get("version")
            new_version = new_info.get("version")
            if is_meaningful_version_change(old_version, new_version):
                updated.append({
                    "name": pkg,
                    "old_version": old_version,
                    "new_version": new_version,
                    "source": new_info.get("source"),
                })

    return {"added": added, "removed": removed, "updated": updated}
@@ -0,0 +1,216 @@
1
+ """Download and archive extraction utilities."""
2
+
3
+ import tarfile
4
+ import tempfile
5
+ import urllib.request
6
+ import zipfile
7
+ from pathlib import Path
8
+
9
+ from ..logging.logging_config import get_logger
10
+
11
+ logger = get_logger(__name__)
12
+
13
+
14
def download_and_extract_archive(url: str, target_path: Path) -> None:
    """Download and extract an archive file with automatic format detection.

    Args:
        url: URL of the archive to download
        target_path: Directory to extract contents to

    Raises:
        OSError: If download fails
        ValueError: If archive format is unsupported or corrupted
    """
    downloaded: Path | None = None
    try:
        # Fetch the archive to a temp file, then detect format and extract
        downloaded = download_file(url)
        extract_archive(downloaded, target_path)
    except (OSError, ValueError):
        # Propagate download/extraction errors unchanged
        raise
    except Exception as e:
        logger.error(f"Unexpected error during download/extract: {e}")
        raise OSError(f"Unexpected error during download/extract: {e}")
    finally:
        # Best-effort removal of the downloaded temp file
        if downloaded and downloaded.exists():
            try:
                downloaded.unlink()
            except OSError:
                # Log but don't fail if cleanup fails
                logger.warning(f"Failed to clean up temp file: {downloaded}")
46
+
47
+
48
def download_file(url: str, suffix: str | None = None) -> Path:
    """Download a file to a temporary location.

    Args:
        url: URL to download from
        suffix: Optional file suffix for the temp file

    Returns:
        Path to downloaded file

    Raises:
        OSError: If download fails
    """
    tmp_path: Path | None = None
    try:
        if not suffix:
            suffix = Path(url).suffix

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_path = Path(tmp_file.name)
            logger.info(f"Downloading from {url}")

            with urllib.request.urlopen(url) as response:
                # Read in chunks so large files aren't loaded into memory
                chunk_size = 8192
                total_size = 0

                while chunk := response.read(chunk_size):
                    tmp_file.write(chunk)
                    total_size += len(chunk)

        logger.debug(f"Downloaded {total_size / 1024:.1f} KB to {tmp_path}")
        return tmp_path

    except Exception as e:
        # Fix: the temp file was created with delete=False, so a failed
        # download previously leaked a partially-written file on disk.
        if tmp_path is not None and tmp_path.exists():
            try:
                tmp_path.unlink()
            except OSError:
                logger.warning(f"Failed to clean up temp file: {tmp_path}")
        logger.error(f"Download failed: {e}")
        raise OSError(f"Download failed: {e}") from e
87
+
88
+
89
def extract_archive(archive_path: Path, target_path: Path) -> None:
    """Extract an archive with automatic format detection.

    Tries multiple archive formats until one succeeds:
    - ZIP
    - TAR.GZ (gzipped tar)
    - TAR (plain tar)
    - TAR.BZ2 (bzip2 tar)

    Args:
        archive_path: Path to archive file
        target_path: Directory to extract to

    Raises:
        ValueError: If file is not a supported archive format
        OSError: If file system operation fails (permissions, disk space, etc.)
    """
    # Make sure the destination exists before any extractor runs
    target_path.mkdir(parents=True, exist_ok=True)

    # Candidate extractors, tried in order
    attempts = (
        ("zip", _try_extract_zip),
        ("tar.gz", _try_extract_tar_gz),
        ("tar", _try_extract_tar),
        ("tar.bz2", _try_extract_tar_bz2),
    )

    extraction_errors: list[str] = []

    for format_name, extract in attempts:
        try:
            extract(archive_path, target_path)
        except (zipfile.BadZipFile, tarfile.ReadError):
            # Wrong format for this extractor; try the next one
            continue
        except Exception as e:
            # Unexpected (system-level) problem — record for reporting
            extraction_errors.append(f"{format_name}: {e}")
        else:
            logger.info(f"Successfully extracted as {format_name} format")
            return

    # Nothing succeeded: dump diagnostics and raise
    _log_extraction_failure(archive_path)
    if extraction_errors:
        # Had OS-level errors during extraction attempts
        error_details = "; ".join(extraction_errors)
        raise OSError(f"Archive extraction failed due to system errors: {error_details}")
    # No format worked, likely unsupported/corrupted file
    raise ValueError(f"Unsupported or corrupted archive format: {archive_path}")
140
+
141
+
142
def _try_extract_zip(archive_path: Path, target_path: Path) -> None:
    """Try to extract as ZIP archive.

    Raises:
        zipfile.BadZipFile: If not a valid ZIP file
        OSError: If extraction fails
    """
    # NOTE(review): extractall does not guard against malicious member
    # paths (zip-slip) — confirm archives come from trusted sources.
    try:
        with zipfile.ZipFile(archive_path) as archive:
            archive.extractall(target_path)
    except zipfile.BadZipFile:
        raise
    except Exception as e:
        raise OSError(f"ZIP extraction failed: {e}")
156
+
157
+
158
def _try_extract_tar_gz(archive_path: Path, target_path: Path) -> None:
    """Try to extract as gzipped TAR archive.

    Raises:
        tarfile.ReadError: If not a valid gzipped TAR file
        OSError: If extraction fails
    """
    # NOTE(review): extractall without a member filter trusts archive
    # paths — confirm archives come from trusted sources.
    try:
        with tarfile.open(archive_path, 'r:gz') as archive:
            archive.extractall(target_path)
    except tarfile.ReadError:
        raise
    except Exception as e:
        raise OSError(f"TAR.GZ extraction failed: {e}")
172
+
173
+
174
def _try_extract_tar(archive_path: Path, target_path: Path) -> None:
    """Try to extract as plain TAR archive.

    Raises:
        tarfile.ReadError: If not a valid TAR file
        OSError: If extraction fails
    """
    # NOTE(review): extractall without a member filter trusts archive
    # paths — confirm archives come from trusted sources.
    try:
        with tarfile.open(archive_path, 'r:') as archive:
            archive.extractall(target_path)
    except tarfile.ReadError:
        raise
    except Exception as e:
        raise OSError(f"TAR extraction failed: {e}")
188
+
189
+
190
def _try_extract_tar_bz2(archive_path: Path, target_path: Path) -> None:
    """Try to extract as bzip2 TAR archive.

    Raises:
        tarfile.ReadError: If not a valid bzip2 TAR file
        OSError: If extraction fails
    """
    # NOTE(review): extractall without a member filter trusts archive
    # paths — confirm archives come from trusted sources.
    try:
        with tarfile.open(archive_path, 'r:bz2') as archive:
            archive.extractall(target_path)
    except tarfile.ReadError:
        raise
    except Exception as e:
        raise OSError(f"TAR.BZ2 extraction failed: {e}")
204
+
205
+
206
def _log_extraction_failure(archive_path: Path) -> None:
    """Log diagnostic information when extraction fails."""
    logger.error(f"Unable to extract archive: {archive_path}")

    # Best effort: show the magic bytes to help identify the format.
    # Never raise from a logging helper.
    try:
        with archive_path.open('rb') as fh:
            head = fh.read(32)
        logger.debug(f"File header (first 32 bytes): {head}")
    except Exception:
        pass
+ pass