clang-tool-chain 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of clang-tool-chain might be problematic. Click here for more details.

@@ -0,0 +1,1325 @@
1
+ """
2
+ Toolchain downloader module.
3
+
4
+ Handles downloading and installing the LLVM/Clang toolchain binaries
5
+ from the manifest-based distribution system.
6
+ """
7
+
8
+ import contextlib
9
+ import hashlib
10
+ import json
11
+ import logging
12
+ import os
13
+ import shutil
14
+ import sys
15
+ import tarfile
16
+ import tempfile
17
+ from dataclasses import dataclass
18
+ from pathlib import Path
19
+ from typing import Any, TypeVar
20
+ from urllib.request import Request, urlopen
21
+
22
+ import fasteners
23
+ import pyzstd
24
+
25
# Send log records to stderr at INFO level so they are visible in GitHub Actions
# output as well as in ordinary debugging sessions.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=[logging.StreamHandler(sys.stderr)])

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
32
+
33
# All manifests are fetched from the clang-tool-chain-bins repository; each tool
# family (clang, IWYU, MinGW sysroot) has its own sub-tree under assets/.
_BINS_ASSETS_BASE_URL = "https://raw.githubusercontent.com/zackees/clang-tool-chain-bins/main/assets"
MANIFEST_BASE_URL = f"{_BINS_ASSETS_BASE_URL}/clang"
IWYU_MANIFEST_BASE_URL = f"{_BINS_ASSETS_BASE_URL}/iwyu"
# MinGW sysroot manifests live in the same bins repository for consistency.
MINGW_MANIFEST_BASE_URL = f"{_BINS_ASSETS_BASE_URL}/mingw"

# Generic type variable for JSON deserialization.
T = TypeVar("T")
41
+
42
+
43
+ # ============================================================================
44
+ # Custom Exceptions
45
+ # ============================================================================
46
+
47
+
48
class ToolchainInfrastructureError(Exception):
    """
    Signals that the toolchain distribution infrastructure itself is broken.

    Raised for failures such as 404s and network errors. The intent is that tests
    hitting this error FAIL instead of being skipped, because it points at a
    problem with the package's own distribution channel, for example:
    - a manifest file returns 404
    - a download URL is broken
    - a network error occurs while accessing an expected resource
    """
60
+
61
+
62
@dataclass
class ArchitectureEntry:
    """A single architecture listed under a platform in the root manifest."""

    # Architecture name, e.g. "x86_64" or "arm64".
    arch: str
    # Location of this architecture's manifest, appended to the manifest base URL.
    manifest_path: str
68
+
69
+
70
@dataclass
class PlatformEntry:
    """A single platform listed in the root manifest, with its architectures."""

    # Platform name, e.g. "win", "linux" or "darwin".
    platform: str
    # Every architecture published for this platform.
    architectures: list[ArchitectureEntry]
76
+
77
+
78
@dataclass
class RootManifest:
    """Top-level manifest: the list of platforms that have published binaries."""

    # One entry per platform found under the manifest's "platforms" key.
    platforms: list[PlatformEntry]
83
+
84
+
85
@dataclass
class VersionInfo:
    """Download details for one released version in a platform manifest."""

    # Version identifier (the key the entry was stored under in the manifest).
    version: str
    # URL of the archive to download.
    href: str
    # Expected SHA256 checksum of the archive, as a hex string.
    sha256: str
92
+
93
+
94
@dataclass
class Manifest:
    """Platform/architecture-specific manifest: known versions plus the latest one."""

    # Key into `versions` naming the newest release ("" when the manifest has none).
    latest: str
    # Version identifier -> download details.
    versions: dict[str, VersionInfo]
100
+
101
+
102
def _parse_root_manifest(data: dict[str, Any]) -> RootManifest:
    """
    Convert the decoded root-manifest JSON into a RootManifest.

    Missing "platforms" / "architectures" lists are treated as empty; the
    "platform", "arch" and "manifest_path" keys are required on each entry.

    Args:
        data: Raw JSON dictionary

    Returns:
        Parsed RootManifest object

    Raises:
        KeyError: If an entry lacks one of its required keys
    """
    return RootManifest(
        platforms=[
            PlatformEntry(
                # Architectures are built before the platform name is read, so a
                # malformed entry fails on the same key as it always has.
                architectures=[
                    ArchitectureEntry(arch=raw_arch["arch"], manifest_path=raw_arch["manifest_path"])
                    for raw_arch in raw_platform.get("architectures", [])
                ],
                platform=raw_platform["platform"],
            )
            for raw_platform in data.get("platforms", [])
        ]
    )
119
+
120
+
121
def _parse_manifest(data: dict[str, Any]) -> Manifest:
    """
    Convert the decoded platform-manifest JSON into a Manifest.

    Two layouts are accepted: version entries nested under a "versions"
    dictionary, or a flat layout where every top-level key other than "latest"
    is a version entry. Entries that are not dictionaries, or that lack "href"
    or "sha256", are skipped.

    Args:
        data: Raw JSON dictionary

    Returns:
        Parsed Manifest object
    """
    if isinstance(data.get("versions"), dict):
        # Nested layout: {"latest": ..., "versions": {name: {...}}}
        candidates = data["versions"].items()
    else:
        # Flat layout: {"latest": ..., name: {...}, ...}
        candidates = ((name, entry) for name, entry in data.items() if name != "latest")

    versions = {
        name: VersionInfo(version=name, href=entry["href"], sha256=entry["sha256"])
        for name, entry in candidates
        if isinstance(entry, dict) and "href" in entry and "sha256" in entry
    }
    return Manifest(latest=data.get("latest", ""), versions=versions)
147
+
148
+
149
def get_home_toolchain_dir() -> Path:
    """
    Return the root directory used for clang-tool-chain downloads.

    A non-empty CLANG_TOOL_CHAIN_DOWNLOAD_PATH environment variable takes
    precedence; otherwise the directory is ~/.clang-tool-chain.

    Returns:
        Path given by the environment variable, or ~/.clang-tool-chain
    """
    override = os.environ.get("CLANG_TOOL_CHAIN_DOWNLOAD_PATH")
    return Path(override) if override else Path.home() / ".clang-tool-chain"
167
+
168
+
169
def _robust_rmtree(path: Path, max_retries: int = 3) -> None:
    """
    Remove a directory tree robustly, handling Windows file permission issues.

    On Windows, files can sometimes be locked or have permission issues that prevent
    immediate deletion. This function handles those cases by:
    1. Making files writable before deletion (Windows readonly flag)
    2. Retrying up to ``max_retries`` times, with a short delay before each retry
    3. Using ignore_errors as a last resort

    The function never raises for a failed removal: after the last resort the
    tree may still partially exist.

    Args:
        path: Path to the directory to remove
        max_retries: Maximum number of retry attempts after the first try (default: 3).
            Values <= 0 mean "no retries"; the last-resort pass still runs.
    """
    import stat
    import time

    if not path.exists():
        return

    def handle_remove_readonly(func: Any, path_str: str, exc: Any) -> None:
        """Error handler to remove readonly flag and retry."""
        # Make the file writable and try again
        os.chmod(path_str, stat.S_IWRITE)
        func(path_str)

    retries = max(max_retries, 0)
    for attempt in range(retries + 1):
        try:
            # NOTE(review): `onerror` is deprecated since Python 3.12 in favour of
            # `onexc`; kept because `onexc` does not exist on older interpreters.
            shutil.rmtree(path, onerror=handle_remove_readonly)
            return
        except Exception as e:
            if attempt == 0:
                logger.warning(f"Failed to remove {path} on first attempt: {e}")
            else:
                logger.warning(f"Failed to remove {path} on retry: {e}")

        if not path.exists():
            # The failed attempt still managed to delete everything.
            return
        if attempt < retries:
            time.sleep(0.5)  # Wait briefly for file handles to close

    # Last resort: ignore all errors
    shutil.rmtree(path, ignore_errors=True)
210
+
211
+
212
def get_lock_path(platform: str, arch: str) -> Path:
    """
    Return the lock file path for one platform/arch pair.

    The toolchain root directory is created as a side effect so the lock file
    can be created inside it.

    Args:
        platform: Platform name (e.g., "win", "linux", "darwin")
        arch: Architecture name (e.g., "x86_64", "arm64")

    Returns:
        Path to the lock file
    """
    root = get_home_toolchain_dir()
    root.mkdir(parents=True, exist_ok=True)
    return root / f"{platform}-{arch}.lock"
227
+
228
+
229
def get_install_dir(platform: str, arch: str) -> Path:
    """
    Return where the clang toolchain for one platform/arch pair is installed.

    The path is only computed, not created.

    Args:
        platform: Platform name (e.g., "win", "linux", "darwin")
        arch: Architecture name (e.g., "x86_64", "arm64")

    Returns:
        <toolchain root>/clang/<platform>/<arch>
    """
    return get_home_toolchain_dir() / "clang" / platform / arch
243
+
244
+
245
def _fetch_json_raw(url: str) -> dict[str, Any]:
    """
    Download a URL (30 second timeout) and decode its body as UTF-8 JSON.

    Args:
        url: URL to fetch

    Returns:
        Parsed JSON as a dictionary

    Raises:
        ToolchainInfrastructureError: If fetching or parsing fails
    """
    logger.info(f"Fetching JSON from: {url}")
    try:
        request = Request(url, headers={"User-Agent": "clang-tool-chain"})
        with urlopen(request, timeout=30) as response:
            payload = response.read()
        logger.debug(f"Received {len(payload)} bytes from {url}")
        parsed: dict[str, Any] = json.loads(payload.decode("utf-8"))
    except Exception as e:
        # Any failure here means the distribution infrastructure is unusable.
        logger.error(f"Failed to fetch JSON from {url}: {e}")
        raise ToolchainInfrastructureError(f"Failed to fetch JSON from {url}: {e}") from e
    else:
        logger.info(f"Successfully fetched and parsed JSON from {url}")
        return parsed
270
+
271
+
272
def fetch_root_manifest() -> RootManifest:
    """
    Download and parse the clang root manifest (manifest.json under MANIFEST_BASE_URL).

    Returns:
        Root manifest as a RootManifest object
    """
    logger.info("Fetching root manifest")
    root = _parse_root_manifest(_fetch_json_raw(f"{MANIFEST_BASE_URL}/manifest.json"))
    logger.info(f"Root manifest loaded with {len(root.platforms)} platforms")
    return root
285
+
286
+
287
def fetch_platform_manifest(platform: str, arch: str) -> Manifest:
    """
    Download and parse the clang manifest for one platform/arch pair.

    The root manifest is fetched first to find the path of the matching
    platform manifest; the first matching entry wins.

    Args:
        platform: Platform name (e.g., "win", "linux", "darwin")
        arch: Architecture name (e.g., "x86_64", "arm64")

    Returns:
        Platform manifest as a Manifest object

    Raises:
        RuntimeError: If platform/arch combination is not found
    """
    logger.info(f"Fetching platform manifest for {platform}/{arch}")
    root_manifest = fetch_root_manifest()

    # First architecture entry whose platform and arch both match, if any.
    match = next(
        (
            arch_entry
            for plat_entry in root_manifest.platforms
            if plat_entry.platform == platform
            for arch_entry in plat_entry.architectures
            if arch_entry.arch == arch
        ),
        None,
    )
    if match is None:
        logger.error(f"Platform {platform}/{arch} not found in manifest")
        raise RuntimeError(f"Platform {platform}/{arch} not found in manifest")

    logger.info(f"Found manifest path: {match.manifest_path}")
    parsed = _parse_manifest(_fetch_json_raw(f"{MANIFEST_BASE_URL}/{match.manifest_path}"))
    logger.info(f"Platform manifest loaded successfully for {platform}/{arch}")
    return parsed
320
+
321
+
322
def verify_checksum(file_path: Path, expected_sha256: str) -> bool:
    """
    Check a file's SHA256 digest against an expected value (case-insensitive).

    Args:
        file_path: Path to the file to verify
        expected_sha256: Expected SHA256 hash (hex string)

    Returns:
        True if checksum matches, False otherwise
    """
    logger.info(f"Verifying checksum for {file_path}")
    logger.debug(f"Expected SHA256: {expected_sha256}")

    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        # Hash in fixed-size chunks so large archives are never fully in memory.
        while block_bytes := stream.read(8192):
            digest.update(block_bytes)
    actual_hash = digest.hexdigest()
    logger.debug(f"Actual SHA256: {actual_hash}")

    if actual_hash.lower() != expected_sha256.lower():
        logger.error(f"Checksum verification failed! Expected: {expected_sha256}, Got: {actual_hash}")
        return False
    logger.info("Checksum verification passed")
    return True
349
+
350
+
351
+ def download_file(url: str, dest_path: Path, expected_sha256: str | None = None) -> None:
352
+ """
353
+ Download a file from a URL to a destination path.
354
+
355
+ Args:
356
+ url: URL to download from
357
+ dest_path: Path to save the file
358
+ expected_sha256: Optional SHA256 checksum to verify
359
+
360
+ Raises:
361
+ ToolchainInfrastructureError: If download fails or checksum doesn't match
362
+ """
363
+ logger.info(f"Downloading file from {url}")
364
+ logger.info(f"Destination: {dest_path}")
365
+ try:
366
+ req = Request(url, headers={"User-Agent": "clang-tool-chain"})
367
+ with urlopen(req, timeout=300) as response:
368
+ content_length = response.getheader("Content-Length")
369
+ if content_length:
370
+ logger.info(f"Download size: {int(content_length) / (1024*1024):.2f} MB")
371
+
372
+ # Create parent directory if it doesn't exist
373
+ dest_path.parent.mkdir(parents=True, exist_ok=True)
374
+
375
+ # Download to temporary file first
376
+ with tempfile.NamedTemporaryFile(delete=False, dir=dest_path.parent) as tmp_file:
377
+ tmp_path = Path(tmp_file.name)
378
+ logger.debug(f"Downloading to temporary file: {tmp_path}")
379
+ shutil.copyfileobj(response, tmp_file)
380
+ logger.info(f"Download complete: {tmp_path.stat().st_size / (1024*1024):.2f} MB")
381
+
382
+ # Verify checksum if provided
383
+ if expected_sha256 and not verify_checksum(tmp_path, expected_sha256):
384
+ tmp_path.unlink()
385
+ raise ToolchainInfrastructureError(f"Checksum verification failed for {url}")
386
+
387
+ # Move to final destination
388
+ logger.debug(f"Moving {tmp_path} to {dest_path}")
389
+ tmp_path.replace(dest_path)
390
+ logger.info(f"File downloaded successfully to {dest_path}")
391
+
392
+ except ToolchainInfrastructureError:
393
+ # Re-raise infrastructure errors as-is
394
+ raise
395
+ except Exception as e:
396
+ logger.error(f"Download failed: {e}")
397
+ # Clean up temporary file if it exists
398
+ if "tmp_path" in locals():
399
+ tmp_path = locals()["tmp_path"]
400
+ if tmp_path.exists():
401
+ tmp_path.unlink()
402
+ raise ToolchainInfrastructureError(f"Failed to download {url}: {e}") from e
403
+
404
+
405
def fix_file_permissions(install_dir: Path) -> None:
    """
    Normalise file modes after extraction so binaries and shared libraries can run.

    On non-Windows systems:
    - Every regular file directly inside bin/: 0o755 (rwxr-xr-x)
    - Under lib/ (recursively): headers, text files and static libraries get
      0o644 (rw-r--r--); shared libraries (.so, .dylib, versioned .so.N) and
      other files below a nested bin/ directory get 0o755
    - Any other file under lib/ is left untouched

    On Windows, this is a no-op as permissions work differently.

    Args:
        install_dir: Installation directory to fix permissions in
    """
    import platform

    logger.info(f"Fixing file permissions in {install_dir}")

    # Only fix permissions on Unix-like systems (Linux, macOS)
    if platform.system() == "Windows":
        logger.debug("Skipping permission fix on Windows")
        return

    plain_suffixes = {".h", ".inc", ".modulemap", ".tcc", ".txt", ".a", ".syms"}
    shared_lib_suffixes = {".so", ".dylib"}
    not_nested_binary_suffixes = {".h", ".inc", ".txt", ".a", ".so", ".dylib"}

    bin_dir = install_dir / "bin"
    if bin_dir.is_dir():
        for entry in bin_dir.iterdir():
            if entry.is_file():
                entry.chmod(0o755)

    lib_dir = install_dir / "lib"
    if lib_dir.is_dir():
        for file_path in lib_dir.rglob("*"):
            if not file_path.is_file():
                continue
            suffix = file_path.suffix

            # Headers, text files, and static libraries: readable, not executable
            if suffix in plain_suffixes:
                file_path.chmod(0o644)
                continue

            is_shared_lib = suffix in shared_lib_suffixes or ".so." in file_path.name
            # Grouping written out explicitly: the suffix exclusion only applies
            # to the "/bin/" test, matching how `and` binds tighter than `or`.
            is_nested_binary = "/bin/" in str(file_path) and suffix not in not_nested_binary_suffixes
            if is_shared_lib or is_nested_binary:
                file_path.chmod(0o755)

    # Force filesystem sync to ensure all permission changes are committed.
    # This prevents "Text file busy" errors when another thread tries to execute
    # binaries immediately after this function returns.
    if bin_dir.exists():
        dir_fd = os.open(str(bin_dir), os.O_RDONLY)
        try:
            os.fsync(dir_fd)
        finally:
            os.close(dir_fd)
467
+
468
+
469
def _try_system_tar(tar_file: Path, extract_dir: Path) -> bool:
    """
    Attempt extraction with the system ``tar`` command.

    ``tar --version`` is run first as an availability probe (5 second timeout);
    the extraction itself is given 300 seconds. No exception escapes: every
    failure is logged and reported through the return value.

    Args:
        tar_file: Uncompressed tar archive to extract
        extract_dir: Directory passed to ``tar -C``

    Returns:
        True if extraction succeeded, False if tar is not available or extraction failed
    """
    import subprocess

    # Probe for a usable tar binary
    try:
        probe = subprocess.run(["tar", "--version"], capture_output=True, timeout=5)
        if probe.returncode != 0:
            logger.debug("System tar not available")
            return False
        logger.info(f"System tar available: {probe.stdout.decode()[:100]}")
    except Exception as e:
        logger.debug(f"System tar not available: {e}")
        return False

    # Run the actual extraction
    command = ["tar", "-xf", str(tar_file), "-C", str(extract_dir)]
    try:
        logger.info(f"Using system tar to extract {tar_file}")
        subprocess.run(command, capture_output=True, timeout=300, check=True)
        logger.info("System tar extraction completed successfully")
        return True
    except subprocess.CalledProcessError as e:
        logger.warning(f"System tar extraction failed: {e.stderr.decode()[:500]}")
        return False
    except Exception as e:
        logger.warning(f"System tar extraction failed: {e}")
        return False
503
+
504
+
505
def _extractall_filtered(tar: tarfile.TarFile, target: Path) -> None:
    """Extract every member of ``tar`` into ``target``, using the "tar" extraction filter when supported."""
    # Feature detection rather than a version check: extraction filters were
    # also backported to security releases of older Python versions.
    if hasattr(tarfile, "tar_filter"):
        tar.extractall(target, filter="tar")
    else:
        tar.extractall(target)


def extract_tarball(archive_path: Path, dest_dir: Path) -> None:
    """
    Extract a tar.zst archive to a destination directory.

    Steps:
    1. Decompress the zstd archive to a temporary .tar file next to it.
    2. Remove any existing ``dest_dir``.
    3. Extract into ``dest_dir.parent`` (system tar first, Python tarfile as
       fallback; archives whose name contains "mingw-sysroot" always use Python
       tarfile via a temporary directory).
    4. If ``dest_dir`` does not exist afterwards, either rename the single
       extracted directory to ``dest_dir`` or move all extracted items into it.

    The temporary .tar file is deleted whether or not extraction succeeds.

    Args:
        archive_path: Path to the archive file
        dest_dir: Directory to extract to

    Raises:
        RuntimeError: If extraction fails
    """
    logger.info(f"Extracting archive {archive_path} to {dest_dir}")
    try:
        # MinGW sysroot archives have an intentional multi-root structure
        # (x86_64-w64-mingw32/, include/, lib/) that must be preserved as-is.
        is_mingw_archive = "mingw-sysroot" in archive_path.name

        # Decompress zstd to temporary tar file
        temp_tar = archive_path.with_suffix("")  # Remove .zst extension
        if temp_tar == archive_path:
            # No suffix to strip: writing the tar to the same path would
            # truncate the archive while it is being read.
            temp_tar = archive_path.with_name(f"{archive_path.name}.tar")
        logger.debug(f"Decompressing zstd archive to {temp_tar}")

        try:
            # Decompress with pyzstd
            with open(archive_path, "rb") as compressed, open(temp_tar, "wb") as decompressed:
                compressed_data = compressed.read()
                logger.info(f"Decompressing {len(compressed_data) / (1024*1024):.2f} MB")
                decompressed.write(pyzstd.decompress(compressed_data))
            logger.info(f"Decompression complete: {temp_tar.stat().st_size / (1024*1024):.2f} MB")

            # DEBUG: Verify tar file immediately after decompression (diagnostic only)
            logger.debug(f"Verifying decompressed tar file: {temp_tar}")
            try:
                with tarfile.open(temp_tar, "r") as verify_tar:
                    verify_members = verify_tar.getmembers()
                    logger.info(f"Decompressed tar has {len(verify_members)} members")
                    # Check first 100 members
                    verify_top = {m.name.split("/")[0] for m in verify_members[:100]}
                    logger.info(f"Sample top-level dirs from tar: {sorted(verify_top)}")
            except Exception as e:
                logger.warning(f"Could not verify tar file: {e}")

            # Remove dest_dir if it exists to ensure clean extraction
            if dest_dir.exists():
                logger.debug(f"Removing existing destination: {dest_dir}")
                _robust_rmtree(dest_dir)

            # Create parent directory for extraction
            dest_dir.parent.mkdir(parents=True, exist_ok=True)

            # MinGW archives must use Python tarfile (system tar has issues with multi-root structure)
            use_python_tar = is_mingw_archive

            # Try system tar first (more reliable on Linux/macOS) unless forced to use Python
            if not use_python_tar and _try_system_tar(temp_tar, dest_dir.parent):
                logger.info("Extraction successful using system tar")
            else:
                # Use Python tarfile
                if use_python_tar:
                    logger.info("Using Python tarfile for MinGW archive (system tar has multi-root issues)")
                else:
                    logger.info("Extracting tar archive using Python tarfile")

                with tarfile.open(temp_tar, "r") as tar:
                    if is_mingw_archive:
                        # For MinGW archives, extract to a temporary directory first
                        # (workaround for mysterious tar.extractall() bug where lib/ directory is lost)
                        logger.info("Extracting MinGW archive to temp location first (workaround for extraction bug)")

                        # Sanity check: log the top-level directories the tar contains
                        members = tar.getmembers()
                        logger.info(f"Tar file has {len(members)} members total")
                        top_level_dirs = {m.name.split("/")[0] for m in members}
                        logger.info(f"Top-level directories in tar: {sorted(top_level_dirs)}")

                        with tempfile.TemporaryDirectory() as temp_extract:
                            temp_extract_path = Path(temp_extract)
                            logger.debug(f"Temp extraction dir: {temp_extract_path}")

                            _extractall_filtered(tar, temp_extract_path)

                            # Log what was extracted
                            extracted = list(temp_extract_path.iterdir())
                            logger.info(f"Extracted {len(extracted)} items to temp: {[e.name for e in extracted]}")

                            # Move to final location
                            dest_dir.parent.mkdir(parents=True, exist_ok=True)
                            for item in extracted:
                                target = dest_dir.parent / item.name
                                logger.info(f"Moving {item.name} from temp to {target}")
                                shutil.move(str(item), str(target))
                    else:
                        # Regular extraction for non-MinGW archives
                        _extractall_filtered(tar, dest_dir.parent)

                logger.info("Python tarfile extraction complete")

                # DEBUG: Check what was actually extracted
                if is_mingw_archive:
                    extracted_check = list(dest_dir.parent.iterdir())
                    logger.info(
                        f"Post-extraction check: {len(extracted_check)} items in {dest_dir.parent}: "
                        f"{[item.name for item in extracted_check]}"
                    )

            # The archive should extract to a single directory with the expected name
            # If it doesn't match dest_dir name, rename it
            if not dest_dir.exists():
                # Look for what was extracted in the parent directory
                # NOTE(review): this lists *everything* in dest_dir.parent, so
                # unrelated siblings that already live there (e.g. another
                # install) are treated as extracted content -- confirm callers
                # never share this parent, or extract into a private staging dir.
                extracted_items = list(dest_dir.parent.iterdir())
                extracted_dirs = [d for d in extracted_items if d.is_dir()]
                extracted_files = [f for f in extracted_items if f.is_file() and f.name != "done.txt"]

                logger.debug(
                    f"Found {len(extracted_dirs)} directories and {len(extracted_files)} files in {dest_dir.parent}: "
                    f"dirs={[d.name for d in extracted_dirs]}, files={[f.name for f in extracted_files[:5]]}"
                )

                # Case 1: Archive extracted to a single top-level directory (e.g., clang archives)
                # Filter out dest_dir itself in case it was already created
                candidates = [d for d in extracted_dirs if d != dest_dir]
                if len(candidates) == 1 and len(extracted_files) == 0:
                    actual_dir = candidates[0]
                    logger.info(f"Renaming extracted directory {actual_dir} to {dest_dir}")
                    shutil.move(str(actual_dir), str(dest_dir))
                # Case 2: Archive has flat structure with bin/, share/, etc. (e.g., IWYU archives)
                # Also handles MinGW archives which have multi-root structure that must be preserved
                elif extracted_dirs or extracted_files:
                    if is_mingw_archive:
                        logger.info(f"MinGW archive detected, moving multi-root structure into {dest_dir}")
                        logger.info(f"Found {len(extracted_dirs)} directories and {len(extracted_files)} files to move")
                    else:
                        logger.info(f"Archive has flat structure, moving contents into {dest_dir}")
                    dest_dir.mkdir(parents=True, exist_ok=True)
                    for item in extracted_items:
                        if item.is_dir() or (item.is_file() and item.name != "done.txt"):
                            target = dest_dir / item.name
                            logger.info(f"Moving {item.name} to {target}")
                            shutil.move(str(item), str(target))
                else:
                    logger.warning(f"No extracted content found to move to {dest_dir}")

            logger.info(f"Successfully extracted to {dest_dir}")

        finally:
            # Clean up temporary tar file (also when decompression itself failed)
            if temp_tar.exists():
                logger.debug(f"Cleaning up temporary tar file: {temp_tar}")
                temp_tar.unlink()

    except Exception as e:
        logger.error(f"Extraction failed: {e}")
        raise RuntimeError(f"Failed to extract {archive_path}: {e}") from e
678
+
679
+
680
def get_latest_version_info(platform_manifest: Manifest) -> tuple[str, str, str]:
    """
    Look up the download details of the version a manifest marks as latest.

    Args:
        platform_manifest: Platform-specific manifest object

    Returns:
        Tuple of (version, download_url, sha256)

    Raises:
        RuntimeError: If the manifest names no latest version, that version is
            not listed, or its entry has no download URL
    """
    latest = platform_manifest.latest
    if not latest:
        raise RuntimeError("Manifest does not specify a 'latest' version")

    info = platform_manifest.versions.get(latest)
    if not info:
        raise RuntimeError(f"Version {latest} not found in manifest")
    if not info.href:
        raise RuntimeError(f"No download URL for version {latest}")

    return latest, info.href, info.sha256
708
+
709
+
710
def is_toolchain_installed(platform: str, arch: str) -> bool:
    """
    Report whether the toolchain for a platform/arch pair is already installed.

    Installation is detected purely by the done.txt marker file, which is
    written after a successful download and extraction.

    Args:
        platform: Platform name (e.g., "win", "linux", "darwin")
        arch: Architecture name (e.g., "x86_64", "arm64")

    Returns:
        True if installed, False otherwise
    """
    return (get_install_dir(platform, arch) / "done.txt").exists()
727
+
728
+
729
def download_and_install_toolchain(platform: str, arch: str, verbose: bool = False) -> None:
    """
    Download the latest toolchain for a platform/arch pair and install it.

    Sequence:
    1. Fetch the platform manifest (which itself fetches the root manifest)
    2. Resolve the latest version, its URL and checksum
    3. Download the archive to a temporary file, verifying the checksum
    4. Replace any previous installation and extract to
       ~/.clang-tool-chain/clang/<platform>/<arch>
    5. Fix file permissions; on Linux copy clang++ to clang when clang is missing
    6. Flush filesystem buffers, then write done.txt as the success marker

    The downloaded archive is always deleted, even on failure.

    Args:
        platform: Platform name (e.g., "win", "linux", "darwin")
        arch: Architecture name (e.g., "x86_64", "arm64")
        verbose: If True, print progress messages

    Raises:
        RuntimeError: If download or installation fails
    """
    # `platform` the parameter shadows the stdlib module, hence the alias.
    import platform as plat

    def report(message: str) -> None:
        """Print a progress message when verbose output was requested."""
        if verbose:
            print(message)

    report(f"Downloading clang-tool-chain for {platform}/{arch}...")

    manifest = fetch_platform_manifest(platform, arch)
    version, download_url, sha256 = get_latest_version_info(manifest)
    report(f"Latest version: {version}")
    report(f"Download URL: {download_url}")

    # Reserve a temporary file name for the archive. Using tempfile avoids
    # conflicts with test cleanup that removes temp directories.
    with tempfile.NamedTemporaryFile(mode="wb", suffix=".tar.zst", delete=False) as placeholder:
        archive_path = Path(placeholder.name)

    try:
        report(f"Downloading to {archive_path}...")
        download_file(download_url, archive_path, sha256)
        report("Download complete. Verifying checksum...")

        install_dir = get_install_dir(platform, arch)
        report(f"Extracting to {install_dir}...")

        # Remove old installation if it exists (BEFORE extraction)
        if install_dir.exists():
            _robust_rmtree(install_dir)
        install_dir.parent.mkdir(parents=True, exist_ok=True)

        extract_tarball(archive_path, install_dir)

        # Set executable bits on binaries and shared libraries
        report("Fixing file permissions...")
        fix_file_permissions(install_dir)

        # On Linux, copy clang++ to clang for convenience
        if platform == "linux":
            clang_cpp = install_dir / "bin" / "clang++"
            clang = install_dir / "bin" / "clang"
            if clang_cpp.exists() and not clang.exists():
                report("Copying clang++ to clang on Linux...")
                shutil.copy2(clang_cpp, clang)

        # Flush filesystem buffers before done.txt is written, so another
        # thread/process that sees the marker does not hit "Text file busy" when
        # executing the binaries. A failing sync is ignored: a rare race is
        # preferable to failing the whole installation.
        if plat.system() != "Windows" and hasattr(os, "sync"):
            with contextlib.suppress(Exception):
                os.sync()  # type: ignore[attr-defined]

        # Write done.txt to mark successful installation
        install_dir.mkdir(parents=True, exist_ok=True)
        (install_dir / "done.txt").write_text(f"Installation completed successfully\nVersion: {version}\n")

    finally:
        # Clean up downloaded archive
        if archive_path.exists():
            archive_path.unlink()

    report("Installation complete!")
835
+
836
+
837
def ensure_toolchain(platform: str, arch: str) -> None:
    """
    Make sure the toolchain for a platform/arch pair is installed.

    Returns immediately when the installation marker is already present.
    Otherwise an inter-process file lock is taken so only one process performs
    the download; the marker is checked again once the lock is held, in case
    another process finished the installation in the meantime.

    Args:
        platform: Platform name (e.g., "win", "linux", "darwin")
        arch: Architecture name (e.g., "x86_64", "arm64")
    """
    logger.info(f"Ensuring toolchain is installed for {platform}/{arch}")

    # Fast path, no lock needed
    if is_toolchain_installed(platform, arch):
        logger.info(f"Toolchain already installed for {platform}/{arch}")
        return

    logger.info(f"Toolchain not installed, acquiring lock for {platform}/{arch}")
    lock_file = get_lock_path(platform, arch)
    logger.debug(f"Lock path: {lock_file}")
    install_lock = fasteners.InterProcessLock(str(lock_file))

    logger.info("Waiting to acquire installation lock...")
    with install_lock:
        logger.info("Lock acquired")

        # Re-check under the lock: someone else may have installed it while we waited
        if is_toolchain_installed(platform, arch):
            logger.info("Another process installed the toolchain while we waited")
            return

        logger.info("Starting toolchain download and installation")
        download_and_install_toolchain(platform, arch)
        logger.info(f"Toolchain installation complete for {platform}/{arch}")
874
+
875
+
876
+ # ============================================================================
877
+ # IWYU (Include What You Use) Support
878
+ # ============================================================================
879
+
880
+
881
def fetch_iwyu_root_manifest() -> RootManifest:
    """
    Download and parse the top-level IWYU manifest.

    The manifest is read from ``manifest.json`` under
    ``IWYU_MANIFEST_BASE_URL``.

    Returns:
        The parsed root manifest as a RootManifest object
    """
    logger.info("Fetching IWYU root manifest")
    raw_manifest = _fetch_json_raw(f"{IWYU_MANIFEST_BASE_URL}/manifest.json")
    root_manifest = _parse_root_manifest(raw_manifest)
    platform_count = len(root_manifest.platforms)
    logger.info(f"IWYU root manifest loaded with {platform_count} platforms")
    return root_manifest
894
+
895
+
896
def fetch_iwyu_platform_manifest(platform: str, arch: str) -> Manifest:
    """
    Download and parse the IWYU manifest for one platform/arch pair.

    The root manifest is fetched first and searched for the first entry whose
    platform and architecture both match; that entry's manifest path is then
    fetched relative to ``IWYU_MANIFEST_BASE_URL``.

    Args:
        platform: Platform name (e.g., "win", "linux", "darwin")
        arch: Architecture name (e.g., "x86_64", "arm64")

    Returns:
        The parsed platform manifest as a Manifest object

    Raises:
        RuntimeError: If no entry matches the platform/arch combination
    """
    logger.info(f"Fetching IWYU platform manifest for {platform}/{arch}")
    root = fetch_iwyu_root_manifest()

    for platform_entry in root.platforms:
        if platform_entry.platform != platform:
            continue
        for architecture_entry in platform_entry.architectures:
            if architecture_entry.arch != arch:
                continue
            relative_path = architecture_entry.manifest_path
            logger.info(f"Found IWYU manifest path: {relative_path}")
            raw_manifest = _fetch_json_raw(f"{IWYU_MANIFEST_BASE_URL}/{relative_path}")
            platform_manifest = _parse_manifest(raw_manifest)
            logger.info(f"IWYU platform manifest loaded successfully for {platform}/{arch}")
            return platform_manifest

    # Fell through every entry without a match.
    logger.error(f"IWYU platform {platform}/{arch} not found in manifest")
    raise RuntimeError(f"IWYU platform {platform}/{arch} not found in manifest")
929
+
930
+
931
def get_iwyu_install_dir(platform: str, arch: str) -> Path:
    """
    Build the path of the IWYU installation directory.

    The result is ``<home toolchain dir>/iwyu/<platform>/<arch>``; the
    directory is not created here.

    Args:
        platform: Platform name (e.g., "win", "linux", "darwin")
        arch: Architecture name (e.g., "x86_64", "arm64")

    Returns:
        Path to the IWYU installation directory
    """
    return get_home_toolchain_dir() / "iwyu" / platform / arch
945
+
946
+
947
def get_iwyu_lock_path(platform: str, arch: str) -> Path:
    """
    Build the path of the lock file guarding IWYU installation.

    The home toolchain directory is created if missing so the lock file can
    be placed inside it.

    Args:
        platform: Platform name (e.g., "win", "linux", "darwin")
        arch: Architecture name (e.g., "x86_64", "arm64")

    Returns:
        Path to the lock file
    """
    lock_dir = get_home_toolchain_dir()
    lock_dir.mkdir(parents=True, exist_ok=True)
    return lock_dir / f"iwyu-{platform}-{arch}.lock"
962
+
963
+
964
def is_iwyu_installed(platform: str, arch: str) -> bool:
    """
    Report whether IWYU has been installed for the given platform/arch.

    Installation is detected by the presence of the ``done.txt`` marker in
    the IWYU installation directory.

    Args:
        platform: Platform name (e.g., "win", "linux", "darwin")
        arch: Architecture name (e.g., "x86_64", "arm64")

    Returns:
        True if the marker file exists, False otherwise
    """
    marker = get_iwyu_install_dir(platform, arch) / "done.txt"
    return marker.exists()
978
+
979
+
980
def download_and_install_iwyu(platform: str, arch: str) -> None:
    """
    Fetch the latest IWYU archive and unpack it into the install directory.

    Any existing installation directory is removed first. After extraction a
    ``done.txt`` marker is written; its presence is what ``is_iwyu_installed``
    checks for.

    Args:
        platform: Platform name (e.g., "win", "linux", "darwin")
        arch: Architecture name (e.g., "x86_64", "arm64")
    """
    logger.info(f"Downloading and installing IWYU for {platform}/{arch}")

    # The platform manifest supplies the download URL and checksum.
    manifest = fetch_iwyu_platform_manifest(platform, arch)
    latest_version = manifest.latest
    release = manifest.versions[latest_version]

    logger.info(f"IWYU version: {latest_version}")
    logger.info(f"Download URL: {release.href}")

    target_dir = get_iwyu_install_dir(platform, arch)
    logger.info(f"Installation directory: {target_dir}")

    # Start from a clean slate if a previous installation is lying around.
    if target_dir.exists():
        logger.info("Removing old IWYU installation")
        _robust_rmtree(target_dir)

    # The archive lives in a throwaway directory that is deleted on exit.
    with tempfile.TemporaryDirectory() as scratch_dir:
        archive_file = Path(scratch_dir) / "iwyu.tar.zst"

        download_file(release.href, archive_file, release.sha256)

        logger.info("Extracting IWYU archive")
        extract_tarball(archive_file, target_dir)

        # Permission fix-up is skipped on Windows.
        if os.name != "nt":
            logger.info("Setting executable permissions on IWYU binaries")
            fix_file_permissions(target_dir)

        # Write the completion marker; make sure its directory exists first.
        target_dir.mkdir(parents=True, exist_ok=True)
        marker = target_dir / "done.txt"
        marker.write_text(f"IWYU {latest_version} installed successfully\n")

    logger.info(f"IWYU installation complete for {platform}/{arch}")
1031
+
1032
+
1033
def ensure_iwyu(platform: str, arch: str) -> None:
    """
    Install IWYU for the given platform/arch unless it is already present.

    The installed-check runs once without the lock and once more after the
    inter-process lock has been acquired, so a process that waited on the
    lock does not repeat an installation that completed in the meantime.

    Args:
        platform: Platform name (e.g., "win", "linux", "darwin")
        arch: Architecture name (e.g., "x86_64", "arm64")
    """
    logger.info(f"Ensuring IWYU is installed for {platform}/{arch}")

    # Fast path: already installed, so no lock is taken at all.
    if is_iwyu_installed(platform, arch):
        logger.info(f"IWYU already installed for {platform}/{arch}")
        return

    logger.info(f"IWYU not installed, acquiring lock for {platform}/{arch}")
    lock_file = get_iwyu_lock_path(platform, arch)
    logger.debug(f"Lock path: {lock_file}")
    install_lock = fasteners.InterProcessLock(str(lock_file))

    logger.info("Waiting to acquire IWYU installation lock...")
    with install_lock:
        logger.info("Lock acquired")

        # Re-check now that we hold the lock: someone else may have finished.
        if not is_iwyu_installed(platform, arch):
            logger.info("Starting IWYU download and installation")
            download_and_install_iwyu(platform, arch)
            logger.info(f"IWYU installation complete for {platform}/{arch}")
        else:
            logger.info("Another process installed IWYU while we waited")
1070
+
1071
+
1072
+ # ============================================================================
1073
+ # MinGW Sysroot Support (Windows GNU ABI)
1074
+ # ============================================================================
1075
+
1076
+
1077
def fetch_mingw_root_manifest() -> RootManifest:
    """
    Download and parse the top-level MinGW sysroot manifest.

    The manifest is read from ``manifest.json`` under
    ``MINGW_MANIFEST_BASE_URL``.

    Returns:
        The parsed root manifest as a RootManifest object
    """
    logger.info("Fetching MinGW sysroot root manifest")
    raw_manifest = _fetch_json_raw(f"{MINGW_MANIFEST_BASE_URL}/manifest.json")
    root_manifest = _parse_root_manifest(raw_manifest)
    platform_count = len(root_manifest.platforms)
    logger.info(f"MinGW sysroot root manifest loaded with {platform_count} platforms")
    return root_manifest
1090
+
1091
+
1092
def fetch_mingw_platform_manifest(platform: str, arch: str) -> Manifest:
    """
    Download and parse the MinGW sysroot manifest for one platform/arch pair.

    The root manifest is fetched first and searched for the first entry whose
    platform and architecture both match; that entry's manifest path is then
    fetched relative to ``MINGW_MANIFEST_BASE_URL``.

    Args:
        platform: Platform name (e.g., "win")
        arch: Architecture name (e.g., "x86_64", "arm64")

    Returns:
        The parsed platform manifest as a Manifest object

    Raises:
        RuntimeError: If no entry matches the platform/arch combination
    """
    logger.info(f"Fetching MinGW sysroot platform manifest for {platform}/{arch}")
    root = fetch_mingw_root_manifest()

    for platform_entry in root.platforms:
        if platform_entry.platform != platform:
            continue
        for architecture_entry in platform_entry.architectures:
            if architecture_entry.arch != arch:
                continue
            relative_path = architecture_entry.manifest_path
            logger.info(f"Found MinGW sysroot manifest path: {relative_path}")
            raw_manifest = _fetch_json_raw(f"{MINGW_MANIFEST_BASE_URL}/{relative_path}")
            platform_manifest = _parse_manifest(raw_manifest)
            logger.info(f"MinGW sysroot platform manifest loaded successfully for {platform}/{arch}")
            return platform_manifest

    # Fell through every entry without a match.
    logger.error(f"MinGW sysroot platform {platform}/{arch} not found in manifest")
    raise RuntimeError(f"MinGW sysroot platform {platform}/{arch} not found in manifest")
1125
+
1126
+
1127
def get_mingw_install_dir(platform: str, arch: str) -> Path:
    """
    Build the path of the MinGW sysroot installation directory.

    The result is ``<home toolchain dir>/mingw/<platform>/<arch>``; the
    directory is not created here.

    Args:
        platform: Platform name (e.g., "win")
        arch: Architecture name (e.g., "x86_64", "arm64")

    Returns:
        Path to the MinGW sysroot installation directory
    """
    return get_home_toolchain_dir() / "mingw" / platform / arch
1141
+
1142
+
1143
def get_mingw_lock_path(platform: str, arch: str) -> Path:
    """
    Build the path of the lock file guarding MinGW sysroot installation.

    The home toolchain directory is created if missing so the lock file can
    be placed inside it.

    Args:
        platform: Platform name (e.g., "win")
        arch: Architecture name (e.g., "x86_64", "arm64")

    Returns:
        Path to the lock file
    """
    lock_dir = get_home_toolchain_dir()
    lock_dir.mkdir(parents=True, exist_ok=True)
    return lock_dir / f"mingw-{platform}-{arch}.lock"
1158
+
1159
+
1160
def is_mingw_installed(platform: str, arch: str) -> bool:
    """
    Report whether the MinGW sysroot has been installed for platform/arch.

    Installation is detected by the presence of the ``done.txt`` marker in
    the MinGW sysroot installation directory.

    Args:
        platform: Platform name (e.g., "win")
        arch: Architecture name (e.g., "x86_64", "arm64")

    Returns:
        True if the marker file exists, False otherwise
    """
    marker = get_mingw_install_dir(platform, arch) / "done.txt"
    return marker.exists()


# Second public name for the same function, kept for consistency with test naming
is_mingw_sysroot_installed = is_mingw_installed
1178
+
1179
+
1180
def _copy_clang_resource_headers(install_dir: Path) -> None:
    """
    Copy clang's top-level resource headers (``*.h``) into the MinGW sysroot.

    Headers are read from ``<clang_root>/lib/clang/<version>/include`` and
    written to ``<install_dir>/lib/clang/<version>/include``. If any expected
    source directory is missing, a warning is logged and nothing is copied.
    Exceptions are not handled here; the caller decides whether they are fatal.

    Args:
        install_dir: Root of the MinGW sysroot installation
    """
    # Imported at call time, as the original code did
    # (presumably to avoid an import cycle with the wrapper module - confirm).
    from . import wrapper

    clang_root = wrapper.get_platform_binary_dir().parent

    # Clang resource directory layout: <clang_root>/lib/clang/<version>/include/
    clang_lib = clang_root / "lib" / "clang"
    if not clang_lib.exists():
        logger.warning(f"Clang lib directory not found: {clang_lib}")
        return

    version_dirs = [d for d in clang_lib.iterdir() if d.is_dir()]
    if not version_dirs:
        logger.warning(f"No version directories found in {clang_lib}")
        return

    # Only one version directory is expected; take the first one found.
    clang_version_dir = version_dirs[0]
    resource_include = clang_version_dir / "include"
    if not resource_include.exists():
        logger.warning(f"Clang resource include directory not found: {resource_include}")
        return

    # Create the destination directory itself, not just its parent: copy2()
    # fails with FileNotFoundError when the target directory does not exist,
    # which happens whenever the extracted sysroot does not already ship
    # lib/clang/<version>/include.
    dest_resource = install_dir / "lib" / "clang" / clang_version_dir.name / "include"
    dest_resource.mkdir(parents=True, exist_ok=True)

    copied_count = 0
    for header_file in resource_include.glob("*.h"):
        shutil.copy2(header_file, dest_resource / header_file.name)
        copied_count += 1

    logger.info(f"Copied {copied_count} resource headers from clang installation")


def download_and_install_mingw(platform: str, arch: str) -> None:
    """
    Download and install MinGW sysroot for the given platform/arch.

    Any existing installation directory is removed first. After extraction,
    clang's resource headers are copied into the sysroot on a best-effort
    basis (a failure there is logged as a warning and does not abort the
    installation), and a ``done.txt`` marker is written; its presence is what
    ``is_mingw_installed`` checks for.

    Args:
        platform: Platform name (e.g., "win")
        arch: Architecture name (e.g., "x86_64", "arm64")
    """
    logger.info(f"Downloading and installing MinGW sysroot for {platform}/{arch}")

    # Fetch the manifest to get download URL and checksum
    manifest = fetch_mingw_platform_manifest(platform, arch)
    version_info = manifest.versions[manifest.latest]

    logger.info(f"MinGW sysroot version: {manifest.latest}")
    logger.info(f"Download URL: {version_info.href}")

    install_dir = get_mingw_install_dir(platform, arch)
    logger.info(f"Installation directory: {install_dir}")

    # Remove old installation if exists
    if install_dir.exists():
        logger.info("Removing old MinGW sysroot installation")
        _robust_rmtree(install_dir)

    # The archive lives in a temporary directory that is deleted on exit
    with tempfile.TemporaryDirectory() as temp_dir:
        archive_file = Path(temp_dir) / "mingw-sysroot.tar.zst"

        # Download the archive
        download_file(version_info.href, archive_file, version_info.sha256)

        # Extract to installation directory
        logger.info("Extracting MinGW sysroot archive")
        extract_tarball(archive_file, install_dir)

        # Fix permissions on Unix systems (not needed for Windows but included for consistency)
        if os.name != "nt":
            logger.info("Setting executable permissions on MinGW sysroot")
            fix_file_permissions(install_dir)

        # Copy clang resource headers (mm_malloc.h, intrinsics, etc.) from clang installation
        # These are compiler builtin headers needed for compilation
        logger.info("Copying clang resource headers to MinGW sysroot")
        try:
            _copy_clang_resource_headers(install_dir)
        except Exception as e:
            # Deliberately best-effort: the sysroot is still usable for code
            # that does not need these headers, so only warn.
            logger.warning(f"Could not copy clang resource headers: {e}")
            logger.warning("Compilation may fail for code using Intel intrinsics or SIMD instructions")

        # Mark installation as complete
        # Ensure install_dir exists before writing done.txt
        install_dir.mkdir(parents=True, exist_ok=True)
        done_file = install_dir / "done.txt"
        with open(done_file, "w") as f:
            f.write(f"MinGW sysroot {manifest.latest} installed successfully\n")

    logger.info(f"MinGW sysroot installation complete for {platform}/{arch}")


# Alias for consistency with test naming
download_and_install_mingw_sysroot = download_and_install_mingw
1276
+
1277
+
1278
def ensure_mingw_sysroot_installed(platform: str, arch: str) -> Path:
    """
    Install the MinGW sysroot (Windows GNU ABI support) unless already present.

    The installed-check runs once without the lock and once more after the
    inter-process lock has been acquired, so a process that waited on the
    lock does not repeat an installation that completed in the meantime.

    Args:
        platform: Platform name ("win")
        arch: Architecture ("x86_64" or "arm64")

    Returns:
        Path to the installed MinGW sysroot directory

    Raises:
        ValueError: If platform is anything other than "win"
    """
    if platform != "win":
        raise ValueError(f"MinGW sysroot only needed on Windows, not {platform}")

    logger.info(f"Ensuring MinGW sysroot is installed for {platform}/{arch}")

    # Fast path: already installed, so no lock is taken at all.
    if is_mingw_installed(platform, arch):
        logger.info(f"MinGW sysroot already installed for {platform}/{arch}")
        return get_mingw_install_dir(platform, arch)

    logger.info(f"MinGW sysroot not installed, acquiring lock for {platform}/{arch}")
    lock_file = get_mingw_lock_path(platform, arch)
    logger.debug(f"Lock path: {lock_file}")
    sysroot_lock = fasteners.InterProcessLock(str(lock_file))

    logger.info("Waiting to acquire MinGW sysroot installation lock...")
    with sysroot_lock:
        logger.info("Lock acquired")

        # Re-check now that we hold the lock: someone else may have finished.
        if is_mingw_installed(platform, arch):
            logger.info("Another process installed MinGW sysroot while we waited")
        else:
            logger.info("Starting MinGW sysroot download and installation")
            download_and_install_mingw(platform, arch)
            logger.info(f"MinGW sysroot installation complete for {platform}/{arch}")

    return get_mingw_install_dir(platform, arch)