clang_tool_chain-1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of clang-tool-chain might be problematic.
- clang_tool_chain/__init__.py +0 -0
- clang_tool_chain/__version__.py +4 -0
- clang_tool_chain/checksums.py +270 -0
- clang_tool_chain/cli.py +575 -0
- clang_tool_chain/downloader.py +1325 -0
- clang_tool_chain/downloads/README.md +144 -0
- clang_tool_chain/downloads/__init__.py +22 -0
- clang_tool_chain/downloads/__main__.py +11 -0
- clang_tool_chain/downloads/create_hardlink_archive.py +390 -0
- clang_tool_chain/downloads/create_iwyu_archives.py +330 -0
- clang_tool_chain/downloads/deduplicate_binaries.py +217 -0
- clang_tool_chain/downloads/download_binaries.py +463 -0
- clang_tool_chain/downloads/expand_archive.py +260 -0
- clang_tool_chain/downloads/extract_mingw_sysroot.py +349 -0
- clang_tool_chain/downloads/fetch_and_archive.py +1376 -0
- clang_tool_chain/downloads/strip_binaries.py +436 -0
- clang_tool_chain/downloads/test_compression.py +259 -0
- clang_tool_chain/fetch.py +158 -0
- clang_tool_chain/paths.py +93 -0
- clang_tool_chain/sccache_runner.py +160 -0
- clang_tool_chain/wrapper.py +1383 -0
- clang_tool_chain-1.0.2.dist-info/METADATA +1766 -0
- clang_tool_chain-1.0.2.dist-info/RECORD +26 -0
- clang_tool_chain-1.0.2.dist-info/WHEEL +4 -0
- clang_tool_chain-1.0.2.dist-info/entry_points.txt +31 -0
- clang_tool_chain-1.0.2.dist-info/licenses/LICENSE +204 -0

clang_tool_chain/downloads/README.md
@@ -0,0 +1,144 @@
# Scripts Directory

This directory contains utility scripts for downloading and processing LLVM/Clang binaries.

## Scripts

### download_binaries.py

Downloads pre-built LLVM/Clang binaries from official GitHub releases.

**Usage:**
```bash
# Download binaries for current platform only
python scripts/download_binaries.py --current-only

# Download binaries for all platforms
python scripts/download_binaries.py

# Download specific platform
python scripts/download_binaries.py --platform linux-x86_64

# Specify version and output directory
python scripts/download_binaries.py --version 21.1.5 --output work
```

**Supported Platforms:**
- `win-x86_64` - Windows 64-bit
- `linux-x86_64` - Linux x86-64
- `linux-aarch64` - Linux ARM64
- `darwin-x86_64` - macOS Intel
- `darwin-arm64` - macOS Apple Silicon

**Output:**
Downloads are saved to the `work/` directory by default. Each platform is extracted to a separate subdirectory.
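
For example, after downloading a couple of platforms the layout under `work/` looks roughly like this (directory names are illustrative; the exact `<platform>-extracted` suffix comes from the download script):

```bash
ls work/
# linux-x86_64-extracted/  win-x86_64-extracted/
```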

### strip_binaries.py

Optimizes downloaded LLVM distributions by removing unnecessary files and stripping debug symbols.

**Usage:**
```bash
# Strip binaries for a specific platform
python scripts/strip_binaries.py \
    work/linux-x86_64-extracted \
    downloads-bins/assets/clang/linux/x86_64 \
    --platform linux-x86_64

# Keep header files (not recommended, increases size)
python scripts/strip_binaries.py <source> <output> --platform <platform> --keep-headers

# Skip binary stripping (debug symbols)
python scripts/strip_binaries.py <source> <output> --platform <platform> --no-strip

# Verbose output
python scripts/strip_binaries.py <source> <output> --platform <platform> --verbose
```

**What it removes:**
- Documentation (share/doc, share/man)
- Static libraries (*.a, *.lib)
- CMake files
- Python bindings
- Examples and unnecessary tools
- Debug symbols from binaries

**What it keeps:**
- Essential binaries (clang, clang++, lld, llvm-ar, etc.)
- Runtime libraries (lib/clang/*, *.so, *.dll, *.dylib)
- License files

**Expected Size Reduction:**
- Original: ~3.5 GB per platform
- After stripping: ~300-400 MB per platform
- Reduction: ~85-90%
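
As a rough sketch of what the removal list above amounts to (illustrative only; the real rules live in `strip_binaries.py` and vary per platform):

```bash
# Illustrative prune-and-strip pass, not the actual strip_binaries.py logic
cd work/linux-x86_64-extracted
rm -rf share/doc share/man lib/cmake                # docs and CMake packages
find . \( -name '*.a' -o -name '*.lib' \) -delete   # static libraries
strip --strip-unneeded bin/* 2>/dev/null || true     # debug symbols (GNU strip)
```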

## Workflow

Complete workflow to prepare binaries for the package:

```bash
# 1. Download binaries for current platform
python scripts/download_binaries.py --current-only

# 2. Find the extracted directory (example for Windows)
# It will be something like: work/win-x86_64-extracted/

# 3. Strip the binaries and move to assets
python scripts/strip_binaries.py \
    work/win-x86_64-extracted \
    downloads-bins/assets/clang/win/x86_64 \
    --platform win-x86_64 \
    --verbose

# 4. Verify the output
ls -lh downloads-bins/assets/clang/win/x86_64/bin/
```

## Platform-Specific Notes

### Windows
- May require 7-Zip to be installed for installer extraction
- Alternative: use the .tar.xz archive instead of the .exe installer

### macOS
- Official binaries may not always be available for all versions
- May need to use Homebrew or community builds as alternatives
- Separate binaries are needed for Intel (x86_64) and Apple Silicon (arm64)

### Linux
- Most reliable platform for official binaries
- Minimal dependencies required (glibc, libstdc++)
- Both x86_64 and aarch64 architectures are available

## Troubleshooting

**Download fails with 404 error:**
- Check if the specified version exists on GitHub releases (see the check below)
- Try the alternative URL (the script attempts this automatically)
- Verify the version format (e.g., "21.1.5", not "21.1")
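
For the 404 case, a quick way to confirm that the release tag itself exists before retrying (LLVM tags releases as `llvmorg-X.Y.Z`; substitute your version):

```bash
# HTTP 200 means the release tag exists; 404 means the version string is wrong
curl -sI https://github.com/llvm/llvm-project/releases/tag/llvmorg-21.1.5 | head -n 1
```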

**Extraction fails on Windows:**
- Install 7-Zip: https://www.7-zip.org/
- Add 7z.exe to your PATH
- Or manually extract the installer and pass the extracted path to the strip script

**Strip script fails:**
- Verify that the source directory contains a valid LLVM installation
- Check that a bin/ directory exists in the source
- Use the --verbose flag to see detailed error messages

## Requirements

- Python 3.10+
- Internet connection (for download_binaries.py)
- ~4-5 GB of free disk space per platform (temporary)
- 7-Zip (Windows only, for .exe extraction)

## Security Note

Always verify that downloads come from the official LLVM GitHub releases:
- Primary: https://github.com/llvm/llvm-project/releases
- Mirror: https://releases.llvm.org/

The download script only uses these official sources.
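
Beyond sticking to these URLs, it is a good habit to record a checksum for each downloaded archive and re-verify it before extraction. A minimal example (the asset name is illustrative):

```bash
# Record the digest once, then verify any later download against it
sha256sum LLVM-21.1.5-Linux-X64.tar.xz > llvm-21.1.5.sha256
sha256sum -c llvm-21.1.5.sha256
```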

clang_tool_chain/downloads/__init__.py
@@ -0,0 +1,22 @@
"""
Downloads subpackage for LLVM/Clang toolchain archive management.

This subpackage provides tools for:
- Downloading official LLVM releases
- Stripping and optimizing binaries
- Creating optimized archives with deduplication
- Extracting archives
- Managing hard-linked binary structures
- Testing compression methods

Main modules:
- fetch_and_archive: Complete pipeline for downloading and packaging
- download_binaries: Download pre-built LLVM binaries
- strip_binaries: Strip and optimize LLVM binaries for minimal size
- deduplicate_binaries: Deduplicate identical binaries
- test_compression: Test various compression methods
"""

from .fetch_and_archive import main as fetch_and_archive_main

__all__ = ["fetch_and_archive_main"]
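
Given this re-export (and the `__main__.py` listed above), the pipeline is presumably driven either as a module or through the re-exported entry point; the actual flags are defined in `fetch_and_archive.py`, which is not shown in this hunk:

```bash
# Assumed invocations - check --help for the real options
python -m clang_tool_chain.downloads --help
python -c "from clang_tool_chain.downloads import fetch_and_archive_main; fetch_and_archive_main()"
```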

clang_tool_chain/downloads/create_hardlink_archive.py
@@ -0,0 +1,390 @@
#!/usr/bin/env python3
"""
Create a hardlink-based archive that uses tar's native deduplication.

This script:
1. Reads the deduplication manifest
2. Creates a directory structure with hard links (not copies!)
3. Creates a tar archive (tar automatically detects and stores hard links efficiently)
4. Compresses with zstd level 22

The tar format natively supports hard links - when multiple files have the
same inode, tar stores the data once and creates link entries for duplicates.
"""

import json
import os
import shutil
import sys
from pathlib import Path


def create_hardlink_structure(manifest_path: Path | str, canonical_dir: Path | str, output_dir: Path | str) -> Path:
    """
    Create directory structure with hard links based on manifest.

    Args:
        manifest_path: Path to dedup_manifest.json
        canonical_dir: Directory containing canonical (unique) binaries
        output_dir: Output directory for hardlinked structure
    """
    manifest_path = Path(manifest_path)
    canonical_dir = Path(canonical_dir)
    output_dir = Path(output_dir)

    # Load manifest
    with open(manifest_path) as f:
        manifest_data = json.load(f)

    manifest = manifest_data["manifest"]

    # Create output bin directory
    bin_dir = output_dir / "bin"
    bin_dir.mkdir(parents=True, exist_ok=True)

    # Track which canonical files we've copied
    canonical_copied = {}

    # Process each file in manifest
    for filename, canonical_name in sorted(manifest.items()):
        src = canonical_dir / canonical_name
        dst = bin_dir / filename

        if not src.exists():
            print(f"Warning: Canonical file not found: {src}")
            continue

        # If this is the first time we're seeing this canonical file,
        # copy it to the first destination
        if canonical_name not in canonical_copied:
            print(f"Copy: {filename} <- {canonical_name}")
            shutil.copy2(src, dst)
            canonical_copied[canonical_name] = dst
        else:
            # Create hard link to the first copy
            first_copy = canonical_copied[canonical_name]
            print(f"Hardlink: {filename} -> {first_copy.name}")

            # On Windows, we need to use os.link
            # Remove dst if it exists
            if dst.exists():
                dst.unlink()

            try:
                os.link(first_copy, dst)
            except OSError as e:
                print(f" Warning: Hard link failed ({e}), using copy instead")
                shutil.copy2(src, dst)

    # Copy lib directory if it exists
    lib_src = canonical_dir.parent / "lib"
    if lib_src.exists():
        lib_dst = output_dir / "lib"
        if lib_dst.exists():
            shutil.rmtree(lib_dst)
        print("\nCopying lib directory...")
        shutil.copytree(lib_src, lib_dst)

    return bin_dir


def verify_hardlinks(bin_dir: Path | str) -> tuple[int, int]:
    """Verify that hard links were created successfully."""
    bin_dir = Path(bin_dir)

    print("\n" + "=" * 70)
    print("VERIFYING HARD LINKS")
    print("=" * 70)

    # Group files by inode
    inode_to_files = {}

    for exe_file in bin_dir.glob("*.exe"):
        stat = exe_file.stat()
        inode = stat.st_ino

        if inode not in inode_to_files:
            inode_to_files[inode] = []

        inode_to_files[inode].append({"name": exe_file.name, "size": stat.st_size, "nlink": stat.st_nlink})

    total_files = 0
    unique_inodes = 0
    hardlinked_groups = 0

    for _inode, files in sorted(inode_to_files.items()):
        total_files += len(files)
        unique_inodes += 1

        if len(files) > 1:
            hardlinked_groups += 1
            size_mb = files[0]["size"] / (1024 * 1024)
            print(f"\nHard link group {hardlinked_groups} ({len(files)} files, {size_mb:.1f} MB each):")
            for f in sorted(files, key=lambda x: x["name"]):  # type: ignore[arg-type]
                print(f" - {f['name']} (nlink={f['nlink']})")

    print()
    print(f"Total files: {total_files}")
    print(f"Unique inodes: {unique_inodes}")
    print(f"Hard link groups: {hardlinked_groups}")
    print(f"Duplicate files: {total_files - unique_inodes}")

    return total_files, unique_inodes


def create_tar_archive(source_dir: Path | str, output_tar: Path | str, compression: str = "none") -> Path:
    """Create tar archive (tar auto-detects hard links)."""
    import tarfile

    source_dir = Path(source_dir)
    output_tar = Path(output_tar)

    print("\n" + "=" * 70)
    print("CREATING TAR ARCHIVE")
    print("=" * 70)
    print(f"Source: {source_dir}")
    print(f"Output: {output_tar}")
    print(f"Compression: {compression}")
    print()

    def tar_filter(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo:
        """Filter to set correct permissions for binaries and shared libraries."""
        if tarinfo.isfile():
            # Set executable permissions for files in main bin/ directory
            if "/bin/" in tarinfo.name and "/lib/" not in tarinfo.name:
                tarinfo.mode = 0o755  # rwxr-xr-x
                print(f" Setting executable: {tarinfo.name}")
            # Set executable permissions for shared libraries and certain executables in lib/
            elif "/lib/" in tarinfo.name:
                # Headers, text files, and static libraries should be readable but not executable (check first)
                if tarinfo.name.endswith((".h", ".inc", ".modulemap", ".tcc", ".txt", ".a", ".syms")):
                    tarinfo.mode = 0o644  # rw-r--r--
                # Shared libraries (.so, .dylib) need executable permissions on Unix
                elif tarinfo.name.endswith((".so", ".dylib")) or ".so." in tarinfo.name:
                    tarinfo.mode = 0o755  # rwxr-xr-x for shared libraries
                    print(f" Setting executable (shared lib): {tarinfo.name}")
                # Executable binaries in lib/clang/*/bin/ directories
                elif "/bin/" in tarinfo.name and not tarinfo.name.endswith(
                    (".h", ".inc", ".txt", ".a", ".so", ".dylib")
                ):
                    tarinfo.mode = 0o755  # rwxr-xr-x
                    print(f" Setting executable (lib binary): {tarinfo.name}")
        return tarinfo

    print("Creating tar archive using Python tarfile module...")
    print("Setting executable permissions for binaries in bin/...")

    # Map compression type to tarfile mode
    if compression == "none":
        mode = "w"
    elif compression == "gzip":
        mode = "w:gz"
    elif compression == "xz":
        mode = "w:xz"
    else:
        raise ValueError(f"Unknown compression: {compression}")

    with tarfile.open(output_tar, mode) as tar:
        tar.add(source_dir, arcname=source_dir.name, filter=tar_filter)

    size = output_tar.stat().st_size
    print(f"Created: {output_tar} ({size / (1024*1024):.2f} MB)")

    return output_tar


def verify_tar_permissions(tar_file: Path | str) -> int:
    """Verify that binaries and shared libraries in the tar archive have correct permissions."""
    import tarfile

    tar_file = Path(tar_file)

    print("\n" + "=" * 70)
    print("VERIFYING TAR PERMISSIONS")
    print("=" * 70)
    print(f"Checking permissions in: {tar_file}")
    print()

    issues_found = []
    binaries_checked = 0
    libs_checked = 0
    headers_checked = 0

    with tarfile.open(tar_file, "r") as tar:
        for member in tar.getmembers():
            if not member.isfile():
                continue

            # Check files in bin/ directory - should all be executable
            if "/bin/" in member.name:
                binaries_checked += 1
                # Check if executable bit is set (0o100 for user execute)
                if not (member.mode & 0o100):
                    issues_found.append((member.name, oct(member.mode), "binary missing executable"))
                    print(f" ✗ Missing executable permission: {member.name} (mode: {oct(member.mode)})")
                else:
                    # Only print every 10th binary to avoid spam
                    if binaries_checked % 10 == 1:
                        print(f" ✓ bin: {member.name} (mode: {oct(member.mode)})")

            # Check files in lib/ directory
            elif "/lib/" in member.name:
                # Headers and static libraries should NOT be executable (check this first)
                if member.name.endswith((".h", ".inc", ".modulemap", ".tcc", ".txt", ".a", ".syms")):
                    headers_checked += 1
                    if member.mode & 0o100:
                        issues_found.append((member.name, oct(member.mode), "header/static lib has executable bit"))
                        print(
                            f" ✗ Header/static lib should not be executable: {member.name} (mode: {oct(member.mode)})"
                        )

                # Shared libraries (.so, .dylib) should be executable
                elif member.name.endswith((".so", ".dylib")) or ".so." in member.name:
                    libs_checked += 1
                    if not (member.mode & 0o100):
                        issues_found.append((member.name, oct(member.mode), "shared lib missing executable"))
                        print(f" ✗ Shared lib missing executable: {member.name} (mode: {oct(member.mode)})")
                    elif libs_checked % 10 == 1:
                        print(f" ✓ lib: {member.name} (mode: {oct(member.mode)})")

                # Executable binaries in lib/ (like *symbolize) - must be files without common extensions
                # These are typically in lib/clang/*/bin/ directories
                elif "/bin/" in member.name and not member.name.endswith((".h", ".inc", ".txt", ".a", ".so", ".dylib")):
                    binaries_checked += 1
                    if not (member.mode & 0o100):
                        issues_found.append((member.name, oct(member.mode), "lib binary missing executable"))
                        print(f" ✗ Lib binary missing executable: {member.name} (mode: {oct(member.mode)})")

    print()
    print(f"Total binaries checked: {binaries_checked}")
    print(f"Total shared libraries checked: {libs_checked}")
    print(f"Total headers/text files checked: {headers_checked}")

    if issues_found:
        print(f"\n⚠️ WARNING: Found {len(issues_found)} files with incorrect permissions!")
        print("\nFiles with issues:")
        for name, mode, issue in issues_found:
            print(f" - {name} (mode: {mode}) - {issue}")
        print("\nThese files may not work correctly when extracted on Unix systems.")
        raise RuntimeError(f"Tar archive has {len(issues_found)} files with incorrect permissions")
    else:
        print("✅ All files have correct permissions")

    return binaries_checked + libs_checked


def compress_with_zstd(tar_file: Path | str, output_zst: Path | str, level: int = 22) -> Path:
    """Compress tar with zstd."""
    import zstandard as zstd

    tar_file = Path(tar_file)
    output_zst = Path(output_zst)

    print("\n" + "=" * 70)
    print(f"COMPRESSING WITH ZSTD LEVEL {level}")
    print("=" * 70)
    print(f"Input: {tar_file} ({tar_file.stat().st_size / (1024*1024):.2f} MB)")
    print(f"Output: {output_zst}")
    print()

    # Read tar file
    with open(tar_file, "rb") as f:
        tar_data = f.read()

    print(f"Compressing {len(tar_data) / (1024*1024):.1f} MB...")

    # Compress with zstd
    import time

    start = time.time()
    cctx = zstd.ZstdCompressor(level=level, threads=-1)
    compressed = cctx.compress(tar_data)
    elapsed = time.time() - start

    # Write compressed file
    with open(output_zst, "wb") as f:
        f.write(compressed)

    original_size = len(tar_data)
    compressed_size = len(compressed)
    ratio = original_size / compressed_size

    print(f"Compressed in {elapsed:.1f}s")
    print(f"Original: {original_size / (1024*1024):.2f} MB")
    print(f"Compressed: {compressed_size / (1024*1024):.2f} MB")
    print(f"Ratio: {ratio:.2f}:1")
    print(f"Reduction: {(1 - compressed_size/original_size) * 100:.1f}%")

    return output_zst


def main() -> None:
    import argparse

    parser = argparse.ArgumentParser(description="Create hardlink-based tar.zst archive")
    parser.add_argument("deduped_dir", help="Directory containing deduplicated structure")
    parser.add_argument("output_dir", help="Output directory for archive")
    parser.add_argument("--name", default="win_binaries", help="Archive base name")
    parser.add_argument("--zstd-level", type=int, default=22, help="Zstd compression level (default: 22)")

    args = parser.parse_args()

    deduped_dir = Path(args.deduped_dir)
    output_dir = Path(args.output_dir)

    # Paths
    manifest_path = deduped_dir / "dedup_manifest.json"
    canonical_dir = deduped_dir / "canonical"

    if not manifest_path.exists():
        print(f"Error: Manifest not found: {manifest_path}")
        sys.exit(1)

    if not canonical_dir.exists():
        print(f"Error: Canonical directory not found: {canonical_dir}")
        sys.exit(1)

    # Create output directory
    output_dir.mkdir(parents=True, exist_ok=True)

    # Step 1: Create hardlink structure
    print("=" * 70)
    print("STEP 1: CREATE HARDLINK STRUCTURE")
    print("=" * 70)
    print()

    hardlink_dir = output_dir / "win_hardlinked"
    bin_dir = create_hardlink_structure(manifest_path, canonical_dir, hardlink_dir)

    # Step 2: Verify hardlinks
    _ = verify_hardlinks(bin_dir)  # Returns tuple but we don't need the values

    # Step 3: Create tar archive
    tar_file = output_dir / f"{args.name}.tar"
    create_tar_archive(hardlink_dir, tar_file)

    # Step 3.5: Verify tar permissions
    verify_tar_permissions(tar_file)

    # Step 4: Compress with zstd
    try:
        zst_file = output_dir / f"{args.name}.tar.zst"
        compress_with_zstd(tar_file, zst_file, level=args.zstd_level)

        # Clean up uncompressed tar
        print(f"\nRemoving uncompressed tar: {tar_file}")
        tar_file.unlink()

        print("\n" + "=" * 70)
        print("SUCCESS!")
        print("=" * 70)
        print(f"Final archive: {zst_file}")
        print(f"Size: {zst_file.stat().st_size / (1024*1024):.2f} MB")

    except ImportError:
        print("\nWarning: zstandard module not available")
        print(f"Tar archive created: {tar_file}")


if __name__ == "__main__":
    main()
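
From the argparse definition above, a typical invocation looks roughly like the following; the deduplicated input directory (presumably produced by `deduplicate_binaries.py`) must contain `dedup_manifest.json` plus a `canonical/` subdirectory, and the paths below are illustrative:

```bash
python -m clang_tool_chain.downloads.create_hardlink_archive \
    work/win-x86_64-deduped \
    dist-archives \
    --name win_binaries --zstd-level 19
# Produces dist-archives/win_binaries.tar.zst (or a bare .tar if zstandard is unavailable)
```

Because tar records the second and later hard links as link entries rather than full copies, the duplicated `.exe` payloads cost almost nothing in the archive, and `verify_tar_permissions` then aborts early if any binary or shared library would land on a Unix system without its executable bit.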