clang-tool-chain 1.0.2 (clang_tool_chain-1.0.2-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clang_tool_chain/__init__.py +0 -0
- clang_tool_chain/__version__.py +4 -0
- clang_tool_chain/checksums.py +270 -0
- clang_tool_chain/cli.py +575 -0
- clang_tool_chain/downloader.py +1325 -0
- clang_tool_chain/downloads/README.md +144 -0
- clang_tool_chain/downloads/__init__.py +22 -0
- clang_tool_chain/downloads/__main__.py +11 -0
- clang_tool_chain/downloads/create_hardlink_archive.py +390 -0
- clang_tool_chain/downloads/create_iwyu_archives.py +330 -0
- clang_tool_chain/downloads/deduplicate_binaries.py +217 -0
- clang_tool_chain/downloads/download_binaries.py +463 -0
- clang_tool_chain/downloads/expand_archive.py +260 -0
- clang_tool_chain/downloads/extract_mingw_sysroot.py +349 -0
- clang_tool_chain/downloads/fetch_and_archive.py +1376 -0
- clang_tool_chain/downloads/strip_binaries.py +436 -0
- clang_tool_chain/downloads/test_compression.py +259 -0
- clang_tool_chain/fetch.py +158 -0
- clang_tool_chain/paths.py +93 -0
- clang_tool_chain/sccache_runner.py +160 -0
- clang_tool_chain/wrapper.py +1383 -0
- clang_tool_chain-1.0.2.dist-info/METADATA +1766 -0
- clang_tool_chain-1.0.2.dist-info/RECORD +26 -0
- clang_tool_chain-1.0.2.dist-info/WHEEL +4 -0
- clang_tool_chain-1.0.2.dist-info/entry_points.txt +31 -0
- clang_tool_chain-1.0.2.dist-info/licenses/LICENSE +204 -0
@@ -0,0 +1,1376 @@
#!/usr/bin/env python3
"""
Fetch and Archive LLVM/Clang Toolchain

This script automates the entire process:
1. Downloads LLVM/Clang binaries for the specified platform/architecture
2. Strips them of unnecessary extras (keeping only essential build tools)
3. Deduplicates identical binaries
4. Creates a hard-linked structure
5. Compresses with zstd level 22
6. Names according to convention: llvm-{version}-{platform}-{arch}.tar.zst
7. Generates checksums
8. Places the final archive in downloads-bins/assets/clang/{platform}/{arch}/

Usage:
    python -m clang_tool_chain.downloads.fetch_and_archive --platform win --arch x86_64
    python -m clang_tool_chain.downloads.fetch_and_archive --platform linux --arch x86_64
    python -m clang_tool_chain.downloads.fetch_and_archive --platform darwin --arch arm64

Requirements:
    - Python 3.10+ (the code uses PEP 604 `X | Y` annotations)
    - zstandard module: pip install zstandard
"""

import argparse
import hashlib
import json
import os
import shutil
import subprocess
import sys
import tarfile
import urllib.request
from pathlib import Path
from typing import Any

# ============================================================================
# Configuration
# ============================================================================

LLVM_VERSION = "19.1.7"

# Official LLVM download URLs
LLVM_DOWNLOAD_URLS = {
    (
        "win",
        "x86_64",
    ): f"https://github.com/llvm/llvm-project/releases/download/llvmorg-{LLVM_VERSION}/LLVM-{LLVM_VERSION}-win64.exe",
    (
        "win",
        "arm64",
    ): f"https://github.com/llvm/llvm-project/releases/download/llvmorg-{LLVM_VERSION}/LLVM-{LLVM_VERSION}-woa64.exe",
    (
        "linux",
        "x86_64",
    ): f"https://github.com/llvm/llvm-project/releases/download/llvmorg-{LLVM_VERSION}/LLVM-{LLVM_VERSION}-Linux-X64.tar.xz",
    (
        "linux",
        "arm64",
    ): f"https://github.com/llvm/llvm-project/releases/download/llvmorg-{LLVM_VERSION}/clang+llvm-{LLVM_VERSION}-aarch64-linux-gnu.tar.xz",
    (
        "darwin",
        "x86_64",
    ): f"https://github.com/llvm/llvm-project/releases/download/llvmorg-{LLVM_VERSION}/LLVM-{LLVM_VERSION}-macOS-X64.tar.xz",
    (
        "darwin",
        "arm64",
    ): f"https://github.com/llvm/llvm-project/releases/download/llvmorg-{LLVM_VERSION}/LLVM-{LLVM_VERSION}-macOS-ARM64.tar.xz",
}

# Essential binaries to keep (for C/C++ compilation)
ESSENTIAL_BINARIES = {
    # Compilers
    "clang",
    "clang++",
    "clang-cl",
    "clang-cpp",
    # Linkers
    "lld",
    "lld-link",
    "ld.lld",
    "ld64.lld",
    "wasm-ld",
    # Archive tools
    "llvm-ar",
    "llvm-ranlib",
    # Binary utilities
    "llvm-nm",
    "llvm-objdump",
    "llvm-objcopy",
    "llvm-strip",
    "llvm-readobj",
    "llvm-readelf",
    "llvm-symbolizer",
    # NOTE: Removed clang-format and clang-tidy to reduce archive size.
    # These are code-quality tools, not needed for compilation.
}


# ============================================================================
# Utility Functions
# ============================================================================


def print_section(title: str) -> None:
    """Print a formatted section header."""
    print("\n" + "=" * 70)
    print(title)
    print("=" * 70)


def download_file(url: str, output_path: Path | str, show_progress: bool = True) -> None:
    """Download a file with progress indication."""
    print(f"Downloading from: {url}")
    print(f"Saving to: {output_path}")

    output_path = Path(output_path)
    breadcrumb_path = Path(str(output_path) + ".downloading")

    # Create a breadcrumb file to mark the download as in progress
    breadcrumb_path.touch()

    def report_progress(block_num: int, block_size: int, total_size: int) -> None:
        if show_progress and total_size > 0:
            downloaded = block_num * block_size
            percent = min(100, (downloaded / total_size) * 100)
            mb_downloaded = downloaded / (1024 * 1024)
            mb_total = total_size / (1024 * 1024)
            print(f"\rProgress: {percent:5.1f}% ({mb_downloaded:6.1f} MB / {mb_total:6.1f} MB)", end="", flush=True)

    try:
        urllib.request.urlretrieve(url, output_path, reporthook=report_progress)
        if show_progress:
            print()  # New line after progress
        # Download completed successfully, remove the breadcrumb
        breadcrumb_path.unlink(missing_ok=True)
    except (KeyboardInterrupt, Exception):
        # Download interrupted or failed; clean up the partial file and breadcrumb
        if output_path.exists():
            output_path.unlink()
        breadcrumb_path.unlink(missing_ok=True)
        raise


def get_file_hash(filepath: Path | str, algorithm: str = "md5") -> str:
    """Calculate the hash of a file."""
    h = hashlib.new(algorithm)
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            h.update(chunk)
    return h.hexdigest()


def find_binaries(directory: Path | str, extensions: list[str] | None = None) -> list[Path]:
    """Find all binary files in a directory."""
    if extensions is None:
        extensions = [".exe", ""]  # Windows executables and Unix executables (no extension)

    directory = Path(directory)
    binaries = []

    for ext in extensions:
        if ext:
            binaries.extend(directory.glob(f"**/*{ext}"))
        else:
            # Find files without an extension that are executable
            for item in directory.rglob("*"):
                if item.is_file() and os.access(item, os.X_OK) and not item.suffix:
                    binaries.append(item)

    return binaries


def should_exclude_lib_file(file_path: Path | str) -> bool:
    """
    Determine if a library file should be excluded to reduce size.

    Excludes:
    - Fortran runtime libraries (libflang_rt.*) - only needed for Fortran compilation
    - hwasan_symbolize binary - debugging tool, not needed for compilation

    Keeps:
    - Headers (.h, .inc, .modulemap, .tcc)
    - Runtime libraries (including sanitizers)
    - Builtins
    - Directory structures
    """
    file_path = Path(file_path)
    name = file_path.name

    # Always keep directories
    if file_path.is_dir():
        return False

    # Always keep headers and text files
    if file_path.suffix in {".h", ".inc", ".modulemap", ".tcc", ".txt"}:
        return False

    # Exclude the Fortran runtime (27 MB) - not needed for C/C++
    if "libflang_rt" in name:
        return True

    # Exclude the hwasan_symbolize binary (debugging tool only).
    # Keep everything else (sanitizers, builtins, headers, etc.)
    return "hwasan_symbolize" in name


# ============================================================================
# Step 1: Download
# ============================================================================


def download_llvm(platform: str, arch: str, work_dir: Path) -> Path:
    """Download LLVM binaries for the specified platform and architecture."""
    print_section("STEP 1: DOWNLOAD LLVM BINARIES")

    key = (platform, arch)
    if key not in LLVM_DOWNLOAD_URLS:
        raise ValueError(f"Unsupported platform/arch combination: {platform}/{arch}")

    url = LLVM_DOWNLOAD_URLS[key]
    filename = Path(url).name
    download_path = work_dir / filename
    breadcrumb_path = Path(str(download_path) + ".downloading")

    print(f"Platform: {platform}")
    print(f"Architecture: {arch}")
    print(f"LLVM Version: {LLVM_VERSION}")
    print()

    # Check for an incomplete download from a previous attempt
    if breadcrumb_path.exists():
        print(f"⚠️ Found incomplete download marker: {breadcrumb_path.name}")
        if download_path.exists():
            print(f"Removing partial download: {download_path}")
            download_path.unlink()
        breadcrumb_path.unlink()
        print()

    if download_path.exists():
        print(f"File already exists: {download_path}")
        print("Skipping download...")
    else:
        download_file(url, download_path)

    print(f"\nDownloaded: {download_path}")
    print(f"Size: {download_path.stat().st_size / (1024*1024):.2f} MB")

    return download_path


# ============================================================================
# Step 2: Extract
# ============================================================================


def extract_archive(archive_path: Path, extract_dir: Path) -> Path:
    """Extract the downloaded archive."""
    print_section("STEP 2: EXTRACT ARCHIVE")

    archive_path = Path(archive_path)
    extract_dir = Path(extract_dir)

    print(f"Archive: {archive_path}")
    print(f"Extract to: {extract_dir}")
    print()

    extract_dir.mkdir(parents=True, exist_ok=True)

    if archive_path.suffix == ".exe":
        # Windows installer - needs 7z or similar
        print("Windows .exe installer detected")
        print("Using 7z to extract...")

        # Try to use 7z
        try:
            subprocess.run(["7z", "x", str(archive_path), f"-o{extract_dir}", "-y"], check=True)
        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            raise RuntimeError(
                "7z is required to extract Windows .exe installer.\n"
                "Install 7z: https://www.7-zip.org/\n"
                "Or provide pre-extracted binaries."
            ) from e

    elif archive_path.suffix == ".xz" or archive_path.name.endswith(".tar.xz"):
        print("Extracting tar.xz archive...")
        print()

        # Try to use the external tar command for better performance (supports
        # multi-threaded decompression); fall back to the Python implementation
        # if the tar command is not available.
        import time

        start = time.time()

        # Check if we have a tar command available (much faster; can use pixz for parallel decompression)
        tar_available = shutil.which("tar") is not None

        if tar_available:
            print("Using system tar command for faster extraction...")
            print("NOTE: Progress tracking not available with external tar")
            print("      The process IS working - please wait (typically 30-90 seconds for LLVM)")
            print()
            print("Extracting...")
            sys.stdout.flush()
            try:
                # Use the tar command - it's much faster and may use parallel decompression
                subprocess.run(
                    ["tar", "-xJf", str(archive_path), "-C", str(extract_dir)],
                    check=True,
                    capture_output=True,
                    text=True,
                )
                elapsed = time.time() - start
                print(f"Extraction complete in {elapsed:.1f}s")
            except subprocess.CalledProcessError as e:
                print(f"External tar failed: {e.stderr}")
                print("Falling back to Python extraction...")
                tar_available = False

        if not tar_available:
            # Fall back to Python's built-in lzma and tarfile modules
            print("Using Python built-in extraction (slower but with progress tracking)...")
            print("Reading archive index (this may take a moment)...")
            import lzma

            with lzma.open(archive_path) as xz_file, tarfile.open(fileobj=xz_file) as tar:
                # Get the list of members for progress tracking
                members = tar.getmembers()
                total_members = len(members)
                total_size = sum(m.size for m in members)

                print(f"Found {total_members} files/directories to extract ({total_size / (1024*1024):.1f} MB)")
                print()

                extracted_count = 0
                extracted_size = 0
                last_progress = -1
                last_update_time = start
                progress_counter = 0

                for member in members:
                    tar.extract(member, path=extract_dir)
                    extracted_count += 1
                    extracted_size += member.size

                    # Show progress every 5% or every 2 seconds
                    current_time = time.time()
                    # Use data size for the progress percentage (more meaningful than file count)
                    progress = int((extracted_size / total_size) * 100) if total_size > 0 else 0
                    time_since_update = current_time - last_update_time

                    if (progress // 5 > last_progress // 5) or (time_since_update >= 2.0):
                        elapsed = current_time - start
                        mb_extracted = extracted_size / (1024 * 1024)
                        mb_total = total_size / (1024 * 1024)
                        mb_per_sec = mb_extracted / elapsed if elapsed > 0 else 0

                        progress_counter += 1
                        print(
                            f"  [{progress_counter:3d}] Progress: {progress:3d}% "
                            f"({mb_extracted:7.1f} / {mb_total:7.1f} MB) "
                            f"- {mb_per_sec:6.1f} MB/s - {elapsed:5.1f}s elapsed",
                            flush=True,
                        )
                        last_progress = progress
                        last_update_time = current_time

            elapsed = time.time() - start
            print()
            print(f"Extracted {extracted_count} files ({extracted_size / (1024*1024):.1f} MB) in {elapsed:.1f}s")

    elif archive_path.suffix == ".gz" or archive_path.name.endswith(".tar.gz"):
        print("Extracting tar.gz archive...")
        print()
        import gzip
        import time

        start = time.time()

        with gzip.open(archive_path, "rb") as gz_file, tarfile.open(fileobj=gz_file) as tar:
            # Get the list of members for progress tracking
            members = tar.getmembers()
            total_members = len(members)
            total_size = sum(m.size for m in members)

            print(f"Found {total_members} files/directories to extract ({total_size / (1024*1024):.1f} MB)")
            print()

            extracted_count = 0
            extracted_size = 0
            last_progress = -1
            last_update_time = start
            progress_counter = 0

            for member in members:
                tar.extract(member, path=extract_dir)
                extracted_count += 1
                extracted_size += member.size

                # Show progress every 5% or every 2 seconds
                current_time = time.time()
                # Use data size for the progress percentage (more meaningful than file count)
                progress = int((extracted_size / total_size) * 100) if total_size > 0 else 0
                time_since_update = current_time - last_update_time

                if (progress // 5 > last_progress // 5) or (time_since_update >= 2.0):
                    elapsed = current_time - start
                    mb_extracted = extracted_size / (1024 * 1024)
                    mb_total = total_size / (1024 * 1024)
                    mb_per_sec = mb_extracted / elapsed if elapsed > 0 else 0

                    progress_counter += 1
                    print(
                        f"  [{progress_counter:3d}] Progress: {progress:3d}% "
                        f"({mb_extracted:7.1f} / {mb_total:7.1f} MB) "
                        f"- {mb_per_sec:6.1f} MB/s - {elapsed:5.1f}s elapsed",
                        flush=True,
                    )
                    last_progress = progress
                    last_update_time = current_time

        elapsed = time.time() - start
        print()
        print(f"Extracted {extracted_count} files ({extracted_size / (1024*1024):.1f} MB) in {elapsed:.1f}s")

    else:
        raise ValueError(f"Unsupported archive format: {archive_path.suffix}")

    print("Extraction complete!")
    return extract_dir


# ============================================================================
# Step 3: Strip Extras (Keep Only Essential Binaries)
# ============================================================================


def strip_extras(extracted_dir: Path, output_dir: Path, platform: str) -> Path:
    """Keep only essential binaries, remove extras."""
    print_section("STEP 3: STRIP UNNECESSARY FILES")

    extracted_dir = Path(extracted_dir)
    output_dir = Path(output_dir)

    # Find the bin directory
    bin_dirs = list(extracted_dir.glob("**/bin"))
    if not bin_dirs:
        raise RuntimeError(f"No bin directory found in {extracted_dir}")

    bin_dir = bin_dirs[0]
    print(f"Found bin directory: {bin_dir}")

    # Create the output structure
    output_bin = output_dir / "bin"
    output_bin.mkdir(parents=True, exist_ok=True)

    # Determine the binary extension
    ext = ".exe" if platform == "win" else ""

    # Copy essential binaries
    kept_count = 0
    skipped_count = 0

    print("\nKeeping essential binaries:")
    for binary_name in ESSENTIAL_BINARIES:
        binary_file = bin_dir / f"{binary_name}{ext}"

        if binary_file.exists():
            dest = output_bin / binary_file.name
            shutil.copy2(binary_file, dest)
            print(f"  ✓ {binary_file.name}")
            kept_count += 1
        else:
            print(f"  - {binary_name}{ext} (not found)")
            skipped_count += 1

    # Copy only the essential lib/clang directory (builtin headers and runtime).
    # Skip the Fortran runtime and other optional libraries (sanitizers are kept;
    # see should_exclude_lib_file).
    lib_src = extracted_dir.glob("**/lib/clang")
    lib_clang_copied = False
    excluded_count = 0
    excluded_size = 0

    for lib_clang_dir in lib_src:
        if lib_clang_dir.is_dir():
            lib_dst = output_dir / "lib" / "clang"
            print("\nCopying essential lib/clang files (filtering out optional libraries)...")
            print(f"Source: {lib_clang_dir}")
            print(f"Dest: {lib_dst}")

            # Use a factory function to properly bind lib_clang_dir in the closure
            def make_ignore_function(base_dir: Path):  # type: ignore[return]
                def ignore_optional_libs(directory: str, contents: list[str]) -> list[str]:
                    ignored = []
                    for item in contents:
                        item_path = Path(directory) / item
                        if should_exclude_lib_file(item_path):
                            # Calculate the size if it's a file
                            if item_path.is_file():
                                size = item_path.stat().st_size
                                excluded_size_mb = size / (1024 * 1024)
                                print(f"  Excluding: {item_path.relative_to(base_dir)} ({excluded_size_mb:.1f} MB)")
                                nonlocal excluded_count, excluded_size
                                excluded_count += 1
                                excluded_size += size
                            ignored.append(item)
                    return ignored

                return ignore_optional_libs

            shutil.copytree(lib_clang_dir, lib_dst, dirs_exist_ok=True, ignore=make_ignore_function(lib_clang_dir))
            lib_clang_copied = True
            break

    print("\nSummary:")
    print(f"  Kept: {kept_count} binaries")
    print(f"  Skipped: {skipped_count} binaries (not found)")
    if lib_clang_copied:
        print("  Copied lib/clang directory")
    if excluded_count > 0:
        print(f"  Excluded {excluded_count} optional files ({excluded_size / (1024*1024):.1f} MB)")
        print("  (Fortran runtime removed - not needed for C/C++ compilation)")

    return output_dir
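
# For reference, the copytree `ignore` contract used above (a minimal sketch
# with hypothetical paths): shutil.copytree calls the callable once per visited
# directory with that directory's entries and skips whatever names it returns.
#
#     shutil.copytree("src", "dst", ignore=lambda d, names: [n for n in names if n.endswith(".pdb")])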


# ============================================================================
# Step 3.5: Strip Linux Binaries (Remove Debug Symbols)
# ============================================================================


def strip_linux_binaries(bin_dir: Path, platform: str) -> None:
    """
    Strip debug symbols from Linux binaries to reduce size.

    Uses llvm-strip (cross-platform) to remove debug symbols from ELF binaries.
    This typically reduces binary size by ~14% without affecting functionality.

    Windows binaries are skipped as they don't benefit from stripping.
    """
    if platform != "linux":
        return  # Only strip Linux binaries

    print_section("STEP 3.5: STRIP DEBUG SYMBOLS FROM LINUX BINARIES")

    bin_dir = Path(bin_dir)

    # Try to find llvm-strip
    llvm_strip = shutil.which("llvm-strip")
    if not llvm_strip:
        # Try common locations on Windows
        common_paths = [
            r"C:\Program Files\LLVM\bin\llvm-strip.exe",
            r"C:\Program Files (x86)\LLVM\bin\llvm-strip.exe",
        ]
        for path in common_paths:
            if Path(path).exists():
                llvm_strip = path
                break

    if not llvm_strip:
        print("⚠️ llvm-strip not found - skipping binary stripping")
        print("   Install LLVM to enable stripping: https://llvm.org/")
        print("   Binaries will be larger but still functional")
        return

    print(f"Using: {llvm_strip}")
    print()

    # Find all binaries
    binaries = sorted(bin_dir.glob("*"))
    binaries = [b for b in binaries if b.is_file()]

    if not binaries:
        print("No binaries found to strip")
        return

    print(f"Stripping {len(binaries)} binaries...")
    print()

    total_before = 0
    total_after = 0
    stripped_count = 0

    for binary in binaries:
        size_before = binary.stat().st_size
        size_before_mb = size_before / (1024 * 1024)

        try:
            # Use --strip-all for maximum size reduction.
            # This removes debug symbols and other non-essential data.
            subprocess.run([llvm_strip, "--strip-all", str(binary)], check=True, capture_output=True, text=True)

            size_after = binary.stat().st_size
            size_after_mb = size_after / (1024 * 1024)
            saved = size_before - size_after
            saved_mb = saved / (1024 * 1024)
            percent = (saved / size_before * 100) if size_before > 0 else 0

            print(
                f"  ✓ {binary.name:30s} {size_before_mb:7.1f} MB → {size_after_mb:7.1f} MB (saved {saved_mb:5.1f} MB, {percent:4.1f}%)"
            )

            total_before += size_before
            total_after += size_after
            stripped_count += 1

        except subprocess.CalledProcessError as e:
            print(f"  ✗ {binary.name:30s} - Failed to strip: {e.stderr}")
        except Exception as e:
            print(f"  ✗ {binary.name:30s} - Error: {e}")

    total_saved = total_before - total_after

    print()
    print("Summary:")
    print(f"  Stripped: {stripped_count} binaries")
    print(f"  Total before: {total_before / (1024*1024):.2f} MB")
    print(f"  Total after:  {total_after / (1024*1024):.2f} MB")
    print(f"  Total saved:  {total_saved / (1024*1024):.2f} MB ({(total_saved/total_before)*100:.1f}%)")


# ============================================================================
# Step 4: Deduplicate (Create Manifest)
# ============================================================================


def deduplicate_binaries(bin_dir: Path) -> dict[str, Any]:
    """Identify duplicate binaries and create a deduplication manifest."""
    print_section("STEP 4: ANALYZE AND DEDUPLICATE BINARIES")

    bin_dir = Path(bin_dir)

    # Find all binaries
    binaries = sorted(bin_dir.glob("*"))
    binaries = [b for b in binaries if b.is_file()]

    print(f"Found {len(binaries)} binary files")
    print("\nCalculating MD5 hashes...")

    # Calculate hashes
    hash_to_files = {}
    hash_to_size = {}

    for binary in binaries:
        file_hash = get_file_hash(binary, "md5")
        size = binary.stat().st_size

        if file_hash not in hash_to_files:
            hash_to_files[file_hash] = []
            hash_to_size[file_hash] = size

        hash_to_files[file_hash].append(binary.name)

    # Create the deduplication manifest
    manifest = {}
    canonical_files = {}

    for file_hash, files in sorted(hash_to_files.items()):
        # The first file (alphabetically) becomes canonical
        canonical = sorted(files)[0]
        canonical_files[file_hash] = canonical

        for filename in files:
            manifest[filename] = canonical

    # Calculate savings
    total_files = len(binaries)
    unique_files = len(hash_to_files)
    duplicate_count = total_files - unique_files

    total_size = sum(hash_to_size[h] * len(files) for h, files in hash_to_files.items())
    deduped_size = sum(hash_to_size.values())
    savings = total_size - deduped_size

    print("\nDeduplication Analysis:")
    print(f"  Total files: {total_files}")
    print(f"  Unique files: {unique_files}")
    print(f"  Duplicates: {duplicate_count}")
    print(f"  Total size: {total_size / (1024*1024):.1f} MB")
    print(f"  Deduplicated size: {deduped_size / (1024*1024):.1f} MB")
    print(f"  Space savings: {savings / (1024*1024):.1f} MB ({(savings/total_size)*100:.1f}%)")

    # Print duplicate groups
    if duplicate_count > 0:
        print("\nDuplicate groups:")
        for file_hash, files in sorted(hash_to_files.items()):
            if len(files) > 1:
                size_mb = hash_to_size[file_hash] / (1024 * 1024)
                print(f"  {len(files)} files @ {size_mb:.1f} MB each: {', '.join(sorted(files))}")

    manifest_data = {
        "manifest": manifest,
        "canonical_files": canonical_files,
        "stats": {
            "total_size": total_size,
            "deduped_size": deduped_size,
            "savings": savings,
            "savings_percent": (savings / total_size * 100) if total_size > 0 else 0,
            "duplicate_count": duplicate_count,
        },
    }

    return manifest_data


# ============================================================================
# Step 5: Create Hard-Linked Structure
# ============================================================================


def create_hardlink_structure(manifest_data: dict[str, Any], source_bin_dir: Path, output_dir: Path) -> Path:
    """Create a directory with hard links based on the deduplication manifest."""
    print_section("STEP 5: CREATE HARD-LINKED STRUCTURE")

    source_bin_dir = Path(source_bin_dir)
    output_dir = Path(output_dir)

    manifest = manifest_data["manifest"]

    # Create the output bin directory
    bin_dir = output_dir / "bin"
    bin_dir.mkdir(parents=True, exist_ok=True)

    # Track which canonical files we've copied
    canonical_copied = {}

    print("\nCreating hard-linked structure:")
    for filename, canonical_name in sorted(manifest.items()):
        src = source_bin_dir / canonical_name
        dst = bin_dir / filename

        if not src.exists():
            print(f"  Warning: {canonical_name} not found")
            continue

        if canonical_name not in canonical_copied:
            # First occurrence - copy the file
            shutil.copy2(src, dst)
            canonical_copied[canonical_name] = dst
            print(f"  Copy: {filename} <- {canonical_name}")
        else:
            # Create a hard link
            first_copy = canonical_copied[canonical_name]
            print(f"  Hardlink: {filename} -> {first_copy.name}")

            try:
                if dst.exists():
                    dst.unlink()
                os.link(first_copy, dst)
            except OSError:
                # Hard link failed; copy instead
                shutil.copy2(src, dst)
                print("    (hard link failed, used copy)")

    return output_dir
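
# A minimal sketch of why this saves space (hypothetical file names): after
# os.link(), both names point at one inode, so the duplicate adds no bytes on
# disk and tar can store the second name as a hard-link entry.
#
#     demo = Path("demo-clang"); demo.write_bytes(b"\x7fELF" + b"\0" * 1020)
#     os.link(demo, "demo-clang++")
#     assert demo.stat().st_nlink == 2  # two directory entries, one inode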


# ============================================================================
# Step 6: Create TAR Archive
# ============================================================================


def create_tar_archive(source_dir: Path, output_tar: Path) -> Path:
    """Create a tar archive (tarfile auto-detects hard links)."""
    print_section("STEP 6: CREATE TAR ARCHIVE")

    source_dir = Path(source_dir)
    output_tar = Path(output_tar)

    print(f"Source: {source_dir}")
    print(f"Output: {output_tar}")
    print()

    def tar_filter(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo:
        """Filter to set correct permissions for binaries and shared libraries."""
        if tarinfo.isfile():
            # Set executable permissions for files in the main bin/ directory
            if "/bin/" in tarinfo.name and "/lib/" not in tarinfo.name:
                tarinfo.mode = 0o755  # rwxr-xr-x
                print(f"  Setting executable: {tarinfo.name}")
            # Set executable permissions for shared libraries and certain executables in lib/
            elif "/lib/" in tarinfo.name:
                # Headers, text files, and static libraries should be readable but not executable (check first)
                if tarinfo.name.endswith((".h", ".inc", ".modulemap", ".tcc", ".txt", ".a", ".syms")):
                    tarinfo.mode = 0o644  # rw-r--r--
                # Shared libraries (.so, .dylib) need executable permissions on Unix
                elif tarinfo.name.endswith((".so", ".dylib")) or ".so." in tarinfo.name:
                    tarinfo.mode = 0o755  # rwxr-xr-x for shared libraries
                    print(f"  Setting executable (shared lib): {tarinfo.name}")
                # Executable binaries in lib/clang/*/bin/ directories
                elif "/bin/" in tarinfo.name and not tarinfo.name.endswith(
                    (".h", ".inc", ".txt", ".a", ".so", ".dylib")
                ):
                    tarinfo.mode = 0o755  # rwxr-xr-x
                    print(f"  Setting executable (lib binary): {tarinfo.name}")
        return tarinfo

    print("Creating tar archive using Python tarfile module...")
    print("Setting executable permissions for binaries in bin/...")
    with tarfile.open(output_tar, "w") as tar:
        tar.add(source_dir, arcname=source_dir.name, filter=tar_filter)

    size = output_tar.stat().st_size
    print(f"\nCreated: {output_tar}")
    print(f"Size: {size / (1024*1024):.2f} MB")

    return output_tar


def verify_tar_permissions(tar_file: Path) -> int:
    """Verify that binaries and shared libraries in the tar archive have correct permissions."""
    print_section("STEP 6.5: VERIFY TAR PERMISSIONS")

    tar_file = Path(tar_file)

    print(f"Checking permissions in: {tar_file}")
    print()

    issues_found = []
    binaries_checked = 0
    libs_checked = 0
    headers_checked = 0

    with tarfile.open(tar_file, "r") as tar:
        for member in tar.getmembers():
            if not member.isfile():
                continue

            # Check files in bin/ directory - should all be executable
            if "/bin/" in member.name:
                binaries_checked += 1
                # Check if the executable bit is set (0o100 for user execute)
                if not (member.mode & 0o100):
                    issues_found.append((member.name, oct(member.mode), "binary missing executable"))
                    print(f"  ✗ Missing executable permission: {member.name} (mode: {oct(member.mode)})")
                else:
                    # Only print every 10th binary to avoid spam
                    if binaries_checked % 10 == 1:
                        print(f"  ✓ bin: {member.name} (mode: {oct(member.mode)})")

            # Check files in lib/ directory
            elif "/lib/" in member.name:
                # Headers and static libraries should NOT be executable (check this first)
                if member.name.endswith((".h", ".inc", ".modulemap", ".tcc", ".txt", ".a", ".syms")):
                    headers_checked += 1
                    if member.mode & 0o100:
                        issues_found.append((member.name, oct(member.mode), "header/static lib has executable bit"))
                        print(
                            f"  ✗ Header/static lib should not be executable: {member.name} (mode: {oct(member.mode)})"
                        )

                # Shared libraries (.so, .dylib) should be executable
                elif member.name.endswith((".so", ".dylib")) or ".so." in member.name:
                    libs_checked += 1
                    if not (member.mode & 0o100):
                        issues_found.append((member.name, oct(member.mode), "shared lib missing executable"))
                        print(f"  ✗ Shared lib missing executable: {member.name} (mode: {oct(member.mode)})")
                    elif libs_checked % 10 == 1:
                        print(f"  ✓ lib: {member.name} (mode: {oct(member.mode)})")

                # Executable binaries in lib/ (like *symbolize) - must be files without common extensions.
                # These are typically in lib/clang/*/bin/ directories.
                elif "/bin/" in member.name and not member.name.endswith((".h", ".inc", ".txt", ".a", ".so", ".dylib")):
                    binaries_checked += 1
                    if not (member.mode & 0o100):
                        issues_found.append((member.name, oct(member.mode), "lib binary missing executable"))
                        print(f"  ✗ Lib binary missing executable: {member.name} (mode: {oct(member.mode)})")

    print()
    print(f"Total binaries checked: {binaries_checked}")
    print(f"Total shared libraries checked: {libs_checked}")
    print(f"Total headers/text files checked: {headers_checked}")

    if issues_found:
        print(f"\n⚠️ WARNING: Found {len(issues_found)} files with incorrect permissions!")
        print("\nFiles with issues:")
        for name, mode, issue in issues_found:
            print(f"  - {name} (mode: {mode}) - {issue}")
        print("\nThese files may not work correctly when extracted on Unix systems.")
        raise RuntimeError(f"Tar archive has {len(issues_found)} files with incorrect permissions")
    else:
        print("✅ All files have correct permissions")

    return binaries_checked + libs_checked


# ============================================================================
# Step 7: Compress with ZSTD
# ============================================================================


def compress_with_zstd(tar_file: Path, output_zst: Path, level: int = 22) -> Path:
    """Compress the tar with zstd, using streaming compression for better interrupt handling."""
    print_section(f"STEP 7: COMPRESS WITH ZSTD LEVEL {level}")

    try:
        import zstandard as zstd
    except ImportError as e:
        raise ImportError("zstandard module required!\nInstall with: pip install zstandard") from e

    tar_file = Path(tar_file)
    output_zst = Path(output_zst)

    file_size = tar_file.stat().st_size
    print(f"Input: {tar_file} ({file_size / (1024*1024):.2f} MB)")
    print(f"Output: {output_zst}")
    print(f"Level: {level}")
    print()

    print(f"Compressing {file_size / (1024*1024):.1f} MB (streaming mode - press Ctrl+C to cancel)...")
    print()

    # Compress using streaming for better interrupt handling
    import time

    start = time.time()

    try:
        cctx = zstd.ZstdCompressor(level=level, threads=-1)

        # Use streaming compression instead of loading the entire file.
        # 1 MB chunks give better interrupt responsiveness on Windows.
        chunk_size = 1 * 1024 * 1024
        bytes_read = 0
        last_progress = -1
        last_update_time = start
        progress_counter = 0

        with (
            open(tar_file, "rb") as ifh,
            open(output_zst, "wb") as ofh,
            cctx.stream_writer(ofh, closefd=False) as compressor,
        ):
            while True:
                chunk = ifh.read(chunk_size)
                if not chunk:
                    break

                compressor.write(chunk)
                bytes_read += len(chunk)

                # Show progress every 5% for cleaner output that works on all terminals
                current_time = time.time()
                progress = int((bytes_read / file_size) * 100)
                time_since_update = current_time - last_update_time

                # Update every 5% OR every 2 seconds (whichever comes first)
                if (progress // 5 > last_progress // 5) or (time_since_update >= 2.0):
                    elapsed = current_time - start
                    mb_read = bytes_read / (1024 * 1024)
                    mb_total = file_size / (1024 * 1024)
                    mb_per_sec = mb_read / elapsed if elapsed > 0 else 0

                    # Use simple newline-based progress for cross-platform compatibility
                    progress_counter += 1
                    print(
                        f"  [{progress_counter:3d}] Progress: {progress:3d}% "
                        f"({mb_read:7.1f} / {mb_total:7.1f} MB) "
                        f"- {mb_per_sec:6.1f} MB/s - {elapsed:5.1f}s elapsed",
                        flush=True,
                    )
                    last_progress = progress
                    last_update_time = current_time

            # Print a final newline and show a finalizing message
            print()
            print("  Data read complete. Now finalizing compression...")
            print("  NOTE: Level 22 compression requires flushing buffers - this may take 30-60 seconds...")
            print("  (The process is NOT stalled, just working hard to achieve maximum compression)")
            print()
            finalize_start = time.time()

        # The with block closes here, which triggers the final compression flush.
        # This is where most of the CPU time is actually spent for level 22.
        finalize_elapsed = time.time() - finalize_start
        print(f"  Finalization complete! ({finalize_elapsed:.1f}s)")
        print()

        elapsed = time.time() - start

        original_size = file_size
        compressed_size = output_zst.stat().st_size
        ratio = original_size / compressed_size

        print("Compression complete!")
        print(f"  Total time: {elapsed:.1f}s")
        print(f"  Reading: {elapsed - finalize_elapsed:.1f}s")
        print(f"  Finalizing: {finalize_elapsed:.1f}s")
        print(f"  Original: {original_size / (1024*1024):.2f} MB")
        print(f"  Compressed: {compressed_size / (1024*1024):.2f} MB")
        print(f"  Ratio: {ratio:.2f}:1")
        print(f"  Reduction: {(1 - compressed_size/original_size) * 100:.1f}%")

        return output_zst

    except KeyboardInterrupt:
        # Clean up the partial output file on interrupt
        print("\n⚠️ Compression interrupted - cleaning up partial file...")
        if output_zst.exists():
            output_zst.unlink()
        raise
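
# A minimal sketch of the matching streaming decompression with the same
# `zstandard` package (the file names are hypothetical):
#
#     import zstandard as zstd
#     dctx = zstd.ZstdDecompressor()
#     with open("llvm.tar.zst", "rb") as ifh, open("llvm.tar", "wb") as ofh:
#         dctx.copy_stream(ifh, ofh)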


# ============================================================================
# Step 8: Generate Checksums
# ============================================================================


def generate_checksums(archive_path: Path) -> tuple[str, str]:
    """Generate SHA256 and MD5 checksums."""
    print_section("STEP 8: GENERATE CHECKSUMS")

    archive_path = Path(archive_path)

    print(f"Generating checksums for: {archive_path.name}")
    print()

    # SHA256
    print("Calculating SHA256...")
    sha256 = get_file_hash(archive_path, "sha256")
    sha256_file = archive_path.parent / f"{archive_path.name}.sha256"
    with open(sha256_file, "w") as f:
        f.write(f"{sha256} *{archive_path.name}\n")
    print(f"  SHA256: {sha256}")
    print(f"  Saved to: {sha256_file.name}")

    # MD5
    print("\nCalculating MD5...")
    md5 = get_file_hash(archive_path, "md5")
    md5_file = archive_path.parent / f"{archive_path.name}.md5"
    with open(md5_file, "w") as f:
        f.write(f"{md5} *{archive_path.name}\n")
    print(f"  MD5: {md5}")
    print(f"  Saved to: {md5_file.name}")

    return sha256, md5
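
# A minimal sketch of verifying an archive against the "<hash> *<name>" file
# written above (the archive name is hypothetical):
#
#     name = "llvm-19.1.7-linux-x86_64.tar.zst"
#     expected = Path(name + ".sha256").read_text().split()[0]
#     assert get_file_hash(name, "sha256") == expected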


# ============================================================================
# Step 9: Split Archive (If Needed)
# ============================================================================


def split_archive(archive_path: Path, max_size_mb: int = 99) -> list[Path] | None:
    """
    Split the archive into parts if it exceeds max_size_mb.

    Creates files like:
    - archive.tar.zst.part1
    - archive.tar.zst.part2
    - archive.tar.zst.join (script to join them back)

    Args:
        archive_path: Path to the archive file
        max_size_mb: Maximum size in MB before splitting (default: 99)

    Returns:
        List of part files created, or None if no split was needed
    """
    print_section(f"STEP 9: CHECK IF SPLIT NEEDED (max {max_size_mb} MB)")

    archive_path = Path(archive_path)
    size_mb = archive_path.stat().st_size / (1024 * 1024)

    print(f"Archive: {archive_path.name}")
    print(f"Size: {size_mb:.2f} MB")
    print(f"Limit: {max_size_mb} MB")
    print()

    if size_mb <= max_size_mb:
        print(f"✅ Archive is under {max_size_mb} MB - no split needed")
        return None

    print(f"⚠️ Archive exceeds {max_size_mb} MB - splitting into parts...")
    print()

    # Calculate the part size (slightly under the max to account for overhead)
    part_size = int((max_size_mb - 1) * 1024 * 1024)  # Leave a 1 MB margin

    # Read and split
    parts = []
    part_num = 1

    with open(archive_path, "rb") as f:
        while True:
            chunk = f.read(part_size)
            if not chunk:
                break

            part_name = f"{archive_path.name}.part{part_num}"
            part_path = archive_path.parent / part_name

            with open(part_path, "wb") as pf:
                pf.write(chunk)

            part_size_mb = len(chunk) / (1024 * 1024)
            print(f"  Created: {part_name} ({part_size_mb:.2f} MB)")
            parts.append(part_path)
            part_num += 1

    # Create a join script for convenience
    join_script_name = f"{archive_path.name}.join"
    join_script_path = archive_path.parent / join_script_name

    # Create both a shell script and a Python script
    shell_script = f"""#!/bin/bash
# Join script for {archive_path.name}
# This script joins the split parts back into the original archive

echo "Joining {len(parts)} parts into {archive_path.name}..."

cat {' '.join(p.name for p in parts)} > {archive_path.name}

echo "Done! Created {archive_path.name}"
echo "Size: $(du -h {archive_path.name} | cut -f1)"
echo ""
echo "To extract:"
echo "  tar --zstd -xf {archive_path.name}"
"""

    with open(join_script_path, "w", newline="\n") as f:
        f.write(shell_script)

    # Make it executable on Unix-like systems
    import contextlib

    with contextlib.suppress(Exception):
        os.chmod(join_script_path, 0o755)

    # Also create a Python join script for Windows
    py_script_name = f"{archive_path.name}.join.py"
    py_script_path = archive_path.parent / py_script_name

    python_script = f"""#!/usr/bin/env python3
\"\"\"Join script for {archive_path.name}\"\"\"
import sys
from pathlib import Path

parts = {[p.name for p in parts]}
output = "{archive_path.name}"

print(f"Joining {{len(parts)}} parts into {{output}}...")

try:
    with open(output, 'wb') as out:
        for part in parts:
            print(f"  Adding {{part}}...")
            with open(part, 'rb') as inp:
                out.write(inp.read())

    size_mb = Path(output).stat().st_size / (1024 * 1024)
    print(f"\\nDone! Created {{output}} ({{size_mb:.2f}} MB)")
    print("\\nTo extract:")
    print(f"  tar --zstd -xf {{output}}")

except Exception as e:
    print(f"Error: {{e}}", file=sys.stderr)
    sys.exit(1)
"""

    with open(py_script_path, "w") as f:
        f.write(python_script)

    print()
    print("Summary:")
    print(f"  Created {len(parts)} parts")
    print(f"  Total size: {size_mb:.2f} MB")
    print(f"  Part size: ~{max_size_mb - 1} MB each")
    print()
    print("Join scripts created:")
    print(f"  {join_script_name} (for Linux/Mac)")
    print(f"  {py_script_name} (for Windows/cross-platform)")
    print()
    print("To rejoin:")
    print(f"  bash {join_script_name}")
    print("  or")
    print(f"  python {py_script_name}")

    # Remove the original archive
    print()
    print(f"Removing original archive: {archive_path.name}")
    archive_path.unlink()

    return parts


# ============================================================================
# Main Pipeline
# ============================================================================


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Fetch and archive LLVM/Clang toolchain binaries",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python -m clang_tool_chain.downloads.fetch_and_archive --platform win --arch x86_64
  python -m clang_tool_chain.downloads.fetch_and_archive --platform linux --arch x86_64
  python -m clang_tool_chain.downloads.fetch_and_archive --platform darwin --arch arm64

  # Use existing extracted binaries:
  python -m clang_tool_chain.downloads.fetch_and_archive --platform win --arch x86_64 --source-dir ./assets/win

Note: Press Ctrl+C at any time to safely interrupt the operation.
""",
    )

    parser.add_argument("--platform", required=True, choices=["win", "linux", "darwin"], help="Target platform")
    parser.add_argument("--arch", required=True, choices=["x86_64", "arm64"], help="Target architecture")
    parser.add_argument("--version", default=LLVM_VERSION, help=f"LLVM version (default: {LLVM_VERSION})")
    parser.add_argument("--source-dir", type=Path, help="Use existing extracted binaries instead of downloading")
    parser.add_argument(
        "--work-dir", type=Path, default=Path("work"), help="Working directory for temporary files (default: work)"
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=None,
        help="Output directory (default: downloads-bins/assets/clang/{platform}/{arch})",
    )
    parser.add_argument("--zstd-level", type=int, default=22, help="Zstd compression level (default: 22)")
    parser.add_argument("--keep-intermediate", action="store_true", help="Keep intermediate files (for debugging)")

    args = parser.parse_args()

    # Use the version from args
    llvm_version = args.version

    # Set up directories
    work_dir = args.work_dir
    work_dir.mkdir(parents=True, exist_ok=True)

    output_dir = args.output_dir or Path("downloads-bins/assets/clang") / args.platform / args.arch
    output_dir.mkdir(parents=True, exist_ok=True)

    # Archive name
    archive_name = f"llvm-{llvm_version}-{args.platform}-{args.arch}"

    print("=" * 70)
    print("LLVM/Clang Toolchain Fetch and Archive")
    print("=" * 70)
    print(f"Platform: {args.platform}")
    print(f"Architecture: {args.arch}")
    print(f"Version: {llvm_version}")
    print(f"Output: {output_dir}/{archive_name}.tar.zst")
    print("=" * 70)
    print("\n💡 Tip: Press Ctrl+C at any time to safely interrupt the operation.\n")

    try:
        # Step 1: Download (or use existing)
        if args.source_dir:
            print_section("STEP 1: USING EXISTING BINARIES")
            print(f"Source directory: {args.source_dir}")
            extracted_dir = args.source_dir
        else:
            archive_path = download_llvm(args.platform, args.arch, work_dir)

            # Step 2: Extract
            extracted_dir = extract_archive(archive_path, work_dir / "extracted")

        # Step 3: Strip extras
        stripped_dir = work_dir / "stripped"
        strip_extras(extracted_dir, stripped_dir, args.platform)

        # Step 3.5: Strip Linux binaries (remove debug symbols)
        strip_linux_binaries(stripped_dir / "bin", args.platform)

        # Step 4: Deduplicate
        manifest_data = deduplicate_binaries(stripped_dir / "bin")

        # Save the manifest
        manifest_file = stripped_dir / "dedup_manifest.json"
        with open(manifest_file, "w") as f:
            json.dump(manifest_data, f, indent=2)
        print(f"\nManifest saved: {manifest_file}")

        # Step 5: Create hard-linked structure
        hardlinked_dir = work_dir / "hardlinked"
        create_hardlink_structure(manifest_data, stripped_dir / "bin", hardlinked_dir)

        # Copy the lib/clang directory if it exists (builtin headers only)
        lib_clang_src = stripped_dir / "lib" / "clang"
        if lib_clang_src.exists():
            lib_dst = hardlinked_dir / "lib" / "clang"
            print("\nCopying lib/clang directory (builtin headers)...")
            shutil.copytree(lib_clang_src, lib_dst, dirs_exist_ok=True)

        # Step 6: Create TAR
        tar_file = work_dir / f"{archive_name}.tar"
        create_tar_archive(hardlinked_dir, tar_file)

        # Step 6.5: Verify permissions in the TAR archive
        verify_tar_permissions(tar_file)

        # Step 7: Compress with ZSTD.
        # Initialize final_archive here, before compression, so it's defined for cleanup.
        final_archive: Path = output_dir / f"{archive_name}.tar.zst"
        compress_with_zstd(tar_file, final_archive, level=args.zstd_level)

        # Step 8: Generate checksums
        sha256, md5 = generate_checksums(final_archive)

        # Step 9: Split if too large (before cleanup, so we can remove the original)
        parts = split_archive(final_archive, max_size_mb=99)

        # Cleanup
        if not args.keep_intermediate:
            print_section("CLEANUP")
            print("Removing intermediate files...")
            if tar_file.exists():
                tar_file.unlink()
                print(f"  Removed: {tar_file.name}")
            if not args.source_dir:  # Don't remove if using an existing source
                for item in [work_dir / "extracted", work_dir / "stripped", work_dir / "hardlinked"]:
                    if item.exists():
                        shutil.rmtree(item)
                        print(f"  Removed: {item}")

        # Final summary
        print_section("SUCCESS!")

        if parts:
            # Archive was split
            print(f"Archive split into {len(parts)} parts:")
            for i, part in enumerate(parts, 1):
                size_mb = part.stat().st_size / (1024 * 1024)
                print(f"  {i}. {part.name} ({size_mb:.2f} MB)")
            print()
            print("Join scripts:")
            print(f"  {final_archive.name}.join (bash)")
            print(f"  {final_archive.name}.join.py (python)")
            print()
            print("To rejoin and extract:")
            print(f"  python {final_archive.name}.join.py")
            print(f"  tar --zstd -xf {final_archive.name}")
        else:
            # Single archive
            print(f"Archive created: {final_archive}")
            print(f"Size: {final_archive.stat().st_size / (1024*1024):.2f} MB")
            print(f"SHA256: {sha256}")
            print(f"MD5: {md5}")
            print()
            print("Files created:")
            print(f"  {final_archive.name}")
            print(f"  {final_archive.name}.sha256")
            print(f"  {final_archive.name}.md5")

        print()
        print("✅ Done!")

    except KeyboardInterrupt:
        print("\n\n" + "=" * 70)
        print("❌ OPERATION CANCELLED BY USER")
        print("=" * 70)
        print("\nInterrupted! Cleaning up...")
        # Cleanup on interrupt - check whether final_archive was defined.
        # Use a locals() check to avoid NameError if interrupted before final_archive is set.
        if "final_archive" in locals():
            final_archive_local: Path = final_archive  # type: ignore[possibly-undefined]
            if final_archive_local.exists():
                print(f"  Removing incomplete archive: {final_archive_local}")
                final_archive_local.unlink()
        sys.exit(130)  # Standard exit code for SIGINT

    except Exception as e:
        print(f"\n❌ Error: {e}", file=sys.stderr)
        import traceback

        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()