clang-tool-chain 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of clang-tool-chain has been flagged as potentially problematic.

@@ -0,0 +1,330 @@
#!/usr/bin/env python3
"""
Create IWYU archives for all platforms.

This script:
1. Scans downloads-bins/assets/iwyu/ for extracted binaries
2. Creates tar archives with proper permissions
3. Compresses with zstd level 22
4. Generates SHA256 checksums
5. Outputs archives to downloads-bins/assets/iwyu/{platform}/{arch}/

Unlike the Clang toolchain, IWYU has no duplicate binaries, so no deduplication is needed.
"""

import hashlib
import json
import sys
import tarfile
from pathlib import Path


def create_tar_archive(source_dir: Path, output_tar: Path) -> Path:
    """
    Create a tar archive with correct permissions for IWYU.

    Args:
        source_dir: Directory containing bin/ and share/ (e.g., downloads-bins/assets/iwyu/win/x86_64/)
        output_tar: Output tar file path

    Returns:
        Path to the created tar file
    """
    print("\n" + "=" * 70)
    print("CREATING TAR ARCHIVE")
    print("=" * 70)
    print(f"Source: {source_dir}")
    print(f"Output: {output_tar}")
    print()

    def tar_filter(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo:
        """Filter to set correct permissions for IWYU files."""
        if tarinfo.isfile():
            # Python scripts and the main binary should be executable
            if "/bin/" in tarinfo.name or tarinfo.name.startswith("bin/"):
                if tarinfo.name.endswith((".py", "include-what-you-use", ".exe")):
                    tarinfo.mode = 0o755  # rwxr-xr-x
                    print(f"  Setting executable: {tarinfo.name}")
                else:
                    # Other files in bin/ default to readable
                    tarinfo.mode = 0o644  # rw-r--r--
            # Mapping files and other share/ content should be readable
            elif "/share/" in tarinfo.name or tarinfo.name.startswith("share/"):
                tarinfo.mode = 0o644  # rw-r--r--
            # Other files (LICENSE, README, etc.)
            else:
                tarinfo.mode = 0o644  # rw-r--r--
        return tarinfo

    print("Creating tar archive...")
    print("Setting permissions...")

    # The archive structure is flat: bin/, share/, and top-level files,
    # with no platform/arch prefix inside the tar
    with tarfile.open(output_tar, "w") as tar:
        # Add the bin/ directory
        bin_dir = source_dir / "bin"
        if bin_dir.exists():
            tar.add(bin_dir, arcname="bin", filter=tar_filter)

        # Add the share/ directory
        share_dir = source_dir / "share"
        if share_dir.exists():
            tar.add(share_dir, arcname="share", filter=tar_filter)

        # Add any other top-level files (LICENSE, README, etc.), skipping
        # archive artifacts so the tar does not include itself or outputs
        # left over from a previous run
        for item in source_dir.iterdir():
            if item.is_file() and item != output_tar and not item.name.endswith((".tar", ".tar.zst", ".sha256")):
                tar.add(item, arcname=item.name, filter=tar_filter)

    size = output_tar.stat().st_size
    print(f"Created: {output_tar} ({size / (1024*1024):.2f} MB)")

    return output_tar


def verify_tar_permissions(tar_file: Path) -> int:
    """Verify that files in the tar archive have correct permissions."""
    print("\n" + "=" * 70)
    print("VERIFYING TAR PERMISSIONS")
    print("=" * 70)
    print(f"Checking permissions in: {tar_file}")
    print()

    issues_found = []
    executables_checked = 0
    data_files_checked = 0

    with tarfile.open(tar_file, "r") as tar:
        for member in tar.getmembers():
            if not member.isfile():
                continue

            # Check files in the bin/ directory
            if "/bin/" in member.name or member.name.startswith("bin/"):
                # Python scripts and binaries should be executable
                if member.name.endswith((".py", "include-what-you-use", ".exe")):
                    executables_checked += 1
                    if not (member.mode & 0o100):
                        issues_found.append((member.name, oct(member.mode), "executable missing +x"))
                        print(f"  ✗ Missing executable permission: {member.name} (mode: {oct(member.mode)})")
                    else:
                        print(f"  ✓ bin: {member.name} (mode: {oct(member.mode)})")

            # Check files in the share/ directory
            elif "/share/" in member.name or member.name.startswith("share/"):
                data_files_checked += 1
                # These should NOT be executable
                if member.mode & 0o100:
                    issues_found.append((member.name, oct(member.mode), "data file has +x"))
                    print(f"  ✗ Data file should not be executable: {member.name} (mode: {oct(member.mode)})")

    print()
    print(f"Total executables checked: {executables_checked}")
    print(f"Total data files checked: {data_files_checked}")

    if issues_found:
        print(f"\n⚠️ WARNING: Found {len(issues_found)} files with incorrect permissions!")
        print("\nFiles with issues:")
        for name, mode, issue in issues_found:
            print(f"  - {name} (mode: {mode}) - {issue}")
        raise RuntimeError(f"Tar archive has {len(issues_found)} files with incorrect permissions")
    else:
        print("✅ All files have correct permissions")

    return executables_checked + data_files_checked


def compress_with_zstd(tar_file: Path, output_zst: Path, level: int = 22) -> Path:
    """Compress tar with zstd."""
    import time

    import zstandard as zstd

    print("\n" + "=" * 70)
    print(f"COMPRESSING WITH ZSTD LEVEL {level}")
    print("=" * 70)
    print(f"Input: {tar_file} ({tar_file.stat().st_size / (1024*1024):.2f} MB)")
    print(f"Output: {output_zst}")
    print()

    # Use streaming compression to handle large files and allow interruption
    print("Compressing (this may take a while)...")

    start = time.time()

    # Create a compressor with multi-threading
    cctx = zstd.ZstdCompressor(level=level, threads=-1)

    # Stream-compress the tar file
    with open(tar_file, "rb") as ifh, open(output_zst, "wb") as ofh:
        # Read in chunks to allow interruption
        chunk_size = 1024 * 1024  # 1 MB chunks
        reader = cctx.stream_reader(ifh, size=tar_file.stat().st_size)

        while True:
            chunk = reader.read(chunk_size)
            if not chunk:
                break
            ofh.write(chunk)

    elapsed = time.time() - start

    original_size = tar_file.stat().st_size
    compressed_size = output_zst.stat().st_size
    ratio = original_size / compressed_size if compressed_size > 0 else 0

    print(f"Compressed in {elapsed:.1f}s")
    print(f"Original: {original_size / (1024*1024):.2f} MB")
    print(f"Compressed: {compressed_size / (1024*1024):.2f} MB")
    print(f"Ratio: {ratio:.2f}:1")
    print(f"Reduction: {(1 - compressed_size/original_size) * 100:.1f}%")

    return output_zst


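# Editor's sketch (not part of the package): how a consumer of these archives
# might unpack a .tar.zst produced by the function above, using the same
# zstandard module. Streaming keeps the decompressed tar out of memory; the
# helper name is illustrative.
def extract_archive(zst_path: Path, dest_dir: Path) -> None:
    """Decompress and unpack a .tar.zst archive (illustrative helper)."""
    import zstandard as zstd

    dctx = zstd.ZstdDecompressor()
    with open(zst_path, "rb") as fh, dctx.stream_reader(fh) as reader:
        # mode="r|" reads the tar as a forward-only stream from the reader
        with tarfile.open(fileobj=reader, mode="r|") as tar:
            tar.extractall(dest_dir)

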
def generate_checksum(file_path: Path) -> str:
    """Generate SHA256 checksum for a file."""
    sha256_hash = hashlib.sha256()

    with open(file_path, "rb") as f:
        # Read in chunks to handle large files
        for byte_block in iter(lambda: f.read(4096), b""):
            sha256_hash.update(byte_block)

    return sha256_hash.hexdigest()


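# Editor's sketch (not part of the package): checking an archive against the
# "<sha256>  <filename>" record written by process_platform_arch() below.
# The helper name is illustrative.
def verify_checksum_file(checksum_file: Path) -> bool:
    """Recompute a file's SHA256 and compare it to its .sha256 record."""
    recorded_hash, filename = checksum_file.read_text().split()
    return generate_checksum(checksum_file.parent / filename) == recorded_hash

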
def process_platform_arch(
    iwyu_root: Path, platform: str, arch: str, version: str, zstd_level: int = 22
) -> dict[str, str | int] | None:
    """
    Process a single platform/arch combination.

    Args:
        iwyu_root: Root downloads-bins/assets/iwyu directory
        platform: Platform name (win, linux, darwin)
        arch: Architecture (x86_64, arm64)
        version: IWYU version (e.g., "0.25")
        zstd_level: Zstd compression level

    Returns:
        Dict with archive info, or None if skipped
    """
    source_dir = iwyu_root / platform / arch

    # Check that the directory exists and has bin/
    if not source_dir.exists() or not (source_dir / "bin").exists():
        print(f"Skipping {platform}/{arch} - no binaries found")
        return None

    print("\n" + "=" * 70)
    print(f"PROCESSING: {platform}/{arch}")
    print("=" * 70)

    # Build the archive name
    archive_base = f"iwyu-{version}-{platform}-{arch}"
    tar_file = source_dir / f"{archive_base}.tar"
    zst_file = source_dir / f"{archive_base}.tar.zst"

    # Step 1: Create the tar archive
    create_tar_archive(source_dir, tar_file)

    # Step 2: Verify permissions
    verify_tar_permissions(tar_file)

    # Step 3: Compress with zstd
    compress_with_zstd(tar_file, zst_file, zstd_level)

    # Step 4: Generate the checksum
    print("\nGenerating SHA256 checksum...")
    sha256 = generate_checksum(zst_file)
    print(f"SHA256: {sha256}")

    # Write the checksum file next to the archive; with_suffix() would
    # replace the final ".zst" instead of appending, so build the name directly
    checksum_file = zst_file.with_name(zst_file.name + ".sha256")
    with open(checksum_file, "w") as f:
        f.write(f"{sha256}  {zst_file.name}\n")

    # Clean up the uncompressed tar
    print(f"\nRemoving uncompressed tar: {tar_file}")
    tar_file.unlink()

    print("\n✅ SUCCESS!")
    print(f"Archive: {zst_file}")
    print(f"Size: {zst_file.stat().st_size / (1024*1024):.2f} MB")
    print(f"SHA256: {sha256}")

    return {
        "filename": zst_file.name,
        "path": str(zst_file.relative_to(iwyu_root)),
        "sha256": sha256,
        "size": zst_file.stat().st_size,
    }


def main() -> None:
    """Main entry point."""
    import argparse

    parser = argparse.ArgumentParser(description="Create IWYU archives for all platforms")
    parser.add_argument(
        "--iwyu-root",
        type=Path,
        default=Path("downloads-bins/assets/iwyu"),
        help="Root IWYU directory (default: downloads-bins/assets/iwyu)",
    )
    parser.add_argument("--version", default="0.25", help="IWYU version (default: 0.25)")
    parser.add_argument("--zstd-level", type=int, default=22, help="Zstd compression level (default: 22)")
    parser.add_argument(
        "--platform", help="Process only this platform (win, linux, darwin). If not specified, process all."
    )
    parser.add_argument("--arch", help="Process only this architecture (x86_64, arm64). If not specified, process all.")

    args = parser.parse_args()

    iwyu_root = args.iwyu_root.resolve()

    if not iwyu_root.exists():
        print(f"Error: IWYU root directory not found: {iwyu_root}")
        sys.exit(1)

    # Define the platforms and architectures to process
    platforms = [args.platform] if args.platform else ["win", "linux", "darwin"]
    architectures = [args.arch] if args.arch else ["x86_64", "arm64"]

    # Process each platform/arch combination
    results = {}
    for platform in platforms:
        results[platform] = {}
        for arch in architectures:
            result = process_platform_arch(iwyu_root, platform, arch, args.version, args.zstd_level)
            if result:
                results[platform][arch] = result

    # Print a summary
    print("\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)

    total_archives = sum(len(arches) for arches in results.values())
    print(f"\nCreated {total_archives} archives:")

    for platform, arches in results.items():
        for arch, info in arches.items():
            print(f"\n{platform}/{arch}:")
            print(f"  File: {info['filename']}")
            print(f"  Size: {info['size'] / (1024*1024):.2f} MB")
            print(f"  SHA256: {info['sha256']}")

    # Save results to JSON for manifest creation
    results_file = iwyu_root / "archive_results.json"
    with open(results_file, "w") as f:
        json.dump(results, f, indent=2)

    print(f"\nArchive info saved to: {results_file}")
    print("\nNext steps:")
    print("1. Create manifests with these SHA256 hashes")
    print("2. Upload archives to GitHub")
    print("3. Update downloader.py to support IWYU")


if __name__ == "__main__":
    main()
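
For reference, a sketch of the archive_results.json layout the script above writes; the structure mirrors the dicts returned by process_platform_arch(), but the hash and size values here are illustrative placeholders:

    {
      "linux": {
        "x86_64": {
          "filename": "iwyu-0.25-linux-x86_64.tar.zst",
          "path": "linux/x86_64/iwyu-0.25-linux-x86_64.tar.zst",
          "sha256": "<64-char hex digest>",
          "size": 12345678
        }
      }
    }
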
@@ -0,0 +1,217 @@
#!/usr/bin/env python3
"""
Deduplicate identical binaries in the toolchain by storing one copy
and creating a manifest for expansion.

This script:
1. Identifies duplicate files by MD5 hash
2. Keeps one "canonical" copy of each unique file
3. Creates a manifest mapping all filenames to their canonical source
4. Can expand the deduped structure back to the full structure
"""

import hashlib
import json
import shutil
from pathlib import Path
from typing import Any


def get_file_hash(filepath: Path | str) -> str:
    """Calculate MD5 hash of a file."""
    md5 = hashlib.md5()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            md5.update(chunk)
    return md5.hexdigest()


def analyze_directory(directory: Path | str) -> tuple[dict[str, list[str]], dict[str, int]]:
    """Analyze directory for duplicate files."""
    directory = Path(directory)

    # Map hash -> list of file names
    hash_to_files: dict[str, list[str]] = {}
    # Map hash -> file size
    hash_to_size: dict[str, int] = {}

    for exe_file in directory.glob("*.exe"):
        file_hash = get_file_hash(exe_file)
        size = exe_file.stat().st_size

        if file_hash not in hash_to_files:
            hash_to_files[file_hash] = []
            hash_to_size[file_hash] = size

        hash_to_files[file_hash].append(exe_file.name)

    return hash_to_files, hash_to_size


def calculate_savings(hash_to_files: dict[str, list[str]], hash_to_size: dict[str, int]) -> dict[str, Any]:
    """Calculate potential space savings from deduplication."""
    total_size = 0
    deduped_size = 0
    duplicate_count = 0

    for file_hash, files in hash_to_files.items():
        size = hash_to_size[file_hash]
        total_size += size * len(files)
        deduped_size += size  # Only count each unique file once

        if len(files) > 1:
            duplicate_count += len(files) - 1

    savings = total_size - deduped_size

    return {
        "total_size": total_size,
        "deduped_size": deduped_size,
        "savings": savings,
        "savings_percent": (savings / total_size * 100) if total_size > 0 else 0,
        "duplicate_count": duplicate_count,
    }


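# Editor's worked example (illustrative sizes, not from the package): three
# identical 10 MB .exe files hash to a single entry, so
#   total_size   = 3 * 10 MB = 30 MB
#   deduped_size = 10 MB  (one canonical copy)
#   savings      = 20 MB, savings_percent ≈ 66.7, duplicate_count = 2

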
def create_deduped_structure(source_dir: Path | str, dest_dir: Path | str) -> dict[str, Any]:
    """Create deduplicated directory structure with manifest."""
    source_dir = Path(source_dir)
    dest_dir = Path(dest_dir)

    # Create destination directories
    bin_dir = dest_dir / "bin"
    canonical_dir = dest_dir / "canonical"
    bin_dir.mkdir(parents=True, exist_ok=True)
    canonical_dir.mkdir(parents=True, exist_ok=True)

    hash_to_files, hash_to_size = analyze_directory(source_dir)

    # Manifest: filename -> canonical filename
    manifest = {}
    canonical_files = {}  # hash -> canonical filename

    # Process each unique hash
    for file_hash, files in sorted(hash_to_files.items()):
        # First file in sorted order becomes canonical
        canonical = sorted(files)[0]
        canonical_path = canonical_dir / canonical

        # Copy the canonical file
        shutil.copy2(source_dir / canonical, canonical_path)
        canonical_files[file_hash] = canonical

        # Map all files to this canonical copy
        for filename in files:
            manifest[filename] = canonical

    # Save the manifest
    manifest_data = {
        "manifest": manifest,
        "canonical_files": canonical_files,
        "stats": calculate_savings(hash_to_files, hash_to_size),
    }

    with open(dest_dir / "dedup_manifest.json", "w") as f:
        json.dump(manifest_data, f, indent=2)

    return manifest_data


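# Editor's sketch of the resulting dedup_manifest.json (filenames are
# illustrative; here "clang.exe" and "clang++.exe" are assumed byte-identical,
# and "clang++.exe" sorts first, so it becomes the canonical copy):
#   {
#     "manifest": {"clang++.exe": "clang++.exe", "clang.exe": "clang++.exe"},
#     "canonical_files": {"<md5>": "clang++.exe"},
#     "stats": {"total_size": ..., "deduped_size": ..., "savings": ...,
#               "savings_percent": ..., "duplicate_count": 1}
#   }

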
def expand_deduped_structure(deduped_dir: Path | str, output_dir: Path | str) -> None:
    """Expand deduplicated structure back to full structure."""
    deduped_dir = Path(deduped_dir)
    output_dir = Path(output_dir)

    # Load manifest
    with open(deduped_dir / "dedup_manifest.json") as f:
        manifest_data = json.load(f)

    manifest = manifest_data["manifest"]
    canonical_dir = deduped_dir / "canonical"
    output_bin_dir = output_dir / "bin"
    output_bin_dir.mkdir(parents=True, exist_ok=True)

    # Copy each file from its canonical source
    for filename, canonical in manifest.items():
        src = canonical_dir / canonical
        dst = output_bin_dir / filename

        shutil.copy2(src, dst)
        print(f"Created {filename} from {canonical}")

    print(f"\nExpanded {len(manifest)} files from {len(set(manifest.values()))} canonical files")


def print_analysis(source_dir: Path | str) -> None:
    """Print detailed analysis of duplicates."""
    hash_to_files, hash_to_size = analyze_directory(source_dir)
    stats = calculate_savings(hash_to_files, hash_to_size)

    print("=" * 70)
    print("BINARY DEDUPLICATION ANALYSIS")
    print("=" * 70)
    print()

    print(f"Total uncompressed size: {stats['total_size'] / (1024*1024):.1f} MB")
    print(f"Deduplicated size: {stats['deduped_size'] / (1024*1024):.1f} MB")
    print(f"Space savings: {stats['savings'] / (1024*1024):.1f} MB ({stats['savings_percent']:.1f}%)")
    print(f"Duplicate files: {stats['duplicate_count']}")
    print()

    print("Duplicate Groups:")
    print("-" * 70)

    for file_hash, files in sorted(hash_to_files.items()):
        if len(files) > 1:
            size_mb = hash_to_size[file_hash] / (1024 * 1024)
            waste_mb = size_mb * (len(files) - 1)
            print(f"\n{len(files)} identical files ({size_mb:.1f} MB each, {waste_mb:.1f} MB wasted):")
            for filename in sorted(files):
                canonical = "← CANONICAL" if filename == sorted(files)[0] else ""
                print(f"  - {filename} {canonical}")


if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage:")
        print("  Analyze:     python deduplicate_binaries.py analyze <directory>")
        print("  Deduplicate: python deduplicate_binaries.py dedup <source_dir> <dest_dir>")
        print("  Expand:      python deduplicate_binaries.py expand <deduped_dir> <output_dir>")
        sys.exit(1)

    command = sys.argv[1]

    if command == "analyze":
        if len(sys.argv) < 3:
            print("Error: Missing directory argument")
            sys.exit(1)
        print_analysis(sys.argv[2])

    elif command == "dedup":
        if len(sys.argv) < 4:
            print("Error: Missing source or destination directory")
            sys.exit(1)
        source = sys.argv[2]
        dest = sys.argv[3]
        print("Creating deduplicated structure...")
        manifest_data = create_deduped_structure(source, dest)
        print("\nDeduplication complete!")
        print(f"Original size: {manifest_data['stats']['total_size'] / (1024*1024):.1f} MB")
        print(f"Deduped size: {manifest_data['stats']['deduped_size'] / (1024*1024):.1f} MB")
        print(f"Saved: {manifest_data['stats']['savings'] / (1024*1024):.1f} MB")
        print(f"Manifest saved to: {dest}/dedup_manifest.json")

    elif command == "expand":
        if len(sys.argv) < 4:
            print("Error: Missing deduped or output directory")
            sys.exit(1)
        deduped = sys.argv[2]
        output = sys.argv[3]
        expand_deduped_structure(deduped, output)

    else:
        print(f"Unknown command: {command}")
        sys.exit(1)
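
A minimal round-trip sketch using the functions above; the directory names are hypothetical, and the module name is taken from the usage text:

    from deduplicate_binaries import create_deduped_structure, expand_deduped_structure

    stats = create_deduped_structure("toolchain/bin", "toolchain-deduped")["stats"]
    print(f"saved {stats['savings'] / (1024 * 1024):.1f} MB")

    expand_deduped_structure("toolchain-deduped", "toolchain-restored")
    # toolchain-restored/bin/ now holds one copy per original filename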