clang-tool-chain 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of clang-tool-chain might be problematic. Click here for more details.

@@ -0,0 +1,436 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Strip and optimize LLVM binaries for minimal package size.
4
+
5
+ This script removes unnecessary files from downloaded LLVM distributions
6
+ and strips debug symbols from binaries to minimize package size.
7
+ """
8
+
9
+ import argparse
10
+ import os
11
+ import shutil
12
+ import subprocess
13
+ import sys
14
+ from pathlib import Path
15
+
16
# Names of the executables that are copied out of the distribution's bin/
# directory. Names are listed without a platform extension; the lookup
# strips ".exe" and similar suffixes before comparing.
ESSENTIAL_BINARIES = {
    # Compiler drivers
    "clang",
    "clang++",
    "clang-cl",  # Windows only
    "clang-cpp",
    # Linker front ends
    "lld",
    "lld-link",
    "ld.lld",
    "ld64.lld",
    "wasm-ld",
    # Object-file / archive tooling
    "llvm-ar",
    "llvm-nm",
    "llvm-objdump",
    "llvm-objcopy",
    "llvm-ranlib",
    "llvm-strip",
    "llvm-readelf",
    "llvm-readobj",
    # Miscellaneous tools
    "llvm-as",
    "llvm-dis",
    "clang-format",
    "clang-tidy",
    "llvm-symbolizer",
    "llvm-config",
}

# Directory paths (relative to the LLVM root) that are dropped wholesale.
REMOVE_DIRS = {
    "share/doc",
    "share/man",
    "docs",
    "share/clang",
    "share/opt-viewer",
    "share/scan-build",
    "share/scan-view",
    "python_packages",
    "libexec",  # Helper scripts usually not needed
}

# Glob patterns for individual files that are not shipped.
REMOVE_PATTERNS = {
    "*.a",  # Static libraries
    "*.lib",  # Windows static libraries
    "CMakeLists.txt",
    "*.cmake",
}

# Directories in which REMOVE_PATTERNS is meant to be applied.
PATTERN_REMOVE_DIRS = {
    "lib",
    "lib64",
}
73
+
74
+
75
+ class BinaryStripper:
76
+ """Strip and optimize LLVM binary distributions."""
77
+
78
+ def __init__(
79
+ self,
80
+ source_dir: Path,
81
+ output_dir: Path,
82
+ platform: str,
83
+ keep_headers: bool = False,
84
+ strip_binaries: bool = True,
85
+ verbose: bool = False,
86
+ ):
87
+ """
88
+ Initialize the binary stripper.
89
+
90
+ Args:
91
+ source_dir: Directory containing extracted LLVM binaries
92
+ output_dir: Directory to output stripped binaries
93
+ platform: Platform identifier (e.g., "linux-x86_64")
94
+ keep_headers: Whether to keep header files
95
+ strip_binaries: Whether to strip debug symbols
96
+ verbose: Whether to print verbose output
97
+ """
98
+ self.source_dir = Path(source_dir)
99
+ self.output_dir = Path(output_dir)
100
+ self.platform = platform
101
+ self.keep_headers = keep_headers
102
+ self.strip_binaries = strip_binaries
103
+ self.verbose = verbose
104
+
105
+ # Statistics
106
+ self.original_size = 0
107
+ self.final_size = 0
108
+ self.files_removed = 0
109
+ self.files_kept = 0
110
+
111
+ def log(self, message: str) -> None:
112
+ """Print a message if verbose mode is enabled."""
113
+ if self.verbose:
114
+ print(message)
115
+
116
+ def get_dir_size(self, path: Path) -> int:
117
+ """Get total size of a directory in bytes."""
118
+ total = 0
119
+ try:
120
+ for entry in path.rglob("*"):
121
+ if entry.is_file():
122
+ total += entry.stat().st_size
123
+ except Exception as e:
124
+ self.log(f"Warning: Could not calculate size of {path}: {e}")
125
+ return total
126
+
127
+ def find_llvm_root(self) -> Path | None:
128
+ """
129
+ Find the root directory of the LLVM installation.
130
+
131
+ Returns:
132
+ Path to LLVM root, or None if not found
133
+ """
134
+ # Check if source_dir is already the root
135
+ if (self.source_dir / "bin").exists():
136
+ return self.source_dir
137
+
138
+ # Look for subdirectories that might be the root
139
+ for subdir in self.source_dir.iterdir():
140
+ if subdir.is_dir() and (subdir / "bin").exists():
141
+ return subdir
142
+
143
+ return None
144
+
145
+ def should_keep_binary(self, binary_name: str) -> bool:
146
+ """
147
+ Check if a binary should be kept.
148
+
149
+ Args:
150
+ binary_name: Name of the binary (without extension)
151
+
152
+ Returns:
153
+ True if binary should be kept, False otherwise
154
+ """
155
+ # Remove common extensions
156
+ name = binary_name
157
+ for ext in [".exe", ".dll", ".so", ".dylib"]:
158
+ if name.endswith(ext):
159
+ name = name[: -len(ext)]
160
+ break
161
+
162
+ return name in ESSENTIAL_BINARIES
163
+
164
+ def copy_essential_files(self, src_root: Path, dst_root: Path) -> None:
165
+ """
166
+ Copy only essential files from source to destination.
167
+
168
+ Args:
169
+ src_root: Source LLVM root directory
170
+ dst_root: Destination directory
171
+ """
172
+ dst_root.mkdir(parents=True, exist_ok=True)
173
+
174
+ # Copy bin directory (filtered)
175
+ src_bin = src_root / "bin"
176
+ if src_bin.exists():
177
+ dst_bin = dst_root / "bin"
178
+ dst_bin.mkdir(parents=True, exist_ok=True)
179
+
180
+ for binary in src_bin.iterdir():
181
+ if binary.is_file() and self.should_keep_binary(binary.name):
182
+ shutil.copy2(binary, dst_bin / binary.name)
183
+ self.files_kept += 1
184
+ self.log(f"Keeping binary: {binary.name}")
185
+ else:
186
+ self.files_removed += 1
187
+ self.log(f"Removing binary: {binary.name}")
188
+
189
+ # Copy lib directory (filtered - keep only runtime libraries)
190
+ for lib_dir_name in ["lib", "lib64"]:
191
+ src_lib = src_root / lib_dir_name
192
+ if not src_lib.exists():
193
+ continue
194
+
195
+ dst_lib = dst_root / lib_dir_name
196
+ dst_lib.mkdir(parents=True, exist_ok=True)
197
+
198
+ for item in src_lib.iterdir():
199
+ # Keep clang runtime directory
200
+ if item.is_dir() and item.name == "clang":
201
+ dst_clang = dst_lib / "clang"
202
+ shutil.copytree(item, dst_clang, dirs_exist_ok=True)
203
+ self.files_kept += 1
204
+ self.log(f"Keeping runtime: {item.name}")
205
+ # Keep dynamic libraries (.so, .dll, .dylib)
206
+ elif item.is_file():
207
+ if any(item.name.endswith(ext) for ext in [".so", ".dll", ".dylib"]):
208
+ # Check if it's a versioned .so file
209
+ if ".so." in item.name or item.suffix in [".so", ".dll", ".dylib"]:
210
+ shutil.copy2(item, dst_lib / item.name)
211
+ self.files_kept += 1
212
+ self.log(f"Keeping library: {item.name}")
213
+ # Remove static libraries
214
+ elif item.suffix in [".a", ".lib"]:
215
+ self.files_removed += 1
216
+ self.log(f"Removing static library: {item.name}")
217
+ # Keep CMake and other config files if small
218
+ elif item.suffix in [".cmake"] or "LLVMConfig" in item.name:
219
+ self.files_removed += 1
220
+ self.log(f"Removing config file: {item.name}")
221
+ else:
222
+ # Keep other files (might be needed)
223
+ shutil.copy2(item, dst_lib / item.name)
224
+ self.files_kept += 1
225
+
226
+ # Copy include directory only if requested
227
+ if self.keep_headers:
228
+ src_include = src_root / "include"
229
+ if src_include.exists():
230
+ dst_include = dst_root / "include"
231
+ shutil.copytree(src_include, dst_include, dirs_exist_ok=True)
232
+ self.log("Keeping include directory")
233
+ else:
234
+ self.log("Removing include directory")
235
+
236
+ # Copy license and readme files
237
+ for pattern in ["LICENSE*", "README*", "NOTICE*"]:
238
+ for item in src_root.glob(pattern):
239
+ if item.is_file():
240
+ shutil.copy2(item, dst_root / item.name)
241
+ self.log(f"Keeping license file: {item.name}")
242
+
243
+ def strip_binary(self, binary_path: Path) -> bool:
244
+ """
245
+ Strip debug symbols from a binary.
246
+
247
+ Args:
248
+ binary_path: Path to the binary to strip
249
+
250
+ Returns:
251
+ True if stripping was successful, False otherwise
252
+ """
253
+ if not self.strip_binaries:
254
+ return True
255
+
256
+ try:
257
+ # Determine strip command based on platform
258
+ if "win" in self.platform:
259
+ # On Windows, try to find llvm-strip in the output
260
+ llvm_strip = self.output_dir / "bin" / "llvm-strip.exe"
261
+ if not llvm_strip.exists():
262
+ self.log(f"Skipping strip for {binary_path.name}: llvm-strip not found")
263
+ return False
264
+ strip_cmd = [str(llvm_strip), "--strip-all", str(binary_path)]
265
+ else:
266
+ # On Unix, use llvm-strip from the output
267
+ llvm_strip = self.output_dir / "bin" / "llvm-strip"
268
+ if not llvm_strip.exists():
269
+ # Fallback to system strip
270
+ strip_cmd = ["strip", "--strip-all", str(binary_path)]
271
+ else:
272
+ strip_cmd = [str(llvm_strip), "--strip-all", str(binary_path)]
273
+
274
+ # Get original size
275
+ original_size = binary_path.stat().st_size
276
+
277
+ # Run strip command
278
+ result = subprocess.run(strip_cmd, capture_output=True, text=True)
279
+
280
+ if result.returncode == 0:
281
+ new_size = binary_path.stat().st_size
282
+ saved = original_size - new_size
283
+ saved_pct = (saved / original_size * 100) if original_size > 0 else 0
284
+ self.log(
285
+ f"Stripped {binary_path.name}: "
286
+ f"{original_size/1024/1024:.1f}MB -> {new_size/1024/1024:.1f}MB "
287
+ f"(saved {saved_pct:.1f}%)"
288
+ )
289
+ return True
290
+ else:
291
+ self.log(f"Failed to strip {binary_path.name}: {result.stderr}")
292
+ return False
293
+
294
+ except Exception as e:
295
+ self.log(f"Error stripping {binary_path.name}: {e}")
296
+ return False
297
+
298
+ def strip_all_binaries(self) -> None:
299
+ """Strip debug symbols from all binaries in output directory."""
300
+ if not self.strip_binaries:
301
+ print("Skipping binary stripping (disabled)")
302
+ return
303
+
304
+ print("Stripping debug symbols from binaries...")
305
+
306
+ bin_dir = self.output_dir / "bin"
307
+ if not bin_dir.exists():
308
+ print("Warning: No bin directory found")
309
+ return
310
+
311
+ # Get list of binaries to strip
312
+ binaries = []
313
+ for binary in bin_dir.iterdir():
314
+ if binary.is_file():
315
+ # Check if file is executable or library
316
+ if "win" in self.platform:
317
+ if binary.suffix in [".exe", ".dll"]:
318
+ binaries.append(binary)
319
+ else:
320
+ # On Unix, check if file has executable bit
321
+ if os.access(binary, os.X_OK) or binary.suffix in [".so", ".dylib"]:
322
+ binaries.append(binary)
323
+
324
+ print(f"Found {len(binaries)} binaries to strip")
325
+
326
+ # Strip each binary
327
+ success_count = 0
328
+ for binary in binaries:
329
+ if self.strip_binary(binary):
330
+ success_count += 1
331
+
332
+ print(f"Successfully stripped {success_count}/{len(binaries)} binaries")
333
+
334
+ def process(self) -> bool:
335
+ """
336
+ Process the LLVM distribution: copy essential files and strip binaries.
337
+
338
+ Returns:
339
+ True if processing was successful, False otherwise
340
+ """
341
+ print(f"Processing {self.platform}...")
342
+
343
+ # Find LLVM root
344
+ llvm_root = self.find_llvm_root()
345
+ if not llvm_root:
346
+ print(f"Error: Could not find LLVM root in {self.source_dir}")
347
+ return False
348
+
349
+ print(f"Found LLVM root: {llvm_root}")
350
+
351
+ # Calculate original size
352
+ print("Calculating original size...")
353
+ self.original_size = self.get_dir_size(llvm_root)
354
+ print(f"Original size: {self.original_size / 1024 / 1024:.1f} MB")
355
+
356
+ # Copy essential files
357
+ print("Copying essential files...")
358
+ self.copy_essential_files(llvm_root, self.output_dir)
359
+
360
+ # Strip binaries
361
+ if self.strip_binaries:
362
+ self.strip_all_binaries()
363
+
364
+ # Calculate final size
365
+ print("Calculating final size...")
366
+ self.final_size = self.get_dir_size(self.output_dir)
367
+ print(f"Final size: {self.final_size / 1024 / 1024:.1f} MB")
368
+
369
+ # Print statistics
370
+ saved = self.original_size - self.final_size
371
+ saved_pct = (saved / self.original_size * 100) if self.original_size > 0 else 0
372
+
373
+ print(f"\n{'='*60}")
374
+ print("Statistics")
375
+ print(f"{'='*60}")
376
+ print(f"Original size: {self.original_size / 1024 / 1024:>10.1f} MB")
377
+ print(f"Final size: {self.final_size / 1024 / 1024:>10.1f} MB")
378
+ print(f"Saved: {saved / 1024 / 1024:>10.1f} MB ({saved_pct:.1f}%)")
379
+ print(f"Files kept: {self.files_kept:>10}")
380
+ print(f"Files removed: {self.files_removed:>10}")
381
+ print(f"{'='*60}\n")
382
+
383
+ return True
384
+
385
+
386
def main() -> None:
    """Parse the command line, run the stripper, and exit non-zero on failure."""
    cli = argparse.ArgumentParser(description="Strip and optimize LLVM binaries for minimal package size")

    # Positional arguments
    cli.add_argument("source_dir", help="Directory containing extracted LLVM binaries")
    cli.add_argument("output_dir", help="Directory to output stripped binaries")

    # Options
    cli.add_argument(
        "--platform",
        required=True,
        choices=["win-x86_64", "linux-x86_64", "linux-aarch64", "darwin-x86_64", "darwin-arm64"],
        help="Platform identifier",
    )
    cli.add_argument("--keep-headers", action="store_true", help="Keep header files (increases size significantly)")
    cli.add_argument("--no-strip", action="store_true", help="Don't strip debug symbols from binaries")
    cli.add_argument("--verbose", "-v", action="store_true", help="Print verbose output")

    options = cli.parse_args()

    # --no-strip is a negative flag; the stripper takes the positive form.
    succeeded = BinaryStripper(
        source_dir=options.source_dir,
        output_dir=options.output_dir,
        platform=options.platform,
        keep_headers=options.keep_headers,
        strip_binaries=not options.no_strip,
        verbose=options.verbose,
    ).process()

    if succeeded:
        print("\n✓ Successfully processed binaries")
        return

    print("\nError: Failed to process binaries")
    sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,259 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test various compression methods to find the smallest archive size.
4
+
5
+ Tests:
6
+ - gzip (levels 1-9)
7
+ - bzip2 (levels 1-9)
8
+ - xz (levels 0-9, plus extreme mode)
9
+ - zstd (levels 1-22)
10
+ """
11
+
12
+ import os
13
+ import subprocess
14
+ import sys
15
+ import tarfile
16
+ import time
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ try:
21
+ import zstandard as zstd
22
+ except ImportError:
23
+ print("Warning: zstandard module not available")
24
+ zstd = None
25
+
26
+
27
def format_size(bytes_size: int) -> str:
    """Render a byte count as mebibytes with two decimals, e.g. ``"1.50 MB"``."""
    megabytes = bytes_size / 1048576  # 1024 * 1024
    return "{:.2f} MB".format(megabytes)
31
+
32
+
33
def format_time(seconds: float) -> str:
    """Render a duration as ``"12.3s"`` below one minute, else ``"2m 5.5s"``."""
    if seconds < 60:
        return "{:.1f}s".format(seconds)

    # divmod yields the same (seconds // 60, seconds % 60) pair in one step.
    whole_minutes, remainder = divmod(seconds, 60)
    return "{}m {:.1f}s".format(int(whole_minutes), remainder)
40
+
41
+
42
def test_gzip(source_dir: str, output_base: str, levels: list[int] | None = None) -> list[dict[str, Any]]:
    """Test gzip compression at various levels.

    For every level, ``tar -czf`` is run on ``source_dir`` with the level
    passed through the ``GZIP`` environment variable.

    Args:
        source_dir: Directory (or file) to archive.
        output_base: Prefix for the archives; each is written to
            ``{output_base}_gzip{level}.tar.gz``.
        levels: Compression levels to try; defaults to ``[1, 6, 9]``.

    Returns:
        One dict per level with the keys ``method``, ``file``, ``size``
        (bytes) and ``time`` (seconds).

    Raises:
        subprocess.CalledProcessError: If ``tar`` exits with a non-zero status.
    """
    if levels is None:
        levels = [1, 6, 9]  # Fast, default, max

    results = []
    # Resolve so that "." (empty final component) and symlinked sources still
    # yield a real parent directory and member name for tar.
    source_path = Path(source_dir).resolve()

    for level in levels:
        output = f"{output_base}_gzip{level}.tar.gz"
        print(f"Testing gzip level {level}...", end=" ", flush=True)

        # Argument list instead of a shell string: paths containing quotes,
        # "$" or backticks are passed to tar verbatim.
        cmd = ["tar", "-czf", output, "-C", str(source_path.parent), source_path.name]
        env = {**os.environ, "GZIP": f"-{level}"}

        start = time.perf_counter()
        subprocess.run(cmd, env=env, check=True)
        elapsed = time.perf_counter() - start

        size = Path(output).stat().st_size
        print(f"{format_size(size)} in {format_time(elapsed)}")

        results.append({"method": f"gzip-{level}", "file": output, "size": size, "time": elapsed})

    return results
66
+
67
+
68
def test_bzip2(source_dir: str, output_base: str, levels: list[int] | None = None) -> list[dict[str, Any]]:
    """Test bzip2 compression at various levels.

    For every level, ``tar -cjf`` is run on ``source_dir`` with the level
    passed through the ``BZIP2`` environment variable.

    Args:
        source_dir: Directory (or file) to archive.
        output_base: Prefix for the archives; each is written to
            ``{output_base}_bzip2_{level}.tar.bz2``.
        levels: Compression levels to try; defaults to ``[1, 6, 9]``.

    Returns:
        One dict per level with the keys ``method``, ``file``, ``size``
        (bytes) and ``time`` (seconds).

    Raises:
        subprocess.CalledProcessError: If ``tar`` exits with a non-zero status.
    """
    if levels is None:
        levels = [1, 6, 9]  # Fast, default, max

    results = []
    # Resolve so that "." (empty final component) and symlinked sources still
    # yield a real parent directory and member name for tar.
    source_path = Path(source_dir).resolve()

    for level in levels:
        output = f"{output_base}_bzip2_{level}.tar.bz2"
        print(f"Testing bzip2 level {level}...", end=" ", flush=True)

        # Argument list instead of a shell string: paths containing quotes,
        # "$" or backticks are passed to tar verbatim.
        cmd = ["tar", "-cjf", output, "-C", str(source_path.parent), source_path.name]
        env = {**os.environ, "BZIP2": f"-{level}"}

        start = time.perf_counter()
        subprocess.run(cmd, env=env, check=True)
        elapsed = time.perf_counter() - start

        size = Path(output).stat().st_size
        print(f"{format_size(size)} in {format_time(elapsed)}")

        results.append({"method": f"bzip2-{level}", "file": output, "size": size, "time": elapsed})

    return results
92
+
93
+
94
def test_xz(
    source_dir: str, output_base: str, levels: list[int] | None = None, test_extreme: bool = True
) -> list[dict[str, Any]]:
    """Test xz compression at various levels.

    For every variant, ``tar -cJf`` is run on ``source_dir`` with the preset
    passed through the ``XZ_OPT`` environment variable.

    Args:
        source_dir: Directory (or file) to archive.
        output_base: Prefix for the archives; each is written to
            ``{output_base}_xz{preset}.tar.xz``.
        levels: Compression levels to try; defaults to ``[0, 6, 9]``.
        test_extreme: When true, additionally test preset ``9e``
            (level 9 with xz's extreme flag) after the plain levels.

    Returns:
        One dict per variant with the keys ``method``, ``file``, ``size``
        (bytes) and ``time`` (seconds).

    Raises:
        subprocess.CalledProcessError: If ``tar`` exits with a non-zero status.
    """
    if levels is None:
        levels = [0, 6, 9]  # Fast, default, max

    # (preset passed to xz, human-readable label) for every run, in order.
    variants = [(f"{level}", f"xz level {level}") for level in levels]
    if test_extreme:
        # Only test extreme on max level
        variants.append(("9e", "xz level 9 --extreme"))

    results = []
    # Resolve so that "." (empty final component) and symlinked sources still
    # yield a real parent directory and member name for tar.
    source_path = Path(source_dir).resolve()

    for preset, label in variants:
        output = f"{output_base}_xz{preset}.tar.xz"
        print(f"Testing {label}...", end=" ", flush=True)

        # Argument list instead of a shell string: paths containing quotes,
        # "$" or backticks are passed to tar verbatim.
        cmd = ["tar", "-cJf", output, "-C", str(source_path.parent), source_path.name]
        env = {**os.environ, "XZ_OPT": f"-{preset}"}

        start = time.perf_counter()
        subprocess.run(cmd, env=env, check=True)
        elapsed = time.perf_counter() - start

        size = Path(output).stat().st_size
        print(f"{format_size(size)} in {format_time(elapsed)}")

        results.append({"method": f"xz-{preset}", "file": output, "size": size, "time": elapsed})

    return results
137
+
138
+
139
def test_zstd_python(source_dir: str, output_base: str, levels: list[int] | None = None) -> list[dict[str, Any]]:
    """Benchmark zstd at several levels via the Python ``zstandard`` binding.

    Each run builds an uncompressed tar of ``source_dir`` in memory,
    compresses it in one shot, and writes the result to
    ``{output_base}_zstd{level}.tar.zst``. Returns an empty list when the
    ``zstandard`` module could not be imported.
    """
    if zstd is None:
        print("Skipping zstd tests (module not available)")
        return []

    import io

    chosen_levels = [1, 3, 10, 19, 22] if levels is None else levels  # Fast, default, high, very high, ultra
    src = Path(source_dir)
    collected = []

    for level in chosen_levels:
        output = f"{output_base}_zstd{level}.tar.zst"
        print(f"Testing zstd level {level}...", end=" ", flush=True)

        started_at = time.time()

        # Stage 1: plain tar stream held entirely in memory.
        raw_tar = io.BytesIO()
        with tarfile.open(fileobj=raw_tar, mode="w") as archive:
            archive.add(src, arcname=src.name)

        # Stage 2: one-shot zstd compression of that stream.
        compressed = zstd.ZstdCompressor(level=level).compress(raw_tar.getvalue())

        # Stage 3: persist the compressed bytes.
        with open(output, "wb") as sink:
            sink.write(compressed)

        elapsed = time.time() - started_at
        size = len(compressed)
        print(f"{format_size(size)} in {format_time(elapsed)}")

        collected.append({"method": f"zstd-{level}", "file": output, "size": size, "time": elapsed})

    return collected
181
+
182
+
183
def print_results_table(all_results: list[dict[str, Any]]) -> None:
    """Print formatted results table.

    Rows are sorted by archive size (smallest first); each row shows how much
    larger it is than the smallest archive.

    Args:
        all_results: Result dicts as produced by the ``test_*`` functions;
            each needs the keys ``method``, ``size`` and ``time``. An empty
            list prints the header followed by a short notice.
    """
    print("\n" + "=" * 80)
    print("COMPRESSION COMPARISON RESULTS")
    print("=" * 80)
    print()

    # Without any result there is no "best" entry to compare against.
    if not all_results:
        print("No results to display.")
        return

    print(f"{'Method':<15} {'Size':<12} {'Time':<10} {'vs Best':<12}")
    print("-" * 80)

    # Sort by size
    sorted_results = sorted(all_results, key=lambda x: x["size"])
    best, worst = sorted_results[0], sorted_results[-1]
    best_size = best["size"]

    for result in sorted_results:
        size_str = format_size(result["size"])
        time_str = format_time(result["time"])
        percent_vs_best = (result["size"] / best_size - 1) * 100
        vs_best = f"+{percent_vs_best:.1f}%" if percent_vs_best > 0 else "BEST"

        # Every entry tied for the smallest size gets the marker.
        marker = " ⭐" if result["size"] == best_size else ""
        print(f"{result['method']:<15} {size_str:<12} {time_str:<10} {vs_best:<12}{marker}")

    print()
    print(f"Best compression: {best['method']} - {format_size(best['size'])}")
    print(f"Worst compression: {worst['method']} - {format_size(worst['size'])}")
    print(f"Difference: {format_size(worst['size'] - best['size'])}")
209
+
210
+
211
def main() -> None:
    """Run every available compression benchmark on the directory given on the command line."""
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python test_compression.py <directory_to_compress> [output_prefix]")
        sys.exit(1)

    source_dir = argv[1]
    output_base = "compressed" if len(argv) <= 2 else argv[2]

    if not Path(source_dir).exists():
        print(f"Error: Directory '{source_dir}' does not exist")
        sys.exit(1)

    print(f"Testing compression methods on: {source_dir}")
    print(f"Output prefix: {output_base}")
    print()

    # (banner title, benchmark callable) in execution order.
    suites = [
        ("GZIP COMPRESSION", lambda: test_gzip(source_dir, output_base, levels=[1, 6, 9])),
        ("BZIP2 COMPRESSION", lambda: test_bzip2(source_dir, output_base, levels=[1, 6, 9])),
        ("XZ COMPRESSION", lambda: test_xz(source_dir, output_base, levels=[0, 6, 9], test_extreme=True)),
    ]
    # zstd is only benchmarked when the optional module was importable.
    if zstd is not None:
        suites.append(
            ("ZSTD COMPRESSION", lambda: test_zstd_python(source_dir, output_base, levels=[1, 3, 10, 15, 19, 22]))
        )

    rule = "=" * 80
    all_results = []
    for position, (title, run_suite) in enumerate(suites):
        # Every banner after the first is separated by a blank line.
        print(rule if position == 0 else "\n" + rule)
        print(title)
        print(rule)
        all_results.extend(run_suite())

    # Print final results
    print_results_table(all_results)


if __name__ == "__main__":
    main()