PyPI - quantbenchx - Versions diffs - 0.3.0__py3-none-any.whl - Mend

quantbenchx 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

quantbenchx/__init__.py +132 -0
quantbenchx/_types.py +220 -0
quantbenchx/bandwidth.py +290 -0
quantbenchx/cli.py +153 -0
quantbenchx/compare.py +101 -0
quantbenchx/imatrix.py +201 -0
quantbenchx/layerwise.py +167 -0
quantbenchx/matrix.py +289 -0
quantbenchx/perplexity.py +168 -0
quantbenchx/predict.py +125 -0
quantbenchx/profile.py +301 -0
quantbenchx/py.typed +0 -0
quantbenchx/recommend.py +240 -0
quantbenchx/report.py +171 -0
quantbenchx-0.3.0.dist-info/METADATA +213 -0
quantbenchx-0.3.0.dist-info/RECORD +17 -0
quantbenchx-0.3.0.dist-info/WHEEL +4 -0

quantbenchx/report.py ADDED Viewed

@@ -0,0 +1,171 @@
+"""Report formatting for quantbenchx — text, rich, markdown, JSON."""
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any, Dict, List
+from quantbenchx._types import ModelProfile, QualityEstimate
+def report_to_dict(profile: ModelProfile, quality: QualityEstimate | None = None) -> Dict[str, Any]:
+    """Build a plain-dict report from a profile and an optional quality estimate.
+
+    Starts from ``profile.to_dict()`` and, when ``quality`` is truthy, attaches
+    its fields under the ``"quality"`` key.
+    """
+    report = profile.to_dict()
+    if not quality:
+        return report
+    # Attribute names double as the output keys, emitted in this order.
+    quality_fields = (
+        "estimated_perplexity_delta",
+        "quality_score",
+        "risk_level",
+        "sensitive_layers",
+        "recommendations",
+    )
+    report["quality"] = {field: getattr(quality, field) for field in quality_fields}
+    return report
+def save_json(profile: ModelProfile, path: str | Path, quality: QualityEstimate | None = None) -> None:
+    """Write the report for ``profile`` (and optional ``quality``) to ``path`` as JSON.
+
+    The file is always encoded as UTF-8: ``ensure_ascii=False`` leaves non-ASCII
+    characters unescaped, so relying on the platform's default encoding could
+    raise ``UnicodeEncodeError`` or produce a file other machines cannot read.
+    """
+    payload = json.dumps(report_to_dict(profile, quality), indent=2, ensure_ascii=False)
+    Path(path).write_text(payload, encoding="utf-8")
+def load_json(path: str | Path) -> Dict[str, Any]:
+    """Read the JSON report at ``path`` (decoded as UTF-8) and return the parsed data."""
+    # Decode explicitly so the result matches what save_json writes on every platform.
+    return json.loads(Path(path).read_text(encoding="utf-8"))  # type: ignore[no-any-return]
+def format_report_text(profile: ModelProfile, quality: QualityEstimate | None = None) -> str:
+    """Format a profile, and optionally a quality estimate, as a plain-text report.
+
+    The report has a fixed summary section, a dtype-distribution section
+    (largest fraction first) when the profile has one, and a quality section
+    when ``quality`` is truthy. Lines are joined with ``"\\n"`` and there is no
+    trailing newline.
+    """
+    quant = profile.quant
+    heavy_rule = "=" * 60
+    lines: List[str] = [
+        heavy_rule,
+        "QUANTIZATION PROFILE",
+        heavy_rule,
+        f"  Model:              {profile.name}",
+        f"  Format:             {profile.format.value}",
+        f"  Parameters:         {profile.total_params:,}",
+        f"  Size:               {profile.size_gb:.2f} GB",
+        f"  Avg bits/weight:    {quant.avg_bits_per_weight:.2f}",
+        f"  Compression ratio:  {profile.compression_ratio:.1f}x",
+        f"  Method:             {quant.method.value}",
+        f"  Tensors:            {len(profile.tensors)}",
+        f"  Layers:             {len(profile.layers)}",
+        f"  Quantized tensors:  {quant.n_quantized_layers}",
+        f"  FP tensors:         {quant.n_full_precision_layers}",
+    ]
+    if quant.dtype_distribution:
+        lines.append("")
+        lines.append("  Dtype Distribution:")
+        # Largest share first; ties keep the mapping's own order.
+        by_share = sorted(quant.dtype_distribution.items(), key=lambda item: -item[1])
+        lines.extend(f"    {dtype:12s}  {frac:.1%}" for dtype, frac in by_share)
+    if quality:
+        light_rule = "-" * 60
+        lines.extend(
+            [
+                "",
+                light_rule,
+                "QUALITY ESTIMATE",
+                light_rule,
+                # ``:+`` lets the format spec pick the sign, so a negative delta
+                # is shown as "-0.0123" rather than "+-0.0123".
+                f"  Perplexity delta:   {quality.estimated_perplexity_delta:+.4f}",
+                f"  Quality score:      {quality.quality_score:.4f}",
+                f"  Risk level:         {quality.risk_level}",
+            ]
+        )
+        if quality.recommendations:
+            lines.append("")
+            lines.append("  Recommendations:")
+            lines.extend(f"    • {rec}" for rec in quality.recommendations)
+    return "\n".join(lines)
+def format_report_rich(profile: ModelProfile, quality: QualityEstimate | None = None) -> str:
+    """Render a profile with rich and return the captured plain-text output.
+
+    Falls back to :func:`format_report_text` when rich is not installed.
+    Free-form values (model name, dtype names, recommendations) are escaped so
+    square brackets in them are shown literally instead of being parsed as
+    rich console markup.
+    """
+    try:
+        from rich.console import Console
+        from rich.markup import escape
+        from rich.panel import Panel
+        from rich.table import Table
+    except ImportError:
+        return format_report_text(profile, quality)
+    # NOTE(review): no ``file=`` is passed, so each print presumably also goes to
+    # the console's default stream in addition to being recorded — confirm that
+    # callers do not echo the returned string a second time.
+    console = Console(record=True, width=90)
+    # Header panel
+    header = Table(show_header=False, box=None, padding=(0, 2))
+    header.add_column(style="bold cyan", width=22)
+    header.add_column()
+    header.add_row("Model", escape(profile.name))
+    header.add_row("Format", profile.format.value)
+    header.add_row("Parameters", f"{profile.total_params:,}")
+    header.add_row("Size", f"{profile.size_gb:.2f} GB")
+    header.add_row("Avg bits/weight", f"{profile.quant.avg_bits_per_weight:.2f}")
+    header.add_row("Compression", f"{profile.compression_ratio:.1f}x vs FP32")
+    header.add_row("Method", profile.quant.method.value)
+    console.print(Panel(header, title="[bold]quantbenchx — Profile[/bold]", border_style="blue"))
+    # Dtype distribution, largest share first
+    if profile.quant.dtype_distribution:
+        dt_table = Table(title="Dtype Distribution")
+        dt_table.add_column("Dtype", style="bold")
+        dt_table.add_column("Fraction", justify="right")
+        dt_table.add_column("Bar", min_width=20)
+        for dtype, frac in sorted(profile.quant.dtype_distribution.items(), key=lambda x: -x[1]):
+            # 30-cell bar: filled cells for the share, shaded cells for the rest.
+            bar_len = int(frac * 30)
+            bar = "█" * bar_len + "░" * (30 - bar_len)
+            dt_table.add_row(escape(dtype), f"{frac:.1%}", f"[green]{bar}[/green]")
+        console.print(dt_table)
+    # Quality estimate
+    if quality:
+        # Unknown risk levels fall back to plain white.
+        risk_color = {"low": "green", "medium": "yellow", "high": "red", "critical": "bold red"}.get(
+            quality.risk_level, "white"
+        )
+        q_table = Table(show_header=False, box=None, padding=(0, 2))
+        q_table.add_column(style="bold cyan", width=22)
+        q_table.add_column()
+        # ``:+`` picks the sign itself, so a negative delta is not shown as "+-…".
+        q_table.add_row("Perplexity delta", f"{quality.estimated_perplexity_delta:+.4f}")
+        q_table.add_row("Quality score", f"{quality.quality_score:.4f}")
+        q_table.add_row("Risk level", f"[{risk_color}]{quality.risk_level.upper()}[/{risk_color}]")
+        console.print(Panel(q_table, title="[bold]Quality Estimate[/bold]", border_style="yellow"))
+        if quality.recommendations:
+            for rec in quality.recommendations:
+                console.print(f"  • {escape(rec)}")
+    return console.export_text()
+def format_markdown(profile: ModelProfile, quality: QualityEstimate | None = None) -> str:
+    """Format a profile, and optionally a quality estimate, as a Markdown document.
+
+    Produces a level-1 heading with the model name, a property table, a dtype
+    distribution table (largest fraction first) when the profile has one, and
+    a quality section when ``quality`` is truthy. Lines are joined with
+    ``"\\n"`` and there is no trailing newline.
+    """
+    quant = profile.quant
+    lines: List[str] = [
+        f"# Quantization Profile: {profile.name}",
+        "",
+        "| Property | Value |",
+        "|---|---|",
+        f"| Format | {profile.format.value} |",
+        f"| Parameters | {profile.total_params:,} |",
+        f"| Size | {profile.size_gb:.2f} GB |",
+        f"| Avg bits/weight | {quant.avg_bits_per_weight:.2f} |",
+        f"| Compression | {profile.compression_ratio:.1f}x |",
+        f"| Method | {quant.method.value} |",
+    ]
+    if quant.dtype_distribution:
+        lines.extend(["", "## Dtype Distribution", "", "| Dtype | Fraction |", "|---|---|"])
+        # Largest share first; ties keep the mapping's own order.
+        by_share = sorted(quant.dtype_distribution.items(), key=lambda item: -item[1])
+        lines.extend(f"| {dtype} | {frac:.1%} |" for dtype, frac in by_share)
+    if quality:
+        lines.extend(
+            [
+                "",
+                "## Quality Estimate",
+                "",
+                # ``:+`` picks the sign itself, so a negative delta is rendered
+                # as "-0.0123" rather than "+-0.0123".
+                f"- **Perplexity delta**: {quality.estimated_perplexity_delta:+.4f}",
+                f"- **Quality score**: {quality.quality_score:.4f}",
+                f"- **Risk level**: {quality.risk_level}",
+            ]
+        )
+        if quality.recommendations:
+            lines.extend(["", "### Recommendations", ""])
+            lines.extend(f"- {rec}" for rec in quality.recommendations)
+    return "\n".join(lines)

quantbenchx-0.3.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,213 @@
+Metadata-Version: 2.4
+Name: quantbenchx
+Version: 0.3.0
+Summary: Quantization quality analyzer — pure-Python GGUF/safetensors parsing, layerwise analysis, quality prediction. Zero deps.
+Project-URL: Homepage, https://github.com/stef41/quantbenchx
+Project-URL: Repository, https://github.com/stef41/quantbenchx
+Project-URL: Issues, https://github.com/stef41/quantbenchx/issues
+Author: Zacharie B
+License: Apache-2.0
+Keywords: analysis,benchmark,compression,evaluation,gguf,llm,model,quality,quantization,safetensors
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Typing :: Typed
+Requires-Python: >=3.9
+Provides-Extra: all
+Requires-Dist: click>=8.0; extra == 'all'
+Requires-Dist: rich>=13.0; extra == 'all'
+Provides-Extra: cli
+Requires-Dist: click>=8.0; extra == 'cli'
+Requires-Dist: rich>=13.0; extra == 'cli'
+Description-Content-Type: text/markdown
+# quantbenchx
+[![CI](https://github.com/stef41/quantbenchx/actions/workflows/ci.yml/badge.svg)](https://github.com/stef41/quantbenchx/actions/workflows/ci.yml)
+[![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org/downloads/)
+[![License: Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)
+**Quantization quality analyzer for LLMs.** Pure-Python GGUF and safetensors parsing, layerwise sensitivity analysis, quality prediction, and mixed-quantization recommendations — zero dependencies.
+Point quantbenchx at any `.gguf` or `.safetensors` file and get an instant quality report: dtype distribution, estimated perplexity impact, layer sensitivity scores, and mixed-precision recommendations.
+<p align="center">
+  <img src="assets/profile.svg" width="700" alt="quantbenchx profile report" />
+</p>
+## Why quantbenchx?
+| Problem | quantbenchx Solution |
+|---|---|
+| "Is Q4_K_M good enough for my use case?" | Estimated perplexity delta + risk level |
+| No way to inspect GGUF internals without llama.cpp | Pure-Python parser — just `pip install` |
+| Which layers are most sensitive to quantization? | Layerwise sensitivity scoring with position awareness |
+| Choosing between Q4_K_M, Q5_K_S, Q6_K, etc. | Side-by-side format comparison with rankings |
+| Mixed-precision quant is complex to configure | Automated recommendations targeting your bpw budget |
+## Installation
+```bash
+pip install quantbenchx          # zero dependencies
+pip install quantbenchx[cli]     # + click, rich for terminal UI
+pip install quantbenchx[all]     # everything
+```
+## Quick Start
+### 1. Profile a quantized model
+```python
+from quantbenchx import profile_gguf, estimate_quality
+profile = profile_gguf("Meta-Llama-3-8B-Q4_K_M.gguf")
+print(f"Model: {profile.name}")
+print(f"Size: {profile.size_gb:.2f} GB")
+print(f"Avg bits/weight: {profile.quant.avg_bits_per_weight:.2f}")
+print(f"Compression: {profile.compression_ratio:.1f}x vs FP32")
+quality = estimate_quality(profile)
+print(f"Risk level: {quality.risk_level}")
+print(f"Est. perplexity delta: +{quality.estimated_perplexity_delta:.4f}")
+```
+### 2. Layerwise analysis
+<p align="center">
+  <img src="assets/layerwise.svg" width="700" alt="quantbenchx layerwise analysis" />
+</p>
+```python
+from quantbenchx import profile_gguf, analyze_layers, layer_sensitivity
+profile = profile_gguf("model.gguf")
+# Get sensitivity scores
+sensitivity = layer_sensitivity(profile)
+for layer_name, score in sorted(sensitivity.items(), key=lambda x: -x[1])[:5]:
+    print(f"  {layer_name}: {score:.3f}")
+# Full layerwise breakdown
+for row in analyze_layers(profile):
+    print(f"{row['name']:30s} {row['avg_bits_per_weight']:5.2f} bpw  sens={row['sensitivity']:.3f}")
+```
+### 3. Compare quantization formats
+```python
+from quantbenchx import profile_gguf, compare_profiles, compare_formats
+q4 = profile_gguf("model-Q4_K_M.gguf")
+q5 = profile_gguf("model-Q5_K_M.gguf")
+q8 = profile_gguf("model-Q8_0.gguf")
+# Pairwise comparison
+diff = compare_profiles(q4, q8)
+print(f"Size delta: {diff['size_delta_bytes'] / 1e9:.2f} GB")
+print(f"BPW delta: {diff['bpw_delta']:.2f}")
+# Multi-format ranking
+ranking = compare_formats([q4, q5, q8])
+for row in ranking["ranking"]:
+    print(f"  #{row['rank']} {row['name']} — {row['avg_bpw']:.2f} bpw, {row['size_gb']:.2f} GB")
+```
+### 4. Mixed-quantization recommendations
+```python
+from quantbenchx import profile_gguf, recommend_mixed_quant
+profile = profile_gguf("model.gguf")
+rec = recommend_mixed_quant(profile, target_bpw=4.5)
+print(f"Target: {rec['target_bpw']} bpw → Estimated: {rec['estimated_avg_bpw']} bpw")
+print(f"High precision: {rec['n_high_precision_layers']} layers ({rec['high_quant']})")
+print(f"Low precision: {rec['n_low_precision_layers']} layers ({rec['low_quant']})")
+```
+### 5. Predict quality for any bpw
+```python
+from quantbenchx import perplexity_delta
+for bpw in [8.0, 6.0, 5.0, 4.5, 4.0, 3.5, 3.0, 2.0]:
+    delta = perplexity_delta(bpw)
+    print(f"  {bpw:.1f} bpw → +{delta:.4f} perplexity")
+```
+## CLI
+```bash
+# Profile a GGUF or safetensors file
+quantbenchx profile model.gguf
+# Markdown output
+quantbenchx profile model.gguf --markdown
+# Save JSON report
+quantbenchx profile model.gguf -o report.json
+# Compare two files
+quantbenchx compare model-Q4.gguf model-Q8.gguf
+# Layerwise analysis
+quantbenchx layers model.gguf
+# Mixed-quant recommendation
+quantbenchx recommend model.gguf --target-bpw 4.5
+```
+## Supported Formats
+| Format | Parser | Status |
+|---|---|---|
+| GGUF (v2, v3) | Pure Python — reads header only | Full support |
+| safetensors | Pure Python — reads JSON header only | Full support |
+### Supported Dtypes
+Q2_K, Q3_K_S, Q3_K_M, Q3_K_L, Q4_0, Q4_1, Q4_K_S, Q4_K_M, Q5_0, Q5_1, Q5_K_S, Q5_K_M, Q6_K, Q8_0, IQ1_S, IQ2_XXS, IQ3_XXS, IQ4_XS, F16, BF16, F32
+## Architecture
+```
+quantbenchx/
+├── _types.py        # DType, TensorInfo, LayerInfo, ModelProfile, QualityEstimate
+├── profile.py       # Pure-Python GGUF & safetensors parsers
+├── layerwise.py     # Layer sensitivity analysis, mixed-quant recommendations
+├── compare.py       # Cross-format and pairwise comparisons
+├── predict.py       # Quality estimation from bits-per-weight curves
+├── report.py        # JSON/text/rich/markdown formatting
+└── cli.py           # Click CLI interface
+```
+## See Also
+Part of the **stef41 LLM toolkit** — open-source tools for every stage of the LLM lifecycle:
+| Project | What it does |
+|---------|-------------|
+| [tokonomics](https://github.com/stef41/tokonomix) | Token counting & cost management for LLM APIs |
+| [datacrux](https://github.com/stef41/datacruxai) | Training data quality — dedup, PII, contamination |
+| [castwright](https://github.com/stef41/castwright) | Synthetic instruction data generation |
+| [datamix](https://github.com/stef41/datamix) | Dataset mixing & curriculum optimization |
+| [toksight](https://github.com/stef41/toksight) | Tokenizer analysis & comparison |
+| [trainpulse](https://github.com/stef41/trainpulse) | Training health monitoring |
+| [ckpt](https://github.com/stef41/ckptkit) | Checkpoint inspection, diffing & merging |
+| [infermark](https://github.com/stef41/infermark) | Inference benchmarking |
+| [modeldiff](https://github.com/stef41/modeldiffx) | Behavioral regression testing |
+| [vibesafe](https://github.com/stef41/vibesafex) | AI-generated code safety scanner |
+| [injectionguard](https://github.com/stef41/injectionguard) | Prompt injection detection |
+## License
+Apache 2.0

quantbenchx-0.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,17 @@
+quantbenchx/__init__.py,sha256=-1UpDil3NfHThFcFXU1j6R3brrJMyjzxXwP1_bEL2QU,2853
+quantbenchx/_types.py,sha256=DAVHmA6LGMkpHOZmJfV4y_aaDROb3fw-pRc8QwBdxXc,6752
+quantbenchx/bandwidth.py,sha256=mc_c4dBk7yPttbtmHPq4ZnhqqzcMIpW4qNNzoH0h-lY,9339
+quantbenchx/cli.py,sha256=eZNva1ijrtB-u4te7JlyMqNv3QA2zkPjYmFnCHbukKE,5544
+quantbenchx/compare.py,sha256=j0zRCXbEJ2igK26A3aOLEdFkgWr-ttuR1cmYzpY-bz0,3508
+quantbenchx/imatrix.py,sha256=mwv5E83c3_Uiz0PTfA2FXmQbrrE4_zfgVf6LThSbpOw,6216
+quantbenchx/layerwise.py,sha256=_35nsdRx9WDR3IYKSUNeGdXfbrL0XmszjyEqRE36SZo,5243
+quantbenchx/matrix.py,sha256=aKhUazDpfDRL54g8hGJJNkJAyIG4zGe1t2n-oGNvGvc,9958
+quantbenchx/perplexity.py,sha256=wllzEPTIwB4NLNdFmHChfdmTaTQyopCd504eOsLetMM,5767
+quantbenchx/predict.py,sha256=lHM_zSM34l1Dh49pp_9-FR9bU3XJzT584MPTcAy60fc,3858
+quantbenchx/profile.py,sha256=Re0KLFydI_jXvgLPKa0bjX_u2XATla246nYV6dwumZA,10258
+quantbenchx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+quantbenchx/recommend.py,sha256=tqVMJymXhmLHP5v7RJVdTrnF3NHrpyuSAp2HlhbbmYQ,7768
+quantbenchx/report.py,sha256=RE-Lhc5YyNTJl13De5UmjpJIb_E3z9WBc-7RYgsfLFs,7371
+quantbenchx-0.3.0.dist-info/METADATA,sha256=hp1NU8i2Qvir4llB6RKCeTX5-uGYWwfz_Ds3si6nxrM,7987
+quantbenchx-0.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+quantbenchx-0.3.0.dist-info/RECORD,,

quantbenchx-0.3.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.29.0
+Root-Is-Purelib: true
+Tag: py3-none-any