quantbenchx 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
quantbenchx/report.py ADDED
@@ -0,0 +1,171 @@
1
+ """Report formatting for quantbenchx — text, rich, markdown, JSON."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List
8
+
9
+ from quantbenchx._types import ModelProfile, QualityEstimate
10
+
11
+
12
def report_to_dict(profile: ModelProfile, quality: QualityEstimate | None = None) -> Dict[str, Any]:
    """Convert a profile and optional quality estimate to a dict.

    Args:
        profile: Profile whose ``to_dict()`` result forms the base of the report.
        quality: Optional quality estimate. When provided, its fields are
            stored under the ``"quality"`` key.

    Returns:
        The dict produced by ``profile.to_dict()``, extended with a
        ``"quality"`` entry when *quality* is given.
    """
    d = profile.to_dict()
    # Compare against None explicitly: an estimate object that happens to
    # evaluate as falsy must still be included in the report.
    if quality is not None:
        d["quality"] = {
            "estimated_perplexity_delta": quality.estimated_perplexity_delta,
            "quality_score": quality.quality_score,
            "risk_level": quality.risk_level,
            "sensitive_layers": quality.sensitive_layers,
            "recommendations": quality.recommendations,
        }
    return d
24
+
25
+
26
def save_json(profile: ModelProfile, path: str | Path, quality: QualityEstimate | None = None) -> None:
    """Save a profile report as JSON.

    Args:
        profile: Profile to serialize.
        path: Destination file; it is created or overwritten.
        quality: Optional quality estimate to embed in the report.
    """
    payload = json.dumps(report_to_dict(profile, quality), indent=2, ensure_ascii=False)
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding is
    # pinned to UTF-8 instead of depending on the platform's locale default.
    Path(path).write_text(payload, encoding="utf-8")


def load_json(path: str | Path) -> Dict[str, Any]:
    """Load a JSON report.

    Args:
        path: File to read; it is decoded as UTF-8.

    Returns:
        The parsed JSON document.
    """
    # Decode as UTF-8 to match what save_json writes.
    return json.loads(Path(path).read_text(encoding="utf-8"))  # type: ignore[no-any-return]
34
+
35
+
36
def format_report_text(profile: ModelProfile, quality: QualityEstimate | None = None) -> str:
    """Format a profile as plain text.

    Args:
        profile: Profile to render.
        quality: Optional quality estimate; adds a "QUALITY ESTIMATE" section.

    Returns:
        The report as a single newline-joined string (no trailing newline).
    """
    quant = profile.quant
    heavy_rule = "=" * 60
    lines: List[str] = [
        heavy_rule,
        "QUANTIZATION PROFILE",
        heavy_rule,
        f" Model: {profile.name}",
        f" Format: {profile.format.value}",
        f" Parameters: {profile.total_params:,}",
        f" Size: {profile.size_gb:.2f} GB",
        f" Avg bits/weight: {quant.avg_bits_per_weight:.2f}",
        f" Compression ratio: {profile.compression_ratio:.1f}x",
        f" Method: {quant.method.value}",
        f" Tensors: {len(profile.tensors)}",
        f" Layers: {len(profile.layers)}",
        f" Quantized tensors: {quant.n_quantized_layers}",
        f" FP tensors: {quant.n_full_precision_layers}",
    ]

    if quant.dtype_distribution:
        lines.append("")
        lines.append(" Dtype Distribution:")
        # Largest share first.
        for dtype, frac in sorted(quant.dtype_distribution.items(), key=lambda x: -x[1]):
            lines.append(f" {dtype:12s} {frac:.1%}")

    if quality is not None:
        light_rule = "-" * 60
        lines.append("")
        lines.append(light_rule)
        lines.append("QUALITY ESTIMATE")
        lines.append(light_rule)
        # The "+" format flag always emits a sign, so a negative delta is shown
        # as "-0.0100" rather than the malformed "+-0.0100".
        lines.append(f" Perplexity delta: {quality.estimated_perplexity_delta:+.4f}")
        lines.append(f" Quality score: {quality.quality_score:.4f}")
        lines.append(f" Risk level: {quality.risk_level}")
        if quality.recommendations:
            lines.append("")
            lines.append(" Recommendations:")
            lines.extend(f" • {rec}" for rec in quality.recommendations)

    return "\n".join(lines)
75
+
76
+
77
def format_report_rich(profile: ModelProfile, quality: QualityEstimate | None = None) -> str:
    """Format a profile using rich for terminal display. Returns rendered string.

    Falls back to ``format_report_text`` when rich cannot be imported.

    Args:
        profile: Profile to render.
        quality: Optional quality estimate; adds a "Quality Estimate" panel and
            a bullet list of recommendations.

    Returns:
        The plain-text export of everything printed to the recording console.
    """
    try:
        from rich.console import Console
        from rich.markup import escape
        from rich.panel import Panel
        from rich.table import Table
    except ImportError:
        return format_report_text(profile, quality)

    def _plain(value: Any) -> Any:
        # Free-form strings (model names, dtype labels, ...) may contain "[...]",
        # which rich would otherwise parse as console markup.
        return escape(value) if isinstance(value, str) else value

    bar_width = 30

    # NOTE(review): a recording Console still writes to its output stream, so
    # this function prints as well as returns -- confirm callers do not echo
    # the returned text a second time.
    console = Console(record=True, width=90)

    # Header panel
    header = Table(show_header=False, box=None, padding=(0, 2))
    header.add_column(style="bold cyan", width=22)
    header.add_column()
    header.add_row("Model", _plain(profile.name))
    header.add_row("Format", profile.format.value)
    header.add_row("Parameters", f"{profile.total_params:,}")
    header.add_row("Size", f"{profile.size_gb:.2f} GB")
    header.add_row("Avg bits/weight", f"{profile.quant.avg_bits_per_weight:.2f}")
    header.add_row("Compression", f"{profile.compression_ratio:.1f}x vs FP32")
    header.add_row("Method", profile.quant.method.value)

    console.print(Panel(header, title="[bold]quantbenchx — Profile[/bold]", border_style="blue"))

    # Dtype distribution
    if profile.quant.dtype_distribution:
        dt_table = Table(title="Dtype Distribution")
        dt_table.add_column("Dtype", style="bold")
        dt_table.add_column("Fraction", justify="right")
        dt_table.add_column("Bar", min_width=20)
        for dtype, frac in sorted(profile.quant.dtype_distribution.items(), key=lambda x: -x[1]):
            # Clamp so a fraction outside [0, 1] cannot distort the bar width.
            bar_len = max(0, min(bar_width, int(frac * bar_width)))
            bar = "█" * bar_len + "░" * (bar_width - bar_len)
            dt_table.add_row(_plain(dtype), f"{frac:.1%}", f"[green]{bar}[/green]")
        console.print(dt_table)

    # Quality estimate
    if quality is not None:
        risk_color = {"low": "green", "medium": "yellow", "high": "red", "critical": "bold red"}.get(
            quality.risk_level, "white"
        )
        q_table = Table(show_header=False, box=None, padding=(0, 2))
        q_table.add_column(style="bold cyan", width=22)
        q_table.add_column()
        # "+" format flag: a negative delta prints as "-0.0100", not "+-0.0100".
        q_table.add_row("Perplexity delta", f"{quality.estimated_perplexity_delta:+.4f}")
        q_table.add_row("Quality score", f"{quality.quality_score:.4f}")
        risk_label = escape(quality.risk_level.upper())
        q_table.add_row("Risk level", f"[{risk_color}]{risk_label}[/{risk_color}]")
        console.print(Panel(q_table, title="[bold]Quality Estimate[/bold]", border_style="yellow"))

        if quality.recommendations:
            for rec in quality.recommendations:
                console.print(f" • {escape(str(rec))}")

    return console.export_text()
132
+
133
+
134
def format_markdown(profile: ModelProfile, quality: QualityEstimate | None = None) -> str:
    """Format a profile as Markdown.

    Args:
        profile: Profile to render.
        quality: Optional quality estimate; adds a "Quality Estimate" section.

    Returns:
        The Markdown document as a newline-joined string (no trailing newline).
    """
    quant = profile.quant
    lines: List[str] = [
        f"# Quantization Profile: {profile.name}",
        "",
        "| Property | Value |",
        "|---|---|",
        f"| Format | {profile.format.value} |",
        f"| Parameters | {profile.total_params:,} |",
        f"| Size | {profile.size_gb:.2f} GB |",
        f"| Avg bits/weight | {quant.avg_bits_per_weight:.2f} |",
        f"| Compression | {profile.compression_ratio:.1f}x |",
        f"| Method | {quant.method.value} |",
    ]

    if quant.dtype_distribution:
        lines += ["", "## Dtype Distribution", "", "| Dtype | Fraction |", "|---|---|"]
        # Largest share first.
        for dtype, frac in sorted(quant.dtype_distribution.items(), key=lambda x: -x[1]):
            lines.append(f"| {dtype} | {frac:.1%} |")

    if quality is not None:
        lines += ["", "## Quality Estimate", ""]
        # The "+" format flag always emits a sign, so a negative delta is shown
        # as "-0.0100" rather than the malformed "+-0.0100".
        lines.append(f"- **Perplexity delta**: {quality.estimated_perplexity_delta:+.4f}")
        lines.append(f"- **Quality score**: {quality.quality_score:.4f}")
        lines.append(f"- **Risk level**: {quality.risk_level}")
        if quality.recommendations:
            lines += ["", "### Recommendations", ""]
            lines.extend(f"- {rec}" for rec in quality.recommendations)

    return "\n".join(lines)
@@ -0,0 +1,213 @@
1
+ Metadata-Version: 2.4
2
+ Name: quantbenchx
3
+ Version: 0.3.0
4
+ Summary: Quantization quality analyzer — pure-Python GGUF/safetensors parsing, layerwise analysis, quality prediction. Zero deps.
5
+ Project-URL: Homepage, https://github.com/stef41/quantbenchx
6
+ Project-URL: Repository, https://github.com/stef41/quantbenchx
7
+ Project-URL: Issues, https://github.com/stef41/quantbenchx/issues
8
+ Author: Zacharie B
9
+ License: Apache-2.0
10
+ Keywords: analysis,benchmark,compression,evaluation,gguf,llm,model,quality,quantization,safetensors
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.9
24
+ Provides-Extra: all
25
+ Requires-Dist: click>=8.0; extra == 'all'
26
+ Requires-Dist: rich>=13.0; extra == 'all'
27
+ Provides-Extra: cli
28
+ Requires-Dist: click>=8.0; extra == 'cli'
29
+ Requires-Dist: rich>=13.0; extra == 'cli'
30
+ Description-Content-Type: text/markdown
31
+
32
+ # quantbenchx
33
+
34
+ [![CI](https://github.com/stef41/quantbenchx/actions/workflows/ci.yml/badge.svg)](https://github.com/stef41/quantbenchx/actions/workflows/ci.yml)
35
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org/downloads/)
36
+ [![License: Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)
37
+
38
+ **Quantization quality analyzer for LLMs.** Pure-Python GGUF and safetensors parsing, layerwise sensitivity analysis, quality prediction, and mixed-quantization recommendations — zero dependencies.
39
+
40
+ Point quantbenchx at any `.gguf` or `.safetensors` file and get an instant quality report: dtype distribution, estimated perplexity impact, layer sensitivity scores, and mixed-precision recommendations.
41
+
42
+ <p align="center">
43
+ <img src="assets/profile.svg" width="700" alt="quantbenchx profile report" />
44
+ </p>
45
+
46
+ ## Why quantbenchx?
47
+
48
+ | Problem | quantbenchx Solution |
49
+ |---|---|
50
+ | "Is Q4_K_M good enough for my use case?" | Estimated perplexity delta + risk level |
51
+ | No way to inspect GGUF internals without llama.cpp | Pure-Python parser — just `pip install` |
52
+ | Which layers are most sensitive to quantization? | Layerwise sensitivity scoring with position awareness |
53
+ | Choosing between Q4_K_M, Q5_K_S, Q6_K, etc. | Side-by-side format comparison with rankings |
54
+ | Mixed-precision quant is complex to configure | Automated recommendations targeting your bpw budget |
55
+
56
+ ## Installation
57
+
58
+ ```bash
59
+ pip install quantbenchx # zero dependencies
60
+ pip install quantbenchx[cli] # + click, rich for terminal UI
61
+ pip install quantbenchx[all] # everything
62
+ ```
63
+
64
+ ## Quick Start
65
+
66
+ ### 1. Profile a quantized model
67
+
68
+ ```python
69
+ from quantbenchx import profile_gguf, estimate_quality
70
+
71
+ profile = profile_gguf("Meta-Llama-3-8B-Q4_K_M.gguf")
72
+
73
+ print(f"Model: {profile.name}")
74
+ print(f"Size: {profile.size_gb:.2f} GB")
75
+ print(f"Avg bits/weight: {profile.quant.avg_bits_per_weight:.2f}")
76
+ print(f"Compression: {profile.compression_ratio:.1f}x vs FP32")
77
+
78
+ quality = estimate_quality(profile)
79
+ print(f"Risk level: {quality.risk_level}")
80
+ print(f"Est. perplexity delta: +{quality.estimated_perplexity_delta:.4f}")
81
+ ```
82
+
83
+ ### 2. Layerwise analysis
84
+
85
+ <p align="center">
86
+ <img src="assets/layerwise.svg" width="700" alt="quantbenchx layerwise analysis" />
87
+ </p>
88
+
89
+ ```python
90
+ from quantbenchx import profile_gguf, analyze_layers, layer_sensitivity
91
+
92
+ profile = profile_gguf("model.gguf")
93
+
94
+ # Get sensitivity scores
95
+ sensitivity = layer_sensitivity(profile)
96
+ for layer_name, score in sorted(sensitivity.items(), key=lambda x: -x[1])[:5]:
97
+ print(f" {layer_name}: {score:.3f}")
98
+
99
+ # Full layerwise breakdown
100
+ for row in analyze_layers(profile):
101
+ print(f"{row['name']:30s} {row['avg_bits_per_weight']:5.2f} bpw sens={row['sensitivity']:.3f}")
102
+ ```
103
+
104
+ ### 3. Compare quantization formats
105
+
106
+ ```python
107
+ from quantbenchx import profile_gguf, compare_profiles, compare_formats
108
+
109
+ q4 = profile_gguf("model-Q4_K_M.gguf")
110
+ q5 = profile_gguf("model-Q5_K_M.gguf")
111
+ q8 = profile_gguf("model-Q8_0.gguf")
112
+
113
+ # Pairwise comparison
114
+ diff = compare_profiles(q4, q8)
115
+ print(f"Size delta: {diff['size_delta_bytes'] / 1e9:.2f} GB")
116
+ print(f"BPW delta: {diff['bpw_delta']:.2f}")
117
+
118
+ # Multi-format ranking
119
+ ranking = compare_formats([q4, q5, q8])
120
+ for row in ranking["ranking"]:
121
+ print(f" #{row['rank']} {row['name']} — {row['avg_bpw']:.2f} bpw, {row['size_gb']:.2f} GB")
122
+ ```
123
+
124
+ ### 4. Mixed-quantization recommendations
125
+
126
+ ```python
127
+ from quantbenchx import profile_gguf, recommend_mixed_quant
128
+
129
+ profile = profile_gguf("model.gguf")
130
+ rec = recommend_mixed_quant(profile, target_bpw=4.5)
131
+
132
+ print(f"Target: {rec['target_bpw']} bpw → Estimated: {rec['estimated_avg_bpw']} bpw")
133
+ print(f"High precision: {rec['n_high_precision_layers']} layers ({rec['high_quant']})")
134
+ print(f"Low precision: {rec['n_low_precision_layers']} layers ({rec['low_quant']})")
135
+ ```
136
+
137
+ ### 5. Predict quality for any bpw
138
+
139
+ ```python
140
+ from quantbenchx import perplexity_delta
141
+
142
+ for bpw in [8.0, 6.0, 5.0, 4.5, 4.0, 3.5, 3.0, 2.0]:
143
+ delta = perplexity_delta(bpw)
144
+ print(f" {bpw:.1f} bpw → +{delta:.4f} perplexity")
145
+ ```
146
+
147
+ ## CLI
148
+
149
+ ```bash
150
+ # Profile a GGUF or safetensors file
151
+ quantbenchx profile model.gguf
152
+
153
+ # Markdown output
154
+ quantbenchx profile model.gguf --markdown
155
+
156
+ # Save JSON report
157
+ quantbenchx profile model.gguf -o report.json
158
+
159
+ # Compare two files
160
+ quantbenchx compare model-Q4.gguf model-Q8.gguf
161
+
162
+ # Layerwise analysis
163
+ quantbenchx layers model.gguf
164
+
165
+ # Mixed-quant recommendation
166
+ quantbenchx recommend model.gguf --target-bpw 4.5
167
+ ```
168
+
169
+ ## Supported Formats
170
+
171
+ | Format | Parser | Status |
172
+ |---|---|---|
173
+ | GGUF (v2, v3) | Pure Python — reads header only | Full support |
174
+ | safetensors | Pure Python — reads JSON header only | Full support |
175
+
176
+ ### Supported Dtypes
177
+
178
+ Q2_K, Q3_K_S, Q3_K_M, Q3_K_L, Q4_0, Q4_1, Q4_K_S, Q4_K_M, Q5_0, Q5_1, Q5_K_S, Q5_K_M, Q6_K, Q8_0, IQ1_S, IQ2_XXS, IQ3_XXS, IQ4_XS, F16, BF16, F32
179
+
180
+ ## Architecture
181
+
182
+ ```
183
+ quantbenchx/
184
+ ├── _types.py # DType, TensorInfo, LayerInfo, ModelProfile, QualityEstimate
185
+ ├── profile.py # Pure-Python GGUF & safetensors parsers
186
+ ├── layerwise.py # Layer sensitivity analysis, mixed-quant recommendations
187
+ ├── compare.py # Cross-format and pairwise comparisons
188
+ ├── predict.py # Quality estimation from bits-per-weight curves
189
+ ├── report.py # JSON/text/rich/markdown formatting
190
+ └── cli.py # Click CLI interface
191
+ ```
192
+
193
+ ## See Also
194
+
195
+ Part of the **stef41 LLM toolkit** — open-source tools for every stage of the LLM lifecycle:
196
+
197
+ | Project | What it does |
198
+ |---------|-------------|
199
+ | [tokonomics](https://github.com/stef41/tokonomix) | Token counting & cost management for LLM APIs |
200
+ | [datacrux](https://github.com/stef41/datacruxai) | Training data quality — dedup, PII, contamination |
201
+ | [castwright](https://github.com/stef41/castwright) | Synthetic instruction data generation |
202
+ | [datamix](https://github.com/stef41/datamix) | Dataset mixing & curriculum optimization |
203
+ | [toksight](https://github.com/stef41/toksight) | Tokenizer analysis & comparison |
204
+ | [trainpulse](https://github.com/stef41/trainpulse) | Training health monitoring |
205
+ | [ckpt](https://github.com/stef41/ckptkit) | Checkpoint inspection, diffing & merging |
206
+ | [infermark](https://github.com/stef41/infermark) | Inference benchmarking |
207
+ | [modeldiff](https://github.com/stef41/modeldiffx) | Behavioral regression testing |
208
+ | [vibesafe](https://github.com/stef41/vibesafex) | AI-generated code safety scanner |
209
+ | [injectionguard](https://github.com/stef41/injectionguard) | Prompt injection detection |
210
+
211
+ ## License
212
+
213
+ Apache 2.0
@@ -0,0 +1,17 @@
1
+ quantbenchx/__init__.py,sha256=-1UpDil3NfHThFcFXU1j6R3brrJMyjzxXwP1_bEL2QU,2853
2
+ quantbenchx/_types.py,sha256=DAVHmA6LGMkpHOZmJfV4y_aaDROb3fw-pRc8QwBdxXc,6752
3
+ quantbenchx/bandwidth.py,sha256=mc_c4dBk7yPttbtmHPq4ZnhqqzcMIpW4qNNzoH0h-lY,9339
4
+ quantbenchx/cli.py,sha256=eZNva1ijrtB-u4te7JlyMqNv3QA2zkPjYmFnCHbukKE,5544
5
+ quantbenchx/compare.py,sha256=j0zRCXbEJ2igK26A3aOLEdFkgWr-ttuR1cmYzpY-bz0,3508
6
+ quantbenchx/imatrix.py,sha256=mwv5E83c3_Uiz0PTfA2FXmQbrrE4_zfgVf6LThSbpOw,6216
7
+ quantbenchx/layerwise.py,sha256=_35nsdRx9WDR3IYKSUNeGdXfbrL0XmszjyEqRE36SZo,5243
8
+ quantbenchx/matrix.py,sha256=aKhUazDpfDRL54g8hGJJNkJAyIG4zGe1t2n-oGNvGvc,9958
9
+ quantbenchx/perplexity.py,sha256=wllzEPTIwB4NLNdFmHChfdmTaTQyopCd504eOsLetMM,5767
10
+ quantbenchx/predict.py,sha256=lHM_zSM34l1Dh49pp_9-FR9bU3XJzT584MPTcAy60fc,3858
11
+ quantbenchx/profile.py,sha256=Re0KLFydI_jXvgLPKa0bjX_u2XATla246nYV6dwumZA,10258
12
+ quantbenchx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ quantbenchx/recommend.py,sha256=tqVMJymXhmLHP5v7RJVdTrnF3NHrpyuSAp2HlhbbmYQ,7768
14
+ quantbenchx/report.py,sha256=RE-Lhc5YyNTJl13De5UmjpJIb_E3z9WBc-7RYgsfLFs,7371
15
+ quantbenchx-0.3.0.dist-info/METADATA,sha256=hp1NU8i2Qvir4llB6RKCeTX5-uGYWwfz_Ds3si6nxrM,7987
16
+ quantbenchx-0.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
17
+ quantbenchx-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any