mlx-model-doctor 0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlx_model_doctor-0/.gitignore +21 -0
- mlx_model_doctor-0/CHANGELOG.md +204 -0
- mlx_model_doctor-0/EXAMPLES.md +484 -0
- mlx_model_doctor-0/LICENSE +201 -0
- mlx_model_doctor-0/NOTICE +8 -0
- mlx_model_doctor-0/PKG-INFO +178 -0
- mlx_model_doctor-0/README.md +146 -0
- mlx_model_doctor-0/ROADMAP.md +91 -0
- mlx_model_doctor-0/pyproject.toml +136 -0
- mlx_model_doctor-0/src/mlx_model_doctor/__init__.py +26 -0
- mlx_model_doctor-0/src/mlx_model_doctor/_version.py +24 -0
- mlx_model_doctor-0/src/mlx_model_doctor/api.py +81 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/__init__.py +21 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/base.py +16 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/chat_template.py +168 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/compat.py +97 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/config.py +137 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/files.py +48 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/generation_config.py +94 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/memory.py +268 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/quantization.py +482 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/safetensors.py +353 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/smoke.py +196 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/tokenizer.py +121 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/vlm.py +130 -0
- mlx_model_doctor-0/src/mlx_model_doctor/checks/weights.py +129 -0
- mlx_model_doctor-0/src/mlx_model_doctor/cli.py +322 -0
- mlx_model_doctor-0/src/mlx_model_doctor/compat.py +84 -0
- mlx_model_doctor-0/src/mlx_model_doctor/context.py +116 -0
- mlx_model_doctor-0/src/mlx_model_doctor/environment.py +76 -0
- mlx_model_doctor-0/src/mlx_model_doctor/errors.py +47 -0
- mlx_model_doctor-0/src/mlx_model_doctor/exit_codes.py +30 -0
- mlx_model_doctor-0/src/mlx_model_doctor/memory.py +137 -0
- mlx_model_doctor-0/src/mlx_model_doctor/plugins/__init__.py +22 -0
- mlx_model_doctor-0/src/mlx_model_doctor/plugins/base.py +23 -0
- mlx_model_doctor-0/src/mlx_model_doctor/plugins/text.py +71 -0
- mlx_model_doctor-0/src/mlx_model_doctor/py.typed +0 -0
- mlx_model_doctor-0/src/mlx_model_doctor/report.py +213 -0
- mlx_model_doctor-0/src/mlx_model_doctor/runners/__init__.py +6 -0
- mlx_model_doctor-0/src/mlx_model_doctor/runners/core.py +34 -0
- mlx_model_doctor-0/src/mlx_model_doctor/runners/smoke.py +137 -0
- mlx_model_doctor-0/src/mlx_model_doctor/runners/static.py +13 -0
- mlx_model_doctor-0/src/mlx_model_doctor/safetensors_header.py +236 -0
- mlx_model_doctor-0/src/mlx_model_doctor/sampling.py +348 -0
- mlx_model_doctor-0/src/mlx_model_doctor/targets.py +350 -0
- mlx_model_doctor-0/tests/__init__.py +1 -0
- mlx_model_doctor-0/tests/conftest.py +43 -0
- mlx_model_doctor-0/tests/fakes.py +67 -0
- mlx_model_doctor-0/tests/live/README.md +4 -0
- mlx_model_doctor-0/tests/live/known-broken.toml +4 -0
- mlx_model_doctor-0/tests/live/known-good.toml +5 -0
- mlx_model_doctor-0/tests/test_api.py +412 -0
- mlx_model_doctor-0/tests/test_checks_chat_template.py +121 -0
- mlx_model_doctor-0/tests/test_checks_compat.py +147 -0
- mlx_model_doctor-0/tests/test_checks_config.py +158 -0
- mlx_model_doctor-0/tests/test_checks_files.py +39 -0
- mlx_model_doctor-0/tests/test_checks_generation_config.py +77 -0
- mlx_model_doctor-0/tests/test_checks_memory.py +312 -0
- mlx_model_doctor-0/tests/test_checks_quantization.py +634 -0
- mlx_model_doctor-0/tests/test_checks_safetensors.py +350 -0
- mlx_model_doctor-0/tests/test_checks_smoke.py +211 -0
- mlx_model_doctor-0/tests/test_checks_tokenizer.py +85 -0
- mlx_model_doctor-0/tests/test_checks_vlm.py +147 -0
- mlx_model_doctor-0/tests/test_checks_weights.py +135 -0
- mlx_model_doctor-0/tests/test_cli.py +655 -0
- mlx_model_doctor-0/tests/test_collection_gating.py +14 -0
- mlx_model_doctor-0/tests/test_compat.py +87 -0
- mlx_model_doctor-0/tests/test_context.py +266 -0
- mlx_model_doctor-0/tests/test_environment.py +74 -0
- mlx_model_doctor-0/tests/test_exit_codes.py +55 -0
- mlx_model_doctor-0/tests/test_fakes.py +23 -0
- mlx_model_doctor-0/tests/test_hf_target.py +208 -0
- mlx_model_doctor-0/tests/test_integration_assets.py +34 -0
- mlx_model_doctor-0/tests/test_live_models.py +505 -0
- mlx_model_doctor-0/tests/test_memory.py +35 -0
- mlx_model_doctor-0/tests/test_memory_caps.py +74 -0
- mlx_model_doctor-0/tests/test_package_surface.py +23 -0
- mlx_model_doctor-0/tests/test_packaging.py +100 -0
- mlx_model_doctor-0/tests/test_report.py +288 -0
- mlx_model_doctor-0/tests/test_runners.py +82 -0
- mlx_model_doctor-0/tests/test_runners_smoke.py +229 -0
- mlx_model_doctor-0/tests/test_safetensors_header.py +184 -0
- mlx_model_doctor-0/tests/test_sampling.py +31 -0
- mlx_model_doctor-0/tests/test_targets.py +176 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
.codegraph/
|
|
2
|
+
.hypothesis/
|
|
3
|
+
|
|
4
|
+
# Local Claude Code agent guidance — not shipped in the repo (mac/ convention).
|
|
5
|
+
CLAUDE.md
|
|
6
|
+
|
|
7
|
+
# Workspace working-state must never live inside the repo (mac/ convention) —
|
|
8
|
+
# specs/plans/reviews and the granular backlog live at the workspace root.
|
|
9
|
+
docs/superpowers/
|
|
10
|
+
docs/backlog/
|
|
11
|
+
|
|
12
|
+
.coverage
|
|
13
|
+
.mypy_cache/
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
.ruff_cache/
|
|
16
|
+
.venv/
|
|
17
|
+
__pycache__/
|
|
18
|
+
build/
|
|
19
|
+
dist/
|
|
20
|
+
*.egg-info/
|
|
21
|
+
src/mlx_model_doctor/_version.py
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.5.1] — 2026-06-18
|
|
9
|
+
|
|
10
|
+
Dependency housekeeping. Nothing about the checks or the report output changed;
|
|
11
|
+
this release only adjusts what gets installed and which Python versions are tested.
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
- The `huggingface-hub` floor is now `>=1.0`. The tool is built and tested against
|
|
15
|
+
the 1.x line, so the old `>=0.24` floor described a setup that was never tested.
|
|
16
|
+
|
|
17
|
+
### Removed
|
|
18
|
+
- `safetensors` is no longer a runtime dependency. The validator reads the
|
|
19
|
+
safetensors header straight from the file bytes and gets Hugging Face metadata
|
|
20
|
+
through `huggingface-hub`, so it never imported the `safetensors` package.
|
|
21
|
+
Installs are a little lighter. If you have it anyway (for example via the
|
|
22
|
+
`[mlx-lm]` extra), the `version` command still reports it.
|
|
23
|
+
|
|
24
|
+
### Added
|
|
25
|
+
- Python 3.14 is now tested in CI and listed in the package classifiers.
|
|
26
|
+
|
|
27
|
+
## [0.5.0] — 2026-06-15
|
|
28
|
+
|
|
29
|
+
The integration on-ramp: run the validator in other people's CI and pre-commit,
|
|
30
|
+
not just by hand.
|
|
31
|
+
|
|
32
|
+
### Added
|
|
33
|
+
- GitHub Action (`IonDen/mlx-model-doctor@v0`): a composite action that runs the
|
|
34
|
+
static checks on `ubuntu-latest` (no weights, no GPU), writes the report to the
|
|
35
|
+
job summary, sets `pass` / `warn` / `fail` / `skip` / `exit-code` /
|
|
36
|
+
`schema-version` step outputs, and fails the job under your fail policy. Inputs
|
|
37
|
+
mirror the CLI (`source`, `target`, `fail-on`, `max-memory`, `context-length`,
|
|
38
|
+
`skip-weights`, `version`); pin the installed release with `version: "==0.5.0"`.
|
|
39
|
+
- pre-commit hook (`id: mlx-model-doctor`): runs `check local` on a model
|
|
40
|
+
directory you keep in git, with an overridable `args` for the path.
|
|
41
|
+
- `--format github`: emits GitHub Actions annotations — one `::error` or
|
|
42
|
+
`::warning` per failing or warning check, plus a `::notice` summary. Inside a
|
|
43
|
+
workflow it also appends the Markdown report to `$GITHUB_STEP_SUMMARY` and the
|
|
44
|
+
counts to `$GITHUB_OUTPUT`.
|
|
45
|
+
- A documented output contract: `--format json` carries a `schema_version`
|
|
46
|
+
(`1.0`), the `summary` counts, and a `results` array of frozen check records;
|
|
47
|
+
the exit codes (`0` pass, `1` failures, `2` tool error or zero checks) are
|
|
48
|
+
fixed. See the README "Output contract" section.
|
|
49
|
+
|
|
50
|
+
## [0.4.3] — 2026-06-14
|
|
51
|
+
|
|
52
|
+
### Fixed
|
|
53
|
+
- The memory estimate now accounts for mixed-precision quantization. A model can
|
|
54
|
+
give individual layers their own bit width, such as 4-bit experts alongside
|
|
55
|
+
8-bit dense, router, and head layers. The estimate had applied the model-level
|
|
56
|
+
bit width to every weight, so it underreported the memory the model needs.
|
|
57
|
+
It now takes the weight figure from the measured weight-file sizes, which
|
|
58
|
+
already reflect each layer's precision, and adds the context-length KV-cache
|
|
59
|
+
term. If the file sizes can't all be read, it reports the estimate as
|
|
60
|
+
unverified rather than a number that is too low, so the optional `--smoke`
|
|
61
|
+
preflight no longer lets through a load that won't fit. Single-precision models
|
|
62
|
+
are unaffected.
|
|
63
|
+
- The source distribution no longer bundles local working-tree files. The sdist
|
|
64
|
+
is built from an explicit list of what belongs in it (the package, the tests,
|
|
65
|
+
and the README, license, and changelog files), so a local build can't pull in
|
|
66
|
+
editor or tool state. The published wheel was already limited to the package.
|
|
67
|
+
|
|
68
|
+
## [0.4.2] — 2026-06-13
|
|
69
|
+
|
|
70
|
+
### Fixed
|
|
71
|
+
- The quantization-mode check (`text/quantization.mode`) now validates every
|
|
72
|
+
layer, not just the model-level default. An MLX `quantization` block can give
|
|
73
|
+
individual layers their own `mode`, `bits`, and `group_size` — a mixed-precision
|
|
74
|
+
model often pairs 4-bit experts with 8-bit dense, router, and gate layers. The
|
|
75
|
+
check had read only the top-level values, so a broken per-layer entry slipped
|
|
76
|
+
through unnoticed. It now resolves each layer's own values and checks them
|
|
77
|
+
against the MLX table: an unknown per-layer mode fails, and an off-table or
|
|
78
|
+
otherwise invalid value warns. Valid mixed-precision models still pass. This is
|
|
79
|
+
the companion to the v0.4.1 shape-check fix.
|
|
80
|
+
|
|
81
|
+
## [0.4.1] — 2026-06-12
|
|
82
|
+
|
|
83
|
+
### Fixed
|
|
84
|
+
- The quantized-shape check (`text/quantization.shape`) no longer reports a
|
|
85
|
+
load-blocking failure for valid mixed-precision models. An MLX `quantization`
|
|
86
|
+
block can give individual layers their own `bits` and `group_size`; a common
|
|
87
|
+
pattern is 4-bit experts alongside 8-bit dense, router, and gate layers. The
|
|
88
|
+
check had applied the model-level values to every layer, so a model like
|
|
89
|
+
`mlx-community/gpt-oss-20b-MXFP4-Q8` or an nvfp4 mixture-of-experts repository
|
|
90
|
+
failed even though it loads fine. It now reads each layer's own values, and flags
|
|
91
|
+
a layer whose bit width it cannot recognize as unverified instead of failing it.
|
|
92
|
+
|
|
93
|
+
## [0.4.0] — 2026-06-07
|
|
94
|
+
|
|
95
|
+
Two static checks, both metadata-only, added to the built-in `text` plugin.
|
|
96
|
+
|
|
97
|
+
### Added
|
|
98
|
+
- MLX-compatibility signal (`text/compat.mlx_signal`): a single `check local` or
|
|
99
|
+
`check hf` now reports whether a repository looks like an MLX / mlx-lm model and
|
|
100
|
+
which signals say so — an MLX `quantization` block, an `mlx-community` author,
|
|
101
|
+
MLX tags or library metadata, quantized weights in the safetensors header, or an
|
|
102
|
+
`mlx`/`4bit`/`8bit` name hint. It is informational and never fails the run. The
|
|
103
|
+
same signal logic now backs the `sample hf` survey, so the single-repo and survey
|
|
104
|
+
paths agree on what counts as MLX.
|
|
105
|
+
- Vision-language image-processor check (`text/vlm.image_processor`): a
|
|
106
|
+
vision-language repository that declares no way to resolve an image processor —
|
|
107
|
+
no `image_processor_type`, custom `auto_map`, feature extractor, or
|
|
108
|
+
`processor_class` — is flagged before you load it, since the standard
|
|
109
|
+
image-processor path may be unable to resolve it. Repositories that do declare a
|
|
110
|
+
resolution path pass, and text-only repositories are skipped. Validated against
|
|
111
|
+
live Qwen2.5-VL, InternVL3, and Qwen2-Audio repositories.
|
|
112
|
+
|
|
113
|
+
## [0.3.0] — 2026-06-06
|
|
114
|
+
|
|
115
|
+
Deep weight inspection: read the safetensors *header* — on the Hub over an HTTP
|
|
116
|
+
range request, still no weight download — to add four tensor-level checks that
|
|
117
|
+
JSON-only metadata can't see. They run by default on a single `check`; `sample
|
|
118
|
+
hf` stays a config-only survey.
|
|
119
|
+
|
|
120
|
+
### Added
|
|
121
|
+
- Safetensors offset scan (`text/safetensors.offsets`): the tensor byte-offsets
|
|
122
|
+
in the header don't overlap and aren't out of bounds. A corrupt header fails
|
|
123
|
+
at load; this catches it first. On a local file the data-section upper bound
|
|
124
|
+
is checked too; on the Hub the header length isn't exposed, so the upper-bound
|
|
125
|
+
check is skipped (and reported as such) while overlap and ordering still run.
|
|
126
|
+
- Weight-map parameter sanity (`text/weights.param_count`): every tensor the
|
|
127
|
+
weight map references exists in a shard header, and the parameter count isn't
|
|
128
|
+
zero — an internal consistency check, not a config-derived parameter recount.
|
|
129
|
+
- Tied-embedding consistency (`text/weights.tied_embedding`): a declared
|
|
130
|
+
`tie_word_embeddings` matches which embedding and output-head tensors are
|
|
131
|
+
actually stored. A declared-but-contradicted tie loads silently wrong.
|
|
132
|
+
- MLX quantized shape consistency (`text/quantization.shape`): each quantized
|
|
133
|
+
layer's packed-weight and scales shapes agree with the config's bits and group
|
|
134
|
+
size (`packed_last * 32 / bits == scales_last * group_size`). A mismatch won't
|
|
135
|
+
load.
|
|
136
|
+
- A safetensors header reader: local targets parse the header off disk, Hugging
|
|
137
|
+
Face targets fetch it through `huggingface_hub.get_safetensors_metadata` (a
|
|
138
|
+
range request), with the tensor map exposed to checks as a shared, cached read.
|
|
139
|
+
|
|
140
|
+
### Changed
|
|
141
|
+
- The four tensor-header checks run by default on `check local` / `check hf`.
|
|
142
|
+
The reserved `--include-weights` flag is replaced by an opt-out `--skip-weights`
|
|
143
|
+
for a faster config-only pass. `sample hf` is unchanged (config-only).
|
|
144
|
+
|
|
145
|
+
## [0.2.0] — 2026-06-05
|
|
146
|
+
|
|
147
|
+
Static correctness expansion: four config-level checks that catch the "loads
|
|
148
|
+
fine, then crashes at generation or fails at MLX convert" class of problems,
|
|
149
|
+
without downloading weights.
|
|
150
|
+
|
|
151
|
+
### Added
|
|
152
|
+
- Chat-template presence (`text/chat_template.presence`): a chat/instruct model
|
|
153
|
+
declares a chat template in `tokenizer_config.json` or a sibling
|
|
154
|
+
`chat_template.jinja`. A missing template only crashes at `apply_chat_template`
|
|
155
|
+
time, never at load.
|
|
156
|
+
- Chat-template token consistency (`text/chat_template.special_tokens`): the
|
|
157
|
+
end-of-turn token the template emits is a registered special token. A
|
|
158
|
+
one-character typo in a stop token loads fine and then never stops generating.
|
|
159
|
+
- Generation token IDs (`text/generation_config.tokens`): `eos` / `pad` / `bos`
|
|
160
|
+
IDs are present and agree across `config.json`, `generation_config.json`, and
|
|
161
|
+
`tokenizer_config.json`.
|
|
162
|
+
- MLX quantization mode (`text/quantization.mode`): validates the quantization
|
|
163
|
+
mode and its group size and bit width against what MLX accepts (`affine`,
|
|
164
|
+
`mxfp4`, `mxfp8`, `nvfp4`). An unknown mode is a hard failure, since MLX
|
|
165
|
+
rejects it at convert or load.
|
|
166
|
+
- Size-bounded reads: untrusted metadata files are checked against a size cap
|
|
167
|
+
before they are read, so a malicious or corrupt repo cannot make the tool pull
|
|
168
|
+
a huge file into memory.
|
|
169
|
+
|
|
170
|
+
### Fixed
|
|
171
|
+
- `sample hf --limit N` now over-fetches before filtering, so it checks up to `N`
|
|
172
|
+
MLX candidates even when an author's listing leads with non-MLX repos
|
|
173
|
+
(best-effort within a capped window).
|
|
174
|
+
- `_positive_device_bytes` no longer treats a boolean device value as a byte
|
|
175
|
+
count.
|
|
176
|
+
- README quantization wording no longer implies tensor-level validation; the
|
|
177
|
+
quantization checks are config-level.
|
|
178
|
+
- The release workflow uses a Node 24 build of `actions/download-artifact`, ahead
|
|
179
|
+
of the GitHub Node 20 sunset.
|
|
180
|
+
|
|
181
|
+
## [0.1.0] — 2026-06-04
|
|
182
|
+
|
|
183
|
+
Initial public release.
|
|
184
|
+
|
|
185
|
+
### Added
|
|
186
|
+
- Static validation for local model repositories (`check local <path>` /
|
|
187
|
+
`check_local_model`): config presence and consistency, tokenizer files and
|
|
188
|
+
special tokens, safetensors-index integrity, quantization metadata, and a
|
|
189
|
+
context-length-aware memory-budget estimate. The static checks read repository
|
|
190
|
+
metadata only — no MLX or GPU, and no weight download.
|
|
191
|
+
- Hugging Face target (`check hf <repo_id>` / `check_hf_model`): the same checks
|
|
192
|
+
against a Hub repository, reading metadata over `huggingface-hub`. Auth,
|
|
193
|
+
not-found, and rate-limit failures surface as a clear tool error.
|
|
194
|
+
- `sample hf`: survey an author's likely-MLX repositories and validate a
|
|
195
|
+
deterministic sample as a batch report. A per-model error is recorded as a
|
|
196
|
+
batch item and the run continues; a listing failure is a tool error.
|
|
197
|
+
- Optional memory-safe `mlx-lm` smoke check (`--smoke`, `mlx-lm` extra): loads
|
|
198
|
+
the model under an MLX wired-memory cap and refuses to load if the cap cannot
|
|
199
|
+
be installed, so a smoke run can't push the machine into a memory panic.
|
|
200
|
+
- Reports render to text, JSON, and Markdown; results are frozen dataclasses, so
|
|
201
|
+
output is stable to diff. Exit codes: `0` pass, `1` fail-under-policy,
|
|
202
|
+
`2` tool error or zero checks — tunable with `--fail-on`.
|
|
203
|
+
- `version`, `man`, and `plugins` commands; the built-in `text` plugin; a
|
|
204
|
+
`py.typed` marker.
|