mlx-model-doctor 0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. mlx_model_doctor-0/.gitignore +21 -0
  2. mlx_model_doctor-0/CHANGELOG.md +204 -0
  3. mlx_model_doctor-0/EXAMPLES.md +484 -0
  4. mlx_model_doctor-0/LICENSE +201 -0
  5. mlx_model_doctor-0/NOTICE +8 -0
  6. mlx_model_doctor-0/PKG-INFO +178 -0
  7. mlx_model_doctor-0/README.md +146 -0
  8. mlx_model_doctor-0/ROADMAP.md +91 -0
  9. mlx_model_doctor-0/pyproject.toml +136 -0
  10. mlx_model_doctor-0/src/mlx_model_doctor/__init__.py +26 -0
  11. mlx_model_doctor-0/src/mlx_model_doctor/_version.py +24 -0
  12. mlx_model_doctor-0/src/mlx_model_doctor/api.py +81 -0
  13. mlx_model_doctor-0/src/mlx_model_doctor/checks/__init__.py +21 -0
  14. mlx_model_doctor-0/src/mlx_model_doctor/checks/base.py +16 -0
  15. mlx_model_doctor-0/src/mlx_model_doctor/checks/chat_template.py +168 -0
  16. mlx_model_doctor-0/src/mlx_model_doctor/checks/compat.py +97 -0
  17. mlx_model_doctor-0/src/mlx_model_doctor/checks/config.py +137 -0
  18. mlx_model_doctor-0/src/mlx_model_doctor/checks/files.py +48 -0
  19. mlx_model_doctor-0/src/mlx_model_doctor/checks/generation_config.py +94 -0
  20. mlx_model_doctor-0/src/mlx_model_doctor/checks/memory.py +268 -0
  21. mlx_model_doctor-0/src/mlx_model_doctor/checks/quantization.py +482 -0
  22. mlx_model_doctor-0/src/mlx_model_doctor/checks/safetensors.py +353 -0
  23. mlx_model_doctor-0/src/mlx_model_doctor/checks/smoke.py +196 -0
  24. mlx_model_doctor-0/src/mlx_model_doctor/checks/tokenizer.py +121 -0
  25. mlx_model_doctor-0/src/mlx_model_doctor/checks/vlm.py +130 -0
  26. mlx_model_doctor-0/src/mlx_model_doctor/checks/weights.py +129 -0
  27. mlx_model_doctor-0/src/mlx_model_doctor/cli.py +322 -0
  28. mlx_model_doctor-0/src/mlx_model_doctor/compat.py +84 -0
  29. mlx_model_doctor-0/src/mlx_model_doctor/context.py +116 -0
  30. mlx_model_doctor-0/src/mlx_model_doctor/environment.py +76 -0
  31. mlx_model_doctor-0/src/mlx_model_doctor/errors.py +47 -0
  32. mlx_model_doctor-0/src/mlx_model_doctor/exit_codes.py +30 -0
  33. mlx_model_doctor-0/src/mlx_model_doctor/memory.py +137 -0
  34. mlx_model_doctor-0/src/mlx_model_doctor/plugins/__init__.py +22 -0
  35. mlx_model_doctor-0/src/mlx_model_doctor/plugins/base.py +23 -0
  36. mlx_model_doctor-0/src/mlx_model_doctor/plugins/text.py +71 -0
  37. mlx_model_doctor-0/src/mlx_model_doctor/py.typed +0 -0
  38. mlx_model_doctor-0/src/mlx_model_doctor/report.py +213 -0
  39. mlx_model_doctor-0/src/mlx_model_doctor/runners/__init__.py +6 -0
  40. mlx_model_doctor-0/src/mlx_model_doctor/runners/core.py +34 -0
  41. mlx_model_doctor-0/src/mlx_model_doctor/runners/smoke.py +137 -0
  42. mlx_model_doctor-0/src/mlx_model_doctor/runners/static.py +13 -0
  43. mlx_model_doctor-0/src/mlx_model_doctor/safetensors_header.py +236 -0
  44. mlx_model_doctor-0/src/mlx_model_doctor/sampling.py +348 -0
  45. mlx_model_doctor-0/src/mlx_model_doctor/targets.py +350 -0
  46. mlx_model_doctor-0/tests/__init__.py +1 -0
  47. mlx_model_doctor-0/tests/conftest.py +43 -0
  48. mlx_model_doctor-0/tests/fakes.py +67 -0
  49. mlx_model_doctor-0/tests/live/README.md +4 -0
  50. mlx_model_doctor-0/tests/live/known-broken.toml +4 -0
  51. mlx_model_doctor-0/tests/live/known-good.toml +5 -0
  52. mlx_model_doctor-0/tests/test_api.py +412 -0
  53. mlx_model_doctor-0/tests/test_checks_chat_template.py +121 -0
  54. mlx_model_doctor-0/tests/test_checks_compat.py +147 -0
  55. mlx_model_doctor-0/tests/test_checks_config.py +158 -0
  56. mlx_model_doctor-0/tests/test_checks_files.py +39 -0
  57. mlx_model_doctor-0/tests/test_checks_generation_config.py +77 -0
  58. mlx_model_doctor-0/tests/test_checks_memory.py +312 -0
  59. mlx_model_doctor-0/tests/test_checks_quantization.py +634 -0
  60. mlx_model_doctor-0/tests/test_checks_safetensors.py +350 -0
  61. mlx_model_doctor-0/tests/test_checks_smoke.py +211 -0
  62. mlx_model_doctor-0/tests/test_checks_tokenizer.py +85 -0
  63. mlx_model_doctor-0/tests/test_checks_vlm.py +147 -0
  64. mlx_model_doctor-0/tests/test_checks_weights.py +135 -0
  65. mlx_model_doctor-0/tests/test_cli.py +655 -0
  66. mlx_model_doctor-0/tests/test_collection_gating.py +14 -0
  67. mlx_model_doctor-0/tests/test_compat.py +87 -0
  68. mlx_model_doctor-0/tests/test_context.py +266 -0
  69. mlx_model_doctor-0/tests/test_environment.py +74 -0
  70. mlx_model_doctor-0/tests/test_exit_codes.py +55 -0
  71. mlx_model_doctor-0/tests/test_fakes.py +23 -0
  72. mlx_model_doctor-0/tests/test_hf_target.py +208 -0
  73. mlx_model_doctor-0/tests/test_integration_assets.py +34 -0
  74. mlx_model_doctor-0/tests/test_live_models.py +505 -0
  75. mlx_model_doctor-0/tests/test_memory.py +35 -0
  76. mlx_model_doctor-0/tests/test_memory_caps.py +74 -0
  77. mlx_model_doctor-0/tests/test_package_surface.py +23 -0
  78. mlx_model_doctor-0/tests/test_packaging.py +100 -0
  79. mlx_model_doctor-0/tests/test_report.py +288 -0
  80. mlx_model_doctor-0/tests/test_runners.py +82 -0
  81. mlx_model_doctor-0/tests/test_runners_smoke.py +229 -0
  82. mlx_model_doctor-0/tests/test_safetensors_header.py +184 -0
  83. mlx_model_doctor-0/tests/test_sampling.py +31 -0
  84. mlx_model_doctor-0/tests/test_targets.py +176 -0
@@ -0,0 +1,21 @@
1
+ .codegraph/
2
+ .hypothesis/
3
+
4
+ # Local Claude Code agent guidance — not shipped in the repo (mac/ convention).
5
+ CLAUDE.md
6
+
7
+ # Workspace working-state must never live inside the repo (mac/ convention) —
8
+ # specs/plans/reviews and the granular backlog live at the workspace root.
9
+ docs/superpowers/
10
+ docs/backlog/
11
+
12
+ .coverage
13
+ .mypy_cache/
14
+ .pytest_cache/
15
+ .ruff_cache/
16
+ .venv/
17
+ __pycache__/
18
+ build/
19
+ dist/
20
+ *.egg-info/
21
+ src/mlx_model_doctor/_version.py
@@ -0,0 +1,204 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.5.1] — 2026-06-18
9
+
10
+ Dependency housekeeping. Nothing about the checks or the report output changed;
11
+ this release only adjusts what gets installed and which Python versions are tested.
12
+
13
+ ### Changed
14
+ - The `huggingface-hub` floor is now `>=1.0`. The tool is built and tested against
15
+ the 1.x line, so the old `>=0.24` floor described a setup that was never tested.
16
+
17
+ ### Removed
18
+ - `safetensors` is no longer a runtime dependency. The validator reads the
19
+ safetensors header straight from the file bytes and gets Hugging Face metadata
20
+ through `huggingface-hub`, so it never imported the `safetensors` package.
21
+ Installs are a little lighter. If you have it anyway (for example via the
22
+ `[mlx-lm]` extra), the `version` command still reports it.
23
+
24
+ ### Added
25
+ - Python 3.14 is now tested in CI and listed in the package classifiers.
26
+
27
+ ## [0.5.0] — 2026-06-15
28
+
29
+ The integration on-ramp: run the validator in other people's CI and pre-commit,
30
+ not just by hand.
31
+
32
+ ### Added
33
+ - GitHub Action (`IonDen/mlx-model-doctor@v0`): a composite action that runs the
34
+ static checks on `ubuntu-latest` (no weights, no GPU), writes the report to the
35
+ job summary, sets `pass` / `warn` / `fail` / `skip` / `exit-code` /
36
+ `schema-version` step outputs, and fails the job under your fail policy. Inputs
37
+ mirror the CLI (`source`, `target`, `fail-on`, `max-memory`, `context-length`,
38
+ `skip-weights`, `version`); pin the installed release with `version: "==0.5.0"`.
39
+ - pre-commit hook (`id: mlx-model-doctor`): runs `check local` on a model
40
+ directory you keep in git, with an overridable `args` for the path.
41
+ - `--format github`: emits GitHub Actions annotations — one `::error` or
42
+ `::warning` per failing or warning check, plus a `::notice` summary. Inside a
43
+ workflow it also appends the Markdown report to `$GITHUB_STEP_SUMMARY` and the
44
+ counts to `$GITHUB_OUTPUT`.
45
+ - A documented output contract: `--format json` carries a `schema_version`
46
+ (`1.0`), the `summary` counts, and a `results` array of frozen check records;
47
+ the exit codes (`0` pass, `1` failures, `2` tool error or zero checks) are
48
+ fixed. See the README "Output contract" section.
49
+
50
+ ## [0.4.3] — 2026-06-14
51
+
52
+ ### Fixed
53
+ - The memory estimate now accounts for mixed-precision quantization. A model can
54
+ give individual layers their own bit width, such as 4-bit experts alongside
55
+ 8-bit dense, router, and head layers. The estimate had applied the model-level
56
+ bit width to every weight, so it underreported the memory the model needs.
57
+ It now takes the weight figure from the measured weight-file sizes, which
58
+ already reflect each layer's precision, and adds the context-length KV-cache
59
+ term. If the file sizes can't all be read, it reports the estimate as
60
+ unverified rather than a number that is too low, so the optional `--smoke`
61
+ preflight no longer lets through a load that won't fit. Single-precision models
62
+ are unaffected.
63
+ - The source distribution no longer bundles local working-tree files. The sdist
64
+ is built from an explicit list of what belongs in it (the package, the tests,
65
+ and the README, license, and changelog files), so a local build can't pull in
66
+ editor or tool state. The published wheel was already limited to the package.
67
+
68
+ ## [0.4.2] — 2026-06-13
69
+
70
+ ### Fixed
71
+ - The quantization-mode check (`text/quantization.mode`) now validates every
72
+ layer, not just the model-level default. An MLX `quantization` block can give
73
+ individual layers their own `mode`, `bits`, and `group_size` — a mixed-precision
74
+ model often pairs 4-bit experts with 8-bit dense, router, and gate layers. The
75
+ check had read only the top-level values, so a broken per-layer entry slipped
76
+ through unnoticed. It now resolves each layer's own values and checks them
77
+ against the MLX table: an unknown per-layer mode fails, and an off-table or
78
+ otherwise invalid value warns. Valid mixed-precision models still pass. This is
79
+ the companion to the v0.4.1 shape-check fix.
80
+
81
+ ## [0.4.1] — 2026-06-12
82
+
83
+ ### Fixed
84
+ - The quantized-shape check (`text/quantization.shape`) no longer reports a
85
+ load-blocking failure for valid mixed-precision models. An MLX `quantization`
86
+ block can give individual layers their own `bits` and `group_size`; a common
87
+ pattern is 4-bit experts alongside 8-bit dense, router, and gate layers. The
88
+ check had applied the model-level values to every layer, so a model like
89
+ `mlx-community/gpt-oss-20b-MXFP4-Q8` or an nvfp4 mixture-of-experts repository
90
+ failed even though it loads fine. It now reads each layer's own values, and flags
91
+ a layer whose bit width it cannot recognize as unverified instead of failing it.
92
+
93
+ ## [0.4.0] — 2026-06-07
94
+
95
+ Two static checks, both metadata-only, added to the built-in `text` plugin.
96
+
97
+ ### Added
98
+ - MLX-compatibility signal (`text/compat.mlx_signal`): a single `check local` or
99
+ `check hf` now reports whether a repository looks like an MLX / mlx-lm model and
100
+ which signals say so — an MLX `quantization` block, an `mlx-community` author,
101
+ MLX tags or library metadata, quantized weights in the safetensors header, or an
102
+ `mlx`/`4bit`/`8bit` name hint. It is informational and never fails the run. The
103
+ same signal logic now backs the `sample hf` survey, so the single-repo and survey
104
+ paths agree on what counts as MLX.
105
+ - Vision-language image-processor check (`text/vlm.image_processor`): a
106
+ vision-language repository that declares no way to resolve an image processor —
107
+ no `image_processor_type`, custom `auto_map`, feature extractor, or
108
+ `processor_class` — is flagged before you load it, since the standard
109
+ image-processor path may be unable to resolve it. Repositories that do declare a
110
+ resolution path pass, and text-only repositories are skipped. Validated against
111
+ live Qwen2.5-VL, InternVL3, and Qwen2-Audio repositories.
112
+
113
+ ## [0.3.0] — 2026-06-06
114
+
115
+ Deep weight inspection: read the safetensors *header* — on the Hub over an HTTP
116
+ range request, still no weight download — to add four tensor-level checks that
117
+ JSON-only metadata can't see. They run by default on a single `check`; `sample
118
+ hf` stays a config-only survey.
119
+
120
+ ### Added
121
+ - Safetensors offset scan (`text/safetensors.offsets`): the tensor byte-offsets
122
+ in the header don't overlap and aren't out of bounds. A corrupt header fails
123
+ at load; this catches it first. On a local file the data-section upper bound
124
+ is checked too; on the Hub the header length isn't exposed, so the upper-bound
125
+ check is skipped (and reported as skipped) while overlap and ordering still run.
126
+ - Weight-map parameter sanity (`text/weights.param_count`): every tensor the
127
+ weight map references exists in a shard header, and the parameter count isn't
128
+ zero — an internal consistency check, not a config-derived parameter recount.
129
+ - Tied-embedding consistency (`text/weights.tied_embedding`): a declared
130
+ `tie_word_embeddings` matches which embedding and output-head tensors are
131
+ actually stored. A declared-but-contradicted tie loads silently wrong.
132
+ - MLX quantized shape consistency (`text/quantization.shape`): each quantized
133
+ layer's packed-weight and scales shapes agree with the config's bits and group
134
+ size (`packed_last * 32 / bits == scales_last * group_size`). A mismatch won't
135
+ load.
136
+ - A safetensors header reader: local targets parse the header off disk, Hugging
137
+ Face targets fetch it through `huggingface_hub.get_safetensors_metadata` (a
138
+ range request), with the tensor map exposed to checks as a shared, cached read.
139
+
140
+ ### Changed
141
+ - The four tensor-header checks run by default on `check local` / `check hf`.
142
+ The reserved `--include-weights` flag is replaced by an opt-out `--skip-weights`
143
+ for a faster config-only pass. `sample hf` is unchanged (config-only).
144
+
145
+ ## [0.2.0] — 2026-06-05
146
+
147
+ Static correctness expansion: four config-level checks that catch the "loads
148
+ fine, then crashes at generation or fails at MLX convert" class of problems,
149
+ without downloading weights.
150
+
151
+ ### Added
152
+ - Chat-template presence (`text/chat_template.presence`): a chat/instruct model
153
+ declares a chat template in `tokenizer_config.json` or a sibling
154
+ `chat_template.jinja`. A missing template only crashes at `apply_chat_template`
155
+ time, never at load.
156
+ - Chat-template token consistency (`text/chat_template.special_tokens`): the
157
+ end-of-turn token the template emits is a registered special token. A
157
+ one-character typo in a stop token loads fine and then never stops generating.
159
+ - Generation token IDs (`text/generation_config.tokens`): `eos` / `pad` / `bos`
160
+ IDs are present and agree across `config.json`, `generation_config.json`, and
161
+ `tokenizer_config.json`.
162
+ - MLX quantization mode (`text/quantization.mode`): validates the quantization
163
+ mode and its group size and bit width against what MLX accepts (`affine`,
164
+ `mxfp4`, `mxfp8`, `nvfp4`). An unknown mode is a hard failure, since MLX
165
+ rejects it at convert or load.
166
+ - Size-bounded reads: untrusted metadata files are checked against a size cap
167
+ before they are read, so a malicious or corrupt repo cannot make the tool pull
168
+ a huge file into memory.
169
+
170
+ ### Fixed
171
+ - `sample hf --limit N` now over-fetches before filtering, so it checks up to `N`
172
+ MLX candidates even when an author's listing leads with non-MLX repos
173
+ (best-effort within a capped window).
174
+ - `_positive_device_bytes` no longer treats a boolean device value as a byte
175
+ count.
176
+ - README quantization wording no longer implies tensor-level validation; the
177
+ quantization checks are config-level.
178
+ - The release workflow uses a Node 24 build of `actions/download-artifact`, ahead
179
+ of the GitHub Node 20 sunset.
180
+
181
+ ## [0.1.0] — 2026-06-04
182
+
183
+ Initial public release.
184
+
185
+ ### Added
186
+ - Static validation for local model repositories (`check local <path>` /
187
+ `check_local_model`): config presence and consistency, tokenizer files and
188
+ special tokens, safetensors-index integrity, quantization metadata, and a
189
+ context-length-aware memory-budget estimate. The static checks read repository
190
+ metadata only — no MLX or GPU, and no weight download.
191
+ - Hugging Face target (`check hf <repo_id>` / `check_hf_model`): the same checks
192
+ against a Hub repository, reading metadata over `huggingface-hub`. Auth,
193
+ not-found, and rate-limit failures surface as a clear tool error.
194
+ - `sample hf`: survey an author's likely-MLX repositories and validate a
195
+ deterministic sample as a batch report. A per-model error is recorded as a
196
+ batch item and the run continues; a listing failure is a tool error.
197
+ - Optional memory-safe `mlx-lm` smoke check (`--smoke`, `mlx-lm` extra): loads
198
+ the model under an MLX wired-memory cap and refuses to load if the cap cannot
199
+ be installed, so a smoke run can't push the machine into a memory panic.
200
+ - Reports render to text, JSON, and Markdown; results are frozen dataclasses, so
201
+ output is stable to diff. Exit codes: `0` pass, `1` fail-under-policy,
202
+ `2` tool error or zero checks — tunable with `--fail-on`.
203
+ - `version`, `man`, and `plugins` commands; the built-in `text` plugin; a
204
+ `py.typed` marker.