PyPI - coreml-diffusion - Versions diffs - 0.1.4__tar.gz → 0.1.6__tar.gz - Mend

coreml-diffusion 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

coreml_diffusion-0.1.6/.release-please-manifest.json ADDED Viewed

@@ -0,0 +1,3 @@
+{
+  ".": "0.1.6"
+}

{coreml_diffusion-0.1.4 → coreml_diffusion-0.1.6}/CHANGELOG.md RENAMED Viewed

@@ -1,5 +1,19 @@
 # Changelog
+## [0.1.6](https://github.com/aszc-dev/coreml-diffusion/compare/v0.1.5...v0.1.6) (2026-06-13)
+### 🐛 Bug Fixes
+* **deps:** drop the stale &lt;3.13 Python cap (allow &gt;=3.12) ([9b44a5a](https://github.com/aszc-dev/coreml-diffusion/commit/9b44a5a4a4118a99c924ccb8301d4e1a300c3b01))
+## [0.1.5](https://github.com/aszc-dev/coreml-diffusion/compare/v0.1.4...v0.1.5) (2026-06-13)
+### ✨ Features
+* **convert:** auto-detect model version from the checkpoint ([2a24d4e](https://github.com/aszc-dev/coreml-diffusion/commit/2a24d4efd196100dbdd0bf9d5dd61c6cce31d2ac))
 ## [0.1.4](https://github.com/aszc-dev/coreml-diffusion/compare/v0.1.3...v0.1.4) (2026-06-13)

{coreml_diffusion-0.1.4 → coreml_diffusion-0.1.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: coreml-diffusion
-Version: 0.1.4
+Version: 0.1.6
 Summary: Convert diffusion-model checkpoints (SD1.5/SDXL) to Core ML for Apple Neural Engine — framework-free, ComfyUI-independent.
 Project-URL: Homepage, https://github.com/aszc-dev/coreml-diffusion
 Project-URL: Repository, https://github.com/aszc-dev/coreml-diffusion
@@ -14,10 +14,11 @@ Classifier: Intended Audience :: Developers
 Classifier: Operating System :: MacOS
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Multimedia :: Graphics :: Graphics Conversion
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Typing :: Typed
-Requires-Python: <3.13,>=3.12
+Requires-Python: >=3.12
 Requires-Dist: coremltools<10,>=9
 Requires-Dist: diffusers>=0.30
 Requires-Dist: numpy<3,>=2

{coreml_diffusion-0.1.4 → coreml_diffusion-0.1.6}/coreml_diffusion/__init__.py RENAMED Viewed

@@ -44,6 +44,7 @@ __all__ = [
     "compose_component_name",
     "lora_names_from_params",
     "convert",
+    "detect_model_version",
     "build_pipeline",
     "CoreMLUNet",
     "CoreMLVAE",
@@ -123,7 +124,23 @@ def __getattr__(name):
     if name == "convert":
         from coreml_diffusion.convert import convert as _convert
+        # Importing the submodule binds ``coreml_diffusion.convert`` to the
+        # MODULE as a side effect, which shadows this function on every later
+        # access (a module object isn't callable). Rebind the package attribute
+        # to the function so ``coreml_diffusion.convert(...)`` stays callable in
+        # long-lived processes (e.g. a ComfyUI server doing >1 conversion).
+        globals()["convert"] = _convert
         return _convert
+    if name == "detect_model_version":
+        # Lives in the framework-free state_dict module (reads only the
+        # safetensors header), so exposing it never drags coremltools/diffusers
+        # into the import path.
+        from coreml_diffusion.conversion.state_dict import (
+            detect_model_version as _detect,
+        )
+        globals()["detect_model_version"] = _detect
+        return _detect
     if name in ("build_pipeline", "CoreMLUNet", "CoreMLVAE", "CoreMLTextEncoder"):
         from coreml_diffusion import inference

{coreml_diffusion-0.1.4 → coreml_diffusion-0.1.6}/coreml_diffusion/cli.py RENAMED Viewed

@@ -33,9 +33,14 @@ def _convert_cmd(args):
     sample_size = (args.height // 8, args.width // 8)
     lora_weights = [_parse_lora(spec) for spec in (args.lora or [])]
     ckpt = sources.resolve_checkpoint(args.ckpt, args.source)
+    model_version = (
+        coreml_diffusion.ModelVersion[args.model_version]
+        if args.model_version
+        else None
+    )
     coreml_diffusion.convert(
         ckpt,
-        coreml_diffusion.ModelVersion[args.model_version],
+        model_version,
         args.out,
         component=args.component,
         batch_size=args.batch_size,
@@ -94,11 +99,13 @@ def build_parser():
     )
     conv.add_argument(
         "--model-version",
-        required=True,
-        # include experimental: the CLI is the power-user path. Experimental
-        # versions (LCM, SDXL_REFINER) convert but are not golden-verified.
+        default=None,
+        # Auto-detected from the checkpoint when omitted. Choices stay available
+        # as an explicit override (the CLI is the power-user path; experimental
+        # versions convert but are not golden-verified).
         choices=coreml_diffusion.list_model_versions(include_experimental=True),
-        help="Model architecture (verified: SD15, SDXL; experimental otherwise)",
+        help="Model architecture; auto-detected from the checkpoint when omitted "
+        "(verified: SD15, SDXL; experimental otherwise)",
     )
     conv.add_argument(
         "--component",

coreml_diffusion-0.1.6/coreml_diffusion/conversion/state_dict.py ADDED Viewed

@@ -0,0 +1,99 @@
+"""State-dict layout predicates — framework-free (no coremltools/diffusers).
+Single-file checkpoints come in two layouts: original LDM (UNet keys under
+``model.diffusion_model.``) and diffusers-native UNet-only dumps (block keys at
+the top level — e.g. ``LCM_Dreamshaper_v7_4k.safetensors``, the canonical
+full-distill LCM artifact). diffusers' ``from_single_file`` only understands
+the former and raises ``SingleFileComponentError`` on the latter;
+``convert.load_unet`` routes on this predicate.
+Also home to ``detect_model_version``: it reads only the safetensors header (no
+coremltools/diffusers), so the conversion entrypoint can auto-pick the model
+version without dragging the heavy stack into the discovery path.
+"""
+from coreml_diffusion.model_version import ModelVersion
+DIFFUSERS_UNET_KEY_PREFIXES = ("down_blocks.", "up_blocks.", "mid_block.")
+LDM_UNET_KEY_PREFIX = "model.diffusion_model."
+# cross_attention_dim -> model version. The context (key/value) dim of the UNet's
+# cross-attention is the architecture fingerprint: SD1.5 conditions on a single
+# 768-dim CLIP, SDXL on the 2048-dim concat of both encoders, the SDXL refiner on
+# the 1280-dim OpenCLIP-bigG alone. A guidance embedding (time_embedding.cond_proj)
+# on top of the 768-dim SD1.5 stack marks a full-distill LCM.
+_CROSS_ATTENTION_DIM_TO_VERSION = {
+    768: ModelVersion.SD15,
+    2048: ModelVersion.SDXL,
+    1280: ModelVersion.SDXL_REFINER,
+}
+def is_diffusers_unet_layout(keys) -> bool:
+    """True when ``keys`` form a diffusers-format UNet-only state dict."""
+    keys = list(keys)
+    has_diffusers_blocks = any(k.startswith(DIFFUSERS_UNET_KEY_PREFIXES) for k in keys)
+    has_ldm_prefix = any(k.startswith(LDM_UNET_KEY_PREFIX) for k in keys)
+    return has_diffusers_blocks and not has_ldm_prefix
+def safetensors_keys(ckpt_path):
+    """The file's key list when it is safetensors, else None.
+    Probes by content, not filename — a resolved checkpoint path may point at
+    an extension-less blob (e.g. inside the Hugging Face cache).
+    """
+    from safetensors import SafetensorError, safe_open
+    try:
+        with safe_open(ckpt_path, framework="pt") as f:
+            return list(f.keys())
+    except SafetensorError:
+        return None
+def detect_model_version(ckpt_path):
+    """Infer the ``ModelVersion`` from a checkpoint's UNet weights.
+    Reads two architecture fingerprints straight from the safetensors header
+    (no full model load): the cross-attention context dim (``attn2.to_k``) and
+    whether a guidance embedding (``cond_proj``) is present. Works for both LDM
+    and diffusers key layouts. Raises ``ValueError`` carrying the observed
+    evidence when the architecture is unrecognised, so a bad guess is debuggable
+    rather than silent.
+    """
+    keys = safetensors_keys(ckpt_path)
+    if keys is None:
+        raise ValueError(
+            f"Cannot auto-detect model version from {ckpt_path!r}: not a readable "
+            "safetensors file. Pass model_version explicitly."
+        )
+    cross_attn_key = next((k for k in keys if k.endswith("attn2.to_k.weight")), None)
+    if cross_attn_key is None:
+        raise ValueError(
+            f"Cannot auto-detect model version from {ckpt_path!r}: no cross-attention "
+            "(attn2.to_k) weights found. Pass model_version explicitly."
+        )
+    from safetensors import safe_open
+    with safe_open(ckpt_path, framework="pt") as f:
+        cross_attention_dim = f.get_slice(cross_attn_key).get_shape()[1]
+    has_guidance_embedding = any(k.endswith("cond_proj.weight") for k in keys)
+    if has_guidance_embedding:
+        if cross_attention_dim == 768:
+            return ModelVersion.LCM
+        raise ValueError(
+            f"Cannot auto-detect model version from {ckpt_path!r}: found a guidance "
+            f"embedding (LCM) but cross_attention_dim={cross_attention_dim}; only "
+            "SD1.5-class LCM (cross_attention_dim=768) is supported. Pass "
+            "model_version explicitly."
+        )
+    version = _CROSS_ATTENTION_DIM_TO_VERSION.get(cross_attention_dim)
+    if version is None:
+        raise ValueError(
+            f"Cannot auto-detect model version from {ckpt_path!r}: unrecognised "
+            f"cross_attention_dim={cross_attention_dim}. Supported: 768 (SD15/LCM), "
+            "2048 (SDXL), 1280 (SDXL_REFINER). Pass model_version explicitly."
+        )
+    return version

{coreml_diffusion-0.1.4 → coreml_diffusion-0.1.6}/coreml_diffusion/convert.py RENAMED Viewed

@@ -22,7 +22,11 @@ from coreml_diffusion.attention import ATTENTION_IMPLEMENTATIONS
 from coreml_diffusion.component import Component
 from coreml_diffusion.conversion.attention import apply_attention_implementation
 from coreml_diffusion.conversion.shapes import conv2d_output_shape
-from coreml_diffusion.conversion.state_dict import is_diffusers_unet_layout
+from coreml_diffusion.conversion.state_dict import (
+    detect_model_version,
+    is_diffusers_unet_layout,
+    safetensors_keys,
+)
 from coreml_diffusion.conversion.text_encoder import (
     CoreMLTextEncoderWrapper,
     static_causal_mask,
@@ -460,8 +464,8 @@ def convert_text_encoder(
 def convert(
     ckpt_path: str,
-    model_version: ModelVersion,
-    out_path: str,
+    model_version: ModelVersion = None,
+    out_path: str = None,
     *,
     component: str = Component.UNET.value,
     batch_size: int = 1,
@@ -474,20 +478,28 @@ def convert(
 ):
     """Convert a single-file checkpoint component to a Core ML ``.mlpackage``.
-    ``component`` selects what to convert (default ``"unet"`` — historical
-    behaviour, all UNet-only kwargs apply). ``vae_decoder`` / ``vae_encoder`` /
-    ``text_encoder`` / ``text_encoder_2`` convert the corresponding sub-model and
-    ignore the UNet-only kwargs (``controlnet_support``, ``lora_weights``,
-    ``attn_impl``). Keyword-only past the three required positionals so the package
-    can add capabilities without breaking an older caller. Writes ``out_path``;
-    returns None.
+    ``model_version`` is auto-detected from the checkpoint when left ``None``
+    (the architecture fully determines the conversion); pass it explicitly only
+    to override a misdetection. ``component`` selects what to convert (default
+    ``"unet"`` — historical behaviour, all UNet-only kwargs apply).
+    ``vae_decoder`` / ``vae_encoder`` / ``text_encoder`` / ``text_encoder_2``
+    convert the corresponding sub-model and ignore the UNet-only kwargs
+    (``controlnet_support``, ``lora_weights``, ``attn_impl``). Keyword-only past
+    the leading positionals so the package can add capabilities without breaking
+    an older caller. Writes ``out_path``; returns None.
     """
+    if out_path is None:
+        raise TypeError("convert() requires out_path")
     if os.path.exists(out_path):
         logger.info(f"Found existing model at {out_path}! Skipping..")
         return
     comp = Component(component)
+    if model_version is None:
+        model_version = detect_model_version(ckpt_path)
+        logger.info(f"Auto-detected model version: {model_version.name}")
     if comp is Component.VAE_DECODER:
         convert_vae_decoder(
             load_vae(ckpt_path),
@@ -553,7 +565,7 @@ def load_unet(ckpt_path, config_path):
     e.g. ``LCM_Dreamshaper_v7_4k.safetensors``) are rejected by
     ``from_single_file`` outright, so they get a direct state-dict load.
     """
-    keys = _safetensors_keys(ckpt_path)
+    keys = safetensors_keys(ckpt_path)
     if keys is not None and is_diffusers_unet_layout(keys):
         return load_unet_from_diffusers_state_dict(ckpt_path)
     return UNet2DConditionModel.from_single_file(
@@ -562,21 +574,6 @@ def load_unet(ckpt_path, config_path):
     )
-def _safetensors_keys(ckpt_path):
-    """The file's key list when it is safetensors, else None.
-    Probes by content, not filename — a resolved checkpoint path may point at
-    an extension-less blob (e.g. inside the Hugging Face cache).
-    """
-    from safetensors import SafetensorError, safe_open
-    try:
-        with safe_open(ckpt_path, framework="pt") as f:
-            return list(f.keys())
-    except SafetensorError:
-        return None
 def load_unet_from_diffusers_state_dict(ckpt_path, **config_overrides):
     """Load a diffusers-layout UNet-only safetensors dump (SD1.5-class).

{coreml_diffusion-0.1.4 → coreml_diffusion-0.1.6}/pyproject.toml RENAMED Viewed

@@ -1,10 +1,10 @@
 [project]
 name = "coreml-diffusion"
 description = "Convert diffusion-model checkpoints (SD1.5/SDXL) to Core ML for Apple Neural Engine — framework-free, ComfyUI-independent."
-version = "0.1.4"
+version = "0.1.6"
 license = "MIT"
 license-files = ["LICENSE"]
-requires-python = ">=3.12,<3.13"
+requires-python = ">=3.12"
 readme = "README.md"
 authors = [{ name = "Adrian Szczepański", email = "hi@aszc.dev" }]
 keywords = [
@@ -24,6 +24,7 @@ classifiers = [
     "Operating System :: MacOS",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
     "Topic :: Multimedia :: Graphics :: Graphics Conversion",
     "Typing :: Typed",

coreml_diffusion-0.1.6/tests/unit/test_detect_model_version.py ADDED Viewed

@@ -0,0 +1,77 @@
+"""Tier 0: model-version auto-detection from checkpoint weights.
+Locks the architecture fingerprinting that lets ``convert(model_version=None)``
+pick the right conversion path: cross-attention context dim (attn2.to_k) plus
+the presence of a guidance embedding (cond_proj). Synthetic safetensors files
+carry only the two keys the detector reads, so the test stays framework-free.
+"""
+import pytest
+import torch
+from safetensors.torch import save_file
+from coreml_diffusion.conversion.state_dict import detect_model_version
+from coreml_diffusion.model_version import ModelVersion
+def _write_ckpt(path, cross_attention_dim, *, guidance=False, with_cross_attn=True):
+    tensors = {}
+    if with_cross_attn:
+        # attn2.to_k maps the context dim -> inner dim; shape[1] is what we read.
+        key = "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.weight"
+        tensors[key] = torch.zeros(320, cross_attention_dim)
+    if guidance:
+        tensors["time_embedding.cond_proj.weight"] = torch.zeros(320, 256)
+    if not tensors:
+        tensors["dummy"] = torch.zeros(1)
+    save_file(tensors, str(path))
+    return str(path)
+@pytest.mark.parametrize(
+    "cross_attention_dim, guidance, expected",
+    [
+        (768, False, ModelVersion.SD15),
+        (768, True, ModelVersion.LCM),
+        (2048, False, ModelVersion.SDXL),
+        (1280, False, ModelVersion.SDXL_REFINER),
+    ],
+)
+def test_detects_known_architectures(tmp_path, cross_attention_dim, guidance, expected):
+    ckpt = _write_ckpt(
+        tmp_path / "m.safetensors", cross_attention_dim, guidance=guidance
+    )
+    assert detect_model_version(ckpt) is expected
+def test_lcm_lora_merge_detects_as_sd15(tmp_path):
+    # An LCM-LoRA merge is plain SD1.5 architecture (no guidance embedding); it
+    # must NOT be mistaken for a full-distill LCM.
+    ckpt = _write_ckpt(tmp_path / "merge.safetensors", 768, guidance=False)
+    assert detect_model_version(ckpt) is ModelVersion.SD15
+def test_guidance_with_non_sd15_dim_is_rejected(tmp_path):
+    ckpt = _write_ckpt(tmp_path / "sdxl_lcm.safetensors", 2048, guidance=True)
+    with pytest.raises(ValueError, match="only SD1.5-class LCM"):
+        detect_model_version(ckpt)
+def test_unknown_cross_attention_dim_is_rejected(tmp_path):
+    # 1024 is SD2.x — unsupported; the error must name the observed dim.
+    ckpt = _write_ckpt(tmp_path / "sd2.safetensors", 1024)
+    with pytest.raises(ValueError, match="cross_attention_dim=1024"):
+        detect_model_version(ckpt)
+def test_no_cross_attention_weights_is_rejected(tmp_path):
+    ckpt = _write_ckpt(tmp_path / "weird.safetensors", 768, with_cross_attn=False)
+    with pytest.raises(ValueError, match="no cross-attention"):
+        detect_model_version(ckpt)
+def test_non_safetensors_is_rejected(tmp_path):
+    bogus = tmp_path / "model.ckpt"
+    bogus.write_bytes(b"not safetensors")
+    with pytest.raises(ValueError, match="not a readable safetensors"):
+        detect_model_version(str(bogus))

coreml-diffusion 0.1.4__tar.gz → 0.1.6__tar.gz

coreml-diffusion 0.1.4__tar.gz → 0.1.6__tar.gz