coreml-diffusion 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. coreml_diffusion-0.1.1/.github/workflows/tier2.yml +74 -0
  2. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/.gitignore +1 -0
  3. coreml_diffusion-0.1.1/PKG-INFO +135 -0
  4. coreml_diffusion-0.1.1/README.md +106 -0
  5. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/__init__.py +6 -0
  6. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/cli.py +59 -2
  7. coreml_diffusion-0.1.1/coreml_diffusion/inference.py +176 -0
  8. coreml_diffusion-0.1.1/coreml_diffusion/sources.py +170 -0
  9. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/pyproject.toml +12 -5
  10. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/conftest.py +0 -2
  11. coreml_diffusion-0.1.1/tests/m2/goldens/sd15_astronaut.png +0 -0
  12. coreml_diffusion-0.1.1/tests/m2/goldens/sd15_astronaut.sha256 +1 -0
  13. coreml_diffusion-0.1.1/tests/m2/test_inference_golden.py +111 -0
  14. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/unit/test_cli.py +8 -0
  15. coreml_diffusion-0.1.1/tests/unit/test_sources.py +103 -0
  16. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/uv.lock +13 -93
  17. coreml_diffusion-0.1.0/PKG-INFO +0 -98
  18. coreml_diffusion-0.1.0/README.md +0 -69
  19. coreml_diffusion-0.1.0/tests/inference/test_pipeline_inference.py +0 -26
  20. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/.github/workflows/publish-pypi.yml +0 -0
  21. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/.github/workflows/tier0.yml +0 -0
  22. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/.github/workflows/tier1.yml +0 -0
  23. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/LICENSE +0 -0
  24. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/attention.py +0 -0
  25. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/conversion/__init__.py +0 -0
  26. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/conversion/attention.py +0 -0
  27. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/conversion/shapes.py +0 -0
  28. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/conversion/trace.py +0 -0
  29. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/conversion/unet.py +0 -0
  30. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/convert.py +0 -0
  31. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/logger.py +0 -0
  32. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/model_version.py +0 -0
  33. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/naming.py +0 -0
  34. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/smoke/test_split_einsum_attention.py +0 -0
  35. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/smoke/test_synthetic_unet.py +0 -0
  36. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/unit/test_characterization_out_name.py +0 -0
  37. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/unit/test_conversion_helpers.py +0 -0
  38. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/unit/test_discovery_api.py +0 -0
  39. {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/unit/test_tier0_purity.py +0 -0
@@ -0,0 +1,74 @@
1
+ name: Tier 2 — M2 / ANE (self-hosted)
2
+
3
+ on:
4
+ pull_request:
5
+ # `labeled` fires when run-m2 is first added; `synchronize`/`reopened`
6
+ # re-run on every subsequent push while the label is present, so the result
7
+ # tracks the PR head instead of going stale. The `if` below keeps the run
8
+ # gated on the run-m2 label for all pull_request events.
9
+ types: [labeled, synchronize, reopened]
10
+ schedule:
11
+ # Nightly at 04:00 UTC (~05/06 in PL). Keeps the ANE path honest without
12
+ # burning the runner on every PR.
13
+ - cron: "0 4 * * *"
14
+ workflow_dispatch:
15
+
16
+ jobs:
17
+ m2:
18
+ if: |
19
+ github.event_name == 'schedule' ||
20
+ github.event_name == 'workflow_dispatch' ||
21
+ (github.event_name == 'pull_request' &&
22
+ contains(github.event.pull_request.labels.*.name, 'run-m2'))
23
+ # Self-hosted Apple Silicon runner (shared with ComfyUI-CoreMLSuite's Tier 2,
24
+ # same `coreml` label). The runner's environment MUST export
25
+ # COREML_DIFFUSION_TEST_CKPT: an absolute path to a cached single-file SD1.5
26
+ # checkpoint. The gate converts it fresh and runs the comfy-free inference
27
+ # golden — no ComfyUI involved.
28
+ runs-on: [self-hosted, macOS, ARM64, coreml]
29
+ timeout-minutes: 90
30
+ steps:
31
+ - uses: actions/checkout@v4
32
+
33
+ - uses: astral-sh/setup-uv@v7
34
+ with:
35
+ enable-cache: true
36
+
37
+ - name: uv sync
38
+ run: uv sync
39
+
40
+ - name: Check cached checkpoint
41
+ # The runner's .env must export COREML_DIFFUSION_TEST_CKPT — an absolute
42
+ # path to a cached single-file SD1.5 checkpoint.
43
+ run: |
44
+ set -euo pipefail
45
+ if [ -z "${COREML_DIFFUSION_TEST_CKPT:-}" ]; then
46
+ echo "COREML_DIFFUSION_TEST_CKPT unset — add it to the runner's .env."
47
+ exit 1
48
+ fi
49
+ test -f "$COREML_DIFFUSION_TEST_CKPT" || {
50
+ echo "checkpoint not found: $COREML_DIFFUSION_TEST_CKPT"; exit 1; }
51
+ echo "Tier 2: checkpoint \`$COREML_DIFFUSION_TEST_CKPT\`" >> "$GITHUB_STEP_SUMMARY"
52
+
53
+ - name: Convert UNet fresh (batch=2 for CFG)
54
+ # Convert on every run. The .mlpackage cache key is conversion
55
+ # *parameters* only, not the conversion code or toolchain — a stale model
56
+ # would let a conversion regression pass. batch=2 because guided CFG feeds
57
+ # uncond+cond in a single forward pass, and ANE input shapes are fixed at
58
+ # convert time.
59
+ run: |
60
+ set -euo pipefail
61
+ MLPKG="$RUNNER_TEMP/sd15_b2.mlpackage"
62
+ rm -rf "$MLPKG"
63
+ uv run coreml-diffusion convert \
64
+ --ckpt "$COREML_DIFFUSION_TEST_CKPT" \
65
+ --model-version SD15 \
66
+ --out "$MLPKG" \
67
+ --batch-size 2 --height 512 --width 512 --attn-impl SPLIT_EINSUM
68
+ echo "COREML_DIFFUSION_TEST_MLPACKAGE=$MLPKG" >> "$GITHUB_ENV"
69
+
70
+ - name: Run Tier 2 (m2 marker)
71
+ # Builds a stock diffusers pipeline around the converted UNet and asserts
72
+ # the generated image against the committed golden (exact match, else
73
+ # PSNR >= GOLDEN_PSNR_MIN_DB). VAE/text encoder on torch, UNet on the ANE.
74
+ run: uv run --no-sync pytest -m m2 tests/ -v
@@ -6,3 +6,4 @@ __pycache__/
6
6
  dist/
7
7
  build/
8
8
  *.egg-info/
9
+ CLAUDE.md
@@ -0,0 +1,135 @@
1
+ Metadata-Version: 2.4
2
+ Name: coreml-diffusion
3
+ Version: 0.1.1
4
+ Summary: Convert diffusion-model checkpoints (SD1.5/SDXL) to Core ML for Apple Neural Engine — framework-free, ComfyUI-independent.
5
+ Project-URL: Homepage, https://github.com/aszc-dev/coreml-diffusion
6
+ Project-URL: Repository, https://github.com/aszc-dev/coreml-diffusion
7
+ Project-URL: Issues, https://github.com/aszc-dev/coreml-diffusion/issues
8
+ Author-email: Adrian Szczepański <hi@aszc.dev>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: ane,apple-neural-engine,comfyui,core-ml,coreml,diffusers,diffusion,sdxl,stable-diffusion
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: MacOS
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Multimedia :: Graphics :: Graphics Conversion
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Classifier: Typing :: Typed
20
+ Requires-Python: <3.13,>=3.12
21
+ Requires-Dist: coremltools<10,>=9
22
+ Requires-Dist: diffusers>=0.30
23
+ Requires-Dist: numpy<3,>=2
24
+ Requires-Dist: omegaconf>=2.3
25
+ Requires-Dist: peft>=0.13
26
+ Requires-Dist: torch>=2.7
27
+ Requires-Dist: transformers<5,>=4.44
28
+ Description-Content-Type: text/markdown
29
+
30
+ # coreml-diffusion
31
+
32
+ Convert diffusion-model checkpoints into Core ML `.mlpackage` artifacts for the
33
+ Apple Neural Engine (ANE) — framework-free and standalone.
34
+
35
+ `coreml-diffusion` takes a single-file Stable Diffusion checkpoint and produces a
36
+ Core ML UNet you can run on-device (macOS/iOS) via Core ML, in a Python pipeline,
37
+ or load into any host that consumes the artifact.
38
+
39
+ ## What this is
40
+
41
+ A standalone toolkit and knowledge base for running diffusion models on the Apple
42
+ Neural Engine via Core ML. The niche is **diffusion on the ANE**: low-power,
43
+ GPU-free, embeddable in a Swift/iOS app. ANE is the differentiator — this is about
44
+ feasibility and power efficiency for SD1.5/SDXL on ANE, not a raw-throughput claim
45
+ against desktop GPUs.
46
+
47
+ The scope is diffusion architectures generally, not Stable Diffusion specifically.
48
+ The project aims to gather, in one place: the conversion path, a reproducible
49
+ benchmarking suite for objective comparison, a per-model catalogue documenting the
50
+ quirks of each architecture on the ANE, and the sources behind it all.
51
+
52
+ Supported today: SD1.5 and SDXL (verified). SDXL refiner and LCM convert but are
53
+ not yet golden-verified (experimental).
54
+
55
+ ## Install
56
+
57
+ ```sh
58
+ uv pip install coreml-diffusion # from PyPI
59
+ uv pip install -e . # from a checkout
60
+ ```
61
+
62
+ Requires Python 3.12 and (for conversion) `coremltools` 9 — conversion runs on
63
+ macOS; the package imports and its CLI parses on any platform.
64
+
65
+ ## CLI
66
+
67
+ ```sh
68
+ coreml-diffusion convert \
69
+ --ckpt path/to/model.safetensors \
70
+ --model-version SD15 \
71
+ --out unet.mlpackage \
72
+ --height 512 --width 512 \
73
+ --attn-impl SPLIT_EINSUM \
74
+ --quantize none
75
+ ```
76
+
77
+ Options: `--batch-size`, `--controlnet`, `--lora PATH[:STRENGTH]` (repeatable),
78
+ `--config` (original-config YAML). `--quantize {none,8,6,4}` applies k-means
79
+ weight palettization. Run `coreml-diffusion convert --help` for the full list.
80
+
81
+ The output `.mlpackage` is the deliverable: load it natively in Swift/Core ML, run
82
+ it through the Python inference pipeline below, or hand it to any consuming host.
83
+
84
+ ### Model sources
85
+
86
+ Register directories so `--ckpt` accepts a bare name instead of a full path:
87
+
88
+ ```sh
89
+ coreml-diffusion sources add comfy /path/to/ComfyUI/models # --kind comfy|flat
90
+ coreml-diffusion sources list # sources + checkpoints
91
+ coreml-diffusion convert --ckpt v1-5-pruned-emaonly --model-version SD15 --out unet.mlpackage
92
+ ```
93
+
94
+ Sources are recorded in `~/.config/coreml-diffusion/sources.toml`. `comfy` knows the
95
+ `models/{checkpoints,loras,vae,...}` layout; `flat` is a plain checkpoint directory.
96
+
97
+ ## Library
98
+
99
+ ```python
100
+ import coreml_diffusion
101
+ from coreml_diffusion import ModelVersion
102
+
103
+ coreml_diffusion.convert(
104
+ "model.safetensors", ModelVersion.SD15, "unet.mlpackage",
105
+ height=512, width=512, attn_impl="SPLIT_EINSUM",
106
+ )
107
+ ```
108
+
109
+ ## Inference (in progress)
110
+
111
+ A framework-free inference path lets a converted `.mlpackage` generate images with
112
+ no host framework: a `diffusers` pipeline runs the stock VAE / text encoder on
113
+ torch while the UNet is served from Core ML on the ANE. This doubles as the
114
+ package's own regression anchor — the Tier 2 (`m2`) golden image, asserted on an
115
+ Apple Silicon runner — and as the reference for the on-device write-up. See
116
+ `tests/m2/`.
117
+
118
+ ## Discovery API
119
+
120
+ `list_model_versions`, `list_attention_impls`, `list_quant_modes`, and
121
+ `CONTRACT_VERSION` report what this build can convert. The identifiers are an
122
+ additive-only contract: removing or renaming one is a major version bump, because
123
+ downstream consumers reference these strings verbatim.
124
+
125
+ ## ComfyUI
126
+
127
+ [ComfyUI-CoreMLSuite](https://github.com/aszc-dev/ComfyUI-CoreMLSuite) consumes
128
+ this package for its conversion path and drives its node dropdowns from the
129
+ discovery API above — installing a newer `coreml-diffusion` surfaces new
130
+ conversion types in the node with no Suite change. The Suite is one consumer;
131
+ this package neither depends on nor requires ComfyUI.
132
+
133
+ ## License
134
+
135
+ MIT
@@ -0,0 +1,106 @@
1
+ # coreml-diffusion
2
+
3
+ Convert diffusion-model checkpoints into Core ML `.mlpackage` artifacts for the
4
+ Apple Neural Engine (ANE) — framework-free and standalone.
5
+
6
+ `coreml-diffusion` takes a single-file Stable Diffusion checkpoint and produces a
7
+ Core ML UNet you can run on-device (macOS/iOS) via Core ML, in a Python pipeline,
8
+ or load into any host that consumes the artifact.
9
+
10
+ ## What this is
11
+
12
+ A standalone toolkit and knowledge base for running diffusion models on the Apple
13
+ Neural Engine via Core ML. The niche is **diffusion on the ANE**: low-power,
14
+ GPU-free, embeddable in a Swift/iOS app. ANE is the differentiator — this is about
15
+ feasibility and power efficiency for SD1.5/SDXL on ANE, not a raw-throughput claim
16
+ against desktop GPUs.
17
+
18
+ The scope is diffusion architectures generally, not Stable Diffusion specifically.
19
+ The project aims to gather, in one place: the conversion path, a reproducible
20
+ benchmarking suite for objective comparison, a per-model catalogue documenting the
21
+ quirks of each architecture on the ANE, and the sources behind it all.
22
+
23
+ Supported today: SD1.5 and SDXL (verified). SDXL refiner and LCM convert but are
24
+ not yet golden-verified (experimental).
25
+
26
+ ## Install
27
+
28
+ ```sh
29
+ uv pip install coreml-diffusion # from PyPI
30
+ uv pip install -e . # from a checkout
31
+ ```
32
+
33
+ Requires Python 3.12 and (for conversion) `coremltools` 9 — conversion runs on
34
+ macOS; the package imports and its CLI parses on any platform.
35
+
36
+ ## CLI
37
+
38
+ ```sh
39
+ coreml-diffusion convert \
40
+ --ckpt path/to/model.safetensors \
41
+ --model-version SD15 \
42
+ --out unet.mlpackage \
43
+ --height 512 --width 512 \
44
+ --attn-impl SPLIT_EINSUM \
45
+ --quantize none
46
+ ```
47
+
48
+ Options: `--batch-size`, `--controlnet`, `--lora PATH[:STRENGTH]` (repeatable),
49
+ `--config` (original-config YAML). `--quantize {none,8,6,4}` applies k-means
50
+ weight palettization. Run `coreml-diffusion convert --help` for the full list.
51
+
52
+ The output `.mlpackage` is the deliverable: load it natively in Swift/Core ML, run
53
+ it through the Python inference pipeline below, or hand it to any consuming host.
54
+
55
+ ### Model sources
56
+
57
+ Register directories so `--ckpt` accepts a bare name instead of a full path:
58
+
59
+ ```sh
60
+ coreml-diffusion sources add comfy /path/to/ComfyUI/models # --kind comfy|flat
61
+ coreml-diffusion sources list # sources + checkpoints
62
+ coreml-diffusion convert --ckpt v1-5-pruned-emaonly --model-version SD15 --out unet.mlpackage
63
+ ```
64
+
65
+ Sources are recorded in `~/.config/coreml-diffusion/sources.toml`. `comfy` knows the
66
+ `models/{checkpoints,loras,vae,...}` layout; `flat` is a plain checkpoint directory.
67
+
68
+ ## Library
69
+
70
+ ```python
71
+ import coreml_diffusion
72
+ from coreml_diffusion import ModelVersion
73
+
74
+ coreml_diffusion.convert(
75
+ "model.safetensors", ModelVersion.SD15, "unet.mlpackage",
76
+ height=512, width=512, attn_impl="SPLIT_EINSUM",
77
+ )
78
+ ```
79
+
80
+ ## Inference (in progress)
81
+
82
+ A framework-free inference path lets a converted `.mlpackage` generate images with
83
+ no host framework: a `diffusers` pipeline runs the stock VAE / text encoder on
84
+ torch while the UNet is served from Core ML on the ANE. This doubles as the
85
+ package's own regression anchor — the Tier 2 (`m2`) golden image, asserted on an
86
+ Apple Silicon runner — and as the reference for the on-device write-up. See
87
+ `tests/m2/`.
88
+
89
+ ## Discovery API
90
+
91
+ `list_model_versions`, `list_attention_impls`, `list_quant_modes`, and
92
+ `CONTRACT_VERSION` report what this build can convert. The identifiers are an
93
+ additive-only contract: removing or renaming one is a major version bump, because
94
+ downstream consumers reference these strings verbatim.
95
+
96
+ ## ComfyUI
97
+
98
+ [ComfyUI-CoreMLSuite](https://github.com/aszc-dev/ComfyUI-CoreMLSuite) consumes
99
+ this package for its conversion path and drives its node dropdowns from the
100
+ discovery API above — installing a newer `coreml-diffusion` surfaces new
101
+ conversion types in the node with no Suite change. The Suite is one consumer;
102
+ this package neither depends on nor requires ComfyUI.
103
+
104
+ ## License
105
+
106
+ MIT
@@ -40,6 +40,8 @@ __all__ = [
40
40
  "compose_out_name",
41
41
  "lora_names_from_params",
42
42
  "convert",
43
+ "build_pipeline",
44
+ "CoreMLUNet",
43
45
  ]
44
46
 
45
47
 
@@ -105,4 +107,8 @@ def __getattr__(name):
105
107
  from coreml_diffusion.convert import convert as _convert
106
108
 
107
109
  return _convert
110
+ if name in ("build_pipeline", "CoreMLUNet"):
111
+ from coreml_diffusion import inference
112
+
113
+ return getattr(inference, name)
108
114
  raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -14,6 +14,7 @@ Example:
14
14
  import argparse
15
15
 
16
16
  import coreml_diffusion
17
+ from coreml_diffusion import sources
17
18
 
18
19
 
19
20
  def _parse_lora(spec):
@@ -31,8 +32,9 @@ def _parse_lora(spec):
31
32
  def _convert_cmd(args):
32
33
  sample_size = (args.height // 8, args.width // 8)
33
34
  lora_weights = [_parse_lora(spec) for spec in (args.lora or [])]
35
+ ckpt = sources.resolve_checkpoint(args.ckpt, args.source)
34
36
  coreml_diffusion.convert(
35
- args.ckpt,
37
+ ckpt,
36
38
  coreml_diffusion.ModelVersion[args.model_version],
37
39
  args.out,
38
40
  batch_size=args.batch_size,
@@ -45,6 +47,31 @@ def _convert_cmd(args):
45
47
  )
46
48
 
47
49
 
50
def _sources_add_cmd(args):
    """Handle ``sources add``: register the source and echo what was stored.

    Reads ``args.name``, ``args.path`` and ``args.kind`` and passes them to
    ``sources.add_source``; the confirmation line is built from the returned
    entry's ``kind`` and ``path`` values.
    """
    stored = sources.add_source(args.name, args.path, args.kind)
    kind = stored["kind"]
    location = stored["path"]
    print(f"Added source {args.name!r} ({kind}): {location}")
53
+
54
+
55
def _sources_list_cmd(args):
    """Handle ``sources list``: print each registered source and its checkpoints.

    Sources are printed in name order. When nothing is registered, the
    location of the config file is printed instead. ``args`` is accepted for
    the common sub-command signature and is not read.
    """
    catalog = sources.load_sources()
    if not catalog:
        print(f"No sources registered. Config: {sources.config_path()}")
        return

    for name in sorted(catalog):
        entry = catalog[name]
        # Look up the checkpoints before printing the header, so a failing
        # lookup does not leave a dangling header line behind.
        found = sources.iter_checkpoints(entry)
        print(f"{name} ({entry['kind']}): {entry['path']}")
        if not found:
            print(" (no checkpoints found)")
            continue
        for stem in found:
            print(f" - {stem}")
68
+
69
+
70
def _sources_remove_cmd(args):
    """Handle ``sources remove``: unregister ``args.name`` and confirm it."""
    target = args.name
    sources.remove_source(target)
    print(f"Removed source {target!r}")
73
+
74
+
48
75
  def build_parser():
49
76
  parser = argparse.ArgumentParser(
50
77
  prog="coreml-diffusion",
@@ -54,7 +81,15 @@ def build_parser():
54
81
 
55
82
  conv = sub.add_parser("convert", help="Convert a checkpoint's UNet to a .mlpackage")
56
83
  conv.add_argument(
57
- "--ckpt", required=True, help="Path to the source .safetensors checkpoint"
84
+ "--ckpt",
85
+ required=True,
86
+ help="Checkpoint path, or a name resolved against registered sources "
87
+ "(see 'coreml-diffusion sources')",
88
+ )
89
+ conv.add_argument(
90
+ "--source",
91
+ default=None,
92
+ help="Restrict --ckpt name resolution to this registered source",
58
93
  )
59
94
  conv.add_argument(
60
95
  "--model-version",
@@ -101,6 +136,28 @@ def build_parser():
101
136
  help="K-means weight palettization bits (default none = unquantized)",
102
137
  )
103
138
  conv.set_defaults(func=_convert_cmd)
139
+
140
+ src = sub.add_parser("sources", help="Manage model source directories")
141
+ src_sub = src.add_subparsers(dest="sources_command", required=True)
142
+
143
+ s_add = src_sub.add_parser("add", help="Register (or overwrite) a model source")
144
+ s_add.add_argument("name", help="Short name for the source, e.g. 'comfy'")
145
+ s_add.add_argument("path", help="Base directory of the source")
146
+ s_add.add_argument(
147
+ "--kind",
148
+ choices=sources.SOURCE_KINDS,
149
+ default="comfy",
150
+ help="Directory layout (default comfy: models/{checkpoints,loras,vae,...})",
151
+ )
152
+ s_add.set_defaults(func=_sources_add_cmd)
153
+
154
+ s_list = src_sub.add_parser("list", help="List sources and their checkpoints")
155
+ s_list.set_defaults(func=_sources_list_cmd)
156
+
157
+ s_rm = src_sub.add_parser("remove", help="Unregister a source")
158
+ s_rm.add_argument("name", help="Source name to remove")
159
+ s_rm.set_defaults(func=_sources_remove_cmd)
160
+
104
161
  return parser
105
162
 
106
163
 
@@ -0,0 +1,176 @@
1
+ """Framework-free inference for a converted Core ML UNet.
2
+
3
+ Runs a `.mlpackage` UNet inside a stock ``diffusers`` pipeline with NO ComfyUI:
4
+ the VAE and text encoder run on torch (CPU/MPS) while the UNet is served from
5
+ Core ML on the ANE. This is the package-side counterpart to ``convert`` — it
6
+ proves a converted artifact generates an image end-to-end, and provides the
7
+ Tier 2 (``m2``) golden anchor (``tests/m2/``).
8
+
9
+ ``CoreMLUNet`` is the inverse of ``conversion.unet.CoreMLUNetWrapper``: the
10
+ wrapper adapts diffusers UNet *inputs* into the flat Core ML tensor contract at
11
+ convert time; this adapter unpacks a diffusers call back into that same flat
12
+ contract at run time, feeds it to ``coremltools``, and re-wraps the output as a
13
+ diffusers ``UNet2DConditionOutput``.
14
+
15
+ Core ML input contract (see ``convert.convert_unet``): every input is float16.
16
+ sample (B,C,H,W), timestep (B,), encoder_hidden_states (B,77,cross_dim);
17
+ + LCM: timestep_cond (B,256)
18
+ + SDXL: time_ids (B,6), text_embeds (B,proj_dim)
19
+ + ctrl: additional_residual_0..N
20
+ Single output ``noise_pred`` (float32).
21
+
22
+ The traced model has FIXED input shapes: the batch and resolution are baked in
23
+ at conversion. Classifier-free guidance feeds a batch of 2 (uncond+cond), so the
24
+ artifact must be converted with ``batch_size=2`` to run a guided pipeline in one
25
+ forward pass. Height/width must match the converted ``sample_size``.
26
+ """
27
+
28
+ import numpy as np
29
+ import torch
30
+
31
+ from coreml_diffusion.logger import logger
32
+ from coreml_diffusion.model_version import ModelVersion
33
+
34
+ # Compute-unit default: let Core ML place ops on the ANE (falling back to CPU for
35
+ # unsupported ops). Override per call for A/B against CPU_ONLY / CPU_AND_GPU.
36
+ DEFAULT_COMPUTE_UNIT = "CPU_AND_NE"
37
+
38
+
39
+ def _f16(tensor):
40
+ """diffusers tensor -> contiguous float16 numpy, matching the trace dtype."""
41
+ return np.ascontiguousarray(tensor.detach().to(torch.float16).cpu().numpy())
42
+
43
+
44
class CoreMLUNet(torch.nn.Module):
    """A ``UNet2DConditionModel`` stand-in backed by a Core ML ``.mlpackage``.

    Carries the reference UNet's ``config`` so the surrounding diffusers pipeline
    reads the right ``in_channels`` / ``addition_embed_type`` / ``time_cond_proj_dim``
    etc., but runs the forward pass through coremltools instead of torch.

    Args:
        mlpackage_path: Path of the converted ``.mlpackage`` to load.
        ref_unet: The torch UNet being replaced; only its ``config`` is kept.
        model_version: ``ModelVersion`` member; selects which extra inputs
            (LCM ``timestep_cond``, SDXL ``time_ids`` / ``text_embeds``) are fed.
        compute_unit: A ``coremltools.ComputeUnit`` member or the name of one.
    """

    def __init__(
        self, mlpackage_path, ref_unet, model_version, compute_unit=DEFAULT_COMPUTE_UNIT
    ):
        super().__init__()
        # Function-local import: coremltools is only needed once a model is
        # actually being loaded.
        import coremltools as ct

        self.config = ref_unet.config
        self.dtype = torch.float16
        self.model_version = model_version

        # Accept either an enum member or its name (e.g. "CPU_AND_NE").
        if isinstance(compute_unit, ct.ComputeUnit):
            unit = compute_unit
        else:
            unit = ct.ComputeUnit[compute_unit]
        logger.info(f"Loading {mlpackage_path} to {unit.name}")
        self.model = ct.models.MLModel(mlpackage_path, compute_units=unit)
        # The model has a single output; remember its name for predict().
        self._output_name = self.model.get_spec().description.output[0].name

    @property
    def _is_lcm(self):
        """True when the wrapped model takes the LCM ``timestep_cond`` input."""
        return self.model_version is ModelVersion.LCM

    @property
    def _is_sdxl(self):
        """True when the wrapped model takes the SDXL conditioning inputs."""
        return self.model_version in {ModelVersion.SDXL, ModelVersion.SDXL_REFINER}

    def forward(
        self,
        sample,
        timestep,
        encoder_hidden_states,
        timestep_cond=None,
        added_cond_kwargs=None,
        down_block_additional_residuals=None,
        mid_block_additional_residual=None,
        return_dict=True,
        **_ignored,
    ):
        """Run one denoising step through the Core ML model.

        Every input is converted to float16 NumPy and passed under the flat
        Core ML input names; the prediction is returned as a torch tensor on
        ``sample.device``.

        Returns:
            ``UNet2DConditionOutput(sample=noise_pred)``, or the 1-tuple
            ``(noise_pred,)`` when ``return_dict`` is false.

        Raises:
            ValueError: If an LCM model is called without ``timestep_cond``, or
                an SDXL model without both ``time_ids`` and ``text_embeds`` in
                ``added_cond_kwargs``.
        """
        batch = sample.shape[0]

        # timestep arrives as a python scalar, 0-dim, or (1,) tensor; the trace
        # baked a (B,) timestep, so broadcast to the batch.
        ts = torch.as_tensor(timestep, dtype=torch.float32).reshape(-1)
        if ts.numel() == 1:
            ts = ts.expand(batch)

        inputs = {
            "sample": _f16(sample),
            "timestep": _f16(ts),
            "encoder_hidden_states": _f16(encoder_hidden_states),
        }

        if self._is_lcm:
            if timestep_cond is None:
                raise ValueError(
                    "LCM UNet requires timestep_cond (guidance embedding)."
                )
            inputs["timestep_cond"] = _f16(timestep_cond)

        if self._is_sdxl:
            # Validate BOTH keys up front: checking only time_ids let a missing
            # text_embeds escape as a bare KeyError two lines further down.
            cond = added_cond_kwargs or {}
            if "time_ids" not in cond or "text_embeds" not in cond:
                raise ValueError(
                    "SDXL UNet requires added_cond_kwargs with time_ids/text_embeds."
                )
            inputs["time_ids"] = _f16(cond["time_ids"])
            inputs["text_embeds"] = _f16(cond["text_embeds"])

        if down_block_additional_residuals is not None:
            # Residuals are numbered in order: the down-block residuals first,
            # then the mid-block residual (when given) as the last index.
            residuals = list(down_block_additional_residuals)
            if mid_block_additional_residual is not None:
                residuals.append(mid_block_additional_residual)
            for i, residual in enumerate(residuals):
                inputs[f"additional_residual_{i}"] = _f16(residual)

        prediction = self.model.predict(inputs)[self._output_name]
        noise_pred = torch.from_numpy(np.ascontiguousarray(prediction)).to(
            sample.device
        )

        if not return_dict:
            return (noise_pred,)
        from diffusers.models.unets.unet_2d_condition import UNet2DConditionOutput

        return UNet2DConditionOutput(sample=noise_pred)
137
+
138
+
139
+ # Stock diffusers pipeline per verified model. Experimental versions are convertible
140
+ # but their pipelines are not wired here yet.
141
+ _PIPELINE_IMPORTS = {
142
+ ModelVersion.SD15: ("diffusers", "StableDiffusionPipeline"),
143
+ ModelVersion.SDXL: ("diffusers", "StableDiffusionXLPipeline"),
144
+ }
145
+
146
+
147
def build_pipeline(
    ckpt_path,
    mlpackage_path,
    model_version,
    *,
    compute_unit=DEFAULT_COMPUTE_UNIT,
    torch_device="cpu",
    **from_single_file_kwargs,
):
    """Build a stock diffusers pipeline whose UNet is served from Core ML.

    The pipeline class registered for ``model_version`` is loaded from
    ``ckpt_path`` via ``from_single_file`` (extra keyword arguments are
    forwarded to it) and moved to ``torch_device``. Its ``unet`` is then
    replaced by a ``CoreMLUNet`` wrapping ``mlpackage_path``, constructed with
    the torch UNet it replaces so the config carries over.

    Returns:
        The pipeline, ready to call.

    Raises:
        NotImplementedError: If no pipeline is wired for ``model_version``.
    """
    target = _PIPELINE_IMPORTS.get(model_version)
    if target is None:
        wired = [member.name for member in _PIPELINE_IMPORTS]
        raise NotImplementedError(
            f"No inference pipeline wired for {model_version.name}; "
            f"supported: {wired}."
        )

    import importlib

    module_name, class_name = target
    pipeline_module = importlib.import_module(module_name)
    pipeline_cls = getattr(pipeline_module, class_name)

    pipe = pipeline_cls.from_single_file(ckpt_path, **from_single_file_kwargs)
    pipe.to(torch_device)
    # Swap only after the pipeline is placed on its device: the torch UNet is
    # handed to CoreMLUNet as the reference it stands in for.
    torch_unet = pipe.unet
    pipe.unet = CoreMLUNet(mlpackage_path, torch_unet, model_version, compute_unit)
    return pipe