coreml-diffusion 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coreml_diffusion-0.1.1/.github/workflows/tier2.yml +74 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/.gitignore +1 -0
- coreml_diffusion-0.1.1/PKG-INFO +135 -0
- coreml_diffusion-0.1.1/README.md +106 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/__init__.py +6 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/cli.py +59 -2
- coreml_diffusion-0.1.1/coreml_diffusion/inference.py +176 -0
- coreml_diffusion-0.1.1/coreml_diffusion/sources.py +170 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/pyproject.toml +12 -5
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/conftest.py +0 -2
- coreml_diffusion-0.1.1/tests/m2/goldens/sd15_astronaut.png +0 -0
- coreml_diffusion-0.1.1/tests/m2/goldens/sd15_astronaut.sha256 +1 -0
- coreml_diffusion-0.1.1/tests/m2/test_inference_golden.py +111 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/unit/test_cli.py +8 -0
- coreml_diffusion-0.1.1/tests/unit/test_sources.py +103 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/uv.lock +13 -93
- coreml_diffusion-0.1.0/PKG-INFO +0 -98
- coreml_diffusion-0.1.0/README.md +0 -69
- coreml_diffusion-0.1.0/tests/inference/test_pipeline_inference.py +0 -26
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/.github/workflows/publish-pypi.yml +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/.github/workflows/tier0.yml +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/.github/workflows/tier1.yml +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/LICENSE +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/attention.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/conversion/__init__.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/conversion/attention.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/conversion/shapes.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/conversion/trace.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/conversion/unet.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/convert.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/logger.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/model_version.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/coreml_diffusion/naming.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/smoke/test_split_einsum_attention.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/smoke/test_synthetic_unet.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/unit/test_characterization_out_name.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/unit/test_conversion_helpers.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/unit/test_discovery_api.py +0 -0
- {coreml_diffusion-0.1.0 → coreml_diffusion-0.1.1}/tests/unit/test_tier0_purity.py +0 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
name: Tier 2 — M2 / ANE (self-hosted)
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
# `labeled` fires when run-m2 is first added; `synchronize`/`reopened`
|
|
6
|
+
# re-run on every subsequent push while the label is present, so the result
|
|
7
|
+
# tracks the PR head instead of going stale. The `if` below keeps the run
|
|
8
|
+
# gated on the run-m2 label for all pull_request events.
|
|
9
|
+
types: [labeled, synchronize, reopened]
|
|
10
|
+
schedule:
|
|
11
|
+
# Nightly at 04:00 UTC (~05/06 in PL). Keeps the ANE path honest without
|
|
12
|
+
# burning the runner on every PR.
|
|
13
|
+
- cron: "0 4 * * *"
|
|
14
|
+
workflow_dispatch:
|
|
15
|
+
|
|
16
|
+
jobs:
|
|
17
|
+
m2:
|
|
18
|
+
if: |
|
|
19
|
+
github.event_name == 'schedule' ||
|
|
20
|
+
github.event_name == 'workflow_dispatch' ||
|
|
21
|
+
(github.event_name == 'pull_request' &&
|
|
22
|
+
contains(github.event.pull_request.labels.*.name, 'run-m2'))
|
|
23
|
+
# Self-hosted Apple Silicon runner (shared with ComfyUI-CoreMLSuite's Tier 2,
|
|
24
|
+
# same `coreml` label). The runner's environment MUST export
|
|
25
|
+
# COREML_DIFFUSION_TEST_CKPT: an absolute path to a cached single-file SD1.5
|
|
26
|
+
# checkpoint. The gate converts it fresh and runs the comfy-free inference
|
|
27
|
+
# golden — no ComfyUI involved.
|
|
28
|
+
runs-on: [self-hosted, macOS, ARM64, coreml]
|
|
29
|
+
timeout-minutes: 90
|
|
30
|
+
steps:
|
|
31
|
+
- uses: actions/checkout@v4
|
|
32
|
+
|
|
33
|
+
- uses: astral-sh/setup-uv@v7
|
|
34
|
+
with:
|
|
35
|
+
enable-cache: true
|
|
36
|
+
|
|
37
|
+
- name: uv sync
|
|
38
|
+
run: uv sync
|
|
39
|
+
|
|
40
|
+
- name: Check cached checkpoint
|
|
41
|
+
# The runner's .env must export COREML_DIFFUSION_TEST_CKPT — an absolute
|
|
42
|
+
# path to a cached single-file SD1.5 checkpoint.
|
|
43
|
+
run: |
|
|
44
|
+
set -euo pipefail
|
|
45
|
+
if [ -z "${COREML_DIFFUSION_TEST_CKPT:-}" ]; then
|
|
46
|
+
echo "COREML_DIFFUSION_TEST_CKPT unset — add it to the runner's .env."
|
|
47
|
+
exit 1
|
|
48
|
+
fi
|
|
49
|
+
test -f "$COREML_DIFFUSION_TEST_CKPT" || {
|
|
50
|
+
echo "checkpoint not found: $COREML_DIFFUSION_TEST_CKPT"; exit 1; }
|
|
51
|
+
echo "Tier 2: checkpoint \`$COREML_DIFFUSION_TEST_CKPT\`" >> "$GITHUB_STEP_SUMMARY"
|
|
52
|
+
|
|
53
|
+
- name: Convert UNet fresh (batch=2 for CFG)
|
|
54
|
+
# Convert on every run. The .mlpackage cache key is conversion
|
|
55
|
+
# *parameters* only, not the conversion code or toolchain — a stale model
|
|
56
|
+
# would let a conversion regression pass. batch=2 because guided CFG feeds
|
|
57
|
+
# uncond+cond in a single forward pass, and ANE input shapes are fixed at
|
|
58
|
+
# convert time.
|
|
59
|
+
run: |
|
|
60
|
+
set -euo pipefail
|
|
61
|
+
MLPKG="$RUNNER_TEMP/sd15_b2.mlpackage"
|
|
62
|
+
rm -rf "$MLPKG"
|
|
63
|
+
uv run coreml-diffusion convert \
|
|
64
|
+
--ckpt "$COREML_DIFFUSION_TEST_CKPT" \
|
|
65
|
+
--model-version SD15 \
|
|
66
|
+
--out "$MLPKG" \
|
|
67
|
+
--batch-size 2 --height 512 --width 512 --attn-impl SPLIT_EINSUM
|
|
68
|
+
echo "COREML_DIFFUSION_TEST_MLPACKAGE=$MLPKG" >> "$GITHUB_ENV"
|
|
69
|
+
|
|
70
|
+
- name: Run Tier 2 (m2 marker)
|
|
71
|
+
# Builds a stock diffusers pipeline around the converted UNet and asserts
|
|
72
|
+
# the generated image against the committed golden (exact match, else
|
|
73
|
+
# PSNR >= GOLDEN_PSNR_MIN_DB). VAE/text encoder on torch, UNet on the ANE.
|
|
74
|
+
run: uv run --no-sync pytest -m m2 tests/ -v
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: coreml-diffusion
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Convert diffusion-model checkpoints (SD1.5/SDXL) to Core ML for Apple Neural Engine — framework-free, ComfyUI-independent.
|
|
5
|
+
Project-URL: Homepage, https://github.com/aszc-dev/coreml-diffusion
|
|
6
|
+
Project-URL: Repository, https://github.com/aszc-dev/coreml-diffusion
|
|
7
|
+
Project-URL: Issues, https://github.com/aszc-dev/coreml-diffusion/issues
|
|
8
|
+
Author-email: Adrian Szczepański <hi@aszc.dev>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: ane,apple-neural-engine,comfyui,core-ml,coreml,diffusers,diffusion,sdxl,stable-diffusion
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: MacOS
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Multimedia :: Graphics :: Graphics Conversion
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Classifier: Typing :: Typed
|
|
20
|
+
Requires-Python: <3.13,>=3.12
|
|
21
|
+
Requires-Dist: coremltools<10,>=9
|
|
22
|
+
Requires-Dist: diffusers>=0.30
|
|
23
|
+
Requires-Dist: numpy<3,>=2
|
|
24
|
+
Requires-Dist: omegaconf>=2.3
|
|
25
|
+
Requires-Dist: peft>=0.13
|
|
26
|
+
Requires-Dist: torch>=2.7
|
|
27
|
+
Requires-Dist: transformers<5,>=4.44
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# coreml-diffusion
|
|
31
|
+
|
|
32
|
+
Convert diffusion-model checkpoints into Core ML `.mlpackage` artifacts for the
|
|
33
|
+
Apple Neural Engine (ANE) — framework-free and standalone.
|
|
34
|
+
|
|
35
|
+
`coreml-diffusion` takes a single-file Stable Diffusion checkpoint and produces a
|
|
36
|
+
Core ML UNet you can run on-device (macOS/iOS) via Core ML, in a Python pipeline,
|
|
37
|
+
or load into any host that consumes the artifact.
|
|
38
|
+
|
|
39
|
+
## What this is
|
|
40
|
+
|
|
41
|
+
A standalone toolkit and knowledge base for running diffusion models on the Apple
|
|
42
|
+
Neural Engine via Core ML. The niche is **diffusion on the ANE**: low-power,
|
|
43
|
+
GPU-free, embeddable in a Swift/iOS app. ANE is the differentiator — this is about
|
|
44
|
+
feasibility and power efficiency for SD1.5/SDXL on ANE, not a raw-throughput claim
|
|
45
|
+
against desktop GPUs.
|
|
46
|
+
|
|
47
|
+
The scope is diffusion architectures generally, not Stable Diffusion specifically.
|
|
48
|
+
The project aims to gather, in one place: the conversion path, a reproducible
|
|
49
|
+
benchmarking suite for objective comparison, a per-model catalogue documenting the
|
|
50
|
+
quirks of each architecture on the ANE, and the sources behind it all.
|
|
51
|
+
|
|
52
|
+
Supported today: SD1.5 and SDXL (verified). SDXL refiner and LCM convert but are
|
|
53
|
+
not yet golden-verified (experimental).
|
|
54
|
+
|
|
55
|
+
## Install
|
|
56
|
+
|
|
57
|
+
```sh
|
|
58
|
+
uv pip install coreml-diffusion # from PyPI
|
|
59
|
+
uv pip install -e . # from a checkout
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Requires Python 3.12 and (for conversion) `coremltools` 9 — conversion runs on
|
|
63
|
+
macOS; the package imports and its CLI parses on any platform.
|
|
64
|
+
|
|
65
|
+
## CLI
|
|
66
|
+
|
|
67
|
+
```sh
|
|
68
|
+
coreml-diffusion convert \
|
|
69
|
+
--ckpt path/to/model.safetensors \
|
|
70
|
+
--model-version SD15 \
|
|
71
|
+
--out unet.mlpackage \
|
|
72
|
+
--height 512 --width 512 \
|
|
73
|
+
--attn-impl SPLIT_EINSUM \
|
|
74
|
+
--quantize none
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Options: `--batch-size`, `--controlnet`, `--lora PATH[:STRENGTH]` (repeatable),
|
|
78
|
+
`--config` (original-config YAML). `--quantize {none,8,6,4}` applies k-means
|
|
79
|
+
weight palettization. Run `coreml-diffusion convert --help` for the full list.
|
|
80
|
+
|
|
81
|
+
The output `.mlpackage` is the deliverable: load it natively in Swift/Core ML, run
|
|
82
|
+
it through the Python inference pipeline below, or hand it to any consuming host.
|
|
83
|
+
|
|
84
|
+
### Model sources
|
|
85
|
+
|
|
86
|
+
Register directories so `--ckpt` accepts a bare name instead of a full path:
|
|
87
|
+
|
|
88
|
+
```sh
|
|
89
|
+
coreml-diffusion sources add comfy /path/to/ComfyUI/models # --kind comfy|flat
|
|
90
|
+
coreml-diffusion sources list # sources + checkpoints
|
|
91
|
+
coreml-diffusion convert --ckpt v1-5-pruned-emaonly --model-version SD15 --out unet.mlpackage
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Sources are recorded in `~/.config/coreml-diffusion/sources.toml`. `comfy` knows the
|
|
95
|
+
`models/{checkpoints,loras,vae,...}` layout; `flat` is a plain checkpoint directory.
|
|
96
|
+
|
|
97
|
+
## Library
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
import coreml_diffusion
|
|
101
|
+
from coreml_diffusion import ModelVersion
|
|
102
|
+
|
|
103
|
+
coreml_diffusion.convert(
|
|
104
|
+
"model.safetensors", ModelVersion.SD15, "unet.mlpackage",
|
|
105
|
+
height=512, width=512, attn_impl="SPLIT_EINSUM",
|
|
106
|
+
)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Inference (in progress)
|
|
110
|
+
|
|
111
|
+
A framework-free inference path lets a converted `.mlpackage` generate images with
|
|
112
|
+
no host framework: a `diffusers` pipeline runs the stock VAE / text encoder on
|
|
113
|
+
torch while the UNet is served from Core ML on the ANE. This doubles as the
|
|
114
|
+
package's own regression anchor — the Tier 2 (`m2`) golden image, asserted on an
|
|
115
|
+
Apple Silicon runner — and as the reference for the on-device write-up. See
|
|
116
|
+
`tests/m2/`.
|
|
117
|
+
|
|
118
|
+
## Discovery API
|
|
119
|
+
|
|
120
|
+
`list_model_versions`, `list_attention_impls`, `list_quant_modes`, and
|
|
121
|
+
`CONTRACT_VERSION` report what this build can convert. The identifiers are an
|
|
122
|
+
additive-only contract: removing or renaming one is a major version bump, because
|
|
123
|
+
downstream consumers reference these strings verbatim.
|
|
124
|
+
|
|
125
|
+
## ComfyUI
|
|
126
|
+
|
|
127
|
+
[ComfyUI-CoreMLSuite](https://github.com/aszc-dev/ComfyUI-CoreMLSuite) consumes
|
|
128
|
+
this package for its conversion path and drives its node dropdowns from the
|
|
129
|
+
discovery API above — installing a newer `coreml-diffusion` surfaces new
|
|
130
|
+
conversion types in the node with no Suite change. The Suite is one consumer;
|
|
131
|
+
this package neither depends on nor requires ComfyUI.
|
|
132
|
+
|
|
133
|
+
## License
|
|
134
|
+
|
|
135
|
+
MIT
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# coreml-diffusion
|
|
2
|
+
|
|
3
|
+
Convert diffusion-model checkpoints into Core ML `.mlpackage` artifacts for the
|
|
4
|
+
Apple Neural Engine (ANE) — framework-free and standalone.
|
|
5
|
+
|
|
6
|
+
`coreml-diffusion` takes a single-file Stable Diffusion checkpoint and produces a
|
|
7
|
+
Core ML UNet you can run on-device (macOS/iOS) via Core ML, in a Python pipeline,
|
|
8
|
+
or load into any host that consumes the artifact.
|
|
9
|
+
|
|
10
|
+
## What this is
|
|
11
|
+
|
|
12
|
+
A standalone toolkit and knowledge base for running diffusion models on the Apple
|
|
13
|
+
Neural Engine via Core ML. The niche is **diffusion on the ANE**: low-power,
|
|
14
|
+
GPU-free, embeddable in a Swift/iOS app. ANE is the differentiator — this is about
|
|
15
|
+
feasibility and power efficiency for SD1.5/SDXL on ANE, not a raw-throughput claim
|
|
16
|
+
against desktop GPUs.
|
|
17
|
+
|
|
18
|
+
The scope is diffusion architectures generally, not Stable Diffusion specifically.
|
|
19
|
+
The project aims to gather, in one place: the conversion path, a reproducible
|
|
20
|
+
benchmarking suite for objective comparison, a per-model catalogue documenting the
|
|
21
|
+
quirks of each architecture on the ANE, and the sources behind it all.
|
|
22
|
+
|
|
23
|
+
Supported today: SD1.5 and SDXL (verified). SDXL refiner and LCM convert but are
|
|
24
|
+
not yet golden-verified (experimental).
|
|
25
|
+
|
|
26
|
+
## Install
|
|
27
|
+
|
|
28
|
+
```sh
|
|
29
|
+
uv pip install coreml-diffusion # from PyPI
|
|
30
|
+
uv pip install -e . # from a checkout
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Requires Python 3.12 and (for conversion) `coremltools` 9 — conversion runs on
|
|
34
|
+
macOS; the package imports and its CLI parses on any platform.
|
|
35
|
+
|
|
36
|
+
## CLI
|
|
37
|
+
|
|
38
|
+
```sh
|
|
39
|
+
coreml-diffusion convert \
|
|
40
|
+
--ckpt path/to/model.safetensors \
|
|
41
|
+
--model-version SD15 \
|
|
42
|
+
--out unet.mlpackage \
|
|
43
|
+
--height 512 --width 512 \
|
|
44
|
+
--attn-impl SPLIT_EINSUM \
|
|
45
|
+
--quantize none
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Options: `--batch-size`, `--controlnet`, `--lora PATH[:STRENGTH]` (repeatable),
|
|
49
|
+
`--config` (original-config YAML). `--quantize {none,8,6,4}` applies k-means
|
|
50
|
+
weight palettization. Run `coreml-diffusion convert --help` for the full list.
|
|
51
|
+
|
|
52
|
+
The output `.mlpackage` is the deliverable: load it natively in Swift/Core ML, run
|
|
53
|
+
it through the Python inference pipeline below, or hand it to any consuming host.
|
|
54
|
+
|
|
55
|
+
### Model sources
|
|
56
|
+
|
|
57
|
+
Register directories so `--ckpt` accepts a bare name instead of a full path:
|
|
58
|
+
|
|
59
|
+
```sh
|
|
60
|
+
coreml-diffusion sources add comfy /path/to/ComfyUI/models # --kind comfy|flat
|
|
61
|
+
coreml-diffusion sources list # sources + checkpoints
|
|
62
|
+
coreml-diffusion convert --ckpt v1-5-pruned-emaonly --model-version SD15 --out unet.mlpackage
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Sources are recorded in `~/.config/coreml-diffusion/sources.toml`. `comfy` knows the
|
|
66
|
+
`models/{checkpoints,loras,vae,...}` layout; `flat` is a plain checkpoint directory.
|
|
67
|
+
|
|
68
|
+
## Library
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import coreml_diffusion
|
|
72
|
+
from coreml_diffusion import ModelVersion
|
|
73
|
+
|
|
74
|
+
coreml_diffusion.convert(
|
|
75
|
+
"model.safetensors", ModelVersion.SD15, "unet.mlpackage",
|
|
76
|
+
height=512, width=512, attn_impl="SPLIT_EINSUM",
|
|
77
|
+
)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Inference (in progress)
|
|
81
|
+
|
|
82
|
+
A framework-free inference path lets a converted `.mlpackage` generate images with
|
|
83
|
+
no host framework: a `diffusers` pipeline runs the stock VAE / text encoder on
|
|
84
|
+
torch while the UNet is served from Core ML on the ANE. This doubles as the
|
|
85
|
+
package's own regression anchor — the Tier 2 (`m2`) golden image, asserted on an
|
|
86
|
+
Apple Silicon runner — and as the reference for the on-device write-up. See
|
|
87
|
+
`tests/m2/`.
|
|
88
|
+
|
|
89
|
+
## Discovery API
|
|
90
|
+
|
|
91
|
+
`list_model_versions`, `list_attention_impls`, `list_quant_modes`, and
|
|
92
|
+
`CONTRACT_VERSION` report what this build can convert. The identifiers are an
|
|
93
|
+
additive-only contract: removing or renaming one is a major version bump, because
|
|
94
|
+
downstream consumers reference these strings verbatim.
|
|
95
|
+
|
|
96
|
+
## ComfyUI
|
|
97
|
+
|
|
98
|
+
[ComfyUI-CoreMLSuite](https://github.com/aszc-dev/ComfyUI-CoreMLSuite) consumes
|
|
99
|
+
this package for its conversion path and drives its node dropdowns from the
|
|
100
|
+
discovery API above — installing a newer `coreml-diffusion` surfaces new
|
|
101
|
+
conversion types in the node with no Suite change. The Suite is one consumer;
|
|
102
|
+
this package neither depends on nor requires ComfyUI.
|
|
103
|
+
|
|
104
|
+
## License
|
|
105
|
+
|
|
106
|
+
MIT
|
|
@@ -40,6 +40,8 @@ __all__ = [
|
|
|
40
40
|
"compose_out_name",
|
|
41
41
|
"lora_names_from_params",
|
|
42
42
|
"convert",
|
|
43
|
+
"build_pipeline",
|
|
44
|
+
"CoreMLUNet",
|
|
43
45
|
]
|
|
44
46
|
|
|
45
47
|
|
|
@@ -105,4 +107,8 @@ def __getattr__(name):
|
|
|
105
107
|
from coreml_diffusion.convert import convert as _convert
|
|
106
108
|
|
|
107
109
|
return _convert
|
|
110
|
+
if name in ("build_pipeline", "CoreMLUNet"):
|
|
111
|
+
from coreml_diffusion import inference
|
|
112
|
+
|
|
113
|
+
return getattr(inference, name)
|
|
108
114
|
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
@@ -14,6 +14,7 @@ Example:
|
|
|
14
14
|
import argparse
|
|
15
15
|
|
|
16
16
|
import coreml_diffusion
|
|
17
|
+
from coreml_diffusion import sources
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
def _parse_lora(spec):
|
|
@@ -31,8 +32,9 @@ def _parse_lora(spec):
|
|
|
31
32
|
def _convert_cmd(args):
|
|
32
33
|
sample_size = (args.height // 8, args.width // 8)
|
|
33
34
|
lora_weights = [_parse_lora(spec) for spec in (args.lora or [])]
|
|
35
|
+
ckpt = sources.resolve_checkpoint(args.ckpt, args.source)
|
|
34
36
|
coreml_diffusion.convert(
|
|
35
|
-
|
|
37
|
+
ckpt,
|
|
36
38
|
coreml_diffusion.ModelVersion[args.model_version],
|
|
37
39
|
args.out,
|
|
38
40
|
batch_size=args.batch_size,
|
|
@@ -45,6 +47,31 @@ def _convert_cmd(args):
|
|
|
45
47
|
)
|
|
46
48
|
|
|
47
49
|
|
|
50
|
+
def _sources_add_cmd(args):
    """Handle ``sources add``: register a source and echo what was stored.

    Reads ``args.name``, ``args.path`` and ``args.kind`` from the parsed
    command line, passes them to ``sources.add_source``, and prints the
    ``kind`` and ``path`` of the entry that call returns.
    """
    registered = sources.add_source(args.name, args.path, args.kind)
    kind = registered["kind"]
    path = registered["path"]
    print(f"Added source {args.name!r} ({kind}): {path}")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _sources_list_cmd(args):
    """Handle ``sources list``: print each registered source and its checkpoints.

    Sources are printed in name order. When nothing is registered, a hint
    showing the config file location is printed instead. ``args`` is unused;
    it is accepted so the function fits the sub-command dispatch signature.
    """
    catalogue = sources.load_sources()
    if not catalogue:
        print(f"No sources registered. Config: {sources.config_path()}")
        return

    for source_name, source_entry in sorted(catalogue.items()):
        # Enumerate checkpoints before printing the header so a failure here
        # does not leave a dangling header line on stdout.
        found = sources.iter_checkpoints(source_entry)
        print(f"{source_name} ({source_entry['kind']}): {source_entry['path']}")
        if not found:
            print(" (no checkpoints found)")
            continue
        for checkpoint_stem in found:
            print(f" - {checkpoint_stem}")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _sources_remove_cmd(args):
    """Handle ``sources remove``: unregister ``args.name`` and confirm on stdout."""
    source_name = args.name
    sources.remove_source(source_name)
    print(f"Removed source {source_name!r}")
|
|
73
|
+
|
|
74
|
+
|
|
48
75
|
def build_parser():
|
|
49
76
|
parser = argparse.ArgumentParser(
|
|
50
77
|
prog="coreml-diffusion",
|
|
@@ -54,7 +81,15 @@ def build_parser():
|
|
|
54
81
|
|
|
55
82
|
conv = sub.add_parser("convert", help="Convert a checkpoint's UNet to a .mlpackage")
|
|
56
83
|
conv.add_argument(
|
|
57
|
-
"--ckpt",
|
|
84
|
+
"--ckpt",
|
|
85
|
+
required=True,
|
|
86
|
+
help="Checkpoint path, or a name resolved against registered sources "
|
|
87
|
+
"(see 'coreml-diffusion sources')",
|
|
88
|
+
)
|
|
89
|
+
conv.add_argument(
|
|
90
|
+
"--source",
|
|
91
|
+
default=None,
|
|
92
|
+
help="Restrict --ckpt name resolution to this registered source",
|
|
58
93
|
)
|
|
59
94
|
conv.add_argument(
|
|
60
95
|
"--model-version",
|
|
@@ -101,6 +136,28 @@ def build_parser():
|
|
|
101
136
|
help="K-means weight palettization bits (default none = unquantized)",
|
|
102
137
|
)
|
|
103
138
|
conv.set_defaults(func=_convert_cmd)
|
|
139
|
+
|
|
140
|
+
src = sub.add_parser("sources", help="Manage model source directories")
|
|
141
|
+
src_sub = src.add_subparsers(dest="sources_command", required=True)
|
|
142
|
+
|
|
143
|
+
s_add = src_sub.add_parser("add", help="Register (or overwrite) a model source")
|
|
144
|
+
s_add.add_argument("name", help="Short name for the source, e.g. 'comfy'")
|
|
145
|
+
s_add.add_argument("path", help="Base directory of the source")
|
|
146
|
+
s_add.add_argument(
|
|
147
|
+
"--kind",
|
|
148
|
+
choices=sources.SOURCE_KINDS,
|
|
149
|
+
default="comfy",
|
|
150
|
+
help="Directory layout (default comfy: models/{checkpoints,loras,vae,...})",
|
|
151
|
+
)
|
|
152
|
+
s_add.set_defaults(func=_sources_add_cmd)
|
|
153
|
+
|
|
154
|
+
s_list = src_sub.add_parser("list", help="List sources and their checkpoints")
|
|
155
|
+
s_list.set_defaults(func=_sources_list_cmd)
|
|
156
|
+
|
|
157
|
+
s_rm = src_sub.add_parser("remove", help="Unregister a source")
|
|
158
|
+
s_rm.add_argument("name", help="Source name to remove")
|
|
159
|
+
s_rm.set_defaults(func=_sources_remove_cmd)
|
|
160
|
+
|
|
104
161
|
return parser
|
|
105
162
|
|
|
106
163
|
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""Framework-free inference for a converted Core ML UNet.
|
|
2
|
+
|
|
3
|
+
Runs a `.mlpackage` UNet inside a stock ``diffusers`` pipeline with NO ComfyUI:
|
|
4
|
+
the VAE and text encoder run on torch (CPU/MPS) while the UNet is served from
|
|
5
|
+
Core ML on the ANE. This is the package-side counterpart to ``convert`` — it
|
|
6
|
+
proves a converted artifact generates an image end-to-end, and provides the
|
|
7
|
+
Tier 2 (``m2``) golden anchor (``tests/m2/``).
|
|
8
|
+
|
|
9
|
+
``CoreMLUNet`` is the inverse of ``conversion.unet.CoreMLUNetWrapper``: the
|
|
10
|
+
wrapper adapts diffusers UNet *inputs* into the flat Core ML tensor contract at
|
|
11
|
+
convert time; this adapter unpacks a diffusers call back into that same flat
|
|
12
|
+
contract at run time, feeds it to ``coremltools``, and re-wraps the output as a
|
|
13
|
+
diffusers ``UNet2DConditionOutput``.
|
|
14
|
+
|
|
15
|
+
Core ML input contract (see ``convert.convert_unet``): every input is float16.
|
|
16
|
+
sample (B,C,H,W), timestep (B,), encoder_hidden_states (B,77,cross_dim);
|
|
17
|
+
+ LCM: timestep_cond (B,256)
|
|
18
|
+
+ SDXL: time_ids (B,6), text_embeds (B,proj_dim)
|
|
19
|
+
+ ctrl: additional_residual_0..N
|
|
20
|
+
Single output ``noise_pred`` (float32).
|
|
21
|
+
|
|
22
|
+
The traced model has FIXED input shapes: the batch and resolution are baked in
|
|
23
|
+
at conversion. Classifier-free guidance feeds a batch of 2 (uncond+cond), so the
|
|
24
|
+
artifact must be converted with ``batch_size=2`` to run a guided pipeline in one
|
|
25
|
+
forward pass. Height/width must match the converted ``sample_size``.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import numpy as np
|
|
29
|
+
import torch
|
|
30
|
+
|
|
31
|
+
from coreml_diffusion.logger import logger
|
|
32
|
+
from coreml_diffusion.model_version import ModelVersion
|
|
33
|
+
|
|
34
|
+
# Compute-unit default: let Core ML place ops on the ANE (falling back to CPU for
|
|
35
|
+
# unsupported ops). Override per call for A/B against CPU_ONLY / CPU_AND_GPU.
|
|
36
|
+
DEFAULT_COMPUTE_UNIT = "CPU_AND_NE"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _f16(tensor):
|
|
40
|
+
"""diffusers tensor -> contiguous float16 numpy, matching the trace dtype."""
|
|
41
|
+
return np.ascontiguousarray(tensor.detach().to(torch.float16).cpu().numpy())
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class CoreMLUNet(torch.nn.Module):
    """A ``UNet2DConditionModel`` stand-in backed by a Core ML ``.mlpackage``.

    Carries the reference UNet's ``config`` so the surrounding diffusers pipeline
    reads the right ``in_channels`` / ``addition_embed_type`` / ``time_cond_proj_dim``
    etc., but runs the forward pass through coremltools instead of torch.
    """

    def __init__(
        self, mlpackage_path, ref_unet, model_version, compute_unit=DEFAULT_COMPUTE_UNIT
    ):
        """Load the Core ML model and borrow the reference UNet's config.

        Args:
            mlpackage_path: Path of the converted ``.mlpackage`` to load.
            ref_unet: The torch UNet being replaced; only its ``config`` is kept.
            model_version: ``ModelVersion`` member selecting which optional
                inputs ``forward`` feeds (LCM / SDXL).
            compute_unit: A ``coremltools.ComputeUnit`` member, or the name of
                one, which is looked up on ``coremltools.ComputeUnit``.
        """
        super().__init__()
        # Imported lazily so importing this module does not require coremltools.
        import coremltools as ct

        self.config = ref_unet.config
        self.dtype = torch.float16
        self.model_version = model_version

        if isinstance(compute_unit, ct.ComputeUnit):
            unit = compute_unit
        else:
            unit = ct.ComputeUnit[compute_unit]
        logger.info(f"Loading {mlpackage_path} to {unit.name}")
        self.model = ct.models.MLModel(mlpackage_path, compute_units=unit)
        # Resolve the output key once instead of on every forward pass.
        self._output_name = self.model.get_spec().description.output[0].name

    @property
    def _is_lcm(self):
        """True when the wrapped model was converted as an LCM UNet."""
        return self.model_version is ModelVersion.LCM

    @property
    def _is_sdxl(self):
        """True when the wrapped model is an SDXL base or SDXL refiner UNet."""
        return self.model_version in {ModelVersion.SDXL, ModelVersion.SDXL_REFINER}

    def forward(
        self,
        sample,
        timestep,
        encoder_hidden_states,
        timestep_cond=None,
        added_cond_kwargs=None,
        down_block_additional_residuals=None,
        mid_block_additional_residual=None,
        return_dict=True,
        **_ignored,
    ):
        """Run one denoising step through Core ML.

        Every input is converted to a contiguous float16 NumPy array and
        passed to the Core ML model under the flat input names used below.
        Any other keyword arguments are accepted and ignored.

        Args:
            sample: Latent tensor; its first dimension is taken as the batch.
            timestep: Python scalar or tensor; a single value is broadcast to
                the batch size.
            encoder_hidden_states: Text-encoder hidden states.
            timestep_cond: Guidance embedding; required for LCM models.
            added_cond_kwargs: Mapping holding ``time_ids`` and ``text_embeds``;
                required for SDXL models.
            down_block_additional_residuals: Optional iterable of residual
                tensors, fed as ``additional_residual_0..N`` in order.
            mid_block_additional_residual: Optional residual appended after the
                down-block residuals.
            return_dict: When false, return a one-element tuple instead of a
                ``UNet2DConditionOutput``.

        Returns:
            ``UNet2DConditionOutput(sample=noise_pred)``, or ``(noise_pred,)``
            when ``return_dict`` is false. ``noise_pred`` is placed on
            ``sample.device``.

        Raises:
            ValueError: If an LCM model is called without ``timestep_cond``, or
                an SDXL model without both ``time_ids`` and ``text_embeds``.
        """
        batch = sample.shape[0]

        # timestep arrives as a python scalar, 0-dim, or (1,) tensor; the trace
        # baked a (B,) timestep, so broadcast to the batch.
        ts = torch.as_tensor(timestep, dtype=torch.float32).reshape(-1)
        if ts.numel() == 1:
            ts = ts.expand(batch)

        inputs = {
            "sample": _f16(sample),
            "timestep": _f16(ts),
            "encoder_hidden_states": _f16(encoder_hidden_states),
        }

        if self._is_lcm:
            if timestep_cond is None:
                raise ValueError(
                    "LCM UNet requires timestep_cond (guidance embedding)."
                )
            inputs["timestep_cond"] = _f16(timestep_cond)

        if self._is_sdxl:
            # Validate BOTH keys up front: checking only time_ids let a missing
            # text_embeds escape as a bare KeyError instead of this ValueError.
            required = ("time_ids", "text_embeds")
            if not added_cond_kwargs or any(
                key not in added_cond_kwargs for key in required
            ):
                raise ValueError(
                    "SDXL UNet requires added_cond_kwargs with time_ids/text_embeds."
                )
            for key in required:
                inputs[key] = _f16(added_cond_kwargs[key])

        # NOTE: the mid-block residual is only forwarded together with
        # down-block residuals; passed on its own it is not fed to the model.
        if down_block_additional_residuals is not None:
            residuals = list(down_block_additional_residuals)
            if mid_block_additional_residual is not None:
                residuals.append(mid_block_additional_residual)
            for i, residual in enumerate(residuals):
                inputs[f"additional_residual_{i}"] = _f16(residual)

        prediction = self.model.predict(inputs)[self._output_name]
        noise_pred = torch.from_numpy(np.ascontiguousarray(prediction)).to(
            sample.device
        )

        if not return_dict:
            return (noise_pred,)
        from diffusers.models.unets.unet_2d_condition import UNet2DConditionOutput

        return UNet2DConditionOutput(sample=noise_pred)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# Stock diffusers pipeline per verified model. Experimental versions are convertible
|
|
140
|
+
# but their pipelines are not wired here yet.
|
|
141
|
+
_PIPELINE_IMPORTS = {
|
|
142
|
+
ModelVersion.SD15: ("diffusers", "StableDiffusionPipeline"),
|
|
143
|
+
ModelVersion.SDXL: ("diffusers", "StableDiffusionXLPipeline"),
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def build_pipeline(
    ckpt_path,
    mlpackage_path,
    model_version,
    *,
    compute_unit=DEFAULT_COMPUTE_UNIT,
    torch_device="cpu",
    **from_single_file_kwargs,
):
    """Build a stock diffusers pipeline whose UNet is served by Core ML.

    The pipeline class registered for ``model_version`` is loaded from
    ``ckpt_path`` with ``from_single_file`` and moved to ``torch_device``.
    Its torch UNet is then replaced by a ``CoreMLUNet`` backed by
    ``mlpackage_path``, which keeps the original UNet's config.

    Args:
        ckpt_path: Single-file checkpoint to load the pipeline from.
        mlpackage_path: Converted ``.mlpackage`` containing the UNet.
        model_version: Member of ``ModelVersion``; must be a key of
            ``_PIPELINE_IMPORTS``.
        compute_unit: Forwarded to ``CoreMLUNet``.
        torch_device: Device passed to the pipeline's ``to``.
        **from_single_file_kwargs: Forwarded verbatim to ``from_single_file``.

    Returns:
        The pipeline with its ``unet`` attribute swapped for the Core ML adapter.

    Raises:
        NotImplementedError: If no pipeline is wired for ``model_version``.
    """
    target = _PIPELINE_IMPORTS.get(model_version)
    if target is None:
        supported = [version.name for version in _PIPELINE_IMPORTS]
        raise NotImplementedError(
            f"No inference pipeline wired for {model_version.name}; "
            f"supported: {supported}."
        )
    import importlib

    module_name, class_name = target
    module = importlib.import_module(module_name)
    pipeline_cls = getattr(module, class_name)

    pipeline = pipeline_cls.from_single_file(ckpt_path, **from_single_file_kwargs)
    pipeline.to(torch_device)
    # Swap last: the adapter reads the torch UNet's config before replacing it.
    pipeline.unet = CoreMLUNet(
        mlpackage_path, pipeline.unet, model_version, compute_unit
    )
    return pipeline
|