mate-workload-imagegen 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ # Python
2
+ .venv/
3
+ __pycache__/
4
+ *.py[cod]
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ .pytest_cache/
9
+
10
+ # uv
11
+ uv.lock
12
+
13
+ # Node / Cloudflare Worker
14
+ worker/node_modules/
15
+ worker/.wrangler/
16
+ worker/dist/
17
+
18
+ # Results (local benchmark output)
19
+ results/
20
+
21
+ # Secrets / local config
22
+ .env
23
+ *.env.local
24
+
25
+ # OS
26
+ .DS_Store
27
+ Thumbs.db
28
+
29
+ # Internal planning notes
30
+ BENCH_NOTES.md
@@ -0,0 +1,46 @@
1
+ Metadata-Version: 2.4
2
+ Name: mate-workload-imagegen
3
+ Version: 0.1.0
4
+ Summary: Image generation workload plugin for mate-bench
5
+ Project-URL: Homepage, https://github.com/T0nd3/mate-bench
6
+ Project-URL: Repository, https://github.com/T0nd3/mate-bench
7
+ Author-email: Benjamin Fäuster <benjamin.faeuster@web.de>
8
+ License: MIT
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Topic :: System :: Benchmark
14
+ Requires-Python: >=3.11
15
+ Requires-Dist: mate-bench<0.2,>=0.1
16
+ Description-Content-Type: text/markdown
17
+
18
+ # mate-workload-imagegen
19
+
20
+ Image generation workload plugin for [mate-bench](https://github.com/T0nd3/mate-bench).
21
+
22
+ **Open mode only.** Closed mode is intentionally unsupported — image model weights are
23
+ not standardised across installations, making reproducible comparison impossible without
24
+ a canonical checkpoint reference.
25
+
26
+ ## Metrics
27
+
28
+ | Metric | Description |
29
+ |--------|-------------|
30
+ | `images_per_second` | Generation throughput (higher is better) |
31
+ | `steps_per_second` | Sampler step throughput (higher is better) |
32
+ | `time_per_image_s` | Mean generation time per image in seconds, as reported by the engine (lower is better) |
33
+
34
+ ## Profiles
35
+
36
+ | Profile | Prompts | Resolution | Steps |
37
+ |---------|---------|------------|-------|
38
+ | `quick-512` | 5 | 512×512 | 20 |
39
+ | `standard-1024` | 3 | 1024×1024 | 20 |
40
+
41
+ ## Usage
42
+
43
+ ```bash
44
+ mate-bench run imagegen --profile quick-512 --mode open --model "v1-5-pruned-emaonly.ckpt"
45
+ mate-bench run imagegen --profile standard-1024 --mode open --model "sdxl_base_1.0.safetensors"
46
+ ```
@@ -0,0 +1,29 @@
1
+ # mate-workload-imagegen
2
+
3
+ Image generation workload plugin for [mate-bench](https://github.com/T0nd3/mate-bench).
4
+
5
+ **Open mode only.** Closed mode is intentionally unsupported — image model weights are
6
+ not standardised across installations, making reproducible comparison impossible without
7
+ a canonical checkpoint reference.
8
+
9
+ ## Metrics
10
+
11
+ | Metric | Description |
12
+ |--------|-------------|
13
+ | `images_per_second` | Generation throughput (higher is better) |
14
+ | `steps_per_second` | Sampler step throughput (higher is better) |
15
+ | `time_per_image_s` | Mean generation time per image in seconds, as reported by the engine (lower is better) |
16
+
17
+ ## Profiles
18
+
19
+ | Profile | Prompts | Resolution | Steps |
20
+ |---------|---------|------------|-------|
21
+ | `quick-512` | 5 | 512×512 | 20 |
22
+ | `standard-1024` | 3 | 1024×1024 | 20 |
23
+
24
+ ## Usage
25
+
26
+ ```bash
27
+ mate-bench run imagegen --profile quick-512 --mode open --model "v1-5-pruned-emaonly.ckpt"
28
+ mate-bench run imagegen --profile standard-1024 --mode open --model "sdxl_base_1.0.safetensors"
29
+ ```
@@ -0,0 +1,35 @@
1
+ [project]
2
+ name = "mate-workload-imagegen"
3
+ version = "0.1.0"
4
+ description = "Image generation workload plugin for mate-bench"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ license = {text = "MIT"}
8
+ authors = [{name = "Benjamin Fäuster", email = "benjamin.faeuster@web.de"}]
9
+ classifiers = [
10
+ "License :: OSI Approved :: MIT License",
11
+ "Programming Language :: Python :: 3",
12
+ "Programming Language :: Python :: 3.11",
13
+ "Programming Language :: Python :: 3.12",
14
+ "Topic :: System :: Benchmark",
15
+ ]
16
+ dependencies = [
17
+ "mate-bench>=0.1,<0.2",
18
+ ]
19
+
20
+ [project.urls]
21
+ Homepage = "https://github.com/T0nd3/mate-bench"
22
+ Repository = "https://github.com/T0nd3/mate-bench"
23
+
24
+ [project.entry-points."mate_bench.workload"]
25
+ imagegen = "mate_workload_imagegen:ImageGenWorkload"
26
+
27
+ [dependency-groups]
28
+ dev = ["pytest>=8.0", "ruff>=0.4"]
29
+
30
+ [build-system]
31
+ requires = ["hatchling"]
32
+ build-backend = "hatchling.build"
33
+
34
+ [tool.hatch.build.targets.wheel]
35
+ packages = ["src/mate_workload_imagegen"]
@@ -0,0 +1,120 @@
1
+ """Image generation workload plugin for mate-bench (open mode only).
2
+
3
+ Measures txt2img throughput (images/s, steps/s) across a fixed prompt
4
+ suite. Closed mode is intentionally unsupported — see ImageGenWorkload.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from typing import Any
11
+
12
+ from mate_bench.plugin import (
13
+ EnginePlugin,
14
+ Measurement,
15
+ Mode,
16
+ PluginManifest,
17
+ ProfileConfig,
18
+ TestSetSpec,
19
+ )
20
+
21
+ from ._measure import measure
22
+ from ._profiles import BUNDLED_TEST_SETS, PROFILES, TEST_SETS
23
+
24
+ __all__ = ["ImageGenWorkload"]
25
+
26
+
27
class ImageGenWorkload:
    """Image generation workload — open mode only.

    Closed mode is intentionally unsupported: image model weights are not
    standardised across installations, making apples-to-apples comparison
    impossible without a canonical model reference. Users run open mode and
    specify the model they have locally.

    Every method that takes ``profile`` looks it up in ``PROFILES`` and
    raises ``KeyError`` for an unknown profile name.
    """

    name = "imagegen"
    manifest = PluginManifest(requires_mate_bench=">=0.1,<0.2", api_version=1)
    profiles: dict[str, ProfileConfig] = PROFILES
    test_sets: dict[str, TestSetSpec] = TEST_SETS

    # Single source for the error raised by both closed-mode entry points
    # (setup_closed and run), so the two messages cannot drift apart.
    _OPEN_MODE_ONLY_MESSAGE = (
        "imagegen workload supports open mode only — "
        "run with --mode open and --model <your-checkpoint>"
    )

    def estimate_download(self, profile: str) -> int:
        """Return the download size in bytes declared by *profile*."""
        return PROFILES[profile].download_size_bytes

    def estimate_vram(self, profile: str) -> int:
        """Return the VRAM requirement of *profile* in bytes.

        The profile stores the value in GB; it is converted with a factor
        of 1024**3 and truncated to an integer.
        """
        return int(PROFILES[profile].vram_required_gb * 1024**3)

    def estimate_runtime(self, profile: str) -> int:
        """Return the estimated runtime in seconds declared by *profile*."""
        return PROFILES[profile].estimated_runtime_seconds

    def required_models(self, profile: str) -> list[str]:
        """Return an empty list: the model is user-supplied in open mode."""
        return []

    def setup_closed(self, profile: str) -> None:
        """Reject closed mode.

        Raises:
            NotImplementedError: always; this workload is open mode only.
        """
        raise NotImplementedError(self._OPEN_MODE_ONLY_MESSAGE)

    def setup_open(self, profile: str, user_inputs: dict[str, Any]) -> None:
        """Do nothing: test sets are bundled, so there is nothing to fetch."""

    def _load_tasks(
        self, profile: str
    ) -> tuple[list[dict[str, Any]], int, int, int, float, str]:
        """Load the bundled test set for *profile*.

        Returns:
            ``(tasks, width, height, steps, cfg, sampler)`` read from the
            test set's JSON file.

        Raises:
            KeyError: if the profile, its test set id, or one of the
                expected JSON keys is missing.
            OSError: if the bundled file cannot be opened.
        """
        test_set_id = PROFILES[profile].test_set_id
        path = BUNDLED_TEST_SETS[test_set_id]
        # JSON is UTF-8 by specification; do not depend on the platform's
        # locale encoding (which is not UTF-8 on every system).
        with path.open(encoding="utf-8") as f:
            data = json.load(f)
        return (
            data["tasks"],
            data["width"],
            data["height"],
            data["steps"],
            data["cfg"],
            data["sampler"],
        )

    def run(
        self,
        profile: str,
        mode: Mode,
        engine: EnginePlugin,
        runs: int,
        warmup_runs: int,
        model: str = "",
        seed: int = 42,
    ) -> Measurement:
        """Run the benchmark for *profile* and return its measurement.

        Args:
            profile: profile name; selects the bundled prompt suite.
            mode: must be ``Mode.OPEN``.
            engine: engine plugin handed to ``measure``.
            runs: number of measured runs.
            warmup_runs: number of runs executed before measuring starts.
            model: checkpoint name supplied by the user; must be non-empty.
            seed: seed forwarded to ``measure``.

        Raises:
            NotImplementedError: if *mode* is not ``Mode.OPEN``.
            ValueError: if *model* is empty.
        """
        if mode != Mode.OPEN:
            raise NotImplementedError(self._OPEN_MODE_ONLY_MESSAGE)
        if not model:
            raise ValueError("model must be specified for imagegen open-mode runs")

        tasks, width, height, steps, cfg, sampler = self._load_tasks(profile)

        median_stats, std_dev_stats, throttling_detected = measure(
            engine,  # type: ignore[arg-type]
            model,
            tasks,
            width,
            height,
            steps,
            cfg,
            sampler,
            runs,
            warmup_runs,
            seed=seed,
        )

        return Measurement(
            runs=runs,
            warmup_runs=warmup_runs,
            median=median_stats,
            std_dev=std_dev_stats,
            # Peak VRAM is not measured by this workload; a constant 0.0 is reported.
            vram_peak_gb=0.0,
            throttling_detected=throttling_detected,
        )

    def cleanup(self, profile: str) -> None:
        """Do nothing: this workload caches nothing."""
@@ -0,0 +1,110 @@
1
+ from __future__ import annotations
2
+
3
+ import statistics
4
+ from dataclasses import dataclass
5
+ from typing import Any, Protocol
6
+
7
+
8
class _ImageResult(Protocol):
    """Structural type for the value returned by ``_ImageEngine.txt2img``.

    Only the timing attribute below is read by this module.
    """

    # Time the engine reports for generating one image; the ``_s`` suffix
    # suggests seconds — TODO confirm against the engine plugins.
    generation_time_s: float
10
+
11
+
12
class _ImageEngine(Protocol):
    """Structural type for an engine that ``measure`` can drive.

    Any object with a compatible ``txt2img`` method satisfies it.
    """

    def txt2img(
        self,
        prompt: str,
        negative_prompt: str,
        steps: int,
        width: int,
        height: int,
        cfg: float,
        sampler: str,
        model: str,
        seed: int,
    ) -> _ImageResult:
        """Generate one image and return a result exposing ``generation_time_s``.

        ``measure`` passes every argument by keyword.
        """
        ...
25
+
26
+
27
@dataclass
class _RunStats:
    """Aggregated figures for a single benchmark run (see ``_aggregate_run``)."""

    # Number of images divided by the summed generation time.
    images_per_second: float
    # Configured step count divided by the mean time per image.
    steps_per_second: float
    # Summed generation time divided by the number of images.
    time_per_image_s: float
32
+
33
+
34
def _aggregate_run(results: list[_ImageResult], steps: int) -> _RunStats:
    """Summarise one run's results as throughput and per-image time.

    Args:
        results: one result per generated image; only ``generation_time_s``
            is read.
        steps: sampler steps configured for every image in the run.

    Returns:
        A ``_RunStats``; any figure whose divisor would be zero (no images,
        or zero total time) is reported as ``0.0``.
    """
    image_count = len(results)
    elapsed = sum(item.generation_time_s for item in results)

    if image_count > 0:
        seconds_per_image = elapsed / image_count
    else:
        seconds_per_image = 0.0

    if elapsed > 0:
        image_rate = image_count / elapsed
    else:
        image_rate = 0.0

    if seconds_per_image > 0:
        step_rate = steps / seconds_per_image
    else:
        step_rate = 0.0

    return _RunStats(
        images_per_second=image_rate,
        steps_per_second=step_rate,
        time_per_image_s=seconds_per_image,
    )
46
+
47
+
48
def _median_and_stdev(values: list[float]) -> tuple[float, float]:
    """Return ``(median, sample std-dev)`` of *values*, with 0.0 where undefined."""
    median = statistics.median(values) if values else 0.0
    # statistics.stdev needs at least two data points.
    std_dev = statistics.stdev(values) if len(values) > 1 else 0.0
    return median, std_dev


def measure(
    engine: _ImageEngine,
    model: str,
    tasks: list[dict[str, Any]],
    width: int,
    height: int,
    steps: int,
    cfg: float,
    sampler: str,
    runs: int,
    warmup_runs: int,
    seed: int = 42,
    *,
    throttle_cv_threshold: float = 0.15,
) -> tuple[dict[str, Any], dict[str, Any], bool]:
    """Run the image-gen benchmark loop; return (median, std_dev, throttling_detected).

    Each run calls ``engine.txt2img`` once per task with the same settings
    and the same *seed*. The first *warmup_runs* runs are executed but left
    out of the statistics.

    Args:
        engine: object providing ``txt2img``.
        model: model/checkpoint name forwarded to the engine.
        tasks: task dicts; ``"prompt"`` is required, ``"negative_prompt"``
            defaults to ``""``.
        width: image width forwarded to the engine.
        height: image height forwarded to the engine.
        steps: sampler steps per image.
        cfg: guidance value forwarded to the engine.
        sampler: sampler name forwarded to the engine.
        runs: number of measured runs.
        warmup_runs: number of unmeasured runs executed first.
        seed: seed forwarded to every ``txt2img`` call.
        throttle_cv_threshold: throttling is flagged when the images/s
            std-dev divided by its median exceeds this value.

    Returns:
        ``(median_stats, std_dev_stats, throttling_detected)``. Both dicts
        hold ``images_per_second``, ``steps_per_second`` and
        ``time_per_image_s``; the median dict also records ``resolution``
        and ``steps``. All figures are 0.0 when there are no measured runs.

    Raises:
        KeyError: if a task has no ``"prompt"`` key.
    """
    measured: list[_RunStats] = []

    for run_index in range(warmup_runs + runs):
        run_results: list[_ImageResult] = [
            engine.txt2img(
                prompt=task["prompt"],
                negative_prompt=task.get("negative_prompt", ""),
                steps=steps,
                width=width,
                height=height,
                cfg=cfg,
                sampler=sampler,
                model=model,
                seed=seed,
            )
            for task in tasks
        ]
        # Warm-up runs still hit the engine; only their timings are dropped.
        if run_index >= warmup_runs:
            measured.append(_aggregate_run(run_results, steps))

    median_ips, std_ips = _median_and_stdev([s.images_per_second for s in measured])
    median_sps, std_sps = _median_and_stdev([s.steps_per_second for s in measured])
    median_tpi, std_tpi = _median_and_stdev([s.time_per_image_s for s in measured])

    # Run-to-run spread of throughput relative to its median; a large
    # spread is taken as a sign of throttling.
    cv = std_ips / median_ips if median_ips > 0 else 0.0
    throttling_detected = cv > throttle_cv_threshold

    median_stats: dict[str, Any] = {
        "images_per_second": median_ips,
        "steps_per_second": median_sps,
        "time_per_image_s": median_tpi,
        "resolution": f"{width}x{height}",
        "steps": steps,
    }
    std_dev_stats: dict[str, Any] = {
        "images_per_second": std_ips,
        "steps_per_second": std_sps,
        "time_per_image_s": std_tpi,
    }

    return median_stats, std_dev_stats, throttling_detected
@@ -0,0 +1,55 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from mate_bench.plugin import ProfileConfig, TestSetSpec
6
+
7
# Directory holding the bundled JSON test sets, resolved relative to this module.
_DATA_DIR = Path(__file__).parent / "data" / "test-sets"

# Maps each test-set id to the path of its bundled JSON file.
BUNDLED_TEST_SETS: dict[str, Path] = {
    "imagegen-512-v1": _DATA_DIR / "imagegen-512-v1.json",
    "imagegen-1024-v1": _DATA_DIR / "imagegen-1024-v1.json",
}
13
+
14
# No CDN URLs — test sets are bundled (prompts are plain text, no media).
# The url and sha256 fields therefore hold the placeholders "bundled" and
# "sha256:bundled" rather than a real location or digest.
# NOTE(review): an earlier note said integrity is checked against the bundled
# file; no such check is visible in this module — confirm where it happens.
# NOTE(review): size_bytes values are hand-entered — confirm they match the files.
TEST_SETS: dict[str, TestSetSpec] = {
    "imagegen-512-v1": TestSetSpec(
        id="imagegen-512-v1",
        url="bundled",
        sha256="sha256:bundled",
        size_bytes=780,
        license="CC0-1.0",
        source="mate-bench project (original)",
    ),
    "imagegen-1024-v1": TestSetSpec(
        id="imagegen-1024-v1",
        url="bundled",
        sha256="sha256:bundled",
        size_bytes=520,
        license="CC0-1.0",
        source="mate-bench project (original)",
    ),
}
34
+
35
# Open-mode profiles: user specifies the model, we supply the prompt set + resolution.
# Each profile points at a test set by id (a key of TEST_SETS / BUNDLED_TEST_SETS).
# download_size_bytes is 0 for both profiles.
PROFILES: dict[str, ProfileConfig] = {
    "quick-512": ProfileConfig(
        name="quick-512",
        description="5 prompts at 512×512, 20 steps — open mode, specify your own model",
        test_set_id="imagegen-512-v1",
        reference_engine_config={"engine": "comfyui"},
        vram_required_gb=4.0,
        download_size_bytes=0,
        estimated_runtime_seconds=120,
    ),
    "standard-1024": ProfileConfig(
        name="standard-1024",
        description="3 prompts at 1024×1024, 20 steps — open mode, specify your own model",
        test_set_id="imagegen-1024-v1",
        reference_engine_config={"engine": "comfyui"},
        vram_required_gb=8.0,
        download_size_bytes=0,
        estimated_runtime_seconds=300,
    ),
}
@@ -0,0 +1,13 @@
1
+ {
2
+ "version": "imagegen-1024-v1",
3
+ "width": 1024,
4
+ "height": 1024,
5
+ "steps": 20,
6
+ "cfg": 7.0,
7
+ "sampler": "euler_ancestral",
8
+ "tasks": [
9
+ {"id": "t001", "prompt": "a detailed portrait of an astronaut in space, photorealistic, 8k, cinematic lighting", "negative_prompt": "blurry, low quality, artifacts"},
10
+ {"id": "t002", "prompt": "a fantasy castle on a cliff above clouds, epic fantasy landscape, golden hour", "negative_prompt": "blurry, low quality, artifacts"},
11
+ {"id": "t003", "prompt": "a close-up of a butterfly on a flower, macro photography, sharp detail, bokeh background", "negative_prompt": "blurry, low quality, artifacts"}
12
+ ]
13
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "version": "imagegen-512-v1",
3
+ "width": 512,
4
+ "height": 512,
5
+ "steps": 20,
6
+ "cfg": 7.0,
7
+ "sampler": "euler_ancestral",
8
+ "tasks": [
9
+ {"id": "t001", "prompt": "a red apple on a wooden table, photorealistic, sharp focus, studio lighting", "negative_prompt": "blurry, low quality"},
10
+ {"id": "t002", "prompt": "a cat sitting on a windowsill at sunset, warm light, digital art", "negative_prompt": "blurry, low quality"},
11
+ {"id": "t003", "prompt": "a futuristic city at night, neon lights reflecting in rain puddles, cyberpunk", "negative_prompt": "blurry, low quality"},
12
+ {"id": "t004", "prompt": "a bowl of ramen with steam rising, food photography, top down view", "negative_prompt": "blurry, low quality"},
13
+ {"id": "t005", "prompt": "a mountain landscape with snow and pine trees, oil painting style", "negative_prompt": "blurry, low quality"}
14
+ ]
15
+ }
@@ -0,0 +1,115 @@
1
+ from __future__ import annotations
2
+
3
+ from unittest.mock import MagicMock
4
+
5
+ import pytest
6
+
7
+ from mate_workload_imagegen._measure import measure
8
+
9
# Minimal two-prompt task list passed to measure() by the tests in this module.
TASKS = [
    {"id": "t001", "prompt": "a red apple", "negative_prompt": "blurry"},
    {"id": "t002", "prompt": "a blue car", "negative_prompt": "blurry"},
]
13
+
14
+
15
def _make_engine(generation_time: float) -> MagicMock:
    """Build a mock engine whose ``txt2img`` always returns the same result.

    The shared result object reports *generation_time* as its
    ``generation_time_s``.
    """
    canned_result = MagicMock()
    canned_result.generation_time_s = generation_time
    # Dotted keys configure attributes of child mocks at construction time.
    return MagicMock(**{"txt2img.return_value": canned_result})
21
+
22
+
23
class TestMeasure:
    """Unit tests for ``measure`` driven by mock engines with scripted timings."""

    @staticmethod
    def _measure(engine, **overrides):
        """Call ``measure`` with the shared defaults, overridden per test."""
        params = {
            "width": 512,
            "height": 512,
            "steps": 20,
            "cfg": 7.0,
            "sampler": "euler_ancestral",
            "runs": 1,
            "warmup_runs": 0,
        }
        params.update(overrides)
        return measure(engine, "model.safetensors", TASKS, **params)

    @staticmethod
    def _scripted_engine(timings):
        """Return a mock engine whose successive calls report *timings* in order."""
        remaining = iter(timings)

        def side_effect(**kwargs):
            result = MagicMock()
            result.generation_time_s = next(remaining)
            return result

        engine = MagicMock()
        engine.txt2img.side_effect = side_effect
        return engine

    def test_images_per_second(self):
        median, _, _ = self._measure(_make_engine(2.0))
        # 2 tasks × 2.0s each → total 4.0s, 2 images → 0.5 img/s
        assert median["images_per_second"] == pytest.approx(0.5)

    def test_steps_per_second(self):
        median, _, _ = self._measure(_make_engine(2.0))
        # avg time_per_image = 2.0s, steps=20 → 10.0 steps/s
        assert median["steps_per_second"] == pytest.approx(10.0)

    def test_resolution_in_output(self):
        median, _, _ = self._measure(_make_engine(1.0), width=1024, height=1024)
        assert median["resolution"] == "1024x1024"

    def test_warmup_excluded(self):
        # The warm-up run (first len(TASKS) calls) is 10x slower than the
        # measured runs, so it would skew the statistics if it were counted.
        timings = [10.0] * len(TASKS) + [1.0] * (2 * len(TASKS))
        engine = self._scripted_engine(timings)

        median, std_dev, throttling = self._measure(engine, runs=2, warmup_runs=1)

        # 1 warmup + 2 runs, each with 2 tasks → 6 calls
        assert engine.txt2img.call_count == 6
        # Only the two identical 1.0s runs may contribute.
        assert median["time_per_image_s"] == pytest.approx(1.0)
        assert std_dev["time_per_image_s"] == pytest.approx(0.0)
        assert throttling is False

    def test_no_throttling_stable(self):
        _, _, throttling = self._measure(_make_engine(1.0), runs=3)
        assert throttling is False

    def test_throttling_detected_unstable(self):
        # Two steady runs then one 3x slower run: images/s = 1.0, 1.0, 0.33
        # → std-dev / median ≈ 0.38, above the 0.15 cut-off.
        timings = [1.0] * (2 * len(TASKS)) + [3.0] * len(TASKS)
        engine = self._scripted_engine(timings)

        _, _, throttling = self._measure(engine, runs=3)

        assert throttling is True