PyPI - mate-workload-imagegen - Versions diffs - 0.1.0__tar.gz - Mend

mate-workload-imagegen 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

mate_workload_imagegen-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,30 @@
+# Python
+.venv/
+__pycache__/
+*.py[cod]
+*.egg-info/
+dist/
+build/
+.pytest_cache/
+# uv
+uv.lock
+# Node / Cloudflare Worker
+worker/node_modules/
+worker/.wrangler/
+worker/dist/
+# Results (local benchmark output)
+results/
+# Secrets / local config
+.env
+*.env.local
+# OS
+.DS_Store
+Thumbs.db
+# Internal planning notes
+BENCH_NOTES.md

mate_workload_imagegen-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,46 @@
+Metadata-Version: 2.4
+Name: mate-workload-imagegen
+Version: 0.1.0
+Summary: Image generation workload plugin for mate-bench
+Project-URL: Homepage, https://github.com/T0nd3/mate-bench
+Project-URL: Repository, https://github.com/T0nd3/mate-bench
+Author-email: Benjamin Fäuster <benjamin.faeuster@web.de>
+License: MIT
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: System :: Benchmark
+Requires-Python: >=3.11
+Requires-Dist: mate-bench<0.2,>=0.1
+Description-Content-Type: text/markdown
+# mate-workload-imagegen
+Image generation workload plugin for [mate-bench](https://github.com/T0nd3/mate-bench).
+**Open mode only.** Closed mode is intentionally unsupported — image model weights are
+not standardised across installations, making reproducible comparison impossible without
+a canonical checkpoint reference.
+## Metrics
+| Metric | Description |
+|--------|-------------|
+| `images_per_second` | Generation throughput (higher is better) |
+| `steps_per_second` | Sampler step throughput |
+| `time_per_image_s` | Mean engine-reported generation time per image, in seconds (lower is better) |
+## Profiles
+| Profile | Prompts | Resolution | Steps |
+|---------|---------|------------|-------|
+| `quick-512` | 5 | 512×512 | 20 |
+| `standard-1024` | 3 | 1024×1024 | 20 |
+## Usage
+```bash
+mate-bench run imagegen --profile quick-512 --mode open --model "v1-5-pruned-emaonly.ckpt"
+mate-bench run imagegen --profile standard-1024 --mode open --model "sdxl_base_1.0.safetensors"
+```

mate_workload_imagegen-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,29 @@
+# mate-workload-imagegen
+Image generation workload plugin for [mate-bench](https://github.com/T0nd3/mate-bench).
+**Open mode only.** Closed mode is intentionally unsupported — image model weights are
+not standardised across installations, making reproducible comparison impossible without
+a canonical checkpoint reference.
+## Metrics
+| Metric | Description |
+|--------|-------------|
+| `images_per_second` | Generation throughput (higher is better) |
+| `steps_per_second` | Sampler step throughput |
+| `time_per_image_s` | Mean engine-reported generation time per image, in seconds (lower is better) |
+## Profiles
+| Profile | Prompts | Resolution | Steps |
+|---------|---------|------------|-------|
+| `quick-512` | 5 | 512×512 | 20 |
+| `standard-1024` | 3 | 1024×1024 | 20 |
+## Usage
+```bash
+mate-bench run imagegen --profile quick-512 --mode open --model "v1-5-pruned-emaonly.ckpt"
+mate-bench run imagegen --profile standard-1024 --mode open --model "sdxl_base_1.0.safetensors"
+```

mate_workload_imagegen-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,35 @@
+[project]
+name = "mate-workload-imagegen"
+version = "0.1.0"
+description = "Image generation workload plugin for mate-bench"
+readme = "README.md"
+requires-python = ">=3.11"
+license = {text = "MIT"}
+authors = [{name = "Benjamin Fäuster", email = "benjamin.faeuster@web.de"}]
+classifiers = [
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: System :: Benchmark",
+]
+dependencies = [
+    "mate-bench>=0.1,<0.2",
+]
+[project.urls]
+Homepage = "https://github.com/T0nd3/mate-bench"
+Repository = "https://github.com/T0nd3/mate-bench"
+[project.entry-points."mate_bench.workload"]
+imagegen = "mate_workload_imagegen:ImageGenWorkload"
+[dependency-groups]
+dev = ["pytest>=8.0", "ruff>=0.4"]
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["src/mate_workload_imagegen"]

mate_workload_imagegen-0.1.0/src/mate_workload_imagegen/__init__.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""Image generation workload plugin for mate-bench (open mode only).
+Measures txt2img throughput (images/s, steps/s) across a fixed prompt
+suite.  Closed mode is intentionally unsupported — see ImageGenWorkload.
+"""
+from __future__ import annotations
+import json
+from typing import Any
+from mate_bench.plugin import (
+    EnginePlugin,
+    Measurement,
+    Mode,
+    PluginManifest,
+    ProfileConfig,
+    TestSetSpec,
+)
+from ._measure import measure
+from ._profiles import BUNDLED_TEST_SETS, PROFILES, TEST_SETS
+__all__ = ["ImageGenWorkload"]
+class ImageGenWorkload:
+    """Image generation workload — open mode only.
+    Closed mode is intentionally unsupported: image model weights are not
+    standardised across installations, making apples-to-apples comparison
+    impossible without a canonical model reference.  Users run open mode and
+    specify the model they have locally.
+    """
+    name = "imagegen"
+    manifest = PluginManifest(requires_mate_bench=">=0.1,<0.2", api_version=1)
+    profiles: dict[str, ProfileConfig] = PROFILES
+    test_sets: dict[str, TestSetSpec] = TEST_SETS
+    def estimate_download(self, profile: str) -> int:
+        return PROFILES[profile].download_size_bytes
+    def estimate_vram(self, profile: str) -> int:
+        return int(PROFILES[profile].vram_required_gb * 1024**3)
+    def estimate_runtime(self, profile: str) -> int:
+        return PROFILES[profile].estimated_runtime_seconds
+    def required_models(self, profile: str) -> list[str]:
+        return []  # user-supplied in open mode
+    def setup_closed(self, profile: str) -> None:
+        raise NotImplementedError(
+            "imagegen workload supports open mode only — "
+            "run with --mode open and --model <your-checkpoint>"
+        )
+    def setup_open(self, profile: str, user_inputs: dict[str, Any]) -> None:
+        pass  # test sets are bundled, nothing to fetch
+    def _load_tasks(self, profile: str) -> tuple[list[dict], int, int, int, float, str]:
+        test_set_id = PROFILES[profile].test_set_id
+        path = BUNDLED_TEST_SETS[test_set_id]
+        with path.open() as f:
+            data = json.load(f)
+        return (
+            data["tasks"],
+            data["width"],
+            data["height"],
+            data["steps"],
+            data["cfg"],
+            data["sampler"],
+        )
+    def run(
+        self,
+        profile: str,
+        mode: Mode,
+        engine: EnginePlugin,
+        runs: int,
+        warmup_runs: int,
+        model: str = "",
+        seed: int = 42,
+    ) -> Measurement:
+        if mode != Mode.OPEN:
+            raise NotImplementedError(
+                "imagegen workload supports open mode only — "
+                "run with --mode open and --model <your-checkpoint>"
+            )
+        if not model:
+            raise ValueError("model must be specified for imagegen open-mode runs")
+        tasks, width, height, steps, cfg, sampler = self._load_tasks(profile)
+        median_stats, std_dev_stats, throttling_detected = measure(
+            engine,  # type: ignore[arg-type]
+            model,
+            tasks,
+            width,
+            height,
+            steps,
+            cfg,
+            sampler,
+            runs,
+            warmup_runs,
+            seed=seed,
+        )
+        return Measurement(
+            runs=runs,
+            warmup_runs=warmup_runs,
+            median=median_stats,
+            std_dev=std_dev_stats,
+            vram_peak_gb=0.0,
+            throttling_detected=throttling_detected,
+        )
+    def cleanup(self, profile: str) -> None:
+        pass  # nothing cached

mate_workload_imagegen-0.1.0/src/mate_workload_imagegen/_measure.py ADDED Viewed

@@ -0,0 +1,110 @@
+from __future__ import annotations
+import statistics
+from dataclasses import dataclass
+from typing import Any, Protocol
+class _ImageResult(Protocol):
+    """Structural type for the value returned by ``txt2img``; only the timing field is read here."""
+    # Time the engine reports for generating one image, treated as seconds.
+    generation_time_s: float
+class _ImageEngine(Protocol):
+    """Structural type for the engine that ``measure`` drives.
+    ``measure`` calls ``txt2img`` with keyword arguments only, once per task.
+    """
+    def txt2img(
+        self,
+        prompt: str,
+        negative_prompt: str,
+        steps: int,
+        width: int,
+        height: int,
+        cfg: float,
+        sampler: str,
+        model: str,
+        seed: int,
+    ) -> _ImageResult: ...
+@dataclass
+class _RunStats:
+    """Throughput figures for one pass over the task list, as built by ``_aggregate_run``."""
+    # Number of images divided by the summed generation time.
+    images_per_second: float
+    # Configured step count divided by the mean time per image.
+    steps_per_second: float
+    # Summed generation time divided by the number of images.
+    time_per_image_s: float
+def _aggregate_run(results: list[_ImageResult], steps: int) -> _RunStats:
+    """Compute images/s, steps/s and time/image from a single run's results."""
+    total_time = sum(r.generation_time_s for r in results)
+    n = len(results)
+    time_per_image = total_time / n if n > 0 else 0.0
+    images_per_second = n / total_time if total_time > 0 else 0.0
+    steps_per_second = steps / time_per_image if time_per_image > 0 else 0.0
+    return _RunStats(
+        images_per_second=images_per_second,
+        steps_per_second=steps_per_second,
+        time_per_image_s=time_per_image,
+    )
+def measure(
+    engine: _ImageEngine,
+    model: str,
+    tasks: list[dict[str, Any]],
+    width: int,
+    height: int,
+    steps: int,
+    cfg: float,
+    sampler: str,
+    runs: int,
+    warmup_runs: int,
+    seed: int = 42,
+) -> tuple[dict[str, Any], dict[str, Any], bool]:
+    """Run the image-gen benchmark loop; return (median, std_dev, throttling_detected)."""
+    all_stats: list[_RunStats] = []
+    for i in range(warmup_runs + runs):
+        run_results: list[_ImageResult] = []
+        for task in tasks:
+            result = engine.txt2img(
+                prompt=task["prompt"],
+                negative_prompt=task.get("negative_prompt", ""),
+                steps=steps,
+                width=width,
+                height=height,
+                cfg=cfg,
+                sampler=sampler,
+                model=model,
+                seed=seed,
+            )
+            run_results.append(result)
+        if i >= warmup_runs:
+            all_stats.append(_aggregate_run(run_results, steps))
+    ips_values = [s.images_per_second for s in all_stats]
+    sps_values = [s.steps_per_second for s in all_stats]
+    tpi_values = [s.time_per_image_s for s in all_stats]
+    median_ips = statistics.median(ips_values) if ips_values else 0.0
+    median_sps = statistics.median(sps_values) if sps_values else 0.0
+    median_tpi = statistics.median(tpi_values) if tpi_values else 0.0
+    std_ips = statistics.stdev(ips_values) if len(ips_values) > 1 else 0.0
+    std_sps = statistics.stdev(sps_values) if len(sps_values) > 1 else 0.0
+    std_tpi = statistics.stdev(tpi_values) if len(tpi_values) > 1 else 0.0
+    cv = std_ips / median_ips if median_ips > 0 else 0.0
+    throttling_detected = cv > 0.15
+    median_stats: dict[str, Any] = {
+        "images_per_second": median_ips,
+        "steps_per_second": median_sps,
+        "time_per_image_s": median_tpi,
+        "resolution": f"{width}x{height}",
+        "steps": steps,
+    }
+    std_dev_stats: dict[str, Any] = {
+        "images_per_second": std_ips,
+        "steps_per_second": std_sps,
+        "time_per_image_s": std_tpi,
+    }
+    return median_stats, std_dev_stats, throttling_detected

mate_workload_imagegen-0.1.0/src/mate_workload_imagegen/_profiles.py ADDED Viewed

@@ -0,0 +1,55 @@
+from __future__ import annotations
+from pathlib import Path
+from mate_bench.plugin import ProfileConfig, TestSetSpec
+# Directory, relative to this module, that holds the bundled test-set JSON files.
+_DATA_DIR = Path(__file__).parent / "data" / "test-sets"
+# Maps each test-set id to the path of its bundled JSON file.
+BUNDLED_TEST_SETS: dict[str, Path] = {
+    "imagegen-512-v1": _DATA_DIR / "imagegen-512-v1.json",
+    "imagegen-1024-v1": _DATA_DIR / "imagegen-1024-v1.json",
+}
+# No CDN URLs — test sets are bundled (prompts are plain text, no media).
+# The url and sha256 fields below hold the placeholders "bundled" and
+# "sha256:bundled" rather than a real location or digest.
+# NOTE(review): an earlier comment said integrity is checked against the
+# bundled file; no such check is visible in this module — confirm where (or
+# whether) it happens.
+# NOTE(review): size_bytes values are hard-coded; presumably the size of each
+# bundled JSON file — verify against the shipped files.
+TEST_SETS: dict[str, TestSetSpec] = {
+    "imagegen-512-v1": TestSetSpec(
+        id="imagegen-512-v1",
+        url="bundled",
+        sha256="sha256:bundled",
+        size_bytes=780,
+        license="CC0-1.0",
+        source="mate-bench project (original)",
+    ),
+    "imagegen-1024-v1": TestSetSpec(
+        id="imagegen-1024-v1",
+        url="bundled",
+        sha256="sha256:bundled",
+        size_bytes=520,
+        license="CC0-1.0",
+        source="mate-bench project (original)",
+    ),
+}
+# Open-mode profiles: user specifies the model, we supply the prompt set + resolution.
+# Each profile's test_set_id is a key of BUNDLED_TEST_SETS / TEST_SETS above.
+# download_size_bytes is 0 for both profiles (presumably because the prompt
+# sets ship inside the package — confirm).
+PROFILES: dict[str, ProfileConfig] = {
+    "quick-512": ProfileConfig(
+        name="quick-512",
+        description="5 prompts at 512×512, 20 steps — open mode, specify your own model",
+        test_set_id="imagegen-512-v1",
+        reference_engine_config={"engine": "comfyui"},
+        vram_required_gb=4.0,
+        download_size_bytes=0,
+        estimated_runtime_seconds=120,
+    ),
+    "standard-1024": ProfileConfig(
+        name="standard-1024",
+        description="3 prompts at 1024×1024, 20 steps — open mode, specify your own model",
+        test_set_id="imagegen-1024-v1",
+        reference_engine_config={"engine": "comfyui"},
+        vram_required_gb=8.0,
+        download_size_bytes=0,
+        estimated_runtime_seconds=300,
+    ),
+}

mate_workload_imagegen-0.1.0/src/mate_workload_imagegen/data/test-sets/imagegen-1024-v1.json ADDED Viewed

@@ -0,0 +1,13 @@
+{
+  "version": "imagegen-1024-v1",
+  "width": 1024,
+  "height": 1024,
+  "steps": 20,
+  "cfg": 7.0,
+  "sampler": "euler_ancestral",
+  "tasks": [
+    {"id": "t001", "prompt": "a detailed portrait of an astronaut in space, photorealistic, 8k, cinematic lighting", "negative_prompt": "blurry, low quality, artifacts"},
+    {"id": "t002", "prompt": "a fantasy castle on a cliff above clouds, epic fantasy landscape, golden hour", "negative_prompt": "blurry, low quality, artifacts"},
+    {"id": "t003", "prompt": "a close-up of a butterfly on a flower, macro photography, sharp detail, bokeh background", "negative_prompt": "blurry, low quality, artifacts"}
+  ]
+}

mate_workload_imagegen-0.1.0/src/mate_workload_imagegen/data/test-sets/imagegen-512-v1.json ADDED Viewed

@@ -0,0 +1,15 @@
+{
+  "version": "imagegen-512-v1",
+  "width": 512,
+  "height": 512,
+  "steps": 20,
+  "cfg": 7.0,
+  "sampler": "euler_ancestral",
+  "tasks": [
+    {"id": "t001", "prompt": "a red apple on a wooden table, photorealistic, sharp focus, studio lighting", "negative_prompt": "blurry, low quality"},
+    {"id": "t002", "prompt": "a cat sitting on a windowsill at sunset, warm light, digital art", "negative_prompt": "blurry, low quality"},
+    {"id": "t003", "prompt": "a futuristic city at night, neon lights reflecting in rain puddles, cyberpunk", "negative_prompt": "blurry, low quality"},
+    {"id": "t004", "prompt": "a bowl of ramen with steam rising, food photography, top down view", "negative_prompt": "blurry, low quality"},
+    {"id": "t005", "prompt": "a mountain landscape with snow and pine trees, oil painting style", "negative_prompt": "blurry, low quality"}
+  ]
+}

mate_workload_imagegen-0.1.0/src/mate_workload_imagegen/py.typed ADDED Viewed

File without changes

mate_workload_imagegen-0.1.0/tests/test_measure.py ADDED Viewed

@@ -0,0 +1,115 @@
+from __future__ import annotations
+from unittest.mock import MagicMock
+import pytest
+from mate_workload_imagegen._measure import measure
+# Two-task fixture shared by every test below; call-count and throughput
+# expectations in the tests assume exactly two tasks.
+TASKS = [
+    {"id": "t001", "prompt": "a red apple", "negative_prompt": "blurry"},
+    {"id": "t002", "prompt": "a blue car", "negative_prompt": "blurry"},
+]
+def _make_engine(generation_time: float) -> MagicMock:
+    result = MagicMock()
+    result.generation_time_s = generation_time
+    engine = MagicMock()
+    engine.txt2img.return_value = result
+    return engine
+class TestMeasure:
+    def test_images_per_second(self):
+        engine = _make_engine(2.0)
+        median, _, _ = measure(
+            engine,
+            "model.safetensors",
+            TASKS,
+            width=512,
+            height=512,
+            steps=20,
+            cfg=7.0,
+            sampler="euler_ancestral",
+            runs=1,
+            warmup_runs=0,
+        )
+        # 2 tasks × 2.0s each → total 4.0s, 2 images → 0.5 img/s
+        assert median["images_per_second"] == pytest.approx(0.5)
+    def test_steps_per_second(self):
+        engine = _make_engine(2.0)
+        median, _, _ = measure(
+            engine,
+            "model.safetensors",
+            TASKS,
+            width=512,
+            height=512,
+            steps=20,
+            cfg=7.0,
+            sampler="euler_ancestral",
+            runs=1,
+            warmup_runs=0,
+        )
+        # avg time_per_image = 2.0s, steps=20 → 10.0 steps/s
+        assert median["steps_per_second"] == pytest.approx(10.0)
+    def test_resolution_in_output(self):
+        engine = _make_engine(1.0)
+        median, _, _ = measure(
+            engine,
+            "m",
+            TASKS,
+            width=1024,
+            height=1024,
+            steps=20,
+            cfg=7.0,
+            sampler="euler_ancestral",
+            runs=1,
+            warmup_runs=0,
+        )
+        assert median["resolution"] == "1024x1024"
+    def test_warmup_excluded(self):
+        call_count = 0
+        def side_effect(**kwargs):
+            nonlocal call_count
+            call_count += 1
+            r = MagicMock()
+            r.generation_time_s = 1.0
+            return r
+        engine = MagicMock()
+        engine.txt2img.side_effect = side_effect
+        measure(
+            engine,
+            "m",
+            TASKS,
+            width=512,
+            height=512,
+            steps=20,
+            cfg=7.0,
+            sampler="euler_ancestral",
+            runs=2,
+            warmup_runs=1,
+        )
+        # 1 warmup + 2 runs, each with 2 tasks → 6 calls
+        assert call_count == 6
+    def test_no_throttling_stable(self):
+        engine = _make_engine(1.0)
+        _, _, throttling = measure(
+            engine,
+            "m",
+            TASKS,
+            width=512,
+            height=512,
+            steps=20,
+            cfg=7.0,
+            sampler="euler_ancestral",
+            runs=3,
+            warmup_runs=0,
+        )
+        assert throttling is False