abstractvision 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to its public registry. It is provided for informational purposes only.
@@ -0,0 +1,406 @@
+ {
+   "schema_version": "1.0",
+   "tasks": {
+     "text_to_image": {
+       "description": "Generate an image from text (prompt + optional negative prompt)."
+     },
+     "image_to_image": {
+       "description": "Edit/transform an input image using a text prompt (optionally with a mask)."
+     },
+     "multi_view_image": {
+       "description": "Generate multiple consistent views/angles for a concept (optionally conditioned on a reference image)."
+     },
+     "text_to_video": {
+       "description": "Generate a video from text (prompt + optional negative prompt)."
+     },
+     "image_to_video": {
+       "description": "Generate a video conditioned on an image (optionally with text guidance)."
+     }
+   },
+   "models": {
+     "Qwen/Qwen-Image-2512": {
+       "provider": "huggingface",
+       "license": "apache-2.0",
+       "notes": "Primary text-to-image model (Diffusers: QwenImagePipeline).",
+       "tasks": {
+         "text_to_image": {
+           "inputs": ["text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "Qwen/Qwen-Image": {
+       "provider": "huggingface",
+       "license": "apache-2.0",
+       "notes": "Older Qwen Image release (Diffusers: QwenImagePipeline).",
+       "tasks": {
+         "text_to_image": {
+           "inputs": ["text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "black-forest-labs/FLUX.2-klein-4B": {
+       "provider": "huggingface",
+       "license": "apache-2.0",
+       "notes": "FLUX 2 klein (4B, open) (Diffusers: Flux2KleinPipeline). Requires Diffusers from source (diffusers@main) today; runtime stays offline-only. Supports generation + editing pipelines.",
+       "tasks": {
+         "text_to_image": {
+           "inputs": ["text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         },
+         "image_to_image": {
+           "inputs": ["image", "text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "image": {"required": true},
+             "mask": {"required": false},
+             "negative_prompt": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "black-forest-labs/FLUX.2-dev": {
+       "provider": "huggingface",
+       "license": "flux-dev-non-commercial-license",
+       "notes": "FLUX 2 dev (gated on HF; non-commercial) (Diffusers: Flux2Pipeline). Supports generation + editing pipelines.",
+       "tasks": {
+         "text_to_image": {
+           "inputs": ["text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         },
+         "image_to_image": {
+           "inputs": ["image", "text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "image": {"required": true},
+             "mask": {"required": false},
+             "negative_prompt": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "runwayml/stable-diffusion-v1-5": {
+       "provider": "huggingface",
+       "license": "creativeml-openrail-m",
+       "notes": "Stable Diffusion 1.5 (Diffusers: StableDiffusionPipeline / StableDiffusionImg2ImgPipeline / StableDiffusionInpaintPipeline).",
+       "tasks": {
+         "text_to_image": {
+           "inputs": ["text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         },
+         "image_to_image": {
+           "inputs": ["image", "text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "image": {"required": true},
+             "mask": {"required": false},
+             "negative_prompt": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "stabilityai/stable-diffusion-3.5-large-turbo": {
+       "provider": "huggingface",
+       "license": "stabilityai-ai-community",
+       "notes": "Stable Diffusion 3.5 Large Turbo (gated on HF; Stability AI Community License) (Diffusers: StableDiffusion3Pipeline). Optimized for low step counts.",
+       "tasks": {
+         "text_to_image": {
+           "inputs": ["text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "stabilityai/stable-diffusion-3.5-large": {
+       "provider": "huggingface",
+       "license": "stabilityai-ai-community",
+       "notes": "Stable Diffusion 3.5 Large (gated on HF; Stability AI Community License) (Diffusers: StableDiffusion3Pipeline).",
+       "tasks": {
+         "text_to_image": {
+           "inputs": ["text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "stabilityai/stable-diffusion-3.5-medium": {
+       "provider": "huggingface",
+       "license": "stabilityai-ai-community",
+       "notes": "Stable Diffusion 3.5 Medium (gated on HF; Stability AI Community License) (Diffusers: StableDiffusion3Pipeline).",
+       "tasks": {
+         "text_to_image": {
+           "inputs": ["text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "Tongyi-MAI/Z-Image-Turbo": {
+       "provider": "huggingface",
+       "license": "unknown",
+       "notes": "Turbo text-to-image option; backend may clamp/ignore some params.",
+       "tasks": {
+         "text_to_image": {
+           "inputs": ["text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "zai-org/GLM-Image": {
+       "provider": "huggingface",
+       "license": "unknown",
+       "notes": "General image model family (generation + edit in some pipelines). Requires Diffusers from source (diffusers@main) today; runtime stays offline-only.",
+       "tasks": {
+         "text_to_image": {
+           "inputs": ["text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         },
+         "image_to_image": {
+           "inputs": ["image", "text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "image": {"required": true},
+             "mask": {"required": false},
+             "negative_prompt": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "Qwen/Qwen-Image-Edit-2511": {
+       "provider": "huggingface",
+       "license": "unknown",
+       "notes": "Primary image edit model (image-to-image).",
+       "tasks": {
+         "image_to_image": {
+           "inputs": ["image", "text"],
+           "outputs": ["image"],
+           "params": {
+             "prompt": {"required": true},
+             "image": {"required": true},
+             "mask": {"required": false},
+             "negative_prompt": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA": {
+       "provider": "huggingface",
+       "license": "apache-2.0",
+       "notes": "LoRA adapter used with a compatible base (typically Qwen-Image-Edit-2511) to generate multiple angles.",
+       "tasks": {
+         "multi_view_image": {
+           "inputs": ["text", "image_optional"],
+           "outputs": ["image[]"],
+           "requires": {
+             "base_model_id": "Qwen/Qwen-Image-Edit-2511"
+           },
+           "params": {
+             "prompt": {"required": true},
+             "reference_image": {"required": false},
+             "angles": {"required": false},
+             "negative_prompt": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "Wan-AI/Wan2.2-T2V-A14B": {
+       "provider": "huggingface",
+       "license": "unknown",
+       "notes": "Text-to-video foundation model.",
+       "tasks": {
+         "text_to_video": {
+           "inputs": ["text"],
+           "outputs": ["video"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "fps": {"required": false},
+             "num_frames": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "tencent/HunyuanVideo-1.5": {
+       "provider": "huggingface",
+       "license": "unknown",
+       "notes": "Text-to-video model.",
+       "tasks": {
+         "text_to_video": {
+           "inputs": ["text"],
+           "outputs": ["video"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "fps": {"required": false},
+             "num_frames": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "genmo/mochi-1-preview": {
+       "provider": "huggingface",
+       "license": "apache-2.0",
+       "notes": "Open text-to-video model (Diffusers: MochiPipeline).",
+       "tasks": {
+         "text_to_video": {
+           "inputs": ["text"],
+           "outputs": ["video"],
+           "params": {
+             "prompt": {"required": true},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "fps": {"required": false},
+             "num_frames": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     },
+     "Lightricks/LTX-2": {
+       "provider": "huggingface",
+       "license": "ltx-2-community-license-agreement",
+       "notes": "Image-to-video model family; some parameters (e.g., width/height/num_frames) carry backend-enforced divisibility constraints.",
+       "tasks": {
+         "image_to_video": {
+           "inputs": ["image", "text_optional"],
+           "outputs": ["video"],
+           "params": {
+             "image": {"required": true},
+             "prompt": {"required": false},
+             "negative_prompt": {"required": false},
+             "width": {"required": false},
+             "height": {"required": false},
+             "fps": {"required": false},
+             "num_frames": {"required": false},
+             "steps": {"required": false},
+             "guidance_scale": {"required": false},
+             "seed": {"required": false}
+           }
+         }
+       }
+     }
+   }
+ }
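
For illustration, a minimal sketch of how a consumer might query this registry: collect the models that support a given task along with the params they require. The file name ("model_registry.json") and the helper itself are assumptions; the diff does not show where the package loads this JSON from.

# Hypothetical helper for the registry above; the path is an assumption.
import json
from pathlib import Path


def models_for_task(registry_path: Path, task: str) -> dict[str, list[str]]:
    """Map each model id that supports `task` to the params it requires."""
    registry = json.loads(registry_path.read_text(encoding="utf-8"))
    out: dict[str, list[str]] = {}
    for model_id, spec in registry["models"].items():
        task_spec = spec.get("tasks", {}).get(task)
        if task_spec is not None:
            params = task_spec.get("params", {})
            out[model_id] = [k for k, v in params.items() if v.get("required")]
    return out


# models_for_task(Path("model_registry.json"), "image_to_video")
# -> {"Lightricks/LTX-2": ["image"]}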
@@ -0,0 +1,43 @@
+ """Backend exports.
+
+ Important: this package must stay import-light.
+
+ Some backends are intentionally heavy (Torch/Diffusers). Import them lazily so
+ `import abstractvision` (and AbstractCore plugin discovery) does not pull GPU
+ stacks unless the caller explicitly selects a local backend.
+ """
+
+ from .base_backend import VisionBackend
+
+ __all__ = [
+     "VisionBackend",
+     "OpenAICompatibleBackendConfig",
+     "OpenAICompatibleVisionBackend",
+     "HuggingFaceDiffusersBackendConfig",
+     "HuggingFaceDiffusersVisionBackend",
+     "StableDiffusionCppBackendConfig",
+     "StableDiffusionCppVisionBackend",
+ ]
+
+
+ def __getattr__(name: str):
+     if name in {"OpenAICompatibleBackendConfig", "OpenAICompatibleVisionBackend"}:
+         from .openai_compatible import OpenAICompatibleBackendConfig, OpenAICompatibleVisionBackend
+
+         return OpenAICompatibleBackendConfig if name == "OpenAICompatibleBackendConfig" else OpenAICompatibleVisionBackend
+
+     if name in {"StableDiffusionCppBackendConfig", "StableDiffusionCppVisionBackend"}:
+         from .stable_diffusion_cpp import StableDiffusionCppBackendConfig, StableDiffusionCppVisionBackend
+
+         return StableDiffusionCppBackendConfig if name == "StableDiffusionCppBackendConfig" else StableDiffusionCppVisionBackend
+
+     if name in {"HuggingFaceDiffusersBackendConfig", "HuggingFaceDiffusersVisionBackend"}:
+         from .huggingface_diffusers import HuggingFaceDiffusersBackendConfig, HuggingFaceDiffusersVisionBackend
+
+         return (
+             HuggingFaceDiffusersBackendConfig
+             if name == "HuggingFaceDiffusersBackendConfig"
+             else HuggingFaceDiffusersVisionBackend
+         )
+
+     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
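
A quick illustration of the lazy-export behavior (PEP 562 module-level `__getattr__`): importing the package binds only the light base class, and the heavy Torch/Diffusers stack is imported on first access of a deferred name. The `sys.modules` check is a sketch and only holds if nothing else in the process has already imported torch.

import sys

from abstractvision.backends import VisionBackend  # light: base class only
assert "torch" not in sys.modules  # sketch: holds only if nothing else pulled in torch

# First access of a deferred name routes through __getattr__ and imports
# the Torch/Diffusers stack at that point.
from abstractvision.backends import HuggingFaceDiffusersVisionBackend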
@@ -0,0 +1,63 @@
+ from __future__ import annotations
+
+ from abc import ABC, abstractmethod
+ from typing import Callable, Optional
+
+ from ..types import (
+     GeneratedAsset,
+     ImageEditRequest,
+     ImageGenerationRequest,
+     ImageToVideoRequest,
+     MultiAngleRequest,
+     VideoGenerationRequest,
+     VisionBackendCapabilities,
+ )
+
+
+ class VisionBackend(ABC):
+     """Backend interface for generative vision tasks."""
+
+     def generate_image_with_progress(
+         self,
+         request: ImageGenerationRequest,
+         progress_callback: Optional[Callable[[int, Optional[int]], None]] = None,
+     ) -> GeneratedAsset:
+         """Generate an image, optionally reporting progress (best-effort)."""
+         _ = progress_callback
+         return self.generate_image(request)
+
+     def edit_image_with_progress(
+         self,
+         request: ImageEditRequest,
+         progress_callback: Optional[Callable[[int, Optional[int]], None]] = None,
+     ) -> GeneratedAsset:
+         """Edit an image, optionally reporting progress (best-effort)."""
+         _ = progress_callback
+         return self.edit_image(request)
+
+     def get_capabilities(self) -> Optional[VisionBackendCapabilities]:
+         """Return backend-level capability constraints (optional)."""
+         return None
+
+     def preload(self) -> None:
+         """Best-effort: load model weights into memory for faster first inference."""
+         return None
+
+     def unload(self) -> None:
+         """Best-effort: release model weights from memory."""
+         return None
+
+     @abstractmethod
+     def generate_image(self, request: ImageGenerationRequest) -> GeneratedAsset: ...
+
+     @abstractmethod
+     def edit_image(self, request: ImageEditRequest) -> GeneratedAsset: ...
+
+     @abstractmethod
+     def generate_angles(self, request: MultiAngleRequest) -> list[GeneratedAsset]: ...
+
+     @abstractmethod
+     def generate_video(self, request: VideoGenerationRequest) -> GeneratedAsset: ...
+
+     @abstractmethod
+     def image_to_video(self, request: ImageToVideoRequest) -> GeneratedAsset: ...
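
To illustrate the contract, a hypothetical minimal subclass: only the five abstract methods must be provided, while the `*_with_progress` wrappers, `get_capabilities`, `preload`, and `unload` fall back to the defaults above. `NullBackend` is illustrative, not part of the package.

from abstractvision.backends import VisionBackend
from abstractvision.types import (
    GeneratedAsset,
    ImageEditRequest,
    ImageGenerationRequest,
    ImageToVideoRequest,
    MultiAngleRequest,
    VideoGenerationRequest,
)


class NullBackend(VisionBackend):
    """Illustrative stub; each hook is where a real pipeline call would go."""

    def generate_image(self, request: ImageGenerationRequest) -> GeneratedAsset:
        raise NotImplementedError("invoke a text-to-image pipeline here")

    def edit_image(self, request: ImageEditRequest) -> GeneratedAsset:
        raise NotImplementedError("invoke an image-to-image pipeline here")

    def generate_angles(self, request: MultiAngleRequest) -> list[GeneratedAsset]:
        raise NotImplementedError("invoke a multi-view pipeline here")

    def generate_video(self, request: VideoGenerationRequest) -> GeneratedAsset:
        raise NotImplementedError("invoke a text-to-video pipeline here")

    def image_to_video(self, request: ImageToVideoRequest) -> GeneratedAsset:
        raise NotImplementedError("invoke an image-to-video pipeline here")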