abstractvision 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
abstractvision/cli.py ADDED
@@ -0,0 +1,778 @@
+ from __future__ import annotations
+
+ import argparse
+ import json
+ import os
+ import shlex
+ import subprocess
+ import sys
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Sequence, Tuple
+
+ from .artifacts import LocalAssetStore, is_artifact_ref
+ from .backends import (
+     HuggingFaceDiffusersBackendConfig,
+     HuggingFaceDiffusersVisionBackend,
+     OpenAICompatibleBackendConfig,
+     OpenAICompatibleVisionBackend,
+     StableDiffusionCppBackendConfig,
+     StableDiffusionCppVisionBackend,
+ )
+ from .model_capabilities import VisionModelCapabilitiesRegistry
+ from .vision_manager import VisionManager
+
+
+ def _env(key: str, default: Optional[str] = None) -> Optional[str]:
+     v = os.environ.get(key)
+     if v is None:
+         return default
+     s = str(v).strip()
+     return s if s else default
+
+
+ def _env_bool(key: str, default: bool = False) -> bool:
+     v = _env(key)
+     if v is None:
+         return bool(default)
+     return str(v).strip().lower() in {"1", "true", "yes", "on"}
+
+
+ def _print_json(obj: Any) -> None:
+     print(json.dumps(obj, indent=2, sort_keys=True))
+
+
+ def _open_file(path: Path) -> None:
+     p = Path(path).expanduser().resolve()
+     if not p.exists():
+         raise FileNotFoundError(str(p))
+     if sys.platform == "darwin":
+         subprocess.run(["open", str(p)], check=False)
+         return
+     if sys.platform.startswith("win"):
+         # Best-effort. `start` is a shell built-in.
+         subprocess.run(["cmd", "/c", "start", "", str(p)], check=False)
+         return
+     subprocess.run(["xdg-open", str(p)], check=False)
+
+
+ def _build_openai_backend_from_args(args: argparse.Namespace) -> OpenAICompatibleVisionBackend:
+     base_url = str(args.base_url or "").strip()
+     if not base_url:
+         raise SystemExit("Missing --base-url (or $ABSTRACTVISION_BASE_URL).")
+     cfg = OpenAICompatibleBackendConfig(
+         base_url=base_url,
+         api_key=str(args.api_key) if args.api_key else None,
+         model_id=str(args.model_id) if args.model_id else None,
+         timeout_s=float(args.timeout_s),
+         image_generations_path=str(args.images_generations_path),
+         image_edits_path=str(args.images_edits_path),
+         text_to_video_path=str(args.text_to_video_path) if args.text_to_video_path else None,
+         image_to_video_path=str(args.image_to_video_path) if args.image_to_video_path else None,
+         image_to_video_mode=str(args.image_to_video_mode),
+     )
+     return OpenAICompatibleVisionBackend(config=cfg)
+
+
+ def _build_manager_from_args(args: argparse.Namespace) -> VisionManager:
+     store = LocalAssetStore(args.store_dir) if args.store_dir else LocalAssetStore()
+     backend = _build_openai_backend_from_args(args)
+     reg = VisionModelCapabilitiesRegistry()
+
+     cap_model_id = str(args.capabilities_model_id) if getattr(args, "capabilities_model_id", None) else None
+     if cap_model_id and cap_model_id not in set(reg.list_models()):
+         raise SystemExit(
+             f"--capabilities-model-id '{cap_model_id}' is not present in the registry. "
+             "Use `abstractvision models` to list known ids, or omit this flag to disable gating."
+         )
+
+     return VisionManager(backend=backend, store=store, model_id=cap_model_id, registry=reg if cap_model_id else None)
+
+
+ def _cmd_models(_: argparse.Namespace) -> int:
+     reg = VisionModelCapabilitiesRegistry()
+     for mid in reg.list_models():
+         print(mid)
+     return 0
+
+
+ def _cmd_tasks(_: argparse.Namespace) -> int:
+     reg = VisionModelCapabilitiesRegistry()
+     for t in reg.list_tasks():
+         desc = reg.get_task(t).get("description")
+         if isinstance(desc, str) and desc.strip():
+             print(f"{t}: {desc.strip()}")
+         else:
+             print(t)
+     return 0
+
+
+ def _cmd_show_model(args: argparse.Namespace) -> int:
+     reg = VisionModelCapabilitiesRegistry()
+     spec = reg.get(str(args.model_id))
+     print(spec.model_id)
+     print(f"provider: {spec.provider}")
+     print(f"license: {spec.license}")
+     if spec.notes:
+         print(f"notes: {spec.notes}")
+     print("tasks:")
+     for task_name, ts in sorted(spec.tasks.items()):
+         print(f" - {task_name}")
+         if ts.requires:
+             print(f" requires: {json.dumps(ts.requires, sort_keys=True)}")
+         if ts.params:
+             required = sorted([k for k, v in ts.params.items() if isinstance(v, dict) and v.get("required") is True])
+             optional = sorted([k for k, v in ts.params.items() if isinstance(v, dict) and v.get("required") is False])
+             if required:
+                 print(f" required params: {', '.join(required)}")
+             if optional:
+                 print(f" optional params: {', '.join(optional)}")
+     return 0
+
+
+ def _cmd_t2i(args: argparse.Namespace) -> int:
+     vm = _build_manager_from_args(args)
+     out = vm.generate_image(
+         args.prompt,
+         negative_prompt=args.negative_prompt,
+         width=args.width,
+         height=args.height,
+         steps=args.steps,
+         guidance_scale=args.guidance_scale,
+         seed=args.seed,
+     )
+     _print_json(out)
+     if isinstance(vm.store, LocalAssetStore) and isinstance(out, dict) and is_artifact_ref(out):
+         p = vm.store.get_content_path(out["$artifact"])
+         if p is not None:
+             print(str(p))
+             if args.open:
+                 _open_file(p)
+     return 0
+
+
+ def _cmd_i2i(args: argparse.Namespace) -> int:
+     vm = _build_manager_from_args(args)
+     image_bytes = Path(args.image).expanduser().read_bytes()
+     mask_bytes = Path(args.mask).expanduser().read_bytes() if args.mask else None
+     out = vm.edit_image(
+         args.prompt,
+         image=image_bytes,
+         mask=mask_bytes,
+         negative_prompt=args.negative_prompt,
+         steps=args.steps,
+         guidance_scale=args.guidance_scale,
+         seed=args.seed,
+     )
+     _print_json(out)
+     if isinstance(vm.store, LocalAssetStore) and isinstance(out, dict) and is_artifact_ref(out):
+         p = vm.store.get_content_path(out["$artifact"])
+         if p is not None:
+             print(str(p))
+             if args.open:
+                 _open_file(p)
+     return 0
+
+
+ @dataclass
+ class _ReplState:
+     backend_kind: str = _env("ABSTRACTVISION_BACKEND", "openai") or "openai"
+     base_url: Optional[str] = _env("ABSTRACTVISION_BASE_URL")
+     api_key: Optional[str] = _env("ABSTRACTVISION_API_KEY")
+     model_id: Optional[str] = _env("ABSTRACTVISION_MODEL_ID")
+     capabilities_model_id: Optional[str] = _env("ABSTRACTVISION_CAPABILITIES_MODEL_ID")
+     store_dir: Optional[str] = _env("ABSTRACTVISION_STORE_DIR")
+     timeout_s: float = float(_env("ABSTRACTVISION_TIMEOUT_S", "300") or "300")
+
+     images_generations_path: str = _env("ABSTRACTVISION_IMAGES_GENERATIONS_PATH", "/images/generations") or "/images/generations"
+     images_edits_path: str = _env("ABSTRACTVISION_IMAGES_EDITS_PATH", "/images/edits") or "/images/edits"
+     text_to_video_path: Optional[str] = _env("ABSTRACTVISION_TEXT_TO_VIDEO_PATH")
+     image_to_video_path: Optional[str] = _env("ABSTRACTVISION_IMAGE_TO_VIDEO_PATH")
+     image_to_video_mode: str = _env("ABSTRACTVISION_IMAGE_TO_VIDEO_MODE", "multipart") or "multipart"
+
+     diffusers_device: str = _env("ABSTRACTVISION_DIFFUSERS_DEVICE", "auto") or "auto"
+     diffusers_torch_dtype: Optional[str] = _env("ABSTRACTVISION_DIFFUSERS_TORCH_DTYPE")
+     diffusers_allow_download: bool = _env_bool("ABSTRACTVISION_DIFFUSERS_ALLOW_DOWNLOAD", True)
+     diffusers_auto_retry_fp32: bool = _env_bool("ABSTRACTVISION_DIFFUSERS_AUTO_RETRY_FP32", True)
+
+     sdcpp_bin: str = _env("ABSTRACTVISION_SDCPP_BIN", "sd-cli") or "sd-cli"
+     sdcpp_model: Optional[str] = _env("ABSTRACTVISION_SDCPP_MODEL")
+     sdcpp_diffusion_model: Optional[str] = _env("ABSTRACTVISION_SDCPP_DIFFUSION_MODEL")
+     sdcpp_vae: Optional[str] = _env("ABSTRACTVISION_SDCPP_VAE")
+     sdcpp_llm: Optional[str] = _env("ABSTRACTVISION_SDCPP_LLM")
+     sdcpp_llm_vision: Optional[str] = _env("ABSTRACTVISION_SDCPP_LLM_VISION")
+     sdcpp_extra_args: Optional[str] = _env("ABSTRACTVISION_SDCPP_EXTRA_ARGS")
+
+     defaults: Dict[str, Any] = None
+     _cached_backend_key: Optional[Tuple[Any, ...]] = None
+     _cached_backend: Any = None
+     _cached_store_dir: Optional[str] = None
+     _cached_store: Optional[LocalAssetStore] = None
+
+     def __post_init__(self) -> None:
+         if self.defaults is None:
+             self.defaults = {
+                 "t2i": {"width": 512, "height": 512, "steps": 10, "guidance_scale": None, "seed": None, "negative_prompt": None},
+                 "i2i": {"steps": 10, "guidance_scale": None, "seed": None, "negative_prompt": None},
+             }
+
+
+ def _repl_help() -> str:
+     return (
+         "Commands:\n"
+         " /help Show this help\n"
+         " /exit Quit (aliases: /quit, /q)\n"
+         " /models List known capability model ids\n"
+         " /tasks List known task keys\n"
+         " /show-model <id> Show a model's tasks + params\n"
+         " /config Show current backend/store config\n"
+         " /backend openai <base_url> [api_key] [model_id]\n"
+         " /backend diffusers <model_id_or_path> [device] [torch_dtype]\n"
+         " (downloads enabled by default; set ABSTRACTVISION_DIFFUSERS_ALLOW_DOWNLOAD=0 for cache-only)\n"
+         " /backend sdcpp <diffusion_model.gguf> <vae.safetensors> <llm.gguf> [sd_cli_path]\n"
+         " (Qwen Image: requires diffusion-model + vae + llm)\n"
+         " /cap-model <id|off> Set capability-gating model id (from registry) or 'off'\n"
+         " /store <dir|default> Set local store dir\n"
+         " /set <k> <v> Set default param (k like width/height/steps/seed/guidance_scale/negative_prompt)\n"
+         " /unset <k> Unset default param\n"
+         " /defaults Show current defaults\n"
+         " /t2i <prompt...> [--width N --height N --steps N --seed N --guidance-scale F --negative ...] [--open]\n"
+         " (extra flags are forwarded via request.extra)\n"
+         " /i2i --image path <prompt...> [--mask path --steps N --seed N --guidance-scale F --negative ...] [--open]\n"
+         " (extra flags are forwarded via request.extra)\n"
+         " /open <artifact_id> Open a locally stored artifact (LocalAssetStore only)\n"
+         "\n"
+         "Tip: typing plain text runs /t2i with that prompt.\n"
+     )
+
+
+
+ def _parse_flags_and_rest(tokens: List[str]) -> Tuple[Dict[str, Any], List[str]]:
+     flags: Dict[str, Any] = {}
+     rest: List[str] = []
+     i = 0
+     while i < len(tokens):
+         t = tokens[i]
+         if not t.startswith("--"):
+             rest.append(t)
+             i += 1
+             continue
+         key = t[2:].replace("-", "_")
+         if i + 1 >= len(tokens):
+             flags[key] = True
+             i += 1
+             continue
+         val = tokens[i + 1]
+         if val.startswith("--"):
+             flags[key] = True
+             i += 1
+             continue
+         flags[key] = val
+         i += 2
+     return flags, rest
+
+
+ def _parse_flag_args(tokens: List[str]) -> Dict[str, Any]:
+     flags, _ = _parse_flags_and_rest(tokens)
+     return flags
+
+
+ def _coerce_int(v: Any) -> Optional[int]:
+     if v is None:
+         return None
+     if isinstance(v, int):
+         return v
+     s = str(v).strip()
+     if not s:
+         return None
+     try:
+         return int(s)
+     except Exception:
+         return None
+
+
+ def _coerce_float(v: Any) -> Optional[float]:
+     if v is None:
+         return None
+     if isinstance(v, float):
+         return v
+     s = str(v).strip()
+     if not s:
+         return None
+     try:
+         return float(s)
+     except Exception:
+         return None
+
+
+ def _coerce_scalar(v: Any) -> Any:
+     if v is None:
+         return None
+     if isinstance(v, (bool, int, float)):
+         return v
+     s = str(v).strip()
+     if not s:
+         return None
+     low = s.lower()
+     if low in {"1", "true", "yes", "on"}:
+         return True
+     if low in {"0", "false", "no", "off"}:
+         return False
+     try:
+         return int(s)
+     except Exception:
+         pass
+     try:
+         return float(s)
+     except Exception:
+         return s
+
+
+ def _build_manager_from_state(state: _ReplState) -> VisionManager:
+     if state._cached_store is not None and state._cached_store_dir == state.store_dir:
+         store = state._cached_store
+     else:
+         store = LocalAssetStore(state.store_dir) if state.store_dir else LocalAssetStore()
+         state._cached_store = store
+         state._cached_store_dir = state.store_dir
+
+     backend_kind = str(state.backend_kind or "").strip().lower() or "openai"
+     backend_key: Tuple[Any, ...]
+     if backend_kind == "openai":
+         base_url = str(state.base_url or "").strip()
+         if not base_url:
+             raise ValueError("Backend is not configured. Use: /backend openai <base_url> [api_key] [model_id]")
+         backend_key = (
+             "openai",
+             base_url,
+             state.api_key,
+             state.model_id,
+             state.timeout_s,
+             state.images_generations_path,
+             state.images_edits_path,
+             state.text_to_video_path,
+             state.image_to_video_path,
+             state.image_to_video_mode,
+         )
+         if state._cached_backend is not None and state._cached_backend_key == backend_key:
+             backend = state._cached_backend
+         else:
+             cfg = OpenAICompatibleBackendConfig(
+                 base_url=base_url,
+                 api_key=str(state.api_key) if state.api_key else None,
+                 model_id=str(state.model_id) if state.model_id else None,
+                 timeout_s=float(state.timeout_s),
+                 image_generations_path=str(state.images_generations_path),
+                 image_edits_path=str(state.images_edits_path),
+                 text_to_video_path=str(state.text_to_video_path) if state.text_to_video_path else None,
+                 image_to_video_path=str(state.image_to_video_path) if state.image_to_video_path else None,
+                 image_to_video_mode=str(state.image_to_video_mode),
+             )
+             backend = OpenAICompatibleVisionBackend(config=cfg)
+             state._cached_backend = backend
+             state._cached_backend_key = backend_key
+     elif backend_kind == "diffusers":
+         model_id = str(state.model_id or "").strip()
+         if not model_id:
+             raise ValueError("Diffusers backend is not configured. Use: /backend diffusers <model_id_or_path> [device]")
+         backend_key = (
+             "diffusers",
+             model_id,
+             str(state.diffusers_device),
+             str(state.diffusers_torch_dtype) if state.diffusers_torch_dtype else None,
+             bool(state.diffusers_allow_download),
+             bool(state.diffusers_auto_retry_fp32),
+         )
+         if state._cached_backend is not None and state._cached_backend_key == backend_key:
+             backend = state._cached_backend
+         else:
+             cfg = HuggingFaceDiffusersBackendConfig(
+                 model_id=model_id,
+                 device=str(state.diffusers_device),
+                 torch_dtype=str(state.diffusers_torch_dtype) if state.diffusers_torch_dtype else None,
+                 allow_download=bool(state.diffusers_allow_download),
+                 auto_retry_fp32=bool(state.diffusers_auto_retry_fp32),
+             )
+             backend = HuggingFaceDiffusersVisionBackend(config=cfg)
+             state._cached_backend = backend
+             state._cached_backend_key = backend_key
+     elif backend_kind in {"sdcpp", "stable-diffusion.cpp", "stable_diffusion_cpp", "stable-diffusion-cpp"}:
+         backend_key = (
+             "sdcpp",
+             str(state.sdcpp_bin),
+             str(state.sdcpp_model) if state.sdcpp_model else None,
+             str(state.sdcpp_diffusion_model) if state.sdcpp_diffusion_model else None,
+             str(state.sdcpp_vae) if state.sdcpp_vae else None,
+             str(state.sdcpp_llm) if state.sdcpp_llm else None,
+             str(state.sdcpp_llm_vision) if state.sdcpp_llm_vision else None,
+             str(state.sdcpp_extra_args) if state.sdcpp_extra_args else None,
+         )
+         if state._cached_backend is not None and state._cached_backend_key == backend_key:
+             backend = state._cached_backend
+         else:
+             cfg = StableDiffusionCppBackendConfig(
+                 sd_cli_path=str(state.sdcpp_bin),
+                 model=str(state.sdcpp_model) if state.sdcpp_model else None,
+                 diffusion_model=str(state.sdcpp_diffusion_model) if state.sdcpp_diffusion_model else None,
+                 vae=str(state.sdcpp_vae) if state.sdcpp_vae else None,
+                 llm=str(state.sdcpp_llm) if state.sdcpp_llm else None,
+                 llm_vision=str(state.sdcpp_llm_vision) if state.sdcpp_llm_vision else None,
+                 extra_args=shlex.split(str(state.sdcpp_extra_args)) if state.sdcpp_extra_args else (),
+             )
+             backend = StableDiffusionCppVisionBackend(config=cfg)
+             state._cached_backend = backend
+             state._cached_backend_key = backend_key
+     else:
+         raise ValueError(f"Unknown backend kind: {backend_kind!r} (expected 'openai', 'diffusers', or 'sdcpp')")
+
+     reg = VisionModelCapabilitiesRegistry()
+     cap_id = str(state.capabilities_model_id) if state.capabilities_model_id else None
+     if cap_id and cap_id not in set(reg.list_models()):
+         raise ValueError(f"capability model id not in registry: {cap_id!r}")
+     return VisionManager(backend=backend, store=store, model_id=cap_id, registry=reg if cap_id else None)
+
+
+ def _cmd_repl(_: argparse.Namespace) -> int:
+     reg = VisionModelCapabilitiesRegistry()
+     state = _ReplState()
+
+     print("AbstractVision REPL")
+     print(f"- registry schema_version: {reg.schema_version()}")
+     print("Type /help for commands.\n")
+
+     while True:
+         try:
+             line = input("abstractvision> ").strip()
+         except (EOFError, KeyboardInterrupt):
+             print()
+             return 0
+         if not line:
+             continue
+         if not line.startswith("/"):
+             line = "/t2i " + line
+
+         try:
+             tokens = shlex.split(line)
+         except ValueError as e:
+             print(f"Parse error: {e}")
+             continue
+         if not tokens:
+             continue
+         cmd = tokens[0].lstrip("/").strip().lower()
+         args = tokens[1:]
+
+         try:
+             if cmd in {"exit", "quit", "q"}:
+                 return 0
+             if cmd == "help":
+                 print(_repl_help())
+                 continue
+             if cmd == "models":
+                 for mid in reg.list_models():
+                     print(mid)
+                 continue
+             if cmd == "tasks":
+                 for t in reg.list_tasks():
+                     desc = reg.get_task(t).get("description")
+                     if isinstance(desc, str) and desc.strip():
+                         print(f"{t}: {desc.strip()}")
+                     else:
+                         print(t)
+                 continue
+             if cmd == "show-model":
+                 if not args:
+                     print("Usage: /show-model <model_id>")
+                     continue
+                 _cmd_show_model(argparse.Namespace(model_id=" ".join(args)))
+                 continue
+             if cmd == "config":
+                 out: Dict[str, Any] = {
+                     "backend_kind": state.backend_kind,
+                     "base_url": state.base_url,
+                     "model_id": state.model_id,
+                     "capabilities_model_id": state.capabilities_model_id,
+                     "store_dir": state.store_dir,
+                     "timeout_s": state.timeout_s,
+                     "images_generations_path": state.images_generations_path,
+                     "images_edits_path": state.images_edits_path,
+                     "text_to_video_path": state.text_to_video_path,
+                     "image_to_video_path": state.image_to_video_path,
+                     "image_to_video_mode": state.image_to_video_mode,
+                     "diffusers_device": state.diffusers_device,
+                     "diffusers_torch_dtype": state.diffusers_torch_dtype,
+                     "diffusers_allow_download": state.diffusers_allow_download,
+                     "diffusers_auto_retry_fp32": state.diffusers_auto_retry_fp32,
+                     "sdcpp_bin": state.sdcpp_bin,
+                     "sdcpp_model": state.sdcpp_model,
+                     "sdcpp_diffusion_model": state.sdcpp_diffusion_model,
+                     "sdcpp_vae": state.sdcpp_vae,
+                     "sdcpp_llm": state.sdcpp_llm,
+                     "sdcpp_llm_vision": state.sdcpp_llm_vision,
+                     "sdcpp_extra_args": state.sdcpp_extra_args,
+                     "defaults": state.defaults,
+                 }
+                 _print_json(out)
+                 continue
+             if cmd == "backend":
+                 if not args:
+                     print(
+                         "Usage: /backend openai <base_url> [api_key] [model_id] OR "
+                         "/backend diffusers <model_id_or_path> [device] [torch_dtype] OR "
+                         "/backend sdcpp <diffusion_model.gguf> <vae.safetensors> <llm.gguf> [sd_cli_path]"
+                     )
+                     continue
+                 kind = str(args[0]).strip().lower()
+                 if kind == "openai":
+                     if len(args) < 2:
+                         print("Usage: /backend openai <base_url> [api_key] [model_id]")
+                         continue
+                     state.backend_kind = "openai"
+                     state.base_url = args[1]
+                     state.api_key = args[2] if len(args) >= 3 else state.api_key
+                     state.model_id = args[3] if len(args) >= 4 else state.model_id
+                     print("ok")
+                     continue
+                 if kind == "diffusers":
+                     if len(args) < 2:
+                         print("Usage: /backend diffusers <model_id_or_path> [device] [torch_dtype]")
+                         continue
+                     state.backend_kind = "diffusers"
+                     state.model_id = args[1]
+                     # Allow: /backend diffusers <model> [device] [torch_dtype]
+                     # And also: /backend diffusers <model> <torch_dtype> (keeps existing device)
+                     dtype_tokens = {"auto", "float16", "fp16", "bfloat16", "bf16", "float32", "fp32"}
+                     if len(args) >= 3 and str(args[2]).strip().lower() in dtype_tokens:
+                         state.diffusers_torch_dtype = str(args[2]).strip()
+                     else:
+                         state.diffusers_device = args[2] if len(args) >= 3 else state.diffusers_device
+                         state.diffusers_torch_dtype = str(args[3]).strip() if len(args) >= 4 else state.diffusers_torch_dtype
+                     print("ok")
+                     continue
+                 if kind == "sdcpp":
+                     if len(args) < 4:
+                         print("Usage: /backend sdcpp <diffusion_model.gguf> <vae.safetensors> <llm.gguf> [sd_cli_path]")
+                         continue
+                     state.backend_kind = "sdcpp"
+                     state.sdcpp_diffusion_model = args[1]
+                     state.sdcpp_vae = args[2]
+                     state.sdcpp_llm = args[3]
+                     state.sdcpp_bin = args[4] if len(args) >= 5 else state.sdcpp_bin
+                     print("ok")
+                     continue
+                 print("Unknown backend kind. Use: openai | diffusers | sdcpp")
+                 continue
+             if cmd == "cap-model":
+                 if not args:
+                     print("Usage: /cap-model <model_id|off>")
+                     continue
+                 if args[0].lower() == "off":
+                     state.capabilities_model_id = None
+                     print("ok (capability gating disabled)")
+                     continue
+                 mid = " ".join(args).strip()
+                 if mid not in set(reg.list_models()):
+                     print("Unknown model id (use /models).")
+                     continue
+                 state.capabilities_model_id = mid
+                 print("ok")
+                 continue
+             if cmd == "store":
+                 if not args:
+                     print("Usage: /store <dir|default>")
+                     continue
+                 if args[0].lower() == "default":
+                     state.store_dir = None
+                 else:
+                     state.store_dir = str(Path(args[0]).expanduser())
+                 print("ok")
+                 continue
+             if cmd == "set":
+                 if len(args) < 2:
+                     print("Usage: /set <key> <value>")
+                     continue
+                 key = args[0].replace("-", "_")
+                 value = " ".join(args[1:])
+                 updated = False
+                 for group in ("t2i", "i2i"):
+                     if key in state.defaults.get(group, {}):
+                         state.defaults[group][key] = value
+                         updated = True
+                 if not updated:
+                     for group in ("t2i", "i2i"):
+                         state.defaults.setdefault(group, {})[key] = value
+                 print("ok")
+                 continue
+             if cmd == "unset":
+                 if not args:
+                     print("Usage: /unset <key>")
+                     continue
+                 key = args[0].replace("-", "_")
+                 for group in ("t2i", "i2i"):
+                     if key in state.defaults.get(group, {}):
+                         state.defaults[group][key] = None
+                 print("ok")
+                 continue
+             if cmd == "defaults":
+                 _print_json(state.defaults)
+                 continue
+             if cmd == "open":
+                 if not args:
+                     print("Usage: /open <artifact_id>")
+                     continue
+                 store = LocalAssetStore(state.store_dir) if state.store_dir else LocalAssetStore()
+                 p = store.get_content_path(args[0])
+                 if p is None:
+                     print("Not found in local store.")
+                     continue
+                 print(str(p))
+                 _open_file(p)
+                 continue
+             if cmd == "t2i":
+                 if not args:
+                     print("Usage: /t2i <prompt...> [--width ...]")
+                     continue
+                 flags, rest = _parse_flags_and_rest(args)
+                 prompt = " ".join(rest).strip()
+                 if not prompt:
+                     print("Missing prompt.")
+                     continue
+
+                 vm = _build_manager_from_state(state)
+                 d = dict(state.defaults.get("t2i", {}))
+                 d.update(flags)
+                 extra = {
+                     k: _coerce_scalar(v)
+                     for k, v in d.items()
+                     if k not in {"width", "height", "steps", "guidance_scale", "seed", "negative_prompt", "open"} and v is not None
+                 }
+                 out = vm.generate_image(
+                     prompt,
+                     negative_prompt=d.get("negative_prompt"),
+                     width=_coerce_int(d.get("width")),
+                     height=_coerce_int(d.get("height")),
+                     steps=_coerce_int(d.get("steps")),
+                     guidance_scale=_coerce_float(d.get("guidance_scale")),
+                     seed=_coerce_int(d.get("seed")),
+                     extra=extra,
+                 )
+                 _print_json(out)
+                 if isinstance(vm.store, LocalAssetStore) and isinstance(out, dict) and is_artifact_ref(out):
+                     p = vm.store.get_content_path(out["$artifact"])
+                     if p is not None:
+                         print(str(p))
+                         if bool(flags.get("open")):
+                             _open_file(p)
+                 continue
+             if cmd == "i2i":
+                 if not args:
+                     print("Usage: /i2i --image path <prompt...> [--mask path ...]")
+                     continue
+                 flags, rest = _parse_flags_and_rest(args)
+                 image_path = flags.get("image")
+                 if not image_path:
+                     print("Missing --image path")
+                     continue
+                 mask_path = flags.get("mask")
+                 prompt = " ".join(rest).strip()
+                 if not prompt:
+                     print("Missing prompt.")
+                     continue
+
+                 vm = _build_manager_from_state(state)
+                 d = dict(state.defaults.get("i2i", {}))
+                 d.update(flags)
+                 extra = {
+                     k: _coerce_scalar(v)
+                     for k, v in d.items()
+                     if k not in {"image", "mask", "steps", "guidance_scale", "seed", "negative_prompt", "open"} and v is not None
+                 }
+                 img = Path(str(image_path)).expanduser().read_bytes()
+                 mask = Path(str(mask_path)).expanduser().read_bytes() if mask_path else None
+                 out = vm.edit_image(
+                     prompt,
+                     image=img,
+                     mask=mask,
+                     negative_prompt=d.get("negative_prompt"),
+                     steps=_coerce_int(d.get("steps")),
+                     guidance_scale=_coerce_float(d.get("guidance_scale")),
+                     seed=_coerce_int(d.get("seed")),
+                     extra=extra,
+                 )
+                 _print_json(out)
+                 if isinstance(vm.store, LocalAssetStore) and isinstance(out, dict) and is_artifact_ref(out):
+                     p = vm.store.get_content_path(out["$artifact"])
+                     if p is not None:
+                         print(str(p))
+                         if bool(flags.get("open")):
+                             _open_file(p)
+                 continue
+
+             print("Unknown command. Type /help.")
+         except Exception as e:
+             print(f"Error: {e}")
+
+
+ def build_parser() -> argparse.ArgumentParser:
+     p = argparse.ArgumentParser(prog="abstractvision", description="AbstractVision CLI (capabilities + generation).")
+     sub = p.add_subparsers(dest="cmd", required=True)
+
+     sub.add_parser("models", help="List known model ids (from capability registry).").set_defaults(_fn=_cmd_models)
+     sub.add_parser("tasks", help="List known task keys (from capability registry).").set_defaults(_fn=_cmd_tasks)
+
+     sm = sub.add_parser("show-model", help="Show a model's supported tasks and params.")
+     sm.add_argument("model_id")
+     sm.set_defaults(_fn=_cmd_show_model)
+
+     repl = sub.add_parser("repl", help="Interactive REPL for testing capabilities and generation.")
+     repl.set_defaults(_fn=_cmd_repl)
+
+     def _add_backend_flags(ap: argparse.ArgumentParser) -> None:
+         ap.add_argument("--base-url", default=_env("ABSTRACTVISION_BASE_URL"), help="OpenAI-compatible base URL (e.g. http://localhost:1234/v1).")
+         ap.add_argument("--api-key", default=_env("ABSTRACTVISION_API_KEY"), help="API key (Bearer).")
+         ap.add_argument("--model-id", default=_env("ABSTRACTVISION_MODEL_ID"), help="Remote model id/name.")
+         ap.add_argument("--capabilities-model-id", default=_env("ABSTRACTVISION_CAPABILITIES_MODEL_ID"), help="Optional: enforce support using a registry model id.")
+         ap.add_argument("--timeout-s", type=float, default=float(_env("ABSTRACTVISION_TIMEOUT_S", "300") or "300"), help="HTTP timeout seconds (default: 300).")
+         ap.add_argument("--store-dir", default=_env("ABSTRACTVISION_STORE_DIR"), help="Local asset store dir (default: ~/.abstractvision/assets).")
+         ap.add_argument("--images-generations-path", default=_env("ABSTRACTVISION_IMAGES_GENERATIONS_PATH", "/images/generations"), help="Path for image generations.")
+         ap.add_argument("--images-edits-path", default=_env("ABSTRACTVISION_IMAGES_EDITS_PATH", "/images/edits"), help="Path for image edits.")
+         ap.add_argument("--text-to-video-path", default=_env("ABSTRACTVISION_TEXT_TO_VIDEO_PATH"), help="Optional path for text-to-video.")
+         ap.add_argument("--image-to-video-path", default=_env("ABSTRACTVISION_IMAGE_TO_VIDEO_PATH"), help="Optional path for image-to-video.")
+         ap.add_argument("--image-to-video-mode", default=_env("ABSTRACTVISION_IMAGE_TO_VIDEO_MODE", "multipart"), help="image_to_video mode: multipart|json_b64.")
+
+     t2i = sub.add_parser("t2i", help="One-shot text-to-image (stores output and prints artifact ref + path).")
+     _add_backend_flags(t2i)
+     t2i.add_argument("prompt")
+     t2i.add_argument("--negative-prompt", default=None)
+     t2i.add_argument("--width", type=int, default=512)
+     t2i.add_argument("--height", type=int, default=512)
+     t2i.add_argument("--steps", type=int, default=10)
+     t2i.add_argument("--guidance-scale", type=float, default=None, dest="guidance_scale")
+     t2i.add_argument("--seed", type=int, default=None)
+     t2i.add_argument("--open", action="store_true", help="Open the output file (best-effort).")
+     t2i.set_defaults(_fn=_cmd_t2i)
+
+     i2i = sub.add_parser("i2i", help="One-shot image-to-image edit (stores output and prints artifact ref + path).")
+     _add_backend_flags(i2i)
+     i2i.add_argument("--image", required=True, help="Input image file path.")
+     i2i.add_argument("--mask", default=None, help="Optional mask file path.")
+     i2i.add_argument("prompt")
+     i2i.add_argument("--negative-prompt", default=None)
+     i2i.add_argument("--steps", type=int, default=10)
+     i2i.add_argument("--guidance-scale", type=float, default=None, dest="guidance_scale")
+     i2i.add_argument("--seed", type=int, default=None)
+     i2i.add_argument("--open", action="store_true", help="Open the output file (best-effort).")
+     i2i.set_defaults(_fn=_cmd_i2i)
+
+     return p
+
+
+ def main(argv: Optional[Sequence[str]] = None) -> int:
+     parser = build_parser()
+     args = parser.parse_args(list(argv) if argv is not None else None)
+     fn = getattr(args, "_fn", None)
+     if not callable(fn):
+         raise SystemExit(2)
+     return int(fn(args))
+
+
+ if __name__ == "__main__":
+     raise SystemExit(main())
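
For reference, a minimal sketch (not part of the package diff) of how this new entry point could be exercised programmatically. The subcommands, flags, and environment variables are the ones defined in the file above; the prompt text and the local server URL are only illustrative placeholders.

from abstractvision.cli import main

# List the model ids known to the capability registry (equivalent to `abstractvision models`).
main(["models"])

# One-shot text-to-image against an assumed OpenAI-compatible server; the base URL could
# also be supplied via ABSTRACTVISION_BASE_URL instead of the --base-url flag.
main([
    "t2i", "a watercolor fox",
    "--base-url", "http://localhost:1234/v1",
    "--width", "512", "--height", "512",
])

The same module also exposes the interactive mode via main(["repl"]), where plain text input is treated as a /t2i prompt.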