omnius 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -250369,10 +250369,19 @@ function inferImageGenerationBackend(model, requested) {
250369
250369
  return "sdcpp";
250370
250370
  return "diffusers";
250371
250371
  }
250372
+ function imageGenerationDir(repoRoot = ".") {
250373
+ return join36(repoRoot, ".omnius", "image-gen");
250374
+ }
250375
+ function diffusersVenvDir(repoRoot = ".") {
250376
+ return join36(imageGenerationDir(repoRoot), ".venv");
250377
+ }
250378
+ function sdcppVenvDir(repoRoot = ".") {
250379
+ return join36(imageGenerationDir(repoRoot), ".venv-sdcpp");
250380
+ }
250372
250381
  function imageGenerationSetupPlan(backend, repoRoot = ".", model) {
250373
- const imageDir = join36(repoRoot, ".omnius", "image-gen");
250382
+ const imageDir = imageGenerationDir(repoRoot);
250374
250383
  if (backend === "ollama") {
250375
- const chosen = model && model !== "auto" ? model : "x/z-image-turbo";
250384
+ const chosen = model && model !== "auto" ? model : DEFAULT_OLLAMA_IMAGE_MODEL;
250376
250385
  return {
250377
250386
  backend,
250378
250387
  title: "Ollama image generation",
@@ -250387,26 +250396,30 @@ function imageGenerationSetupPlan(backend, repoRoot = ".", model) {
250387
250396
  };
250388
250397
  }
250389
250398
  if (backend === "diffusers") {
250399
+ const venvDir2 = diffusersVenvDir(repoRoot);
250400
+ const chosen = model && model !== "auto" ? model : DEFAULT_DIFFUSERS_IMAGE_MODEL;
250390
250401
  return {
250391
250402
  backend,
250392
250403
  title: "Python Diffusers image generation",
250393
250404
  commands: [
250394
- `python3 -m venv ${join36(imageDir, "venv")}`,
250395
- `${join36(imageDir, "venv", "bin", "python")} -m pip install -U pip torch diffusers transformers accelerate safetensors pillow`,
250396
- `omnius /image "a compact robot painter" --backend diffusers --model ${model && model !== "auto" ? model : "segmind/tiny-sd"}`
250405
+ `python3 -m venv ${venvDir2}`,
250406
+ `${venvPython(venvDir2)} -m pip install -U pip ${DIFFUSERS_PYTHON_PACKAGES.join(" ")}`,
250407
+ `omnius /image "a compact robot painter" --backend diffusers --model ${chosen}`
250397
250408
  ],
250398
250409
  notes: [
250399
- "Good lightweight candidates: segmind/tiny-sd, nota-ai/bk-sdm-tiny-2m, SimianLuo/LCM_Dreamshaper_v7, stabilityai/sd-turbo.",
250410
+ `Default first-run model: ${DEFAULT_DIFFUSERS_IMAGE_MODEL}. Good lighter candidates: stabilityai/sd-turbo, segmind/tiny-sd, nota-ai/bk-sdm-tiny-2m.`,
250411
+ "The venv, Hugging Face cache, Torch cache, and pip cache stay under .omnius/image-gen.",
250400
250412
  "The runner script is created automatically at .omnius/image-gen/diffusers_text2image.py."
250401
250413
  ]
250402
250414
  };
250403
250415
  }
250416
+ const venvDir = sdcppVenvDir(repoRoot);
250404
250417
  return {
250405
250418
  backend,
250406
250419
  title: "stable-diffusion.cpp Python image generation",
250407
250420
  commands: [
250408
- `python3 -m venv ${join36(imageDir, "venv-sdcpp")}`,
250409
- `${join36(imageDir, "venv-sdcpp", "bin", "python")} -m pip install -U pip stable-diffusion-cpp-python pillow`,
250421
+ `python3 -m venv ${venvDir}`,
250422
+ `${venvPython(venvDir)} -m pip install -U pip ${SDCPP_PYTHON_PACKAGES.join(" ")}`,
250410
250423
  `omnius /image "a compact robot painter" --backend sdcpp --model /absolute/path/to/model.gguf`
250411
250424
  ],
250412
250425
  notes: [
@@ -250450,17 +250463,75 @@ function trimProcessText(text, max = 1800) {
250450
250463
  return clean3;
250451
250464
  return clean3.slice(0, max - 20) + "\n... (truncated)";
250452
250465
  }
250453
- function pythonFor(repoRoot, kind, explicit) {
250466
+ function imageGenerationPythonEnv(repoRoot) {
250467
+ const root = imageGenerationDir(repoRoot);
250468
+ const hf = join36(root, "huggingface");
250469
+ return {
250470
+ PYTHONUNBUFFERED: "1",
250471
+ HF_HOME: hf,
250472
+ HUGGINGFACE_HUB_CACHE: join36(hf, "hub"),
250473
+ TRANSFORMERS_CACHE: join36(hf, "transformers"),
250474
+ DIFFUSERS_CACHE: join36(hf, "diffusers"),
250475
+ TORCH_HOME: join36(root, "torch"),
250476
+ XDG_CACHE_HOME: join36(root, "cache"),
250477
+ PIP_CACHE_DIR: join36(root, "pip-cache")
250478
+ };
250479
+ }
250480
+ async function ensureImageGenerationCacheDirs(repoRoot) {
250481
+ const env2 = imageGenerationPythonEnv(repoRoot);
250482
+ await Promise.all([
250483
+ imageGenerationDir(repoRoot),
250484
+ env2["HF_HOME"],
250485
+ env2["HUGGINGFACE_HUB_CACHE"],
250486
+ env2["TRANSFORMERS_CACHE"],
250487
+ env2["DIFFUSERS_CACHE"],
250488
+ env2["TORCH_HOME"],
250489
+ env2["XDG_CACHE_HOME"],
250490
+ env2["PIP_CACHE_DIR"]
250491
+ ].filter((value2) => Boolean(value2)).map((dir) => mkdir11(dir, { recursive: true })));
250492
+ }
250493
+ async function pythonCanImport(command, code8, repoRoot, env2) {
250494
+ const result = await runProcess2(command, ["-c", code8], { cwd: repoRoot, timeoutMs: 6e4, env: env2 });
250495
+ return result.code === 0;
250496
+ }
250497
+ async function ensurePythonFor(repoRoot, kind, explicit) {
250498
+ const pythonEnv = imageGenerationPythonEnv(repoRoot);
250499
+ await ensureImageGenerationCacheDirs(repoRoot);
250454
250500
  if (explicit)
250455
- return explicit;
250456
- const env2 = process.env["OMNIUS_IMAGE_PYTHON"];
250457
- if (env2)
250458
- return env2;
250459
- const venv = kind === "diffusers" ? join36(repoRoot, ".omnius", "image-gen", "venv", "bin", "python") : join36(repoRoot, ".omnius", "image-gen", "venv-sdcpp", "bin", "python");
250460
- return existsSync23(venv) ? venv : "python3";
250501
+ return { command: explicit, env: pythonEnv };
250502
+ const configuredPython = process.env["OMNIUS_IMAGE_PYTHON"];
250503
+ if (configuredPython)
250504
+ return { command: configuredPython, env: pythonEnv };
250505
+ const venvDir = kind === "diffusers" ? diffusersVenvDir(repoRoot) : sdcppVenvDir(repoRoot);
250506
+ const command = venvPython(venvDir);
250507
+ if (!existsSync23(command)) {
250508
+ const created = await runProcess2("python3", ["-m", "venv", venvDir], { cwd: repoRoot, timeoutMs: 18e4, env: pythonEnv });
250509
+ if (created.code !== 0) {
250510
+ throw new Error(`Failed to create image-generation venv at ${venvDir}.
250511
+ ${trimProcessText(created.stderr || created.stdout)}`);
250512
+ }
250513
+ }
250514
+ const importCheck = kind === "diffusers" ? "import torch, diffusers, PIL\nfrom diffusers import AutoPipelineForText2Image\n" : "import stable_diffusion_cpp, PIL\n";
250515
+ if (await pythonCanImport(command, importCheck, repoRoot, pythonEnv)) {
250516
+ return { command, env: pythonEnv };
250517
+ }
250518
+ const packages = kind === "diffusers" ? DIFFUSERS_PYTHON_PACKAGES : SDCPP_PYTHON_PACKAGES;
250519
+ const pip = await runProcess2(command, ["-m", "pip", "install", "-U", "pip", ...packages], {
250520
+ cwd: repoRoot,
250521
+ timeoutMs: 18e5,
250522
+ env: pythonEnv
250523
+ });
250524
+ if (pip.code !== 0) {
250525
+ throw new Error(`Failed to install ${kind} image-generation packages into ${venvDir}.
250526
+ ${trimProcessText(pip.stderr || pip.stdout)}`);
250527
+ }
250528
+ if (!await pythonCanImport(command, importCheck, repoRoot, pythonEnv)) {
250529
+ throw new Error(`Image-generation Python environment at ${venvDir} was created, but required ${kind} imports still fail.`);
250530
+ }
250531
+ return { command, env: pythonEnv };
250461
250532
  }
250462
250533
  async function ensureRunner(repoRoot, kind) {
250463
- const dir = join36(repoRoot, ".omnius", "image-gen");
250534
+ const dir = imageGenerationDir(repoRoot);
250464
250535
  await mkdir11(dir, { recursive: true });
250465
250536
  const script = kind === "diffusers" ? join36(dir, "diffusers_text2image.py") : join36(dir, "sdcpp_text2image.py");
250466
250537
  await writeFile16(script, kind === "diffusers" ? DIFFUSERS_RUNNER : SDCPP_RUNNER, "utf8");
@@ -250494,16 +250565,39 @@ function parseRunnerJson(stdout) {
250494
250565
  }
250495
250566
  return null;
250496
250567
  }
250497
- var IMAGE_GENERATION_MODEL_PRESETS, OLLAMA_IMAGE_MODELS, DIFFUSERS_RUNNER, SDCPP_RUNNER, ImageGenerateTool;
250568
+ var DEFAULT_DIFFUSERS_IMAGE_MODEL, DEFAULT_OLLAMA_IMAGE_MODEL, DIFFUSERS_PYTHON_PACKAGES, SDCPP_PYTHON_PACKAGES, IMAGE_GENERATION_MODEL_PRESETS, OLLAMA_IMAGE_MODELS, DIFFUSERS_RUNNER, SDCPP_RUNNER, ImageGenerateTool;
250498
250569
  var init_image_generate = __esm({
250499
250570
  "packages/execution/dist/tools/image-generate.js"() {
250500
250571
  "use strict";
250572
+ init_venv_paths();
250573
+ DEFAULT_DIFFUSERS_IMAGE_MODEL = "stabilityai/sdxl-turbo";
250574
+ DEFAULT_OLLAMA_IMAGE_MODEL = "x/z-image-turbo";
250575
+ DIFFUSERS_PYTHON_PACKAGES = [
250576
+ "torch",
250577
+ "diffusers",
250578
+ "transformers",
250579
+ "accelerate",
250580
+ "safetensors",
250581
+ "pillow",
250582
+ "sentencepiece",
250583
+ "protobuf"
250584
+ ];
250585
+ SDCPP_PYTHON_PACKAGES = [
250586
+ "stable-diffusion-cpp-python",
250587
+ "pillow"
250588
+ ];
250501
250589
  IMAGE_GENERATION_MODEL_PRESETS = [
250502
250590
  {
250503
- id: "x/z-image-turbo",
250591
+ id: DEFAULT_OLLAMA_IMAGE_MODEL,
250504
250592
  label: "Z-Image Turbo",
250505
250593
  backend: "ollama",
250506
250594
  install: "ollama pull x/z-image-turbo",
250595
+ category: "Modern deployable",
250596
+ sizeClass: "6B-class efficient image model",
250597
+ quality: "Modern high-quality output with a practical inference footprint; below FLUX.1 dev/SD3.5 Large for peak photorealism.",
250598
+ minVramGB: 16,
250599
+ recommendedVramGB: 24,
250600
+ deployment: "Ollama model path; good high-end consumer GPU target.",
250507
250601
  steps: 8,
250508
250602
  width: 1024,
250509
250603
  height: 1024,
@@ -250514,16 +250608,161 @@ var init_image_generate = __esm({
250514
250608
  label: "FLUX.2 Klein",
250515
250609
  backend: "ollama",
250516
250610
  install: "ollama pull x/flux2-klein",
250611
+ category: "Modern deployable",
250612
+ sizeClass: "4B compact FLUX-family",
250613
+ quality: "Modern FLUX-family quality in a smaller package; useful when full FLUX.1 is too heavy.",
250614
+ minVramGB: 12,
250615
+ recommendedVramGB: 16,
250616
+ deployment: "Ollama model path for practical local experimentation.",
250517
250617
  steps: 8,
250518
250618
  width: 1024,
250519
250619
  height: 1024,
250520
250620
  note: "Compact FLUX-family Ollama path for interactive local generation."
250521
250621
  },
250622
+ {
250623
+ id: "black-forest-labs/FLUX.1-dev",
250624
+ label: "FLUX.1 dev",
250625
+ backend: "diffusers",
250626
+ install: 'python .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-dev --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250627
+ category: "Primary hyper-realistic baseline",
250628
+ sizeClass: "12B rectified-flow transformer",
250629
+ quality: "Top-tier open-weight photorealism, prompt adherence, texture detail, composition, and typography.",
250630
+ minVramGB: 24,
250631
+ recommendedVramGB: 48,
250632
+ deployment: "Heavy. Best with Diffusers CPU offload, FP8/quantized variants, ComfyUI, multi-GPU, or cloud GPU workers.",
250633
+ steps: 28,
250634
+ guidance: 3.5,
250635
+ width: 1024,
250636
+ height: 1024,
250637
+ note: "Primary serious-generation baseline for maximum photorealism."
250638
+ },
250639
+ {
250640
+ id: "stabilityai/stable-diffusion-3.5-large",
250641
+ label: "Stable Diffusion 3.5 Large",
250642
+ backend: "diffusers",
250643
+ install: 'python .omnius/image-gen/diffusers_text2image.py --model stabilityai/stable-diffusion-3.5-large --steps 28 --guidance 4.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250644
+ category: "Primary hyper-realistic baseline",
250645
+ sizeClass: "8B MMDiT",
250646
+ quality: "Serious open Stable Diffusion ecosystem baseline with strong realism, complex prompt understanding, typography, and controllability.",
250647
+ minVramGB: 24,
250648
+ recommendedVramGB: 40,
250649
+ deployment: "Best local candidate for SD/LoRA/ControlNet-style workflows; use offload or quantization below high-VRAM GPUs.",
250650
+ steps: 28,
250651
+ guidance: 4.5,
250652
+ width: 1024,
250653
+ height: 1024,
250654
+ note: "Primary serious-generation baseline for the Stable Diffusion ecosystem."
250655
+ },
250656
+ {
250657
+ id: "black-forest-labs/FLUX.1-schnell",
250658
+ label: "FLUX.1 schnell",
250659
+ backend: "diffusers",
250660
+ install: 'python .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-schnell --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250661
+ category: "Fast large-model iteration",
250662
+ sizeClass: "12B rectified-flow transformer",
250663
+ quality: "FLUX-style output with fewer steps; better for rapid iteration than absolute peak quality.",
250664
+ minVramGB: 16,
250665
+ recommendedVramGB: 24,
250666
+ deployment: "Use for fast prompt iteration; verify current license terms before commercial use.",
250667
+ steps: 4,
250668
+ guidance: 0,
250669
+ width: 1024,
250670
+ height: 1024,
250671
+ note: "Fast FLUX-family iteration path."
250672
+ },
250673
+ {
250674
+ id: "stabilityai/stable-diffusion-3.5-large-turbo",
250675
+ label: "SD3.5 Large Turbo",
250676
+ backend: "diffusers",
250677
+ install: 'python .omnius/image-gen/diffusers_text2image.py --model stabilityai/stable-diffusion-3.5-large-turbo --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250678
+ category: "Fast large-model iteration",
250679
+ sizeClass: "8B distilled MMDiT",
250680
+ quality: "SD3.5-family quality optimized for fewer inference steps; throughput over peak fidelity.",
250681
+ minVramGB: 16,
250682
+ recommendedVramGB: 24,
250683
+ deployment: "Good for interactive SD3.5-family concepting with offload/quantization when needed.",
250684
+ steps: 4,
250685
+ guidance: 0,
250686
+ width: 1024,
250687
+ height: 1024,
250688
+ note: "Fast SD3.5-family iteration path."
250689
+ },
250690
+ {
250691
+ id: "Tencent-Hunyuan/HunyuanDiT-v1.2-Diffusers",
250692
+ label: "HunyuanDiT v1.2",
250693
+ backend: "diffusers",
250694
+ install: 'python .omnius/image-gen/diffusers_text2image.py --model Tencent-Hunyuan/HunyuanDiT-v1.2-Diffusers --steps 30 --guidance 7.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250695
+ category: "Large multilingual diffusion",
250696
+ sizeClass: "Large DiT text-to-image",
250697
+ quality: "Strong bilingual English/Chinese prompt understanding with detailed, realistic multi-resolution output.",
250698
+ minVramGB: 24,
250699
+ recommendedVramGB: 40,
250700
+ deployment: "Significant GPU memory requirements; prefer Diffusers-compatible variants and offload on smaller GPUs.",
250701
+ steps: 30,
250702
+ guidance: 7.5,
250703
+ width: 1024,
250704
+ height: 1024,
250705
+ note: "Large DiT option for bilingual and detailed realism workflows."
250706
+ },
250707
+ {
250708
+ id: "Tongyi-MAI/Z-Image-Turbo",
250709
+ label: "Z-Image-Turbo",
250710
+ backend: "diffusers",
250711
+ install: 'python .omnius/image-gen/diffusers_text2image.py --model Tongyi-MAI/Z-Image-Turbo --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250712
+ category: "Modern deployable",
250713
+ sizeClass: "6B image generation model",
250714
+ quality: "Efficient newer large-model quality; useful below full FLUX/SD3.5 hardware budgets.",
250715
+ minVramGB: 16,
250716
+ recommendedVramGB: 24,
250717
+ deployment: "Candidate for high-end consumer GPUs and optimized runtimes.",
250718
+ steps: 8,
250719
+ width: 1024,
250720
+ height: 1024,
250721
+ note: "Efficient modern large image model."
250722
+ },
250723
+ {
250724
+ id: "black-forest-labs/FLUX.2-klein-4B",
250725
+ label: "FLUX.2 Klein 4B",
250726
+ backend: "diffusers",
250727
+ install: 'python .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.2-klein-4B --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250728
+ category: "Modern deployable",
250729
+ sizeClass: "4B compact FLUX-family",
250730
+ quality: "Bridge between practical deployment and modern FLUX-family visual quality.",
250731
+ minVramGB: 12,
250732
+ recommendedVramGB: 16,
250733
+ deployment: "Better fit for consumer GPU experimentation than 8B-12B baselines.",
250734
+ steps: 8,
250735
+ width: 1024,
250736
+ height: 1024,
250737
+ note: "More deployable compact FLUX-family model."
250738
+ },
250739
+ {
250740
+ id: "deepseek-ai/Janus-Pro-7B",
250741
+ label: "Janus-Pro-7B",
250742
+ backend: "diffusers",
250743
+ install: "experimental research model; use a dedicated Janus pipeline/runtime rather than the generic Diffusers text-to-image runner",
250744
+ category: "Experimental multimodal research",
250745
+ sizeClass: "7B multimodal image generation model",
250746
+ quality: "Relevant research model, but not a classic diffusion baseline for production image pipelines.",
250747
+ minVramGB: 16,
250748
+ recommendedVramGB: 24,
250749
+ deployment: "Experimental/non-classic diffusion-adjacent; list for awareness, not a default production path.",
250750
+ steps: 20,
250751
+ width: 1024,
250752
+ height: 1024,
250753
+ note: "Experimental multimodal generation research model."
250754
+ },
250522
250755
  {
250523
250756
  id: "segmind/tiny-sd",
250524
250757
  label: "Segmind Tiny-SD",
250525
250758
  backend: "diffusers",
250526
250759
  install: 'python .omnius/image-gen/diffusers_text2image.py --model segmind/tiny-sd --prompt "..." --output .omnius/images/out.png',
250760
+ category: "Lightweight smoke test",
250761
+ sizeClass: "Small SD-compatible",
250762
+ quality: "Fast validation model; not a serious photorealism baseline.",
250763
+ minVramGB: 4,
250764
+ recommendedVramGB: 8,
250765
+ deployment: "Use to verify the local Diffusers stack works before pulling large models.",
250527
250766
  steps: 20,
250528
250767
  guidance: 7,
250529
250768
  width: 512,
@@ -250535,6 +250774,12 @@ var init_image_generate = __esm({
250535
250774
  label: "BK-SDM Tiny 2M",
250536
250775
  backend: "diffusers",
250537
250776
  install: 'python .omnius/image-gen/diffusers_text2image.py --model nota-ai/bk-sdm-tiny-2m --prompt "..." --output .omnius/images/out.png',
250777
+ category: "Lightweight smoke test",
250778
+ sizeClass: "Compressed SD-compatible",
250779
+ quality: "Very small and practical; quality is mainly for tests and rough drafts.",
250780
+ minVramGB: 4,
250781
+ recommendedVramGB: 8,
250782
+ deployment: "Low-friction compressed Stable Diffusion-style model.",
250538
250783
  steps: 20,
250539
250784
  guidance: 7,
250540
250785
  width: 512,
@@ -250546,6 +250791,12 @@ var init_image_generate = __esm({
250546
250791
  label: "BK-SDM Small 2M",
250547
250792
  backend: "diffusers",
250548
250793
  install: 'python .omnius/image-gen/diffusers_text2image.py --model nota-ai/bk-sdm-small-2m --prompt "..." --output .omnius/images/out.png',
250794
+ category: "Lightweight smoke test",
250795
+ sizeClass: "Compressed SD-compatible",
250796
+ quality: "Slightly better compressed-SD quality than tiny variants; still not a high-fidelity baseline.",
250797
+ minVramGB: 4,
250798
+ recommendedVramGB: 8,
250799
+ deployment: "Small quality/size tradeoff for weak hardware.",
250549
250800
  steps: 20,
250550
250801
  guidance: 7,
250551
250802
  width: 512,
@@ -250557,6 +250808,12 @@ var init_image_generate = __esm({
250557
250808
  label: "LCM DreamShaper v7",
250558
250809
  backend: "diffusers",
250559
250810
  install: 'python .omnius/image-gen/diffusers_text2image.py --model SimianLuo/LCM_Dreamshaper_v7 --steps 4 --prompt "..." --output .omnius/images/out.png',
250811
+ category: "Fast iteration",
250812
+ sizeClass: "Few-step SD-compatible",
250813
+ quality: "Good for low-latency concepting; below SDXL/SD3.5/FLUX for photoreal detail.",
250814
+ minVramGB: 6,
250815
+ recommendedVramGB: 8,
250816
+ deployment: "Few-step latent-consistency route.",
250560
250817
  steps: 4,
250561
250818
  guidance: 8,
250562
250819
  width: 512,
@@ -250568,6 +250825,12 @@ var init_image_generate = __esm({
250568
250825
  label: "SD-Turbo",
250569
250826
  backend: "diffusers",
250570
250827
  install: 'python .omnius/image-gen/diffusers_text2image.py --model stabilityai/sd-turbo --steps 1 --guidance 0 --prompt "..." --output .omnius/images/out.png',
250828
+ category: "Fast iteration",
250829
+ sizeClass: "One-to-four-step SD",
250830
+ quality: "Fast SD-family output; useful for iteration but lower ceiling than SDXL Turbo and large baselines.",
250831
+ minVramGB: 6,
250832
+ recommendedVramGB: 8,
250833
+ deployment: "Check Stability license for your use case.",
250571
250834
  steps: 1,
250572
250835
  guidance: 0,
250573
250836
  width: 512,
@@ -250575,10 +250838,16 @@ var init_image_generate = __esm({
250575
250838
  note: "One-to-four-step Stable Diffusion family model; check Stability license."
250576
250839
  },
250577
250840
  {
250578
- id: "stabilityai/sdxl-turbo",
250841
+ id: DEFAULT_DIFFUSERS_IMAGE_MODEL,
250579
250842
  label: "SDXL-Turbo",
250580
250843
  backend: "diffusers",
250581
250844
  install: 'python .omnius/image-gen/diffusers_text2image.py --model stabilityai/sdxl-turbo --steps 1 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250845
+ category: "Default local generation",
250846
+ sizeClass: "Few-step SDXL",
250847
+ quality: "Strong fast default for local image generation; not as realistic as FLUX.1 dev or SD3.5 Large, but much more practical.",
250848
+ minVramGB: 8,
250849
+ recommendedVramGB: 12,
250850
+ deployment: "Auto-installed first-run Diffusers default.",
250582
250851
  steps: 1,
250583
250852
  guidance: 0,
250584
250853
  width: 1024,
@@ -250590,6 +250859,12 @@ var init_image_generate = __esm({
250590
250859
  label: "Sana Sprint 0.6B",
250591
250860
  backend: "diffusers",
250592
250861
  install: 'python .omnius/image-gen/diffusers_text2image.py --model Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250862
+ category: "Modern efficient",
250863
+ sizeClass: "0.6B efficient diffusion",
250864
+ quality: "Modern efficient output under smaller compute budgets; below primary large baselines.",
250865
+ minVramGB: 8,
250866
+ recommendedVramGB: 12,
250867
+ deployment: "Efficient Diffusers path for consumer VRAM.",
250593
250868
  steps: 4,
250594
250869
  guidance: 0,
250595
250870
  width: 1024,
@@ -250601,6 +250876,12 @@ var init_image_generate = __esm({
250601
250876
  label: "stable-diffusion.cpp local checkpoint",
250602
250877
  backend: "sdcpp",
250603
250878
  install: 'python .omnius/image-gen/sdcpp_text2image.py --model-path /path/to/model.gguf --prompt "..." --output .omnius/images/out.png',
250879
+ category: "Local checkpoint/GGUF",
250880
+ sizeClass: "Depends on checkpoint",
250881
+ quality: "Quality depends entirely on the local checkpoint or GGUF variant.",
250882
+ minVramGB: 0,
250883
+ recommendedVramGB: 8,
250884
+ deployment: "CPU/GGUF/checkpoint route for custom local workflows.",
250604
250885
  steps: 20,
250605
250886
  width: 512,
250606
250887
  height: 512,
@@ -250623,6 +250904,21 @@ def _device():
250623
250904
  return "mps"
250624
250905
  return "cpu"
250625
250906
 
250907
+ def _pipeline_class(model):
250908
+ lowered = model.lower()
250909
+ if "flux" in lowered:
250910
+ from diffusers import FluxPipeline
250911
+ return FluxPipeline
250912
+ if "stable-diffusion-3.5" in lowered or "stable_diffusion_3" in lowered or "sd3" in lowered:
250913
+ from diffusers import StableDiffusion3Pipeline
250914
+ return StableDiffusion3Pipeline
250915
+ from diffusers import AutoPipelineForText2Image
250916
+ return AutoPipelineForText2Image
250917
+
250918
+ def _large_model(model):
250919
+ lowered = model.lower()
250920
+ return any(token in lowered for token in ["flux.1", "flux.2", "stable-diffusion-3.5", "hunyuan", "z-image", "janus"])
250921
+
250626
250922
  def main():
250627
250923
  parser = argparse.ArgumentParser()
250628
250924
  parser.add_argument("--model", required=True)
@@ -250639,7 +250935,6 @@ def main():
250639
250935
 
250640
250936
  t0 = time.perf_counter()
250641
250937
  import torch
250642
- from diffusers import AutoPipelineForText2Image
250643
250938
 
250644
250939
  device = _device() if args.device == "auto" else args.device
250645
250940
  dtype = torch.float16 if device == "cuda" else torch.float32
@@ -250648,17 +250943,25 @@ def main():
250648
250943
  kwargs["variant"] = args.variant
250649
250944
 
250650
250945
  try:
250651
- pipe = AutoPipelineForText2Image.from_pretrained(args.model, **kwargs)
250946
+ pipeline_cls = _pipeline_class(args.model)
250947
+ pipe = pipeline_cls.from_pretrained(args.model, **kwargs)
250652
250948
  except Exception:
250653
250949
  kwargs.pop("variant", None)
250654
- pipe = AutoPipelineForText2Image.from_pretrained(args.model, **kwargs)
250950
+ pipeline_cls = _pipeline_class(args.model)
250951
+ pipe = pipeline_cls.from_pretrained(args.model, **kwargs)
250655
250952
 
250656
250953
  if hasattr(pipe, "enable_attention_slicing"):
250657
250954
  try:
250658
250955
  pipe.enable_attention_slicing()
250659
250956
  except Exception:
250660
250957
  pass
250661
- pipe = pipe.to(device)
250958
+ if device == "cuda" and _large_model(args.model) and hasattr(pipe, "enable_model_cpu_offload"):
250959
+ try:
250960
+ pipe.enable_model_cpu_offload()
250961
+ except Exception:
250962
+ pipe = pipe.to(device)
250963
+ else:
250964
+ pipe = pipe.to(device)
250662
250965
 
250663
250966
  generator = None
250664
250967
  if args.seed is not None:
@@ -250739,7 +251042,7 @@ if __name__ == "__main__":
250739
251042
  `;
250740
251043
  ImageGenerateTool = class {
250741
251044
  name = "generate_image";
250742
- description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (Tiny-SD, BK-SDM, LCM, SD-Turbo, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. Saves a PNG under .omnius/images and returns the file path.";
251045
+ description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. Saves a PNG under .omnius/images and returns the file path.";
250743
251046
  parameters = {
250744
251047
  type: "object",
250745
251048
  properties: {
@@ -250838,11 +251141,11 @@ if __name__ == "__main__":
250838
251141
  let backend = inferImageGenerationBackend(requestedModel, requestedBackend);
250839
251142
  let model = requestedModel;
250840
251143
  if (backend === "auto") {
250841
- model = await this.findImageGenModel() ?? void 0;
250842
- backend = model ? "ollama" : "diffusers";
251144
+ backend = "diffusers";
251145
+ model = DEFAULT_DIFFUSERS_IMAGE_MODEL;
250843
251146
  }
250844
251147
  if (!model) {
250845
- model = backend === "diffusers" ? "segmind/tiny-sd" : "x/z-image-turbo";
251148
+ model = backend === "diffusers" ? DEFAULT_DIFFUSERS_IMAGE_MODEL : DEFAULT_OLLAMA_IMAGE_MODEL;
250846
251149
  }
250847
251150
  if (backend === "ollama") {
250848
251151
  return await this.generateWithOllama({ prompt, model, width, height, steps, start: start2 });
@@ -250934,7 +251237,23 @@ ${errText.slice(0, 800)}`,
250934
251237
  const runner = await ensureRunner(this.cwd, "diffusers");
250935
251238
  await mkdir11(join36(this.cwd, ".omnius", "images"), { recursive: true });
250936
251239
  const filepath = outputPath(this.cwd);
250937
- const command = pythonFor(this.cwd, "diffusers", typeof args.python === "string" ? args.python : void 0);
251240
+ let python;
251241
+ try {
251242
+ python = await ensurePythonFor(this.cwd, "diffusers", typeof args.python === "string" ? args.python : void 0);
251243
+ } catch (err) {
251244
+ const plan = imageGenerationSetupPlan("diffusers", this.cwd, args.model);
251245
+ return {
251246
+ success: false,
251247
+ output: [
251248
+ `Diffusers setup failed before image generation.`,
251249
+ err instanceof Error ? err.message : String(err),
251250
+ "",
251251
+ "Setup path:",
251252
+ ...plan.commands.map((cmd) => ` ${cmd}`)
251253
+ ].filter(Boolean).join("\n"),
251254
+ durationMs: performance.now() - args.start
251255
+ };
251256
+ }
250938
251257
  const argv = [
250939
251258
  runner,
250940
251259
  "--model",
@@ -250954,7 +251273,7 @@ ${errText.slice(0, 800)}`,
250954
251273
  ];
250955
251274
  if (args.seed !== void 0)
250956
251275
  argv.push("--seed", String(args.seed));
250957
- const result = await runProcess2(command, argv, { cwd: this.cwd, timeoutMs: 9e5 });
251276
+ const result = await runProcess2(python.command, argv, { cwd: this.cwd, timeoutMs: 9e5, env: python.env });
250958
251277
  if (result.code !== 0 || !existsSync23(filepath)) {
250959
251278
  const plan = imageGenerationSetupPlan("diffusers", this.cwd, args.model);
250960
251279
  return {
@@ -251008,7 +251327,23 @@ ${errText.slice(0, 800)}`,
251008
251327
  const runner = await ensureRunner(this.cwd, "sdcpp");
251009
251328
  await mkdir11(join36(this.cwd, ".omnius", "images"), { recursive: true });
251010
251329
  const filepath = outputPath(this.cwd);
251011
- const command = pythonFor(this.cwd, "sdcpp", typeof args.python === "string" ? args.python : void 0);
251330
+ let python;
251331
+ try {
251332
+ python = await ensurePythonFor(this.cwd, "sdcpp", typeof args.python === "string" ? args.python : void 0);
251333
+ } catch (err) {
251334
+ const plan = imageGenerationSetupPlan("sdcpp", this.cwd, args.model);
251335
+ return {
251336
+ success: false,
251337
+ output: [
251338
+ `stable-diffusion.cpp setup failed before image generation.`,
251339
+ err instanceof Error ? err.message : String(err),
251340
+ "",
251341
+ "Setup path:",
251342
+ ...plan.commands.map((cmd) => ` ${cmd}`)
251343
+ ].filter(Boolean).join("\n"),
251344
+ durationMs: performance.now() - args.start
251345
+ };
251346
+ }
251012
251347
  const argv = [
251013
251348
  runner,
251014
251349
  "--model-path",
@@ -251026,7 +251361,7 @@ ${errText.slice(0, 800)}`,
251026
251361
  ];
251027
251362
  if (args.seed !== void 0)
251028
251363
  argv.push("--seed", String(args.seed));
251029
- const result = await runProcess2(command, argv, { cwd: this.cwd, timeoutMs: 9e5 });
251364
+ const result = await runProcess2(python.command, argv, { cwd: this.cwd, timeoutMs: 9e5, env: python.env });
251030
251365
  if (result.code !== 0 || !existsSync23(filepath)) {
251031
251366
  const plan = imageGenerationSetupPlan("sdcpp", this.cwd, args.model);
251032
251367
  return {
@@ -512881,6 +513216,8 @@ __export(dist_exports, {
512881
513216
  CreateToolTool: () => CreateToolTool,
512882
513217
  CronAgentTool: () => CronAgentTool,
512883
513218
  CustomTool: () => CustomTool,
513219
+ DEFAULT_DIFFUSERS_IMAGE_MODEL: () => DEFAULT_DIFFUSERS_IMAGE_MODEL,
513220
+ DEFAULT_OLLAMA_IMAGE_MODEL: () => DEFAULT_OLLAMA_IMAGE_MODEL,
512884
513221
  DESKTOP_DEPS: () => DESKTOP_DEPS,
512885
513222
  DebateTool: () => DebateTool,
512886
513223
  DesktopClickTool: () => DesktopClickTool,
@@ -513002,6 +513339,7 @@ __export(dist_exports, {
513002
513339
  deleteTodos: () => deleteTodos,
513003
513340
  detectElevationMethod: () => detectElevationMethod,
513004
513341
  detectSearchProvider: () => detectSearchProvider,
513342
+ diffusersVenvDir: () => diffusersVenvDir,
513005
513343
  discoverPlugins: () => discoverPlugins,
513006
513344
  discoverSkills: () => discoverSkills,
513007
513345
  emitIndexed: () => emitIndexed,
@@ -513034,6 +513372,7 @@ __export(dist_exports, {
513034
513372
  getWorkingNotesSummary: () => getWorkingNotesSummary,
513035
513373
  getWorktreeSession: () => getWorktreeSession,
513036
513374
  hashGeneratedArtifactContent: () => hashGeneratedArtifactContent,
513375
+ imageGenerationDir: () => imageGenerationDir,
513037
513376
  imageGenerationSetupPlan: () => imageGenerationSetupPlan,
513038
513377
  inferImageGenerationBackend: () => inferImageGenerationBackend,
513039
513378
  isFortemiAvailable: () => isFortemiAvailable,
@@ -513084,6 +513423,7 @@ __export(dist_exports, {
513084
513423
  saveCustomToolDefinition: () => saveCustomToolDefinition,
513085
513424
  saveMcpServerToConfig: () => saveMcpServerToConfig,
513086
513425
  savePacket: () => savePacket,
513426
+ sdcppVenvDir: () => sdcppVenvDir,
513087
513427
  serializeMap: () => serializeMap,
513088
513428
  setChangeLogSession: () => setChangeLogSession,
513089
513429
  setSudoPassword: () => setSudoPassword,
@@ -550222,7 +550562,7 @@ var init_command_registry = __esm({
550222
550562
  ["/image <prompt>", "Generate an image from a prompt and show an ASCII preview"],
550223
550563
  ["/image --model <model> <prompt>", "Generate with an explicit image model"],
550224
550564
  ["/image setup <ollama|diffusers|sdcpp>", "Show setup commands for an image-generation backend"],
550225
- ["/image models", "List known lightweight/local image-generation model presets"],
550565
+ ["/image list", "List image models by category, quality, size, and hardware fit"],
550226
550566
  ["/call", "Start voice call session (cloudflared tunnel + ASR/TTS)"],
550227
550567
  ["/hangup", "End active call session"],
550228
550568
  ["/queue <prompt>", "Queue a prompt for the next turn without interrupting the current run"],
@@ -572281,14 +572621,16 @@ ${preview.ascii}`;
572281
572621
  function extractSavedImagePath(text, repoRoot) {
572282
572622
  const patterns = [
572283
572623
  /Image generated:\s*([^\n\r]+)/i,
572624
+ /Screenshot saved:\s*([^\n\r]+)/i,
572625
+ /Screenshot:\s*([^\n\r]+)/i,
572284
572626
  /Saved to:\s*([^\n\r]+)/i,
572285
572627
  /Image attached:\s*([^\n\r]+)/i,
572286
- /image saved at\s+([^\n\r.]+)/i
572628
+ /image saved at\s+([^\n\r]+)/i
572287
572629
  ];
572288
572630
  for (const pattern of patterns) {
572289
572631
  const match = text.match(pattern);
572290
572632
  if (!match?.[1]) continue;
572291
- const raw = match[1].trim().replace(/^["']|["']$/g, "");
572633
+ const raw = match[1].trim().replace(/\s+\([^)]+\)\s*$/g, "").replace(/^["']|["']$/g, "");
572292
572634
  const candidate = raw.startsWith("/") ? raw : resolve37(repoRoot, raw);
572293
572635
  if (existsSync93(candidate)) return candidate;
572294
572636
  }
@@ -582626,18 +582968,123 @@ function parseImageNumber(value2) {
582626
582968
  const n2 = Number(value2);
582627
582969
  return Number.isFinite(n2) ? n2 : void 0;
582628
582970
  }
582971
/**
 * Score how well an image-generation preset fits the given hardware readings.
 *
 * @param {object} preset - Preset entry. Reads `id`, `backend`, `minVramGB`,
 *   `recommendedVramGB` and `sizeClass`. When `minVramGB` is absent the floor is
 *   0 for the "sdcpp" backend and 8 otherwise; when `recommendedVramGB` is absent
 *   the target is the larger of the floor and 12.
 * @param {object} specs - Hardware readings. Reads `gpuVramGB`, `availableRamGB`
 *   and `totalRamGB`.
 * @returns {{score: number, label: string, note: string}} A 0-100 score, a short
 *   tier label and a human-readable explanation.
 */
function rateImagePresetForHardware(preset, specs) {
  const floorVram = preset.minVramGB ?? (preset.backend === "sdcpp" ? 0 : 8);
  const targetVram = preset.recommendedVramGB ?? Math.max(floorVram, 12);
  const gpuVram = specs.gpuVramGB;
  // Usable RAM is the larger of what is free now and 65% of the installed total.
  const usableRam = Math.max(specs.availableRamGB, specs.totalRamGB * 0.65);
  const meetsTarget = gpuVram >= targetVram;
  const meetsFloor = gpuVram >= floorVram;

  // Janus-Pro is special-cased: its score is capped and the note always warns
  // that the generic Diffusers runner is not the right runtime.
  if (preset.id === "deepseek-ai/Janus-Pro-7B") {
    let janusScore = 25;
    if (meetsTarget) {
      janusScore = 65;
    } else if (meetsFloor) {
      janusScore = 50;
    }
    return {
      score: janusScore,
      label: janusScore >= 60 ? "experimental" : "not turnkey",
      note: "Experimental multimodal model; use a dedicated Janus runtime, not the generic Diffusers runner."
    };
  }

  // Presets without a VRAM floor: the fit depends on the chosen checkpoint.
  if (floorVram <= 0) {
    let looseScore = 25;
    if (meetsTarget) {
      looseScore = 85;
    } else if (gpuVram > 0) {
      looseScore = 70;
    } else if (usableRam >= 32) {
      looseScore = 45;
    }
    let looseLabel = "checkpoint-dependent";
    if (looseScore >= 80) {
      looseLabel = "comfortable";
    } else if (looseScore >= 60) {
      looseLabel = "workable";
    }
    return {
      score: looseScore,
      label: looseLabel,
      note: gpuVram > 0 ? "Fit depends on the local checkpoint/GGUF size and runtime flags." : "CPU/GGUF path is possible but will be slow; choose small quantized checkpoints."
    };
  }

  if (meetsTarget) {
    // 90 base plus up to 10 bonus points proportional to headroom above target.
    const headroomBonus = Math.min(10, (gpuVram - targetVram) / Math.max(1, targetVram) * 10);
    return {
      score: Math.min(100, Math.round(90 + headroomBonus)),
      label: "excellent",
      note: `Runs well on ${gpuVram.toFixed(0)}GB VRAM; enough headroom for ${preset.sizeClass ?? "this model"}.`
    };
  }

  if (meetsFloor) {
    // Interpolate from 70 upward across the floor..target span.
    const progress = (gpuVram - floorVram) / Math.max(1, targetVram - floorVram);
    return {
      score: Math.round(70 + progress * 18),
      label: "comfortable",
      note: `Fits the stated ${floorVram.toFixed(0)}GB minimum; expect lower batching/headroom than the ${targetVram.toFixed(0)}GB target.`
    };
  }

  if (gpuVram > 0 && usableRam >= targetVram * 1.5) {
    return {
      score: 52,
      label: "offload",
      note: `Below ${floorVram.toFixed(0)}GB VRAM; may work with CPU offload or quantization, but latency and reliability will suffer.`
    };
  }

  if (usableRam >= targetVram * 2) {
    return {
      score: 35,
      label: "cloud/quant",
      note: "System RAM is large enough for experiments, but VRAM is below target; prefer quantized runtimes or cloud GPU."
    };
  }

  return {
    score: 18,
    label: "too heavy",
    note: `Needs roughly ${floorVram.toFixed(0)}GB VRAM minimum and ${targetVram.toFixed(0)}GB recommended.`
  };
}
583018
/**
 * Pick the coloured status glyph for a hardware-fit score.
 *
 * Tiers are checked from highest to lowest; a score below every tier (or a
 * non-comparable value such as NaN) gets the red cross.
 *
 * @param {number} score - Fit score as produced by the preset rating.
 * @returns {*} Whatever the `c3` colour helper returns for the chosen glyph.
 */
function imageFitIcon(score) {
  const tiers = [
    { floor: 85, paint: "green", glyph: "✔" },
    { floor: 60, paint: "green", glyph: "◐" },
    { floor: 40, paint: "yellow", glyph: "△" }
  ];
  for (const tier of tiers) {
    if (score >= tier.floor) return c3[tier.paint](tier.glyph);
  }
  return c3.red("✖");
}
583024
/**
 * Greedily word-wrap text into lines of at most `width` characters.
 *
 * Runs of whitespace are treated as a single separator. A single word longer
 * than `width` is never split; it occupies a line of its own.
 *
 * @param {*} text - Text to wrap. `null`/`undefined` are treated as empty and
 *   other non-string values are stringified, so a preset that lacks the
 *   optional field being rendered cannot crash the listing.
 * @param {number} [width=94] - Maximum line length in characters.
 * @returns {string[]} Wrapped lines; always at least one element (`[""]` for
 *   empty or whitespace-only input).
 */
function wrapImageListText(text, width = 94) {
  const words = String(text ?? "").split(/\s+/).filter(Boolean);
  const lines = [];
  let current = "";
  for (const word of words) {
    if (current === "") {
      current = word;
    } else if (current.length + 1 + word.length <= width) {
      // +1 accounts for the joining space.
      current += ` ${word}`;
    } else {
      lines.push(current);
      current = word;
    }
  }
  if (current !== "") lines.push(current);
  return lines.length > 0 ? lines : [""];
}
583039
/**
 * Print one labelled detail of a preset, wrapped to 92 columns.
 *
 * The first wrapped line is emitted after `prefix`; continuation lines are
 * indented with spaces to the width of `prefix` so the text stays aligned.
 *
 * @param {string} prefix - Label printed before the first line.
 * @param {string} text - Detail text, wrapped via `wrapImageListText`.
 * @returns {void}
 */
function renderImagePresetDetail(prefix, text) {
  const wrapped = wrapImageListText(text, 92);
  renderInfo(`${prefix}${wrapped[0]}`);
  const hangingIndent = " ".repeat(prefix.length);
  wrapped.slice(1).forEach((continuation) => {
    renderInfo(`${hangingIndent}${continuation}`);
  });
}
583044
/**
 * Print the image-model catalogue grouped by category, with a hardware-fit
 * score for every preset.
 *
 * Output goes through `renderInfo`: a hardware summary line, two legend lines,
 * then for each category (in first-seen order) a bold heading followed by the
 * presets' score, id, type, quality, fit note and optional deployment note.
 * Presets without a `category` are listed under "Other".
 *
 * @returns {void}
 */
function renderImageModelList() {
  // NOTE(review): `detectSystemSpecs` is defined outside this view; it is
  // assumed to return numeric totalRamGB/gpuVramGB fields — confirm.
  const specs = detectSystemSpecs();
  const ramSummary = `${specs.totalRamGB.toFixed(0)}GB RAM`;
  const gpuSummary = specs.gpuVramGB > 0 ? ` + ${specs.gpuVramGB.toFixed(0)}GB VRAM (${specs.gpuName || "NVIDIA GPU"})` : " + no NVIDIA VRAM detected";
  renderInfo(`Image models for this hardware: ${ramSummary + gpuSummary}`);
  renderInfo("Fit legend: 85+ excellent, 60+ comfortable, 40+ offload/quantized, below 40 heavy/cloud.");
  renderInfo("Primary hyper-realistic baselines: FLUX.1 dev and Stable Diffusion 3.5 Large.");

  // Group presets by category; a Map keeps categories in first-seen order.
  const grouped = /* @__PURE__ */ new Map();
  for (const preset of IMAGE_GENERATION_MODEL_PRESETS) {
    const category = preset.category ?? "Other";
    if (!grouped.has(category)) grouped.set(category, []);
    grouped.get(category).push(preset);
  }

  grouped.forEach((presets, category) => {
    renderInfo("");
    renderInfo(c3.bold(category));
    presets.forEach((preset) => {
      const rating = rateImagePresetForHardware(preset, specs);
      // Star marker for entries of the primary baseline category.
      const star = category === "Primary hyper-realistic baseline" ? c3.cyan(" ★") : "";
      renderInfo(`${imageFitIcon(rating.score)} ${String(rating.score).padStart(3)}/100 ${c3.bold(preset.label)}${star}`);
      renderInfo(` id: ${preset.id}`);
      renderInfo(` type: ${preset.backend} · ${preset.sizeClass ?? "unknown size"} · ${rating.label}`);
      renderImagePresetDetail(" quality: ", preset.quality ?? preset.note);
      renderImagePresetDetail(" fit: ", rating.note);
      if (preset.deployment) renderImagePresetDetail(" deploy: ", preset.deployment);
    });
  });
}
582629
583072
  async function showImageModelsMenu(ctx3, hasLocal) {
582630
583073
  const settings = resolveSettings(ctx3.repoRoot);
583074
+ const specs = detectSystemSpecs();
582631
583075
  const items = [
582632
583076
  { key: "setup:ollama", label: "Setup Ollama", detail: "Pull x/z-image-turbo or x/flux2-klein" },
582633
- { key: "setup:diffusers", label: "Setup Diffusers", detail: "Python venv for Tiny-SD, BK-SDM, SD-Turbo, Sana" },
583077
+ { key: "setup:diffusers", label: "Setup Diffusers", detail: "Auto-installs SDXL Turbo under .omnius/image-gen/.venv" },
582634
583078
  { key: "setup:sdcpp", label: "Setup stable-diffusion.cpp", detail: "CPU/GGUF/checkpoint route" },
582635
583079
  { key: "hdr:models", label: selectColors.dim("─── Models ───") },
582636
- ...IMAGE_GENERATION_MODEL_PRESETS.map((preset) => ({
582637
- key: `model:${preset.id}`,
582638
- label: preset.label,
582639
- detail: `${preset.backend} · ${preset.id} · ${preset.note}`
582640
- }))
583080
+ ...IMAGE_GENERATION_MODEL_PRESETS.map((preset) => {
583081
+ const fit2 = rateImagePresetForHardware(preset, specs);
583082
+ return {
583083
+ key: `model:${preset.id}`,
583084
+ label: preset.label,
583085
+ detail: `${fit2.score}/100 ${fit2.label} · ${preset.category ?? preset.backend} · ${preset.sizeClass ?? preset.id}`
583086
+ };
583087
+ })
582641
583088
  ];
582642
583089
  const result = await tuiSelect({
582643
583090
  items,
@@ -582673,10 +583120,8 @@ async function handleImageCommand(ctx3, arg, hasLocal) {
582673
583120
  await showImageModelsMenu(ctx3, hasLocal);
582674
583121
  return "handled";
582675
583122
  }
582676
- if (parsed.subcommand === "models") {
582677
- for (const preset of IMAGE_GENERATION_MODEL_PRESETS) {
582678
- renderInfo(`${preset.id} [${preset.backend}] ${preset.note}`);
582679
- }
583123
+ if (parsed.subcommand === "models" || parsed.subcommand === "list") {
583124
+ renderImageModelList();
582680
583125
  return "handled";
582681
583126
  }
582682
583127
  if (parsed.subcommand === "setup") {
@@ -582688,7 +583133,7 @@ async function handleImageCommand(ctx3, arg, hasLocal) {
582688
583133
  for (const note of plan.notes) renderInfo(`- ${note}`);
582689
583134
  return "handled";
582690
583135
  }
582691
- const model = String(parsed.flags["model"] ?? settings.imageModel ?? "auto");
583136
+ const model = String(parsed.flags["model"] ?? settings.imageModel ?? DEFAULT_DIFFUSERS_IMAGE_MODEL);
582692
583137
  const backend = String(parsed.flags["backend"] ?? settings.imageBackend ?? inferImageGenerationBackend(model, void 0));
582693
583138
  const tool = new ImageGenerateTool(ctx3.repoRoot, ctx3.config.backendUrl);
582694
583139
  const prompt = parsed.prompt;
@@ -593167,11 +593612,11 @@ function runProcess3(command, args, cwd4, timeoutMs) {
593167
593612
  child.stderr?.on("data", (chunk) => {
593168
593613
  stderr += chunk.toString();
593169
593614
  });
593170
- child.on("error", (err) => {
593615
+ onChildError(child, (err) => {
593171
593616
  clearTimeout(timer);
593172
593617
  resolveProcess({ code: 127, stdout, stderr: stderr + String(err.message || err) });
593173
593618
  });
593174
- child.on("close", (code8) => {
593619
+ onChildClose(child, (code8) => {
593175
593620
  clearTimeout(timer);
593176
593621
  resolveProcess({ code: code8, stdout, stderr });
593177
593622
  });
@@ -593181,6 +593626,7 @@ var MANIFEST_FILE, PATH_KEYS, MEDIA_PATH_RE, CreativeAudioFileTool;
593181
593626
  var init_telegram_creative_tools = __esm({
593182
593627
  "packages/cli/src/tui/telegram-creative-tools.ts"() {
593183
593628
  "use strict";
593629
+ init_typed_node_events();
593184
593630
  init_dist5();
593185
593631
  MANIFEST_FILE = ".omnius-creative-manifest.json";
593186
593632
  PATH_KEYS = ["path", "file", "file_path", "filename", "filepath", "filePath"];
@@ -593477,6 +593923,13 @@ function sanitizeTelegramProgressText(text, maxLength) {
593477
593923
  function compactTelegramVisibleText(text) {
593478
593924
  return stripTelegramHiddenThinking(text).replace(/\s+/g, " ").trim();
593479
593925
  }
593926
/**
 * Build a deterministic string key for an arbitrary value.
 *
 * Object keys are sorted, so two structurally equal values produce the same
 * key regardless of property insertion order. The result is JSON-like but not
 * guaranteed to be valid JSON (`undefined` is rendered literally).
 *
 * Hardening over a plain recursive serializer:
 * - BigInt values are rendered as `<digits>n` instead of letting
 *   `JSON.stringify` throw a TypeError.
 * - A value that contains itself is rendered as `"[Circular]"` at the point
 *   of re-entry instead of recursing until the stack overflows. Repeated
 *   non-cyclic references are still expanded in full.
 *
 * @param {*} value2 - Value to fingerprint.
 * @returns {string} Stable key for `value2`.
 */
function stableTelegramValueKey(value2) {
  // Objects on the current recursion path; used only for cycle detection.
  const ancestors = /* @__PURE__ */ new Set();
  const encode = (value) => {
    if (value === void 0) return "undefined";
    if (typeof value === "bigint") return `${value}n`;
    if (value === null || typeof value !== "object") {
      // JSON.stringify yields undefined for functions and symbols.
      return JSON.stringify(value) ?? String(value);
    }
    if (ancestors.has(value)) return '"[Circular]"';
    ancestors.add(value);
    try {
      if (Array.isArray(value)) {
        return `[${value.map((item) => encode(item)).join(",")}]`;
      }
      const sortedKeys = Object.keys(value).sort();
      return `{${sortedKeys.map((key) => `${JSON.stringify(key)}:${encode(value[key])}`).join(",")}}`;
    } finally {
      ancestors.delete(value);
    }
  };
  return encode(value2);
}
593480
593933
  function isTelegramPotentialNoReplyPrefix(text) {
593481
593934
  const lower = compactTelegramVisibleText(text).toLowerCase();
593482
593935
  return Boolean(lower) && "no_reply".startsWith(lower);
@@ -593507,7 +593960,39 @@ function cleanTelegramVisibleReply(text, options2 = {}) {
593507
593960
  if (!clean3) return "";
593508
593961
  if (options2.suppressPotentialNoReplyPrefix && isTelegramPotentialNoReplyPrefix(clean3)) return "";
593509
593962
  if (isTelegramInternalStatusText(clean3)) return "";
593510
- return clean3;
593963
+ return dedupeTelegramVisibleReply(clean3);
593964
+ }
593965
/**
 * Remove repeated paragraphs and repeated sentences from a visible reply.
 *
 * Pass 1 drops paragraphs (separated by blank lines) whose whitespace- and
 * case-normalised text was already seen, using `compactTelegramVisibleText`
 * for the normalisation.
 *
 * Pass 2 drops repeated sentences, but only when the text has at least three
 * sentences and at least one of them is a duplicate; otherwise the
 * paragraph-level result is returned untouched.
 *
 * Sentence boundaries are a terminator (`.`, `!`, `?`) followed by whitespace,
 * so tokens such as `example.com` or `3.14` are never split. The original
 * whitespace between surviving sentences is kept; when duplicates are dropped
 * between two survivors, the separator with the most newlines wins so a
 * paragraph break is not silently flattened into a space.
 *
 * @param {string} text - Cleaned reply text.
 * @returns {string} The text with duplicate paragraphs/sentences removed.
 */
function dedupeTelegramVisibleReply(text) {
  const seenParagraphs = /* @__PURE__ */ new Set();
  const keptParagraphs = [];
  for (const paragraph of text.split(/\n{2,}/)) {
    const trimmed = paragraph.trim();
    if (!trimmed) continue;
    const key = compactTelegramVisibleText(trimmed).toLowerCase();
    if (seenParagraphs.has(key)) continue;
    seenParagraphs.add(key);
    keptParagraphs.push(trimmed);
  }
  const paragraphCollapsed = keptParagraphs.join("\n\n");

  // The capture group makes split() return [sentence, separator, sentence, ...],
  // so sentences sit at even indices and separators at odd ones.
  const pieces = paragraphCollapsed.split(/(?<=[.!?])(\s+)/);
  const newlineCount = (separator) => (separator.match(/\n/g) ?? []).length;

  const seenSentences = /* @__PURE__ */ new Set();
  let rebuilt = "";
  let pendingSeparator = "";
  let sentenceCount = 0;
  let duplicates = 0;
  for (let i = 0; i < pieces.length; i += 2) {
    const separator = i > 0 ? pieces[i - 1] : "";
    // Remember the strongest break seen since the last kept sentence.
    if (pendingSeparator === "" || newlineCount(separator) > newlineCount(pendingSeparator)) {
      pendingSeparator = separator;
    }
    const sentence = pieces[i];
    if (!sentence) continue;
    sentenceCount++;
    const key = sentence.replace(/\s+/g, " ").toLowerCase();
    if (seenSentences.has(key)) {
      duplicates++;
      continue;
    }
    seenSentences.add(key);
    if (rebuilt) rebuilt += pendingSeparator;
    rebuilt += sentence;
    pendingSeparator = "";
  }
  if (sentenceCount < 3 || duplicates === 0) return paragraphCollapsed;
  return rebuilt;
}
593512
593997
  function truncateTelegramContext(text, maxLength) {
593513
593998
  const trimmed = text.trim();
@@ -593631,9 +594116,7 @@ function selectTelegramFinalResponse(args) {
593631
594116
  args.assistantText
593632
594117
  ].map((candidate) => cleanTelegramVisibleReply(candidate || "")).filter(Boolean);
593633
594118
  if (visibleCandidates.length > 0) {
593634
- return visibleCandidates.reduce(
593635
- (best, current) => current.length > best.length ? current : best
593636
- );
594119
+ return visibleCandidates[0];
593637
594120
  }
593638
594121
  void args.summary;
593639
594122
  return "";
@@ -593925,6 +594408,26 @@ function telegramImageMime(media) {
593925
594408
  if (ext === ".tif" || ext === ".tiff") return "image/tiff";
593926
594409
  return "image/jpeg";
593927
594410
  }
594411
/**
 * Collect the distinct usernames mentioned in a Telegram message.
 *
 * `mention` entities are resolved by slicing the text they index into:
 * `message2.text` for `entities` and `message2.caption` for
 * `caption_entities`. The caller-supplied `text` is used only when that field
 * is not a string, so a normalised or trimmed `text` cannot shift the offsets.
 * A slice that does not start with `@`, or an entity with a negative or
 * non-finite offset/length, is ignored rather than recorded as a username.
 * `text_mention` entities contribute `entity.user.username` when it is a string.
 *
 * @param {object} message2 - Raw Telegram message object.
 * @param {string} text - Fallback text for resolving entity offsets.
 * @returns {string[]} Unique usernames without the leading `@`, in first-seen
 *   order.
 */
function extractTelegramMentionedUsernames(message2, text) {
  const fallbackText = typeof text === "string" ? text : "";
  const sources = [
    {
      entities: message2.entities,
      body: typeof message2.text === "string" ? message2.text : fallbackText
    },
    {
      entities: message2.caption_entities,
      body: typeof message2.caption === "string" ? message2.caption : fallbackText
    }
  ];
  const usernames = /* @__PURE__ */ new Set();
  for (const { entities, body } of sources) {
    if (!Array.isArray(entities)) continue;
    for (const entity of entities) {
      if (!entity || typeof entity !== "object") continue;
      if (entity.type === "mention") {
        const offset = Number(entity.offset);
        const length = Number(entity.length);
        // length <= 1 would be a bare "@"; a negative offset would make
        // slice() count from the end of the string.
        if (!Number.isFinite(offset) || !Number.isFinite(length) || offset < 0 || length <= 1) continue;
        const raw = body.slice(offset, offset + length);
        if (!raw.startsWith("@")) continue;
        const mention = raw.slice(1).trim();
        if (mention) usernames.add(mention);
      } else if (entity.type === "text_mention" && typeof entity.user?.username === "string") {
        usernames.add(entity.user.username);
      }
    }
  }
  return [...usernames];
}
593928
594431
  function normalizeTelegramUpdate(update2) {
593929
594432
  const sourceUpdateType = update2.guest_message ? "guest_message" : update2.message ? "message" : null;
593930
594433
  if (!sourceUpdateType) return null;
@@ -593957,6 +594460,9 @@ function normalizeTelegramUpdate(update2) {
593957
594460
  isDirectMessages: Boolean(message2.chat?.is_direct_messages),
593958
594461
  parentChatId: message2.chat?.parent_chat?.id ?? message2.direct_messages_topic?.parent_topic?.id,
593959
594462
  replyToMessageId: message2.reply_to_message?.message_id,
594463
+ replyToUsername: message2.reply_to_message?.from?.username ?? message2.reply_to_message?.sender_chat?.username,
594464
+ replyToBot: Boolean(message2.reply_to_message?.from?.is_bot),
594465
+ mentionedUsernames: extractTelegramMentionedUsernames(message2, text),
593960
594466
  sourceUpdateType
593961
594467
  };
593962
594468
  }
@@ -594015,10 +594521,10 @@ function renderTelegramStart(botUsername, adminId, mode = "auto") {
594015
594521
  if (adminId) {
594016
594522
  process.stdout.write(` ${c3.dim(`Admin: ${adminId} (full memory + tools)`)}
594017
594523
  `);
594018
- process.stdout.write(` ${c3.dim("Public users: light memory + web search only")}
594524
+ process.stdout.write(` ${c3.dim("Public users: scoped memory + web + per-chat creative file/image/audio tools")}
594019
594525
  `);
594020
594526
  }
594021
- process.stdout.write(` ${c3.dim("Safety filter: ACTIVE — public channel mode")}
594527
+ process.stdout.write(` ${c3.dim("Safety filter: ACTIVE — public channel mode; creative writes are sandboxed under .omnius/telegram-creative/<chat>")}
594022
594528
  `);
594023
594529
  process.stdout.write(` ${c3.dim("Use /telegram to toggle off, or /telegram stop")}
594024
594530
 
@@ -594858,16 +595364,27 @@ ${lines.join("\n")}`);
594858
595364
  this.groupSkipLogAt.set(sessionKey, now);
594859
595365
  this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, `${reason} (context retained)`));
594860
595366
  }
595367
+ telegramMessageAddressesBot(msg) {
595368
+ const bot = this.state.botUsername.trim().replace(/^@/, "").toLowerCase();
595369
+ if (!bot) return false;
595370
+ const mentioned = (msg.mentionedUsernames ?? []).some(
595371
+ (name10) => name10.trim().replace(/^@/, "").toLowerCase() === bot
595372
+ );
595373
+ if (mentioned) return true;
595374
+ if (msg.replyToUsername && msg.replyToUsername.trim().replace(/^@/, "").toLowerCase() === bot) return true;
595375
+ return false;
595376
+ }
594861
595377
  async inferTelegramInteractionDecision(msg, toolContext) {
594862
595378
  const config = this.agentConfig;
594863
595379
  const forcedRoute = this.interactionMode === "chat" || this.interactionMode === "action" ? this.interactionMode : null;
594864
595380
  const isGroup = msg.chatType !== "private";
595381
+ const addressesBot = this.telegramMessageAddressesBot(msg);
594865
595382
  if (!config) {
594866
595383
  return {
594867
- route: forcedRoute ?? "action",
594868
- shouldReply: !isGroup,
595384
+ route: forcedRoute ?? (isGroup ? "action" : "chat"),
595385
+ shouldReply: !isGroup || addressesBot,
594869
595386
  confidence: 0,
594870
- reason: isGroup ? "router inference unavailable; public group fails closed without keyword heuristics" : "router inference unavailable; private chat defaults to reply",
595387
+ reason: isGroup ? addressesBot ? "router inference unavailable; Telegram message directly addresses the bot" : "router inference unavailable; public group fails closed without keyword heuristics" : "router inference unavailable; private chat defaults to quick reply",
594871
595388
  source: "inference-unavailable"
594872
595389
  };
594873
595390
  }
@@ -594888,6 +595405,7 @@ ${lines.join("\n")}`);
594888
595405
  `Route meanings:`,
594889
595406
  `- chat: a short conversational answer can be produced without tools.`,
594890
595407
  `- action: tools, workspace context, media processing, web lookup, delegation, or a multi-step agent loop may be needed.`,
595408
+ `Route discipline: greetings, acknowledgements, casual tone/style discussion, and simple conversational questions are chat. Use action only when the message asks you to inspect, create, change, send, remember, search, analyze media, or otherwise do tool-backed work.`,
594891
595409
  ``,
594892
595410
  `Reply discretion: infer from the live thread, speaker relationships, direct mentions, replies, tone, and current message. Do not use static keyword rules.`,
594893
595411
  `Private chats: should_reply is normally true.`,
@@ -594896,9 +595414,12 @@ ${lines.join("\n")}`);
594896
595414
  ``,
594897
595415
  `Tool context: ${toolContext}`,
594898
595416
  `Bot username: ${this.state.botUsername || "unknown"}`,
595417
+ `Current message directly addresses this bot: ${addressesBot ? "yes" : "no"}`,
594899
595418
  `Current chat type: ${msg.chatType}`,
594900
595419
  `Current sender: ${telegramSpeakerLabel(msg)}`,
594901
595420
  msg.replyToMessageId ? `Current message replies to message_id ${msg.replyToMessageId}` : "",
595421
+ msg.replyToUsername ? `Current message replies to @${msg.replyToUsername}` : "",
595422
+ (msg.mentionedUsernames ?? []).length > 0 ? `Current message mentions: ${(msg.mentionedUsernames ?? []).map((name10) => `@${name10}`).join(", ")}` : "",
594902
595423
  msg.media ? `Current message has media: ${summarizeTelegramMessageAttachments(msg)}` : "",
594903
595424
  ``,
594904
595425
  context2,
@@ -594929,10 +595450,10 @@ ${msg.text}`
594929
595450
  } catch {
594930
595451
  }
594931
595452
  return {
594932
- route: forcedRoute ?? "action",
594933
- shouldReply: !isGroup,
595453
+ route: forcedRoute ?? (isGroup ? "action" : "chat"),
595454
+ shouldReply: !isGroup || addressesBot,
594934
595455
  confidence: 0,
594935
- reason: isGroup ? "router inference failed; public group fails closed without keyword heuristics" : "router inference failed; private chat defaults to reply",
595456
+ reason: isGroup ? addressesBot ? "router inference failed; Telegram message directly addresses the bot" : "router inference failed; public group fails closed without keyword heuristics" : "router inference failed; private chat defaults to quick reply",
594936
595457
  source: "inference-unavailable"
594937
595458
  };
594938
595459
  }
@@ -595407,7 +595928,8 @@ ${mediaContext}`;
595407
595928
  toolContext,
595408
595929
  pendingMessages: [],
595409
595930
  creativeWorkspaceRoot: this.creativeWorkspaceRootForMessage(msg, toolContext),
595410
- generatedArtifacts: []
595931
+ generatedArtifacts: [],
595932
+ surfacedToolCallFingerprints: /* @__PURE__ */ new Set()
595411
595933
  };
595412
595934
  this.subAgents.set(sessionKey, subAgent);
595413
595935
  this.refreshActiveTelegramInteractionCount();
@@ -595504,7 +596026,8 @@ ${mediaContext}`;
595504
596026
  toolContext,
595505
596027
  pendingMessages: [],
595506
596028
  creativeWorkspaceRoot: this.creativeWorkspaceRootForMessage(msg, toolContext),
595507
- generatedArtifacts: []
596029
+ generatedArtifacts: [],
596030
+ surfacedToolCallFingerprints: /* @__PURE__ */ new Set()
595508
596031
  };
595509
596032
  this.subAgents.set(sessionKey, subAgent);
595510
596033
  this.refreshActiveTelegramInteractionCount();
@@ -595807,8 +596330,20 @@ ${mediaContext}` : ""}`
595807
596330
  runner.registerTools(tools);
595808
596331
  runner.onEvent((event) => {
595809
596332
  if (subAgent.aborted) return;
595810
- this.onSubAgentEvent?.(msg.chatId, msg.username, event);
596333
+ let suppressExternalEvent = false;
596334
+ if (event.type === "tool_call" && event.toolName) {
596335
+ const fp = `${event.toolName}:${stableTelegramValueKey(event.toolArgs ?? {})}`;
596336
+ if (subAgent.surfacedToolCallFingerprints.has(fp)) {
596337
+ suppressExternalEvent = true;
596338
+ } else {
596339
+ subAgent.surfacedToolCallFingerprints.add(fp);
596340
+ }
596341
+ }
596342
+ if (!suppressExternalEvent) {
596343
+ this.onSubAgentEvent?.(msg.chatId, msg.username, event);
596344
+ }
595811
596345
  if (event.type === "tool_call" && event.toolName) {
596346
+ if (suppressExternalEvent) return;
595812
596347
  const argsPreview = event.toolArgs ? JSON.stringify(event.toolArgs).slice(0, 100) : "";
595813
596348
  this.subAgentViewCallbacks?.onWrite(subAgent.viewId, `tool: ${event.toolName}(${argsPreview})`);
595814
596349
  } else if (event.type === "tool_result" && event.toolName) {
@@ -595823,6 +596358,9 @@ ${mediaContext}` : ""}`
595823
596358
  this.subAgentViewCallbacks?.onWrite(subAgent.viewId, `status: ${event.content}`);
595824
596359
  } else if (event.type === "assistant_text" && event.content && event.source !== "task_complete_summary") {
595825
596360
  subAgent.assistantText = event.content;
596361
+ } else if (event.type === "stream_start") {
596362
+ subAgent.accumulated = "";
596363
+ subAgent.streamText = "";
595826
596364
  } else if (event.type === "stream_end" && event.content) {
595827
596365
  subAgent.streamText = event.content;
595828
596366
  }
@@ -595885,6 +596423,8 @@ ${msg.text}`;
595885
596423
  const toolHint = [
595886
596424
  "You have access to isolated per-chat memory (memory_write, memory_read, memory_search) scoped to this conversation.",
595887
596425
  "You can remember facts about users and retrieve them later. You also have web_search and web_fetch to look up information.",
596426
+ "If the user asks you to create or send a file, image, or audio artifact, use the scoped creative tools. The bridge will attach generated files back to Telegram when tool results record them.",
596427
+ "For image generation requests, decide from the conversation whether generate_image is appropriate; do not ask the user to use a hardcoded shortcut when the request is clear.",
595888
596428
  creativeWorkspace
595889
596429
  ].filter(Boolean).join("\n\n");
595890
596430
  userPrompt = `${systemPrompt}${discretionPrompt}
@@ -621869,13 +622409,14 @@ async function renderAsciiPreviewForImage(imagePath, displayPath, title, writer)
621869
622409
  }
621870
622410
  }
621871
622411
  async function renderAsciiPreviewForToolResult(toolName, output, repoRoot, writer) {
621872
- if (toolName !== "camera_capture" || !output) return;
622412
+ if (!output) return;
621873
622413
  try {
621874
622414
  const { extractSavedImagePath: extractSavedImagePath2 } = await Promise.resolve().then(() => (init_image_ascii_preview(), image_ascii_preview_exports));
621875
622415
  const imagePath = extractSavedImagePath2(output, repoRoot);
621876
622416
  if (!imagePath) return;
621877
622417
  const displayPath = relative13(repoRoot, imagePath).startsWith("..") ? imagePath : relative13(repoRoot, imagePath);
621878
- await renderAsciiPreviewForImage(imagePath, displayPath, "Camera frame", writer);
622418
+ const title = toolName === "generate_image" ? "Generated image" : toolName === "screenshot" ? "Screenshot" : toolName === "camera_capture" ? "Camera frame" : "Image";
622419
+ await renderAsciiPreviewForImage(imagePath, displayPath, title, writer);
621879
622420
  } catch {
621880
622421
  }
621881
622422
  }
@@ -623026,7 +623567,7 @@ ${entry.fullContent}`
623026
623567
  }
623027
623568
  });
623028
623569
  }
623029
- if (event.success && event.toolName === "camera_capture") {
623570
+ if (event.success) {
623030
623571
  void renderAsciiPreviewForToolResult(event.toolName, event.content ?? "", repoRoot, contentWrite);
623031
623572
  }
623032
623573
  if (voice?.enabled && voice.voiceMode === "voicechat" && _voiceChatSession2?.isActive && event.toolName === "task_complete") {
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.5",
3
+ "version": "1.0.7",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.5",
9
+ "version": "1.0.7",
10
10
  "hasInstallScript": true,
11
11
  "license": "CC-BY-NC-4.0",
12
12
  "dependencies": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.5",
3
+ "version": "1.0.7",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",