omnius 1.0.45 → 1.0.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -251178,10 +251178,6 @@ function parseStructuredProgress(text) {
251178
251178
  return null;
251179
251179
  }
251180
251180
  }
251181
- function numberArg(value2, fallback) {
251182
- const n2 = Number(value2);
251183
- return Number.isFinite(n2) && n2 > 0 ? n2 : fallback;
251184
- }
251185
251181
  function optionalNumberArg(value2) {
251186
251182
  if (value2 === void 0 || value2 === null || value2 === "")
251187
251183
  return void 0;
@@ -251199,6 +251195,26 @@ function booleanArg(value2, fallback) {
251199
251195
  }
251200
251196
  return fallback;
251201
251197
  }
251198
/**
 * Snap a pixel dimension to the nearest multiple of 8.
 *
 * @param {number} value2 - Requested dimension in pixels.
 * @param {number} [fallback=1024] - Returned unchanged when `value2` is not a
 *   finite, positive number (strings are not coerced).
 * @param {number} [minimum=64] - Lower bound applied after rounding.
 * @returns {number} The rounded dimension, never below `minimum`.
 */
function roundToMultipleOf8(value2, fallback = 1024, minimum = 64) {
  if (!Number.isFinite(value2) || value2 <= 0) {
    return fallback;
  }
  const rounded = Math.round(value2 / 8) * 8;
  // Small inputs round down to 0 or 8; clamp so the result stays usable.
  return Math.max(minimum, rounded);
}
251204
/**
 * Turn an aspect-ratio string into a width/height pair whose longer side
 * equals the longer of the two preset dimensions.
 *
 * Accepted separators between the two numbers are `:`, `x`, `X`, `/` and `×`;
 * both numbers may be decimals (e.g. "2.35:1").
 *
 * @param {string} ratio - Aspect ratio text such as "16:9".
 * @param {number} presetWidth - Preset width in pixels.
 * @param {number} presetHeight - Preset height in pixels.
 * @returns {{width: number, height: number} | null} The derived size, or
 *   `null` when `ratio` is not a parsable positive ratio or the presets do not
 *   yield a finite positive long side. The result is NOT rounded to a multiple
 *   of 8.
 */
function resolveAspectRatioToSize(ratio, presetWidth, presetHeight) {
  if (typeof ratio !== "string") {
    return null;
  }
  const match = ratio.match(/^\s*(\d+(?:\.\d+)?)\s*[:xX/×]\s*(\d+(?:\.\d+)?)\s*$/);
  if (!match) {
    return null;
  }
  const w = Number(match[1]);
  const h = Number(match[2]);
  if (!Number.isFinite(w) || !Number.isFinite(h) || w <= 0 || h <= 0) {
    return null;
  }
  const longSide = Math.max(presetWidth, presetHeight);
  if (!Number.isFinite(longSide) || longSide <= 0) {
    return null;
  }
  // Keep the short side at >= 1px: a 0 would read as "invalid" to a
  // positive-size check downstream and lose the requested orientation.
  if (w >= h) {
    return { width: longSide, height: Math.max(1, Math.round(longSide * h / w)) };
  }
  return { width: Math.max(1, Math.round(longSide * w / h)), height: longSide };
}
251202
251218
  function generationFallbackEnabled(args) {
251203
251219
  if (booleanArg(args["strict_model"] ?? args["strictModel"] ?? args["strict"], false))
251204
251220
  return false;
@@ -252151,7 +252167,7 @@ if __name__ == "__main__":
252151
252167
  `;
252152
252168
  ImageGenerateTool = class {
252153
252169
  name = "generate_image";
252154
- description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first, including official/traceable FLUX fallbacks for Black Forest Labs models, and then falls back to smaller models if setup, download, or generation fails. Saves a PNG under .omnius/images and returns the file path.";
252170
+ description = `Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first, including official/traceable FLUX fallbacks for Black Forest Labs models, and then falls back to smaller models if setup, download, or generation fails. Aspect ratio and resolution are model-controllable: pass aspect_ratio (e.g. "16:9", "9:16", "4:3", "3:4", "1:1", "21:9", "2:3", "3:2") to derive width/height around the selected model's preferred base resolution, or pass explicit width/height (in pixels, both rounded to a multiple of 8) when a specific size is required. A preliminary prompt-expansion stage rewrites the user's prompt into a richer, model-tuned version before generation when an LLM expander is wired; pass expand_prompt=false to skip. Saves a PNG under .omnius/images and returns the file path.`;
252155
252171
  parameters = {
252156
252172
  type: "object",
252157
252173
  properties: {
@@ -252168,13 +252184,17 @@ if __name__ == "__main__":
252168
252184
  enum: ["auto", "ollama", "diffusers", "sdcpp"],
252169
252185
  description: "Generation backend. Defaults to auto."
252170
252186
  },
252187
+ aspect_ratio: {
252188
+ type: "string",
252189
+ description: `Desired aspect ratio expressed as W:H (e.g. "16:9", "9:16", "4:3", "3:4", "1:1", "21:9", "2:3", "3:2"). When provided, width/height are derived from the selected model's preferred base resolution so the longer side stays in that model's sweet spot. Ignored if explicit width and height are also provided.`
252190
+ },
252171
252191
  width: {
252172
252192
  type: "number",
252173
- description: "Image width in pixels"
252193
+ description: "Image width in pixels. Optional — defaults to the selected model's preset width, or is derived from aspect_ratio when present. Rounded to a multiple of 8."
252174
252194
  },
252175
252195
  height: {
252176
252196
  type: "number",
252177
- description: "Image height in pixels"
252197
+ description: "Image height in pixels. Optional — defaults to the selected model's preset height, or is derived from aspect_ratio when present. Rounded to a multiple of 8."
252178
252198
  },
252179
252199
  steps: {
252180
252200
  type: "number",
@@ -252204,6 +252224,10 @@ if __name__ == "__main__":
252204
252224
  strict_model: {
252205
252225
  type: "boolean",
252206
252226
  description: "When true, use only the requested model/backend and do not fall back. Defaults false."
252227
+ },
252228
+ expand_prompt: {
252229
+ type: "boolean",
252230
+ description: "When true (default), a preliminary LLM stage rewrites the prompt into a richer, model-tuned version before generation. Set false to send the raw prompt unchanged."
252207
252231
  }
252208
252232
  },
252209
252233
  required: ["prompt"]
@@ -252216,15 +252240,23 @@ if __name__ == "__main__":
252216
252240
  lastProgressAt = 0;
252217
252241
  defaultModel;
252218
252242
  defaultBackend;
252243
+ promptExpander = null;
252219
252244
  constructor(cwd4, ollamaUrl = "http://localhost:11434", defaults3 = {}) {
252220
252245
  this.cwd = cwd4;
252221
252246
  this.ollamaUrl = ollamaUrl.replace(/\/v1\/?$/, "").replace(/\/$/, "");
252222
252247
  this.defaultModel = defaults3.model;
252223
252248
  this.defaultBackend = defaults3.backend;
252249
+ this.promptExpander = defaults3.promptExpander ?? null;
252224
252250
  }
252225
252251
  setDefaults(defaults3) {
252226
252252
  this.defaultModel = defaults3.model;
252227
252253
  this.defaultBackend = defaults3.backend;
252254
+ if (defaults3.promptExpander !== void 0) {
252255
+ this.promptExpander = defaults3.promptExpander;
252256
+ }
252257
+ }
252258
+ setPromptExpander(expander) {
252259
+ this.promptExpander = expander;
252228
252260
  }
252229
252261
  setProgressCallback(handler) {
252230
252262
  this.progressHandler = handler;
@@ -252321,19 +252353,39 @@ if __name__ == "__main__":
252321
252353
  }
252322
252354
  async generateCandidateLadder(args) {
252323
252355
  const failed = [];
252356
+ const expansionEnabled = args.args["expand_prompt"] === false ? false : true;
252357
+ const aspectRatio = typeof args.args["aspect_ratio"] === "string" ? String(args.args["aspect_ratio"]).trim() : "";
252324
252358
  for (let index = 0; index < args.candidates.length; index++) {
252325
252359
  const candidate = args.candidates[index];
252326
- const width = numberArg(args.args["width"], candidate.preset?.width ?? 1024);
252327
- const height = numberArg(args.args["height"], candidate.preset?.height ?? 1024);
252360
+ const presetW = candidate.preset?.width ?? 1024;
252361
+ const presetH = candidate.preset?.height ?? 1024;
252362
+ const explicitWidth = optionalNumberArg(args.args["width"]);
252363
+ const explicitHeight = optionalNumberArg(args.args["height"]);
252364
+ const derived = (explicitWidth === void 0 || explicitHeight === void 0) && aspectRatio ? resolveAspectRatioToSize(aspectRatio, presetW, presetH) : null;
252365
+ const width = roundToMultipleOf8(explicitWidth ?? derived?.width ?? presetW);
252366
+ const height = roundToMultipleOf8(explicitHeight ?? derived?.height ?? presetH);
252328
252367
  const steps = optionalNumberArg(args.args["steps"]) ?? candidate.preset?.steps;
252329
252368
  const guidance = optionalNumberArg(args.args["guidance"]) ?? candidate.preset?.guidance;
252330
252369
  this.emitProgress({
252331
252370
  stage: "setup",
252332
252371
  message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
252333
252372
  });
252334
- const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: args.prompt, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: args.prompt, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: args.prompt, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
252335
- if (result.success)
252336
- return annotateImageFallbackSuccess(result, failed, candidate);
252373
+ const promptForCandidate = expansionEnabled ? await this.expandPromptForCandidate(args.prompt, candidate, index, args.candidates.length) : args.prompt;
252374
+ const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
252375
+ if (result.success) {
252376
+ await this.writeImageSidecar(result, {
252377
+ originalPrompt: args.prompt,
252378
+ expandedPrompt: promptForCandidate,
252379
+ model: candidate.model,
252380
+ backend: candidate.backend,
252381
+ width,
252382
+ height,
252383
+ aspectRatio: aspectRatio || null,
252384
+ seed: args.seed
252385
+ }).catch(() => {
252386
+ });
252387
+ return annotateImageFallbackSuccess(this.annotateResultWithSourcePrompt(result, args.prompt, promptForCandidate), failed, candidate);
252388
+ }
252337
252389
  failed.push({ candidate, reason: summarizeToolResult(result) });
252338
252390
  if (index < args.candidates.length - 1) {
252339
252391
  this.emitProgress({
@@ -252350,6 +252402,95 @@ if __name__ == "__main__":
252350
252402
  durationMs: performance.now() - args.start
252351
252403
  };
252352
252404
  }
252405
+ /**
252406
+ * Persist a sidecar JSON next to a generated image capturing the
252407
+ * original (user-typed) prompt, the expanded prompt actually sent to the
252408
+ * model, model identity, resolution, and any aspect-ratio request. The
252409
+ * Telegram bridge reads this when the user replies to a generated image
252410
+ * so the model can answer "what prompt made this?" or modify the prompt
252411
+ * for a follow-up generation without losing the original intent.
252412
+ */
252413
+ async writeImageSidecar(result, meta) {
252414
+ const imagePath = this.extractImagePathFromResult(result);
252415
+ if (!imagePath)
252416
+ return;
252417
+ const sidecarPath2 = `${imagePath}.json`;
252418
+ const payload = {
252419
+ version: 1,
252420
+ kind: "image-generation",
252421
+ image_path: imagePath,
252422
+ original_prompt: meta.originalPrompt,
252423
+ expanded_prompt: meta.expandedPrompt,
252424
+ prompt_was_expanded: meta.originalPrompt.trim() !== meta.expandedPrompt.trim(),
252425
+ model: meta.model,
252426
+ backend: meta.backend,
252427
+ width: meta.width,
252428
+ height: meta.height,
252429
+ aspect_ratio: meta.aspectRatio,
252430
+ seed: meta.seed ?? null,
252431
+ created_at: (/* @__PURE__ */ new Date()).toISOString()
252432
+ };
252433
+ await writeFile17(sidecarPath2, JSON.stringify(payload, null, 2) + "\n", "utf8");
252434
+ }
252435
+ extractImagePathFromResult(result) {
252436
+ const mutated = result.mutatedFiles;
252437
+ if (Array.isArray(mutated) && mutated.length > 0) {
252438
+ const first2 = mutated[0];
252439
+ if (typeof first2 === "string" && first2.trim())
252440
+ return first2;
252441
+ }
252442
+ const match = result.output.match(/Image generated:\s*([^\n]+)/);
252443
+ if (match && match[1])
252444
+ return match[1].trim();
252445
+ return null;
252446
+ }
252447
+ /**
252448
+ * Add the original user prompt to the result output when prompt
252449
+ * expansion produced a different string. This gives downstream
252450
+ * consumers (Telegram reply context, TUI display, memory) access to
252451
+ * both the user's intent and the model-tuned prompt actually rendered.
252452
+ */
252453
+ annotateResultWithSourcePrompt(result, originalPrompt, expandedPrompt) {
252454
+ if (originalPrompt.trim() === expandedPrompt.trim())
252455
+ return result;
252456
+ const annotation = ` Original prompt: "${this.truncatePromptForOutput(originalPrompt)}"`;
252457
+ const llmAnnotation = `Original user prompt: ${originalPrompt}`;
252458
+ const output = result.output ? `${result.output}
252459
+ ${annotation}` : annotation;
252460
+ const llmContent = typeof result.llmContent === "string" && result.llmContent ? `${result.llmContent}
252461
+ ${llmAnnotation}` : result.llmContent;
252462
+ return { ...result, output, llmContent };
252463
+ }
252464
+ truncatePromptForOutput(prompt) {
252465
+ return prompt.length > 200 ? prompt.slice(0, 197) + "..." : prompt;
252466
+ }
252467
+ async expandPromptForCandidate(originalPrompt, candidate, candidateIndex, candidateCount) {
252468
+ if (!this.promptExpander)
252469
+ return originalPrompt;
252470
+ try {
252471
+ this.emitProgress({
252472
+ stage: "setup",
252473
+ message: `Expanding prompt for ${candidate.model}`
252474
+ });
252475
+ const expanded = await this.promptExpander({
252476
+ model: candidate.model,
252477
+ backend: candidate.backend,
252478
+ originalPrompt,
252479
+ candidateIndex,
252480
+ candidateCount
252481
+ });
252482
+ const trimmed = typeof expanded === "string" ? expanded.trim() : "";
252483
+ if (!trimmed)
252484
+ return originalPrompt;
252485
+ this.emitProgress({
252486
+ stage: "setup",
252487
+ message: `Expanded prompt (${trimmed.length} chars) for ${candidate.model}`
252488
+ });
252489
+ return trimmed;
252490
+ } catch {
252491
+ return originalPrompt;
252492
+ }
252493
+ }
252353
252494
  async prewarmOllama(args) {
252354
252495
  const model = args.model || DEFAULT_OLLAMA_IMAGE_MODEL;
252355
252496
  if (await this.ollamaHasModel(model)) {
@@ -253248,7 +253389,7 @@ async function ensureAudioRunner(repoRoot, backend) {
253248
253389
  function audioOutputPath(repoRoot) {
253249
253390
  return join37(audioOutputDir(repoRoot), `audio-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.wav`);
253250
253391
  }
253251
- function numberArg2(value2, fallback) {
253392
+ function numberArg(value2, fallback) {
253252
253393
  if (typeof value2 === "number" && Number.isFinite(value2))
253253
253394
  return value2;
253254
253395
  if (typeof value2 === "string" && value2.trim()) {
@@ -254457,7 +254598,7 @@ if __name__ == "__main__":
254457
254598
  const failed = [];
254458
254599
  for (let index = 0; index < args.candidates.length; index++) {
254459
254600
  const candidate = args.candidates[index];
254460
- const duration = numberArg2(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
254601
+ const duration = numberArg(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
254461
254602
  this.emitProgress({
254462
254603
  stage: "setup",
254463
254604
  message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
@@ -254492,7 +254633,7 @@ if __name__ == "__main__":
254492
254633
  const failed = [];
254493
254634
  for (let index = 0; index < args.candidates.length; index++) {
254494
254635
  const candidate = args.candidates[index];
254495
- const duration = numberArg2(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
254636
+ const duration = numberArg(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
254496
254637
  const steps = optionalNumberArg2(args.args["steps"]) ?? candidate.preset?.defaultSteps;
254497
254638
  this.emitProgress({
254498
254639
  stage: "setup",
@@ -509592,7 +509733,7 @@ function boolArg(value2, fallback) {
509592
509733
  }
509593
509734
  return fallback;
509594
509735
  }
509595
- function numberArg3(value2, fallback) {
509736
+ function numberArg2(value2, fallback) {
509596
509737
  if (typeof value2 === "number" && Number.isFinite(value2))
509597
509738
  return value2;
509598
509739
  if (typeof value2 === "string" && value2.trim()) {
@@ -510317,7 +510458,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
510317
510458
  const cloneRef = cloneRefForSynthesis(args);
510318
510459
  if (!cloneRef)
510319
510460
  throw new Error(`No LuxTTS clone source found. Provide source_audio=<voice clip> or clone_ref=<registered clip>.`);
510320
- const speed = numberArg3(args["speed"], 1);
510461
+ const speed = numberArg2(args["speed"], 1);
510321
510462
  ensureLuxttsInstalled();
510322
510463
  const daemonReady = await ensureLuxttsDaemon();
510323
510464
  if (daemonReady) {
@@ -510351,8 +510492,8 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
510351
510492
  const venvPy = ensureSupertonicInstalled();
510352
510493
  const voice = typeof args["voice"] === "string" ? args["voice"] : "M4";
510353
510494
  const lang = typeof args["lang"] === "string" ? args["lang"] : "en";
510354
- const speed = numberArg3(args["speed"], 1.05);
510355
- const totalStep = Math.round(numberArg3(args["total_step"], 8));
510495
+ const speed = numberArg2(args["speed"], 1.05);
510496
+ const totalStep = Math.round(numberArg2(args["total_step"], 8));
510356
510497
  const stdout = execFileSync4(venvPy, [supertonicInferScript()], {
510357
510498
  input: JSON.stringify({ text, output_path: outputPath2, voice_name: voice, lang, speed, total_step: totalStep }),
510358
510499
  encoding: "utf8",
@@ -510404,7 +510545,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
510404
510545
  if (!hasCommand3("espeak-ng"))
510405
510546
  throw new Error("Local fallback TTS command not found.");
510406
510547
  const voice = typeof args["voice"] === "string" ? args["voice"] : "en";
510407
- const speed = Math.round(numberArg3(args["speed"], 160));
510548
+ const speed = Math.round(numberArg2(args["speed"], 160));
510408
510549
  execFileSync4("espeak-ng", ["-v", voice, "-s", String(speed), "-w", outputPath2, text], {
510409
510550
  stdio: "pipe",
510410
510551
  timeout: 6e4
@@ -537096,6 +537237,62 @@ ${blob}
537096
537237
  /** Register a tool for the agent to use */
537097
537238
  registerTool(tool) {
537098
537239
  this.tools.set(tool.name, tool);
537240
+ if (tool.name === "generate_image") {
537241
+ this.maybeInstallImagePromptExpander(tool);
537242
+ }
537243
+ }
537244
+ /**
537245
+ * Install an LLM-backed prompt expander on the registered generate_image
537246
+ * tool. The expander runs a preliminary stage that rewrites the user's
537247
+ * raw prompt into a richer, model-tuned version before the diffusion
537248
+ * call. The instruction is intentionally generic — the LLM uses its own
537249
+ * knowledge of the target model's prompt conventions to shape syntax,
537250
+ * length, and detail style, rather than relying on hard-coded templates.
537251
+ */
537252
+ maybeInstallImagePromptExpander(tool) {
537253
+ const setExpander = tool.setPromptExpander;
537254
+ if (typeof setExpander !== "function")
537255
+ return;
537256
+ if (process.env["OMNIUS_IMAGE_PROMPT_EXPAND"] === "0") {
537257
+ setExpander.call(tool, null);
537258
+ return;
537259
+ }
537260
+ const backend = this.backend;
537261
+ if (!backend || typeof backend.chatCompletion !== "function")
537262
+ return;
537263
+ setExpander.call(tool, async (ctx3) => {
537264
+ const userPrompt = (ctx3.originalPrompt ?? "").trim();
537265
+ if (!userPrompt)
537266
+ return null;
537267
+ const system = "You are a prompt-engineering stage that rewrites a user's short image request into a richer, more detailed image-generation prompt. The expanded prompt should match the prompt conventions and tokenizer preferences of the specific image model named in the request — use your own knowledge of that model's training distribution to choose length, syntax, weighting markers, ordering, and descriptor density. Do not invent new subject matter or contradict the user's intent. Output only the expanded prompt: no preamble, no quotes, no labels.";
537268
+ const user = `Target image model: ${ctx3.model}
537269
+ Backend: ${ctx3.backend}
537270
+ Candidate position: ${ctx3.candidateIndex + 1} of ${ctx3.candidateCount} (fallback ladder)
537271
+
537272
+ User prompt to expand:
537273
+ ${userPrompt}
537274
+
537275
+ Rewrite it now for ${ctx3.model}.`;
537276
+ try {
537277
+ const response = await backend.chatCompletion({
537278
+ messages: [
537279
+ { role: "system", content: system },
537280
+ { role: "user", content: user }
537281
+ ],
537282
+ tools: [],
537283
+ temperature: 0.4,
537284
+ maxTokens: 600,
537285
+ timeoutMs: 3e4
537286
+ });
537287
+ const text = response?.choices?.[0]?.message?.content;
537288
+ if (typeof text !== "string")
537289
+ return null;
537290
+ const cleaned = text.replace(/^["'`]+|["'`]+$/g, "").replace(/^(?:expanded prompt|prompt|output)\s*:\s*/i, "").trim();
537291
+ return cleaned.length > 0 ? cleaned : null;
537292
+ } catch {
537293
+ return null;
537294
+ }
537295
+ });
537099
537296
  }
537100
537297
  /** Register multiple tools */
537101
537298
  registerTools(tools) {
@@ -601106,7 +601303,7 @@ function buildTelegramCreativeTools(repoRoot, chatId, backendUrl2, imageDefaults
601106
601303
  }
601107
601304
  function scopedTool(base3, root, mode) {
601108
601305
  const rootAbs = resolve41(root);
601109
- return {
601306
+ const wrapper = {
601110
601307
  name: base3.name,
601111
601308
  description: `[PUBLIC TELEGRAM CREATIVE WORKSPACE: ${rootAbs}] ${base3.description} Paths are restricted to this workspace. This tool cannot access or modify files outside the workspace. ` + (mode === "edit" ? "It can only edit files already created in this workspace manifest. " : ""),
601112
601309
  parameters: base3.parameters,
@@ -601215,6 +601412,11 @@ function scopedTool(base3, root, mode) {
601215
601412
  return withTelegramAutoAttachmentNotice(result, recordedPaths.size);
601216
601413
  }
601217
601414
  };
601415
+ const baseSetExpander = base3.setPromptExpander;
601416
+ if (typeof baseSetExpander === "function") {
601417
+ wrapper.setPromptExpander = (expander) => baseSetExpander.call(base3, expander);
601418
+ }
601419
+ return wrapper;
601218
601420
  }
601219
601421
  function withTelegramAutoAttachmentNotice(result, artifactCount) {
601220
601422
  if (!result.success || artifactCount <= 0) return result;
@@ -603559,6 +603761,20 @@ function summarizeTelegramMessageAttachments(msg) {
603559
603761
  parts.push(`replied-to caption: ${truncateTelegramContextLine(msg.replyToMedia.caption, 180)}`);
603560
603762
  }
603561
603763
  }
603764
+ const gen = msg.replyContext?.generatedMediaPromptInfo;
603765
+ if (gen?.originalPrompt) {
603766
+ parts.push(`replied-to image source prompt: "${truncateTelegramContextLine(gen.originalPrompt, 400)}"`);
603767
+ if (gen.promptWasExpanded && gen.expandedPrompt && gen.expandedPrompt !== gen.originalPrompt) {
603768
+ parts.push(`replied-to image expanded prompt: "${truncateTelegramContextLine(gen.expandedPrompt, 400)}"`);
603769
+ }
603770
+ const meta = [
603771
+ gen.model ? `model=${gen.model}` : "",
603772
+ gen.backend ? `backend=${gen.backend}` : "",
603773
+ gen.width && gen.height ? `size=${gen.width}x${gen.height}` : "",
603774
+ gen.aspectRatio ? `aspect=${gen.aspectRatio}` : ""
603775
+ ].filter(Boolean).join(", ");
603776
+ if (meta) parts.push(`replied-to image generation: ${meta}`);
603777
+ }
603562
603778
  if (msg.poll) {
603563
603779
  parts.push(`poll: ${truncateTelegramContextLine(msg.poll.question, 180)}`);
603564
603780
  }
@@ -604511,7 +604727,7 @@ var init_telegram_bridge = __esm({
604511
604727
  ]);
604512
604728
  DEFAULT_TELEGRAM_TOOL_GROUP_POLICY = {
604513
604729
  read: true,
604514
- message: false,
604730
+ message: true,
604515
604731
  media: true,
604516
604732
  janitorial: false,
604517
604733
  reaction: false,
@@ -605153,7 +605369,8 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
605153
605369
  threadId: entry.messageThreadId,
605154
605370
  sender: this.telegramReplySenderWithSelfFlag(sender),
605155
605371
  text: entry.text,
605156
- mediaSummary: entry.mediaSummary
605372
+ mediaSummary: entry.mediaSummary,
605373
+ generatedMediaPromptInfo: entry.generatedMediaPromptInfo
605157
605374
  };
605158
605375
  }
605159
605376
  resolveTelegramReplyContext(sessionKey, msg) {
@@ -605174,7 +605391,8 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
605174
605391
  ...msg.replyContext.sender ?? {}
605175
605392
  }),
605176
605393
  text: msg.replyContext.text ?? localContext.text,
605177
- mediaSummary: msg.replyContext.mediaSummary ?? localContext.mediaSummary
605394
+ mediaSummary: msg.replyContext.mediaSummary ?? localContext.mediaSummary,
605395
+ generatedMediaPromptInfo: msg.replyContext.generatedMediaPromptInfo ?? localContext.generatedMediaPromptInfo
605178
605396
  };
605179
605397
  } else if (msg.replyContext) {
605180
605398
  reply = {
@@ -609706,10 +609924,66 @@ Content-Type: ${contentType}\r
609706
609924
  const result = await res.json();
609707
609925
  if (result.ok) {
609708
609926
  this.state.messagesSent++;
609709
- return result.result?.message_id ?? null;
609927
+ const outboundMessageId = result.result?.message_id ?? null;
609928
+ if (outboundMessageId && media.kind === "image" && media.source === "file") {
609929
+ this.recordOutboundGeneratedImagePrompt(chatId, outboundMessageId, media.value, caption);
609930
+ }
609931
+ return outboundMessageId;
609710
609932
  }
609711
609933
  throw new Error(String(result.description || `Telegram ${method} failed`));
609712
609934
  }
609935
+ /**
609936
+ * After the bot sends an outbound photo, look for a `<image>.json`
609937
+ * sidecar emitted by ImageGenerateTool and stash the source prompt info
609938
+ * on a chatHistory entry keyed by the outbound message_id. When the user
609939
+ * later replies to that image, resolveTelegramReplyContext finds the
609940
+ * entry and exposes the original prompt to the model.
609941
+ */
609942
+ recordOutboundGeneratedImagePrompt(chatId, messageId, imagePath, caption) {
609943
+ const sidecarPath2 = `${imagePath}.json`;
609944
+ if (!existsSync108(sidecarPath2)) return;
609945
+ let info = null;
609946
+ try {
609947
+ const raw = readFileSync88(sidecarPath2, "utf8");
609948
+ const parsed = JSON.parse(raw);
609949
+ if (parsed && typeof parsed === "object" && typeof parsed["original_prompt"] === "string") {
609950
+ info = {
609951
+ imagePath,
609952
+ originalPrompt: String(parsed["original_prompt"]),
609953
+ expandedPrompt: typeof parsed["expanded_prompt"] === "string" ? String(parsed["expanded_prompt"]) : void 0,
609954
+ promptWasExpanded: parsed["prompt_was_expanded"] === true,
609955
+ model: typeof parsed["model"] === "string" ? String(parsed["model"]) : void 0,
609956
+ backend: typeof parsed["backend"] === "string" ? String(parsed["backend"]) : void 0,
609957
+ width: typeof parsed["width"] === "number" ? parsed["width"] : void 0,
609958
+ height: typeof parsed["height"] === "number" ? parsed["height"] : void 0,
609959
+ aspectRatio: typeof parsed["aspect_ratio"] === "string" || parsed["aspect_ratio"] === null ? parsed["aspect_ratio"] : void 0,
609960
+ seed: typeof parsed["seed"] === "number" ? parsed["seed"] : null,
609961
+ createdAt: typeof parsed["created_at"] === "string" ? String(parsed["created_at"]) : void 0
609962
+ };
609963
+ }
609964
+ } catch {
609965
+ return;
609966
+ }
609967
+ if (!info) return;
609968
+ const sessionKey = `chat:${String(chatId)}`;
609969
+ const captionText = (caption ?? "").trim();
609970
+ const summary = `photo (generated, model=${info.model ?? "?"}, ${info.width ?? "?"}x${info.height ?? "?"})`;
609971
+ const entry = {
609972
+ role: "assistant",
609973
+ text: captionText,
609974
+ mode: "action",
609975
+ chatId,
609976
+ speaker: this.state.botUsername ? `@${this.state.botUsername}` : "Assistant",
609977
+ messageId,
609978
+ mediaSummary: summary,
609979
+ generatedMediaPromptInfo: info
609980
+ };
609981
+ try {
609982
+ this.recordChatHistory(sessionKey, entry);
609983
+ this.saveTelegramConversationState(sessionKey);
609984
+ } catch {
609985
+ }
609986
+ }
609713
609987
  async sendGeneratedArtifactsFromSubAgent(msg, subAgent, finalText, includeMentioned) {
609714
609988
  const root = subAgent.creativeWorkspaceRoot;
609715
609989
  if (!root) return;
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.45",
3
+ "version": "1.0.46",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.45",
9
+ "version": "1.0.46",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.45",
3
+ "version": "1.0.46",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",