omnius 1.0.45 → 1.0.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -3606,7 +3606,7 @@ While the sub-agent is working, users see:
3606
3606
 
3607
3607
  ### Public User Isolation
3608
3608
 
3609
- Public users get **per-chat isolated memory** — each chat is stored with explicit multimodal scope (`scope.kind = "group"|"private"`, `scope.id = chatId`) so public users can store and retrieve facts about their conversation without accessing or polluting unrelated chat memory. Public tools include: `memory_read`, `memory_write` (scoped), `memory_search`, `identity_memory` (scoped explicit identity evidence), `web_search`, `web_fetch`, and scoped minimal reminders via `reminder`/`remind`.
3609
+ Public users get **per-chat isolated memory** — each chat is stored with explicit multimodal scope (`scope.kind = "group"|"private"`, `scope.id = chatId`) so public users can store and retrieve facts about their conversation without accessing or polluting unrelated chat memory. Public tools include: `memory_read`, `memory_write` (scoped), `memory_search`, `identity_memory` (scoped explicit identity evidence), `web_search`, `web_fetch`, scoped advanced media analysis (`telegram_media_recent`, `image_read`, `ocr`, `ocr_image_advanced`, `vision`, `pdf_to_text`, `ocr_pdf`, `transcribe_file`, `video_understand`, `audio_analyze`), and scoped minimal reminders via `reminder`/`remind`.
3610
3610
 
3611
3611
  The bridge also maintains a per-chat conversation state file with recent history, participants, relationship signals, and lightweight Zettelkasten memory cards. Each Telegram group or private chat gets its own scoped personality document under `.omnius/scoped-personality/telegram-chat/`; that profile is updated as people talk and injected into future Telegram context so tone, pacing, names, and relationships stay available turn to turn.
3612
3612
 
@@ -3627,8 +3627,8 @@ Tools are gated per execution context. The system enforces strict separation bet
3627
3627
  |---------|--------------|-------|
3628
3628
  | `terminal` | All tools | Wide open — shell, file read/write, everything |
3629
3629
  | `telegram-admin-dm` | All except shell + scoped `telegram` tool | Admin DM — full tools, shell blocked by default (overridable); Telegram janitorial/moderation actions still require explicit policy and Bot API rights |
3630
- | `telegram-admin-group` | Read-only + web + vision/OCR + scoped reminders + scoped `telegram` tool | Admin in public group — current-chat only; high-risk Telegram actions require policy enablement |
3631
- | `telegram-public` | Memory r/w, web fetch/search, scoped creative tools, scoped minimal reminders + read/media `telegram` actions | Public users — no arbitrary local file access, shell, moderation, bot-admin, or janitorial actions |
3630
+ | `telegram-admin-group` | Scoped memory + web + advanced vision/OCR/media tools + scoped reminders + scoped `telegram` tool | Admin in public group — current-chat only; high-risk Telegram actions require policy enablement |
3631
+ | `telegram-public` | Scoped memory + web fetch/search + advanced current-chat vision/OCR/media tools + scoped creative tools + scoped minimal reminders + read/media `telegram` actions | Public users — no arbitrary local file access, shell, moderation, bot-admin, or janitorial actions |
3632
3632
  | `api` | All tools | API endpoint — configurable |
3633
3633
 
3634
3634
  **System tools** (`shell`, `file_write`, `file_edit`, `file_read`, `file_patch`, `batch_edit`, `grep_search`, `glob_find`, `list_directory`, `code_sandbox`, `codebase_map`, `git_info`, etc.) are **never exposed** in public-facing contexts.
package/dist/index.js CHANGED
@@ -251178,10 +251178,6 @@ function parseStructuredProgress(text) {
251178
251178
  return null;
251179
251179
  }
251180
251180
  }
251181
- function numberArg(value2, fallback) {
251182
- const n2 = Number(value2);
251183
- return Number.isFinite(n2) && n2 > 0 ? n2 : fallback;
251184
- }
251185
251181
  function optionalNumberArg(value2) {
251186
251182
  if (value2 === void 0 || value2 === null || value2 === "")
251187
251183
  return void 0;
@@ -251199,6 +251195,26 @@ function booleanArg(value2, fallback) {
251199
251195
  }
251200
251196
  return fallback;
251201
251197
  }
251198
/**
 * Snap a requested pixel dimension to the nearest multiple of 8.
 *
 * @param {number} value2 - Requested dimension in pixels.
 * @param {number} [fallback=1024] - Returned as-is when `value2` is not a
 *   finite, strictly positive number.
 * @param {number} [minimum=64] - Lower bound applied after rounding so that
 *   very small requests do not collapse to 0.
 * @returns {number} The rounded dimension, or `fallback` for unusable input.
 */
function roundToMultipleOf8(value2, fallback = 1024, minimum = 64) {
  if (!Number.isFinite(value2) || value2 <= 0) {
    return fallback;
  }
  const rounded = Math.round(value2 / 8) * 8;
  return Math.max(minimum, rounded);
}
251204
/**
 * Convert an aspect-ratio string into a pixel size whose longer side equals
 * the larger of the two preset dimensions.
 *
 * The ratio is written as two positive numbers (decimals allowed) separated
 * by ":", "x", "X", "/" or "×", for example "16:9" or "4 x 3".
 *
 * @param {string} ratio - Aspect ratio text to parse.
 * @param {number} presetWidth - Preset width of the selected model.
 * @param {number} presetHeight - Preset height of the selected model.
 * @returns {{width: number, height: number} | null} The derived size (not yet
 *   rounded to a multiple of 8), or null when the ratio cannot be parsed or
 *   the presets do not give a usable long side.
 */
function resolveAspectRatioToSize(ratio, presetWidth, presetHeight) {
  // A non-string ratio has no .match(); treat it as "no ratio given".
  if (typeof ratio !== "string") {
    return null;
  }
  const match = ratio.match(/^\s*(\d+(?:\.\d+)?)\s*[:xX/×]\s*(\d+(?:\.\d+)?)\s*$/);
  if (!match) {
    return null;
  }
  const w = Number(match[1]);
  const h = Number(match[2]);
  if (!Number.isFinite(w) || !Number.isFinite(h) || w <= 0 || h <= 0) {
    return null;
  }
  const longSide = Math.max(presetWidth, presetHeight);
  // Without a usable long side the result would be NaN/zero dimensions.
  if (!Number.isFinite(longSide) || longSide <= 0) {
    return null;
  }
  if (w >= h) {
    return { width: longSide, height: Math.round(longSide * h / w) };
  }
  return { width: Math.round(longSide * w / h), height: longSide };
}
251202
251218
  function generationFallbackEnabled(args) {
251203
251219
  if (booleanArg(args["strict_model"] ?? args["strictModel"] ?? args["strict"], false))
251204
251220
  return false;
@@ -252151,7 +252167,7 @@ if __name__ == "__main__":
252151
252167
  `;
252152
252168
  ImageGenerateTool = class {
252153
252169
  name = "generate_image";
252154
- description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first, including official/traceable FLUX fallbacks for Black Forest Labs models, and then falls back to smaller models if setup, download, or generation fails. Saves a PNG under .omnius/images and returns the file path.";
252170
+ description = `Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first, including official/traceable FLUX fallbacks for Black Forest Labs models, and then falls back to smaller models if setup, download, or generation fails. Aspect ratio and resolution are model-controllable: pass aspect_ratio (e.g. "16:9", "9:16", "4:3", "3:4", "1:1", "21:9", "2:3", "3:2") to derive width/height around the selected model's preferred base resolution, or pass explicit width/height (in pixels, both rounded to a multiple of 8) when a specific size is required. A preliminary prompt-expansion stage rewrites the user's prompt into a richer, model-tuned version before generation when an LLM expander is wired; pass expand_prompt=false to skip. Saves a PNG under .omnius/images and returns the file path.`;
252155
252171
  parameters = {
252156
252172
  type: "object",
252157
252173
  properties: {
@@ -252168,13 +252184,17 @@ if __name__ == "__main__":
252168
252184
  enum: ["auto", "ollama", "diffusers", "sdcpp"],
252169
252185
  description: "Generation backend. Defaults to auto."
252170
252186
  },
252187
+ aspect_ratio: {
252188
+ type: "string",
252189
+ description: `Desired aspect ratio expressed as W:H (e.g. "16:9", "9:16", "4:3", "3:4", "1:1", "21:9", "2:3", "3:2"). When provided, width/height are derived from the selected model's preferred base resolution so the longer side stays in that model's sweet spot. Ignored if explicit width and height are also provided.`
252190
+ },
252171
252191
  width: {
252172
252192
  type: "number",
252173
- description: "Image width in pixels"
252193
+ description: "Image width in pixels. Optional — defaults to the selected model's preset width, or is derived from aspect_ratio when present. Rounded to a multiple of 8."
252174
252194
  },
252175
252195
  height: {
252176
252196
  type: "number",
252177
- description: "Image height in pixels"
252197
+ description: "Image height in pixels. Optional — defaults to the selected model's preset height, or is derived from aspect_ratio when present. Rounded to a multiple of 8."
252178
252198
  },
252179
252199
  steps: {
252180
252200
  type: "number",
@@ -252204,6 +252224,10 @@ if __name__ == "__main__":
252204
252224
  strict_model: {
252205
252225
  type: "boolean",
252206
252226
  description: "When true, use only the requested model/backend and do not fall back. Defaults false."
252227
+ },
252228
+ expand_prompt: {
252229
+ type: "boolean",
252230
+ description: "When true (default), a preliminary LLM stage rewrites the prompt into a richer, model-tuned version before generation. Set false to send the raw prompt unchanged."
252207
252231
  }
252208
252232
  },
252209
252233
  required: ["prompt"]
@@ -252216,15 +252240,23 @@ if __name__ == "__main__":
252216
252240
  lastProgressAt = 0;
252217
252241
  defaultModel;
252218
252242
  defaultBackend;
252243
+ promptExpander = null;
252219
252244
  constructor(cwd4, ollamaUrl = "http://localhost:11434", defaults3 = {}) {
252220
252245
  this.cwd = cwd4;
252221
252246
  this.ollamaUrl = ollamaUrl.replace(/\/v1\/?$/, "").replace(/\/$/, "");
252222
252247
  this.defaultModel = defaults3.model;
252223
252248
  this.defaultBackend = defaults3.backend;
252249
+ this.promptExpander = defaults3.promptExpander ?? null;
252224
252250
  }
252225
252251
  setDefaults(defaults3) {
252226
252252
  this.defaultModel = defaults3.model;
252227
252253
  this.defaultBackend = defaults3.backend;
252254
+ if (defaults3.promptExpander !== void 0) {
252255
+ this.promptExpander = defaults3.promptExpander;
252256
+ }
252257
+ }
252258
+ setPromptExpander(expander) {
252259
+ this.promptExpander = expander;
252228
252260
  }
252229
252261
  setProgressCallback(handler) {
252230
252262
  this.progressHandler = handler;
@@ -252321,19 +252353,39 @@ if __name__ == "__main__":
252321
252353
  }
252322
252354
  async generateCandidateLadder(args) {
252323
252355
  const failed = [];
252356
+ const expansionEnabled = args.args["expand_prompt"] === false ? false : true;
252357
+ const aspectRatio = typeof args.args["aspect_ratio"] === "string" ? String(args.args["aspect_ratio"]).trim() : "";
252324
252358
  for (let index = 0; index < args.candidates.length; index++) {
252325
252359
  const candidate = args.candidates[index];
252326
- const width = numberArg(args.args["width"], candidate.preset?.width ?? 1024);
252327
- const height = numberArg(args.args["height"], candidate.preset?.height ?? 1024);
252360
+ const presetW = candidate.preset?.width ?? 1024;
252361
+ const presetH = candidate.preset?.height ?? 1024;
252362
+ const explicitWidth = optionalNumberArg(args.args["width"]);
252363
+ const explicitHeight = optionalNumberArg(args.args["height"]);
252364
+ const derived = (explicitWidth === void 0 || explicitHeight === void 0) && aspectRatio ? resolveAspectRatioToSize(aspectRatio, presetW, presetH) : null;
252365
+ const width = roundToMultipleOf8(explicitWidth ?? derived?.width ?? presetW);
252366
+ const height = roundToMultipleOf8(explicitHeight ?? derived?.height ?? presetH);
252328
252367
  const steps = optionalNumberArg(args.args["steps"]) ?? candidate.preset?.steps;
252329
252368
  const guidance = optionalNumberArg(args.args["guidance"]) ?? candidate.preset?.guidance;
252330
252369
  this.emitProgress({
252331
252370
  stage: "setup",
252332
252371
  message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
252333
252372
  });
252334
- const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: args.prompt, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: args.prompt, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: args.prompt, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
252335
- if (result.success)
252336
- return annotateImageFallbackSuccess(result, failed, candidate);
252373
+ const promptForCandidate = expansionEnabled ? await this.expandPromptForCandidate(args.prompt, candidate, index, args.candidates.length) : args.prompt;
252374
+ const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
252375
+ if (result.success) {
252376
+ await this.writeImageSidecar(result, {
252377
+ originalPrompt: args.prompt,
252378
+ expandedPrompt: promptForCandidate,
252379
+ model: candidate.model,
252380
+ backend: candidate.backend,
252381
+ width,
252382
+ height,
252383
+ aspectRatio: aspectRatio || null,
252384
+ seed: args.seed
252385
+ }).catch(() => {
252386
+ });
252387
+ return annotateImageFallbackSuccess(this.annotateResultWithSourcePrompt(result, args.prompt, promptForCandidate), failed, candidate);
252388
+ }
252337
252389
  failed.push({ candidate, reason: summarizeToolResult(result) });
252338
252390
  if (index < args.candidates.length - 1) {
252339
252391
  this.emitProgress({
@@ -252350,6 +252402,95 @@ if __name__ == "__main__":
252350
252402
  durationMs: performance.now() - args.start
252351
252403
  };
252352
252404
  }
252405
+ /**
252406
+ * Persist a sidecar JSON next to a generated image capturing the
252407
+ * original (user-typed) prompt, the expanded prompt actually sent to the
252408
+ * model, model identity, resolution, and any aspect-ratio request. The
252409
+ * Telegram bridge reads this when the user replies to a generated image
252410
+ * so the model can answer "what prompt made this?" or modify the prompt
252411
+ * for a follow-up generation without losing the original intent.
252412
+ */
252413
+ async writeImageSidecar(result, meta) {
252414
+ const imagePath = this.extractImagePathFromResult(result);
252415
+ if (!imagePath)
252416
+ return;
252417
+ const sidecarPath2 = `${imagePath}.json`;
252418
+ const payload = {
252419
+ version: 1,
252420
+ kind: "image-generation",
252421
+ image_path: imagePath,
252422
+ original_prompt: meta.originalPrompt,
252423
+ expanded_prompt: meta.expandedPrompt,
252424
+ prompt_was_expanded: meta.originalPrompt.trim() !== meta.expandedPrompt.trim(),
252425
+ model: meta.model,
252426
+ backend: meta.backend,
252427
+ width: meta.width,
252428
+ height: meta.height,
252429
+ aspect_ratio: meta.aspectRatio,
252430
+ seed: meta.seed ?? null,
252431
+ created_at: (/* @__PURE__ */ new Date()).toISOString()
252432
+ };
252433
+ await writeFile17(sidecarPath2, JSON.stringify(payload, null, 2) + "\n", "utf8");
252434
+ }
252435
+ extractImagePathFromResult(result) {
252436
+ const mutated = result.mutatedFiles;
252437
+ if (Array.isArray(mutated) && mutated.length > 0) {
252438
+ const first2 = mutated[0];
252439
+ if (typeof first2 === "string" && first2.trim())
252440
+ return first2;
252441
+ }
252442
+ const match = result.output.match(/Image generated:\s*([^\n]+)/);
252443
+ if (match && match[1])
252444
+ return match[1].trim();
252445
+ return null;
252446
+ }
252447
+ /**
252448
+ * Add the original user prompt to the result output when prompt
252449
+ * expansion produced a different string. This gives downstream
252450
+ * consumers (Telegram reply context, TUI display, memory) access to
252451
+ * both the user's intent and the model-tuned prompt actually rendered.
252452
+ */
252453
+ annotateResultWithSourcePrompt(result, originalPrompt, expandedPrompt) {
252454
+ if (originalPrompt.trim() === expandedPrompt.trim())
252455
+ return result;
252456
+ const annotation = ` Original prompt: "${this.truncatePromptForOutput(originalPrompt)}"`;
252457
+ const llmAnnotation = `Original user prompt: ${originalPrompt}`;
252458
+ const output = result.output ? `${result.output}
252459
+ ${annotation}` : annotation;
252460
+ const llmContent = typeof result.llmContent === "string" && result.llmContent ? `${result.llmContent}
252461
+ ${llmAnnotation}` : result.llmContent;
252462
+ return { ...result, output, llmContent };
252463
+ }
252464
+ truncatePromptForOutput(prompt) {
252465
+ return prompt.length > 200 ? prompt.slice(0, 197) + "..." : prompt;
252466
+ }
252467
+ async expandPromptForCandidate(originalPrompt, candidate, candidateIndex, candidateCount) {
252468
+ if (!this.promptExpander)
252469
+ return originalPrompt;
252470
+ try {
252471
+ this.emitProgress({
252472
+ stage: "setup",
252473
+ message: `Expanding prompt for ${candidate.model}`
252474
+ });
252475
+ const expanded = await this.promptExpander({
252476
+ model: candidate.model,
252477
+ backend: candidate.backend,
252478
+ originalPrompt,
252479
+ candidateIndex,
252480
+ candidateCount
252481
+ });
252482
+ const trimmed = typeof expanded === "string" ? expanded.trim() : "";
252483
+ if (!trimmed)
252484
+ return originalPrompt;
252485
+ this.emitProgress({
252486
+ stage: "setup",
252487
+ message: `Expanded prompt (${trimmed.length} chars) for ${candidate.model}`
252488
+ });
252489
+ return trimmed;
252490
+ } catch {
252491
+ return originalPrompt;
252492
+ }
252493
+ }
252353
252494
  async prewarmOllama(args) {
252354
252495
  const model = args.model || DEFAULT_OLLAMA_IMAGE_MODEL;
252355
252496
  if (await this.ollamaHasModel(model)) {
@@ -253248,7 +253389,7 @@ async function ensureAudioRunner(repoRoot, backend) {
253248
253389
  function audioOutputPath(repoRoot) {
253249
253390
  return join37(audioOutputDir(repoRoot), `audio-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.wav`);
253250
253391
  }
253251
- function numberArg2(value2, fallback) {
253392
+ function numberArg(value2, fallback) {
253252
253393
  if (typeof value2 === "number" && Number.isFinite(value2))
253253
253394
  return value2;
253254
253395
  if (typeof value2 === "string" && value2.trim()) {
@@ -254457,7 +254598,7 @@ if __name__ == "__main__":
254457
254598
  const failed = [];
254458
254599
  for (let index = 0; index < args.candidates.length; index++) {
254459
254600
  const candidate = args.candidates[index];
254460
- const duration = numberArg2(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
254601
+ const duration = numberArg(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
254461
254602
  this.emitProgress({
254462
254603
  stage: "setup",
254463
254604
  message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
@@ -254492,7 +254633,7 @@ if __name__ == "__main__":
254492
254633
  const failed = [];
254493
254634
  for (let index = 0; index < args.candidates.length; index++) {
254494
254635
  const candidate = args.candidates[index];
254495
- const duration = numberArg2(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
254636
+ const duration = numberArg(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
254496
254637
  const steps = optionalNumberArg2(args.args["steps"]) ?? candidate.preset?.defaultSteps;
254497
254638
  this.emitProgress({
254498
254639
  stage: "setup",
@@ -509592,7 +509733,7 @@ function boolArg(value2, fallback) {
509592
509733
  }
509593
509734
  return fallback;
509594
509735
  }
509595
- function numberArg3(value2, fallback) {
509736
+ function numberArg2(value2, fallback) {
509596
509737
  if (typeof value2 === "number" && Number.isFinite(value2))
509597
509738
  return value2;
509598
509739
  if (typeof value2 === "string" && value2.trim()) {
@@ -510317,7 +510458,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
510317
510458
  const cloneRef = cloneRefForSynthesis(args);
510318
510459
  if (!cloneRef)
510319
510460
  throw new Error(`No LuxTTS clone source found. Provide source_audio=<voice clip> or clone_ref=<registered clip>.`);
510320
- const speed = numberArg3(args["speed"], 1);
510461
+ const speed = numberArg2(args["speed"], 1);
510321
510462
  ensureLuxttsInstalled();
510322
510463
  const daemonReady = await ensureLuxttsDaemon();
510323
510464
  if (daemonReady) {
@@ -510351,8 +510492,8 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
510351
510492
  const venvPy = ensureSupertonicInstalled();
510352
510493
  const voice = typeof args["voice"] === "string" ? args["voice"] : "M4";
510353
510494
  const lang = typeof args["lang"] === "string" ? args["lang"] : "en";
510354
- const speed = numberArg3(args["speed"], 1.05);
510355
- const totalStep = Math.round(numberArg3(args["total_step"], 8));
510495
+ const speed = numberArg2(args["speed"], 1.05);
510496
+ const totalStep = Math.round(numberArg2(args["total_step"], 8));
510356
510497
  const stdout = execFileSync4(venvPy, [supertonicInferScript()], {
510357
510498
  input: JSON.stringify({ text, output_path: outputPath2, voice_name: voice, lang, speed, total_step: totalStep }),
510358
510499
  encoding: "utf8",
@@ -510404,7 +510545,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
510404
510545
  if (!hasCommand3("espeak-ng"))
510405
510546
  throw new Error("Local fallback TTS command not found.");
510406
510547
  const voice = typeof args["voice"] === "string" ? args["voice"] : "en";
510407
- const speed = Math.round(numberArg3(args["speed"], 160));
510548
+ const speed = Math.round(numberArg2(args["speed"], 160));
510408
510549
  execFileSync4("espeak-ng", ["-v", voice, "-s", String(speed), "-w", outputPath2, text], {
510409
510550
  stdio: "pipe",
510410
510551
  timeout: 6e4
@@ -537096,6 +537237,62 @@ ${blob}
537096
537237
  /** Register a tool for the agent to use */
537097
537238
  registerTool(tool) {
537098
537239
  this.tools.set(tool.name, tool);
537240
+ if (tool.name === "generate_image") {
537241
+ this.maybeInstallImagePromptExpander(tool);
537242
+ }
537243
+ }
537244
+ /**
537245
+ * Install an LLM-backed prompt expander on the registered generate_image
537246
+ * tool. The expander runs a preliminary stage that rewrites the user's
537247
+ * raw prompt into a richer, model-tuned version before the diffusion
537248
+ * call. The instruction is intentionally generic — the LLM uses its own
537249
+ * knowledge of the target model's prompt conventions to shape syntax,
537250
+ * length, and detail style, rather than relying on hard-coded templates.
537251
+ */
537252
+ maybeInstallImagePromptExpander(tool) {
537253
+ const setExpander = tool.setPromptExpander;
537254
+ if (typeof setExpander !== "function")
537255
+ return;
537256
+ if (process.env["OMNIUS_IMAGE_PROMPT_EXPAND"] === "0") {
537257
+ setExpander.call(tool, null);
537258
+ return;
537259
+ }
537260
+ const backend = this.backend;
537261
+ if (!backend || typeof backend.chatCompletion !== "function")
537262
+ return;
537263
+ setExpander.call(tool, async (ctx3) => {
537264
+ const userPrompt = (ctx3.originalPrompt ?? "").trim();
537265
+ if (!userPrompt)
537266
+ return null;
537267
+ const system = "You are a prompt-engineering stage that rewrites a user's short image request into a richer, more detailed image-generation prompt. The expanded prompt should match the prompt conventions and tokenizer preferences of the specific image model named in the request — use your own knowledge of that model's training distribution to choose length, syntax, weighting markers, ordering, and descriptor density. Do not invent new subject matter or contradict the user's intent. Output only the expanded prompt: no preamble, no quotes, no labels.";
537268
+ const user = `Target image model: ${ctx3.model}
537269
+ Backend: ${ctx3.backend}
537270
+ Candidate position: ${ctx3.candidateIndex + 1} of ${ctx3.candidateCount} (fallback ladder)
537271
+
537272
+ User prompt to expand:
537273
+ ${userPrompt}
537274
+
537275
+ Rewrite it now for ${ctx3.model}.`;
537276
+ try {
537277
+ const response = await backend.chatCompletion({
537278
+ messages: [
537279
+ { role: "system", content: system },
537280
+ { role: "user", content: user }
537281
+ ],
537282
+ tools: [],
537283
+ temperature: 0.4,
537284
+ maxTokens: 600,
537285
+ timeoutMs: 3e4
537286
+ });
537287
+ const text = response?.choices?.[0]?.message?.content;
537288
+ if (typeof text !== "string")
537289
+ return null;
537290
+ const cleaned = text.replace(/^["'`]+|["'`]+$/g, "").replace(/^(?:expanded prompt|prompt|output)\s*:\s*/i, "").trim();
537291
+ return cleaned.length > 0 ? cleaned : null;
537292
+ } catch {
537293
+ return null;
537294
+ }
537295
+ });
537099
537296
  }
537100
537297
  /** Register multiple tools */
537101
537298
  registerTools(tools) {
@@ -600951,6 +601148,7 @@ var init_tool_policy = __esm({
600951
601148
  "memory_read",
600952
601149
  "memory_write",
600953
601150
  "memory_search",
601151
+ "identity_memory",
600954
601152
  "todo_read",
600955
601153
  "todo_write",
600956
601154
  "web_search",
@@ -600983,6 +601181,7 @@ var init_tool_policy = __esm({
600983
601181
  "memory_read",
600984
601182
  "memory_write",
600985
601183
  "memory_search",
601184
+ "identity_memory",
600986
601185
  "todo_read",
600987
601186
  "todo_write",
600988
601187
  "web_search",
@@ -601106,7 +601305,7 @@ function buildTelegramCreativeTools(repoRoot, chatId, backendUrl2, imageDefaults
601106
601305
  }
601107
601306
  function scopedTool(base3, root, mode) {
601108
601307
  const rootAbs = resolve41(root);
601109
- return {
601308
+ const wrapper = {
601110
601309
  name: base3.name,
601111
601310
  description: `[PUBLIC TELEGRAM CREATIVE WORKSPACE: ${rootAbs}] ${base3.description} Paths are restricted to this workspace. This tool cannot access or modify files outside the workspace. ` + (mode === "edit" ? "It can only edit files already created in this workspace manifest. " : ""),
601112
601311
  parameters: base3.parameters,
@@ -601215,6 +601414,11 @@ function scopedTool(base3, root, mode) {
601215
601414
  return withTelegramAutoAttachmentNotice(result, recordedPaths.size);
601216
601415
  }
601217
601416
  };
601417
+ const baseSetExpander = base3.setPromptExpander;
601418
+ if (typeof baseSetExpander === "function") {
601419
+ wrapper.setPromptExpander = (expander) => baseSetExpander.call(base3, expander);
601420
+ }
601421
+ return wrapper;
601218
601422
  }
601219
601423
  function withTelegramAutoAttachmentNotice(result, artifactCount) {
601220
601424
  if (!result.success || artifactCount <= 0) return result;
@@ -603559,6 +603763,20 @@ function summarizeTelegramMessageAttachments(msg) {
603559
603763
  parts.push(`replied-to caption: ${truncateTelegramContextLine(msg.replyToMedia.caption, 180)}`);
603560
603764
  }
603561
603765
  }
603766
+ const gen = msg.replyContext?.generatedMediaPromptInfo;
603767
+ if (gen?.originalPrompt) {
603768
+ parts.push(`replied-to image source prompt: "${truncateTelegramContextLine(gen.originalPrompt, 400)}"`);
603769
+ if (gen.promptWasExpanded && gen.expandedPrompt && gen.expandedPrompt !== gen.originalPrompt) {
603770
+ parts.push(`replied-to image expanded prompt: "${truncateTelegramContextLine(gen.expandedPrompt, 400)}"`);
603771
+ }
603772
+ const meta = [
603773
+ gen.model ? `model=${gen.model}` : "",
603774
+ gen.backend ? `backend=${gen.backend}` : "",
603775
+ gen.width && gen.height ? `size=${gen.width}x${gen.height}` : "",
603776
+ gen.aspectRatio ? `aspect=${gen.aspectRatio}` : ""
603777
+ ].filter(Boolean).join(", ");
603778
+ if (meta) parts.push(`replied-to image generation: ${meta}`);
603779
+ }
603562
603780
  if (msg.poll) {
603563
603781
  parts.push(`poll: ${truncateTelegramContextLine(msg.poll.question, 180)}`);
603564
603782
  }
@@ -603567,6 +603785,32 @@ function summarizeTelegramMessageAttachments(msg) {
603567
603785
  }
603568
603786
  return parts.join("; ");
603569
603787
  }
603788
/**
 * Render the stored generation details of an image as context text.
 *
 * Returns "" when `info` has no original prompt. Otherwise the result has
 * the quoted original prompt, then the quoted expanded prompt (only when
 * it was flagged as expanded and differs from the original after trimming),
 * then one metadata line listing whichever of model, backend, size, aspect
 * ratio, seed and creation time are present.
 *
 * @param info - Generation details read from the image's stored metadata.
 * @param maxPromptLength - Length limit passed on for each quoted prompt.
 */
function formatTelegramGeneratedImagePromptInfo(info, maxPromptLength = 900) {
  if (!info?.originalPrompt) {
    return "";
  }
  const sections = [];
  const quotedOriginal = quoteTelegramContextText(info.originalPrompt, maxPromptLength);
  sections.push("Generated image original prompt:\n" + quotedOriginal);
  const showExpanded = info.promptWasExpanded && info.expandedPrompt && info.expandedPrompt.trim() !== info.originalPrompt.trim();
  if (showExpanded) {
    const quotedExpanded = quoteTelegramContextText(info.expandedPrompt, maxPromptLength);
    sections.push("Generated image expanded prompt actually sent to image model:\n" + quotedExpanded);
  }
  const details = [];
  if (info.model) {
    details.push(`model=${info.model}`);
  }
  if (info.backend) {
    details.push(`backend=${info.backend}`);
  }
  if (info.width && info.height) {
    details.push(`size=${info.width}x${info.height}`);
  }
  if (info.aspectRatio) {
    details.push(`aspect=${info.aspectRatio}`);
  }
  if (info.seed !== void 0 && info.seed !== null) {
    details.push(`seed=${info.seed}`);
  }
  if (info.createdAt) {
    details.push(`created_at=${info.createdAt}`);
  }
  if (details.length > 0) {
    sections.push(`Generated image metadata: ${details.join(", ")}`);
  }
  return sections.join("\n");
}
603809
/**
 * Format text as a "> "-prefixed quote block, truncating it when too long.
 *
 * Text longer than `maxLength` is cut to `maxLength - 60` characters (never
 * below 0), trailing whitespace is removed, and a
 * "[generated prompt truncated]" line is appended. The cut never lands
 * between the two halves of a surrogate pair, so an emoji or other
 * astral-plane character at the boundary is dropped whole instead of
 * leaving a lone surrogate in the text.
 *
 * @param {string} text - Text to quote.
 * @param {number} maxLength - Length above which the text is truncated.
 * @returns {string} The quoted text, one "> " prefix per line.
 */
function quoteTelegramContextText(text, maxLength) {
  let clipped = text;
  if (text.length > maxLength) {
    let end = Math.max(0, maxLength - 60);
    if (end > 0) {
      const lastUnit = text.charCodeAt(end - 1);
      // 0xD800-0xDBFF is a high surrogate: its partner would be cut off.
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
      }
    }
    clipped = `${text.slice(0, end).trimEnd()}\n[generated prompt truncated]`;
  }
  return clipped.split(/\r?\n/).map((line) => `> ${line}`).join("\n");
}
603570
603814
  function inferTelegramToneTags(text) {
603571
603815
  const lower = text.toLowerCase();
603572
603816
  const tags = /* @__PURE__ */ new Set();
@@ -604425,7 +604669,7 @@ function renderTelegramSubAgentError(username, error) {
604425
604669
  process.stdout.write(` ${c3.dim("⎿")} ${c3.red("✘")} @${username}: ${c3.dim(preview)}
604426
604670
  `);
604427
604671
  }
604428
- var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_PUBLIC_HELP_COMMANDS, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TelegramBridge;
604672
+ var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_PUBLIC_HELP_COMMANDS, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TelegramBridge;
604429
604673
  var init_telegram_bridge = __esm({
604430
604674
  "packages/cli/src/tui/telegram-bridge.ts"() {
604431
604675
  "use strict";
@@ -604511,7 +604755,7 @@ var init_telegram_bridge = __esm({
604511
604755
  ]);
604512
604756
  DEFAULT_TELEGRAM_TOOL_GROUP_POLICY = {
604513
604757
  read: true,
604514
- message: false,
604758
+ message: true,
604515
604759
  media: true,
604516
604760
  janitorial: false,
604517
604761
  reaction: false,
@@ -604571,7 +604815,7 @@ Although this is an admin, the group is PUBLIC — other people can see your res
604571
604815
 
604572
604816
  RULES FOR GROUP CONTEXT:
604573
604817
  1. NEVER share private information, API keys, file paths, or system internals
604574
- 2. You have limited tools: web search, memory, and media analysis only
604818
+ 2. You have limited tools: scoped web search/fetch, scoped memory, scoped identity memory, and scoped media analysis only
604575
604819
  3. Keep responses helpful and relevant to the conversation
604576
604820
  4. Be concise — group chats should have shorter responses
604577
604821
  5. Only respond if the message is directed at you or clearly relevant
@@ -604607,6 +604851,18 @@ PUBLIC TELEGRAM MEMORY SCOPE
604607
604851
  This turn may use memory and conversation history for the current Telegram group/private chat scope only.
604608
604852
  Users in a shared public group may ask questions about that shared group history and group memory, scoped by the current group id or by a user id/username inside that same group.
604609
604853
  Private chats, admin DMs, other groups, local terminal sessions, and fragmented private contexts are not visible from this public group. Do not imply they exist and do not answer from them.
604854
+ `.trim();
604855
+ TELEGRAM_PUBLIC_VISION_STACK_CONTRACT = `
604856
+ PUBLIC TELEGRAM VISION / MEDIA STACK
604857
+
604858
+ Public Telegram runs have the full scoped media-analysis stack for media posted in this chat:
604859
+ - Use telegram_media_recent to find recent scoped media, then use path/media aliases 'reply' and 'latest' instead of exposing local paths to users.
604860
+ - Use ocr_image_advanced for complex textual imagery: screenshots, dense documents, forms, receipts, scans, diagrams with labels, low-contrast photos, or uneven lighting.
604861
+ - Use ocr for quick image text extraction, image_read for image metadata + OCR + multimodal image payload, and vision for captioning, visual QA, object detection, or pointing.
604862
+ - Use pdf_to_text for embedded-text PDFs and ocr_pdf for scanned PDFs.
604863
+ - Use video_understand and transcribe_file for video/audio media posted in this chat.
604864
+ - Use identity_memory for explicit user-provided identity assertions, staged next-image names, and "who is this?" recall from scoped media. Do not guess real identities from images.
604865
+ - These tools are current-chat scoped. Never inspect arbitrary local files, reveal local paths, or claim access to media outside this Telegram chat scope.
604610
604866
  `.trim();
604611
604867
  GROUP_REPLY_DISCRETION_PROMPT = `
604612
604868
  REPLY DISCRETION: You are in a group chat. The live router has already filtered
@@ -605153,7 +605409,8 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
605153
605409
  threadId: entry.messageThreadId,
605154
605410
  sender: this.telegramReplySenderWithSelfFlag(sender),
605155
605411
  text: entry.text,
605156
- mediaSummary: entry.mediaSummary
605412
+ mediaSummary: entry.mediaSummary,
605413
+ generatedMediaPromptInfo: entry.generatedMediaPromptInfo
605157
605414
  };
605158
605415
  }
605159
605416
  resolveTelegramReplyContext(sessionKey, msg) {
@@ -605174,7 +605431,8 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
605174
605431
  ...msg.replyContext.sender ?? {}
605175
605432
  }),
605176
605433
  text: msg.replyContext.text ?? localContext.text,
605177
- mediaSummary: msg.replyContext.mediaSummary ?? localContext.mediaSummary
605434
+ mediaSummary: msg.replyContext.mediaSummary ?? localContext.mediaSummary,
605435
+ generatedMediaPromptInfo: msg.replyContext.generatedMediaPromptInfo ?? localContext.generatedMediaPromptInfo
605178
605436
  };
605179
605437
  } else if (msg.replyContext) {
605180
605438
  reply = {
@@ -605228,6 +605486,8 @@ ${this.quoteTelegramContextBlock(reply.quote, 1e3)}` : "",
605228
605486
  ${this.quoteTelegramContextBlock(content, 2200)}` : "",
605229
605487
  reply.mediaSummary ? `Replied-to media: ${reply.mediaSummary}` : "",
605230
605488
  reply.media && !reply.mediaSummary ? `Replied-to media: ${reply.media.type}${reply.media.fileName ? ` ${reply.media.fileName}` : ""}${reply.media.mimeType ? ` ${reply.media.mimeType}` : ""}` : "",
605489
+ reply.generatedMediaPromptInfo ? `Replied-to generated image provenance:
605490
+ ${formatTelegramGeneratedImagePromptInfo(reply.generatedMediaPromptInfo, 1400)}` : "",
605231
605491
  msg.text ? `Current user message:
605232
605492
  ${this.quoteTelegramContextBlock(msg.text, 1e3)}` : "",
605233
605493
  'Instruction: resolve pronouns, follow-up requests, and requests like "links", "repos", "instructions", "that", or "this" against the replied-to content before broader chat/workspace context.'
@@ -606179,8 +606439,9 @@ ${olderLines.join("\n")}`);
606179
606439
  const replySender = entry.replyContext?.sender ? `/${telegramReplySenderLabel(entry.replyContext.sender)}` : "";
606180
606440
  const reply = entry.replyToMessageId ? ` reply_to:${entry.replyToMessageId}${replySender}` : "";
606181
606441
  const media = entry.mediaSummary ? ` [${entry.mediaSummary}]` : "";
606442
+ const generatedPrompt = entry.generatedMediaPromptInfo?.originalPrompt ? ` generated_image_prompt="${truncateTelegramContextLine(entry.generatedMediaPromptInfo.originalPrompt, 220)}"` : "";
606182
606443
  const prefix = [when, `${speaker}${mode}${reply}${media}`].filter(Boolean).join(" ");
606183
- return `${prefix}: ${truncateTelegramContextLine(entry.text)}`;
606444
+ return `${prefix}: ${truncateTelegramContextLine(entry.text)}${generatedPrompt}`;
606184
606445
  });
606185
606446
  sections.push(`### Recent Thread, Oldest To Newest
606186
606447
  ${lines.join("\n")}`);
@@ -606290,7 +606551,7 @@ ${lines.join("\n")}`);
606290
606551
  `Route meanings:`,
606291
606552
  `- chat: a short conversational answer can be produced without tools.`,
606292
606553
  `- action: tools, workspace context, media processing, web lookup, delegation, or a multi-step agent loop may be needed.`,
606293
- `Route discipline: greetings, acknowledgements, casual tone/style discussion, and simple conversational questions are chat. Use action only when the message asks you to inspect, create, change, send, remember, search, analyze media, name/enroll/identify a person/face/voice from media, or otherwise do tool-backed work.`,
606554
+ `Route discipline: greetings, acknowledgements, casual tone/style discussion, and simple conversational questions are chat. Use action only when the message asks you to inspect, create, change, send, remember, search, analyze media, extract text from images/screenshots/forms/scans, name/enroll/identify a person/face/voice from media, or otherwise do tool-backed work.`,
606294
606555
  ``,
606295
606556
  `Reply discretion: infer from the live thread, speaker relationships, direct platform signals, replies, tone, current message, and any private channel daydream artifact supplied in context. Do not use static keyword rules.`,
606296
606557
  `Private chats: should_reply is normally true.`,
@@ -606568,6 +606829,8 @@ ${TELEGRAM_PUBLIC_SOUL_PROFILE}
606568
606829
 
606569
606830
  ${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
606570
606831
 
606832
+ ${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}
606833
+
606571
606834
  ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
606572
606835
  } else {
606573
606836
  sections.push(`## Telegram Safety Contract
@@ -606578,6 +606841,8 @@ ${TELEGRAM_PUBLIC_SOUL_PROFILE}
606578
606841
 
606579
606842
  ${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
606580
606843
 
606844
+ ${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}
606845
+
606581
606846
  ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
606582
606847
  }
606583
606848
  return { sessionKey, sessionId, context: sections.join("\n\n") };
@@ -607368,11 +607633,15 @@ Join: ${newUrl}`);
607368
607633
 
607369
607634
  ${TELEGRAM_PUBLIC_SOUL_PROFILE}
607370
607635
 
607371
- ${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}` : `${TELEGRAM_SAFETY_PROMPT}
607636
+ ${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
607637
+
607638
+ ${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}` : `${TELEGRAM_SAFETY_PROMPT}
607372
607639
 
607373
607640
  ${TELEGRAM_PUBLIC_SOUL_PROFILE}
607374
607641
 
607375
- ${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}`;
607642
+ ${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
607643
+
607644
+ ${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}`;
607376
607645
  const groupHint = isGroup ? `Telegram group: ${msg.chatTitle || "unknown"}. The live router selected this turn as reply-worthy; keep the reply short and relevant. Never output a skip decision, no_reply marker, memory-stage note, or completion status.` : "Telegram private chat.";
607377
607646
  const runtime = buildTelegramRuntimeContext(/* @__PURE__ */ new Date());
607378
607647
  const messages2 = [
@@ -607636,6 +607905,7 @@ ${currentTelegramPrompt}`;
607636
607905
  "You have access to isolated per-chat memory (memory_write, memory_read, memory_search) scoped to this conversation.",
607637
607906
  "memory_search may use scope=group/current_chat for this group or scope=user with user_id/username for a participant in this same group. Other groups, admin chats, and private DMs are not accessible here.",
607638
607907
  "You can remember facts about users and retrieve them later. You also have web_search and web_fetch to look up information.",
607908
+ "You have the full scoped Telegram media-analysis stack by default: telegram_media_recent, image_read, ocr, ocr_image_advanced, vision, pdf_to_text, ocr_pdf, transcribe_file, video_understand, audio_analyze, and identity_memory. For complex textual imagery, screenshots, forms, scans, or dense labels, prefer ocr_image_advanced after resolving media with path='reply' or path='latest'.",
607639
607909
  formatIdentityMemoryContext(chatLabel || "Telegram private chat"),
607640
607910
  reminderToolContract,
607641
607911
  "If the user asks you to create an image, audio file, or document artifact, create it with the scoped creative tools. Freshly generated artifacts are recorded and automatically attached to this Telegram chat when the turn completes, so do not call telegram_send_file for those same artifacts unless the user asked for a specific caption, existing/unrecorded file, or non-default target.",
@@ -607970,6 +608240,8 @@ ${lines.join("\n\n")}` };
607970
608240
 
607971
608241
  ${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
607972
608242
 
608243
+ ${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}
608244
+
607973
608245
  ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}
607974
608246
 
607975
608247
  ${conversation}`
@@ -609053,7 +609325,7 @@ Scoped workspace: ${scopedRoot}`,
609053
609325
  const bridge = this;
609054
609326
  return {
609055
609327
  name: "telegram_media_recent",
609056
- description: "List recent media files available in this Telegram chat scope, including safe aliases for image_read, ocr, vision, transcribe_file, pdf_to_text, video_understand, and audio_analyze.",
609328
+ description: "List recent media files available in this Telegram chat scope, including safe aliases for image_read, ocr, ocr_image_advanced, vision, identity_memory, transcribe_file, pdf_to_text, video_understand, and audio_analyze.",
609057
609329
  parameters: {
609058
609330
  type: "object",
609059
609331
  properties: {
@@ -609208,7 +609480,8 @@ Scoped workspace: ${scopedRoot}`,
609208
609480
  const messageId = await bridge.sendTelegramFileToChat(target.chatId, file.path, {
609209
609481
  kind,
609210
609482
  caption: caption || void 0,
609211
- replyToMessageId
609483
+ replyToMessageId,
609484
+ sourcePromptPath: ledgerPath
609212
609485
  });
609213
609486
  bridge.rememberTelegramFileSendForMessage(currentMsg, sendFingerprint);
609214
609487
  bridge.rememberTelegramDeliveredArtifactForMessage(currentMsg, ledgerPath);
@@ -609440,7 +609713,7 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
609440
609713
  description = `[${sourceLabel}image received: ${localPath}${caption ? ` — caption: "${caption}"` : ""}
609441
609714
  ${visionContext}]`;
609442
609715
  } else {
609443
- description = `[${sourceLabel}image received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use image_read, ocr, or vision tools to analyze it.]`;
609716
+ description = `[${sourceLabel}image received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use image_read, ocr, ocr_image_advanced, vision, or identity_memory tools to analyze it.]`;
609444
609717
  }
609445
609718
  const ingestPayload = this.telegramMemoryIngestPayload(msg, media, localPath, source, cacheEntry.extractedContent);
609446
609719
  let visualIdentityContext = "";
@@ -609706,10 +609979,69 @@ Content-Type: ${contentType}\r
609706
609979
  const result = await res.json();
609707
609980
  if (result.ok) {
609708
609981
  this.state.messagesSent++;
609709
- return result.result?.message_id ?? null;
609982
+ const outboundMessageId = result.result?.message_id ?? null;
609983
+ if (outboundMessageId && media.kind === "image" && media.source === "file") {
609984
+ this.recordOutboundGeneratedImagePrompt(chatId, outboundMessageId, options2.sourcePromptPath ?? media.value, caption);
609985
+ }
609986
+ return outboundMessageId;
609710
609987
  }
609711
609988
  throw new Error(String(result.description || `Telegram ${method} failed`));
609712
609989
  }
609990
+ /**
609991
+ * After the bot sends an outbound photo, look for a `<imagePath>.json`
609992
+ * sidecar emitted by ImageGenerateTool and stash the source prompt info
609993
+ * on a chatHistory entry keyed by the outbound message_id. When the user
609994
+ * later replies to that image, resolveTelegramReplyContext finds the
609995
+ * entry and exposes the original prompt to the model.
609996
+ */
609997
+ recordOutboundGeneratedImagePrompt(chatId, messageId, imagePath, caption) {
// Records an assistant chat-history entry for an outbound image, but only when
// prompt metadata can be read from the sidecar next to imagePath.
//   chatId    - chat the image was sent to; also used to build the session key.
//   messageId - id of the sent message; stored on the entry as messageId.
//   imagePath - path handed to readGeneratedImagePromptInfo to locate the sidecar.
//   caption   - optional caption; becomes the entry text ("" when null/undefined).
// Returns nothing.
609998
+ const info = this.readGeneratedImagePromptInfo(imagePath);
609999
// readGeneratedImagePromptInfo yields null when there is no usable sidecar;
// in that case no history entry is written.
+ if (!info) return;
610000
// The session key is derived from the chat id alone: "chat:<chatId>".
+ const sessionKey = `chat:${String(chatId)}`;
610001
+ const captionText = (caption ?? "").trim();
610002
// Missing model / width / height are rendered as "?" in the media summary.
+ const summary = `photo (generated, model=${info.model ?? "?"}, ${info.width ?? "?"}x${info.height ?? "?"})`;
610003
+ const entry = {
610004
+ role: "assistant",
610005
+ text: captionText,
610006
+ mode: "action",
610007
+ chatId,
610008
+ speaker: this.state.botUsername ? `@${this.state.botUsername}` : "Assistant",
610009
+ messageId,
610010
+ mediaSummary: summary,
610011
+ generatedMediaPromptInfo: info
610012
+ };
610013
// Anything thrown while recording or saving the entry is swallowed by the
// empty catch below, so a history failure does not propagate to the caller.
+ try {
610014
+ this.recordChatHistory(sessionKey, entry);
610015
+ this.saveTelegramConversationState(sessionKey);
610016
+ } catch {
610017
+ }
610018
+ }
610019
+ readGeneratedImagePromptInfo(imagePath) {
// Loads prompt metadata from the JSON sidecar at "<imagePath>.json"
// (the image path with ".json" appended, not the extension replaced).
// Returns null when the sidecar does not exist, cannot be read or parsed,
// or has no string original_prompt. Otherwise returns a camelCase object
// built from the sidecar's snake_case fields, plus the imagePath itself.
610020
+ const sidecarPath2 = `${imagePath}.json`;
610021
// NOTE(review): existsSync108 / readFileSync88 look like bundler-renamed
// node:fs helpers; confirm against the bundle's import section.
+ if (!existsSync108(sidecarPath2)) return null;
610022
+ try {
610023
+ const raw = readFileSync88(sidecarPath2, "utf8");
610024
+ const parsed = JSON.parse(raw);
610025
// original_prompt is the only required field; every other field is optional.
+ if (!parsed || typeof parsed !== "object" || typeof parsed["original_prompt"] !== "string") {
610026
+ return null;
610027
+ }
610028
// Optional fields are copied only when they have the expected type and are
// otherwise undefined, with these exceptions: promptWasExpanded is false
// unless the sidecar value is exactly true, aspectRatio also accepts null,
// and seed falls back to null.
+ return {
610029
+ imagePath,
610030
+ originalPrompt: String(parsed["original_prompt"]),
610031
+ expandedPrompt: typeof parsed["expanded_prompt"] === "string" ? String(parsed["expanded_prompt"]) : void 0,
610032
+ promptWasExpanded: parsed["prompt_was_expanded"] === true,
610033
+ model: typeof parsed["model"] === "string" ? String(parsed["model"]) : void 0,
610034
+ backend: typeof parsed["backend"] === "string" ? String(parsed["backend"]) : void 0,
610035
+ width: typeof parsed["width"] === "number" ? parsed["width"] : void 0,
610036
+ height: typeof parsed["height"] === "number" ? parsed["height"] : void 0,
610037
+ aspectRatio: typeof parsed["aspect_ratio"] === "string" || parsed["aspect_ratio"] === null ? parsed["aspect_ratio"] : void 0,
610038
+ seed: typeof parsed["seed"] === "number" ? parsed["seed"] : null,
610039
+ createdAt: typeof parsed["created_at"] === "string" ? String(parsed["created_at"]) : void 0
610040
+ };
610041
// A read or JSON.parse failure is treated the same as a missing sidecar.
+ } catch {
610042
+ return null;
610043
+ }
610044
+ }
609713
610045
  async sendGeneratedArtifactsFromSubAgent(msg, subAgent, finalText, includeMentioned) {
609714
610046
  const root = subAgent.creativeWorkspaceRoot;
609715
610047
  if (!root) return;
@@ -609741,6 +610073,8 @@ Content-Type: ${contentType}\r
609741
610073
  kind,
609742
610074
  source: "file",
609743
610075
  audioAsVoice: kind === "voice"
610076
+ }, {
610077
+ sourcePromptPath: abs
609744
610078
  }).then((messageId) => {
609745
610079
  if (messageId !== null) {
609746
610080
  this.rememberTelegramDeliveredArtifact(subAgent, abs);
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.45",
3
+ "version": "1.0.47",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.45",
9
+ "version": "1.0.47",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.45",
3
+ "version": "1.0.47",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",