npm - omnius - Versions diffs - 1.0.45 → 1.0.47 - Mend

omnius 1.0.45 → 1.0.47

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -3606,7 +3606,7 @@ While the sub-agent is working, users see:
 ### Public User Isolation
-Public users get **per-chat isolated memory** — each chat is stored with explicit multimodal scope (`scope.kind = "group"|"private"`, `scope.id = chatId`) so public users can store and retrieve facts about their conversation without accessing or polluting unrelated chat memory. Public tools include: `memory_read`, `memory_write` (scoped), `memory_search`, `identity_memory` (scoped explicit identity evidence), `web_search`, `web_fetch`, and scoped minimal reminders via `reminder`/`remind`.
+Public users get **per-chat isolated memory** — each chat is stored with explicit multimodal scope (`scope.kind = "group"|"private"`, `scope.id = chatId`) so public users can store and retrieve facts about their conversation without accessing or polluting unrelated chat memory. Public tools include: `memory_read`, `memory_write` (scoped), `memory_search`, `identity_memory` (scoped explicit identity evidence), `web_search`, `web_fetch`, scoped advanced media analysis (`telegram_media_recent`, `image_read`, `ocr`, `ocr_image_advanced`, `vision`, `pdf_to_text`, `ocr_pdf`, `transcribe_file`, `video_understand`, `audio_analyze`), and scoped minimal reminders via `reminder`/`remind`.
 The bridge also maintains a per-chat conversation state file with recent history, participants, relationship signals, and lightweight Zettelkasten memory cards. Each Telegram group or private chat gets its own scoped personality document under `.omnius/scoped-personality/telegram-chat/`; that profile is updated as people talk and injected into future Telegram context so tone, pacing, names, and relationships stay available turn to turn.
@@ -3627,8 +3627,8 @@ Tools are gated per execution context. The system enforces strict separation bet
 |---------|--------------|-------|
 | `terminal` | All tools | Wide open — shell, file read/write, everything |
 | `telegram-admin-dm` | All except shell + scoped `telegram` tool | Admin DM — full tools, shell blocked by default (overridable); Telegram janitorial/moderation actions still require explicit policy and Bot API rights |
-| `telegram-admin-group` | Read-only + web + vision/OCR + scoped reminders + scoped `telegram` tool | Admin in public group — current-chat only; high-risk Telegram actions require policy enablement |
-| `telegram-public` | Memory r/w, web fetch/search, scoped creative tools, scoped minimal reminders + read/media `telegram` actions | Public users — no arbitrary local file access, shell, moderation, bot-admin, or janitorial actions |
+| `telegram-admin-group` | Scoped memory + web + advanced vision/OCR/media tools + scoped reminders + scoped `telegram` tool | Admin in public group — current-chat only; high-risk Telegram actions require policy enablement |
+| `telegram-public` | Scoped memory + web fetch/search + advanced current-chat vision/OCR/media tools + scoped creative tools + scoped minimal reminders + read/media `telegram` actions | Public users — no arbitrary local file access, shell, moderation, bot-admin, or janitorial actions |
 | `api` | All tools | API endpoint — configurable |
 **System tools** (`shell`, `file_write`, `file_edit`, `file_read`, `file_patch`, `batch_edit`, `grep_search`, `glob_find`, `list_directory`, `code_sandbox`, `codebase_map`, `git_info`, etc.) are **never exposed** in public-facing contexts.

package/dist/index.js CHANGED Viewed

@@ -251178,10 +251178,6 @@ function parseStructuredProgress(text) {
     return null;
   }
 }
-function numberArg(value2, fallback) {
-  const n2 = Number(value2);
-  return Number.isFinite(n2) && n2 > 0 ? n2 : fallback;
-}
 function optionalNumberArg(value2) {
   if (value2 === void 0 || value2 === null || value2 === "")
     return void 0;
@@ -251199,6 +251195,26 @@ function booleanArg(value2, fallback) {
   }
   return fallback;
 }
+function roundToMultipleOf8(value2) {
+  if (!Number.isFinite(value2) || value2 <= 0)
+    return 1024;
+  const rounded = Math.round(value2 / 8) * 8;
+  return Math.max(64, rounded);
+}
+function resolveAspectRatioToSize(ratio, presetWidth, presetHeight) {
+  const match = ratio.match(/^\s*(\d+(?:\.\d+)?)\s*[:xX/×]\s*(\d+(?:\.\d+)?)\s*$/);
+  if (!match)
+    return null;
+  const w = Number(match[1]);
+  const h = Number(match[2]);
+  if (!Number.isFinite(w) || !Number.isFinite(h) || w <= 0 || h <= 0)
+    return null;
+  const longSide = Math.max(presetWidth, presetHeight);
+  if (w >= h) {
+    return { width: longSide, height: Math.round(longSide * h / w) };
+  }
+  return { width: Math.round(longSide * w / h), height: longSide };
+}
 function generationFallbackEnabled(args) {
   if (booleanArg(args["strict_model"] ?? args["strictModel"] ?? args["strict"], false))
     return false;
@@ -252151,7 +252167,7 @@ if __name__ == "__main__":
 `;
     ImageGenerateTool = class {
       name = "generate_image";
-      description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first, including official/traceable FLUX fallbacks for Black Forest Labs models, and then falls back to smaller models if setup, download, or generation fails. Saves a PNG under .omnius/images and returns the file path.";
+      description = `Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first, including official/traceable FLUX fallbacks for Black Forest Labs models, and then falls back to smaller models if setup, download, or generation fails. Aspect ratio and resolution are model-controllable: pass aspect_ratio (e.g. "16:9", "9:16", "4:3", "3:4", "1:1", "21:9", "2:3", "3:2") to derive width/height around the selected model's preferred base resolution, or pass explicit width/height (in pixels, both rounded to a multiple of 8) when a specific size is required. A preliminary prompt-expansion stage rewrites the user's prompt into a richer, model-tuned version before generation when an LLM expander is wired; pass expand_prompt=false to skip. Saves a PNG under .omnius/images and returns the file path.`;
       parameters = {
         type: "object",
         properties: {
@@ -252168,13 +252184,17 @@ if __name__ == "__main__":
             enum: ["auto", "ollama", "diffusers", "sdcpp"],
             description: "Generation backend. Defaults to auto."
           },
+          aspect_ratio: {
+            type: "string",
+            description: `Desired aspect ratio expressed as W:H (e.g. "16:9", "9:16", "4:3", "3:4", "1:1", "21:9", "2:3", "3:2"). When provided, width/height are derived from the selected model's preferred base resolution so the longer side stays in that model's sweet spot. Ignored if explicit width and height are also provided.`
+          },
           width: {
             type: "number",
-            description: "Image width in pixels"
+            description: "Image width in pixels. Optional — defaults to the selected model's preset width, or is derived from aspect_ratio when present. Rounded to a multiple of 8."
           },
           height: {
             type: "number",
-            description: "Image height in pixels"
+            description: "Image height in pixels. Optional — defaults to the selected model's preset height, or is derived from aspect_ratio when present. Rounded to a multiple of 8."
           },
           steps: {
             type: "number",
@@ -252204,6 +252224,10 @@ if __name__ == "__main__":
           strict_model: {
             type: "boolean",
             description: "When true, use only the requested model/backend and do not fall back. Defaults false."
+          },
+          expand_prompt: {
+            type: "boolean",
+            description: "When true (default), a preliminary LLM stage rewrites the prompt into a richer, model-tuned version before generation. Set false to send the raw prompt unchanged."
           }
         },
         required: ["prompt"]
@@ -252216,15 +252240,23 @@ if __name__ == "__main__":
       lastProgressAt = 0;
       defaultModel;
       defaultBackend;
+      promptExpander = null;
       constructor(cwd4, ollamaUrl = "http://localhost:11434", defaults3 = {}) {
         this.cwd = cwd4;
         this.ollamaUrl = ollamaUrl.replace(/\/v1\/?$/, "").replace(/\/$/, "");
         this.defaultModel = defaults3.model;
         this.defaultBackend = defaults3.backend;
+        this.promptExpander = defaults3.promptExpander ?? null;
       }
       setDefaults(defaults3) {
         this.defaultModel = defaults3.model;
         this.defaultBackend = defaults3.backend;
+        if (defaults3.promptExpander !== void 0) {
+          this.promptExpander = defaults3.promptExpander;
+        }
+      }
+      setPromptExpander(expander) {
+        this.promptExpander = expander;
       }
       setProgressCallback(handler) {
         this.progressHandler = handler;
@@ -252321,19 +252353,39 @@ if __name__ == "__main__":
       }
       async generateCandidateLadder(args) {
         const failed = [];
+        const expansionEnabled = args.args["expand_prompt"] === false ? false : true;
+        const aspectRatio = typeof args.args["aspect_ratio"] === "string" ? String(args.args["aspect_ratio"]).trim() : "";
         for (let index = 0; index < args.candidates.length; index++) {
           const candidate = args.candidates[index];
-          const width = numberArg(args.args["width"], candidate.preset?.width ?? 1024);
-          const height = numberArg(args.args["height"], candidate.preset?.height ?? 1024);
+          const presetW = candidate.preset?.width ?? 1024;
+          const presetH = candidate.preset?.height ?? 1024;
+          const explicitWidth = optionalNumberArg(args.args["width"]);
+          const explicitHeight = optionalNumberArg(args.args["height"]);
+          const derived = (explicitWidth === void 0 || explicitHeight === void 0) && aspectRatio ? resolveAspectRatioToSize(aspectRatio, presetW, presetH) : null;
+          const width = roundToMultipleOf8(explicitWidth ?? derived?.width ?? presetW);
+          const height = roundToMultipleOf8(explicitHeight ?? derived?.height ?? presetH);
           const steps = optionalNumberArg(args.args["steps"]) ?? candidate.preset?.steps;
           const guidance = optionalNumberArg(args.args["guidance"]) ?? candidate.preset?.guidance;
           this.emitProgress({
             stage: "setup",
             message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
-          const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: args.prompt, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: args.prompt, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: args.prompt, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
-          if (result.success)
-            return annotateImageFallbackSuccess(result, failed, candidate);
+          const promptForCandidate = expansionEnabled ? await this.expandPromptForCandidate(args.prompt, candidate, index, args.candidates.length) : args.prompt;
+          const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
+          if (result.success) {
+            await this.writeImageSidecar(result, {
+              originalPrompt: args.prompt,
+              expandedPrompt: promptForCandidate,
+              model: candidate.model,
+              backend: candidate.backend,
+              width,
+              height,
+              aspectRatio: aspectRatio || null,
+              seed: args.seed
+            }).catch(() => {
+            });
+            return annotateImageFallbackSuccess(this.annotateResultWithSourcePrompt(result, args.prompt, promptForCandidate), failed, candidate);
+          }
           failed.push({ candidate, reason: summarizeToolResult(result) });
           if (index < args.candidates.length - 1) {
             this.emitProgress({
@@ -252350,6 +252402,95 @@ if __name__ == "__main__":
           durationMs: performance.now() - args.start
         };
       }
+      /**
+       * Persist a sidecar JSON next to a generated image capturing the
+       * original (user-typed) prompt, the expanded prompt actually sent to the
+       * model, model identity, resolution, and any aspect-ratio request. The
+       * Telegram bridge reads this when the user replies to a generated image
+       * so the model can answer "what prompt made this?" or modify the prompt
+       * for a follow-up generation without losing the original intent.
+       */
+      async writeImageSidecar(result, meta) {
+        const imagePath = this.extractImagePathFromResult(result);
+        if (!imagePath)
+          return;
+        const sidecarPath2 = `${imagePath}.json`;
+        const payload = {
+          version: 1,
+          kind: "image-generation",
+          image_path: imagePath,
+          original_prompt: meta.originalPrompt,
+          expanded_prompt: meta.expandedPrompt,
+          prompt_was_expanded: meta.originalPrompt.trim() !== meta.expandedPrompt.trim(),
+          model: meta.model,
+          backend: meta.backend,
+          width: meta.width,
+          height: meta.height,
+          aspect_ratio: meta.aspectRatio,
+          seed: meta.seed ?? null,
+          created_at: (/* @__PURE__ */ new Date()).toISOString()
+        };
+        await writeFile17(sidecarPath2, JSON.stringify(payload, null, 2) + "\n", "utf8");
+      }
+      extractImagePathFromResult(result) {
+        const mutated = result.mutatedFiles;
+        if (Array.isArray(mutated) && mutated.length > 0) {
+          const first2 = mutated[0];
+          if (typeof first2 === "string" && first2.trim())
+            return first2;
+        }
+        const match = result.output.match(/Image generated:\s*([^\n]+)/);
+        if (match && match[1])
+          return match[1].trim();
+        return null;
+      }
+      /**
+       * Add the original user prompt to the result output when prompt
+       * expansion produced a different string. This gives downstream
+       * consumers (Telegram reply context, TUI display, memory) access to
+       * both the user's intent and the model-tuned prompt actually rendered.
+       */
+      annotateResultWithSourcePrompt(result, originalPrompt, expandedPrompt) {
+        if (originalPrompt.trim() === expandedPrompt.trim())
+          return result;
+        const annotation = `  Original prompt: "${this.truncatePromptForOutput(originalPrompt)}"`;
+        const llmAnnotation = `Original user prompt: ${originalPrompt}`;
+        const output = result.output ? `${result.output}
+${annotation}` : annotation;
+        const llmContent = typeof result.llmContent === "string" && result.llmContent ? `${result.llmContent}
+${llmAnnotation}` : result.llmContent;
+        return { ...result, output, llmContent };
+      }
+      truncatePromptForOutput(prompt) {
+        return prompt.length > 200 ? prompt.slice(0, 197) + "..." : prompt;
+      }
+      async expandPromptForCandidate(originalPrompt, candidate, candidateIndex, candidateCount) {
+        if (!this.promptExpander)
+          return originalPrompt;
+        try {
+          this.emitProgress({
+            stage: "setup",
+            message: `Expanding prompt for ${candidate.model}`
+          });
+          const expanded = await this.promptExpander({
+            model: candidate.model,
+            backend: candidate.backend,
+            originalPrompt,
+            candidateIndex,
+            candidateCount
+          });
+          const trimmed = typeof expanded === "string" ? expanded.trim() : "";
+          if (!trimmed)
+            return originalPrompt;
+          this.emitProgress({
+            stage: "setup",
+            message: `Expanded prompt (${trimmed.length} chars) for ${candidate.model}`
+          });
+          return trimmed;
+        } catch {
+          return originalPrompt;
+        }
+      }
       async prewarmOllama(args) {
         const model = args.model || DEFAULT_OLLAMA_IMAGE_MODEL;
         if (await this.ollamaHasModel(model)) {
@@ -253248,7 +253389,7 @@ async function ensureAudioRunner(repoRoot, backend) {
 function audioOutputPath(repoRoot) {
   return join37(audioOutputDir(repoRoot), `audio-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.wav`);
 }
-function numberArg2(value2, fallback) {
+function numberArg(value2, fallback) {
   if (typeof value2 === "number" && Number.isFinite(value2))
     return value2;
   if (typeof value2 === "string" && value2.trim()) {
@@ -254457,7 +254598,7 @@ if __name__ == "__main__":
         const failed = [];
         for (let index = 0; index < args.candidates.length; index++) {
           const candidate = args.candidates[index];
-          const duration = numberArg2(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
+          const duration = numberArg(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
           this.emitProgress({
             stage: "setup",
             message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
@@ -254492,7 +254633,7 @@ if __name__ == "__main__":
         const failed = [];
         for (let index = 0; index < args.candidates.length; index++) {
           const candidate = args.candidates[index];
-          const duration = numberArg2(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
+          const duration = numberArg(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
           const steps = optionalNumberArg2(args.args["steps"]) ?? candidate.preset?.defaultSteps;
           this.emitProgress({
             stage: "setup",
@@ -509592,7 +509733,7 @@ function boolArg(value2, fallback) {
   }
   return fallback;
 }
-function numberArg3(value2, fallback) {
+function numberArg2(value2, fallback) {
   if (typeof value2 === "number" && Number.isFinite(value2))
     return value2;
   if (typeof value2 === "string" && value2.trim()) {
@@ -510317,7 +510458,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
         const cloneRef = cloneRefForSynthesis(args);
         if (!cloneRef)
           throw new Error(`No LuxTTS clone source found. Provide source_audio=<voice clip> or clone_ref=<registered clip>.`);
-        const speed = numberArg3(args["speed"], 1);
+        const speed = numberArg2(args["speed"], 1);
         ensureLuxttsInstalled();
         const daemonReady = await ensureLuxttsDaemon();
         if (daemonReady) {
@@ -510351,8 +510492,8 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
         const venvPy = ensureSupertonicInstalled();
         const voice = typeof args["voice"] === "string" ? args["voice"] : "M4";
         const lang = typeof args["lang"] === "string" ? args["lang"] : "en";
-        const speed = numberArg3(args["speed"], 1.05);
-        const totalStep = Math.round(numberArg3(args["total_step"], 8));
+        const speed = numberArg2(args["speed"], 1.05);
+        const totalStep = Math.round(numberArg2(args["total_step"], 8));
         const stdout = execFileSync4(venvPy, [supertonicInferScript()], {
           input: JSON.stringify({ text, output_path: outputPath2, voice_name: voice, lang, speed, total_step: totalStep }),
           encoding: "utf8",
@@ -510404,7 +510545,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
         if (!hasCommand3("espeak-ng"))
           throw new Error("Local fallback TTS command not found.");
         const voice = typeof args["voice"] === "string" ? args["voice"] : "en";
-        const speed = Math.round(numberArg3(args["speed"], 160));
+        const speed = Math.round(numberArg2(args["speed"], 160));
         execFileSync4("espeak-ng", ["-v", voice, "-s", String(speed), "-w", outputPath2, text], {
           stdio: "pipe",
           timeout: 6e4
@@ -537096,6 +537237,62 @@ ${blob}
       /** Register a tool for the agent to use */
       registerTool(tool) {
         this.tools.set(tool.name, tool);
+        if (tool.name === "generate_image") {
+          this.maybeInstallImagePromptExpander(tool);
+        }
+      }
+      /**
+       * Install an LLM-backed prompt expander on the registered generate_image
+       * tool. The expander runs a preliminary stage that rewrites the user's
+       * raw prompt into a richer, model-tuned version before the diffusion
+       * call. The instruction is intentionally generic — the LLM uses its own
+       * knowledge of the target model's prompt conventions to shape syntax,
+       * length, and detail style, rather than relying on hard-coded templates.
+       */
+      maybeInstallImagePromptExpander(tool) {
+        const setExpander = tool.setPromptExpander;
+        if (typeof setExpander !== "function")
+          return;
+        if (process.env["OMNIUS_IMAGE_PROMPT_EXPAND"] === "0") {
+          setExpander.call(tool, null);
+          return;
+        }
+        const backend = this.backend;
+        if (!backend || typeof backend.chatCompletion !== "function")
+          return;
+        setExpander.call(tool, async (ctx3) => {
+          const userPrompt = (ctx3.originalPrompt ?? "").trim();
+          if (!userPrompt)
+            return null;
+          const system = "You are a prompt-engineering stage that rewrites a user's short image request into a richer, more detailed image-generation prompt. The expanded prompt should match the prompt conventions and tokenizer preferences of the specific image model named in the request — use your own knowledge of that model's training distribution to choose length, syntax, weighting markers, ordering, and descriptor density. Do not invent new subject matter or contradict the user's intent. Output only the expanded prompt: no preamble, no quotes, no labels.";
+          const user = `Target image model: ${ctx3.model}
+Backend: ${ctx3.backend}
+Candidate position: ${ctx3.candidateIndex + 1} of ${ctx3.candidateCount} (fallback ladder)
+User prompt to expand:
+${userPrompt}
+Rewrite it now for ${ctx3.model}.`;
+          try {
+            const response = await backend.chatCompletion({
+              messages: [
+                { role: "system", content: system },
+                { role: "user", content: user }
+              ],
+              tools: [],
+              temperature: 0.4,
+              maxTokens: 600,
+              timeoutMs: 3e4
+            });
+            const text = response?.choices?.[0]?.message?.content;
+            if (typeof text !== "string")
+              return null;
+            const cleaned = text.replace(/^["'`]+|["'`]+$/g, "").replace(/^(?:expanded prompt|prompt|output)\s*:\s*/i, "").trim();
+            return cleaned.length > 0 ? cleaned : null;
+          } catch {
+            return null;
+          }
+        });
       }
       /** Register multiple tools */
       registerTools(tools) {
@@ -600951,6 +601148,7 @@ var init_tool_policy = __esm({
       "memory_read",
       "memory_write",
       "memory_search",
+      "identity_memory",
       "todo_read",
       "todo_write",
       "web_search",
@@ -600983,6 +601181,7 @@ var init_tool_policy = __esm({
       "memory_read",
       "memory_write",
       "memory_search",
+      "identity_memory",
       "todo_read",
       "todo_write",
       "web_search",
@@ -601106,7 +601305,7 @@ function buildTelegramCreativeTools(repoRoot, chatId, backendUrl2, imageDefaults
 }
 function scopedTool(base3, root, mode) {
   const rootAbs = resolve41(root);
-  return {
+  const wrapper = {
     name: base3.name,
     description: `[PUBLIC TELEGRAM CREATIVE WORKSPACE: ${rootAbs}] ${base3.description} Paths are restricted to this workspace. This tool cannot access or modify files outside the workspace. ` + (mode === "edit" ? "It can only edit files already created in this workspace manifest. " : ""),
     parameters: base3.parameters,
@@ -601215,6 +601414,11 @@ function scopedTool(base3, root, mode) {
       return withTelegramAutoAttachmentNotice(result, recordedPaths.size);
     }
   };
+  const baseSetExpander = base3.setPromptExpander;
+  if (typeof baseSetExpander === "function") {
+    wrapper.setPromptExpander = (expander) => baseSetExpander.call(base3, expander);
+  }
+  return wrapper;
 }
 function withTelegramAutoAttachmentNotice(result, artifactCount) {
   if (!result.success || artifactCount <= 0) return result;
@@ -603559,6 +603763,20 @@ function summarizeTelegramMessageAttachments(msg) {
       parts.push(`replied-to caption: ${truncateTelegramContextLine(msg.replyToMedia.caption, 180)}`);
     }
   }
+  const gen = msg.replyContext?.generatedMediaPromptInfo;
+  if (gen?.originalPrompt) {
+    parts.push(`replied-to image source prompt: "${truncateTelegramContextLine(gen.originalPrompt, 400)}"`);
+    if (gen.promptWasExpanded && gen.expandedPrompt && gen.expandedPrompt !== gen.originalPrompt) {
+      parts.push(`replied-to image expanded prompt: "${truncateTelegramContextLine(gen.expandedPrompt, 400)}"`);
+    }
+    const meta = [
+      gen.model ? `model=${gen.model}` : "",
+      gen.backend ? `backend=${gen.backend}` : "",
+      gen.width && gen.height ? `size=${gen.width}x${gen.height}` : "",
+      gen.aspectRatio ? `aspect=${gen.aspectRatio}` : ""
+    ].filter(Boolean).join(", ");
+    if (meta) parts.push(`replied-to image generation: ${meta}`);
+  }
   if (msg.poll) {
     parts.push(`poll: ${truncateTelegramContextLine(msg.poll.question, 180)}`);
   }
@@ -603567,6 +603785,32 @@ function summarizeTelegramMessageAttachments(msg) {
   }
   return parts.join("; ");
 }
+function formatTelegramGeneratedImagePromptInfo(info, maxPromptLength = 900) {
+  if (!info?.originalPrompt) return "";
+  const lines = [
+    `Generated image original prompt:
+${quoteTelegramContextText(info.originalPrompt, maxPromptLength)}`
+  ];
+  if (info.promptWasExpanded && info.expandedPrompt && info.expandedPrompt.trim() !== info.originalPrompt.trim()) {
+    lines.push(`Generated image expanded prompt actually sent to image model:
+${quoteTelegramContextText(info.expandedPrompt, maxPromptLength)}`);
+  }
+  const meta = [
+    info.model ? `model=${info.model}` : "",
+    info.backend ? `backend=${info.backend}` : "",
+    info.width && info.height ? `size=${info.width}x${info.height}` : "",
+    info.aspectRatio ? `aspect=${info.aspectRatio}` : "",
+    info.seed !== void 0 && info.seed !== null ? `seed=${info.seed}` : "",
+    info.createdAt ? `created_at=${info.createdAt}` : ""
+  ].filter(Boolean).join(", ");
+  if (meta) lines.push(`Generated image metadata: ${meta}`);
+  return lines.join("\n");
+}
+function quoteTelegramContextText(text, maxLength) {
+  const clipped = text.length > maxLength ? `${text.slice(0, Math.max(0, maxLength - 60)).trimEnd()}
+[generated prompt truncated]` : text;
+  return clipped.split(/\r?\n/).map((line) => `> ${line}`).join("\n");
+}
 function inferTelegramToneTags(text) {
   const lower = text.toLowerCase();
   const tags = /* @__PURE__ */ new Set();
@@ -604425,7 +604669,7 @@ function renderTelegramSubAgentError(username, error) {
   process.stdout.write(`    ${c3.dim("⎿")} ${c3.red("✘")} @${username}: ${c3.dim(preview)}
 `);
 }
-var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_PUBLIC_HELP_COMMANDS, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TelegramBridge;
+var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_PUBLIC_HELP_COMMANDS, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TelegramBridge;
 var init_telegram_bridge = __esm({
   "packages/cli/src/tui/telegram-bridge.ts"() {
     "use strict";
@@ -604511,7 +604755,7 @@ var init_telegram_bridge = __esm({
     ]);
     DEFAULT_TELEGRAM_TOOL_GROUP_POLICY = {
       read: true,
-      message: false,
+      message: true,
       media: true,
       janitorial: false,
       reaction: false,
@@ -604571,7 +604815,7 @@ Although this is an admin, the group is PUBLIC — other people can see your res
 RULES FOR GROUP CONTEXT:
 1. NEVER share private information, API keys, file paths, or system internals
-2. You have limited tools: web search, memory, and media analysis only
+2. You have limited tools: scoped web search/fetch, scoped memory, scoped identity memory, and scoped media analysis only
 3. Keep responses helpful and relevant to the conversation
 4. Be concise — group chats should have shorter responses
 5. Only respond if the message is directed at you or clearly relevant
@@ -604607,6 +604851,18 @@ PUBLIC TELEGRAM MEMORY SCOPE
 This turn may use memory and conversation history for the current Telegram group/private chat scope only.
 Users in a shared public group may ask questions about that shared group history and group memory, scoped by the current group id or by a user id/username inside that same group.
 Private chats, admin DMs, other groups, local terminal sessions, and fragmented private contexts are not visible from this public group. Do not imply they exist and do not answer from them.
+`.trim();
+    TELEGRAM_PUBLIC_VISION_STACK_CONTRACT = `
+PUBLIC TELEGRAM VISION / MEDIA STACK
+Public Telegram runs have the full scoped media-analysis stack for media posted in this chat:
+- Use telegram_media_recent to find recent scoped media, then use path/media aliases 'reply' and 'latest' instead of exposing local paths to users.
+- Use ocr_image_advanced for complex textual imagery: screenshots, dense documents, forms, receipts, scans, diagrams with labels, low-contrast photos, or uneven lighting.
+- Use ocr for quick image text extraction, image_read for image metadata + OCR + multimodal image payload, and vision for captioning, visual QA, object detection, or pointing.
+- Use pdf_to_text for embedded-text PDFs and ocr_pdf for scanned PDFs.
+- Use video_understand and transcribe_file for video/audio media posted in this chat.
+- Use identity_memory for explicit user-provided identity assertions, staged next-image names, and "who is this?" recall from scoped media. Do not guess real identities from images.
+- These tools are current-chat scoped. Never inspect arbitrary local files, reveal local paths, or claim access to media outside this Telegram chat scope.
 `.trim();
     GROUP_REPLY_DISCRETION_PROMPT = `
 REPLY DISCRETION: You are in a group chat. The live router has already filtered
@@ -605153,7 +605409,8 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
           threadId: entry.messageThreadId,
           sender: this.telegramReplySenderWithSelfFlag(sender),
           text: entry.text,
-          mediaSummary: entry.mediaSummary
+          mediaSummary: entry.mediaSummary,
+          generatedMediaPromptInfo: entry.generatedMediaPromptInfo
         };
       }
       resolveTelegramReplyContext(sessionKey, msg) {
@@ -605174,7 +605431,8 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
               ...msg.replyContext.sender ?? {}
             }),
             text: msg.replyContext.text ?? localContext.text,
-            mediaSummary: msg.replyContext.mediaSummary ?? localContext.mediaSummary
+            mediaSummary: msg.replyContext.mediaSummary ?? localContext.mediaSummary,
+            generatedMediaPromptInfo: msg.replyContext.generatedMediaPromptInfo ?? localContext.generatedMediaPromptInfo
           };
         } else if (msg.replyContext) {
           reply = {
@@ -605228,6 +605486,8 @@ ${this.quoteTelegramContextBlock(reply.quote, 1e3)}` : "",
 ${this.quoteTelegramContextBlock(content, 2200)}` : "",
           reply.mediaSummary ? `Replied-to media: ${reply.mediaSummary}` : "",
           reply.media && !reply.mediaSummary ? `Replied-to media: ${reply.media.type}${reply.media.fileName ? ` ${reply.media.fileName}` : ""}${reply.media.mimeType ? ` ${reply.media.mimeType}` : ""}` : "",
+          reply.generatedMediaPromptInfo ? `Replied-to generated image provenance:
+${formatTelegramGeneratedImagePromptInfo(reply.generatedMediaPromptInfo, 1400)}` : "",
           msg.text ? `Current user message:
 ${this.quoteTelegramContextBlock(msg.text, 1e3)}` : "",
           'Instruction: resolve pronouns, follow-up requests, and requests like "links", "repos", "instructions", "that", or "this" against the replied-to content before broader chat/workspace context.'
@@ -606179,8 +606439,9 @@ ${olderLines.join("\n")}`);
             const replySender = entry.replyContext?.sender ? `/${telegramReplySenderLabel(entry.replyContext.sender)}` : "";
             const reply = entry.replyToMessageId ? ` reply_to:${entry.replyToMessageId}${replySender}` : "";
             const media = entry.mediaSummary ? ` [${entry.mediaSummary}]` : "";
+            const generatedPrompt = entry.generatedMediaPromptInfo?.originalPrompt ? ` generated_image_prompt="${truncateTelegramContextLine(entry.generatedMediaPromptInfo.originalPrompt, 220)}"` : "";
             const prefix = [when, `${speaker}${mode}${reply}${media}`].filter(Boolean).join(" ");
-            return `${prefix}: ${truncateTelegramContextLine(entry.text)}`;
+            return `${prefix}: ${truncateTelegramContextLine(entry.text)}${generatedPrompt}`;
           });
           sections.push(`### Recent Thread, Oldest To Newest
 ${lines.join("\n")}`);
@@ -606290,7 +606551,7 @@ ${lines.join("\n")}`);
           `Route meanings:`,
           `- chat: a short conversational answer can be produced without tools.`,
           `- action: tools, workspace context, media processing, web lookup, delegation, or a multi-step agent loop may be needed.`,
-          `Route discipline: greetings, acknowledgements, casual tone/style discussion, and simple conversational questions are chat. Use action only when the message asks you to inspect, create, change, send, remember, search, analyze media, name/enroll/identify a person/face/voice from media, or otherwise do tool-backed work.`,
+          `Route discipline: greetings, acknowledgements, casual tone/style discussion, and simple conversational questions are chat. Use action only when the message asks you to inspect, create, change, send, remember, search, analyze media, extract text from images/screenshots/forms/scans, name/enroll/identify a person/face/voice from media, or otherwise do tool-backed work.`,
           ``,
           `Reply discretion: infer from the live thread, speaker relationships, direct platform signals, replies, tone, current message, and any private channel daydream artifact supplied in context. Do not use static keyword rules.`,
           `Private chats: should_reply is normally true.`,
@@ -606568,6 +606829,8 @@ ${TELEGRAM_PUBLIC_SOUL_PROFILE}
 ${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
+${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}
 ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
         } else {
           sections.push(`## Telegram Safety Contract
@@ -606578,6 +606841,8 @@ ${TELEGRAM_PUBLIC_SOUL_PROFILE}
 ${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
+${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}
 ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
         }
         return { sessionKey, sessionId, context: sections.join("\n\n") };
@@ -607368,11 +607633,15 @@ Join: ${newUrl}`);
 ${TELEGRAM_PUBLIC_SOUL_PROFILE}
-${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}` : `${TELEGRAM_SAFETY_PROMPT}
+${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
+${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}` : `${TELEGRAM_SAFETY_PROMPT}
 ${TELEGRAM_PUBLIC_SOUL_PROFILE}
-${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}`;
+${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
+${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}`;
         const groupHint = isGroup ? `Telegram group: ${msg.chatTitle || "unknown"}. The live router selected this turn as reply-worthy; keep the reply short and relevant. Never output a skip decision, no_reply marker, memory-stage note, or completion status.` : "Telegram private chat.";
         const runtime = buildTelegramRuntimeContext(/* @__PURE__ */ new Date());
         const messages2 = [
@@ -607636,6 +607905,7 @@ ${currentTelegramPrompt}`;
             "You have access to isolated per-chat memory (memory_write, memory_read, memory_search) scoped to this conversation.",
             "memory_search may use scope=group/current_chat for this group or scope=user with user_id/username for a participant in this same group. Other groups, admin chats, and private DMs are not accessible here.",
             "You can remember facts about users and retrieve them later. You also have web_search and web_fetch to look up information.",
+            "You have the full scoped Telegram media-analysis stack by default: telegram_media_recent, image_read, ocr, ocr_image_advanced, vision, pdf_to_text, ocr_pdf, transcribe_file, video_understand, audio_analyze, and identity_memory. For complex textual imagery, screenshots, forms, scans, or dense labels, prefer ocr_image_advanced after resolving media with path='reply' or path='latest'.",
             formatIdentityMemoryContext(chatLabel || "Telegram private chat"),
             reminderToolContract,
             "If the user asks you to create an image, audio file, or document artifact, create it with the scoped creative tools. Freshly generated artifacts are recorded and automatically attached to this Telegram chat when the turn completes, so do not call telegram_send_file for those same artifacts unless the user asked for a specific caption, existing/unrecorded file, or non-default target.",
@@ -607970,6 +608240,8 @@ ${lines.join("\n\n")}` };
 ${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
+${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}
 ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}
 ${conversation}`
@@ -609053,7 +609325,7 @@ Scoped workspace: ${scopedRoot}`,
         const bridge = this;
         return {
           name: "telegram_media_recent",
-          description: "List recent media files available in this Telegram chat scope, including safe aliases for image_read, ocr, vision, transcribe_file, pdf_to_text, video_understand, and audio_analyze.",
+          description: "List recent media files available in this Telegram chat scope, including safe aliases for image_read, ocr, ocr_image_advanced, vision, identity_memory, transcribe_file, pdf_to_text, video_understand, and audio_analyze.",
           parameters: {
             type: "object",
             properties: {
@@ -609208,7 +609480,8 @@ Scoped workspace: ${scopedRoot}`,
               const messageId = await bridge.sendTelegramFileToChat(target.chatId, file.path, {
                 kind,
                 caption: caption || void 0,
-                replyToMessageId
+                replyToMessageId,
+                sourcePromptPath: ledgerPath
               });
               bridge.rememberTelegramFileSendForMessage(currentMsg, sendFingerprint);
               bridge.rememberTelegramDeliveredArtifactForMessage(currentMsg, ledgerPath);
@@ -609440,7 +609713,7 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
             description = `[${sourceLabel}image received: ${localPath}${caption ? ` — caption: "${caption}"` : ""}
 ${visionContext}]`;
           } else {
-            description = `[${sourceLabel}image received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use image_read, ocr, or vision tools to analyze it.]`;
+            description = `[${sourceLabel}image received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use image_read, ocr, ocr_image_advanced, vision, or identity_memory tools to analyze it.]`;
           }
           const ingestPayload = this.telegramMemoryIngestPayload(msg, media, localPath, source, cacheEntry.extractedContent);
           let visualIdentityContext = "";
@@ -609706,10 +609979,69 @@ Content-Type: ${contentType}\r
         const result = await res.json();
         if (result.ok) {
           this.state.messagesSent++;
-          return result.result?.message_id ?? null;
+          const outboundMessageId = result.result?.message_id ?? null;
+          if (outboundMessageId && media.kind === "image" && media.source === "file") {
+            this.recordOutboundGeneratedImagePrompt(chatId, outboundMessageId, options2.sourcePromptPath ?? media.value, caption);
+          }
+          return outboundMessageId;
         }
         throw new Error(String(result.description || `Telegram ${method} failed`));
       }
+      /**
+       * After the bot sends an outbound photo, look for a `<image>.json`
+       * sidecar emitted by ImageGenerateTool and stash the source prompt info
+       * on a chatHistory entry keyed by the outbound message_id. When the user
+       * later replies to that image, resolveTelegramReplyContext finds the
+       * entry and exposes the original prompt to the model.
+       *
+       * Does nothing when readGeneratedImagePromptInfo(imagePath) returns null.
+       * Errors thrown while recording or saving the entry are caught and
+       * ignored, so they never propagate to the caller. Returns nothing.
+       *
+       * @param chatId - Telegram chat id; used to build the `chat:<chatId>`
+       *   session key and stored on the history entry.
+       * @param messageId - message_id of the outbound photo; stored on the entry.
+       * @param imagePath - path handed to readGeneratedImagePromptInfo to locate
+       *   the sidecar.
+       * @param caption - optional caption; trimmed and used as the entry text
+       *   (empty string when null or undefined).
+       */
+      recordOutboundGeneratedImagePrompt(chatId, messageId, imagePath, caption) {
+        const info = this.readGeneratedImagePromptInfo(imagePath);
+        if (!info) return; // no usable sidecar for this image: nothing to record
+        const sessionKey = `chat:${String(chatId)}`;
+        const captionText = (caption ?? "").trim();
+        const summary = `photo (generated, model=${info.model ?? "?"}, ${info.width ?? "?"}x${info.height ?? "?"})`; // missing model or dimensions are rendered as ?
+        const entry = {
+          role: "assistant",
+          text: captionText,
+          mode: "action",
+          chatId,
+          speaker: this.state.botUsername ? `@${this.state.botUsername}` : "Assistant",
+          messageId,
+          mediaSummary: summary,
+          generatedMediaPromptInfo: info
+        };
+        try {
+          this.recordChatHistory(sessionKey, entry);
+          this.saveTelegramConversationState(sessionKey);
+        } catch { // any failure while recording or persisting is swallowed here
+        }
+      }
+      readGeneratedImagePromptInfo(imagePath) { // Load prompt metadata from the JSON sidecar next to imagePath; returns null when it is missing, unreadable, or invalid.
+        const sidecarPath2 = `${imagePath}.json`; // sidecar path is the image path with a .json suffix appended
+        if (!existsSync108(sidecarPath2)) return null; // no sidecar file: nothing to report
+        try {
+          const raw = readFileSync88(sidecarPath2, "utf8");
+          const parsed = JSON.parse(raw);
+          if (!parsed || typeof parsed !== "object" || typeof parsed["original_prompt"] !== "string") { // a string original_prompt is the only required field
+            return null;
+          }
+          return { // snake_case sidecar keys are mapped to camelCase; optional fields of the wrong type become undefined
+            imagePath,
+            originalPrompt: String(parsed["original_prompt"]),
+            expandedPrompt: typeof parsed["expanded_prompt"] === "string" ? String(parsed["expanded_prompt"]) : void 0,
+            promptWasExpanded: parsed["prompt_was_expanded"] === true, // anything other than literal true yields false
+            model: typeof parsed["model"] === "string" ? String(parsed["model"]) : void 0,
+            backend: typeof parsed["backend"] === "string" ? String(parsed["backend"]) : void 0,
+            width: typeof parsed["width"] === "number" ? parsed["width"] : void 0,
+            height: typeof parsed["height"] === "number" ? parsed["height"] : void 0,
+            aspectRatio: typeof parsed["aspect_ratio"] === "string" || parsed["aspect_ratio"] === null ? parsed["aspect_ratio"] : void 0, // an explicit null is passed through as null
+            seed: typeof parsed["seed"] === "number" ? parsed["seed"] : null, // unlike the other optional fields, a missing seed is null rather than undefined
+            createdAt: typeof parsed["created_at"] === "string" ? String(parsed["created_at"]) : void 0
+          };
+        } catch { // a read or JSON parse failure is treated the same as a missing sidecar
+          return null;
+        }
+      }
       async sendGeneratedArtifactsFromSubAgent(msg, subAgent, finalText, includeMentioned) {
         const root = subAgent.creativeWorkspaceRoot;
         if (!root) return;
@@ -609741,6 +610073,8 @@ Content-Type: ${contentType}\r
             kind,
             source: "file",
             audioAsVoice: kind === "voice"
+          }, {
+            sourcePromptPath: abs
           }).then((messageId) => {
             if (messageId !== null) {
               this.rememberTelegramDeliveredArtifact(subAgent, abs);

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.45",
+  "version": "1.0.47",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.45",
+      "version": "1.0.47",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.45",
+  "version": "1.0.47",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",