omnius 1.0.45 → 1.0.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/index.js +368 -34
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -3606,7 +3606,7 @@ While the sub-agent is working, users see:
|
|
|
3606
3606
|
|
|
3607
3607
|
### Public User Isolation
|
|
3608
3608
|
|
|
3609
|
-
Public users get **per-chat isolated memory** — each chat is stored with explicit multimodal scope (`scope.kind = "group"|"private"`, `scope.id = chatId`) so public users can store and retrieve facts about their conversation without accessing or polluting unrelated chat memory. Public tools include: `memory_read`, `memory_write` (scoped), `memory_search`, `identity_memory` (scoped explicit identity evidence), `web_search`, `web_fetch`, and scoped minimal reminders via `reminder`/`remind`.
|
|
3609
|
+
Public users get **per-chat isolated memory** — each chat is stored with explicit multimodal scope (`scope.kind = "group"|"private"`, `scope.id = chatId`) so public users can store and retrieve facts about their conversation without accessing or polluting unrelated chat memory. Public tools include: `memory_read`, `memory_write` (scoped), `memory_search`, `identity_memory` (scoped explicit identity evidence), `web_search`, `web_fetch`, scoped advanced media analysis (`telegram_media_recent`, `image_read`, `ocr`, `ocr_image_advanced`, `vision`, `pdf_to_text`, `ocr_pdf`, `transcribe_file`, `video_understand`, `audio_analyze`), and scoped minimal reminders via `reminder`/`remind`.
|
|
3610
3610
|
|
|
3611
3611
|
The bridge also maintains a per-chat conversation state file with recent history, participants, relationship signals, and lightweight Zettelkasten memory cards. Each Telegram group or private chat gets its own scoped personality document under `.omnius/scoped-personality/telegram-chat/`; that profile is updated as people talk and injected into future Telegram context so tone, pacing, names, and relationships stay available turn to turn.
|
|
3612
3612
|
|
|
@@ -3627,8 +3627,8 @@ Tools are gated per execution context. The system enforces strict separation bet
|
|
|
3627
3627
|
|---------|--------------|-------|
|
|
3628
3628
|
| `terminal` | All tools | Wide open — shell, file read/write, everything |
|
|
3629
3629
|
| `telegram-admin-dm` | All except shell + scoped `telegram` tool | Admin DM — full tools, shell blocked by default (overridable); Telegram janitorial/moderation actions still require explicit policy and Bot API rights |
|
|
3630
|
-
| `telegram-admin-group` |
|
|
3631
|
-
| `telegram-public` |
|
|
3630
|
+
| `telegram-admin-group` | Scoped memory + web + advanced vision/OCR/media tools + scoped reminders + scoped `telegram` tool | Admin in public group — current-chat only; high-risk Telegram actions require policy enablement |
|
|
3631
|
+
| `telegram-public` | Scoped memory + web fetch/search + advanced current-chat vision/OCR/media tools + scoped creative tools + scoped minimal reminders + read/media `telegram` actions | Public users — no arbitrary local file access, shell, moderation, bot-admin, or janitorial actions |
|
|
3632
3632
|
| `api` | All tools | API endpoint — configurable |
|
|
3633
3633
|
|
|
3634
3634
|
**System tools** (`shell`, `file_write`, `file_edit`, `file_read`, `file_patch`, `batch_edit`, `grep_search`, `glob_find`, `list_directory`, `code_sandbox`, `codebase_map`, `git_info`, etc.) are **never exposed** in public-facing contexts.
|
package/dist/index.js
CHANGED
|
@@ -251178,10 +251178,6 @@ function parseStructuredProgress(text) {
|
|
|
251178
251178
|
return null;
|
|
251179
251179
|
}
|
|
251180
251180
|
}
|
|
251181
|
-
function numberArg(value2, fallback) {
|
|
251182
|
-
const n2 = Number(value2);
|
|
251183
|
-
return Number.isFinite(n2) && n2 > 0 ? n2 : fallback;
|
|
251184
|
-
}
|
|
251185
251181
|
function optionalNumberArg(value2) {
|
|
251186
251182
|
if (value2 === void 0 || value2 === null || value2 === "")
|
|
251187
251183
|
return void 0;
|
|
@@ -251199,6 +251195,26 @@ function booleanArg(value2, fallback) {
|
|
|
251199
251195
|
}
|
|
251200
251196
|
return fallback;
|
|
251201
251197
|
}
|
|
251198
|
+
/**
 * Snap a pixel dimension to the nearest multiple of 8.
 *
 * @param {number} value2 - Requested dimension in pixels.
 * @param {number} [fallback=1024] - Returned as-is when `value2` is not a
 *   finite, positive number.
 * @param {number} [minimum=64] - Lower bound applied after rounding, so tiny
 *   requests never round down to 0.
 * @returns {number} The rounded dimension, or `fallback` for unusable input.
 */
function roundToMultipleOf8(value2, fallback = 1024, minimum = 64) {
  if (!Number.isFinite(value2) || value2 <= 0) {
    return fallback;
  }
  const rounded = Math.round(value2 / 8) * 8;
  return Math.max(minimum, rounded);
}
|
|
251204
|
+
/**
 * Turn an aspect-ratio string into a width/height pair whose longer side
 * equals the longer side of the given preset.
 *
 * Accepted separators between the two numbers are `:`, `x`, `X`, `/` and `×`;
 * both numbers may be decimals.
 *
 * @param {string} ratio - Aspect ratio such as "16:9" or "3x2".
 * @param {number} presetWidth - Preset width in pixels.
 * @param {number} presetHeight - Preset height in pixels.
 * @returns {{width: number, height: number} | null} The derived size, or
 *   `null` when the ratio is not a parsable string, either term is not
 *   positive, or the preset does not yield a finite positive long side.
 */
function resolveAspectRatioToSize(ratio, presetWidth, presetHeight) {
  // A non-string ratio would make `.match` throw; treat it as "no ratio".
  if (typeof ratio !== "string") {
    return null;
  }
  const match = ratio.match(/^\s*(\d+(?:\.\d+)?)\s*[:xX/×]\s*(\d+(?:\.\d+)?)\s*$/);
  if (!match) {
    return null;
  }
  const w = Number(match[1]);
  const h = Number(match[2]);
  if (!Number.isFinite(w) || !Number.isFinite(h) || w <= 0 || h <= 0) {
    return null;
  }
  const longSide = Math.max(presetWidth, presetHeight);
  // Math.max yields NaN for missing presets; refuse rather than emit NaN sizes.
  if (!Number.isFinite(longSide) || longSide <= 0) {
    return null;
  }
  if (w >= h) {
    return { width: longSide, height: Math.round(longSide * h / w) };
  }
  return { width: Math.round(longSide * w / h), height: longSide };
}
|
|
251202
251218
|
function generationFallbackEnabled(args) {
|
|
251203
251219
|
if (booleanArg(args["strict_model"] ?? args["strictModel"] ?? args["strict"], false))
|
|
251204
251220
|
return false;
|
|
@@ -252151,7 +252167,7 @@ if __name__ == "__main__":
|
|
|
252151
252167
|
`;
|
|
252152
252168
|
ImageGenerateTool = class {
|
|
252153
252169
|
name = "generate_image";
|
|
252154
|
-
description =
|
|
252170
|
+
description = `Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first, including official/traceable FLUX fallbacks for Black Forest Labs models, and then falls back to smaller models if setup, download, or generation fails. Aspect ratio and resolution are model-controllable: pass aspect_ratio (e.g. "16:9", "9:16", "4:3", "3:4", "1:1", "21:9", "2:3", "3:2") to derive width/height around the selected model's preferred base resolution, or pass explicit width/height (in pixels, both rounded to a multiple of 8) when a specific size is required. A preliminary prompt-expansion stage rewrites the user's prompt into a richer, model-tuned version before generation when an LLM expander is wired; pass expand_prompt=false to skip. Saves a PNG under .omnius/images and returns the file path.`;
|
|
252155
252171
|
parameters = {
|
|
252156
252172
|
type: "object",
|
|
252157
252173
|
properties: {
|
|
@@ -252168,13 +252184,17 @@ if __name__ == "__main__":
|
|
|
252168
252184
|
enum: ["auto", "ollama", "diffusers", "sdcpp"],
|
|
252169
252185
|
description: "Generation backend. Defaults to auto."
|
|
252170
252186
|
},
|
|
252187
|
+
aspect_ratio: {
|
|
252188
|
+
type: "string",
|
|
252189
|
+
description: `Desired aspect ratio expressed as W:H (e.g. "16:9", "9:16", "4:3", "3:4", "1:1", "21:9", "2:3", "3:2"). When provided, width/height are derived from the selected model's preferred base resolution so the longer side stays in that model's sweet spot. Ignored if explicit width and height are also provided.`
|
|
252190
|
+
},
|
|
252171
252191
|
width: {
|
|
252172
252192
|
type: "number",
|
|
252173
|
-
description: "Image width in pixels"
|
|
252193
|
+
description: "Image width in pixels. Optional — defaults to the selected model's preset width, or is derived from aspect_ratio when present. Rounded to a multiple of 8."
|
|
252174
252194
|
},
|
|
252175
252195
|
height: {
|
|
252176
252196
|
type: "number",
|
|
252177
|
-
description: "Image height in pixels"
|
|
252197
|
+
description: "Image height in pixels. Optional — defaults to the selected model's preset height, or is derived from aspect_ratio when present. Rounded to a multiple of 8."
|
|
252178
252198
|
},
|
|
252179
252199
|
steps: {
|
|
252180
252200
|
type: "number",
|
|
@@ -252204,6 +252224,10 @@ if __name__ == "__main__":
|
|
|
252204
252224
|
strict_model: {
|
|
252205
252225
|
type: "boolean",
|
|
252206
252226
|
description: "When true, use only the requested model/backend and do not fall back. Defaults false."
|
|
252227
|
+
},
|
|
252228
|
+
expand_prompt: {
|
|
252229
|
+
type: "boolean",
|
|
252230
|
+
description: "When true (default), a preliminary LLM stage rewrites the prompt into a richer, model-tuned version before generation. Set false to send the raw prompt unchanged."
|
|
252207
252231
|
}
|
|
252208
252232
|
},
|
|
252209
252233
|
required: ["prompt"]
|
|
@@ -252216,15 +252240,23 @@ if __name__ == "__main__":
|
|
|
252216
252240
|
lastProgressAt = 0;
|
|
252217
252241
|
defaultModel;
|
|
252218
252242
|
defaultBackend;
|
|
252243
|
+
promptExpander = null;
|
|
252219
252244
|
constructor(cwd4, ollamaUrl = "http://localhost:11434", defaults3 = {}) {
|
|
252220
252245
|
this.cwd = cwd4;
|
|
252221
252246
|
this.ollamaUrl = ollamaUrl.replace(/\/v1\/?$/, "").replace(/\/$/, "");
|
|
252222
252247
|
this.defaultModel = defaults3.model;
|
|
252223
252248
|
this.defaultBackend = defaults3.backend;
|
|
252249
|
+
this.promptExpander = defaults3.promptExpander ?? null;
|
|
252224
252250
|
}
|
|
252225
252251
|
setDefaults(defaults3) {
|
|
252226
252252
|
this.defaultModel = defaults3.model;
|
|
252227
252253
|
this.defaultBackend = defaults3.backend;
|
|
252254
|
+
if (defaults3.promptExpander !== void 0) {
|
|
252255
|
+
this.promptExpander = defaults3.promptExpander;
|
|
252256
|
+
}
|
|
252257
|
+
}
|
|
252258
|
+
setPromptExpander(expander) {
|
|
252259
|
+
this.promptExpander = expander;
|
|
252228
252260
|
}
|
|
252229
252261
|
setProgressCallback(handler) {
|
|
252230
252262
|
this.progressHandler = handler;
|
|
@@ -252321,19 +252353,39 @@ if __name__ == "__main__":
|
|
|
252321
252353
|
}
|
|
252322
252354
|
async generateCandidateLadder(args) {
|
|
252323
252355
|
const failed = [];
|
|
252356
|
+
const expansionEnabled = args.args["expand_prompt"] === false ? false : true;
|
|
252357
|
+
const aspectRatio = typeof args.args["aspect_ratio"] === "string" ? String(args.args["aspect_ratio"]).trim() : "";
|
|
252324
252358
|
for (let index = 0; index < args.candidates.length; index++) {
|
|
252325
252359
|
const candidate = args.candidates[index];
|
|
252326
|
-
const
|
|
252327
|
-
const
|
|
252360
|
+
const presetW = candidate.preset?.width ?? 1024;
|
|
252361
|
+
const presetH = candidate.preset?.height ?? 1024;
|
|
252362
|
+
const explicitWidth = optionalNumberArg(args.args["width"]);
|
|
252363
|
+
const explicitHeight = optionalNumberArg(args.args["height"]);
|
|
252364
|
+
const derived = (explicitWidth === void 0 || explicitHeight === void 0) && aspectRatio ? resolveAspectRatioToSize(aspectRatio, presetW, presetH) : null;
|
|
252365
|
+
const width = roundToMultipleOf8(explicitWidth ?? derived?.width ?? presetW);
|
|
252366
|
+
const height = roundToMultipleOf8(explicitHeight ?? derived?.height ?? presetH);
|
|
252328
252367
|
const steps = optionalNumberArg(args.args["steps"]) ?? candidate.preset?.steps;
|
|
252329
252368
|
const guidance = optionalNumberArg(args.args["guidance"]) ?? candidate.preset?.guidance;
|
|
252330
252369
|
this.emitProgress({
|
|
252331
252370
|
stage: "setup",
|
|
252332
252371
|
message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
|
|
252333
252372
|
});
|
|
252334
|
-
const
|
|
252335
|
-
|
|
252336
|
-
|
|
252373
|
+
const promptForCandidate = expansionEnabled ? await this.expandPromptForCandidate(args.prompt, candidate, index, args.candidates.length) : args.prompt;
|
|
252374
|
+
const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
|
|
252375
|
+
if (result.success) {
|
|
252376
|
+
await this.writeImageSidecar(result, {
|
|
252377
|
+
originalPrompt: args.prompt,
|
|
252378
|
+
expandedPrompt: promptForCandidate,
|
|
252379
|
+
model: candidate.model,
|
|
252380
|
+
backend: candidate.backend,
|
|
252381
|
+
width,
|
|
252382
|
+
height,
|
|
252383
|
+
aspectRatio: aspectRatio || null,
|
|
252384
|
+
seed: args.seed
|
|
252385
|
+
}).catch(() => {
|
|
252386
|
+
});
|
|
252387
|
+
return annotateImageFallbackSuccess(this.annotateResultWithSourcePrompt(result, args.prompt, promptForCandidate), failed, candidate);
|
|
252388
|
+
}
|
|
252337
252389
|
failed.push({ candidate, reason: summarizeToolResult(result) });
|
|
252338
252390
|
if (index < args.candidates.length - 1) {
|
|
252339
252391
|
this.emitProgress({
|
|
@@ -252350,6 +252402,95 @@ if __name__ == "__main__":
|
|
|
252350
252402
|
durationMs: performance.now() - args.start
|
|
252351
252403
|
};
|
|
252352
252404
|
}
|
|
252405
|
+
/**
|
|
252406
|
+
* Persist a sidecar JSON next to a generated image capturing the
|
|
252407
|
+
* original (user-typed) prompt, the expanded prompt actually sent to the
|
|
252408
|
+
* model, model identity, resolution, and any aspect-ratio request. The
|
|
252409
|
+
* Telegram bridge reads this when the user replies to a generated image
|
|
252410
|
+
* so the model can answer "what prompt made this?" or modify the prompt
|
|
252411
|
+
* for a follow-up generation without losing the original intent.
|
|
252412
|
+
*/
|
|
252413
|
+
async writeImageSidecar(result, meta) {
|
|
252414
|
+
const imagePath = this.extractImagePathFromResult(result);
|
|
252415
|
+
if (!imagePath)
|
|
252416
|
+
return;
|
|
252417
|
+
const sidecarPath2 = `${imagePath}.json`;
|
|
252418
|
+
const payload = {
|
|
252419
|
+
version: 1,
|
|
252420
|
+
kind: "image-generation",
|
|
252421
|
+
image_path: imagePath,
|
|
252422
|
+
original_prompt: meta.originalPrompt,
|
|
252423
|
+
expanded_prompt: meta.expandedPrompt,
|
|
252424
|
+
prompt_was_expanded: meta.originalPrompt.trim() !== meta.expandedPrompt.trim(),
|
|
252425
|
+
model: meta.model,
|
|
252426
|
+
backend: meta.backend,
|
|
252427
|
+
width: meta.width,
|
|
252428
|
+
height: meta.height,
|
|
252429
|
+
aspect_ratio: meta.aspectRatio,
|
|
252430
|
+
seed: meta.seed ?? null,
|
|
252431
|
+
created_at: (/* @__PURE__ */ new Date()).toISOString()
|
|
252432
|
+
};
|
|
252433
|
+
await writeFile17(sidecarPath2, JSON.stringify(payload, null, 2) + "\n", "utf8");
|
|
252434
|
+
}
|
|
252435
|
+
extractImagePathFromResult(result) {
|
|
252436
|
+
const mutated = result.mutatedFiles;
|
|
252437
|
+
if (Array.isArray(mutated) && mutated.length > 0) {
|
|
252438
|
+
const first2 = mutated[0];
|
|
252439
|
+
if (typeof first2 === "string" && first2.trim())
|
|
252440
|
+
return first2;
|
|
252441
|
+
}
|
|
252442
|
+
const match = result.output.match(/Image generated:\s*([^\n]+)/);
|
|
252443
|
+
if (match && match[1])
|
|
252444
|
+
return match[1].trim();
|
|
252445
|
+
return null;
|
|
252446
|
+
}
|
|
252447
|
+
/**
|
|
252448
|
+
* Add the original user prompt to the result output when prompt
|
|
252449
|
+
* expansion produced a different string. This gives downstream
|
|
252450
|
+
* consumers (Telegram reply context, TUI display, memory) access to
|
|
252451
|
+
* both the user's intent and the model-tuned prompt actually rendered.
|
|
252452
|
+
*/
|
|
252453
|
+
annotateResultWithSourcePrompt(result, originalPrompt, expandedPrompt) {
|
|
252454
|
+
if (originalPrompt.trim() === expandedPrompt.trim())
|
|
252455
|
+
return result;
|
|
252456
|
+
const annotation = ` Original prompt: "${this.truncatePromptForOutput(originalPrompt)}"`;
|
|
252457
|
+
const llmAnnotation = `Original user prompt: ${originalPrompt}`;
|
|
252458
|
+
const output = result.output ? `${result.output}
|
|
252459
|
+
${annotation}` : annotation;
|
|
252460
|
+
const llmContent = typeof result.llmContent === "string" && result.llmContent ? `${result.llmContent}
|
|
252461
|
+
${llmAnnotation}` : result.llmContent;
|
|
252462
|
+
return { ...result, output, llmContent };
|
|
252463
|
+
}
|
|
252464
|
+
truncatePromptForOutput(prompt) {
|
|
252465
|
+
return prompt.length > 200 ? prompt.slice(0, 197) + "..." : prompt;
|
|
252466
|
+
}
|
|
252467
|
+
async expandPromptForCandidate(originalPrompt, candidate, candidateIndex, candidateCount) {
|
|
252468
|
+
if (!this.promptExpander)
|
|
252469
|
+
return originalPrompt;
|
|
252470
|
+
try {
|
|
252471
|
+
this.emitProgress({
|
|
252472
|
+
stage: "setup",
|
|
252473
|
+
message: `Expanding prompt for ${candidate.model}`
|
|
252474
|
+
});
|
|
252475
|
+
const expanded = await this.promptExpander({
|
|
252476
|
+
model: candidate.model,
|
|
252477
|
+
backend: candidate.backend,
|
|
252478
|
+
originalPrompt,
|
|
252479
|
+
candidateIndex,
|
|
252480
|
+
candidateCount
|
|
252481
|
+
});
|
|
252482
|
+
const trimmed = typeof expanded === "string" ? expanded.trim() : "";
|
|
252483
|
+
if (!trimmed)
|
|
252484
|
+
return originalPrompt;
|
|
252485
|
+
this.emitProgress({
|
|
252486
|
+
stage: "setup",
|
|
252487
|
+
message: `Expanded prompt (${trimmed.length} chars) for ${candidate.model}`
|
|
252488
|
+
});
|
|
252489
|
+
return trimmed;
|
|
252490
|
+
} catch {
|
|
252491
|
+
return originalPrompt;
|
|
252492
|
+
}
|
|
252493
|
+
}
|
|
252353
252494
|
async prewarmOllama(args) {
|
|
252354
252495
|
const model = args.model || DEFAULT_OLLAMA_IMAGE_MODEL;
|
|
252355
252496
|
if (await this.ollamaHasModel(model)) {
|
|
@@ -253248,7 +253389,7 @@ async function ensureAudioRunner(repoRoot, backend) {
|
|
|
253248
253389
|
function audioOutputPath(repoRoot) {
|
|
253249
253390
|
return join37(audioOutputDir(repoRoot), `audio-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.wav`);
|
|
253250
253391
|
}
|
|
253251
|
-
function
|
|
253392
|
+
function numberArg(value2, fallback) {
|
|
253252
253393
|
if (typeof value2 === "number" && Number.isFinite(value2))
|
|
253253
253394
|
return value2;
|
|
253254
253395
|
if (typeof value2 === "string" && value2.trim()) {
|
|
@@ -254457,7 +254598,7 @@ if __name__ == "__main__":
|
|
|
254457
254598
|
const failed = [];
|
|
254458
254599
|
for (let index = 0; index < args.candidates.length; index++) {
|
|
254459
254600
|
const candidate = args.candidates[index];
|
|
254460
|
-
const duration =
|
|
254601
|
+
const duration = numberArg(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
|
|
254461
254602
|
this.emitProgress({
|
|
254462
254603
|
stage: "setup",
|
|
254463
254604
|
message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
|
|
@@ -254492,7 +254633,7 @@ if __name__ == "__main__":
|
|
|
254492
254633
|
const failed = [];
|
|
254493
254634
|
for (let index = 0; index < args.candidates.length; index++) {
|
|
254494
254635
|
const candidate = args.candidates[index];
|
|
254495
|
-
const duration =
|
|
254636
|
+
const duration = numberArg(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
|
|
254496
254637
|
const steps = optionalNumberArg2(args.args["steps"]) ?? candidate.preset?.defaultSteps;
|
|
254497
254638
|
this.emitProgress({
|
|
254498
254639
|
stage: "setup",
|
|
@@ -509592,7 +509733,7 @@ function boolArg(value2, fallback) {
|
|
|
509592
509733
|
}
|
|
509593
509734
|
return fallback;
|
|
509594
509735
|
}
|
|
509595
|
-
function
|
|
509736
|
+
function numberArg2(value2, fallback) {
|
|
509596
509737
|
if (typeof value2 === "number" && Number.isFinite(value2))
|
|
509597
509738
|
return value2;
|
|
509598
509739
|
if (typeof value2 === "string" && value2.trim()) {
|
|
@@ -510317,7 +510458,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
510317
510458
|
const cloneRef = cloneRefForSynthesis(args);
|
|
510318
510459
|
if (!cloneRef)
|
|
510319
510460
|
throw new Error(`No LuxTTS clone source found. Provide source_audio=<voice clip> or clone_ref=<registered clip>.`);
|
|
510320
|
-
const speed =
|
|
510461
|
+
const speed = numberArg2(args["speed"], 1);
|
|
510321
510462
|
ensureLuxttsInstalled();
|
|
510322
510463
|
const daemonReady = await ensureLuxttsDaemon();
|
|
510323
510464
|
if (daemonReady) {
|
|
@@ -510351,8 +510492,8 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
510351
510492
|
const venvPy = ensureSupertonicInstalled();
|
|
510352
510493
|
const voice = typeof args["voice"] === "string" ? args["voice"] : "M4";
|
|
510353
510494
|
const lang = typeof args["lang"] === "string" ? args["lang"] : "en";
|
|
510354
|
-
const speed =
|
|
510355
|
-
const totalStep = Math.round(
|
|
510495
|
+
const speed = numberArg2(args["speed"], 1.05);
|
|
510496
|
+
const totalStep = Math.round(numberArg2(args["total_step"], 8));
|
|
510356
510497
|
const stdout = execFileSync4(venvPy, [supertonicInferScript()], {
|
|
510357
510498
|
input: JSON.stringify({ text, output_path: outputPath2, voice_name: voice, lang, speed, total_step: totalStep }),
|
|
510358
510499
|
encoding: "utf8",
|
|
@@ -510404,7 +510545,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
510404
510545
|
if (!hasCommand3("espeak-ng"))
|
|
510405
510546
|
throw new Error("Local fallback TTS command not found.");
|
|
510406
510547
|
const voice = typeof args["voice"] === "string" ? args["voice"] : "en";
|
|
510407
|
-
const speed = Math.round(
|
|
510548
|
+
const speed = Math.round(numberArg2(args["speed"], 160));
|
|
510408
510549
|
execFileSync4("espeak-ng", ["-v", voice, "-s", String(speed), "-w", outputPath2, text], {
|
|
510409
510550
|
stdio: "pipe",
|
|
510410
510551
|
timeout: 6e4
|
|
@@ -537096,6 +537237,62 @@ ${blob}
|
|
|
537096
537237
|
/** Register a tool for the agent to use */
|
|
537097
537238
|
registerTool(tool) {
|
|
537098
537239
|
this.tools.set(tool.name, tool);
|
|
537240
|
+
if (tool.name === "generate_image") {
|
|
537241
|
+
this.maybeInstallImagePromptExpander(tool);
|
|
537242
|
+
}
|
|
537243
|
+
}
|
|
537244
|
+
/**
|
|
537245
|
+
* Install an LLM-backed prompt expander on the registered generate_image
|
|
537246
|
+
* tool. The expander runs a preliminary stage that rewrites the user's
|
|
537247
|
+
* raw prompt into a richer, model-tuned version before the diffusion
|
|
537248
|
+
* call. The instruction is intentionally generic — the LLM uses its own
|
|
537249
|
+
* knowledge of the target model's prompt conventions to shape syntax,
|
|
537250
|
+
* length, and detail style, rather than relying on hard-coded templates.
|
|
537251
|
+
*/
|
|
537252
|
+
maybeInstallImagePromptExpander(tool) {
|
|
537253
|
+
const setExpander = tool.setPromptExpander;
|
|
537254
|
+
if (typeof setExpander !== "function")
|
|
537255
|
+
return;
|
|
537256
|
+
if (process.env["OMNIUS_IMAGE_PROMPT_EXPAND"] === "0") {
|
|
537257
|
+
setExpander.call(tool, null);
|
|
537258
|
+
return;
|
|
537259
|
+
}
|
|
537260
|
+
const backend = this.backend;
|
|
537261
|
+
if (!backend || typeof backend.chatCompletion !== "function")
|
|
537262
|
+
return;
|
|
537263
|
+
setExpander.call(tool, async (ctx3) => {
|
|
537264
|
+
const userPrompt = (ctx3.originalPrompt ?? "").trim();
|
|
537265
|
+
if (!userPrompt)
|
|
537266
|
+
return null;
|
|
537267
|
+
const system = "You are a prompt-engineering stage that rewrites a user's short image request into a richer, more detailed image-generation prompt. The expanded prompt should match the prompt conventions and tokenizer preferences of the specific image model named in the request — use your own knowledge of that model's training distribution to choose length, syntax, weighting markers, ordering, and descriptor density. Do not invent new subject matter or contradict the user's intent. Output only the expanded prompt: no preamble, no quotes, no labels.";
|
|
537268
|
+
const user = `Target image model: ${ctx3.model}
|
|
537269
|
+
Backend: ${ctx3.backend}
|
|
537270
|
+
Candidate position: ${ctx3.candidateIndex + 1} of ${ctx3.candidateCount} (fallback ladder)
|
|
537271
|
+
|
|
537272
|
+
User prompt to expand:
|
|
537273
|
+
${userPrompt}
|
|
537274
|
+
|
|
537275
|
+
Rewrite it now for ${ctx3.model}.`;
|
|
537276
|
+
try {
|
|
537277
|
+
const response = await backend.chatCompletion({
|
|
537278
|
+
messages: [
|
|
537279
|
+
{ role: "system", content: system },
|
|
537280
|
+
{ role: "user", content: user }
|
|
537281
|
+
],
|
|
537282
|
+
tools: [],
|
|
537283
|
+
temperature: 0.4,
|
|
537284
|
+
maxTokens: 600,
|
|
537285
|
+
timeoutMs: 3e4
|
|
537286
|
+
});
|
|
537287
|
+
const text = response?.choices?.[0]?.message?.content;
|
|
537288
|
+
if (typeof text !== "string")
|
|
537289
|
+
return null;
|
|
537290
|
+
const cleaned = text.replace(/^["'`]+|["'`]+$/g, "").replace(/^(?:expanded prompt|prompt|output)\s*:\s*/i, "").trim();
|
|
537291
|
+
return cleaned.length > 0 ? cleaned : null;
|
|
537292
|
+
} catch {
|
|
537293
|
+
return null;
|
|
537294
|
+
}
|
|
537295
|
+
});
|
|
537099
537296
|
}
|
|
537100
537297
|
/** Register multiple tools */
|
|
537101
537298
|
registerTools(tools) {
|
|
@@ -600951,6 +601148,7 @@ var init_tool_policy = __esm({
|
|
|
600951
601148
|
"memory_read",
|
|
600952
601149
|
"memory_write",
|
|
600953
601150
|
"memory_search",
|
|
601151
|
+
"identity_memory",
|
|
600954
601152
|
"todo_read",
|
|
600955
601153
|
"todo_write",
|
|
600956
601154
|
"web_search",
|
|
@@ -600983,6 +601181,7 @@ var init_tool_policy = __esm({
|
|
|
600983
601181
|
"memory_read",
|
|
600984
601182
|
"memory_write",
|
|
600985
601183
|
"memory_search",
|
|
601184
|
+
"identity_memory",
|
|
600986
601185
|
"todo_read",
|
|
600987
601186
|
"todo_write",
|
|
600988
601187
|
"web_search",
|
|
@@ -601106,7 +601305,7 @@ function buildTelegramCreativeTools(repoRoot, chatId, backendUrl2, imageDefaults
|
|
|
601106
601305
|
}
|
|
601107
601306
|
function scopedTool(base3, root, mode) {
|
|
601108
601307
|
const rootAbs = resolve41(root);
|
|
601109
|
-
|
|
601308
|
+
const wrapper = {
|
|
601110
601309
|
name: base3.name,
|
|
601111
601310
|
description: `[PUBLIC TELEGRAM CREATIVE WORKSPACE: ${rootAbs}] ${base3.description} Paths are restricted to this workspace. This tool cannot access or modify files outside the workspace. ` + (mode === "edit" ? "It can only edit files already created in this workspace manifest. " : ""),
|
|
601112
601311
|
parameters: base3.parameters,
|
|
@@ -601215,6 +601414,11 @@ function scopedTool(base3, root, mode) {
|
|
|
601215
601414
|
return withTelegramAutoAttachmentNotice(result, recordedPaths.size);
|
|
601216
601415
|
}
|
|
601217
601416
|
};
|
|
601417
|
+
const baseSetExpander = base3.setPromptExpander;
|
|
601418
|
+
if (typeof baseSetExpander === "function") {
|
|
601419
|
+
wrapper.setPromptExpander = (expander) => baseSetExpander.call(base3, expander);
|
|
601420
|
+
}
|
|
601421
|
+
return wrapper;
|
|
601218
601422
|
}
|
|
601219
601423
|
function withTelegramAutoAttachmentNotice(result, artifactCount) {
|
|
601220
601424
|
if (!result.success || artifactCount <= 0) return result;
|
|
@@ -603559,6 +603763,20 @@ function summarizeTelegramMessageAttachments(msg) {
|
|
|
603559
603763
|
parts.push(`replied-to caption: ${truncateTelegramContextLine(msg.replyToMedia.caption, 180)}`);
|
|
603560
603764
|
}
|
|
603561
603765
|
}
|
|
603766
|
+
const gen = msg.replyContext?.generatedMediaPromptInfo;
|
|
603767
|
+
if (gen?.originalPrompt) {
|
|
603768
|
+
parts.push(`replied-to image source prompt: "${truncateTelegramContextLine(gen.originalPrompt, 400)}"`);
|
|
603769
|
+
if (gen.promptWasExpanded && gen.expandedPrompt && gen.expandedPrompt !== gen.originalPrompt) {
|
|
603770
|
+
parts.push(`replied-to image expanded prompt: "${truncateTelegramContextLine(gen.expandedPrompt, 400)}"`);
|
|
603771
|
+
}
|
|
603772
|
+
const meta = [
|
|
603773
|
+
gen.model ? `model=${gen.model}` : "",
|
|
603774
|
+
gen.backend ? `backend=${gen.backend}` : "",
|
|
603775
|
+
gen.width && gen.height ? `size=${gen.width}x${gen.height}` : "",
|
|
603776
|
+
gen.aspectRatio ? `aspect=${gen.aspectRatio}` : ""
|
|
603777
|
+
].filter(Boolean).join(", ");
|
|
603778
|
+
if (meta) parts.push(`replied-to image generation: ${meta}`);
|
|
603779
|
+
}
|
|
603562
603780
|
if (msg.poll) {
|
|
603563
603781
|
parts.push(`poll: ${truncateTelegramContextLine(msg.poll.question, 180)}`);
|
|
603564
603782
|
}
|
|
@@ -603567,6 +603785,32 @@ function summarizeTelegramMessageAttachments(msg) {
|
|
|
603567
603785
|
}
|
|
603568
603786
|
return parts.join("; ");
|
|
603569
603787
|
}
|
|
603788
|
+
/**
 * Render stored generation details for an image as a multi-line text block.
 *
 * The block always starts with the quoted original prompt. The quoted
 * expanded prompt follows only when it is flagged as expanded and differs
 * from the original after trimming. A final metadata line lists whichever of
 * model, backend, size, aspect ratio, seed and creation time are present.
 *
 * @param {object} info - Generation info; must carry `originalPrompt`.
 * @param {number} [maxPromptLength=900] - Length limit passed to the quoting
 *   helper for each prompt.
 * @returns {string} The formatted block, or "" when there is no original prompt.
 */
function formatTelegramGeneratedImagePromptInfo(info, maxPromptLength = 900) {
  if (!info?.originalPrompt) return "";
  const { originalPrompt, expandedPrompt, promptWasExpanded } = info;
  const sections = [];
  sections.push(`Generated image original prompt:\n${quoteTelegramContextText(originalPrompt, maxPromptLength)}`);

  const showExpanded = promptWasExpanded && expandedPrompt && expandedPrompt.trim() !== originalPrompt.trim();
  if (showExpanded) {
    sections.push(`Generated image expanded prompt actually sent to image model:\n${quoteTelegramContextText(expandedPrompt, maxPromptLength)}`);
  }

  const details = [];
  if (info.model) details.push(`model=${info.model}`);
  if (info.backend) details.push(`backend=${info.backend}`);
  if (info.width && info.height) details.push(`size=${info.width}x${info.height}`);
  if (info.aspectRatio) details.push(`aspect=${info.aspectRatio}`);
  if (info.seed != null) details.push(`seed=${info.seed}`);
  if (info.createdAt) details.push(`created_at=${info.createdAt}`);

  const meta = details.join(", ");
  if (meta) sections.push(`Generated image metadata: ${meta}`);
  return sections.join("\n");
}
|
|
603809
|
+
/**
 * Format text as a quoted block, prefixing every line with "> ".
 *
 * Text longer than `maxLength` is cut to `maxLength - 60` characters with
 * trailing whitespace removed, followed by a "[generated prompt truncated]"
 * marker on its own line. When nothing of the text survives the cut, only
 * the marker is emitted, with no blank quoted line before it.
 *
 * @param {string} text - Text to quote; LF and CRLF line breaks are accepted.
 * @param {number} maxLength - Longest text kept without truncation.
 * @returns {string} The quoted lines joined with "\n".
 */
function quoteTelegramContextText(text, maxLength) {
  const truncationMarker = "[generated prompt truncated]";
  let clipped = text;
  if (text.length > maxLength) {
    const head = text.slice(0, Math.max(0, maxLength - 60)).trimEnd();
    // A very small limit leaves no head at all; skip the empty leading line.
    clipped = head ? `${head}\n${truncationMarker}` : truncationMarker;
  }
  return clipped.split(/\r?\n/).map((line) => `> ${line}`).join("\n");
}
|
|
603570
603814
|
function inferTelegramToneTags(text) {
|
|
603571
603815
|
const lower = text.toLowerCase();
|
|
603572
603816
|
const tags = /* @__PURE__ */ new Set();
|
|
@@ -604425,7 +604669,7 @@ function renderTelegramSubAgentError(username, error) {
|
|
|
604425
604669
|
process.stdout.write(` ${c3.dim("⎿")} ${c3.red("✘")} @${username}: ${c3.dim(preview)}
|
|
604426
604670
|
`);
|
|
604427
604671
|
}
|
|
604428
|
-
var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_PUBLIC_HELP_COMMANDS, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TelegramBridge;
|
|
604672
|
+
var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_PUBLIC_HELP_COMMANDS, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TelegramBridge;
|
|
604429
604673
|
var init_telegram_bridge = __esm({
|
|
604430
604674
|
"packages/cli/src/tui/telegram-bridge.ts"() {
|
|
604431
604675
|
"use strict";
|
|
@@ -604511,7 +604755,7 @@ var init_telegram_bridge = __esm({
|
|
|
604511
604755
|
]);
|
|
604512
604756
|
DEFAULT_TELEGRAM_TOOL_GROUP_POLICY = {
|
|
604513
604757
|
read: true,
|
|
604514
|
-
message:
|
|
604758
|
+
message: true,
|
|
604515
604759
|
media: true,
|
|
604516
604760
|
janitorial: false,
|
|
604517
604761
|
reaction: false,
|
|
@@ -604571,7 +604815,7 @@ Although this is an admin, the group is PUBLIC — other people can see your res
|
|
|
604571
604815
|
|
|
604572
604816
|
RULES FOR GROUP CONTEXT:
|
|
604573
604817
|
1. NEVER share private information, API keys, file paths, or system internals
|
|
604574
|
-
2. You have limited tools: web search, memory, and media analysis only
|
|
604818
|
+
2. You have limited tools: scoped web search/fetch, scoped memory, scoped identity memory, and scoped media analysis only
|
|
604575
604819
|
3. Keep responses helpful and relevant to the conversation
|
|
604576
604820
|
4. Be concise — group chats should have shorter responses
|
|
604577
604821
|
5. Only respond if the message is directed at you or clearly relevant
|
|
@@ -604607,6 +604851,18 @@ PUBLIC TELEGRAM MEMORY SCOPE
|
|
|
604607
604851
|
This turn may use memory and conversation history for the current Telegram group/private chat scope only.
|
|
604608
604852
|
Users in a shared public group may ask questions about that shared group history and group memory, scoped by the current group id or by a user id/username inside that same group.
|
|
604609
604853
|
Private chats, admin DMs, other groups, local terminal sessions, and fragmented private contexts are not visible from this public group. Do not imply they exist and do not answer from them.
|
|
604854
|
+
`.trim();
|
|
604855
|
+
TELEGRAM_PUBLIC_VISION_STACK_CONTRACT = `
|
|
604856
|
+
PUBLIC TELEGRAM VISION / MEDIA STACK
|
|
604857
|
+
|
|
604858
|
+
Public Telegram runs have the full scoped media-analysis stack for media posted in this chat:
|
|
604859
|
+
- Use telegram_media_recent to find recent scoped media, then use path/media aliases 'reply' and 'latest' instead of exposing local paths to users.
|
|
604860
|
+
- Use ocr_image_advanced for complex textual imagery: screenshots, dense documents, forms, receipts, scans, diagrams with labels, low-contrast photos, or uneven lighting.
|
|
604861
|
+
- Use ocr for quick image text extraction, image_read for image metadata + OCR + multimodal image payload, and vision for captioning, visual QA, object detection, or pointing.
|
|
604862
|
+
- Use pdf_to_text for embedded-text PDFs and ocr_pdf for scanned PDFs.
|
|
604863
|
+
- Use video_understand and transcribe_file for video/audio media posted in this chat.
|
|
604864
|
+
- Use identity_memory for explicit user-provided identity assertions, staged next-image names, and "who is this?" recall from scoped media. Do not guess real identities from images.
|
|
604865
|
+
- These tools are current-chat scoped. Never inspect arbitrary local files, reveal local paths, or claim access to media outside this Telegram chat scope.
|
|
604610
604866
|
`.trim();
|
|
604611
604867
|
GROUP_REPLY_DISCRETION_PROMPT = `
|
|
604612
604868
|
REPLY DISCRETION: You are in a group chat. The live router has already filtered
|
|
@@ -605153,7 +605409,8 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
|
|
|
605153
605409
|
threadId: entry.messageThreadId,
|
|
605154
605410
|
sender: this.telegramReplySenderWithSelfFlag(sender),
|
|
605155
605411
|
text: entry.text,
|
|
605156
|
-
mediaSummary: entry.mediaSummary
|
|
605412
|
+
mediaSummary: entry.mediaSummary,
|
|
605413
|
+
generatedMediaPromptInfo: entry.generatedMediaPromptInfo
|
|
605157
605414
|
};
|
|
605158
605415
|
}
|
|
605159
605416
|
resolveTelegramReplyContext(sessionKey, msg) {
|
|
@@ -605174,7 +605431,8 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
|
|
|
605174
605431
|
...msg.replyContext.sender ?? {}
|
|
605175
605432
|
}),
|
|
605176
605433
|
text: msg.replyContext.text ?? localContext.text,
|
|
605177
|
-
mediaSummary: msg.replyContext.mediaSummary ?? localContext.mediaSummary
|
|
605434
|
+
mediaSummary: msg.replyContext.mediaSummary ?? localContext.mediaSummary,
|
|
605435
|
+
generatedMediaPromptInfo: msg.replyContext.generatedMediaPromptInfo ?? localContext.generatedMediaPromptInfo
|
|
605178
605436
|
};
|
|
605179
605437
|
} else if (msg.replyContext) {
|
|
605180
605438
|
reply = {
|
|
@@ -605228,6 +605486,8 @@ ${this.quoteTelegramContextBlock(reply.quote, 1e3)}` : "",
|
|
|
605228
605486
|
${this.quoteTelegramContextBlock(content, 2200)}` : "",
|
|
605229
605487
|
reply.mediaSummary ? `Replied-to media: ${reply.mediaSummary}` : "",
|
|
605230
605488
|
reply.media && !reply.mediaSummary ? `Replied-to media: ${reply.media.type}${reply.media.fileName ? ` ${reply.media.fileName}` : ""}${reply.media.mimeType ? ` ${reply.media.mimeType}` : ""}` : "",
|
|
605489
|
+
reply.generatedMediaPromptInfo ? `Replied-to generated image provenance:
|
|
605490
|
+
${formatTelegramGeneratedImagePromptInfo(reply.generatedMediaPromptInfo, 1400)}` : "",
|
|
605231
605491
|
msg.text ? `Current user message:
|
|
605232
605492
|
${this.quoteTelegramContextBlock(msg.text, 1e3)}` : "",
|
|
605233
605493
|
'Instruction: resolve pronouns, follow-up requests, and requests like "links", "repos", "instructions", "that", or "this" against the replied-to content before broader chat/workspace context.'
|
|
@@ -606179,8 +606439,9 @@ ${olderLines.join("\n")}`);
|
|
|
606179
606439
|
const replySender = entry.replyContext?.sender ? `/${telegramReplySenderLabel(entry.replyContext.sender)}` : "";
|
|
606180
606440
|
const reply = entry.replyToMessageId ? ` reply_to:${entry.replyToMessageId}${replySender}` : "";
|
|
606181
606441
|
const media = entry.mediaSummary ? ` [${entry.mediaSummary}]` : "";
|
|
606442
|
+
const generatedPrompt = entry.generatedMediaPromptInfo?.originalPrompt ? ` generated_image_prompt="${truncateTelegramContextLine(entry.generatedMediaPromptInfo.originalPrompt, 220)}"` : "";
|
|
606182
606443
|
const prefix = [when, `${speaker}${mode}${reply}${media}`].filter(Boolean).join(" ");
|
|
606183
|
-
return `${prefix}: ${truncateTelegramContextLine(entry.text)}`;
|
|
606444
|
+
return `${prefix}: ${truncateTelegramContextLine(entry.text)}${generatedPrompt}`;
|
|
606184
606445
|
});
|
|
606185
606446
|
sections.push(`### Recent Thread, Oldest To Newest
|
|
606186
606447
|
${lines.join("\n")}`);
|
|
@@ -606290,7 +606551,7 @@ ${lines.join("\n")}`);
|
|
|
606290
606551
|
`Route meanings:`,
|
|
606291
606552
|
`- chat: a short conversational answer can be produced without tools.`,
|
|
606292
606553
|
`- action: tools, workspace context, media processing, web lookup, delegation, or a multi-step agent loop may be needed.`,
|
|
606293
|
-
`Route discipline: greetings, acknowledgements, casual tone/style discussion, and simple conversational questions are chat. Use action only when the message asks you to inspect, create, change, send, remember, search, analyze media, name/enroll/identify a person/face/voice from media, or otherwise do tool-backed work.`,
|
|
606554
|
+
`Route discipline: greetings, acknowledgements, casual tone/style discussion, and simple conversational questions are chat. Use action only when the message asks you to inspect, create, change, send, remember, search, analyze media, extract text from images/screenshots/forms/scans, name/enroll/identify a person/face/voice from media, or otherwise do tool-backed work.`,
|
|
606294
606555
|
``,
|
|
606295
606556
|
`Reply discretion: infer from the live thread, speaker relationships, direct platform signals, replies, tone, current message, and any private channel daydream artifact supplied in context. Do not use static keyword rules.`,
|
|
606296
606557
|
`Private chats: should_reply is normally true.`,
|
|
@@ -606568,6 +606829,8 @@ ${TELEGRAM_PUBLIC_SOUL_PROFILE}
|
|
|
606568
606829
|
|
|
606569
606830
|
${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
|
|
606570
606831
|
|
|
606832
|
+
${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}
|
|
606833
|
+
|
|
606571
606834
|
${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
|
|
606572
606835
|
} else {
|
|
606573
606836
|
sections.push(`## Telegram Safety Contract
|
|
@@ -606578,6 +606841,8 @@ ${TELEGRAM_PUBLIC_SOUL_PROFILE}
|
|
|
606578
606841
|
|
|
606579
606842
|
${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
|
|
606580
606843
|
|
|
606844
|
+
${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}
|
|
606845
|
+
|
|
606581
606846
|
${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
|
|
606582
606847
|
}
|
|
606583
606848
|
return { sessionKey, sessionId, context: sections.join("\n\n") };
|
|
@@ -607368,11 +607633,15 @@ Join: ${newUrl}`);
|
|
|
607368
607633
|
|
|
607369
607634
|
${TELEGRAM_PUBLIC_SOUL_PROFILE}
|
|
607370
607635
|
|
|
607371
|
-
${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
|
|
607636
|
+
${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
|
|
607637
|
+
|
|
607638
|
+
${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}` : `${TELEGRAM_SAFETY_PROMPT}
|
|
607372
607639
|
|
|
607373
607640
|
${TELEGRAM_PUBLIC_SOUL_PROFILE}
|
|
607374
607641
|
|
|
607375
|
-
${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
|
|
607642
|
+
${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
|
|
607643
|
+
|
|
607644
|
+
${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}`;
|
|
607376
607645
|
const groupHint = isGroup ? `Telegram group: ${msg.chatTitle || "unknown"}. The live router selected this turn as reply-worthy; keep the reply short and relevant. Never output a skip decision, no_reply marker, memory-stage note, or completion status.` : "Telegram private chat.";
|
|
607377
607646
|
const runtime = buildTelegramRuntimeContext(/* @__PURE__ */ new Date());
|
|
607378
607647
|
const messages2 = [
|
|
@@ -607636,6 +607905,7 @@ ${currentTelegramPrompt}`;
|
|
|
607636
607905
|
"You have access to isolated per-chat memory (memory_write, memory_read, memory_search) scoped to this conversation.",
|
|
607637
607906
|
"memory_search may use scope=group/current_chat for this group or scope=user with user_id/username for a participant in this same group. Other groups, admin chats, and private DMs are not accessible here.",
|
|
607638
607907
|
"You can remember facts about users and retrieve them later. You also have web_search and web_fetch to look up information.",
|
|
607908
|
+
"You have the full scoped Telegram media-analysis stack by default: telegram_media_recent, image_read, ocr, ocr_image_advanced, vision, pdf_to_text, ocr_pdf, transcribe_file, video_understand, audio_analyze, and identity_memory. For complex textual imagery, screenshots, forms, scans, or dense labels, prefer ocr_image_advanced after resolving media with path='reply' or path='latest'.",
|
|
607639
607909
|
formatIdentityMemoryContext(chatLabel || "Telegram private chat"),
|
|
607640
607910
|
reminderToolContract,
|
|
607641
607911
|
"If the user asks you to create an image, audio file, or document artifact, create it with the scoped creative tools. Freshly generated artifacts are recorded and automatically attached to this Telegram chat when the turn completes, so do not call telegram_send_file for those same artifacts unless the user asked for a specific caption, existing/unrecorded file, or non-default target.",
|
|
@@ -607970,6 +608240,8 @@ ${lines.join("\n\n")}` };
|
|
|
607970
608240
|
|
|
607971
608241
|
${TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT}
|
|
607972
608242
|
|
|
608243
|
+
${TELEGRAM_PUBLIC_VISION_STACK_CONTRACT}
|
|
608244
|
+
|
|
607973
608245
|
${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}
|
|
607974
608246
|
|
|
607975
608247
|
${conversation}`
|
|
@@ -609053,7 +609325,7 @@ Scoped workspace: ${scopedRoot}`,
|
|
|
609053
609325
|
const bridge = this;
|
|
609054
609326
|
return {
|
|
609055
609327
|
name: "telegram_media_recent",
|
|
609056
|
-
description: "List recent media files available in this Telegram chat scope, including safe aliases for image_read, ocr, vision, transcribe_file, pdf_to_text, video_understand, and audio_analyze.",
|
|
609328
|
+
description: "List recent media files available in this Telegram chat scope, including safe aliases for image_read, ocr, ocr_image_advanced, vision, identity_memory, transcribe_file, pdf_to_text, video_understand, and audio_analyze.",
|
|
609057
609329
|
parameters: {
|
|
609058
609330
|
type: "object",
|
|
609059
609331
|
properties: {
|
|
@@ -609208,7 +609480,8 @@ Scoped workspace: ${scopedRoot}`,
|
|
|
609208
609480
|
const messageId = await bridge.sendTelegramFileToChat(target.chatId, file.path, {
|
|
609209
609481
|
kind,
|
|
609210
609482
|
caption: caption || void 0,
|
|
609211
|
-
replyToMessageId
|
|
609483
|
+
replyToMessageId,
|
|
609484
|
+
sourcePromptPath: ledgerPath
|
|
609212
609485
|
});
|
|
609213
609486
|
bridge.rememberTelegramFileSendForMessage(currentMsg, sendFingerprint);
|
|
609214
609487
|
bridge.rememberTelegramDeliveredArtifactForMessage(currentMsg, ledgerPath);
|
|
@@ -609440,7 +609713,7 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
|
|
|
609440
609713
|
description = `[${sourceLabel}image received: ${localPath}${caption ? ` — caption: "${caption}"` : ""}
|
|
609441
609714
|
${visionContext}]`;
|
|
609442
609715
|
} else {
|
|
609443
|
-
description = `[${sourceLabel}image received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use image_read, ocr,
|
|
609716
|
+
description = `[${sourceLabel}image received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use image_read, ocr, ocr_image_advanced, vision, or identity_memory tools to analyze it.]`;
|
|
609444
609717
|
}
|
|
609445
609718
|
const ingestPayload = this.telegramMemoryIngestPayload(msg, media, localPath, source, cacheEntry.extractedContent);
|
|
609446
609719
|
let visualIdentityContext = "";
|
|
@@ -609706,10 +609979,69 @@ Content-Type: ${contentType}\r
|
|
|
609706
609979
|
const result = await res.json();
|
|
609707
609980
|
if (result.ok) {
|
|
609708
609981
|
this.state.messagesSent++;
|
|
609709
|
-
|
|
609982
|
+
const outboundMessageId = result.result?.message_id ?? null;
|
|
609983
|
+
if (outboundMessageId && media.kind === "image" && media.source === "file") {
|
|
609984
|
+
this.recordOutboundGeneratedImagePrompt(chatId, outboundMessageId, options2.sourcePromptPath ?? media.value, caption);
|
|
609985
|
+
}
|
|
609986
|
+
return outboundMessageId;
|
|
609710
609987
|
}
|
|
609711
609988
|
throw new Error(String(result.description || `Telegram ${method} failed`));
|
|
609712
609989
|
}
|
|
609990
|
+
/**
|
|
609991
|
+
* After the bot sends an outbound photo, look for a `<image>.json`
|
|
609992
|
+
* sidecar emitted by ImageGenerateTool and stash the source prompt info
|
|
609993
|
+
* on a chatHistory entry keyed by the outbound message_id. When the user
|
|
609994
|
+
* later replies to that image, resolveTelegramReplyContext finds the
|
|
609995
|
+
* entry and exposes the original prompt to the model.
|
|
609996
|
+
*/
|
|
609997
|
+
recordOutboundGeneratedImagePrompt(chatId, messageId, imagePath, caption) {
|
|
609998
|
+
const info = this.readGeneratedImagePromptInfo(imagePath);
|
|
609999
|
+
if (!info) return;
|
|
610000
|
+
const sessionKey = `chat:${String(chatId)}`;
|
|
610001
|
+
const captionText = (caption ?? "").trim();
|
|
610002
|
+
const summary = `photo (generated, model=${info.model ?? "?"}, ${info.width ?? "?"}x${info.height ?? "?"})`;
|
|
610003
|
+
const entry = {
|
|
610004
|
+
role: "assistant",
|
|
610005
|
+
text: captionText,
|
|
610006
|
+
mode: "action",
|
|
610007
|
+
chatId,
|
|
610008
|
+
speaker: this.state.botUsername ? `@${this.state.botUsername}` : "Assistant",
|
|
610009
|
+
messageId,
|
|
610010
|
+
mediaSummary: summary,
|
|
610011
|
+
generatedMediaPromptInfo: info
|
|
610012
|
+
};
|
|
610013
|
+
try {
|
|
610014
|
+
this.recordChatHistory(sessionKey, entry);
|
|
610015
|
+
this.saveTelegramConversationState(sessionKey);
|
|
610016
|
+
} catch {
|
|
610017
|
+
}
|
|
610018
|
+
}
|
|
610019
|
+
readGeneratedImagePromptInfo(imagePath) {
|
|
610020
|
+
const sidecarPath2 = `${imagePath}.json`;
|
|
610021
|
+
if (!existsSync108(sidecarPath2)) return null;
|
|
610022
|
+
try {
|
|
610023
|
+
const raw = readFileSync88(sidecarPath2, "utf8");
|
|
610024
|
+
const parsed = JSON.parse(raw);
|
|
610025
|
+
if (!parsed || typeof parsed !== "object" || typeof parsed["original_prompt"] !== "string") {
|
|
610026
|
+
return null;
|
|
610027
|
+
}
|
|
610028
|
+
return {
|
|
610029
|
+
imagePath,
|
|
610030
|
+
originalPrompt: String(parsed["original_prompt"]),
|
|
610031
|
+
expandedPrompt: typeof parsed["expanded_prompt"] === "string" ? String(parsed["expanded_prompt"]) : void 0,
|
|
610032
|
+
promptWasExpanded: parsed["prompt_was_expanded"] === true,
|
|
610033
|
+
model: typeof parsed["model"] === "string" ? String(parsed["model"]) : void 0,
|
|
610034
|
+
backend: typeof parsed["backend"] === "string" ? String(parsed["backend"]) : void 0,
|
|
610035
|
+
width: typeof parsed["width"] === "number" ? parsed["width"] : void 0,
|
|
610036
|
+
height: typeof parsed["height"] === "number" ? parsed["height"] : void 0,
|
|
610037
|
+
aspectRatio: typeof parsed["aspect_ratio"] === "string" || parsed["aspect_ratio"] === null ? parsed["aspect_ratio"] : void 0,
|
|
610038
|
+
seed: typeof parsed["seed"] === "number" ? parsed["seed"] : null,
|
|
610039
|
+
createdAt: typeof parsed["created_at"] === "string" ? String(parsed["created_at"]) : void 0
|
|
610040
|
+
};
|
|
610041
|
+
} catch {
|
|
610042
|
+
return null;
|
|
610043
|
+
}
|
|
610044
|
+
}
|
|
609713
610045
|
async sendGeneratedArtifactsFromSubAgent(msg, subAgent, finalText, includeMentioned) {
|
|
609714
610046
|
const root = subAgent.creativeWorkspaceRoot;
|
|
609715
610047
|
if (!root) return;
|
|
@@ -609741,6 +610073,8 @@ Content-Type: ${contentType}\r
|
|
|
609741
610073
|
kind,
|
|
609742
610074
|
source: "file",
|
|
609743
610075
|
audioAsVoice: kind === "voice"
|
|
610076
|
+
}, {
|
|
610077
|
+
sourcePromptPath: abs
|
|
609744
610078
|
}).then((messageId) => {
|
|
609745
610079
|
if (messageId !== null) {
|
|
609746
610080
|
this.rememberTelegramDeliveredArtifact(subAgent, abs);
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.47",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.47",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED