omnius 1.0.45 → 1.0.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +299 -25
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -251178,10 +251178,6 @@ function parseStructuredProgress(text) {
|
|
|
251178
251178
|
return null;
|
|
251179
251179
|
}
|
|
251180
251180
|
}
|
|
251181
|
-
function numberArg(value2, fallback) {
|
|
251182
|
-
const n2 = Number(value2);
|
|
251183
|
-
return Number.isFinite(n2) && n2 > 0 ? n2 : fallback;
|
|
251184
|
-
}
|
|
251185
251181
|
function optionalNumberArg(value2) {
|
|
251186
251182
|
if (value2 === void 0 || value2 === null || value2 === "")
|
|
251187
251183
|
return void 0;
|
|
@@ -251199,6 +251195,26 @@ function booleanArg(value2, fallback) {
|
|
|
251199
251195
|
}
|
|
251200
251196
|
return fallback;
|
|
251201
251197
|
}
|
|
251198
|
+
/**
 * Snap a pixel dimension to the nearest multiple of 8.
 *
 * @param {number} value2 - Requested dimension in pixels.
 * @returns {number} 1024 when the input is not a finite positive number;
 *   otherwise the nearest multiple of 8, never below 64.
 */
function roundToMultipleOf8(value2) {
  const usable = Number.isFinite(value2) && value2 > 0;
  if (!usable) {
    return 1024;
  }
  const snapped = 8 * Math.round(value2 / 8);
  return snapped < 64 ? 64 : snapped;
}
|
|
251204
|
+
/**
 * Convert an aspect-ratio string into a width/height pair whose longer side
 * equals the longer side of the given preset.
 *
 * Accepted separators are ":", "x", "X", "/" and "×"; both terms may be
 * decimals (e.g. "2.39:1").
 *
 * @param {string} ratio - Aspect ratio such as "16:9".
 * @param {number} presetWidth - Preset width in pixels.
 * @param {number} presetHeight - Preset height in pixels.
 * @returns {{width: number, height: number} | null} The derived size, or null
 *   when the ratio cannot be parsed, a term is not positive, or the preset
 *   does not give a finite positive long side.
 */
function resolveAspectRatioToSize(ratio, presetWidth, presetHeight) {
  // A non-string ratio used to throw on `.match`; treat it as "no ratio".
  if (typeof ratio !== "string") {
    return null;
  }
  const match = ratio.match(/^\s*(\d+(?:\.\d+)?)\s*[:xX/×]\s*(\d+(?:\.\d+)?)\s*$/);
  if (!match) {
    return null;
  }
  const w = Number(match[1]);
  const h = Number(match[2]);
  if (!Number.isFinite(w) || !Number.isFinite(h) || w <= 0 || h <= 0) {
    return null;
  }
  const longSide = Math.max(presetWidth, presetHeight);
  // Missing or garbage presets would otherwise propagate NaN dimensions.
  if (!Number.isFinite(longSide) || longSide <= 0) {
    return null;
  }
  // Keep the short side at 1px or more: a 0 here is indistinguishable from
  // "no size" further down the pipeline and would discard the ratio.
  const shortSide = Math.max(1, Math.round(longSide * Math.min(w, h) / Math.max(w, h)));
  return w >= h
    ? { width: longSide, height: shortSide }
    : { width: shortSide, height: longSide };
}
|
|
251202
251218
|
function generationFallbackEnabled(args) {
|
|
251203
251219
|
if (booleanArg(args["strict_model"] ?? args["strictModel"] ?? args["strict"], false))
|
|
251204
251220
|
return false;
|
|
@@ -252151,7 +252167,7 @@ if __name__ == "__main__":
|
|
|
252151
252167
|
`;
|
|
252152
252168
|
ImageGenerateTool = class {
|
|
252153
252169
|
name = "generate_image";
|
|
252154
|
-
description =
|
|
252170
|
+
description = `Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first, including official/traceable FLUX fallbacks for Black Forest Labs models, and then falls back to smaller models if setup, download, or generation fails. Aspect ratio and resolution are model-controllable: pass aspect_ratio (e.g. "16:9", "9:16", "4:3", "3:4", "1:1", "21:9", "2:3", "3:2") to derive width/height around the selected model's preferred base resolution, or pass explicit width/height (in pixels, both rounded to a multiple of 8) when a specific size is required. A preliminary prompt-expansion stage rewrites the user's prompt into a richer, model-tuned version before generation when an LLM expander is wired; pass expand_prompt=false to skip. Saves a PNG under .omnius/images and returns the file path.`;
|
|
252155
252171
|
parameters = {
|
|
252156
252172
|
type: "object",
|
|
252157
252173
|
properties: {
|
|
@@ -252168,13 +252184,17 @@ if __name__ == "__main__":
|
|
|
252168
252184
|
enum: ["auto", "ollama", "diffusers", "sdcpp"],
|
|
252169
252185
|
description: "Generation backend. Defaults to auto."
|
|
252170
252186
|
},
|
|
252187
|
+
aspect_ratio: {
|
|
252188
|
+
type: "string",
|
|
252189
|
+
description: `Desired aspect ratio expressed as W:H (e.g. "16:9", "9:16", "4:3", "3:4", "1:1", "21:9", "2:3", "3:2"). When provided, width/height are derived from the selected model's preferred base resolution so the longer side stays in that model's sweet spot. Ignored if explicit width and height are also provided.`
|
|
252190
|
+
},
|
|
252171
252191
|
width: {
|
|
252172
252192
|
type: "number",
|
|
252173
|
-
description: "Image width in pixels"
|
|
252193
|
+
description: "Image width in pixels. Optional — defaults to the selected model's preset width, or is derived from aspect_ratio when present. Rounded to a multiple of 8."
|
|
252174
252194
|
},
|
|
252175
252195
|
height: {
|
|
252176
252196
|
type: "number",
|
|
252177
|
-
description: "Image height in pixels"
|
|
252197
|
+
description: "Image height in pixels. Optional — defaults to the selected model's preset height, or is derived from aspect_ratio when present. Rounded to a multiple of 8."
|
|
252178
252198
|
},
|
|
252179
252199
|
steps: {
|
|
252180
252200
|
type: "number",
|
|
@@ -252204,6 +252224,10 @@ if __name__ == "__main__":
|
|
|
252204
252224
|
strict_model: {
|
|
252205
252225
|
type: "boolean",
|
|
252206
252226
|
description: "When true, use only the requested model/backend and do not fall back. Defaults false."
|
|
252227
|
+
},
|
|
252228
|
+
expand_prompt: {
|
|
252229
|
+
type: "boolean",
|
|
252230
|
+
description: "When true (default), a preliminary LLM stage rewrites the prompt into a richer, model-tuned version before generation. Set false to send the raw prompt unchanged."
|
|
252207
252231
|
}
|
|
252208
252232
|
},
|
|
252209
252233
|
required: ["prompt"]
|
|
@@ -252216,15 +252240,23 @@ if __name__ == "__main__":
|
|
|
252216
252240
|
lastProgressAt = 0;
|
|
252217
252241
|
defaultModel;
|
|
252218
252242
|
defaultBackend;
|
|
252243
|
+
promptExpander = null;
|
|
252219
252244
|
constructor(cwd4, ollamaUrl = "http://localhost:11434", defaults3 = {}) {
|
|
252220
252245
|
this.cwd = cwd4;
|
|
252221
252246
|
this.ollamaUrl = ollamaUrl.replace(/\/v1\/?$/, "").replace(/\/$/, "");
|
|
252222
252247
|
this.defaultModel = defaults3.model;
|
|
252223
252248
|
this.defaultBackend = defaults3.backend;
|
|
252249
|
+
this.promptExpander = defaults3.promptExpander ?? null;
|
|
252224
252250
|
}
|
|
252225
252251
|
setDefaults(defaults3) {
|
|
252226
252252
|
this.defaultModel = defaults3.model;
|
|
252227
252253
|
this.defaultBackend = defaults3.backend;
|
|
252254
|
+
if (defaults3.promptExpander !== void 0) {
|
|
252255
|
+
this.promptExpander = defaults3.promptExpander;
|
|
252256
|
+
}
|
|
252257
|
+
}
|
|
252258
|
+
setPromptExpander(expander) {
|
|
252259
|
+
this.promptExpander = expander;
|
|
252228
252260
|
}
|
|
252229
252261
|
setProgressCallback(handler) {
|
|
252230
252262
|
this.progressHandler = handler;
|
|
@@ -252321,19 +252353,39 @@ if __name__ == "__main__":
|
|
|
252321
252353
|
}
|
|
252322
252354
|
async generateCandidateLadder(args) {
|
|
252323
252355
|
const failed = [];
|
|
252356
|
+
const expansionEnabled = args.args["expand_prompt"] === false ? false : true;
|
|
252357
|
+
const aspectRatio = typeof args.args["aspect_ratio"] === "string" ? String(args.args["aspect_ratio"]).trim() : "";
|
|
252324
252358
|
for (let index = 0; index < args.candidates.length; index++) {
|
|
252325
252359
|
const candidate = args.candidates[index];
|
|
252326
|
-
const
|
|
252327
|
-
const
|
|
252360
|
+
const presetW = candidate.preset?.width ?? 1024;
|
|
252361
|
+
const presetH = candidate.preset?.height ?? 1024;
|
|
252362
|
+
const explicitWidth = optionalNumberArg(args.args["width"]);
|
|
252363
|
+
const explicitHeight = optionalNumberArg(args.args["height"]);
|
|
252364
|
+
const derived = (explicitWidth === void 0 || explicitHeight === void 0) && aspectRatio ? resolveAspectRatioToSize(aspectRatio, presetW, presetH) : null;
|
|
252365
|
+
const width = roundToMultipleOf8(explicitWidth ?? derived?.width ?? presetW);
|
|
252366
|
+
const height = roundToMultipleOf8(explicitHeight ?? derived?.height ?? presetH);
|
|
252328
252367
|
const steps = optionalNumberArg(args.args["steps"]) ?? candidate.preset?.steps;
|
|
252329
252368
|
const guidance = optionalNumberArg(args.args["guidance"]) ?? candidate.preset?.guidance;
|
|
252330
252369
|
this.emitProgress({
|
|
252331
252370
|
stage: "setup",
|
|
252332
252371
|
message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
|
|
252333
252372
|
});
|
|
252334
|
-
const
|
|
252335
|
-
|
|
252336
|
-
|
|
252373
|
+
const promptForCandidate = expansionEnabled ? await this.expandPromptForCandidate(args.prompt, candidate, index, args.candidates.length) : args.prompt;
|
|
252374
|
+
const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
|
|
252375
|
+
if (result.success) {
|
|
252376
|
+
await this.writeImageSidecar(result, {
|
|
252377
|
+
originalPrompt: args.prompt,
|
|
252378
|
+
expandedPrompt: promptForCandidate,
|
|
252379
|
+
model: candidate.model,
|
|
252380
|
+
backend: candidate.backend,
|
|
252381
|
+
width,
|
|
252382
|
+
height,
|
|
252383
|
+
aspectRatio: aspectRatio || null,
|
|
252384
|
+
seed: args.seed
|
|
252385
|
+
}).catch(() => {
|
|
252386
|
+
});
|
|
252387
|
+
return annotateImageFallbackSuccess(this.annotateResultWithSourcePrompt(result, args.prompt, promptForCandidate), failed, candidate);
|
|
252388
|
+
}
|
|
252337
252389
|
failed.push({ candidate, reason: summarizeToolResult(result) });
|
|
252338
252390
|
if (index < args.candidates.length - 1) {
|
|
252339
252391
|
this.emitProgress({
|
|
@@ -252350,6 +252402,95 @@ if __name__ == "__main__":
|
|
|
252350
252402
|
durationMs: performance.now() - args.start
|
|
252351
252403
|
};
|
|
252352
252404
|
}
|
|
252405
|
+
/**
|
|
252406
|
+
* Persist a sidecar JSON next to a generated image capturing the
|
|
252407
|
+
* original (user-typed) prompt, the expanded prompt actually sent to the
|
|
252408
|
+
* model, model identity, resolution, and any aspect-ratio request. The
|
|
252409
|
+
* Telegram bridge reads this when the user replies to a generated image
|
|
252410
|
+
* so the model can answer "what prompt made this?" or modify the prompt
|
|
252411
|
+
* for a follow-up generation without losing the original intent.
|
|
252412
|
+
*/
|
|
252413
|
+
async writeImageSidecar(result, meta) {
|
|
252414
|
+
const imagePath = this.extractImagePathFromResult(result);
|
|
252415
|
+
if (!imagePath)
|
|
252416
|
+
return;
|
|
252417
|
+
const sidecarPath2 = `${imagePath}.json`;
|
|
252418
|
+
const payload = {
|
|
252419
|
+
version: 1,
|
|
252420
|
+
kind: "image-generation",
|
|
252421
|
+
image_path: imagePath,
|
|
252422
|
+
original_prompt: meta.originalPrompt,
|
|
252423
|
+
expanded_prompt: meta.expandedPrompt,
|
|
252424
|
+
prompt_was_expanded: meta.originalPrompt.trim() !== meta.expandedPrompt.trim(),
|
|
252425
|
+
model: meta.model,
|
|
252426
|
+
backend: meta.backend,
|
|
252427
|
+
width: meta.width,
|
|
252428
|
+
height: meta.height,
|
|
252429
|
+
aspect_ratio: meta.aspectRatio,
|
|
252430
|
+
seed: meta.seed ?? null,
|
|
252431
|
+
created_at: (/* @__PURE__ */ new Date()).toISOString()
|
|
252432
|
+
};
|
|
252433
|
+
await writeFile17(sidecarPath2, JSON.stringify(payload, null, 2) + "\n", "utf8");
|
|
252434
|
+
}
|
|
252435
|
+
extractImagePathFromResult(result) {
|
|
252436
|
+
const mutated = result.mutatedFiles;
|
|
252437
|
+
if (Array.isArray(mutated) && mutated.length > 0) {
|
|
252438
|
+
const first2 = mutated[0];
|
|
252439
|
+
if (typeof first2 === "string" && first2.trim())
|
|
252440
|
+
return first2;
|
|
252441
|
+
}
|
|
252442
|
+
const match = result.output.match(/Image generated:\s*([^\n]+)/);
|
|
252443
|
+
if (match && match[1])
|
|
252444
|
+
return match[1].trim();
|
|
252445
|
+
return null;
|
|
252446
|
+
}
|
|
252447
|
+
/**
|
|
252448
|
+
* Add the original user prompt to the result output when prompt
|
|
252449
|
+
* expansion produced a different string. This gives downstream
|
|
252450
|
+
* consumers (Telegram reply context, TUI display, memory) access to
|
|
252451
|
+
* both the user's intent and the model-tuned prompt actually rendered.
|
|
252452
|
+
*/
|
|
252453
|
+
annotateResultWithSourcePrompt(result, originalPrompt, expandedPrompt) {
|
|
252454
|
+
if (originalPrompt.trim() === expandedPrompt.trim())
|
|
252455
|
+
return result;
|
|
252456
|
+
const annotation = ` Original prompt: "${this.truncatePromptForOutput(originalPrompt)}"`;
|
|
252457
|
+
const llmAnnotation = `Original user prompt: ${originalPrompt}`;
|
|
252458
|
+
const output = result.output ? `${result.output}
|
|
252459
|
+
${annotation}` : annotation;
|
|
252460
|
+
const llmContent = typeof result.llmContent === "string" && result.llmContent ? `${result.llmContent}
|
|
252461
|
+
${llmAnnotation}` : result.llmContent;
|
|
252462
|
+
return { ...result, output, llmContent };
|
|
252463
|
+
}
|
|
252464
|
+
truncatePromptForOutput(prompt) {
|
|
252465
|
+
return prompt.length > 200 ? prompt.slice(0, 197) + "..." : prompt;
|
|
252466
|
+
}
|
|
252467
|
+
async expandPromptForCandidate(originalPrompt, candidate, candidateIndex, candidateCount) {
|
|
252468
|
+
if (!this.promptExpander)
|
|
252469
|
+
return originalPrompt;
|
|
252470
|
+
try {
|
|
252471
|
+
this.emitProgress({
|
|
252472
|
+
stage: "setup",
|
|
252473
|
+
message: `Expanding prompt for ${candidate.model}`
|
|
252474
|
+
});
|
|
252475
|
+
const expanded = await this.promptExpander({
|
|
252476
|
+
model: candidate.model,
|
|
252477
|
+
backend: candidate.backend,
|
|
252478
|
+
originalPrompt,
|
|
252479
|
+
candidateIndex,
|
|
252480
|
+
candidateCount
|
|
252481
|
+
});
|
|
252482
|
+
const trimmed = typeof expanded === "string" ? expanded.trim() : "";
|
|
252483
|
+
if (!trimmed)
|
|
252484
|
+
return originalPrompt;
|
|
252485
|
+
this.emitProgress({
|
|
252486
|
+
stage: "setup",
|
|
252487
|
+
message: `Expanded prompt (${trimmed.length} chars) for ${candidate.model}`
|
|
252488
|
+
});
|
|
252489
|
+
return trimmed;
|
|
252490
|
+
} catch {
|
|
252491
|
+
return originalPrompt;
|
|
252492
|
+
}
|
|
252493
|
+
}
|
|
252353
252494
|
async prewarmOllama(args) {
|
|
252354
252495
|
const model = args.model || DEFAULT_OLLAMA_IMAGE_MODEL;
|
|
252355
252496
|
if (await this.ollamaHasModel(model)) {
|
|
@@ -253248,7 +253389,7 @@ async function ensureAudioRunner(repoRoot, backend) {
|
|
|
253248
253389
|
function audioOutputPath(repoRoot) {
|
|
253249
253390
|
return join37(audioOutputDir(repoRoot), `audio-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.wav`);
|
|
253250
253391
|
}
|
|
253251
|
-
function
|
|
253392
|
+
function numberArg(value2, fallback) {
|
|
253252
253393
|
if (typeof value2 === "number" && Number.isFinite(value2))
|
|
253253
253394
|
return value2;
|
|
253254
253395
|
if (typeof value2 === "string" && value2.trim()) {
|
|
@@ -254457,7 +254598,7 @@ if __name__ == "__main__":
|
|
|
254457
254598
|
const failed = [];
|
|
254458
254599
|
for (let index = 0; index < args.candidates.length; index++) {
|
|
254459
254600
|
const candidate = args.candidates[index];
|
|
254460
|
-
const duration =
|
|
254601
|
+
const duration = numberArg(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
|
|
254461
254602
|
this.emitProgress({
|
|
254462
254603
|
stage: "setup",
|
|
254463
254604
|
message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
|
|
@@ -254492,7 +254633,7 @@ if __name__ == "__main__":
|
|
|
254492
254633
|
const failed = [];
|
|
254493
254634
|
for (let index = 0; index < args.candidates.length; index++) {
|
|
254494
254635
|
const candidate = args.candidates[index];
|
|
254495
|
-
const duration =
|
|
254636
|
+
const duration = numberArg(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
|
|
254496
254637
|
const steps = optionalNumberArg2(args.args["steps"]) ?? candidate.preset?.defaultSteps;
|
|
254497
254638
|
this.emitProgress({
|
|
254498
254639
|
stage: "setup",
|
|
@@ -509592,7 +509733,7 @@ function boolArg(value2, fallback) {
|
|
|
509592
509733
|
}
|
|
509593
509734
|
return fallback;
|
|
509594
509735
|
}
|
|
509595
|
-
function
|
|
509736
|
+
function numberArg2(value2, fallback) {
|
|
509596
509737
|
if (typeof value2 === "number" && Number.isFinite(value2))
|
|
509597
509738
|
return value2;
|
|
509598
509739
|
if (typeof value2 === "string" && value2.trim()) {
|
|
@@ -510317,7 +510458,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
510317
510458
|
const cloneRef = cloneRefForSynthesis(args);
|
|
510318
510459
|
if (!cloneRef)
|
|
510319
510460
|
throw new Error(`No LuxTTS clone source found. Provide source_audio=<voice clip> or clone_ref=<registered clip>.`);
|
|
510320
|
-
const speed =
|
|
510461
|
+
const speed = numberArg2(args["speed"], 1);
|
|
510321
510462
|
ensureLuxttsInstalled();
|
|
510322
510463
|
const daemonReady = await ensureLuxttsDaemon();
|
|
510323
510464
|
if (daemonReady) {
|
|
@@ -510351,8 +510492,8 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
510351
510492
|
const venvPy = ensureSupertonicInstalled();
|
|
510352
510493
|
const voice = typeof args["voice"] === "string" ? args["voice"] : "M4";
|
|
510353
510494
|
const lang = typeof args["lang"] === "string" ? args["lang"] : "en";
|
|
510354
|
-
const speed =
|
|
510355
|
-
const totalStep = Math.round(
|
|
510495
|
+
const speed = numberArg2(args["speed"], 1.05);
|
|
510496
|
+
const totalStep = Math.round(numberArg2(args["total_step"], 8));
|
|
510356
510497
|
const stdout = execFileSync4(venvPy, [supertonicInferScript()], {
|
|
510357
510498
|
input: JSON.stringify({ text, output_path: outputPath2, voice_name: voice, lang, speed, total_step: totalStep }),
|
|
510358
510499
|
encoding: "utf8",
|
|
@@ -510404,7 +510545,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
510404
510545
|
if (!hasCommand3("espeak-ng"))
|
|
510405
510546
|
throw new Error("Local fallback TTS command not found.");
|
|
510406
510547
|
const voice = typeof args["voice"] === "string" ? args["voice"] : "en";
|
|
510407
|
-
const speed = Math.round(
|
|
510548
|
+
const speed = Math.round(numberArg2(args["speed"], 160));
|
|
510408
510549
|
execFileSync4("espeak-ng", ["-v", voice, "-s", String(speed), "-w", outputPath2, text], {
|
|
510409
510550
|
stdio: "pipe",
|
|
510410
510551
|
timeout: 6e4
|
|
@@ -537096,6 +537237,62 @@ ${blob}
|
|
|
537096
537237
|
/** Register a tool for the agent to use */
|
|
537097
537238
|
registerTool(tool) {
|
|
537098
537239
|
this.tools.set(tool.name, tool);
|
|
537240
|
+
if (tool.name === "generate_image") {
|
|
537241
|
+
this.maybeInstallImagePromptExpander(tool);
|
|
537242
|
+
}
|
|
537243
|
+
}
|
|
537244
|
+
/**
|
|
537245
|
+
* Install an LLM-backed prompt expander on the registered generate_image
|
|
537246
|
+
* tool. The expander runs a preliminary stage that rewrites the user's
|
|
537247
|
+
* raw prompt into a richer, model-tuned version before the diffusion
|
|
537248
|
+
* call. The instruction is intentionally generic — the LLM uses its own
|
|
537249
|
+
* knowledge of the target model's prompt conventions to shape syntax,
|
|
537250
|
+
* length, and detail style, rather than relying on hard-coded templates.
|
|
537251
|
+
*/
|
|
537252
|
+
maybeInstallImagePromptExpander(tool) {
|
|
537253
|
+
const setExpander = tool.setPromptExpander;
|
|
537254
|
+
if (typeof setExpander !== "function")
|
|
537255
|
+
return;
|
|
537256
|
+
if (process.env["OMNIUS_IMAGE_PROMPT_EXPAND"] === "0") {
|
|
537257
|
+
setExpander.call(tool, null);
|
|
537258
|
+
return;
|
|
537259
|
+
}
|
|
537260
|
+
const backend = this.backend;
|
|
537261
|
+
if (!backend || typeof backend.chatCompletion !== "function")
|
|
537262
|
+
return;
|
|
537263
|
+
setExpander.call(tool, async (ctx3) => {
|
|
537264
|
+
const userPrompt = (ctx3.originalPrompt ?? "").trim();
|
|
537265
|
+
if (!userPrompt)
|
|
537266
|
+
return null;
|
|
537267
|
+
const system = "You are a prompt-engineering stage that rewrites a user's short image request into a richer, more detailed image-generation prompt. The expanded prompt should match the prompt conventions and tokenizer preferences of the specific image model named in the request — use your own knowledge of that model's training distribution to choose length, syntax, weighting markers, ordering, and descriptor density. Do not invent new subject matter or contradict the user's intent. Output only the expanded prompt: no preamble, no quotes, no labels.";
|
|
537268
|
+
const user = `Target image model: ${ctx3.model}
|
|
537269
|
+
Backend: ${ctx3.backend}
|
|
537270
|
+
Candidate position: ${ctx3.candidateIndex + 1} of ${ctx3.candidateCount} (fallback ladder)
|
|
537271
|
+
|
|
537272
|
+
User prompt to expand:
|
|
537273
|
+
${userPrompt}
|
|
537274
|
+
|
|
537275
|
+
Rewrite it now for ${ctx3.model}.`;
|
|
537276
|
+
try {
|
|
537277
|
+
const response = await backend.chatCompletion({
|
|
537278
|
+
messages: [
|
|
537279
|
+
{ role: "system", content: system },
|
|
537280
|
+
{ role: "user", content: user }
|
|
537281
|
+
],
|
|
537282
|
+
tools: [],
|
|
537283
|
+
temperature: 0.4,
|
|
537284
|
+
maxTokens: 600,
|
|
537285
|
+
timeoutMs: 3e4
|
|
537286
|
+
});
|
|
537287
|
+
const text = response?.choices?.[0]?.message?.content;
|
|
537288
|
+
if (typeof text !== "string")
|
|
537289
|
+
return null;
|
|
537290
|
+
const cleaned = text.replace(/^["'`]+|["'`]+$/g, "").replace(/^(?:expanded prompt|prompt|output)\s*:\s*/i, "").trim();
|
|
537291
|
+
return cleaned.length > 0 ? cleaned : null;
|
|
537292
|
+
} catch {
|
|
537293
|
+
return null;
|
|
537294
|
+
}
|
|
537295
|
+
});
|
|
537099
537296
|
}
|
|
537100
537297
|
/** Register multiple tools */
|
|
537101
537298
|
registerTools(tools) {
|
|
@@ -601106,7 +601303,7 @@ function buildTelegramCreativeTools(repoRoot, chatId, backendUrl2, imageDefaults
|
|
|
601106
601303
|
}
|
|
601107
601304
|
function scopedTool(base3, root, mode) {
|
|
601108
601305
|
const rootAbs = resolve41(root);
|
|
601109
|
-
|
|
601306
|
+
const wrapper = {
|
|
601110
601307
|
name: base3.name,
|
|
601111
601308
|
description: `[PUBLIC TELEGRAM CREATIVE WORKSPACE: ${rootAbs}] ${base3.description} Paths are restricted to this workspace. This tool cannot access or modify files outside the workspace. ` + (mode === "edit" ? "It can only edit files already created in this workspace manifest. " : ""),
|
|
601112
601309
|
parameters: base3.parameters,
|
|
@@ -601215,6 +601412,11 @@ function scopedTool(base3, root, mode) {
|
|
|
601215
601412
|
return withTelegramAutoAttachmentNotice(result, recordedPaths.size);
|
|
601216
601413
|
}
|
|
601217
601414
|
};
|
|
601415
|
+
const baseSetExpander = base3.setPromptExpander;
|
|
601416
|
+
if (typeof baseSetExpander === "function") {
|
|
601417
|
+
wrapper.setPromptExpander = (expander) => baseSetExpander.call(base3, expander);
|
|
601418
|
+
}
|
|
601419
|
+
return wrapper;
|
|
601218
601420
|
}
|
|
601219
601421
|
function withTelegramAutoAttachmentNotice(result, artifactCount) {
|
|
601220
601422
|
if (!result.success || artifactCount <= 0) return result;
|
|
@@ -603559,6 +603761,20 @@ function summarizeTelegramMessageAttachments(msg) {
|
|
|
603559
603761
|
parts.push(`replied-to caption: ${truncateTelegramContextLine(msg.replyToMedia.caption, 180)}`);
|
|
603560
603762
|
}
|
|
603561
603763
|
}
|
|
603764
|
+
const gen = msg.replyContext?.generatedMediaPromptInfo;
|
|
603765
|
+
if (gen?.originalPrompt) {
|
|
603766
|
+
parts.push(`replied-to image source prompt: "${truncateTelegramContextLine(gen.originalPrompt, 400)}"`);
|
|
603767
|
+
if (gen.promptWasExpanded && gen.expandedPrompt && gen.expandedPrompt !== gen.originalPrompt) {
|
|
603768
|
+
parts.push(`replied-to image expanded prompt: "${truncateTelegramContextLine(gen.expandedPrompt, 400)}"`);
|
|
603769
|
+
}
|
|
603770
|
+
const meta = [
|
|
603771
|
+
gen.model ? `model=${gen.model}` : "",
|
|
603772
|
+
gen.backend ? `backend=${gen.backend}` : "",
|
|
603773
|
+
gen.width && gen.height ? `size=${gen.width}x${gen.height}` : "",
|
|
603774
|
+
gen.aspectRatio ? `aspect=${gen.aspectRatio}` : ""
|
|
603775
|
+
].filter(Boolean).join(", ");
|
|
603776
|
+
if (meta) parts.push(`replied-to image generation: ${meta}`);
|
|
603777
|
+
}
|
|
603562
603778
|
if (msg.poll) {
|
|
603563
603779
|
parts.push(`poll: ${truncateTelegramContextLine(msg.poll.question, 180)}`);
|
|
603564
603780
|
}
|
|
@@ -604511,7 +604727,7 @@ var init_telegram_bridge = __esm({
|
|
|
604511
604727
|
]);
|
|
604512
604728
|
DEFAULT_TELEGRAM_TOOL_GROUP_POLICY = {
|
|
604513
604729
|
read: true,
|
|
604514
|
-
message:
|
|
604730
|
+
message: true,
|
|
604515
604731
|
media: true,
|
|
604516
604732
|
janitorial: false,
|
|
604517
604733
|
reaction: false,
|
|
@@ -605153,7 +605369,8 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
|
|
|
605153
605369
|
threadId: entry.messageThreadId,
|
|
605154
605370
|
sender: this.telegramReplySenderWithSelfFlag(sender),
|
|
605155
605371
|
text: entry.text,
|
|
605156
|
-
mediaSummary: entry.mediaSummary
|
|
605372
|
+
mediaSummary: entry.mediaSummary,
|
|
605373
|
+
generatedMediaPromptInfo: entry.generatedMediaPromptInfo
|
|
605157
605374
|
};
|
|
605158
605375
|
}
|
|
605159
605376
|
resolveTelegramReplyContext(sessionKey, msg) {
|
|
@@ -605174,7 +605391,8 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
|
|
|
605174
605391
|
...msg.replyContext.sender ?? {}
|
|
605175
605392
|
}),
|
|
605176
605393
|
text: msg.replyContext.text ?? localContext.text,
|
|
605177
|
-
mediaSummary: msg.replyContext.mediaSummary ?? localContext.mediaSummary
|
|
605394
|
+
mediaSummary: msg.replyContext.mediaSummary ?? localContext.mediaSummary,
|
|
605395
|
+
generatedMediaPromptInfo: msg.replyContext.generatedMediaPromptInfo ?? localContext.generatedMediaPromptInfo
|
|
605178
605396
|
};
|
|
605179
605397
|
} else if (msg.replyContext) {
|
|
605180
605398
|
reply = {
|
|
@@ -609706,10 +609924,66 @@ Content-Type: ${contentType}\r
|
|
|
609706
609924
|
const result = await res.json();
|
|
609707
609925
|
if (result.ok) {
|
|
609708
609926
|
this.state.messagesSent++;
|
|
609709
|
-
|
|
609927
|
+
const outboundMessageId = result.result?.message_id ?? null;
|
|
609928
|
+
if (outboundMessageId && media.kind === "image" && media.source === "file") {
|
|
609929
|
+
this.recordOutboundGeneratedImagePrompt(chatId, outboundMessageId, media.value, caption);
|
|
609930
|
+
}
|
|
609931
|
+
return outboundMessageId;
|
|
609710
609932
|
}
|
|
609711
609933
|
throw new Error(String(result.description || `Telegram ${method} failed`));
|
|
609712
609934
|
}
|
|
609935
|
+
/**
|
|
609936
|
+
* After the bot sends an outbound photo, look for a `<image>.json`
|
|
609937
|
+
* sidecar emitted by ImageGenerateTool and stash the source prompt info
|
|
609938
|
+
* on a chatHistory entry keyed by the outbound message_id. When the user
|
|
609939
|
+
* later replies to that image, resolveTelegramReplyContext finds the
|
|
609940
|
+
* entry and exposes the original prompt to the model.
|
|
609941
|
+
*/
|
|
609942
|
+
recordOutboundGeneratedImagePrompt(chatId, messageId, imagePath, caption) {
|
|
609943
|
+
const sidecarPath2 = `${imagePath}.json`;
|
|
609944
|
+
if (!existsSync108(sidecarPath2)) return;
|
|
609945
|
+
let info = null;
|
|
609946
|
+
try {
|
|
609947
|
+
const raw = readFileSync88(sidecarPath2, "utf8");
|
|
609948
|
+
const parsed = JSON.parse(raw);
|
|
609949
|
+
if (parsed && typeof parsed === "object" && typeof parsed["original_prompt"] === "string") {
|
|
609950
|
+
info = {
|
|
609951
|
+
imagePath,
|
|
609952
|
+
originalPrompt: String(parsed["original_prompt"]),
|
|
609953
|
+
expandedPrompt: typeof parsed["expanded_prompt"] === "string" ? String(parsed["expanded_prompt"]) : void 0,
|
|
609954
|
+
promptWasExpanded: parsed["prompt_was_expanded"] === true,
|
|
609955
|
+
model: typeof parsed["model"] === "string" ? String(parsed["model"]) : void 0,
|
|
609956
|
+
backend: typeof parsed["backend"] === "string" ? String(parsed["backend"]) : void 0,
|
|
609957
|
+
width: typeof parsed["width"] === "number" ? parsed["width"] : void 0,
|
|
609958
|
+
height: typeof parsed["height"] === "number" ? parsed["height"] : void 0,
|
|
609959
|
+
aspectRatio: typeof parsed["aspect_ratio"] === "string" || parsed["aspect_ratio"] === null ? parsed["aspect_ratio"] : void 0,
|
|
609960
|
+
seed: typeof parsed["seed"] === "number" ? parsed["seed"] : null,
|
|
609961
|
+
createdAt: typeof parsed["created_at"] === "string" ? String(parsed["created_at"]) : void 0
|
|
609962
|
+
};
|
|
609963
|
+
}
|
|
609964
|
+
} catch {
|
|
609965
|
+
return;
|
|
609966
|
+
}
|
|
609967
|
+
if (!info) return;
|
|
609968
|
+
const sessionKey = `chat:${String(chatId)}`;
|
|
609969
|
+
const captionText = (caption ?? "").trim();
|
|
609970
|
+
const summary = `photo (generated, model=${info.model ?? "?"}, ${info.width ?? "?"}x${info.height ?? "?"})`;
|
|
609971
|
+
const entry = {
|
|
609972
|
+
role: "assistant",
|
|
609973
|
+
text: captionText,
|
|
609974
|
+
mode: "action",
|
|
609975
|
+
chatId,
|
|
609976
|
+
speaker: this.state.botUsername ? `@${this.state.botUsername}` : "Assistant",
|
|
609977
|
+
messageId,
|
|
609978
|
+
mediaSummary: summary,
|
|
609979
|
+
generatedMediaPromptInfo: info
|
|
609980
|
+
};
|
|
609981
|
+
try {
|
|
609982
|
+
this.recordChatHistory(sessionKey, entry);
|
|
609983
|
+
this.saveTelegramConversationState(sessionKey);
|
|
609984
|
+
} catch {
|
|
609985
|
+
}
|
|
609986
|
+
}
|
|
609713
609987
|
async sendGeneratedArtifactsFromSubAgent(msg, subAgent, finalText, includeMentioned) {
|
|
609714
609988
|
const root = subAgent.creativeWorkspaceRoot;
|
|
609715
609989
|
if (!root) return;
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.46",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.46",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED