omnius 1.0.21 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +718 -133
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1474,7 +1474,7 @@ var init_security_classifier = __esm({
|
|
|
1474
1474
|
// ── Network reads (safe)
|
|
1475
1475
|
{ match: /^(web_search|web_fetch)$/, info: NETWORK_READ },
|
|
1476
1476
|
// ── Network outbound (mutating or remote inference)
|
|
1477
|
-
{ match: /^(image_generate|generate_image|vision|video_understand)$/, info: NETWORK_OUTBOUND },
|
|
1477
|
+
{ match: /^(image_generate|generate_image|generate_audio|generate_tts|create_audio_file|vision|video_understand|telegram_send_file)$/, info: NETWORK_OUTBOUND },
|
|
1478
1478
|
{ match: /^(transcribe_file|transcribe_url|youtube_download)$/, info: NETWORK_OUTBOUND },
|
|
1479
1479
|
{ match: /^(fortemi_bridge)$/, info: NETWORK_OUTBOUND },
|
|
1480
1480
|
// ── Memory tools
|
|
@@ -1491,7 +1491,7 @@ var init_security_classifier = __esm({
|
|
|
1491
1491
|
{ match: /^(file_read|file_explore|list_directory|grep_search|glob_find|find_files)$/, info: LOCAL_READ },
|
|
1492
1492
|
{ match: /^(image_read|ocr|ocr_pdf|ocr_image_advanced|pdf_to_text|structured_read|read_structured_file)$/, info: LOCAL_READ },
|
|
1493
1493
|
{ match: /^(symbol_search|impact_analysis|code_neighbors|repo_map|codebase_map|semantic_map|import_graph)$/, info: LOCAL_READ },
|
|
1494
|
-
{ match: /^(diagnostic|git_info|environment_snapshot|process_health|todo_read|explore_tools)$/, info: LOCAL_READ },
|
|
1494
|
+
{ match: /^(diagnostic|git_info|environment_snapshot|process_health|todo_read|explore_tools|telegram_media_recent)$/, info: LOCAL_READ },
|
|
1495
1495
|
{ match: /^(log_explore|log_packet|change_log|phase_recall|code_graph)$/, info: LOCAL_READ },
|
|
1496
1496
|
{ match: /^skill_(list|execute|read)$/, info: LOCAL_READ },
|
|
1497
1497
|
// ── Task completion (neutral signal)
|
|
@@ -5733,13 +5733,20 @@ var init_explore_tools = __esm({
|
|
|
5733
5733
|
diagnostic: "Run project diagnostics (build, test, lint)",
|
|
5734
5734
|
image_read: "Read and describe image contents",
|
|
5735
5735
|
screenshot: "Capture a screenshot of the desktop",
|
|
5736
|
+
ocr: "Extract text from images via OCR",
|
|
5736
5737
|
ocr_image: "Extract text from images via OCR",
|
|
5738
|
+
ocr_image_advanced: "Advanced OCR for images with layout-aware extraction",
|
|
5737
5739
|
ocr_pdf: "Extract text from PDF pages via OCR",
|
|
5738
5740
|
pdf_to_text: "Convert PDF to plain text",
|
|
5739
5741
|
vision: "Describe what's on screen using Moondream",
|
|
5742
|
+
video_understand: "Analyze a video file with transcription and keyframe understanding",
|
|
5743
|
+
audio_analyze: "Classify sounds, detect speech, inspect spectrum, or analyze audio files",
|
|
5740
5744
|
desktop_click: "Click at coordinates on the desktop",
|
|
5741
5745
|
desktop_describe: "Describe a region of the desktop",
|
|
5742
5746
|
transcribe_file: "Transcribe audio/video files to text",
|
|
5747
|
+
telegram_media_recent: "List recent Telegram media available in the current chat scope",
|
|
5748
|
+
generate_audio: "Generate sound effects or music with local model backends",
|
|
5749
|
+
generate_tts: "Generate speech from text with configured voice/TTS backends",
|
|
5743
5750
|
create_tool: "Create a new custom tool from a workflow",
|
|
5744
5751
|
manage_tools: "List, inspect, or remove custom tools",
|
|
5745
5752
|
skill_list: "List available AIWG skills",
|
|
@@ -84452,7 +84459,7 @@ var require_mime_types = __commonJS({
|
|
|
84452
84459
|
"../node_modules/mime-types/index.js"(exports) {
|
|
84453
84460
|
"use strict";
|
|
84454
84461
|
var db = require_mime_db();
|
|
84455
|
-
var
|
|
84462
|
+
var extname17 = __require("path").extname;
|
|
84456
84463
|
var EXTRACT_TYPE_REGEXP = /^\s*([^;\s]*)(?:;|\s|$)/;
|
|
84457
84464
|
var TEXT_TYPE_REGEXP = /^text\//i;
|
|
84458
84465
|
exports.charset = charset;
|
|
@@ -84506,7 +84513,7 @@ var require_mime_types = __commonJS({
|
|
|
84506
84513
|
if (!path11 || typeof path11 !== "string") {
|
|
84507
84514
|
return false;
|
|
84508
84515
|
}
|
|
84509
|
-
var extension4 =
|
|
84516
|
+
var extension4 = extname17("x." + path11).toLowerCase().substr(1);
|
|
84510
84517
|
if (!extension4) {
|
|
84511
84518
|
return false;
|
|
84512
84519
|
}
|
|
@@ -250335,6 +250342,12 @@ import { spawn as spawn9 } from "node:child_process";
|
|
|
250335
250342
|
import { existsSync as existsSync23, statSync as statSync8 } from "node:fs";
|
|
250336
250343
|
import { chmod as chmod3, mkdir as mkdir11, writeFile as writeFile16 } from "node:fs/promises";
|
|
250337
250344
|
import { join as join36, resolve as resolve18 } from "node:path";
|
|
250345
|
+
/**
 * Build the repository id of a model published under the official
 * Black Forest Labs organisation.
 *
 * @param {string} model - Bare model name, e.g. "FLUX.1-dev".
 * @returns {string} The id in `<OFFICIAL_BFL_ORG>/<model>` form.
 */
function officialBflModel(model) {
  return `${OFFICIAL_BFL_ORG}/${model}`;
}
|
|
250348
|
+
/**
 * Map a requested image model id to the id that should actually be used.
 *
 * Ids listed in IMAGE_GENERATION_MODEL_REPLACEMENTS are swapped for their
 * replacement; any other id is returned unchanged. The lookup is an exact,
 * case-sensitive Map lookup.
 *
 * @param {string} model - Requested model id.
 * @returns {string} Replacement id when one is registered, otherwise `model`.
 */
function normalizeImageGenerationModel(model) {
  return IMAGE_GENERATION_MODEL_REPLACEMENTS.get(model) ?? model;
}
|
|
250338
250351
|
function parsePercent(text) {
|
|
250339
250352
|
const match = text.match(/\b(\d{1,3})%\b/);
|
|
250340
250353
|
if (!match)
|
|
@@ -250397,35 +250410,44 @@ function isBackend(value2) {
|
|
|
250397
250410
|
/**
 * Look up the preset describing an image-generation model.
 *
 * The id is first passed through normalizeImageGenerationModel, so a
 * replaced id resolves to the preset of its replacement.
 *
 * @param {string | undefined} model - Requested model id.
 * @returns {object | undefined} The preset whose `id` equals the normalized
 *   id, or `undefined` when `model` is falsy or no preset matches.
 */
function getImageGenerationPreset(model) {
  if (!model) {
    return void 0;
  }
  const resolved = normalizeImageGenerationModel(model);
  return IMAGE_GENERATION_MODEL_PRESETS.find((preset) => preset.id === resolved);
}
|
|
250402
250416
|
/**
 * Resolve the quality ladder into preset objects.
 *
 * Each id in IMAGE_GENERATION_QUALITY_LADDER is looked up with
 * getImageGenerationPreset; ids without a preset are dropped, and the
 * ladder order is preserved.
 *
 * @returns {object[]} Presets in ladder order.
 */
function imageGenerationQualityLadder() {
  return IMAGE_GENERATION_QUALITY_LADDER
    .map((id) => getImageGenerationPreset(id))
    .filter((preset) => Boolean(preset));
}
|
|
250419
|
+
/**
 * List the presets that declare themselves as a fallback for a model.
 *
 * A preset qualifies when its optional `fallbackFor` array contains the
 * normalized form of `model`. Presets without `fallbackFor` never match.
 *
 * @param {string | undefined} model - Requested model id.
 * @returns {object[]} Matching presets in IMAGE_GENERATION_MODEL_PRESETS
 *   order; an empty array when `model` is falsy.
 */
function imageGenerationFallbackAlternates(model) {
  if (!model) {
    return [];
  }
  const resolved = normalizeImageGenerationModel(model);
  return IMAGE_GENERATION_MODEL_PRESETS.filter((preset) => preset.fallbackFor?.includes(resolved));
}
|
|
250405
250425
|
/**
 * Decide which backend should serve an image-generation request.
 *
 * Resolution order:
 *   1. an explicitly requested backend that passes isBackend;
 *   2. "auto" when no model (or the literal "auto") was given;
 *   3. the backend of the preset matching the normalized model id;
 *   4. "ollama" for ids starting with "x/";
 *   5. "sdcpp" for "sdcpp", "sdcpp:<...>", absolute paths and
 *      .gguf/.safetensors/.ckpt files;
 *   6. "diffusers" for everything else.
 *
 * @param {string | undefined} model - Requested model id or path.
 * @param {string | undefined} requested - Backend named by the caller.
 * @returns {string} The chosen backend name, or "auto".
 */
function inferImageGenerationBackend(model, requested) {
  if (requested && isBackend(requested)) {
    return requested;
  }
  if (!model || model === "auto") {
    return "auto";
  }
  const resolved = normalizeImageGenerationModel(model);
  const preset = getImageGenerationPreset(resolved);
  if (preset) {
    return preset.backend;
  }
  if (resolved.startsWith("x/")) {
    return "ollama";
  }
  if (resolved === "sdcpp" || resolved.startsWith("sdcpp:")) {
    return "sdcpp";
  }
  // Absolute paths and single-file checkpoint formats go to stable-diffusion.cpp.
  if (resolved.startsWith("/") || resolved.endsWith(".gguf") || resolved.endsWith(".safetensors") || resolved.endsWith(".ckpt")) {
    return "sdcpp";
  }
  return "diffusers";
}
|
|
250421
250442
|
/**
 * Build a generation candidate for a model.
 *
 * The model id is normalized first, so the candidate carries the replacement
 * id rather than the id the caller asked for. An inferred backend of "auto"
 * is turned into "diffusers".
 *
 * @param {string} model - Requested model id or path.
 * @param {string} [requestedBackend] - Backend named by the caller, if any.
 * @returns {{ model: string, backend: string, preset: (object | undefined) }}
 *   The normalized id, the backend to use, and the matching preset (if any).
 */
function imageCandidateFor(model, requestedBackend) {
  const resolved = normalizeImageGenerationModel(model);
  let backend = inferImageGenerationBackend(resolved, requestedBackend);
  if (backend === "auto") {
    backend = "diffusers";
  }
  return {
    model: resolved,
    backend,
    preset: getImageGenerationPreset(resolved)
  };
}
|
|
250431
250453
|
function imageGenerationFallbackCandidates(requestedModel, requestedBackend, allowFallback = true) {
|
|
@@ -250438,6 +250460,8 @@ function imageGenerationFallbackCandidates(requestedModel, requestedBackend, all
|
|
|
250438
250460
|
};
|
|
250439
250461
|
if (requestedModel) {
|
|
250440
250462
|
add2(imageCandidateFor(requestedModel, requestedBackend));
|
|
250463
|
+
for (const alternate of imageGenerationFallbackAlternates(requestedModel))
|
|
250464
|
+
add2(imageCandidateFor(alternate.id));
|
|
250441
250465
|
} else if (requestedBackend && requestedBackend !== "auto") {
|
|
250442
250466
|
const firstForBackend = ladder.find((preset) => preset.backend === requestedBackend);
|
|
250443
250467
|
add2(imageCandidateFor(firstForBackend?.id ?? (requestedBackend === "ollama" ? DEFAULT_OLLAMA_IMAGE_MODEL : DEFAULT_DIFFUSERS_IMAGE_MODEL), requestedBackend));
|
|
@@ -250446,10 +250470,14 @@ function imageGenerationFallbackCandidates(requestedModel, requestedBackend, all
|
|
|
250446
250470
|
}
|
|
250447
250471
|
if (!allowFallback)
|
|
250448
250472
|
return candidates.length ? candidates : [imageCandidateFor(DEFAULT_DIFFUSERS_IMAGE_MODEL, requestedBackend)];
|
|
250449
|
-
const
|
|
250473
|
+
const normalizedRequestedModel = requestedModel ? normalizeImageGenerationModel(requestedModel) : void 0;
|
|
250474
|
+
const primaryIndex = normalizedRequestedModel ? ladder.findIndex((preset) => preset.id === normalizedRequestedModel) : requestedBackend && requestedBackend !== "auto" ? ladder.findIndex((preset) => preset.backend === requestedBackend) : 0;
|
|
250450
250475
|
const fallbackTail = primaryIndex >= 0 ? ladder.slice(primaryIndex) : ladder;
|
|
250451
|
-
for (const preset of fallbackTail)
|
|
250476
|
+
for (const preset of fallbackTail) {
|
|
250452
250477
|
add2(imageCandidateFor(preset.id));
|
|
250478
|
+
for (const alternate of imageGenerationFallbackAlternates(preset.id))
|
|
250479
|
+
add2(imageCandidateFor(alternate.id));
|
|
250480
|
+
}
|
|
250453
250481
|
return candidates;
|
|
250454
250482
|
}
|
|
250455
250483
|
function imageGenerationDir(repoRoot = ".") {
|
|
@@ -250745,13 +250773,34 @@ function parseRunnerJson(stdout) {
|
|
|
250745
250773
|
}
|
|
250746
250774
|
return null;
|
|
250747
250775
|
}
|
|
250748
|
-
var DEFAULT_DIFFUSERS_IMAGE_MODEL, DEFAULT_OLLAMA_IMAGE_MODEL, DIFFUSERS_PYTHON_PACKAGES, SDCPP_PYTHON_PACKAGES, IMAGE_GENERATION_MODEL_PRESETS, IMAGE_GENERATION_QUALITY_LADDER, OLLAMA_IMAGE_MODELS, DIFFUSERS_RUNNER, SDCPP_RUNNER, ImageGenerateTool;
|
|
250776
|
+
var DEFAULT_DIFFUSERS_IMAGE_MODEL, DEFAULT_OLLAMA_IMAGE_MODEL, SECONDARY_FLUX_DEV_MODEL, SECONDARY_FLUX_DEV_MIRROR_MODEL, SECONDARY_FLUX_DEV_COMFY_MODEL, SECONDARY_FLUX_FILL_MODEL, SECONDARY_FLUX_FILL_FP8_MODEL, SECONDARY_FLUX2_MODEL, OFFICIAL_BFL_ORG, IMAGE_GENERATION_MODEL_REPLACEMENTS, DIFFUSERS_PYTHON_PACKAGES, SDCPP_PYTHON_PACKAGES, IMAGE_GENERATION_MODEL_PRESETS, IMAGE_GENERATION_QUALITY_LADDER, OLLAMA_IMAGE_MODELS, DIFFUSERS_RUNNER, SDCPP_RUNNER, ImageGenerateTool;
|
|
250749
250777
|
var init_image_generate = __esm({
|
|
250750
250778
|
"packages/execution/dist/tools/image-generate.js"() {
|
|
250751
250779
|
"use strict";
|
|
250752
250780
|
init_venv_paths();
|
|
250753
250781
|
DEFAULT_DIFFUSERS_IMAGE_MODEL = "stabilityai/sdxl-turbo";
|
|
250754
250782
|
DEFAULT_OLLAMA_IMAGE_MODEL = "x/z-image-turbo";
|
|
250783
|
+
SECONDARY_FLUX_DEV_MODEL = "lllyasviel/flux1-dev-bnb-nf4";
|
|
250784
|
+
SECONDARY_FLUX_DEV_MIRROR_MODEL = "ChuckMcSneed/FLUX.1-dev";
|
|
250785
|
+
SECONDARY_FLUX_DEV_COMFY_MODEL = "Comfy-Org/flux1-dev";
|
|
250786
|
+
SECONDARY_FLUX_FILL_MODEL = "diffusers/FLUX.1-Fill-dev-nf4";
|
|
250787
|
+
SECONDARY_FLUX_FILL_FP8_MODEL = "boricuapab/flux1-fill-dev-fp8";
|
|
250788
|
+
SECONDARY_FLUX2_MODEL = "x/flux2-klein";
|
|
250789
|
+
OFFICIAL_BFL_ORG = "black-forest-labs";
|
|
250790
|
+
IMAGE_GENERATION_MODEL_REPLACEMENTS = /* @__PURE__ */ new Map([
|
|
250791
|
+
[officialBflModel("FLUX.1-dev"), SECONDARY_FLUX_DEV_MODEL],
|
|
250792
|
+
[officialBflModel("FLUX.1-dev-FP8"), SECONDARY_FLUX_DEV_MODEL],
|
|
250793
|
+
[officialBflModel("FLUX.1-Krea-dev"), SECONDARY_FLUX_DEV_COMFY_MODEL],
|
|
250794
|
+
[officialBflModel("FLUX.1-schnell"), SECONDARY_FLUX_DEV_MODEL],
|
|
250795
|
+
[officialBflModel("FLUX.1-Fill-dev"), SECONDARY_FLUX_FILL_MODEL],
|
|
250796
|
+
[officialBflModel("FLUX.1-Kontext-dev"), SECONDARY_FLUX_DEV_MODEL],
|
|
250797
|
+
[officialBflModel("FLUX.1-Kontext-dev-NVFP4"), SECONDARY_FLUX_DEV_MODEL],
|
|
250798
|
+
[officialBflModel("FLUX.2-klein-4B"), SECONDARY_FLUX2_MODEL],
|
|
250799
|
+
[officialBflModel("FLUX.2-klein-4b-fp8"), SECONDARY_FLUX2_MODEL],
|
|
250800
|
+
[officialBflModel("FLUX.2-klein-4b-nvfp4"), SECONDARY_FLUX2_MODEL],
|
|
250801
|
+
[officialBflModel("FLUX.2-klein-base-4b-fp8"), SECONDARY_FLUX2_MODEL],
|
|
250802
|
+
[officialBflModel("FLUX.2-klein-base-4b-nvfp4"), SECONDARY_FLUX2_MODEL]
|
|
250803
|
+
]);
|
|
250755
250804
|
DIFFUSERS_PYTHON_PACKAGES = [
|
|
250756
250805
|
"torch",
|
|
250757
250806
|
"torchvision",
|
|
@@ -250785,7 +250834,7 @@ var init_image_generate = __esm({
|
|
|
250785
250834
|
note: "Modern fast image model; good default when available in Ollama."
|
|
250786
250835
|
},
|
|
250787
250836
|
{
|
|
250788
|
-
id:
|
|
250837
|
+
id: SECONDARY_FLUX2_MODEL,
|
|
250789
250838
|
label: "FLUX.2 Klein",
|
|
250790
250839
|
backend: "ollama",
|
|
250791
250840
|
install: "ollama pull x/flux2-klein",
|
|
@@ -250801,21 +250850,92 @@ var init_image_generate = __esm({
|
|
|
250801
250850
|
note: "Compact FLUX-family Ollama path for interactive local generation."
|
|
250802
250851
|
},
|
|
250803
250852
|
{
|
|
250804
|
-
id:
|
|
250805
|
-
label: "FLUX.1 dev",
|
|
250853
|
+
id: SECONDARY_FLUX_DEV_MODEL,
|
|
250854
|
+
label: "FLUX.1 dev BNB NF4",
|
|
250806
250855
|
backend: "diffusers",
|
|
250807
|
-
install: 'python3 .omnius/image-gen/diffusers_text2image.py --model
|
|
250856
|
+
install: 'python3 .omnius/image-gen/diffusers_text2image.py --model lllyasviel/flux1-dev-bnb-nf4 --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
|
|
250808
250857
|
category: "Primary hyper-realistic baseline",
|
|
250809
|
-
sizeClass: "12B
|
|
250810
|
-
quality: "
|
|
250858
|
+
sizeClass: "12B FLUX.1 dev NF4",
|
|
250859
|
+
quality: "Traceable lower-memory FLUX.1 dev secondary-source route with strong photorealism and prompt adherence; avoids gated official BFL downloads.",
|
|
250860
|
+
minVramGB: 12,
|
|
250861
|
+
recommendedVramGB: 16,
|
|
250862
|
+
deployment: "Best with BNB-aware Diffusers/Forge-style runtimes. Falls through cleanly if the current runner cannot load it.",
|
|
250863
|
+
steps: 28,
|
|
250864
|
+
guidance: 3.5,
|
|
250865
|
+
width: 1024,
|
|
250866
|
+
height: 1024,
|
|
250867
|
+
note: "Primary serious-generation FLUX route using the secondary-source NF4 weights from the research package."
|
|
250868
|
+
},
|
|
250869
|
+
{
|
|
250870
|
+
id: SECONDARY_FLUX_DEV_MIRROR_MODEL,
|
|
250871
|
+
label: "FLUX.1 dev mirror",
|
|
250872
|
+
backend: "diffusers",
|
|
250873
|
+
install: 'python3 .omnius/image-gen/diffusers_text2image.py --model ChuckMcSneed/FLUX.1-dev --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
|
|
250874
|
+
category: "Traceable FLUX fallback",
|
|
250875
|
+
sizeClass: "12B FLUX.1 dev mirror",
|
|
250876
|
+
quality: "Full FLUX.1 dev mirror fallback from the secondary-source inventory; higher trust than anonymous mirrors, lower priority than quantized routes.",
|
|
250811
250877
|
minVramGB: 24,
|
|
250812
250878
|
recommendedVramGB: 48,
|
|
250813
|
-
deployment: "
|
|
250879
|
+
deployment: "Use after the NF4 route if full-weight mirror provenance and license terms are acceptable.",
|
|
250880
|
+
steps: 28,
|
|
250881
|
+
guidance: 3.5,
|
|
250882
|
+
width: 1024,
|
|
250883
|
+
height: 1024,
|
|
250884
|
+
fallbackFor: [SECONDARY_FLUX_DEV_MODEL],
|
|
250885
|
+
note: "Traceable mirror fallback for FLUX.1 dev from the research package."
|
|
250886
|
+
},
|
|
250887
|
+
{
|
|
250888
|
+
id: SECONDARY_FLUX_DEV_COMFY_MODEL,
|
|
250889
|
+
label: "FLUX.1 dev Comfy-Org",
|
|
250890
|
+
backend: "diffusers",
|
|
250891
|
+
install: 'python3 .omnius/image-gen/diffusers_text2image.py --model Comfy-Org/flux1-dev --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
|
|
250892
|
+
category: "Traceable FLUX fallback",
|
|
250893
|
+
sizeClass: "12B FLUX.1 dev single-file/FP8 family",
|
|
250894
|
+
quality: "ComfyUI-oriented secondary-source FLUX.1 dev route; useful when the quantized/default loader path is unavailable.",
|
|
250895
|
+
minVramGB: 16,
|
|
250896
|
+
recommendedVramGB: 24,
|
|
250897
|
+
deployment: "Best in ComfyUI/single-file workflows; the generic Diffusers runner will try it and fall through if the repo layout is incompatible.",
|
|
250898
|
+
steps: 28,
|
|
250899
|
+
guidance: 3.5,
|
|
250900
|
+
width: 1024,
|
|
250901
|
+
height: 1024,
|
|
250902
|
+
fallbackFor: [SECONDARY_FLUX_DEV_MODEL, SECONDARY_FLUX_DEV_MIRROR_MODEL],
|
|
250903
|
+
note: "Traceable Comfy-Org FLUX.1 fallback from the research package."
|
|
250904
|
+
},
|
|
250905
|
+
{
|
|
250906
|
+
id: SECONDARY_FLUX_FILL_MODEL,
|
|
250907
|
+
label: "FLUX.1 Fill dev NF4",
|
|
250908
|
+
backend: "diffusers",
|
|
250909
|
+
install: 'python3 .omnius/image-gen/diffusers_text2image.py --model diffusers/FLUX.1-Fill-dev-nf4 --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
|
|
250910
|
+
category: "Traceable FLUX fallback",
|
|
250911
|
+
sizeClass: "12B FLUX.1 Fill NF4",
|
|
250912
|
+
quality: "Lower-memory fill/inpainting secondary-source route; listed for FLUX-family coverage rather than generic text-to-image priority.",
|
|
250913
|
+
minVramGB: 12,
|
|
250914
|
+
recommendedVramGB: 16,
|
|
250915
|
+
deployment: "Best for fill/inpainting workflows. The generic text-to-image runner may not be the ideal loader, so it is not in the default ladder.",
|
|
250814
250916
|
steps: 28,
|
|
250815
250917
|
guidance: 3.5,
|
|
250816
250918
|
width: 1024,
|
|
250817
250919
|
height: 1024,
|
|
250818
|
-
note: "
|
|
250920
|
+
note: "Traceable NF4 fallback for FLUX.1 Fill dev from the research package."
|
|
250921
|
+
},
|
|
250922
|
+
{
|
|
250923
|
+
id: SECONDARY_FLUX_FILL_FP8_MODEL,
|
|
250924
|
+
label: "FLUX.1 Fill dev FP8",
|
|
250925
|
+
backend: "diffusers",
|
|
250926
|
+
install: 'python3 .omnius/image-gen/diffusers_text2image.py --model boricuapab/flux1-fill-dev-fp8 --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
|
|
250927
|
+
category: "Traceable FLUX fallback",
|
|
250928
|
+
sizeClass: "12B FLUX.1 Fill FP8",
|
|
250929
|
+
quality: "Traceable FP8 fill/inpainting conversion from the secondary-source inventory; gated status can vary.",
|
|
250930
|
+
minVramGB: 12,
|
|
250931
|
+
recommendedVramGB: 16,
|
|
250932
|
+
deployment: "Best for fill/inpainting workflows when accepted access terms allow the repo.",
|
|
250933
|
+
steps: 28,
|
|
250934
|
+
guidance: 3.5,
|
|
250935
|
+
width: 1024,
|
|
250936
|
+
height: 1024,
|
|
250937
|
+
fallbackFor: [SECONDARY_FLUX_FILL_MODEL],
|
|
250938
|
+
note: "Traceable FP8 fallback for FLUX.1 Fill dev from the research package."
|
|
250819
250939
|
},
|
|
250820
250940
|
{
|
|
250821
250941
|
id: "stabilityai/stable-diffusion-3.5-large",
|
|
@@ -250834,23 +250954,6 @@ var init_image_generate = __esm({
|
|
|
250834
250954
|
height: 1024,
|
|
250835
250955
|
note: "Primary serious-generation baseline for the Stable Diffusion ecosystem."
|
|
250836
250956
|
},
|
|
250837
|
-
{
|
|
250838
|
-
id: "black-forest-labs/FLUX.1-schnell",
|
|
250839
|
-
label: "FLUX.1 schnell",
|
|
250840
|
-
backend: "diffusers",
|
|
250841
|
-
install: 'python3 .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-schnell --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
|
|
250842
|
-
category: "Fast large-model iteration",
|
|
250843
|
-
sizeClass: "12B rectified-flow transformer",
|
|
250844
|
-
quality: "FLUX-style output with fewer steps; better for rapid iteration than absolute peak quality.",
|
|
250845
|
-
minVramGB: 16,
|
|
250846
|
-
recommendedVramGB: 24,
|
|
250847
|
-
deployment: "Use for fast prompt iteration; verify current license terms before commercial use.",
|
|
250848
|
-
steps: 4,
|
|
250849
|
-
guidance: 0,
|
|
250850
|
-
width: 1024,
|
|
250851
|
-
height: 1024,
|
|
250852
|
-
note: "Fast FLUX-family iteration path."
|
|
250853
|
-
},
|
|
250854
250957
|
{
|
|
250855
250958
|
id: "stabilityai/stable-diffusion-3.5-large-turbo",
|
|
250856
250959
|
label: "SD3.5 Large Turbo",
|
|
@@ -250901,22 +251004,6 @@ var init_image_generate = __esm({
|
|
|
250901
251004
|
height: 1024,
|
|
250902
251005
|
note: "Efficient modern large image model."
|
|
250903
251006
|
},
|
|
250904
|
-
{
|
|
250905
|
-
id: "black-forest-labs/FLUX.2-klein-4B",
|
|
250906
|
-
label: "FLUX.2 Klein 4B",
|
|
250907
|
-
backend: "diffusers",
|
|
250908
|
-
install: 'python3 .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.2-klein-4B --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
|
|
250909
|
-
category: "Modern deployable",
|
|
250910
|
-
sizeClass: "4B compact FLUX-family",
|
|
250911
|
-
quality: "Bridge between practical deployment and modern FLUX-family visual quality.",
|
|
250912
|
-
minVramGB: 12,
|
|
250913
|
-
recommendedVramGB: 16,
|
|
250914
|
-
deployment: "Better fit for consumer GPU experimentation than 8B-12B baselines.",
|
|
250915
|
-
steps: 8,
|
|
250916
|
-
width: 1024,
|
|
250917
|
-
height: 1024,
|
|
250918
|
-
note: "More deployable compact FLUX-family model."
|
|
250919
|
-
},
|
|
250920
251007
|
{
|
|
250921
251008
|
id: "deepseek-ai/Janus-Pro-7B",
|
|
250922
251009
|
label: "Janus-Pro-7B",
|
|
@@ -251070,13 +251157,12 @@ var init_image_generate = __esm({
|
|
|
251070
251157
|
}
|
|
251071
251158
|
];
|
|
251072
251159
|
IMAGE_GENERATION_QUALITY_LADDER = [
|
|
251073
|
-
|
|
251074
|
-
|
|
251160
|
+
SECONDARY_FLUX_DEV_MODEL,
|
|
251161
|
+
SECONDARY_FLUX_DEV_MIRROR_MODEL,
|
|
251162
|
+
SECONDARY_FLUX_DEV_COMFY_MODEL,
|
|
251075
251163
|
DEFAULT_OLLAMA_IMAGE_MODEL,
|
|
251076
|
-
"black-forest-labs/FLUX.1-schnell",
|
|
251077
|
-
"stabilityai/stable-diffusion-3.5-large-turbo",
|
|
251078
251164
|
"Tongyi-MAI/Z-Image-Turbo",
|
|
251079
|
-
|
|
251165
|
+
SECONDARY_FLUX2_MODEL,
|
|
251080
251166
|
DEFAULT_DIFFUSERS_IMAGE_MODEL,
|
|
251081
251167
|
"Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers",
|
|
251082
251168
|
"SimianLuo/LCM_Dreamshaper_v7",
|
|
@@ -251265,7 +251351,7 @@ if __name__ == "__main__":
|
|
|
251265
251351
|
`;
|
|
251266
251352
|
ImageGenerateTool = class {
|
|
251267
251353
|
name = "generate_image";
|
|
251268
|
-
description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first and falls back to smaller models if setup, download, or generation fails. Saves a PNG under .omnius/images and returns the file path.";
|
|
251354
|
+
description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first, including official/traceable FLUX fallbacks for Black Forest Labs models, and then falls back to smaller models if setup, download, or generation fails. Saves a PNG under .omnius/images and returns the file path.";
|
|
251269
251355
|
parameters = {
|
|
251270
251356
|
type: "object",
|
|
251271
251357
|
properties: {
|
|
@@ -251929,7 +252015,7 @@ ${errText.slice(0, 800)}`,
|
|
|
251929
252015
|
});
|
|
251930
252016
|
|
|
251931
252017
|
// packages/execution/dist/tools/audio-generate.js
|
|
251932
|
-
import { spawn as spawn10 } from "node:child_process";
|
|
252018
|
+
import { execFileSync as execFileSync2, spawn as spawn10 } from "node:child_process";
|
|
251933
252019
|
import { existsSync as existsSync24, readdirSync as readdirSync10, statSync as statSync9 } from "node:fs";
|
|
251934
252020
|
import { chmod as chmod4, mkdir as mkdir12, writeFile as writeFile17 } from "node:fs/promises";
|
|
251935
252021
|
import { join as join37 } from "node:path";
|
|
@@ -251953,6 +252039,56 @@ function backendPackages(backend) {
|
|
|
251953
252039
|
return TANGOFLUX_PACKAGES;
|
|
251954
252040
|
return DIFFUSERS_AUDIO_PACKAGES;
|
|
251955
252041
|
}
|
|
252042
|
+
/**
 * Query `nvidia-smi` for the compute capability of the first listed GPU.
 *
 * Despite its name this function reports the capability of whatever GPU is
 * listed first; deciding whether that GPU is "legacy" is left to the caller.
 *
 * @returns {{ major: number, minor: number, name: (string | undefined) } | null}
 *   The parsed capability and device name, or `null` when `nvidia-smi` cannot
 *   be run, times out (5 s), or prints nothing that starts with `<major>.<minor>`.
 */
function detectLegacyCudaComputeCapability() {
  try {
    const out = execFileSync2("nvidia-smi", ["--query-gpu=compute_cap,name", "--format=csv,noheader,nounits"], {
      encoding: "utf8",
      timeout: 5e3,
      stdio: ["ignore", "pipe", "ignore"]
    }).trim();
    // Only the first non-empty line (first GPU reported) is inspected.
    const first2 = out.split(/\r?\n/).map((line) => line.trim()).find(Boolean);
    const match = first2?.match(/^(\d+)\.(\d+)\s*,?\s*(.*)$/);
    if (!match) {
      return null;
    }
    const major = Number(match[1]);
    const minor = Number(match[2]);
    if (!Number.isFinite(major) || !Number.isFinite(minor)) {
      return null;
    }
    return { major, minor, name: match[3]?.trim() || void 0 };
  } catch {
    // Best-effort probe: a missing binary, non-zero exit or timeout all mean
    // "no capability information available".
    return null;
  }
}
|
|
252062
|
+
/**
 * Tell whether a CUDA compute capability is below 7.5.
 *
 * @param {number} major - Major part of the compute capability.
 * @param {number} minor - Minor part of the compute capability.
 * @returns {boolean} `true` for anything below 7.5 (e.g. 6.1, 7.0).
 */
function isLegacyCudaCapability(major, minor) {
  return major < 7 || (major === 7 && minor < 5);
}
|
|
252065
|
+
/**
 * Choose the pip arguments used to install PyTorch for audio generation.
 *
 * Precedence:
 *   1. a non-empty OMNIUS_AUDIO_TORCH_INDEX_URL environment variable
 *      (unpinned torch/torchaudio from that index) — this wins even when
 *      `forceLegacyCuda` is set;
 *   2. `forceLegacyCuda`: PyTorch 2.3.1 from the cu118 index;
 *   3. on linux/x64, the same pinned cu118 build when the detected GPU has a
 *      compute capability below 7.5;
 *   4. otherwise unpinned torch/torchaudio from the default index.
 *
 * @param {boolean} [forceLegacyCuda=false] - Use the legacy cu118 plan
 *   without probing the GPU.
 * @returns {{ args: string[], description: string }} Package/index arguments
 *   to append to `pip install`, plus a human-readable reason.
 */
function torchInstallPlan(forceLegacyCuda = false) {
  const indexOverride = process.env["OMNIUS_AUDIO_TORCH_INDEX_URL"];
  if (indexOverride) {
    return {
      args: ["torch", "torchaudio", "--index-url", indexOverride],
      description: `env override ${indexOverride}`
    };
  }
  // Both legacy branches install the same pinned build; only the reason differs.
  const legacyPlan = (description) => ({
    args: ["torch==2.3.1", "torchaudio==2.3.1", "--index-url", "https://download.pytorch.org/whl/cu118"],
    description
  });
  if (forceLegacyCuda) {
    return legacyPlan("runtime-detected legacy CUDA GPU; using PyTorch 2.3.1 cu118 to avoid cuDNN 9 incompatibility");
  }
  if (process.platform === "linux" && process.arch === "x64") {
    const gpu = detectLegacyCudaComputeCapability();
    if (gpu && isLegacyCudaCapability(gpu.major, gpu.minor)) {
      return legacyPlan(`CUDA legacy GPU SM ${gpu.major}.${gpu.minor}${gpu.name ? ` ${gpu.name}` : ""}; using PyTorch 2.3.1 cu118 to avoid cuDNN 9 incompatibility`);
    }
  }
  return { args: ["torch", "torchaudio"], description: "default PyTorch wheel selection" };
}
|
|
252089
|
+
/**
 * Drop the bare "torch" and "torchaudio" entries from a package list.
 *
 * Only exact matches are removed. Other names that merely start with
 * "torch" (for example "torchvision") and pinned specifiers such as
 * "torch==2.3.1" are kept.
 *
 * @param {string[]} packages - pip requirement strings.
 * @returns {string[]} A new array without the two entries; the input is not mutated.
 */
function withoutTorchPackages(packages) {
  return packages.filter((pkg) => pkg !== "torch" && pkg !== "torchaudio");
}
|
|
251956
252092
|
function backendImportCheck(backend) {
|
|
251957
252093
|
if (backend === "transformers")
|
|
251958
252094
|
return "import torch, torchaudio, transformers, scipy\nfrom transformers import AutoProcessor, MusicgenForConditionalGeneration\n";
|
|
@@ -252151,6 +252287,69 @@ async function pythonCanImport2(command, code8, repoRoot, env2) {
|
|
|
252151
252287
|
async function pythonImportResult(command, code8, repoRoot, env2) {
|
|
252152
252288
|
return await runProcess3(command, ["-c", code8], { cwd: repoRoot, timeoutMs: 6e4, env: env2 });
|
|
252153
252289
|
}
|
|
252290
|
+
/**
 * Probe the PyTorch installation of a Python interpreter.
 *
 * Runs a short Python script through runProcess3 (60 s timeout). The script
 * imports torch and prints a JSON payload with the torch version and whether
 * CUDA is available; with CUDA it also reports device capability, cuDNN
 * version and device name. It exits with status 42 when cuDNN is >= 9.0
 * (version number >= 90000) on a GPU whose compute capability is below 7.5.
 * Any other non-zero status comes from Python itself (e.g. torch missing).
 *
 * @param {string} command - Python executable to run.
 * @param {string} repoRoot - Working directory for the probe.
 * @param {object} env2 - Environment passed to the child process.
 * @returns {Promise<object>} Whatever runProcess3 resolves to; callers in
 *   this file read `code`, `stdout` and `stderr` from it.
 */
async function torchRuntimeCompatibilityResult(command, repoRoot, env2) {
  // The indentation inside these strings is Python syntax: the nested `if`
  // body must be indented deeper than the `if` itself or the probe fails
  // with an IndentationError before it can test anything.
  const code8 = [
    "import json, sys",
    "import torch",
    "payload={'torch': getattr(torch, '__version__', '?'), 'cuda_available': bool(torch.cuda.is_available())}",
    "if torch.cuda.is_available():",
    "    cap=torch.cuda.get_device_capability(0)",
    "    cudnn=torch.backends.cudnn.version() or 0",
    "    payload.update({'capability': list(cap), 'cudnn': int(cudnn), 'device': torch.cuda.get_device_name(0)})",
    "    if int(cudnn) >= 90000 and tuple(cap) < (7, 5):",
    "        print(json.dumps(payload))",
    "        raise SystemExit(42)",
    "print(json.dumps(payload))"
  ].join("\n");
  return await runProcess3(command, ["-c", code8], { cwd: repoRoot, timeoutMs: 6e4, env: env2 });
}
|
|
252306
|
+
/**
 * Force-reinstall PyTorch into a Python environment.
 *
 * The packages and index come from torchInstallPlan(forceLegacyCuda). The
 * install runs as `<command> -m pip install ... --force-reinstall <plan args>`
 * through runProcess3 with a 30-minute timeout.
 *
 * @param {string} command - Python executable whose environment is repaired.
 * @param {string} repoRoot - Working directory for pip.
 * @param {object} env2 - Environment passed to the child process.
 * @param {boolean} [forceLegacyCuda=false] - Forwarded to torchInstallPlan.
 * @param {Function} [onProgress] - Optional progress callback; receives a
 *   `{ stage: "setup", message }` event and is also handed to runProcess3.
 * @returns {Promise<void>}
 * @throws {Error} When pip exits with a non-zero code; the message includes
 *   the plan description and the trimmed stderr (or stdout when stderr is empty).
 */
async function repairTorchRuntime(command, repoRoot, env2, forceLegacyCuda = false, onProgress) {
  const plan = torchInstallPlan(forceLegacyCuda);
  onProgress?.({ stage: "setup", message: `Installing PyTorch runtime: ${plan.description}` });
  const pipArgs = [
    "-m",
    "pip",
    "install",
    "--progress-bar",
    "on",
    "--prefer-binary",
    "--force-reinstall",
    ...plan.args
  ];
  const result = await runProcess3(command, pipArgs, {
    cwd: repoRoot,
    timeoutMs: 18e5,
    env: env2,
    progressLabel: `Installing PyTorch runtime (${plan.description})`,
    onProgress
  });
  if (result.code !== 0) {
    throw new Error(`Failed to install compatible PyTorch runtime (${plan.description}).\n${trimProcessText2(result.stderr || result.stdout)}`);
  }
}
|
|
252330
|
+
/**
 * Make sure the Python environment has a PyTorch build that passes the
 * runtime compatibility probe, reinstalling it when necessary.
 *
 * Flow:
 *   1. Probe. Exit code 0 means nothing to do.
 *   2. Repair once: the legacy cu118 plan when the probe exited 42
 *      (cuDNN >= 9 on a GPU below SM 7.5), the default plan otherwise.
 *   3. Probe again. If the default install turned out to be incompatible
 *      (42), repair once more with the legacy plan and probe a final time.
 *
 * A legacy repair is never run twice in a row: repeating the identical
 * forced reinstall cannot change the outcome and can take up to 30 minutes.
 *
 * @param {string} command - Python executable to check and repair.
 * @param {string} repoRoot - Working directory for child processes.
 * @param {object} env2 - Environment passed to child processes.
 * @param {Function} [onProgress] - Optional progress callback forwarded to
 *   repairTorchRuntime.
 * @returns {Promise<void>} Resolves once a probe exits with code 0.
 * @throws {Error} When the runtime is still incompatible after the legacy
 *   repair, or could not be prepared at all. Errors thrown by
 *   repairTorchRuntime propagate unchanged.
 */
async function ensureCompatibleTorchRuntime(command, repoRoot, env2, onProgress) {
  const existing = await torchRuntimeCompatibilityResult(command, repoRoot, env2);
  if (existing.code === 0) {
    return;
  }
  const legacyRepairApplied = existing.code === 42;
  await repairTorchRuntime(command, repoRoot, env2, legacyRepairApplied, onProgress);
  const installed = await torchRuntimeCompatibilityResult(command, repoRoot, env2);
  if (installed.code === 0) {
    return;
  }
  if (installed.code === 42) {
    let finalProbe = installed;
    if (!legacyRepairApplied) {
      await repairTorchRuntime(command, repoRoot, env2, true, onProgress);
      finalProbe = await torchRuntimeCompatibilityResult(command, repoRoot, env2);
      if (finalProbe.code === 0) {
        return;
      }
    }
    throw new Error(`Audio-generation PyTorch runtime remains incompatible after cu118 repair.\n${trimProcessText2(finalProbe.stderr || finalProbe.stdout)}`);
  }
  throw new Error(`Audio-generation PyTorch runtime could not be prepared.\n${trimProcessText2(installed.stderr || installed.stdout)}`);
}
|
|
252154
252353
|
function formatAudioSetupFailure(backend, text) {
|
|
252155
252354
|
const body = trimProcessText2(text);
|
|
252156
252355
|
const lowered = text.toLowerCase();
|
|
@@ -252161,6 +252360,9 @@ function formatAudioSetupFailure(backend, text) {
|
|
|
252161
252360
|
if (lowered.includes("cuda") && lowered.includes("not available")) {
|
|
252162
252361
|
notes2.push("CUDA was not available to the selected Python environment; install a Torch build matching this machine's CUDA runtime or use CPU-compatible settings.");
|
|
252163
252362
|
}
|
|
252363
|
+
if (lowered.includes("cudnn version") && lowered.includes("sm < 7.5")) {
|
|
252364
|
+
notes2.push("The installed PyTorch wheel uses cuDNN 9 on a legacy CUDA GPU. Omnius now repairs audio-generation venvs by reinstalling PyTorch 2.3.1 from the cu118 index for SM < 7.5 hardware.");
|
|
252365
|
+
}
|
|
252164
252366
|
return [body, ...notes2.map((note) => `
|
|
252165
252367
|
${note}`)].filter(Boolean).join("");
|
|
252166
252368
|
}
|
|
@@ -252189,9 +252391,13 @@ ${trimProcessText2(created.stderr || created.stdout)}`);
|
|
|
252189
252391
|
}
|
|
252190
252392
|
}
|
|
252191
252393
|
if (await pythonCanImport2(command, backendImportCheck(backend), repoRoot, pythonEnv)) {
|
|
252192
|
-
|
|
252394
|
+
await ensureCompatibleTorchRuntime(command, repoRoot, pythonEnv, onProgress);
|
|
252395
|
+
if (await pythonCanImport2(command, backendImportCheck(backend), repoRoot, pythonEnv)) {
|
|
252396
|
+
return { command, env: pythonEnv };
|
|
252397
|
+
}
|
|
252193
252398
|
}
|
|
252194
252399
|
const packages = backendPackages(backend);
|
|
252400
|
+
await ensureCompatibleTorchRuntime(command, repoRoot, pythonEnv, onProgress);
|
|
252195
252401
|
onProgress?.({ stage: "setup", message: `Installing ${backend} audio-generation Python packages` });
|
|
252196
252402
|
const pipArgs = [
|
|
252197
252403
|
"-m",
|
|
@@ -252203,7 +252409,7 @@ ${trimProcessText2(created.stderr || created.stdout)}`);
|
|
|
252203
252409
|
...backend === "audiocraft" ? ["--only-binary", "av"] : [],
|
|
252204
252410
|
"-U",
|
|
252205
252411
|
"pip",
|
|
252206
|
-
...packages
|
|
252412
|
+
...withoutTorchPackages(packages)
|
|
252207
252413
|
];
|
|
252208
252414
|
const pip = await runProcess3(command, pipArgs, {
|
|
252209
252415
|
cwd: repoRoot,
|
|
@@ -252220,6 +252426,12 @@ ${formatAudioSetupFailure(backend, pip.stderr || pip.stdout)}`);
|
|
|
252220
252426
|
if (importCheck.code !== 0) {
|
|
252221
252427
|
throw new Error(`Audio-generation Python environment at ${venvDir} was created, but required ${backend} imports still fail.
|
|
252222
252428
|
${formatAudioSetupFailure(backend, importCheck.stderr || importCheck.stdout)}`);
|
|
252429
|
+
}
|
|
252430
|
+
await ensureCompatibleTorchRuntime(command, repoRoot, pythonEnv, onProgress);
|
|
252431
|
+
if (!await pythonCanImport2(command, backendImportCheck(backend), repoRoot, pythonEnv)) {
|
|
252432
|
+
const retry = await pythonImportResult(command, backendImportCheck(backend), repoRoot, pythonEnv);
|
|
252433
|
+
throw new Error(`Audio-generation Python environment at ${venvDir} lost required ${backend} imports after PyTorch repair.
|
|
252434
|
+
${formatAudioSetupFailure(backend, retry.stderr || retry.stdout)}`);
|
|
252223
252435
|
}
|
|
252224
252436
|
return { command, env: pythonEnv };
|
|
252225
252437
|
}
|
|
@@ -252911,6 +253123,10 @@ def _snapshot_model(repo_id):
|
|
|
252911
253123
|
def _device():
|
|
252912
253124
|
import torch
|
|
252913
253125
|
if torch.cuda.is_available():
|
|
253126
|
+
cap = torch.cuda.get_device_capability(0)
|
|
253127
|
+
cudnn = torch.backends.cudnn.version() or 0
|
|
253128
|
+
if int(cudnn) >= 90000 and tuple(cap) < (7, 5):
|
|
253129
|
+
raise RuntimeError(f"PyTorch cuDNN {cudnn} is incompatible with CUDA device {torch.cuda.get_device_name(0)} SM {cap[0]}.{cap[1]}; recreate the audio venv or let Omnius repair it with a cu118-compatible Torch wheel")
|
|
252914
253130
|
return "cuda"
|
|
252915
253131
|
if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
|
|
252916
253132
|
return "mps"
|
|
@@ -253103,6 +253319,10 @@ def _snapshot_model(repo_id):
|
|
|
253103
253319
|
def _device():
|
|
253104
253320
|
import torch
|
|
253105
253321
|
if torch.cuda.is_available():
|
|
253322
|
+
cap = torch.cuda.get_device_capability(0)
|
|
253323
|
+
cudnn = torch.backends.cudnn.version() or 0
|
|
253324
|
+
if int(cudnn) >= 90000 and tuple(cap) < (7, 5):
|
|
253325
|
+
raise RuntimeError(f"PyTorch cuDNN {cudnn} is incompatible with CUDA device {torch.cuda.get_device_name(0)} SM {cap[0]}.{cap[1]}; recreate the audio venv or let Omnius repair it with a cu118-compatible Torch wheel")
|
|
253106
253326
|
return "cuda"
|
|
253107
253327
|
if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
|
|
253108
253328
|
return "mps"
|
|
@@ -477268,7 +477488,7 @@ var require_path_browserify = __commonJS({
|
|
|
477268
477488
|
return path11.slice(start2, end);
|
|
477269
477489
|
}
|
|
477270
477490
|
},
|
|
477271
|
-
extname: function
|
|
477491
|
+
extname: function extname17(path11) {
|
|
477272
477492
|
assertPath(path11);
|
|
477273
477493
|
var startDot = -1;
|
|
477274
477494
|
var startPart = 0;
|
|
@@ -507429,22 +507649,22 @@ Saved to: ${tempFile}`,
|
|
|
507429
507649
|
});
|
|
507430
507650
|
|
|
507431
507651
|
// packages/execution/dist/tools/audio-playback.js
|
|
507432
|
-
import { execFileSync as
|
|
507652
|
+
import { execFileSync as execFileSync3, execSync as execSync29, spawn as spawn16 } from "node:child_process";
|
|
507433
507653
|
import { copyFileSync as copyFileSync2, existsSync as existsSync40, statSync as statSync18, writeFileSync as writeFileSync16, mkdirSync as mkdirSync16, readdirSync as readdirSync14 } from "node:fs";
|
|
507434
507654
|
import { basename as basename12, extname as extname10, isAbsolute, join as join58 } from "node:path";
|
|
507435
507655
|
import { homedir as homedir14, tmpdir as tmpdir11 } from "node:os";
|
|
507436
507656
|
function hasCommand3(command) {
|
|
507437
507657
|
try {
|
|
507438
507658
|
if (process.platform === "win32") {
|
|
507439
|
-
|
|
507659
|
+
execFileSync3("where", [command], { stdio: "ignore", timeout: 2e3 });
|
|
507440
507660
|
} else {
|
|
507441
|
-
|
|
507661
|
+
execFileSync3("command", ["-v", command], { stdio: "ignore", timeout: 2e3 });
|
|
507442
507662
|
}
|
|
507443
507663
|
return true;
|
|
507444
507664
|
} catch {
|
|
507445
507665
|
if (process.platform !== "win32") {
|
|
507446
507666
|
try {
|
|
507447
|
-
|
|
507667
|
+
execFileSync3("which", [command], { stdio: "ignore", timeout: 2e3 });
|
|
507448
507668
|
return true;
|
|
507449
507669
|
} catch {
|
|
507450
507670
|
return false;
|
|
@@ -507499,7 +507719,7 @@ function playSoundFile(file, opts = {}) {
|
|
|
507499
507719
|
};
|
|
507500
507720
|
}
|
|
507501
507721
|
try {
|
|
507502
|
-
|
|
507722
|
+
execFileSync3(command.command, command.args, { timeout: opts.timeoutMs ?? 3e5, stdio: "pipe" });
|
|
507503
507723
|
return { ok: true, player: command.label };
|
|
507504
507724
|
} catch (err) {
|
|
507505
507725
|
return { ok: false, error: `Playback via ${command.label} failed: ${err instanceof Error ? err.message.slice(0, 300) : String(err).slice(0, 300)}` };
|
|
@@ -507646,13 +507866,13 @@ function ensureSupertonicInstalled() {
|
|
|
507646
507866
|
const py = findPython32();
|
|
507647
507867
|
if (!py)
|
|
507648
507868
|
throw new Error("python3 is required to set up Supertonic TTS.");
|
|
507649
|
-
|
|
507869
|
+
execFileSync3(py, ["-m", "venv", join58(voiceDir(), "supertonic3-venv")], { stdio: "pipe", timeout: 18e4 });
|
|
507650
507870
|
}
|
|
507651
507871
|
try {
|
|
507652
|
-
|
|
507872
|
+
execFileSync3(venvPy, ["-c", "import supertonic"], { stdio: "pipe", timeout: 1e4 });
|
|
507653
507873
|
} catch {
|
|
507654
|
-
|
|
507655
|
-
|
|
507874
|
+
execFileSync3(venvPy, ["-m", "pip", "install", "--quiet", "--upgrade", "pip"], { stdio: "pipe", timeout: 12e4 });
|
|
507875
|
+
execFileSync3(venvPy, ["-m", "pip", "install", "--quiet", "supertonic"], { stdio: "pipe", timeout: 6e5 });
|
|
507656
507876
|
}
|
|
507657
507877
|
mkdirSync16(voiceDir(), { recursive: true });
|
|
507658
507878
|
writeFileSync16(supertonicInferScript(), SUPERTONIC_INFER_PY, "utf-8");
|
|
@@ -507667,19 +507887,19 @@ function ensureMlxInstalled() {
|
|
|
507667
507887
|
const py = findPython32();
|
|
507668
507888
|
if (!py)
|
|
507669
507889
|
throw new Error("python3 is required to set up MLX Audio.");
|
|
507670
|
-
|
|
507890
|
+
execFileSync3(py, ["-m", "venv", join58(voiceDir(), "mlx-venv")], { stdio: "pipe", timeout: 18e4 });
|
|
507671
507891
|
}
|
|
507672
507892
|
try {
|
|
507673
|
-
|
|
507893
|
+
execFileSync3(venvPy, ["-c", "import mlx_audio"], { stdio: "pipe", timeout: 1e4 });
|
|
507674
507894
|
} catch {
|
|
507675
|
-
|
|
507676
|
-
|
|
507895
|
+
execFileSync3(venvPy, ["-m", "pip", "install", "--quiet", "--upgrade", "pip"], { stdio: "pipe", timeout: 12e4 });
|
|
507896
|
+
execFileSync3(venvPy, ["-m", "pip", "install", "--quiet", "mlx-audio"], { stdio: "pipe", timeout: 6e5 });
|
|
507677
507897
|
}
|
|
507678
507898
|
return venvPy;
|
|
507679
507899
|
}
|
|
507680
507900
|
function pythonCanImportLuxTts(venvPy) {
|
|
507681
507901
|
try {
|
|
507682
|
-
|
|
507902
|
+
execFileSync3(venvPy, [
|
|
507683
507903
|
"-c",
|
|
507684
507904
|
"import sys, os; sys.path.insert(0, os.environ['LUXTTS_REPO_PATH']); from zipvoice.luxvoice import LuxTTS; print('ok')"
|
|
507685
507905
|
], {
|
|
@@ -507693,7 +507913,7 @@ function pythonCanImportLuxTts(venvPy) {
|
|
|
507693
507913
|
}
|
|
507694
507914
|
}
|
|
507695
507915
|
function pipInstall(venvPy, packages, timeout2 = 9e5) {
|
|
507696
|
-
|
|
507916
|
+
execFileSync3(venvPy, ["-m", "pip", "install", "--prefer-binary", ...packages], {
|
|
507697
507917
|
stdio: "pipe",
|
|
507698
507918
|
timeout: timeout2,
|
|
507699
507919
|
env: process.env
|
|
@@ -507711,9 +507931,9 @@ function ensureLuxttsInstalled() {
|
|
|
507711
507931
|
if (!py)
|
|
507712
507932
|
throw new Error("python3 is required to set up LuxTTS voice cloning.");
|
|
507713
507933
|
if (!existsSync40(venvPy)) {
|
|
507714
|
-
|
|
507934
|
+
execFileSync3(py, ["-m", "venv", luxttsVenvDir()], { stdio: "pipe", timeout: 18e4 });
|
|
507715
507935
|
}
|
|
507716
|
-
|
|
507936
|
+
execFileSync3(venvPy, ["-m", "pip", "install", "--upgrade", "pip", "wheel", "setuptools<81"], {
|
|
507717
507937
|
stdio: "pipe",
|
|
507718
507938
|
timeout: 3e5
|
|
507719
507939
|
});
|
|
@@ -507721,7 +507941,7 @@ function ensureLuxttsInstalled() {
|
|
|
507721
507941
|
if (!existsSync40(join58(repoDir, "zipvoice", "luxvoice.py"))) {
|
|
507722
507942
|
if (!hasCommand3("git"))
|
|
507723
507943
|
throw new Error("git is required to set up LuxTTS voice cloning.");
|
|
507724
|
-
|
|
507944
|
+
execFileSync3("git", ["clone", "--depth", "1", "https://github.com/ysharma3501/LuxTTS.git", repoDir], {
|
|
507725
507945
|
stdio: "pipe",
|
|
507726
507946
|
timeout: 3e5
|
|
507727
507947
|
});
|
|
@@ -507761,10 +507981,10 @@ function ensurePiperInstalled() {
|
|
|
507761
507981
|
if (!py)
|
|
507762
507982
|
throw new Error("python3 is required to set up Piper TTS.");
|
|
507763
507983
|
mkdirSync16(voiceDir(), { recursive: true });
|
|
507764
|
-
|
|
507984
|
+
execFileSync3(py, ["-m", "venv", piperVenvDir()], { stdio: "pipe", timeout: 18e4 });
|
|
507765
507985
|
const venvPy = process.platform === "win32" ? join58(piperVenvDir(), "Scripts", "python.exe") : join58(piperVenvDir(), "bin", "python3");
|
|
507766
|
-
|
|
507767
|
-
|
|
507986
|
+
execFileSync3(venvPy, ["-m", "pip", "install", "--quiet", "--upgrade", "pip"], { stdio: "pipe", timeout: 12e4 });
|
|
507987
|
+
execFileSync3(venvPy, ["-m", "pip", "install", "--quiet", "piper-tts"], { stdio: "pipe", timeout: 6e5 });
|
|
507768
507988
|
}
|
|
507769
507989
|
if (!existsSync40(bin)) {
|
|
507770
507990
|
throw new Error("Piper TTS installed but the piper executable was not found in the managed venv.");
|
|
@@ -508294,7 +508514,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
508294
508514
|
"d=(np.clip(wav.cpu().numpy().squeeze(), -1, 1)*32767).astype(np.int16)",
|
|
508295
508515
|
"f=wave.open(args['output'], 'wb'); f.setnchannels(1); f.setsampwidth(2); f.setframerate(48000); f.writeframes(d.tobytes()); f.close()"
|
|
508296
508516
|
].join("; ");
|
|
508297
|
-
|
|
508517
|
+
execFileSync3(venvPy, ["-c", pyScript, JSON.stringify({ text, output: outputPath2, clone_ref: cloneRef, repo: repoDir, speed })], {
|
|
508298
508518
|
stdio: "pipe",
|
|
508299
508519
|
timeout: 12e4,
|
|
508300
508520
|
env: { ...process.env, LUXTTS_REPO_PATH: repoDir }
|
|
@@ -508307,7 +508527,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
508307
508527
|
const lang = typeof args["lang"] === "string" ? args["lang"] : "en";
|
|
508308
508528
|
const speed = numberArg3(args["speed"], 1.05);
|
|
508309
508529
|
const totalStep = Math.round(numberArg3(args["total_step"], 8));
|
|
508310
|
-
const stdout =
|
|
508530
|
+
const stdout = execFileSync3(venvPy, [supertonicInferScript()], {
|
|
508311
508531
|
input: JSON.stringify({ text, output_path: outputPath2, voice_name: voice, lang, speed, total_step: totalStep }),
|
|
508312
508532
|
encoding: "utf8",
|
|
508313
508533
|
stdio: ["pipe", "pipe", "pipe"],
|
|
@@ -508330,7 +508550,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
508330
508550
|
"args=json.loads(sys.argv[1])",
|
|
508331
508551
|
"tts_gen.main(['--model', args['model'], '--text', args['text'], '--voice', args['voice'], '--lang_code', args['lang'], '--audio_path', args['output']])"
|
|
508332
508552
|
].join("; ");
|
|
508333
|
-
|
|
508553
|
+
execFileSync3(py, ["-c", pyScript, JSON.stringify({ text, model, voice, lang, output: outputPath2 })], {
|
|
508334
508554
|
stdio: "pipe",
|
|
508335
508555
|
timeout: 18e4,
|
|
508336
508556
|
cwd: tmpdir11()
|
|
@@ -508351,7 +508571,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
508351
508571
|
} else {
|
|
508352
508572
|
throw new Error(`${requireModel ? "Raw ONNX" : "Piper"} TTS requires model=<path.onnx> or voice=<path.onnx>.`);
|
|
508353
508573
|
}
|
|
508354
|
-
|
|
508574
|
+
execFileSync3(piper, argv, { input: text, stdio: ["pipe", "pipe", "pipe"], timeout: 12e4 });
|
|
508355
508575
|
return summary;
|
|
508356
508576
|
}
|
|
508357
508577
|
synthesizeEspeak(text, outputPath2, args) {
|
|
@@ -508359,7 +508579,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
508359
508579
|
throw new Error("Local fallback TTS command not found.");
|
|
508360
508580
|
const voice = typeof args["voice"] === "string" ? args["voice"] : "en";
|
|
508361
508581
|
const speed = Math.round(numberArg3(args["speed"], 160));
|
|
508362
|
-
|
|
508582
|
+
execFileSync3("espeak-ng", ["-v", voice, "-s", String(speed), "-w", outputPath2, text], {
|
|
508363
508583
|
stdio: "pipe",
|
|
508364
508584
|
timeout: 6e4
|
|
508365
508585
|
});
|
|
@@ -575505,7 +575725,7 @@ __export(image_ascii_preview_exports, {
|
|
|
575505
575725
|
extractSavedImagePath: () => extractSavedImagePath,
|
|
575506
575726
|
formatImageAsciiContext: () => formatImageAsciiContext
|
|
575507
575727
|
});
|
|
575508
|
-
import { execFileSync as
|
|
575728
|
+
import { execFileSync as execFileSync4 } from "node:child_process";
|
|
575509
575729
|
import { createRequire as createRequire5 } from "node:module";
|
|
575510
575730
|
import { existsSync as existsSync94, readFileSync as readFileSync75, statSync as statSync32 } from "node:fs";
|
|
575511
575731
|
import { resolve as resolve37 } from "node:path";
|
|
@@ -575642,7 +575862,7 @@ function convertWithFfmpeg(imagePath, width, height, timeoutMs) {
|
|
|
575642
575862
|
`scale=${width}:${height}`,
|
|
575643
575863
|
"format=gray"
|
|
575644
575864
|
].join(",");
|
|
575645
|
-
const raw =
|
|
575865
|
+
const raw = execFileSync4(
|
|
575646
575866
|
"ffmpeg",
|
|
575647
575867
|
[
|
|
575648
575868
|
"-hide_banner",
|
|
@@ -596827,6 +597047,17 @@ var init_tool_policy = __esm({
|
|
|
596827
597047
|
"todo_write",
|
|
596828
597048
|
"web_search",
|
|
596829
597049
|
"web_fetch",
|
|
597050
|
+
"image_read",
|
|
597051
|
+
"ocr",
|
|
597052
|
+
"ocr_image_advanced",
|
|
597053
|
+
"ocr_pdf",
|
|
597054
|
+
"pdf_to_text",
|
|
597055
|
+
"vision",
|
|
597056
|
+
"transcribe_file",
|
|
597057
|
+
"video_understand",
|
|
597058
|
+
"audio_analyze",
|
|
597059
|
+
"explore_tools",
|
|
597060
|
+
"telegram_media_recent",
|
|
596830
597061
|
"generate_image",
|
|
596831
597062
|
"generate_audio",
|
|
596832
597063
|
"generate_tts",
|
|
@@ -596843,6 +597074,17 @@ var init_tool_policy = __esm({
|
|
|
596843
597074
|
"web_search",
|
|
596844
597075
|
"web_fetch",
|
|
596845
597076
|
"web_crawl",
|
|
597077
|
+
"image_read",
|
|
597078
|
+
"ocr",
|
|
597079
|
+
"ocr_image_advanced",
|
|
597080
|
+
"ocr_pdf",
|
|
597081
|
+
"pdf_to_text",
|
|
597082
|
+
"vision",
|
|
597083
|
+
"transcribe_file",
|
|
597084
|
+
"video_understand",
|
|
597085
|
+
"audio_analyze",
|
|
597086
|
+
"explore_tools",
|
|
597087
|
+
"telegram_media_recent",
|
|
596846
597088
|
"generate_image",
|
|
596847
597089
|
"generate_audio",
|
|
596848
597090
|
"generate_tts",
|
|
@@ -596934,6 +597176,7 @@ function buildTelegramCreativeTools(repoRoot, chatId, backendUrl, imageDefaults
|
|
|
596934
597176
|
ensureManifest(root);
|
|
596935
597177
|
return [
|
|
596936
597178
|
scopedTool(new FileWriteTool(root), root, "create"),
|
|
597179
|
+
scopedTool(new FileEditTool(root), root, "edit"),
|
|
596937
597180
|
scopedTool(new StructuredFileTool(root), root, "create"),
|
|
596938
597181
|
scopedTool(new ImageGenerateTool(root, backendUrl, imageDefaults), root, "generate"),
|
|
596939
597182
|
scopedTool(new AudioGenerateTool(root, audioDefaults), root, "generate"),
|
|
@@ -597000,6 +597243,7 @@ function scopedTool(base3, root, mode) {
|
|
|
597000
597243
|
}
|
|
597001
597244
|
}
|
|
597002
597245
|
const pathKey = PATH_KEYS.find((key) => typeof next[key] === "string" && String(next[key]).trim());
|
|
597246
|
+
let restoredEditPath = null;
|
|
597003
597247
|
if (pathKey) {
|
|
597004
597248
|
const guarded = guardPath(rootAbs, String(next[pathKey]));
|
|
597005
597249
|
if (!guarded.ok) return denied(guarded.error);
|
|
@@ -597010,6 +597254,14 @@ function scopedTool(base3, root, mode) {
|
|
|
597010
597254
|
if (mode === "edit" && !manifestHas(rootAbs, guarded.path.rel)) {
|
|
597011
597255
|
return denied(`Public Telegram creative tools can only edit files created in this chat workspace: ${guarded.path.rel}`);
|
|
597012
597256
|
}
|
|
597257
|
+
if (mode === "edit" && !existsSync104(guarded.path.abs)) {
|
|
597258
|
+
const materialized = materializeTelegramCreativeArtifactForSend(rootAbs, guarded.path.rel);
|
|
597259
|
+
if (!materialized.ok) return denied(materialized.error);
|
|
597260
|
+
mkdirSync59(dirname33(guarded.path.abs), { recursive: true });
|
|
597261
|
+
writeFileSync55(guarded.path.abs, readFileSync85(materialized.path));
|
|
597262
|
+
materialized.cleanup?.();
|
|
597263
|
+
restoredEditPath = guarded.path.abs;
|
|
597264
|
+
}
|
|
597013
597265
|
if (mode === "create" && existsSync104(guarded.path.abs) && !manifestHas(rootAbs, guarded.path.rel)) {
|
|
597014
597266
|
return denied(`Refusing to overwrite a file that is not owned by this chat workspace manifest: ${guarded.path.rel}`);
|
|
597015
597267
|
}
|
|
@@ -597018,9 +597270,14 @@ function scopedTool(base3, root, mode) {
|
|
|
597018
597270
|
return denied(`${base3.name} requires a path inside the public creative workspace.`);
|
|
597019
597271
|
}
|
|
597020
597272
|
const result = await base3.execute(next);
|
|
597021
|
-
if (result.success && mode === "create" && pathKey) {
|
|
597273
|
+
if (result.success && (mode === "create" || mode === "edit") && pathKey) {
|
|
597022
597274
|
const guarded = guardPath(rootAbs, String(args[pathKey]));
|
|
597023
597275
|
if (guarded.ok) rememberCreated(rootAbs, guarded.path.abs);
|
|
597276
|
+
} else if (restoredEditPath) {
|
|
597277
|
+
try {
|
|
597278
|
+
unlinkSync19(restoredEditPath);
|
|
597279
|
+
} catch {
|
|
597280
|
+
}
|
|
597024
597281
|
}
|
|
597025
597282
|
if (result.success) {
|
|
597026
597283
|
for (const path11 of collectGeneratedArtifactPathsFromText(result.output, rootAbs)) {
|
|
@@ -597417,12 +597674,12 @@ __export(vision_ingress_exports, {
|
|
|
597417
597674
|
queryVisionModel: () => queryVisionModel,
|
|
597418
597675
|
runVisionIngress: () => runVisionIngress
|
|
597419
597676
|
});
|
|
597420
|
-
import { execFileSync as
|
|
597677
|
+
import { execFileSync as execFileSync5 } from "node:child_process";
|
|
597421
597678
|
import { existsSync as existsSync105, readFileSync as readFileSync86, unlinkSync as unlinkSync20 } from "node:fs";
|
|
597422
597679
|
import { join as join120 } from "node:path";
|
|
597423
597680
|
function isTesseractAvailable() {
|
|
597424
597681
|
try {
|
|
597425
|
-
|
|
597682
|
+
execFileSync5("tesseract", ["--version"], { stdio: "ignore", timeout: 3e3 });
|
|
597426
597683
|
return true;
|
|
597427
597684
|
} catch {
|
|
597428
597685
|
return false;
|
|
@@ -597463,7 +597720,7 @@ function advancedOcr(imagePath) {
|
|
|
597463
597720
|
for (const psm of psmModes) {
|
|
597464
597721
|
const outFile = `${tmpBase}_psm${psm}`;
|
|
597465
597722
|
try {
|
|
597466
|
-
|
|
597723
|
+
execFileSync5("tesseract", [
|
|
597467
597724
|
imagePath,
|
|
597468
597725
|
outFile,
|
|
597469
597726
|
"--psm",
|
|
@@ -597562,7 +597819,7 @@ var init_vision_ingress = __esm({
|
|
|
597562
597819
|
|
|
597563
597820
|
// packages/cli/src/tui/telegram-bridge.ts
|
|
597564
597821
|
import { mkdirSync as mkdirSync60, existsSync as existsSync106, unlinkSync as unlinkSync21, readdirSync as readdirSync36, statSync as statSync36, readFileSync as readFileSync87, writeFileSync as writeFileSync57 } from "node:fs";
|
|
597565
|
-
import { join as join121, resolve as resolve39, basename as basename23, relative as relative13, isAbsolute as isAbsolute7 } from "node:path";
|
|
597822
|
+
import { join as join121, resolve as resolve39, basename as basename23, relative as relative13, isAbsolute as isAbsolute7, extname as extname15 } from "node:path";
|
|
597566
597823
|
import { writeFile as writeFileAsync } from "node:fs/promises";
|
|
597567
597824
|
import { createHash as createHash19, randomInt } from "node:crypto";
|
|
597568
597825
|
function parseTelegramInteractionDecision(text, forcedRoute, options2 = {}) {
|
|
@@ -597760,6 +598017,19 @@ function summarizeTelegramMessageAttachments(msg) {
|
|
|
597760
598017
|
parts.push(`caption: ${truncateTelegramContextLine(msg.media.caption, 180)}`);
|
|
597761
598018
|
}
|
|
597762
598019
|
}
|
|
598020
|
+
if (msg.replyToMedia) {
|
|
598021
|
+
const details = [
|
|
598022
|
+
msg.replyToMedia.type,
|
|
598023
|
+
msg.replyToMedia.mimeType,
|
|
598024
|
+
msg.replyToMedia.fileName,
|
|
598025
|
+
msg.replyToMedia.duration ? `${msg.replyToMedia.duration}s` : "",
|
|
598026
|
+
msg.replyToMedia.fileSize ? `${msg.replyToMedia.fileSize} bytes` : ""
|
|
598027
|
+
].filter(Boolean).join(", ");
|
|
598028
|
+
parts.push(`replied-to media: ${details}`);
|
|
598029
|
+
if (msg.replyToMedia.caption) {
|
|
598030
|
+
parts.push(`replied-to caption: ${truncateTelegramContextLine(msg.replyToMedia.caption, 180)}`);
|
|
598031
|
+
}
|
|
598032
|
+
}
|
|
597763
598033
|
if (msg.poll) {
|
|
597764
598034
|
parts.push(`poll: ${truncateTelegramContextLine(msg.poll.question, 180)}`);
|
|
597765
598035
|
}
|
|
@@ -598133,6 +598403,25 @@ function telegramImageMime(media) {
|
|
|
598133
598403
|
if (ext === ".tif" || ext === ".tiff") return "image/tiff";
|
|
598134
598404
|
return "image/jpeg";
|
|
598135
598405
|
}
|
|
598406
|
+
function telegramCachedMediaIsImage(entry) {
|
|
598407
|
+
if (entry.mediaType === "photo") return true;
|
|
598408
|
+
if (entry.mimeType?.toLowerCase().startsWith("image/")) return true;
|
|
598409
|
+
return TELEGRAM_IMAGE_EXTENSIONS.has(extname15(entry.localPath).toLowerCase());
|
|
598410
|
+
}
|
|
598411
|
+
function telegramCachedMediaIsPdf(entry) {
|
|
598412
|
+
if (entry.mimeType?.toLowerCase() === "application/pdf") return true;
|
|
598413
|
+
return extname15(entry.localPath).toLowerCase() === ".pdf";
|
|
598414
|
+
}
|
|
598415
|
+
function telegramCachedMediaIsAudio(entry) {
|
|
598416
|
+
if (entry.mediaType === "audio" || entry.mediaType === "voice") return true;
|
|
598417
|
+
if (entry.mimeType?.toLowerCase().startsWith("audio/")) return true;
|
|
598418
|
+
return [".wav", ".mp3", ".flac", ".aac", ".m4a", ".ogg", ".opus"].includes(extname15(entry.localPath).toLowerCase());
|
|
598419
|
+
}
|
|
598420
|
+
function telegramCachedMediaIsVideo(entry) {
|
|
598421
|
+
if (entry.mediaType === "video" || entry.mediaType === "video_note" || entry.mediaType === "live_photo") return true;
|
|
598422
|
+
if (entry.mimeType?.toLowerCase().startsWith("video/")) return true;
|
|
598423
|
+
return [".mp4", ".mkv", ".avi", ".mov", ".webm"].includes(extname15(entry.localPath).toLowerCase());
|
|
598424
|
+
}
|
|
598136
598425
|
function isPathInside(root, path11) {
|
|
598137
598426
|
const rel = relative13(resolve39(root), resolve39(path11));
|
|
598138
598427
|
return rel === "" || Boolean(rel) && !rel.startsWith("..") && !isAbsolute7(rel);
|
|
@@ -598166,6 +598455,10 @@ function normalizeTelegramUpdate(update2) {
|
|
|
598166
598455
|
const username = message2.from?.username ?? message2.sender_chat?.username ?? "";
|
|
598167
598456
|
const chatType = message2.chat?.type ?? "private";
|
|
598168
598457
|
const media = normalizeTelegramMedia(message2);
|
|
598458
|
+
const replyTo = message2.reply_to_message && typeof message2.reply_to_message === "object" ? message2.reply_to_message : void 0;
|
|
598459
|
+
const replyToMedia = replyTo ? normalizeTelegramMedia(replyTo) : void 0;
|
|
598460
|
+
const replyToPoll = replyTo ? normalizeTelegramPoll(replyTo.poll) : void 0;
|
|
598461
|
+
const replyToText = replyTo ? replyTo.text || replyTo.caption || (replyToPoll ? formatTelegramPollSummary(replyToPoll) : "") : "";
|
|
598169
598462
|
const poll = normalizeTelegramPoll(message2.poll);
|
|
598170
598463
|
const livePhoto = normalizeTelegramLivePhoto(message2.live_photo);
|
|
598171
598464
|
const text = message2.text || message2.caption || (poll ? formatTelegramPollSummary(poll) : "");
|
|
@@ -598180,6 +598473,8 @@ function normalizeTelegramUpdate(update2) {
|
|
|
598180
598473
|
chatType,
|
|
598181
598474
|
chatTitle: message2.chat?.title,
|
|
598182
598475
|
media,
|
|
598476
|
+
replyToMedia,
|
|
598477
|
+
replyToText: replyToText || void 0,
|
|
598183
598478
|
poll,
|
|
598184
598479
|
livePhoto,
|
|
598185
598480
|
guestQueryId: typeof message2.guest_query_id === "string" ? message2.guest_query_id : void 0,
|
|
@@ -598188,9 +598483,9 @@ function normalizeTelegramUpdate(update2) {
|
|
|
598188
598483
|
isGuestMessage: sourceUpdateType === "guest_message",
|
|
598189
598484
|
isDirectMessages: Boolean(message2.chat?.is_direct_messages),
|
|
598190
598485
|
parentChatId: message2.chat?.parent_chat?.id ?? message2.direct_messages_topic?.parent_topic?.id,
|
|
598191
|
-
replyToMessageId:
|
|
598192
|
-
replyToUsername:
|
|
598193
|
-
replyToBot: Boolean(
|
|
598486
|
+
replyToMessageId: replyTo?.message_id,
|
|
598487
|
+
replyToUsername: replyTo?.from?.username ?? replyTo?.sender_chat?.username,
|
|
598488
|
+
replyToBot: Boolean(replyTo?.from?.is_bot),
|
|
598194
598489
|
mentionedUsernames: extractTelegramMentionedUsernames(message2, text),
|
|
598195
598490
|
sourceUpdateType
|
|
598196
598491
|
};
|
|
@@ -598337,7 +598632,7 @@ function renderTelegramSubAgentError(username, error) {
|
|
|
598337
598632
|
process.stdout.write(` ${c3.dim("⎿")} ${c3.red("✘")} @${username}: ${c3.dim(preview)}
|
|
598338
598633
|
`);
|
|
598339
598634
|
}
|
|
598340
|
-
var TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_PUBLIC_HELP_COMMANDS, MEDIA_CACHE_TTL_MS, TelegramBridge;
|
|
598635
|
+
var TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_PUBLIC_HELP_COMMANDS, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TelegramBridge;
|
|
598341
598636
|
var init_telegram_bridge = __esm({
|
|
598342
598637
|
"packages/cli/src/tui/telegram-bridge.ts"() {
|
|
598343
598638
|
"use strict";
|
|
@@ -598533,6 +598828,7 @@ Telegram response contract:
|
|
|
598533
598828
|
"your"
|
|
598534
598829
|
]);
|
|
598535
598830
|
TELEGRAM_PUBLIC_HELP_COMMANDS = /* @__PURE__ */ new Set(["help", "start", "auth", "call"]);
|
|
598831
|
+
TELEGRAM_IMAGE_EXTENSIONS = /* @__PURE__ */ new Set([".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".tif", ".svg"]);
|
|
598536
598832
|
MEDIA_CACHE_TTL_MS = 30 * 60 * 1e3;
|
|
598537
598833
|
TelegramBridge = class {
|
|
598538
598834
|
constructor(botToken, onMessage, agentConfig, repoRoot, toolPolicyConfig) {
|
|
@@ -598944,6 +599240,80 @@ Telegram response contract:
|
|
|
598944
599240
|
}
|
|
598945
599241
|
}
|
|
598946
599242
|
}
|
|
599243
|
+
updateLastTelegramUserMessageText(msg, text) {
|
|
599244
|
+
const sessionKey = this.sessionKeyForMessage(msg);
|
|
599245
|
+
const history = this.chatHistory.get(sessionKey);
|
|
599246
|
+
if (!history || !text.trim()) return;
|
|
599247
|
+
for (let i2 = history.length - 1; i2 >= 0; i2--) {
|
|
599248
|
+
const entry = history[i2];
|
|
599249
|
+
if (entry.role !== "user") continue;
|
|
599250
|
+
if (entry.messageId === msg.messageId || !entry.messageId && entry.text === msg.text) {
|
|
599251
|
+
entry.text = text.trim();
|
|
599252
|
+
entry.mediaSummary = summarizeTelegramMessageAttachments(msg) || entry.mediaSummary;
|
|
599253
|
+
this.updateTelegramMemoryCards(sessionKey, entry);
|
|
599254
|
+
this.saveTelegramConversationState(sessionKey);
|
|
599255
|
+
return;
|
|
599256
|
+
}
|
|
599257
|
+
}
|
|
599258
|
+
}
|
|
599259
|
+
recentTelegramMediaEntries(chatId, limit = 12) {
|
|
599260
|
+
const now = Date.now();
|
|
599261
|
+
return [...this.mediaCache.values()].filter((entry) => {
|
|
599262
|
+
if (chatId !== void 0 && String(entry.chatId) !== String(chatId)) return false;
|
|
599263
|
+
return now - entry.cachedAt <= MEDIA_CACHE_TTL_MS;
|
|
599264
|
+
}).sort((a2, b) => b.cachedAt - a2.cachedAt).slice(0, limit);
|
|
599265
|
+
}
|
|
599266
|
+
telegramMediaEntryMatchesKind(entry, kind) {
|
|
599267
|
+
if (kind === "image") return telegramCachedMediaIsImage(entry);
|
|
599268
|
+
if (kind === "pdf") return telegramCachedMediaIsPdf(entry);
|
|
599269
|
+
if (kind === "audio") return telegramCachedMediaIsAudio(entry);
|
|
599270
|
+
if (kind === "video") return telegramCachedMediaIsVideo(entry);
|
|
599271
|
+
if (kind === "transcribable") {
|
|
599272
|
+
return telegramCachedMediaIsAudio(entry) || telegramCachedMediaIsVideo(entry);
|
|
599273
|
+
}
|
|
599274
|
+
return true;
|
|
599275
|
+
}
|
|
599276
|
+
resolveTelegramScopedMediaPath(rawValue, chatId, currentMsg, kind) {
|
|
599277
|
+
const raw = String(rawValue ?? "").trim();
|
|
599278
|
+
const repoRoot = this.repoRoot || ".";
|
|
599279
|
+
const creativeRoot = telegramCreativeWorkspaceRoot(repoRoot, chatId);
|
|
599280
|
+
const mediaEntries = this.recentTelegramMediaEntries(chatId, 60).filter((entry) => this.telegramMediaEntryMatchesKind(entry, kind));
|
|
599281
|
+
const aliases = /* @__PURE__ */ new Set(["", "latest", "last", "current", "this", "that", "it", "reply", "replied", "replied-to", "replied_to"]);
|
|
599282
|
+
if (aliases.has(raw.toLowerCase())) {
|
|
599283
|
+
const replied = currentMsg?.replyToMessageId ? mediaEntries.find((entry2) => entry2.messageId === currentMsg.replyToMessageId) : void 0;
|
|
599284
|
+
const entry = replied ?? mediaEntries[0];
|
|
599285
|
+
if (!entry) {
|
|
599286
|
+
return { ok: false, error: `No recent ${kind} media is available in this Telegram chat scope.` };
|
|
599287
|
+
}
|
|
599288
|
+
return { ok: true, path: entry.localPath };
|
|
599289
|
+
}
|
|
599290
|
+
const matchingEntry = mediaEntries.find((entry) => {
|
|
599291
|
+
if (resolve39(entry.localPath) === resolve39(raw)) return true;
|
|
599292
|
+
if (basename23(entry.localPath) === raw) return true;
|
|
599293
|
+
if (entry.fileUniqueId === raw || entry.fileId === raw) return true;
|
|
599294
|
+
if (entry.messageId && String(entry.messageId) === raw) return true;
|
|
599295
|
+
return false;
|
|
599296
|
+
});
|
|
599297
|
+
if (matchingEntry) return { ok: true, path: matchingEntry.localPath };
|
|
599298
|
+
const creativeCandidate = isAbsolute7(raw) ? resolve39(raw) : resolve39(creativeRoot, raw);
|
|
599299
|
+
if (isPathInside(creativeRoot, creativeCandidate) && existsSync106(creativeCandidate)) {
|
|
599300
|
+
return { ok: true, path: creativeCandidate };
|
|
599301
|
+
}
|
|
599302
|
+
return {
|
|
599303
|
+
ok: false,
|
|
599304
|
+
error: `Path is outside this Telegram chat's media/workspace scope or does not exist: ${raw || "(empty)"}`
|
|
599305
|
+
};
|
|
599306
|
+
}
|
|
599307
|
+
resolveTelegramScopedOutputPath(rawValue, chatId, fallbackName) {
|
|
599308
|
+
const repoRoot = this.repoRoot || ".";
|
|
599309
|
+
const creativeRoot = telegramCreativeWorkspaceRoot(repoRoot, chatId);
|
|
599310
|
+
const raw = String(rawValue || fallbackName).trim() || fallbackName;
|
|
599311
|
+
const outputPath2 = isAbsolute7(raw) ? resolve39(raw) : resolve39(creativeRoot, raw);
|
|
599312
|
+
if (!isPathInside(creativeRoot, outputPath2)) {
|
|
599313
|
+
return { ok: false, error: `Output path must stay inside this Telegram chat's creative workspace: ${raw}` };
|
|
599314
|
+
}
|
|
599315
|
+
return { ok: true, path: outputPath2 };
|
|
599316
|
+
}
|
|
598947
599317
|
updateTelegramParticipantProfile(sessionKey, msg, text) {
|
|
598948
599318
|
const participantKey = String(msg.fromUserId || msg.username || msg.firstName || "unknown");
|
|
598949
599319
|
const participants = this.chatParticipants.get(sessionKey) ?? /* @__PURE__ */ new Map();
|
|
@@ -599118,6 +599488,22 @@ ${notes2}`;
|
|
|
599118
599488
|
sections.push(`### Zettelkasten Memory Recall
|
|
599119
599489
|
${cardLines.join("\n")}`);
|
|
599120
599490
|
}
|
|
599491
|
+
const recentMedia = this.recentTelegramMediaEntries(msg.chatId, 10);
|
|
599492
|
+
if (recentMedia.length > 0) {
|
|
599493
|
+
const mediaLines = recentMedia.map((entry) => {
|
|
599494
|
+
const kind = telegramCachedMediaIsImage(entry) ? "image" : entry.mediaType;
|
|
599495
|
+
const replyMark = msg.replyToMessageId && entry.messageId === msg.replyToMessageId ? " replied-to" : "";
|
|
599496
|
+
const caption = entry.caption ? ` caption:${truncateTelegramContextLine(entry.caption, 120)}` : "";
|
|
599497
|
+
const extracted = entry.extractedContent ? `
|
|
599498
|
+
${truncateTelegramContextLine(entry.extractedContent.replace(/\s+/g, " "), 220)}` : "";
|
|
599499
|
+
return `- message_id ${entry.messageId}${replyMark}: ${kind}; path ${entry.localPath}; file ${basename23(entry.localPath)}${caption}${extracted}`;
|
|
599500
|
+
});
|
|
599501
|
+
sections.push([
|
|
599502
|
+
"### Recent Chat Media",
|
|
599503
|
+
"Use these paths only as tool inputs when the user asks about media in this chat. Do not quote local paths in the visible Telegram reply.",
|
|
599504
|
+
mediaLines.join("\n")
|
|
599505
|
+
].join("\n"));
|
|
599506
|
+
}
|
|
599121
599507
|
if (olderCount > 0) {
|
|
599122
599508
|
const older = history.slice(0, olderCount);
|
|
599123
599509
|
const bySpeaker = /* @__PURE__ */ new Map();
|
|
@@ -599814,8 +600200,8 @@ Join: ${newUrl}`);
|
|
|
599814
600200
|
}
|
|
599815
600201
|
}
|
|
599816
600202
|
let steeringText = msg.text;
|
|
599817
|
-
if (msg.media) {
|
|
599818
|
-
const mediaContext = await this.
|
|
600203
|
+
if (msg.media || msg.replyToMedia) {
|
|
600204
|
+
const mediaContext = await this.processMediaContextForMessage(msg);
|
|
599819
600205
|
if (mediaContext) {
|
|
599820
600206
|
steeringText += `
|
|
599821
600207
|
|
|
@@ -599889,8 +600275,8 @@ ${mediaContext}`;
|
|
|
599889
600275
|
this.tuiWrite(() => renderTelegramSubAgentStart(msg.username, msg.text, isAdminDM));
|
|
599890
600276
|
try {
|
|
599891
600277
|
let mediaContext = "";
|
|
599892
|
-
if (msg.media) {
|
|
599893
|
-
mediaContext = await this.
|
|
600278
|
+
if (msg.media || msg.replyToMedia) {
|
|
600279
|
+
mediaContext = await this.processMediaContextForMessage(msg);
|
|
599894
600280
|
}
|
|
599895
600281
|
const result = await this.runSubAgent(msg, subAgent, mediaContext);
|
|
599896
600282
|
if (subAgent.typingInterval) {
|
|
@@ -599992,8 +600378,8 @@ ${mediaContext}`;
|
|
|
599992
600378
|
this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, `admin chat with full context/tools (${this.interactionMode})`));
|
|
599993
600379
|
try {
|
|
599994
600380
|
let mediaContext = "";
|
|
599995
|
-
if (msg.media) {
|
|
599996
|
-
mediaContext = await this.
|
|
600381
|
+
if (msg.media || msg.replyToMedia) {
|
|
600382
|
+
mediaContext = await this.processMediaContextForMessage(msg);
|
|
599997
600383
|
}
|
|
599998
600384
|
const result = await this.runSubAgent(msg, subAgent, mediaContext, "chat");
|
|
599999
600385
|
if (subAgent.typingInterval) {
|
|
@@ -600076,7 +600462,7 @@ ${mediaContext}`;
|
|
|
600076
600462
|
}
|
|
600077
600463
|
this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, `live inference: chat reply (${this.interactionMode})`));
|
|
600078
600464
|
try {
|
|
600079
|
-
const mediaContext = msg.media || msg.
|
|
600465
|
+
const mediaContext = msg.media || msg.replyToMedia || msg.livePhoto ? await this.processMediaContextForMessage(msg) : "";
|
|
600080
600466
|
const finalText = await this.runTelegramChatCompletion(
|
|
600081
600467
|
msg,
|
|
600082
600468
|
toolContext,
|
|
@@ -600569,6 +600955,128 @@ ${lines.join("\n\n")}` };
|
|
|
600569
600955
|
}
|
|
600570
600956
|
};
|
|
600571
600957
|
}
|
|
600958
|
+
if (tool.name === "image_read") {
|
|
600959
|
+
return {
|
|
600960
|
+
...tool,
|
|
600961
|
+
description: "Read only images from this Telegram chat's media cache or creative workspace. Use path='reply' for the replied-to image or path='latest' for the most recent chat image.",
|
|
600962
|
+
execute: async (args) => {
|
|
600963
|
+
const resolved = this.resolveTelegramScopedMediaPath(args["path"], chatId, currentMsg, "image");
|
|
600964
|
+
if (!resolved.ok) return { success: false, output: "", error: resolved.error };
|
|
600965
|
+
return tool.execute({ ...args, path: resolved.path });
|
|
600966
|
+
}
|
|
600967
|
+
};
|
|
600968
|
+
}
|
|
600969
|
+
if (tool.name === "ocr") {
|
|
600970
|
+
return {
|
|
600971
|
+
...tool,
|
|
600972
|
+
description: "Extract text only from images in this Telegram chat's media cache or creative workspace. Use path='reply' or path='latest' for chat media references.",
|
|
600973
|
+
execute: async (args) => {
|
|
600974
|
+
const resolved = this.resolveTelegramScopedMediaPath(args["path"], chatId, currentMsg, "image");
|
|
600975
|
+
if (!resolved.ok) return { success: false, output: "", error: resolved.error };
|
|
600976
|
+
return tool.execute({ ...args, path: resolved.path });
|
|
600977
|
+
}
|
|
600978
|
+
};
|
|
600979
|
+
}
|
|
600980
|
+
if (tool.name === "vision") {
|
|
600981
|
+
return {
|
|
600982
|
+
...tool,
|
|
600983
|
+
description: "Analyze only images from this Telegram chat's media cache or creative workspace. Use image='reply' for the replied-to image or image='latest' for the most recent chat image.",
|
|
600984
|
+
execute: async (args) => {
|
|
600985
|
+
const resolved = this.resolveTelegramScopedMediaPath(args["image"], chatId, currentMsg, "image");
|
|
600986
|
+
if (!resolved.ok) return { success: false, output: "", error: resolved.error };
|
|
600987
|
+
return tool.execute({ ...args, image: resolved.path });
|
|
600988
|
+
}
|
|
600989
|
+
};
|
|
600990
|
+
}
|
|
600991
|
+
if (tool.name === "ocr_image_advanced") {
|
|
600992
|
+
return {
|
|
600993
|
+
...tool,
|
|
600994
|
+
description: "Advanced OCR only for images in this Telegram chat's media cache or creative workspace. Batch directory mode is disabled in public Telegram scope.",
|
|
600995
|
+
execute: async (args) => {
|
|
600996
|
+
if (args["batch"] === true) return { success: false, output: "", error: "Batch directory OCR is not available in public Telegram scope." };
|
|
600997
|
+
const resolved = this.resolveTelegramScopedMediaPath(args["image"], chatId, currentMsg, "image");
|
|
600998
|
+
if (!resolved.ok) return { success: false, output: "", error: resolved.error };
|
|
600999
|
+
const next = { ...args, image: resolved.path };
|
|
601000
|
+
if (typeof next["output_dir"] === "string" && next["output_dir"].trim()) {
|
|
601001
|
+
const output = this.resolveTelegramScopedOutputPath(next["output_dir"], chatId, "ocr-output");
|
|
601002
|
+
if (!output.ok) return { success: false, output: "", error: output.error };
|
|
601003
|
+
next["output_dir"] = output.path;
|
|
601004
|
+
}
|
|
601005
|
+
return tool.execute(next);
|
|
601006
|
+
}
|
|
601007
|
+
};
|
|
601008
|
+
}
|
|
601009
|
+
if (tool.name === "transcribe_file") {
|
|
601010
|
+
return {
|
|
601011
|
+
...tool,
|
|
601012
|
+
description: "Transcribe only audio/video files from this Telegram chat's media cache or creative workspace. Use path='reply' or path='latest' for chat media references.",
|
|
601013
|
+
execute: async (args) => {
|
|
601014
|
+
const resolved = this.resolveTelegramScopedMediaPath(args["path"], chatId, currentMsg, "transcribable");
|
|
601015
|
+
if (!resolved.ok) return { success: false, output: "", error: resolved.error };
|
|
601016
|
+
return tool.execute({ ...args, path: resolved.path });
|
|
601017
|
+
}
|
|
601018
|
+
};
|
|
601019
|
+
}
|
|
601020
|
+
if (tool.name === "pdf_to_text") {
|
|
601021
|
+
return {
|
|
601022
|
+
...tool,
|
|
601023
|
+
description: "Extract text only from PDFs in this Telegram chat's media cache or creative workspace. Use path='reply' or path='latest' for chat document references.",
|
|
601024
|
+
execute: async (args) => {
|
|
601025
|
+
const resolved = this.resolveTelegramScopedMediaPath(args["path"], chatId, currentMsg, "pdf");
|
|
601026
|
+
if (!resolved.ok) return { success: false, output: "", error: resolved.error };
|
|
601027
|
+
return tool.execute({ ...args, path: resolved.path });
|
|
601028
|
+
}
|
|
601029
|
+
};
|
|
601030
|
+
}
|
|
601031
|
+
if (tool.name === "ocr_pdf") {
|
|
601032
|
+
return {
|
|
601033
|
+
...tool,
|
|
601034
|
+
description: "OCR only PDFs from this Telegram chat's media cache or creative workspace. Output, when requested, is forced into this chat's creative workspace.",
|
|
601035
|
+
execute: async (args) => {
|
|
601036
|
+
const input = this.resolveTelegramScopedMediaPath(args["input"], chatId, currentMsg, "pdf");
|
|
601037
|
+
if (!input.ok) return { success: false, output: "", error: input.error };
|
|
601038
|
+
const next = { ...args, input: input.path };
|
|
601039
|
+
if (typeof next["output"] === "string" && next["output"].trim()) {
|
|
601040
|
+
const output = this.resolveTelegramScopedOutputPath(next["output"], chatId, `ocr-${Date.now()}.pdf`);
|
|
601041
|
+
if (!output.ok) return { success: false, output: "", error: output.error };
|
|
601042
|
+
next["output"] = output.path;
|
|
601043
|
+
}
|
|
601044
|
+
return tool.execute(next);
|
|
601045
|
+
}
|
|
601046
|
+
};
|
|
601047
|
+
}
|
|
601048
|
+
if (tool.name === "video_understand") {
|
|
601049
|
+
return {
|
|
601050
|
+
...tool,
|
|
601051
|
+
description: "Analyze only video files from this Telegram chat's media cache or creative workspace. URL download is disabled in public Telegram scope; use path='reply' or path='latest'.",
|
|
601052
|
+
execute: async (args) => {
|
|
601053
|
+
if (args["url"]) return { success: false, output: "", error: "URL video analysis is not available in public Telegram scope. Use a video posted in this chat." };
|
|
601054
|
+
const resolved = this.resolveTelegramScopedMediaPath(args["path"], chatId, currentMsg, "video");
|
|
601055
|
+
if (!resolved.ok) return { success: false, output: "", error: resolved.error };
|
|
601056
|
+
return tool.execute({ ...args, path: resolved.path });
|
|
601057
|
+
}
|
|
601058
|
+
};
|
|
601059
|
+
}
|
|
601060
|
+
if (tool.name === "audio_analyze") {
|
|
601061
|
+
return {
|
|
601062
|
+
...tool,
|
|
601063
|
+
description: "Analyze only audio files from this Telegram chat's media cache or creative workspace. Microphone/listen mode is disabled in public Telegram scope.",
|
|
601064
|
+
execute: async (args) => {
|
|
601065
|
+
if (String(args["action"] || "").toLowerCase() === "listen") {
|
|
601066
|
+
return { success: false, output: "", error: "Continuous microphone listening is not available in Telegram public scope." };
|
|
601067
|
+
}
|
|
601068
|
+
const resolved = this.resolveTelegramScopedMediaPath(args["file"] ?? args["path"], chatId, currentMsg, "audio");
|
|
601069
|
+
if (!resolved.ok) return { success: false, output: "", error: resolved.error };
|
|
601070
|
+
return tool.execute({ ...args, file: resolved.path, path: resolved.path });
|
|
601071
|
+
}
|
|
601072
|
+
};
|
|
601073
|
+
}
|
|
601074
|
+
if (tool.name === "explore_tools") {
|
|
601075
|
+
return {
|
|
601076
|
+
...tool,
|
|
601077
|
+
description: "List and explain the tools available in this Telegram public/group scope. Do not invent unavailable tool names."
|
|
601078
|
+
};
|
|
601079
|
+
}
|
|
600572
601080
|
return tool;
|
|
600573
601081
|
});
|
|
600574
601082
|
}
|
|
@@ -600732,11 +601240,16 @@ Scoped workspace: ${scopedRoot}`,
|
|
|
600732
601240
|
new ImageReadTool(repoRoot),
|
|
600733
601241
|
new OCRTool(repoRoot),
|
|
600734
601242
|
new VisionTool(repoRoot),
|
|
601243
|
+
new OcrImageAdvancedTool(repoRoot),
|
|
600735
601244
|
new OcrPdfTool(repoRoot),
|
|
600736
601245
|
new PdfToTextTool(repoRoot),
|
|
600737
601246
|
// Transcription tools
|
|
600738
601247
|
new TranscribeFileTool(repoRoot),
|
|
600739
|
-
new TranscribeUrlTool(repoRoot)
|
|
601248
|
+
new TranscribeUrlTool(repoRoot),
|
|
601249
|
+
new VideoUnderstandTool(repoRoot),
|
|
601250
|
+
new AudioAnalyzeTool(),
|
|
601251
|
+
new ExploreToolsTool(),
|
|
601252
|
+
this.buildTelegramMediaRecentTool(chatId, msg)
|
|
600740
601253
|
];
|
|
600741
601254
|
const adminTools = [
|
|
600742
601255
|
new ShellTool(repoRoot),
|
|
@@ -600839,6 +601352,55 @@ Scoped workspace: ${scopedRoot}`,
|
|
|
600839
601352
|
]);
|
|
600840
601353
|
return tools.filter((tool) => !blocked.has(tool.name));
|
|
600841
601354
|
}
|
|
601355
|
+
buildTelegramMediaRecentTool(chatId, currentMsg) {
|
|
601356
|
+
const bridge = this;
|
|
601357
|
+
return {
|
|
601358
|
+
name: "telegram_media_recent",
|
|
601359
|
+
description: "List recent media files available in this Telegram chat scope, including safe aliases for image_read, ocr, vision, transcribe_file, pdf_to_text, video_understand, and audio_analyze.",
|
|
601360
|
+
parameters: {
|
|
601361
|
+
type: "object",
|
|
601362
|
+
properties: {
|
|
601363
|
+
kind: {
|
|
601364
|
+
type: "string",
|
|
601365
|
+
enum: ["media", "image", "audio", "video", "pdf", "transcribable"],
|
|
601366
|
+
description: "Filter by media kind. Defaults to all recent chat media."
|
|
601367
|
+
},
|
|
601368
|
+
limit: { type: "number", description: "Maximum entries to return, 1-20. Default: 10." }
|
|
601369
|
+
}
|
|
601370
|
+
},
|
|
601371
|
+
async execute(args) {
|
|
601372
|
+
const start2 = performance.now();
|
|
601373
|
+
const kind = String(args["kind"] || "media").toLowerCase();
|
|
601374
|
+
const limit = typeof args["limit"] === "number" && Number.isFinite(args["limit"]) ? Math.max(1, Math.min(20, Math.floor(args["limit"]))) : 10;
|
|
601375
|
+
const entries = bridge.recentTelegramMediaEntries(chatId, 60).filter((entry) => bridge.telegramMediaEntryMatchesKind(entry, kind)).slice(0, limit);
|
|
601376
|
+
if (entries.length === 0) {
|
|
601377
|
+
return { success: true, output: `No recent ${kind} media is available in this Telegram chat scope.`, durationMs: performance.now() - start2 };
|
|
601378
|
+
}
|
|
601379
|
+
const lines = entries.map((entry, index) => {
|
|
601380
|
+
const parts = [
|
|
601381
|
+
`${index + 1}. message_id ${entry.messageId || "unknown"}`,
|
|
601382
|
+
currentMsg?.replyToMessageId === entry.messageId ? "replied-to" : "",
|
|
601383
|
+
telegramCachedMediaIsImage(entry) ? "image" : telegramCachedMediaIsPdf(entry) ? "pdf" : telegramCachedMediaIsAudio(entry) ? "audio" : telegramCachedMediaIsVideo(entry) ? "video" : entry.mediaType,
|
|
601384
|
+
`file=${basename23(entry.localPath)}`,
|
|
601385
|
+
`path=${entry.localPath}`,
|
|
601386
|
+
entry.caption ? `caption=${truncateTelegramContextLine(entry.caption, 140)}` : ""
|
|
601387
|
+
].filter(Boolean);
|
|
601388
|
+
const extracted = entry.extractedContent ? `
|
|
601389
|
+
context: ${truncateTelegramContextLine(entry.extractedContent.replace(/\s+/g, " "), 240)}` : "";
|
|
601390
|
+
return `${parts.join("; ")}${extracted}`;
|
|
601391
|
+
});
|
|
601392
|
+
return {
|
|
601393
|
+
success: true,
|
|
601394
|
+
output: [
|
|
601395
|
+
"Recent scoped Telegram media:",
|
|
601396
|
+
"Use path='reply' for replied-to media, path='latest' for the most recent matching item, or one of the listed paths.",
|
|
601397
|
+
lines.join("\n")
|
|
601398
|
+
].join("\n"),
|
|
601399
|
+
durationMs: performance.now() - start2
|
|
601400
|
+
};
|
|
601401
|
+
}
|
|
601402
|
+
};
|
|
601403
|
+
}
|
|
600842
601404
|
imageGenerationDefaultsForRepo(repoRoot) {
|
|
600843
601405
|
const settings = resolveSettings(repoRoot);
|
|
600844
601406
|
return {
|
|
@@ -601056,30 +601618,36 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
|
|
|
601056
601618
|
* Downloads the file, runs it through the appropriate pipeline,
|
|
601057
601619
|
* caches it, and returns a text description for the agent.
|
|
601058
601620
|
*/
|
|
601059
|
-
async processMedia(msg) {
|
|
601060
|
-
|
|
601061
|
-
|
|
601062
|
-
const
|
|
601621
|
+
async processMedia(msg, source = "message") {
|
|
601622
|
+
const media = source === "reply" ? msg.replyToMedia : msg.media;
|
|
601623
|
+
if (!media) return "";
|
|
601624
|
+
const { type, fileId, fileUniqueId, mimeType, caption } = media;
|
|
601625
|
+
const isImageMedia = telegramMediaIsImage(media);
|
|
601626
|
+
const sourceMessageId = source === "reply" ? msg.replyToMessageId : msg.messageId;
|
|
601627
|
+
const sourceLabel = source === "reply" ? "replied-to " : "";
|
|
601063
601628
|
let ext = ".bin";
|
|
601064
|
-
if (isImageMedia) ext = telegramImageExtension(
|
|
601629
|
+
if (isImageMedia) ext = telegramImageExtension(media);
|
|
601065
601630
|
else if (type === "audio" || type === "voice") ext = ".ogg";
|
|
601066
601631
|
else if (type === "video" || type === "video_note" || type === "live_photo") ext = ".mp4";
|
|
601067
|
-
else if (
|
|
601068
|
-
const dotIdx =
|
|
601069
|
-
if (dotIdx >= 0) ext =
|
|
601632
|
+
else if (media.fileName) {
|
|
601633
|
+
const dotIdx = media.fileName.lastIndexOf(".");
|
|
601634
|
+
if (dotIdx >= 0) ext = media.fileName.slice(dotIdx);
|
|
601070
601635
|
}
|
|
601071
601636
|
const localPath = await this.downloadTelegramFile(fileId, ext);
|
|
601072
601637
|
if (!localPath) return `[Media: ${type} — failed to download]`;
|
|
601073
601638
|
const cacheEntry = {
|
|
601074
601639
|
localPath,
|
|
601075
601640
|
fileId,
|
|
601641
|
+
fileUniqueId,
|
|
601076
601642
|
chatId: msg.chatId,
|
|
601643
|
+
messageId: sourceMessageId ?? 0,
|
|
601077
601644
|
username: msg.username,
|
|
601078
601645
|
mediaType: type,
|
|
601079
601646
|
mimeType,
|
|
601647
|
+
caption,
|
|
601080
601648
|
cachedAt: Date.now()
|
|
601081
601649
|
};
|
|
601082
|
-
this.mediaCache.set(fileUniqueId
|
|
601650
|
+
this.mediaCache.set(`${String(msg.chatId)}:${String(sourceMessageId ?? 0)}:${fileUniqueId}`, cacheEntry);
|
|
601083
601651
|
const metadataKey = String(msg.chatId);
|
|
601084
601652
|
if (!this.mediaMetadata.has(metadataKey)) {
|
|
601085
601653
|
this.mediaMetadata.set(metadataKey, []);
|
|
@@ -601100,7 +601668,7 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
|
|
|
601100
601668
|
{
|
|
601101
601669
|
path: localPath,
|
|
601102
601670
|
buffer: readFileSync87(localPath),
|
|
601103
|
-
mime: telegramImageMime(
|
|
601671
|
+
mime: telegramImageMime(media)
|
|
601104
601672
|
},
|
|
601105
601673
|
this.agentConfig?.model ?? ""
|
|
601106
601674
|
);
|
|
@@ -601109,10 +601677,10 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
|
|
|
601109
601677
|
} catch {
|
|
601110
601678
|
}
|
|
601111
601679
|
if (visionContext) {
|
|
601112
|
-
description = `[
|
|
601680
|
+
description = `[${sourceLabel}image received: ${localPath}${caption ? ` — caption: "${caption}"` : ""}
|
|
601113
601681
|
${visionContext}]`;
|
|
601114
601682
|
} else {
|
|
601115
|
-
description = `[
|
|
601683
|
+
description = `[${sourceLabel}image received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use image_read, ocr, or vision tools to analyze it.]`;
|
|
601116
601684
|
}
|
|
601117
601685
|
try {
|
|
601118
601686
|
await fetch("http://127.0.0.1:11435/v1/memory/ingest", {
|
|
@@ -601136,9 +601704,9 @@ ${visionContext}]`;
|
|
|
601136
601704
|
} catch {
|
|
601137
601705
|
}
|
|
601138
601706
|
if (transcription) {
|
|
601139
|
-
description = `[
|
|
601707
|
+
description = `[${sourceLabel}voice message transcribed: "${transcription}"${caption ? ` — caption: "${caption}"` : ""}]`;
|
|
601140
601708
|
} else {
|
|
601141
|
-
description = `[
|
|
601709
|
+
description = `[${sourceLabel}audio/voice message received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use transcribe_file to transcribe it.]`;
|
|
601142
601710
|
}
|
|
601143
601711
|
try {
|
|
601144
601712
|
await fetch("http://127.0.0.1:11435/v1/memory/ingest", {
|
|
@@ -601151,13 +601719,30 @@ ${visionContext}]`;
|
|
|
601151
601719
|
}
|
|
601152
601720
|
} else if (type === "video" || type === "video_note" || type === "live_photo") {
|
|
601153
601721
|
const label = type === "live_photo" ? "Live photo" : "Video";
|
|
601154
|
-
description = `[${label} received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}.]`;
|
|
601722
|
+
description = `[${sourceLabel}${label.toLowerCase()} received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use video_understand or transcribe_file to analyze it.]`;
|
|
601155
601723
|
} else if (type === "document") {
|
|
601156
|
-
description = `[
|
|
601724
|
+
description = `[${sourceLabel}document received: ${media.fileName || "unnamed"}${mimeType ? ` (${mimeType})` : ""}, saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}.]`;
|
|
601157
601725
|
}
|
|
601158
601726
|
cacheEntry.extractedContent = description;
|
|
601159
601727
|
return description;
|
|
601160
601728
|
}
|
|
601729
|
+
async processMediaContextForMessage(msg) {
|
|
601730
|
+
const parts = [];
|
|
601731
|
+
if (msg.media) {
|
|
601732
|
+
const current = await this.processMedia(msg, "message");
|
|
601733
|
+
if (current) parts.push(current);
|
|
601734
|
+
}
|
|
601735
|
+
if (msg.replyToMedia) {
|
|
601736
|
+
const replied = await this.processMedia(msg, "reply");
|
|
601737
|
+
if (replied) parts.push(replied);
|
|
601738
|
+
}
|
|
601739
|
+
const text = parts.join("\n\n");
|
|
601740
|
+
if (text) this.updateLastTelegramUserMessageText(msg, `${msg.text}
|
|
601741
|
+
|
|
601742
|
+
[Media context]
|
|
601743
|
+
${text}`.trim());
|
|
601744
|
+
return text;
|
|
601745
|
+
}
|
|
601161
601746
|
/** Clean up expired media cache entries (older than 30 minutes) */
|
|
601162
601747
|
cleanupMediaCache() {
|
|
601163
601748
|
const now = Date.now();
|
|
@@ -625743,7 +626328,7 @@ var clipboard_media_exports = {};
|
|
|
625743
626328
|
__export(clipboard_media_exports, {
|
|
625744
626329
|
pasteClipboardImageToFile: () => pasteClipboardImageToFile
|
|
625745
626330
|
});
|
|
625746
|
-
import { execFileSync as
|
|
626331
|
+
import { execFileSync as execFileSync6, execSync as execSync58 } from "node:child_process";
|
|
625747
626332
|
import { mkdirSync as mkdirSync72, readFileSync as readFileSync99, rmSync as rmSync5, writeFileSync as writeFileSync67 } from "node:fs";
|
|
625748
626333
|
import { join as join136 } from "node:path";
|
|
625749
626334
|
function pasteClipboardImageToFile(repoRoot) {
|
|
@@ -625760,7 +626345,7 @@ function readClipboardImage() {
|
|
|
625760
626345
|
try {
|
|
625761
626346
|
execSync58("command -v pngpaste", { stdio: "ignore", timeout: 1e3 });
|
|
625762
626347
|
const tmp = `/tmp/omnius-clipboard-${Date.now()}.png`;
|
|
625763
|
-
|
|
626348
|
+
execFileSync6("pngpaste", [tmp], { timeout: 3e3 });
|
|
625764
626349
|
const buffer2 = readFileSync99(tmp);
|
|
625765
626350
|
try {
|
|
625766
626351
|
rmSync5(tmp);
|
|
@@ -625780,7 +626365,7 @@ function readClipboardImage() {
|
|
|
625780
626365
|
];
|
|
625781
626366
|
for (const attempt of attempts) {
|
|
625782
626367
|
try {
|
|
625783
|
-
const buffer2 =
|
|
626368
|
+
const buffer2 = execFileSync6(attempt.cmd, attempt.args, { timeout: 3e3, maxBuffer: 25 * 1024 * 1024 });
|
|
625784
626369
|
if (buffer2.length > 0) return { buffer: buffer2, mime: attempt.mime, ext: attempt.ext };
|
|
625785
626370
|
} catch {
|
|
625786
626371
|
continue;
|
|
@@ -625797,7 +626382,7 @@ function readClipboardImage() {
|
|
|
625797
626382
|
"$img.Save($ms,[Drawing.Imaging.ImageFormat]::Png);",
|
|
625798
626383
|
"[Console]::OpenStandardOutput().Write($ms.ToArray(),0,$ms.Length)"
|
|
625799
626384
|
].join("");
|
|
625800
|
-
const buffer2 =
|
|
626385
|
+
const buffer2 = execFileSync6("powershell.exe", ["-NoProfile", "-Command", ps], {
|
|
625801
626386
|
timeout: 5e3,
|
|
625802
626387
|
maxBuffer: 25 * 1024 * 1024
|
|
625803
626388
|
});
|
|
@@ -625816,7 +626401,7 @@ var init_clipboard_media = __esm({
|
|
|
625816
626401
|
|
|
625817
626402
|
// packages/cli/src/tui/interactive.ts
|
|
625818
626403
|
import { cwd } from "node:process";
|
|
625819
|
-
import { resolve as resolve44, join as join137, dirname as dirname38, extname as
|
|
626404
|
+
import { resolve as resolve44, join as join137, dirname as dirname38, extname as extname16, relative as relative14 } from "node:path";
|
|
625820
626405
|
import { createRequire as createRequire8 } from "node:module";
|
|
625821
626406
|
import { fileURLToPath as fileURLToPath18 } from "node:url";
|
|
625822
626407
|
import {
|
|
@@ -633118,7 +633703,7 @@ Execute this skill now. Follow the behavioral guidance above.`;
|
|
|
633118
633703
|
const imgPath = resolve44(repoRoot, cleanPath);
|
|
633119
633704
|
const imgBuffer = readFileSync100(imgPath);
|
|
633120
633705
|
const base642 = imgBuffer.toString("base64");
|
|
633121
|
-
const ext =
|
|
633706
|
+
const ext = extname16(cleanPath).toLowerCase();
|
|
633122
633707
|
const mime = ext === ".png" ? "image/png" : ext === ".gif" ? "image/gif" : ext === ".webp" ? "image/webp" : "image/jpeg";
|
|
633123
633708
|
const asciiContext = await renderAsciiPreviewForImage(
|
|
633124
633709
|
imgPath,
|