omnius 1.0.357 → 1.0.359
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +193 -70
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -567542,6 +567542,27 @@ var init_completion_resolution_verifier = __esm({
|
|
|
567542
567542
|
});
|
|
567543
567543
|
|
|
567544
567544
|
// packages/orchestrator/dist/evidenceBranch.js
|
|
567545
|
+
/**
 * Build a plain-text "structural preview" of a file for the case where the
 * query could not be located directly.
 *
 * The preview consists of a fixed instruction header, the first HEAD_LINES2
 * lines (numbered, each clipped to 180 chars), and up to 30 evenly sampled
 * "section marker" lines found after the head. The whole result is capped at
 * 1600 characters.
 *
 * @param {string[]} lines - File content split into lines.
 * @param {string} path12 - Path shown in the header.
 * @param {string} query - Query shown (quoted) in the header.
 * @returns {string} Newline-joined preview text, at most 1600 chars long.
 */
function buildStructuralPreview2(lines, path12, query) {
  const MAX_MARKERS = 30;
  const totalLines = lines.length;

  // Clip over-long lines so one huge line cannot eat the preview budget.
  const truncate = (text) => {
    if (text.length > 180) {
      return text.slice(0, 180) + "…";
    }
    return text;
  };

  // A line counts as structural when it opens with a tag, markdown heading,
  // declaration keyword, [section] header or `name =` assignment, and its
  // trimmed form is non-empty and at most 180 chars.
  const structuralPattern = /^\s*(<[A-Za-z!]|#{1,6}\s|def |class |function |export |interface |type |async |public |private |\[[^\]]+\]|[A-Za-z_][\w.]*\s*=)/;
  const looksStructural = (text) => {
    if (!structuralPattern.test(text)) {
      return false;
    }
    const trimmedLength = text.trim().length;
    return trimmedLength > 0 && trimmedLength <= 180;
  };

  const head = [];
  lines.slice(0, HEAD_LINES2).forEach((text, index) => {
    head.push(`${index + 1}: ${truncate(text)}`);
  });

  // Only lines after the head are candidates; the head is already shown.
  const markers = [];
  for (let index = HEAD_LINES2; index < totalLines; index++) {
    const text = lines[index];
    if (looksStructural(text)) {
      markers.push(`${index + 1}: ${truncate(text.trim())}`);
    }
  }

  // Too many markers: take MAX_MARKERS of them at an even stride.
  let sampled = markers;
  if (markers.length > MAX_MARKERS) {
    sampled = [];
    for (let k = 0; k < MAX_MARKERS; k++) {
      sampled.push(markers[Math.floor(k * markers.length / MAX_MARKERS)]);
    }
  }

  const parts = [
    `[STRUCTURAL PREVIEW] "${query}" was not directly located in ${path12} (${totalLines} lines). Navigate by the structure below and re-read a SPECIFIC region with offset/limit — do NOT re-read the whole file:`,
    "Head:"
  ];
  parts.push(...head);
  parts.push(sampled.length ? "Section markers (line: content):" : "(no clear section markers)");
  parts.push(...sampled);
  return parts.filter(Boolean).join("\n").slice(0, 1600);
}
|
|
567545
567566
|
function queryTerms(query) {
|
|
567546
567567
|
return [
|
|
567547
567568
|
...new Set(query.toLowerCase().replace(/[^a-z0-9_<>./-]+/g, " ").split(/\s+/).filter((w) => w.length > 2 && !STOPWORDS2.has(w)))
|
|
@@ -567692,17 +567713,20 @@ async function extractEvidence(opts) {
|
|
|
567692
567713
|
const ends = kept.map((s2) => s2.end);
|
|
567693
567714
|
const snippetLower = claim2.toLowerCase();
|
|
567694
567715
|
const covered = terms2.filter((t2) => snippetLower.includes(t2)).length;
|
|
567695
|
-
|
|
567696
|
-
|
|
567697
|
-
|
|
567698
|
-
|
|
567699
|
-
|
|
567700
|
-
|
|
567701
|
-
|
|
567702
|
-
|
|
567703
|
-
|
|
567704
|
-
|
|
567705
|
-
|
|
567716
|
+
const grepConfidence = Math.min(1, covered / Math.max(1, terms2.length));
|
|
567717
|
+
if (grepConfidence >= EXTRACT_CONFIDENCE_FLOOR) {
|
|
567718
|
+
return {
|
|
567719
|
+
path: path12,
|
|
567720
|
+
query,
|
|
567721
|
+
claim: claim2,
|
|
567722
|
+
sourceStart: starts.length ? Math.min(...starts) : null,
|
|
567723
|
+
sourceEnd: ends.length ? Math.max(...ends) : null,
|
|
567724
|
+
fileVersion,
|
|
567725
|
+
confidence: grepConfidence,
|
|
567726
|
+
exploredLines: lines.length,
|
|
567727
|
+
injectedChars: claim2.length
|
|
567728
|
+
};
|
|
567729
|
+
}
|
|
567706
567730
|
}
|
|
567707
567731
|
}
|
|
567708
567732
|
const windows = lines.length <= WINDOW_LINES * 2 ? [{ start: 1, end: lines.length, text: content, score: 1 }] : selectWindows(lines, terms2);
|
|
@@ -567725,7 +567749,7 @@ async function extractEvidence(opts) {
|
|
|
567725
567749
|
parsed = null;
|
|
567726
567750
|
}
|
|
567727
567751
|
}
|
|
567728
|
-
const claim = parsed && parsed.found && parsed.claim ? parsed.claim :
|
|
567752
|
+
const claim = parsed && parsed.found && parsed.claim ? parsed.claim : buildStructuralPreview2(lines, path12, query);
|
|
567729
567753
|
return {
|
|
567730
567754
|
path: path12,
|
|
567731
567755
|
query,
|
|
@@ -567743,7 +567767,7 @@ function shouldBranchRead(contentLength, lineCount, hasExplicitSmallRange, thres
|
|
|
567743
567767
|
return false;
|
|
567744
567768
|
return contentLength > thresholdChars || lineCount > 200;
|
|
567745
567769
|
}
|
|
567746
|
-
var WINDOW_LINES, SNIPPET_CONTEXT, HEAD_LINES2, MAX_SNIPPET_LINES, STOPWORDS2;
|
|
567770
|
+
var WINDOW_LINES, SNIPPET_CONTEXT, HEAD_LINES2, MAX_SNIPPET_LINES, EXTRACT_CONFIDENCE_FLOOR, STOPWORDS2;
|
|
567747
567771
|
var init_evidenceBranch = __esm({
|
|
567748
567772
|
"packages/orchestrator/dist/evidenceBranch.js"() {
|
|
567749
567773
|
"use strict";
|
|
@@ -567751,6 +567775,7 @@ var init_evidenceBranch = __esm({
|
|
|
567751
567775
|
SNIPPET_CONTEXT = 4;
|
|
567752
567776
|
HEAD_LINES2 = 10;
|
|
567753
567777
|
MAX_SNIPPET_LINES = 220;
|
|
567778
|
+
EXTRACT_CONFIDENCE_FLOOR = 0.3;
|
|
567754
567779
|
STOPWORDS2 = /* @__PURE__ */ new Set([
|
|
567755
567780
|
"the",
|
|
567756
567781
|
"and",
|
|
@@ -615029,35 +615054,105 @@ ${CONTENT_BG_SEQ}`);
|
|
|
615029
615054
|
(seq) => seq.endsWith("m") ? seq : ""
|
|
615030
615055
|
);
|
|
615031
615056
|
}
|
|
615032
|
-
|
|
615057
|
+
/** Resolve a dynamic-block sentinel line to its registered renderer's lines
|
|
615058
|
+
* at the given width, or null if it is not a (live) sentinel. */
|
|
615059
|
+
dynamicBlockLines(line, maxWidth) {
|
|
615060
|
+
if (!line.startsWith(this.DYNAMIC_BLOCK_MARK_PREFIX) || !line.endsWith(this.DYNAMIC_BLOCK_MARK_SUFFIX)) {
|
|
615061
|
+
return null;
|
|
615062
|
+
}
|
|
615063
|
+
const id = line.slice(
|
|
615064
|
+
this.DYNAMIC_BLOCK_MARK_PREFIX.length,
|
|
615065
|
+
line.length - this.DYNAMIC_BLOCK_MARK_SUFFIX.length
|
|
615066
|
+
);
|
|
615067
|
+
const renderer = this._dynamicBlocks.get(id);
|
|
615068
|
+
if (!renderer) return [];
|
|
615069
|
+
try {
|
|
615070
|
+
return renderer(maxWidth);
|
|
615071
|
+
} catch {
|
|
615072
|
+
return [];
|
|
615073
|
+
}
|
|
615074
|
+
}
|
|
615075
|
+
/** Reflowed-row COUNT for one buffer line at width — dynamic-block aware.
|
|
615076
|
+
* Static lines are a cache-hit `.length` (no allocation), so the count pass
|
|
615077
|
+
* over the whole backlog is cheap. */
|
|
615078
|
+
rowCountForSourceLine(line, maxWidth) {
|
|
615079
|
+
const block = this.dynamicBlockLines(line, maxWidth);
|
|
615080
|
+
if (block !== null) {
|
|
615081
|
+
let n2 = 0;
|
|
615082
|
+
for (const seg of block) n2 += this.reflowContentLine(seg, maxWidth).length;
|
|
615083
|
+
return n2;
|
|
615084
|
+
}
|
|
615085
|
+
return this.reflowContentLine(line, maxWidth).length;
|
|
615086
|
+
}
|
|
615087
|
+
/** Reflowed rows (with bufferIdx) for one buffer line — built ONLY for the
|
|
615088
|
+
* lines actually inside the viewport window. */
|
|
615089
|
+
rowsForSourceLine(line, idx, maxWidth) {
|
|
615090
|
+
const block = this.dynamicBlockLines(line, maxWidth);
|
|
615091
|
+
if (block !== null) {
|
|
615092
|
+
return block.flatMap(
|
|
615093
|
+
(seg) => this.reflowContentLine(seg, maxWidth).map((s2) => ({
|
|
615094
|
+
line: s2,
|
|
615095
|
+
bufferIdx: idx
|
|
615096
|
+
}))
|
|
615097
|
+
);
|
|
615098
|
+
}
|
|
615099
|
+
return this.reflowContentLine(line, maxWidth).map((segment) => ({
|
|
615100
|
+
line: segment,
|
|
615101
|
+
bufferIdx: idx
|
|
615102
|
+
}));
|
|
615103
|
+
}
|
|
615104
|
+
/** Total reflowed row count at width (cheap — cache-hit counts, no big array
|
|
615105
|
+
* allocation). Used for scroll bounds. */
|
|
615106
|
+
reflowedRowCount(livePartialLine, width) {
|
|
615033
615107
|
const maxWidth = Math.max(16, width);
|
|
615034
615108
|
const source = livePartialLine ? [...this._contentLines, livePartialLine] : this._contentLines;
|
|
615035
|
-
|
|
615036
|
-
|
|
615037
|
-
|
|
615038
|
-
|
|
615039
|
-
|
|
615040
|
-
|
|
615041
|
-
|
|
615042
|
-
|
|
615043
|
-
|
|
615044
|
-
|
|
615045
|
-
|
|
615046
|
-
|
|
615047
|
-
|
|
615048
|
-
|
|
615049
|
-
|
|
615050
|
-
|
|
615051
|
-
|
|
615052
|
-
|
|
615053
|
-
|
|
615054
|
-
|
|
615055
|
-
|
|
615056
|
-
|
|
615057
|
-
|
|
615058
|
-
|
|
615059
|
-
|
|
615060
|
-
}
|
|
615109
|
+
let total = 0;
|
|
615110
|
+
for (let i2 = 0; i2 < source.length; i2++) {
|
|
615111
|
+
total += this.rowCountForSourceLine(source[i2], maxWidth);
|
|
615112
|
+
}
|
|
615113
|
+
return total;
|
|
615114
|
+
}
|
|
615115
|
+
/**
|
|
615116
|
+
* VIRTUALIZED reflow — produce ONLY the viewport window's rows (the visible
|
|
615117
|
+
* `viewportRows` reflowed rows at the given scroll offset). Repaint cost is
|
|
615118
|
+
* O(viewport + dynamic blocks) instead of O(scrollback), so a long session
|
|
615119
|
+
* never re-wraps the whole 10k-line backlog on every paint (the lag-after-
|
|
615120
|
+
* thousands-of-lines stall). Returns the window rows IN ORDER (index 0 = top
|
|
615121
|
+
* visible row), the total row count (for the scrollbar/bounds), and the
|
|
615122
|
+
* clamped scroll offset.
|
|
615123
|
+
*/
|
|
615124
|
+
reflowContentWindow(livePartialLine, width, viewportRows, scrollOffset) {
|
|
615125
|
+
const maxWidth = Math.max(16, width);
|
|
615126
|
+
const source = livePartialLine ? [...this._contentLines, livePartialLine] : this._contentLines;
|
|
615127
|
+
const h = Math.max(0, viewportRows);
|
|
615128
|
+
let totalRows = 0;
|
|
615129
|
+
const counts = new Array(source.length);
|
|
615130
|
+
for (let i2 = 0; i2 < source.length; i2++) {
|
|
615131
|
+
const c8 = this.rowCountForSourceLine(source[i2], maxWidth);
|
|
615132
|
+
counts[i2] = c8;
|
|
615133
|
+
totalRows += c8;
|
|
615134
|
+
}
|
|
615135
|
+
const maxOffset = Math.max(0, totalRows - h);
|
|
615136
|
+
const off = scrollOffset < 0 ? 0 : scrollOffset > maxOffset ? maxOffset : scrollOffset;
|
|
615137
|
+
const startIdx = Math.max(0, totalRows - h - off);
|
|
615138
|
+
const endIdx = startIdx + h;
|
|
615139
|
+
const rows = [];
|
|
615140
|
+
let cursor = 0;
|
|
615141
|
+
for (let i2 = 0; i2 < source.length && cursor < endIdx; i2++) {
|
|
615142
|
+
const c8 = counts[i2];
|
|
615143
|
+
const lineStart = cursor;
|
|
615144
|
+
cursor += c8;
|
|
615145
|
+
if (cursor <= startIdx) continue;
|
|
615146
|
+
if (lineStart >= endIdx) break;
|
|
615147
|
+
const lr = this.rowsForSourceLine(source[i2], i2, maxWidth);
|
|
615148
|
+
const from3 = Math.max(0, startIdx - lineStart);
|
|
615149
|
+
const to = Math.min(c8, endIdx - lineStart);
|
|
615150
|
+
for (let k = from3; k < to; k++) {
|
|
615151
|
+
const r2 = lr[k];
|
|
615152
|
+
if (r2) rows.push(r2);
|
|
615153
|
+
}
|
|
615154
|
+
}
|
|
615155
|
+
return { rows, totalRows, startIdx, scrollOffset: off };
|
|
615061
615156
|
}
|
|
615062
615157
|
// Memoize per-line reflow: it is a PURE function of (line, width), and
|
|
615063
615158
|
// reflowContentLines re-wraps the entire scrollback every repaint. Caching
|
|
@@ -615201,7 +615296,7 @@ ${CONTENT_BG_SEQ}`);
|
|
|
615201
615296
|
maxContentScrollOffset(width = termCols(), livePartialLine = this.getLiveBufferedLine()) {
|
|
615202
615297
|
return Math.max(
|
|
615203
615298
|
0,
|
|
615204
|
-
this.
|
|
615299
|
+
this.reflowedRowCount(livePartialLine, width) - this.contentHeight
|
|
615205
615300
|
);
|
|
615206
615301
|
}
|
|
615207
615302
|
clampContentScrollOffset(width = termCols()) {
|
|
@@ -615330,26 +615425,30 @@ ${CONTENT_BG_SEQ}`);
|
|
|
615330
615425
|
const w = termCols();
|
|
615331
615426
|
const _perfOn = process.env["OMNIUS_TUI_PERF"] === "1";
|
|
615332
615427
|
const _t0 = _perfOn ? performance.now() : 0;
|
|
615333
|
-
const
|
|
615428
|
+
const win = this.reflowContentWindow(
|
|
615429
|
+
livePartialLine,
|
|
615430
|
+
w,
|
|
615431
|
+
h,
|
|
615432
|
+
this._contentScrollOffset
|
|
615433
|
+
);
|
|
615434
|
+
const reflowedLines = win.rows;
|
|
615334
615435
|
if (_perfOn) {
|
|
615335
615436
|
const _ms = performance.now() - _t0;
|
|
615336
615437
|
if (_ms > 8) {
|
|
615337
615438
|
try {
|
|
615338
615439
|
process.stderr.write(
|
|
615339
|
-
`[TUI-PERF] reflow ${_ms.toFixed(1)}ms (
|
|
615440
|
+
`[TUI-PERF] reflow ${_ms.toFixed(1)}ms (window=${win.rows.length}/${win.totalRows} rows, w=${w})
|
|
615340
615441
|
`
|
|
615341
615442
|
);
|
|
615342
615443
|
} catch {
|
|
615343
615444
|
}
|
|
615344
615445
|
}
|
|
615345
615446
|
}
|
|
615346
|
-
|
|
615347
|
-
|
|
615348
|
-
if (this._contentScrollOffset > maxOffset) {
|
|
615349
|
-
this._contentScrollOffset = maxOffset;
|
|
615447
|
+
if (win.scrollOffset !== this._contentScrollOffset) {
|
|
615448
|
+
this._contentScrollOffset = win.scrollOffset;
|
|
615350
615449
|
if (this._contentScrollOffset === 0) this._autoScroll = true;
|
|
615351
615450
|
}
|
|
615352
|
-
const startIdx =
|
|
615451
|
+
const startIdx = 0;
|
|
615353
615452
|
this._lastPaintReflow = reflowedLines;
|
|
615354
615453
|
this._lastPaintStartIdx = startIdx;
|
|
615355
615454
|
const headerSafeFloor = layout().headerBottom + 1;
|
|
@@ -615402,8 +615501,8 @@ ${CONTENT_BG_SEQ}`);
|
|
|
615402
615501
|
}
|
|
615403
615502
|
}
|
|
615404
615503
|
if (this._contentScrollOffset > 0) {
|
|
615405
|
-
const linesAbove = startIdx;
|
|
615406
|
-
const pct =
|
|
615504
|
+
const linesAbove = win.startIdx;
|
|
615505
|
+
const pct = win.totalRows > 0 ? Math.round((win.startIdx + h) / win.totalRows * 100) : 100;
|
|
615407
615506
|
const indicator = ` ↑ ${linesAbove} lines above · ${pct}% · PgDn/End to return `;
|
|
615408
615507
|
const pad = Math.max(0, w - indicator.length);
|
|
615409
615508
|
buf += `\x1B[${this.scrollRegionTop};1H\x1B[7m${indicator}${" ".repeat(pad)}\x1B[0m`;
|
|
@@ -662307,18 +662406,27 @@ function deriveVisualEvidencePlan(request) {
|
|
|
662307
662406
|
const needsText = /\b(text|read|ocr|extract|label|word|number|what does it say|transcript|character|letter|digit|spell|transcribe|copy|quote|type|what is written)\b/i.test(prompt);
|
|
662308
662407
|
const needsScene = /\b(what|who|where|describe|scene|object|person|identify|tell me about|explain|see|show|happening|look like|recogniz)\b/i.test(prompt) && !needsText;
|
|
662309
662408
|
const needsUI = /\b(ui|button|menu|dialog|window|interface|screen|dashboard|form|field|input|select|option|dropdown)\b/i.test(prompt);
|
|
662409
|
+
const comprehensive = (reason) => ({
|
|
662410
|
+
stages: [
|
|
662411
|
+
{ kind: "low_fidelity_observation", required: false },
|
|
662412
|
+
{ kind: "ocr", required: true },
|
|
662413
|
+
{ kind: "auxiliary_vision", required: true }
|
|
662414
|
+
],
|
|
662415
|
+
reason
|
|
662416
|
+
});
|
|
662310
662417
|
switch (detail) {
|
|
662311
662418
|
case "low":
|
|
662312
|
-
return { stages: [{ kind: "low_fidelity_observation", required: true }], reason: "low detail requested" };
|
|
662419
|
+
return { stages: [{ kind: "low_fidelity_observation", required: true }], reason: "low detail explicitly requested" };
|
|
662313
662420
|
case "text":
|
|
662314
|
-
return
|
|
662421
|
+
return comprehensive("text extraction requested — full vision still runs for classification");
|
|
662422
|
+
case "visual":
|
|
662423
|
+
return comprehensive("visual analysis requested");
|
|
662315
662424
|
case "full":
|
|
662316
|
-
return
|
|
662425
|
+
return comprehensive("full detail requested");
|
|
662317
662426
|
default:
|
|
662318
|
-
|
|
662319
|
-
|
|
662320
|
-
|
|
662321
|
-
return { stages, reason: needsText ? "text evidence needed" : needsScene ? "scene analysis needed" : needsUI ? "UI/document analysis needed" : "auto" };
|
|
662427
|
+
return comprehensive(
|
|
662428
|
+
needsText ? "comprehensive (text emphasis)" : needsScene ? "comprehensive (scene emphasis)" : needsUI ? "comprehensive (UI/document emphasis)" : "comprehensive (full vision + OCR on all media)"
|
|
662429
|
+
);
|
|
662322
662430
|
}
|
|
662323
662431
|
}
|
|
662324
662432
|
async function executeVisualEvidencePlan(resolution, plan, executor) {
|
|
@@ -662362,6 +662470,7 @@ __export(vision_ingress_exports, {
|
|
|
662362
662470
|
isTesseractAvailable: () => isTesseractAvailable,
|
|
662363
662471
|
isVisionModel: () => isVisionModel,
|
|
662364
662472
|
queryVisionModel: () => queryVisionModel,
|
|
662473
|
+
resolveVisionModel: () => resolveVisionModel,
|
|
662365
662474
|
runVisionIngress: () => runVisionIngress
|
|
662366
662475
|
});
|
|
662367
662476
|
import { execFileSync as execFileSync10 } from "node:child_process";
|
|
@@ -662490,25 +662599,32 @@ async function queryVisionModel(modelName, imagePath, prompt = "Describe what yo
|
|
|
662490
662599
|
return "";
|
|
662491
662600
|
}
|
|
662492
662601
|
}
|
|
662602
|
+
/**
 * Pick the model name to use for image description.
 *
 * Resolution order:
 *   1. `currentModel`, when it is truthy and isVisionModel accepts it;
 *   2. the trimmed OMNIUS_VISION_MODEL environment variable, when non-empty;
 *   3. the literal default "moondream".
 *
 * @param {string} [currentModel] - Name of the model currently in use.
 * @returns {string} Model name to query for vision.
 */
function resolveVisionModel(currentModel) {
  const currentIsUsable = currentModel ? isVisionModel(currentModel) : false;
  if (currentIsUsable) {
    return currentModel;
  }
  const override = (process.env["OMNIUS_VISION_MODEL"] || "").trim();
  return override ? override : "moondream";
}
|
|
662493
662608
|
async function runVisionIngress(image, currentModel) {
|
|
662494
662609
|
const ocrText = advancedOcr(image.path);
|
|
662495
662610
|
let visionDescription = "";
|
|
662496
662611
|
let visionUsed = false;
|
|
662497
|
-
|
|
662498
|
-
|
|
662612
|
+
const visionModel = resolveVisionModel(currentModel);
|
|
662613
|
+
if (visionModel) {
|
|
662614
|
+
visionDescription = await queryVisionModel(visionModel, image.path);
|
|
662499
662615
|
visionUsed = visionDescription.length > 0;
|
|
662500
662616
|
}
|
|
662501
662617
|
const parts = [];
|
|
662502
662618
|
if (ocrText.length > 0) {
|
|
662503
|
-
parts.push(`[OCR Text from
|
|
662619
|
+
parts.push(`[OCR Text from image]
|
|
662504
662620
|
${ocrText}`);
|
|
662505
662621
|
}
|
|
662506
662622
|
if (visionDescription.length > 0) {
|
|
662507
|
-
parts.push(`[Vision analysis of
|
|
662623
|
+
parts.push(`[Vision analysis of image (model: ${visionModel})]
|
|
662508
662624
|
${visionDescription}`);
|
|
662509
662625
|
}
|
|
662510
662626
|
if (parts.length === 0) {
|
|
662511
|
-
parts.push(`[Image
|
|
662627
|
+
parts.push(`[Image at ${image.path} — OCR found no text and the vision model (${visionModel}) returned no description; treat as UNCOMPREHENDED and re-run telegram_image_analyze with detail='full' before answering.]`);
|
|
662512
662628
|
}
|
|
662513
662629
|
const contextBlock = parts.join("\n\n");
|
|
662514
662630
|
return {
|
|
@@ -665197,7 +665313,7 @@ Public Telegram vision and media stack
|
|
|
665197
665313
|
|
|
665198
665314
|
Public Telegram runs have the full scoped media-analysis stack for media posted in this chat:
|
|
665199
665315
|
- Use telegram_media_recent to find recent scoped media, then use path/media aliases 'reply' and 'latest' instead of exposing local paths to users.
|
|
665200
|
-
-
|
|
665316
|
+
- MANDATORY: whenever one or more images are present (posted, replied-to, or recent), run a FULL comprehension pass on EVERY image BEFORE responding — telegram_image_analyze with detail='full' (advanced OCR + Moondream vision) on each; if a burst of images was posted, enumerate them with telegram_media_recent and analyze ALL of them. Base the answer ONLY on the extracted content (objects, scene, any text). NEVER answer from metadata alone (count, size, timestamp, caption), never claim you can't say what's pictured without running vision, and never offer to analyze only "the ones you care about" — full vision on all of them IS the job. Do not stop until every image is fully comprehended.
|
|
665201
665317
|
- Use ocr for quick image text extraction, ocr_image_advanced when basic OCR shows dense or degraded text, image_read for image metadata + multimodal image payload, and vision for direct Moondream captioning, visual QA, object detection, or pointing.
|
|
665202
665318
|
- Use pdf_to_text for embedded-text PDFs and ocr_pdf for scanned PDFs.
|
|
665203
665319
|
- Use video_understand and transcribe_file for video/audio media posted in this chat.
|
|
@@ -675910,7 +676026,7 @@ ${currentTelegramPrompt}`;
|
|
|
675910
676026
|
TELEGRAM_LINK_INTEGRITY_CONTRACT,
|
|
675911
676027
|
"If a user explicitly states a durable preference for reply cadence/order, call telegram_preference_set. Do not infer or classify reply-mode preferences from keywords, style, tone, or task type.",
|
|
675912
676028
|
TELEGRAM_EVIDENCE_SUFFICIENCY_CONTRACT,
|
|
675913
|
-
"You have the full scoped Telegram media-analysis stack by default: telegram_image_analyze, telegram_media_recent, image_read, ocr, ocr_image_advanced, vision, pdf_to_text, ocr_pdf, transcribe_file, video_understand, audio_analyze, and identity_memory.
|
|
676029
|
+
"You have the full scoped Telegram media-analysis stack by default: telegram_image_analyze, telegram_media_recent, image_read, ocr, ocr_image_advanced, vision, pdf_to_text, ocr_pdf, transcribe_file, video_understand, audio_analyze, and identity_memory. MANDATORY image handling: whenever one or more images are present in the message (or a referenced/recent message), you MUST run a FULL comprehension pass on EVERY image before you respond — call telegram_image_analyze with detail='full' (advanced OCR + Moondream vision) on each, and if multiple images were sent in a burst, analyze ALL of them (use telegram_media_recent to enumerate them). Base your answer ONLY on the actual extracted content (objects, scene, and any text). NEVER answer from metadata alone (count, file size, timestamp, caption) and NEVER say you 'can't say what's pictured without running vision' or offer to analyze 'the ones you care about' — running full vision on all of them IS your job. Do not stop until every image is fully comprehended; if a pass returns nothing, retry with detail='full' or image_read+ocr_image_advanced+vision before concluding.",
|
|
675914
676030
|
formatIdentityMemoryContext(chatLabel || "Telegram private chat"),
|
|
675915
676031
|
reminderToolContract,
|
|
675916
676032
|
"If the user asks you to create an image, audio file, video, 3D/CAD model, or document artifact, create it with the scoped creative tools. Freshly generated artifacts are recorded and automatically attached to this Telegram chat when the turn completes, so do not call telegram_send_file for those same artifacts unless the user asked for a specific caption, existing/unrecorded file, or non-default target.",
|
|
@@ -679750,7 +679866,7 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
|
|
|
679750
679866
|
* Downloads the file, runs it through the appropriate pipeline,
|
|
679751
679867
|
* caches it, and returns a text description for the agent.
|
|
679752
679868
|
*/
|
|
679753
|
-
async processMedia(msg, source = "message") {
|
|
679869
|
+
async processMedia(msg, source = "message", eager = false) {
|
|
679754
679870
|
const media = source === "reply" ? msg.replyToMedia : msg.media;
|
|
679755
679871
|
if (!media) return "";
|
|
679756
679872
|
const { type, fileId, fileUniqueId, mimeType, caption } = media;
|
|
@@ -679759,6 +679875,12 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
|
|
|
679759
679875
|
const sourceLabel = source === "reply" ? "replied-to " : "";
|
|
679760
679876
|
const mediaAlias = sourceMessageId ? `message_id:${sourceMessageId}` : source === "reply" ? "reply" : "latest";
|
|
679761
679877
|
const safeCaption = caption ? ` — caption: ${telegramContextJsonString(caption, 220)}` : "";
|
|
679878
|
+
const cacheKey = `${String(msg.chatId)}:${String(sourceMessageId ?? 0)}:${fileUniqueId}`;
|
|
679879
|
+
const existingEntry = this.mediaCache.get(cacheKey);
|
|
679880
|
+
if (existingEntry && existsSync146(existingEntry.localPath)) {
|
|
679881
|
+
existingEntry.cachedAt = Date.now();
|
|
679882
|
+
return existingEntry.extractedContent || `[${sourceLabel}${type} received: path_alias=${mediaAlias}${safeCaption}]`;
|
|
679883
|
+
}
|
|
679762
679884
|
let ext = ".bin";
|
|
679763
679885
|
if (isImageMedia) ext = telegramImageExtension(media);
|
|
679764
679886
|
else if (type === "audio" || type === "voice") ext = ".ogg";
|
|
@@ -679782,10 +679904,7 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
|
|
|
679782
679904
|
caption,
|
|
679783
679905
|
cachedAt: Date.now()
|
|
679784
679906
|
};
|
|
679785
|
-
this.mediaCache.set(
|
|
679786
|
-
`${String(msg.chatId)}:${String(sourceMessageId ?? 0)}:${fileUniqueId}`,
|
|
679787
|
-
cacheEntry
|
|
679788
|
-
);
|
|
679907
|
+
this.mediaCache.set(cacheKey, cacheEntry);
|
|
679789
679908
|
const metadataKey = String(msg.chatId);
|
|
679790
679909
|
if (!this.mediaMetadata.has(metadataKey)) {
|
|
679791
679910
|
this.mediaMetadata.set(metadataKey, []);
|
|
@@ -679798,7 +679917,7 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
|
|
|
679798
679917
|
username: msg.username
|
|
679799
679918
|
});
|
|
679800
679919
|
let description = `[${type}${caption ? `: ${caption}` : ""}]`;
|
|
679801
|
-
if (isImageMedia) {
|
|
679920
|
+
if (isImageMedia && !eager) {
|
|
679802
679921
|
let visionContext = "";
|
|
679803
679922
|
try {
|
|
679804
679923
|
const { runVisionIngress: runVisionIngress2, formatImageContextPrefix: formatImageContextPrefix2 } = await Promise.resolve().then(() => (init_vision_ingress(), vision_ingress_exports));
|
|
@@ -680897,6 +681016,10 @@ ${caption}\r
|
|
|
680897
681016
|
if (this.adminUserId && !this.agentConfig) {
|
|
680898
681017
|
if (!isAdmin) continue;
|
|
680899
681018
|
}
|
|
681019
|
+
if (msg.media) {
|
|
681020
|
+
void this.processMedia(msg, "message", true).catch(() => {
|
|
681021
|
+
});
|
|
681022
|
+
}
|
|
680900
681023
|
if (this.agentConfig && this.repoRoot) {
|
|
680901
681024
|
this.handleMessageWithSubAgent(msg).catch((err) => {
|
|
680902
681025
|
this.tuiWrite(
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.359",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.359",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED