npm - omnius - Versions diffs - 1.0.119 → 1.0.121 - Mend

omnius 1.0.119 → 1.0.121

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (3)

package/dist/index.js (changed)

@@ -527033,15 +527033,17 @@ var init_personality = __esm({
 // packages/orchestrator/dist/critic.js
 function buildForceProgressBlockMessage(call, hits) {
   const argPreview = JSON.stringify(call.args ?? {}).slice(0, 200);
-  return `[FORCED PROGRESS BLOCK — you have called ${call.tool}(${argPreview}) ${hits} times with identical arguments and received the cached result each time. Consider whether additional calls are needed or if you can proceed with what you have.
+  return `[FORCED PROGRESS BLOCK — duplicate ${call.tool} call skipped; this is not a tool failure. You have called ${call.tool}(${argPreview}) ${hits} times with identical arguments. The runtime did not re-run the tool; it is returning the cached result below so you can proceed without retrying.
-To proceed, you can:
+Progress is REQUIRED before this tool will run again with the same arguments. To proceed, do one of these:
   • file_write or file_edit to make progress, OR
   • todo_write that advances the plan, OR
   • task_complete (if all phases are done), OR
-  • Call a different tool or use different arguments.
-The cached result of this exact call is in your conversation history.]`;
+  • Call a different tool or use different arguments.]`;
+}
+function buildCachedResultEnvelope(result) {
+  return `[CACHED RESULT — you already have this information from a prior successful call. Do NOT call this tool again with the same arguments.]
+${result}`;
 }
 function evaluate(inputs) {
   const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount, observerRedundantBlock } = inputs;
@@ -527050,8 +527052,7 @@ function evaluate(inputs) {
     return {
       decision: "observer_block",
       reason: "Littleman observer flagged this fingerprint as redundant",
-      cachedResult: cached ? `[CACHED RESULT — you already have this information from a prior call. Do NOT call this tool again with the same arguments.]
-${cached.result}` : null
+      cachedResult: cached ? buildCachedResultEnvelope(cached.result) : null
     };
   }
   if (isReadLike) {
@@ -527064,11 +527065,12 @@ ${cached.result}` : null
           decision: "force_progress_block",
           reason: `${proposedCall.tool} fingerprint hit count ${hits} >= ${threshold}`,
           hitNumber: hits,
-          blockMessage: buildForceProgressBlockMessage(proposedCall, hits)
+          blockMessage: buildForceProgressBlockMessage(proposedCall, hits),
+          cachedResult: buildCachedResultEnvelope(cached.result),
+          compacted: cached.compacted
         };
       }
-      const cachedEnvelope = `[CACHED RESULT — you already have this information from a prior call. Do NOT call this tool again with the same arguments.]
-${cached.result}`;
+      const cachedEnvelope = buildCachedResultEnvelope(cached.result);
       return {
         decision: "serve_cached",
         reason: cached.compacted ? "post-compaction cache re-serve" : `duplicate call #${hits} (still under ${threshold}-hit gate)`,
@@ -543226,27 +543228,35 @@ ${latest.output || ""}`.trim();
         const dirsListed = [];
         const searches = [];
         const shells = [];
+        let compactedCount = 0;
         for (const [fingerprint, entry] of recentToolResults) {
+          const { toolName, args } = this._decodeToolFingerprint(fingerprint);
           if (entry.compacted)
-            continue;
-          const colonIdx = fingerprint.indexOf(":");
-          const toolName = colonIdx > 0 ? fingerprint.slice(0, colonIdx) : fingerprint;
+            compactedCount++;
           if (toolName === "file_read") {
-            const pathMatch = fingerprint.match(/path=([^,\s]+)/);
-            if (pathMatch?.[1])
-              filesRead.push(pathMatch[1]);
+            const path12 = args.get("path") ?? args.get("file");
+            if (path12) {
+              filesRead.push(this._formatKnowledgeTarget(this._formatFileReadKnowledgeTarget(path12, args), entry.compacted));
+            }
           } else if (toolName === "list_directory") {
-            const pathMatch = fingerprint.match(/path=([^,\s]+)/);
-            if (pathMatch?.[1])
-              dirsListed.push(pathMatch[1]);
+            const path12 = args.get("path") ?? ".";
+            dirsListed.push(this._formatKnowledgeTarget(path12, entry.compacted));
           } else if (toolName === "grep_search" || toolName === "find_files") {
-            searches.push(toolName);
+            const path12 = args.get("path") ?? ".";
+            const pattern = args.get("pattern") ?? args.get("query") ?? "";
+            const target = pattern ? `${toolName} ${path12} :: ${pattern}` : `${toolName} ${path12}`;
+            searches.push(this._formatKnowledgeTarget(target, entry.compacted));
           } else if (toolName === "shell" || toolName === "shell_async") {
-            const cmdMatch = fingerprint.match(/cmd=([^,\s]+)/);
-            shells.push(cmdMatch?.[1] ?? toolName);
+            const command = args.get("command") ?? args.get("cmd") ?? toolName;
+            shells.push(this._formatKnowledgeTarget(command, entry.compacted));
           }
         }
-        const sections = ["[KNOWLEDGE — you already have these results in context above. Do NOT re-call these tools for the same targets:]"];
+        const sections = [
+          "[KNOWLEDGE — cached tool results already known to the runtime. Do NOT re-call these tools with the same arguments:]"
+        ];
+        if (compactedCount > 0) {
+          sections.push(`Compacted cached entries still count as already-known results (${compactedCount}); an exact repeat will be served from cache or skipped, not produce new information.`);
+        }
         if (filesRead.length > 0) {
           const unique = [...new Set(filesRead)].slice(0, 30);
           sections.push(`Files already read (${unique.length}): ${unique.join(", ")}`);
@@ -543256,7 +543266,8 @@ ${latest.output || ""}`.trim();
           sections.push(`Directories already listed (${unique.length}): ${unique.join(", ")}`);
         }
         if (searches.length > 0) {
-          sections.push(`Searches already run: ${searches.length}`);
+          const unique = [...new Set(searches)].slice(0, 15);
+          sections.push(`Searches already run (${unique.length}): ${unique.join(", ")}`);
         }
         if (shells.length > 0) {
           const unique = [...new Set(shells)].slice(0, 15);
@@ -543494,6 +543505,68 @@ ${blob}
       _buildToolFingerprint(name10, args) {
         return `${name10}:${this._buildExactArgsKey(args)}`;
       }
+      _decodeToolFingerprint(fingerprint) {
+        const colonIdx = fingerprint.indexOf(":");
+        const toolName = colonIdx > 0 ? fingerprint.slice(0, colonIdx) : fingerprint;
+        const argsKey = colonIdx > 0 ? fingerprint.slice(colonIdx + 1) : "";
+        return { toolName, args: this._parseExactArgsKey(argsKey) };
+      }
+      _parseExactArgsKey(argsKey) {
+        const parsed = /* @__PURE__ */ new Map();
+        if (!argsKey)
+          return parsed;
+        const entries = [];
+        let current = "";
+        let escaped = false;
+        for (const ch of argsKey) {
+          if (escaped) {
+            current += ch;
+            escaped = false;
+          } else if (ch === "\\") {
+            escaped = true;
+          } else if (ch === ",") {
+            entries.push(current);
+            current = "";
+          } else {
+            current += ch;
+          }
+        }
+        if (escaped)
+          current += "\\";
+        entries.push(current);
+        for (const entry of entries) {
+          const eqIdx = entry.indexOf("=");
+          if (eqIdx <= 0)
+            continue;
+          parsed.set(entry.slice(0, eqIdx), entry.slice(eqIdx + 1));
+        }
+        return parsed;
+      }
+      _formatKnowledgeTarget(target, compacted) {
+        const clipped = target.length > 180 ? `${target.slice(0, 130)}...${target.slice(-40)}` : target;
+        return compacted ? `${clipped} (cached after compaction)` : clipped;
+      }
+      _formatFileReadKnowledgeTarget(path12, args) {
+        const offset = this._formatArgsKeyScalar(args.get("offset"));
+        const limit = this._formatArgsKeyScalar(args.get("limit"));
+        if (offset !== void 0 || limit !== void 0) {
+          return `${path12} (offset ${offset ?? "0"}, limit ${limit ?? "end"})`;
+        }
+        return path12;
+      }
+      _formatArgsKeyScalar(value2) {
+        if (value2 === void 0)
+          return void 0;
+        if (value2.startsWith("#number:"))
+          return value2.slice("#number:".length);
+        if (value2.startsWith("#boolean:"))
+          return value2.slice("#boolean:".length);
+        if (value2 === "#null")
+          return "null";
+        if (value2 === "#undefined")
+          return "undefined";
+        return value2;
+      }
       _isStatefulBrowserTool(name10) {
         return name10 === "playwright_browser" || name10 === "browser_action";
       }
@@ -546572,8 +546645,8 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
                 this.emit({
                   type: "tool_result",
                   toolName: tc.name,
-                  success: false,
-                  content: criticDecision.blockMessage.slice(0, 120),
+                  success: true,
+                  content: `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run; cached result returned.]`.slice(0, 120),
                   turn,
                   timestamp: (/* @__PURE__ */ new Date()).toISOString()
                 });
@@ -546581,7 +546654,19 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
                   mode: "step_repetition",
                   rationale: `force_progress_block on ${tc.name} after ${criticDecision.hitNumber} identical calls`
                 });
-                return { tc, output: criticDecision.blockMessage };
+                const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. Do not retry this exact call.]
+` : `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run. The cached result below is from the prior successful call. Do not retry this exact call.]
+`;
+                const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
+... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
+                return {
+                  tc,
+                  output: `${criticDecision.blockMessage}
+${header}${truncatedCache}`
+                };
               }
               if (criticDecision.decision === "serve_cached") {
                 dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
@@ -553357,14 +553442,29 @@ ${description}`
           poolSlot.release(success);
           poolSlot = null;
         };
+        const streamTimeoutMs = Number.isFinite(request.timeoutMs) && request.timeoutMs > 0 ? Math.max(request.timeoutMs, 1e4) : 3e5;
+        const streamAbort = new AbortController();
+        const streamTimeoutHandle = setTimeout(() => {
+          streamAbort.abort(new Error(`stream timeout: no response or chunk within ${(streamTimeoutMs / 1e3).toFixed(0)}s`));
+        }, streamTimeoutMs);
+        if (typeof streamTimeoutHandle.unref === "function") {
+          streamTimeoutHandle.unref();
+        }
+        const externalAbortListener = this._abortSignal ? () => streamAbort.abort(this._abortSignal?.reason ?? new Error("external abort")) : null;
+        if (this._abortSignal && externalAbortListener) {
+          if (this._abortSignal.aborted) {
+            externalAbortListener();
+          } else {
+            this._abortSignal.addEventListener("abort", externalAbortListener, { once: true });
+          }
+        }
         try {
           const streamFetchOpts = {
             method: "POST",
             headers: this.authHeaders(),
-            body: JSON.stringify(body)
+            body: JSON.stringify(body),
+            signal: streamAbort.signal
           };
-          if (this._abortSignal)
-            streamFetchOpts.signal = this._abortSignal;
           let resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, streamFetchOpts);
           if (!resp.ok) {
             const text = await resp.text().catch(() => "");
@@ -553463,6 +553563,13 @@ ${description}`
           this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
           poolSuccess = true;
         } finally {
+          clearTimeout(streamTimeoutHandle);
+          if (this._abortSignal && externalAbortListener) {
+            try {
+              this._abortSignal.removeEventListener("abort", externalAbortListener);
+            } catch {
+            }
+          }
           releasePoolSlot(poolSuccess);
         }
       }
@@ -619797,24 +619904,57 @@ ${lines.join("\n")}`);
             `inference ${inferenceId} [${entry.kind}] ${elapsed}s content=${entry.contentTokens}t thinking=${entry.thinkingTokens}t (${thinkRatio}% think) live=${JSON.stringify(preview)}`
           ));
         };
-        for await (const chunk of streamFn(request)) {
-          if (chunk.type === "content" && chunk.content) {
-            if (chunk.thinking) {
-              thinkingBuf += chunk.content;
-              this.bumpTelegramInferenceTokens(inferenceId, 0, 1);
-            } else {
-              contentBuf += chunk.content;
-              this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
-            }
-            flushPreview(false);
-          } else if (chunk.type === "finish") {
-            finishReason = chunk.finishReason;
-          } else if (chunk.type === "usage") {
-            usage = {
-              prompt_tokens: chunk.promptTokens,
-              completion_tokens: chunk.completionTokens,
-              total_tokens: chunk.totalTokens
-            };
+        const inactivityMs = this.telegramStreamInactivityMs();
+        const iter = streamFn(request)[Symbol.asyncIterator]();
+        try {
+          while (true) {
+            let timeoutHandle = null;
+            const inactivityPromise = new Promise((_, reject) => {
+              timeoutHandle = setTimeout(
+                () => reject(new Error(
+                  `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (content=${contentBuf.length}c thinking=${thinkingBuf.length}c so far) — Ollama likely cold-loading the model or wedged; falling back to non-stream`
+                )),
+                inactivityMs
+              );
+              if (typeof timeoutHandle.unref === "function") {
+                timeoutHandle.unref();
+              }
+            });
+            let next;
+            try {
+              next = await Promise.race([iter.next(), inactivityPromise]);
+            } finally {
+              if (timeoutHandle) clearTimeout(timeoutHandle);
+            }
+            if (next.done) break;
+            const chunk = next.value;
+            if (chunk.type === "content" && chunk.content) {
+              const entry = this.telegramActiveInferences.get(inferenceId);
+              if (entry && entry.firstChunkAt === void 0) {
+                entry.firstChunkAt = performance.now();
+              }
+              if (chunk.thinking) {
+                thinkingBuf += chunk.content;
+                this.bumpTelegramInferenceTokens(inferenceId, 0, 1);
+              } else {
+                contentBuf += chunk.content;
+                this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
+              }
+              flushPreview(false);
+            } else if (chunk.type === "finish") {
+              finishReason = chunk.finishReason;
+            } else if (chunk.type === "usage") {
+              usage = {
+                prompt_tokens: chunk.promptTokens,
+                completion_tokens: chunk.completionTokens,
+                total_tokens: chunk.totalTokens
+              };
+            }
+          }
+        } finally {
+          try {
+            await iter.return?.(void 0);
+          } catch {
           }
         }
         flushPreview(true);
@@ -619885,9 +620025,10 @@ ${lines.join("\n")}`);
           const dur = ((performance.now() - entry.startTs) / 1e3).toFixed(1);
           const totalTokens = entry.contentTokens + entry.thinkingTokens;
           const ratio = totalTokens > 0 ? Math.round(entry.thinkingTokens * 100 / totalTokens) : 0;
+          const ttfb = entry.firstChunkAt !== void 0 ? `${((entry.firstChunkAt - entry.startTs) / 1e3).toFixed(1)}s` : "never";
           this.tuiWrite(() => renderTelegramSubAgentEvent(
             entry.sessionKey,
-            `inference ${id} [${entry.kind}] done in ${dur}s — ${entry.contentTokens}t content / ${entry.thinkingTokens}t thinking (${ratio}% think)`
+            `inference ${id} [${entry.kind}] done in ${dur}s (ttfb=${ttfb}) — ${entry.contentTokens}t content / ${entry.thinkingTokens}t thinking (${ratio}% think)`
           ));
         }
       }
@@ -619903,7 +620044,10 @@ ${lines.join("\n")}`);
         return Array.from(this.telegramActiveInferences.values()).map((e2) => ({
           ...e2,
           elapsedSec: (now - e2.startTs) / 1e3,
-          idleSec: (now - e2.lastTokenAt) / 1e3
+          idleSec: (now - e2.lastTokenAt) / 1e3,
+          // Undefined when no chunk has arrived yet (still cold-loading or wedged).
+          // A dashboard renderer should display "—" or "waiting" in that case.
+          ttfbSec: e2.firstChunkAt !== void 0 ? (e2.firstChunkAt - e2.startTs) / 1e3 : void 0
         }));
       }
       /**
@@ -620175,6 +620319,25 @@ ${retryText}`,
       telegramSubAgentWatchdogIntervalMs() {
         return 3e4;
       }
+      /**
+       * Per-chunk inactivity window for the bridge's stream consumer. If no
+       * chunk arrives within this window, the streaming consumer in
+       * streamTelegramInferenceToCompletion aborts via Promise.race + clears
+       * the iterator, and telegramObservableInference falls back to the
+       * non-streaming chatCompletion path. This gives operators a clean
+       * "stream silent for 60s, falling back" signal instead of the opaque
+       * 180s coalescer hard-deadline.
+       *
+       * Default 60s — comfortably longer than a healthy cold-load of a 35B
+       * model on a warm VRAM cache (typically <30s) but short enough to
+       * surface a real wedge before the 180s coalescer fires. Override via
+       * OMNIUS_TG_STREAM_INACTIVITY_MS (clamped to [10s, 5min]).
+       */
+      telegramStreamInactivityMs() {
+        const raw = Number.parseInt(process.env["OMNIUS_TG_STREAM_INACTIVITY_MS"] ?? "", 10);
+        if (Number.isFinite(raw) && raw >= 1e4 && raw <= 3e5) return raw;
+        return 6e4;
+      }
       /**
        * Start the periodic stale-sub-agent reaper. Idempotent — safe to call
        * multiple times (no-op if already running). Stopped by stop() and on

package/npm-shrinkwrap.json (changed)

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.119",
+  "version": "1.0.121",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.119",
+      "version": "1.0.121",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json (changed)

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.119",
+  "version": "1.0.121",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",