omnius 1.0.119 → 1.0.121

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -527033,15 +527033,17 @@ var init_personality = __esm({
527033
527033
  // packages/orchestrator/dist/critic.js
527034
527034
  function buildForceProgressBlockMessage(call, hits) {
527035
527035
  const argPreview = JSON.stringify(call.args ?? {}).slice(0, 200);
527036
- return `[FORCED PROGRESS BLOCK — you have called ${call.tool}(${argPreview}) ${hits} times with identical arguments and received the cached result each time. Consider whether additional calls are needed or if you can proceed with what you have.
527036
+ return `[FORCED PROGRESS BLOCK — duplicate ${call.tool} call skipped; this is not a tool failure. You have called ${call.tool}(${argPreview}) ${hits} times with identical arguments. The runtime did not re-run the tool; it is returning the cached result below so you can proceed without retrying.
527037
527037
 
527038
- To proceed, you can:
527038
+ Progress is REQUIRED before this tool will run again with the same arguments. To proceed, do one of these:
527039
527039
  • file_write or file_edit to make progress, OR
527040
527040
  • todo_write that advances the plan, OR
527041
527041
  • task_complete (if all phases are done), OR
527042
- • Call a different tool or use different arguments.
527043
-
527044
- The cached result of this exact call is in your conversation history.]`;
527042
+ • Call a different tool or use different arguments.]`;
527043
+ }
527044
+ function buildCachedResultEnvelope(result) {
527045
+ return `[CACHED RESULT — you already have this information from a prior successful call. Do NOT call this tool again with the same arguments.]
527046
+ ${result}`;
527045
527047
  }
527046
527048
  function evaluate(inputs) {
527047
527049
  const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount, observerRedundantBlock } = inputs;
@@ -527050,8 +527052,7 @@ function evaluate(inputs) {
527050
527052
  return {
527051
527053
  decision: "observer_block",
527052
527054
  reason: "Littleman observer flagged this fingerprint as redundant",
527053
- cachedResult: cached ? `[CACHED RESULT — you already have this information from a prior call. Do NOT call this tool again with the same arguments.]
527054
- ${cached.result}` : null
527055
+ cachedResult: cached ? buildCachedResultEnvelope(cached.result) : null
527055
527056
  };
527056
527057
  }
527057
527058
  if (isReadLike) {
@@ -527064,11 +527065,12 @@ ${cached.result}` : null
527064
527065
  decision: "force_progress_block",
527065
527066
  reason: `${proposedCall.tool} fingerprint hit count ${hits} >= ${threshold}`,
527066
527067
  hitNumber: hits,
527067
- blockMessage: buildForceProgressBlockMessage(proposedCall, hits)
527068
+ blockMessage: buildForceProgressBlockMessage(proposedCall, hits),
527069
+ cachedResult: buildCachedResultEnvelope(cached.result),
527070
+ compacted: cached.compacted
527068
527071
  };
527069
527072
  }
527070
- const cachedEnvelope = `[CACHED RESULT — you already have this information from a prior call. Do NOT call this tool again with the same arguments.]
527071
- ${cached.result}`;
527073
+ const cachedEnvelope = buildCachedResultEnvelope(cached.result);
527072
527074
  return {
527073
527075
  decision: "serve_cached",
527074
527076
  reason: cached.compacted ? "post-compaction cache re-serve" : `duplicate call #${hits} (still under ${threshold}-hit gate)`,
@@ -543226,27 +543228,35 @@ ${latest.output || ""}`.trim();
543226
543228
  const dirsListed = [];
543227
543229
  const searches = [];
543228
543230
  const shells = [];
543231
+ let compactedCount = 0;
543229
543232
  for (const [fingerprint, entry] of recentToolResults) {
543233
+ const { toolName, args } = this._decodeToolFingerprint(fingerprint);
543230
543234
  if (entry.compacted)
543231
- continue;
543232
- const colonIdx = fingerprint.indexOf(":");
543233
- const toolName = colonIdx > 0 ? fingerprint.slice(0, colonIdx) : fingerprint;
543235
+ compactedCount++;
543234
543236
  if (toolName === "file_read") {
543235
- const pathMatch = fingerprint.match(/path=([^,\s]+)/);
543236
- if (pathMatch?.[1])
543237
- filesRead.push(pathMatch[1]);
543237
+ const path12 = args.get("path") ?? args.get("file");
543238
+ if (path12) {
543239
+ filesRead.push(this._formatKnowledgeTarget(this._formatFileReadKnowledgeTarget(path12, args), entry.compacted));
543240
+ }
543238
543241
  } else if (toolName === "list_directory") {
543239
- const pathMatch = fingerprint.match(/path=([^,\s]+)/);
543240
- if (pathMatch?.[1])
543241
- dirsListed.push(pathMatch[1]);
543242
+ const path12 = args.get("path") ?? ".";
543243
+ dirsListed.push(this._formatKnowledgeTarget(path12, entry.compacted));
543242
543244
  } else if (toolName === "grep_search" || toolName === "find_files") {
543243
- searches.push(toolName);
543245
+ const path12 = args.get("path") ?? ".";
543246
+ const pattern = args.get("pattern") ?? args.get("query") ?? "";
543247
+ const target = pattern ? `${toolName} ${path12} :: ${pattern}` : `${toolName} ${path12}`;
543248
+ searches.push(this._formatKnowledgeTarget(target, entry.compacted));
543244
543249
  } else if (toolName === "shell" || toolName === "shell_async") {
543245
- const cmdMatch = fingerprint.match(/cmd=([^,\s]+)/);
543246
- shells.push(cmdMatch?.[1] ?? toolName);
543250
+ const command = args.get("command") ?? args.get("cmd") ?? toolName;
543251
+ shells.push(this._formatKnowledgeTarget(command, entry.compacted));
543247
543252
  }
543248
543253
  }
543249
- const sections = ["[KNOWLEDGE — you already have these results in context above. Do NOT re-call these tools for the same targets:]"];
543254
+ const sections = [
543255
+ "[KNOWLEDGE — cached tool results already known to the runtime. Do NOT re-call these tools with the same arguments:]"
543256
+ ];
543257
+ if (compactedCount > 0) {
543258
+ sections.push(`Compacted cached entries still count as already-known results (${compactedCount}); an exact repeat will be served from cache or skipped, not produce new information.`);
543259
+ }
543250
543260
  if (filesRead.length > 0) {
543251
543261
  const unique = [...new Set(filesRead)].slice(0, 30);
543252
543262
  sections.push(`Files already read (${unique.length}): ${unique.join(", ")}`);
@@ -543256,7 +543266,8 @@ ${latest.output || ""}`.trim();
543256
543266
  sections.push(`Directories already listed (${unique.length}): ${unique.join(", ")}`);
543257
543267
  }
543258
543268
  if (searches.length > 0) {
543259
- sections.push(`Searches already run: ${searches.length}`);
543269
+ const unique = [...new Set(searches)].slice(0, 15);
543270
+ sections.push(`Searches already run (${unique.length}): ${unique.join(", ")}`);
543260
543271
  }
543261
543272
  if (shells.length > 0) {
543262
543273
  const unique = [...new Set(shells)].slice(0, 15);
@@ -543494,6 +543505,68 @@ ${blob}
543494
543505
  _buildToolFingerprint(name10, args) {
543495
543506
  return `${name10}:${this._buildExactArgsKey(args)}`;
543496
543507
  }
543508
+ _decodeToolFingerprint(fingerprint) {
543509
+ const colonIdx = fingerprint.indexOf(":");
543510
+ const toolName = colonIdx > 0 ? fingerprint.slice(0, colonIdx) : fingerprint;
543511
+ const argsKey = colonIdx > 0 ? fingerprint.slice(colonIdx + 1) : "";
543512
+ return { toolName, args: this._parseExactArgsKey(argsKey) };
543513
+ }
543514
+ _parseExactArgsKey(argsKey) {
543515
+ const parsed = /* @__PURE__ */ new Map();
543516
+ if (!argsKey)
543517
+ return parsed;
543518
+ const entries = [];
543519
+ let current = "";
543520
+ let escaped = false;
543521
+ for (const ch of argsKey) {
543522
+ if (escaped) {
543523
+ current += ch;
543524
+ escaped = false;
543525
+ } else if (ch === "\\") {
543526
+ escaped = true;
543527
+ } else if (ch === ",") {
543528
+ entries.push(current);
543529
+ current = "";
543530
+ } else {
543531
+ current += ch;
543532
+ }
543533
+ }
543534
+ if (escaped)
543535
+ current += "\\";
543536
+ entries.push(current);
543537
+ for (const entry of entries) {
543538
+ const eqIdx = entry.indexOf("=");
543539
+ if (eqIdx <= 0)
543540
+ continue;
543541
+ parsed.set(entry.slice(0, eqIdx), entry.slice(eqIdx + 1));
543542
+ }
543543
+ return parsed;
543544
+ }
543545
+ _formatKnowledgeTarget(target, compacted) {
543546
+ const clipped = target.length > 180 ? `${target.slice(0, 130)}...${target.slice(-40)}` : target;
543547
+ return compacted ? `${clipped} (cached after compaction)` : clipped;
543548
+ }
543549
+ _formatFileReadKnowledgeTarget(path12, args) {
543550
+ const offset = this._formatArgsKeyScalar(args.get("offset"));
543551
+ const limit = this._formatArgsKeyScalar(args.get("limit"));
543552
+ if (offset !== void 0 || limit !== void 0) {
543553
+ return `${path12} (offset ${offset ?? "0"}, limit ${limit ?? "end"})`;
543554
+ }
543555
+ return path12;
543556
+ }
543557
+ _formatArgsKeyScalar(value2) {
543558
+ if (value2 === void 0)
543559
+ return void 0;
543560
+ if (value2.startsWith("#number:"))
543561
+ return value2.slice("#number:".length);
543562
+ if (value2.startsWith("#boolean:"))
543563
+ return value2.slice("#boolean:".length);
543564
+ if (value2 === "#null")
543565
+ return "null";
543566
+ if (value2 === "#undefined")
543567
+ return "undefined";
543568
+ return value2;
543569
+ }
543497
543570
  _isStatefulBrowserTool(name10) {
543498
543571
  return name10 === "playwright_browser" || name10 === "browser_action";
543499
543572
  }
@@ -546572,8 +546645,8 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
546572
546645
  this.emit({
546573
546646
  type: "tool_result",
546574
546647
  toolName: tc.name,
546575
- success: false,
546576
- content: criticDecision.blockMessage.slice(0, 120),
546648
+ success: true,
546649
+ content: `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run; cached result returned.]`.slice(0, 120),
546577
546650
  turn,
546578
546651
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
546579
546652
  });
@@ -546581,7 +546654,19 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
546581
546654
  mode: "step_repetition",
546582
546655
  rationale: `force_progress_block on ${tc.name} after ${criticDecision.hitNumber} identical calls`
546583
546656
  });
546584
- return { tc, output: criticDecision.blockMessage };
546657
+ const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. Do not retry this exact call.]
546658
+
546659
+ ` : `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run. The cached result below is from the prior successful call. Do not retry this exact call.]
546660
+
546661
+ `;
546662
+ const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
546663
+ ... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
546664
+ return {
546665
+ tc,
546666
+ output: `${criticDecision.blockMessage}
546667
+
546668
+ ${header}${truncatedCache}`
546669
+ };
546585
546670
  }
546586
546671
  if (criticDecision.decision === "serve_cached") {
546587
546672
  dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
@@ -553357,14 +553442,29 @@ ${description}`
553357
553442
  poolSlot.release(success);
553358
553443
  poolSlot = null;
553359
553444
  };
553445
+ const streamTimeoutMs = Number.isFinite(request.timeoutMs) && request.timeoutMs > 0 ? Math.max(request.timeoutMs, 1e4) : 3e5;
553446
+ const streamAbort = new AbortController();
553447
+ const streamTimeoutHandle = setTimeout(() => {
553448
+ streamAbort.abort(new Error(`stream timeout: no response or chunk within ${(streamTimeoutMs / 1e3).toFixed(0)}s`));
553449
+ }, streamTimeoutMs);
553450
+ if (typeof streamTimeoutHandle.unref === "function") {
553451
+ streamTimeoutHandle.unref();
553452
+ }
553453
+ const externalAbortListener = this._abortSignal ? () => streamAbort.abort(this._abortSignal?.reason ?? new Error("external abort")) : null;
553454
+ if (this._abortSignal && externalAbortListener) {
553455
+ if (this._abortSignal.aborted) {
553456
+ externalAbortListener();
553457
+ } else {
553458
+ this._abortSignal.addEventListener("abort", externalAbortListener, { once: true });
553459
+ }
553460
+ }
553360
553461
  try {
553361
553462
  const streamFetchOpts = {
553362
553463
  method: "POST",
553363
553464
  headers: this.authHeaders(),
553364
- body: JSON.stringify(body)
553465
+ body: JSON.stringify(body),
553466
+ signal: streamAbort.signal
553365
553467
  };
553366
- if (this._abortSignal)
553367
- streamFetchOpts.signal = this._abortSignal;
553368
553468
  let resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, streamFetchOpts);
553369
553469
  if (!resp.ok) {
553370
553470
  const text = await resp.text().catch(() => "");
@@ -553463,6 +553563,13 @@ ${description}`
553463
553563
  this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
553464
553564
  poolSuccess = true;
553465
553565
  } finally {
553566
+ clearTimeout(streamTimeoutHandle);
553567
+ if (this._abortSignal && externalAbortListener) {
553568
+ try {
553569
+ this._abortSignal.removeEventListener("abort", externalAbortListener);
553570
+ } catch {
553571
+ }
553572
+ }
553466
553573
  releasePoolSlot(poolSuccess);
553467
553574
  }
553468
553575
  }
@@ -619797,24 +619904,57 @@ ${lines.join("\n")}`);
619797
619904
  `inference ${inferenceId} [${entry.kind}] ${elapsed}s content=${entry.contentTokens}t thinking=${entry.thinkingTokens}t (${thinkRatio}% think) live=${JSON.stringify(preview)}`
619798
619905
  ));
619799
619906
  };
619800
- for await (const chunk of streamFn(request)) {
619801
- if (chunk.type === "content" && chunk.content) {
619802
- if (chunk.thinking) {
619803
- thinkingBuf += chunk.content;
619804
- this.bumpTelegramInferenceTokens(inferenceId, 0, 1);
619805
- } else {
619806
- contentBuf += chunk.content;
619807
- this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
619808
- }
619809
- flushPreview(false);
619810
- } else if (chunk.type === "finish") {
619811
- finishReason = chunk.finishReason;
619812
- } else if (chunk.type === "usage") {
619813
- usage = {
619814
- prompt_tokens: chunk.promptTokens,
619815
- completion_tokens: chunk.completionTokens,
619816
- total_tokens: chunk.totalTokens
619817
- };
619907
+ const inactivityMs = this.telegramStreamInactivityMs();
619908
+ const iter = streamFn(request)[Symbol.asyncIterator]();
619909
+ try {
619910
+ while (true) {
619911
+ let timeoutHandle = null;
619912
+ const inactivityPromise = new Promise((_, reject) => {
619913
+ timeoutHandle = setTimeout(
619914
+ () => reject(new Error(
619915
+ `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (content=${contentBuf.length}c thinking=${thinkingBuf.length}c so far) — Ollama likely cold-loading the model or wedged; falling back to non-stream`
619916
+ )),
619917
+ inactivityMs
619918
+ );
619919
+ if (typeof timeoutHandle.unref === "function") {
619920
+ timeoutHandle.unref();
619921
+ }
619922
+ });
619923
+ let next;
619924
+ try {
619925
+ next = await Promise.race([iter.next(), inactivityPromise]);
619926
+ } finally {
619927
+ if (timeoutHandle) clearTimeout(timeoutHandle);
619928
+ }
619929
+ if (next.done) break;
619930
+ const chunk = next.value;
619931
+ if (chunk.type === "content" && chunk.content) {
619932
+ const entry = this.telegramActiveInferences.get(inferenceId);
619933
+ if (entry && entry.firstChunkAt === void 0) {
619934
+ entry.firstChunkAt = performance.now();
619935
+ }
619936
+ if (chunk.thinking) {
619937
+ thinkingBuf += chunk.content;
619938
+ this.bumpTelegramInferenceTokens(inferenceId, 0, 1);
619939
+ } else {
619940
+ contentBuf += chunk.content;
619941
+ this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
619942
+ }
619943
+ flushPreview(false);
619944
+ } else if (chunk.type === "finish") {
619945
+ finishReason = chunk.finishReason;
619946
+ } else if (chunk.type === "usage") {
619947
+ usage = {
619948
+ prompt_tokens: chunk.promptTokens,
619949
+ completion_tokens: chunk.completionTokens,
619950
+ total_tokens: chunk.totalTokens
619951
+ };
619952
+ }
619953
+ }
619954
+ } finally {
619955
+ try {
619956
+ await iter.return?.(void 0);
619957
+ } catch {
619818
619958
  }
619819
619959
  }
619820
619960
  flushPreview(true);
@@ -619885,9 +620025,10 @@ ${lines.join("\n")}`);
619885
620025
  const dur = ((performance.now() - entry.startTs) / 1e3).toFixed(1);
619886
620026
  const totalTokens = entry.contentTokens + entry.thinkingTokens;
619887
620027
  const ratio = totalTokens > 0 ? Math.round(entry.thinkingTokens * 100 / totalTokens) : 0;
620028
+ const ttfb = entry.firstChunkAt !== void 0 ? `${((entry.firstChunkAt - entry.startTs) / 1e3).toFixed(1)}s` : "never";
619888
620029
  this.tuiWrite(() => renderTelegramSubAgentEvent(
619889
620030
  entry.sessionKey,
619890
- `inference ${id} [${entry.kind}] done in ${dur}s — ${entry.contentTokens}t content / ${entry.thinkingTokens}t thinking (${ratio}% think)`
620031
+ `inference ${id} [${entry.kind}] done in ${dur}s (ttfb=${ttfb}) — ${entry.contentTokens}t content / ${entry.thinkingTokens}t thinking (${ratio}% think)`
619891
620032
  ));
619892
620033
  }
619893
620034
  }
@@ -619903,7 +620044,10 @@ ${lines.join("\n")}`);
619903
620044
  return Array.from(this.telegramActiveInferences.values()).map((e2) => ({
619904
620045
  ...e2,
619905
620046
  elapsedSec: (now - e2.startTs) / 1e3,
619906
- idleSec: (now - e2.lastTokenAt) / 1e3
620047
+ idleSec: (now - e2.lastTokenAt) / 1e3,
620048
+ // Undefined when no chunk has arrived yet (still cold-loading or wedged).
620049
+ // A dashboard renderer should display "—" or "waiting" in that case.
620050
+ ttfbSec: e2.firstChunkAt !== void 0 ? (e2.firstChunkAt - e2.startTs) / 1e3 : void 0
619907
620051
  }));
619908
620052
  }
619909
620053
  /**
@@ -620175,6 +620319,25 @@ ${retryText}`,
620175
620319
  telegramSubAgentWatchdogIntervalMs() {
620176
620320
  return 3e4;
620177
620321
  }
620322
+ /**
620323
+ * Per-chunk inactivity window for the bridge's stream consumer. If no
620324
+ * chunk arrives within this window, the streaming consumer in
620325
+ * streamTelegramInferenceToCompletion aborts via Promise.race + clears
620326
+ * the iterator, and telegramObservableInference falls back to the
620327
+ * non-streaming chatCompletion path. This gives operators a clean
620328
+ * "stream silent for 60s, falling back" signal instead of the opaque
620329
+ * 180s coalescer hard-deadline.
620330
+ *
620331
+ * Default 60s — comfortably longer than a healthy cold-load of a 35B
620332
+ * model on a warm VRAM cache (typically <30s) but short enough to
620333
+ * surface a real wedge before the 180s coalescer fires. Override via
620334
+ * OMNIUS_TG_STREAM_INACTIVITY_MS (clamped to [10s, 5min]).
620335
+ */
620336
+ telegramStreamInactivityMs() {
620337
+ const raw = Number.parseInt(process.env["OMNIUS_TG_STREAM_INACTIVITY_MS"] ?? "", 10);
620338
+ if (Number.isFinite(raw) && raw >= 1e4 && raw <= 3e5) return raw;
620339
+ return 6e4;
620340
+ }
620178
620341
  /**
620179
620342
  * Start the periodic stale-sub-agent reaper. Idempotent — safe to call
620180
620343
  * multiple times (no-op if already running). Stopped by stop() and on
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.119",
3
+ "version": "1.0.121",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.119",
9
+ "version": "1.0.121",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.119",
3
+ "version": "1.0.121",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",