omnius 1.0.353 → 1.0.355

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -567440,6 +567440,81 @@ var init_adversaryStream = __esm({
567440
567440
  }
567441
567441
  });
567442
567442
 
567443
+ // packages/orchestrator/dist/completion-resolution-verifier.js
567444
+ function resolutionSystemPrompt() {
567445
+ return [
567446
+ "You are a COMPLETION VERIFIER. A coding agent wants to mark a task complete.",
567447
+ "Your only job: decide whether the ACTIONS IT TOOK actually accomplish the ORIGINAL request.",
567448
+ "",
567449
+ "Judge from the ORIGINAL REQUEST and the ACTIONS + EVIDENCE only. The agent's own",
567450
+ "summary is just a CLAIM — never treat the claim as proof. The agent has a documented",
567451
+ "bias toward declaring success prematurely, so be strict:",
567452
+ " - An edit is not a fix unless the evidence shows the failing thing now passes.",
567453
+ " - 'started' / a PID / a log line is not 'running' or 'working'.",
567454
+ " - exit code 0 on an unrelated command does not resolve the request.",
567455
+ " - Doing PART of the request, or adjacent work, is NOT resolution.",
567456
+ " - If the request had multiple parts, EVERY part must be addressed.",
567457
+ "",
567458
+ "Respond with ONLY a JSON object, no prose, no code fences:",
567459
+ '{"resolved": true|false,',
567460
+ ' "confidence": 0.0-1.0,',
567461
+ ' "missing": ["each concrete part of the ORIGINAL request not yet accomplished"],',
567462
+ ' "rationale": "2-3 sentences citing the specific actions/evidence (or their absence)"}',
567463
+ "",
567464
+ "Set resolved=true ONLY when the actions+evidence directly and fully accomplish the",
567465
+ "original request. When in doubt, resolved=false and name what is missing."
567466
+ ].join("\n");
567467
+ }
567468
+ function buildResolutionPrompt(i2) {
567469
+ return [
567470
+ "ORIGINAL REQUEST (what the user actually asked for):",
567471
+ i2.originalGoal.slice(0, 2e3) || "(empty)",
567472
+ "",
567473
+ "ACTIONS THE AGENT TOOK:",
567474
+ i2.actionsDigest.slice(0, 3e3) || "(none recorded)",
567475
+ "",
567476
+ "GROUNDED EVIDENCE (outcomes, tests, files):",
567477
+ i2.evidenceDigest.slice(0, 2e3) || "(none recorded)",
567478
+ "",
567479
+ "THE AGENT'S COMPLETION CLAIM (audit this — do NOT trust it as evidence):",
567480
+ i2.proposedSummary.slice(0, 1500) || "(empty)",
567481
+ "",
567482
+ "Does the work above directly and fully accomplish the ORIGINAL REQUEST? Return ONLY the JSON object."
567483
+ ].join("\n");
567484
+ }
567485
+ function parseResolutionVerdict(raw) {
567486
+ if (!raw)
567487
+ return null;
567488
+ const text2 = raw.trim().replace(/^```(?:json)?/i, "").replace(/```$/i, "").trim();
567489
+ const start2 = text2.indexOf("{");
567490
+ const end = text2.lastIndexOf("}");
567491
+ if (start2 < 0 || end <= start2)
567492
+ return null;
567493
+ let obj;
567494
+ try {
567495
+ obj = JSON.parse(text2.slice(start2, end + 1));
567496
+ } catch {
567497
+ return null;
567498
+ }
567499
+ let confidence2 = Number(obj["confidence"]);
567500
+ if (!Number.isFinite(confidence2))
567501
+ confidence2 = 0.6;
567502
+ confidence2 = Math.min(1, Math.max(0, confidence2));
567503
+ const missingRaw = obj["missing"];
567504
+ const missing = Array.isArray(missingRaw) ? missingRaw.map((m2) => String(m2)).filter((m2) => m2.trim()).slice(0, 12) : [];
567505
+ return {
567506
+ resolved: obj["resolved"] === true,
567507
+ confidence: confidence2,
567508
+ missing,
567509
+ rationale: String(obj["rationale"] ?? "").slice(0, 800)
567510
+ };
567511
+ }
567512
+ var init_completion_resolution_verifier = __esm({
567513
+ "packages/orchestrator/dist/completion-resolution-verifier.js"() {
567514
+ "use strict";
567515
+ }
567516
+ });
567517
+
567443
567518
  // packages/orchestrator/dist/evidenceBranch.js
567444
567519
  function queryTerms(query) {
567445
567520
  return [
@@ -569606,6 +569681,7 @@ var init_agenticRunner = __esm({
569606
569681
  init_context_fabric();
569607
569682
  init_evidenceLedger();
569608
569683
  init_adversaryStream();
569684
+ init_completion_resolution_verifier();
569609
569685
  init_evidenceBranch();
569610
569686
  init_resolution_memory();
569611
569687
  init_contextEngine();
@@ -569923,6 +569999,9 @@ var init_agenticRunner = __esm({
569923
569999
  // because the configured verify command failed. Bounded by
569924
570000
  // OMNIUS_COMPLETION_VERIFY_MAX to avoid an endless verify→fix→verify loop.
569925
570001
  _completionVerifyRejections = 0;
570002
+ // Diagnostics counter for the inference-driven resolution gate. It also caps the
570003
+ // gate itself at 3 holds; the primary bounded escape is the same-summary completion-hold counter / REG-54.
570004
+ _resolutionGateRejections = 0;
569926
570005
  _lastBackwardPassVerdict = null;
569927
570006
  _lastBackwardPassCritique = null;
569928
570007
  // Run-local completion contract inferred from the user's ask/context before
@@ -570816,25 +570895,24 @@ Your hypotheses MUST address this specific error, not generic causes.
570816
570895
  return best && best.count >= 3 ? best : null;
570817
570896
  }
570818
570897
  /**
570819
- * Backend adapter for AUXILIARY inference (adversary critiques, branch
570820
- * extraction) — tool-less, think-off, JSON-shaped calls. The main backend's
570821
- * chatCompletion routes to Ollama's /v1/chat/completions, where qwen3-family
570822
- * models IGNORE think:false and /no_think and (with no tools to anchor
570823
- * output) emit a reasoning-only response that gets stripped to EMPTY. The
570824
- * native /api/chat path honors think:false. This adapter prefers it and sets
570825
- * a responseFormat so the native path enforces JSON mode. Falls back to
570826
- * chatCompletion for non-Ollama backends.
570898
+ * Backend adapter for AUXILIARY inference (adversary critiques, resolution
570899
+ * gate, branch extraction) — tool-less, think-off, JSON-shaped calls. Uses the
570900
+ * SAME inference backend the main agent loop uses — whatever provider the user
570901
+ * selected and only ensures a JSON responseFormat for these structured
570902
+ * calls. Provider-specific quirks (e.g. Ollama /v1 + qwen3 returning an empty
570903
+ * reasoning-only response for tool-less think-off calls) are recovered INSIDE
570904
+ * the backend's own chatCompletion (empty-recovery native /api/chat
570905
+ * fallback), so no caller here is Ollama-aware.
570827
570906
  */
570828
570907
  _auxInferenceBackend() {
570829
570908
  const b = this.backend;
570830
- const useNative = typeof b.nativeOllamaChatCompletion === "function";
570831
570909
  return {
570832
570910
  chatCompletion: (req3) => {
570833
570911
  const r2 = {
570834
570912
  ...req3,
570835
570913
  responseFormat: req3.responseFormat ?? { type: "json_object" }
570836
570914
  };
570837
- return useNative ? b.nativeOllamaChatCompletion(r2) : b.chatCompletion(r2);
570915
+ return b.chatCompletion(r2);
570838
570916
  }
570839
570917
  };
570840
570918
  }
@@ -571733,6 +571811,119 @@ ${input.answerText ?? ""}`.toLowerCase().trim();
571733
571811
  * up auto-blocking and surfaces a status event so the caller can take
571734
571812
  * a different path (eg. surface to user). max cycles enforced here.
571735
571813
  */
571814
+ /**
571815
+ * Inference-driven, original-request-anchored completion gate. Runs a
571816
+ * grounded LLM call (the main backend's chatCompletion via _auxInferenceBackend; a second attempt is made only when the first reply cannot be parsed) asking whether
571817
+ * the actions taken actually resolve the ORIGINAL request. Returns
571818
+ * { proceed:false, feedback, reason } to HOLD task_complete when not resolved.
571819
+ * Bounded: after OMNIUS_COMPLETION_HOLD_MAX holds of the same summary the
571820
+ * existing REG-54 escape finishes the run as incomplete_verification.
571821
+ *
571822
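+ * Disabled by OMNIUS_DISABLE_RESOLUTION_GATE=1, by disableAdversaryCritic, by
571823
+ * a missing backend, or when the run took no file/shell actions; also skipped after 3 accumulated holds, for a summary starting with BLOCKED, or when no original goal is recorded. Fails OPEN on
571824
+ * any inference error, unparseable verdict, or verdict confidence below 0.5 — never hard-stalls a run because the verifier hiccuped.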
571825
+ *
571826
+ * Anti-reward-hacking: the verdict is judged from originalGoal + actions +
571827
+ * evidence; proposedSummary is passed only as "the claim under audit".
571828
+ */
571829
+ async _runResolutionGate(turn, proposedSummary, toolCallLog) {
571830
+ if (process.env["OMNIUS_DISABLE_RESOLUTION_GATE"] === "1")
571831
+ return { proceed: true };
571832
+ if (this.options.disableAdversaryCritic === true)
571833
+ return { proceed: true };
571834
+ if (!this.backend || typeof this.backend.chatCompletion !== "function")
571835
+ return { proceed: true };
571836
+ if (this._resolutionGateRejections >= 3)
571837
+ return { proceed: true };
571838
+ if (/^\s*BLOCKED\b/i.test(proposedSummary))
571839
+ return { proceed: true };
571840
+ const originalGoal = (this._taskState.originalGoal || this._taskState.goal || "").trim();
571841
+ if (!originalGoal)
571842
+ return { proceed: true };
571843
+ const actionable = toolCallLog.some((e2) => e2.mutated || e2.name === "shell" || e2.name === "file_write" || e2.name === "file_edit");
571844
+ if (!actionable)
571845
+ return { proceed: true };
571846
+ const filesChanged = [...this._taskState.modifiedFiles.entries()].map(([p2, action]) => ` - ${action} ${p2}`).slice(0, 40);
571847
+ const shellLines = toolCallLog.filter((e2) => e2.name === "shell").slice(-12).map((e2) => ` - shell: ${e2.success ? "ok" : "FAIL"} — ${(e2.outputPreview || "").slice(0, 120)}`);
571848
+ const actionsDigest = [
571849
+ filesChanged.length ? `Files changed (${filesChanged.length}):
571850
+ ${filesChanged.join("\n")}` : "Files changed: none",
571851
+ shellLines.length ? `Recent commands:
571852
+ ${shellLines.join("\n")}` : "Commands run: none"
571853
+ ].join("\n");
571854
+ const wf = this._worldFacts;
571855
+ const evidenceParts = [];
571856
+ if (wf?.lastTest?.summary) {
571857
+ evidenceParts.push(`Last test outcome: ${wf.lastTest.passed ? "PASSED" : "FAILED"} — ${wf.lastTest.summary.slice(0, 200)}`);
571858
+ }
571859
+ const failCount = toolCallLog.filter((e2) => e2.success === false).length;
571860
+ evidenceParts.push(`Failed tool calls this run: ${failCount}`);
571861
+ const evidenceDigest = evidenceParts.join("\n");
571862
+ let verdict = null;
571863
+ try {
571864
+ const backend = this._auxInferenceBackend();
571865
+ for (let attempt = 0; attempt < 2 && !verdict; attempt++) {
571866
+ const resp = await backend.chatCompletion({
571867
+ messages: [
571868
+ { role: "system", content: resolutionSystemPrompt() },
571869
+ {
571870
+ role: "user",
571871
+ content: buildResolutionPrompt({
571872
+ originalGoal,
571873
+ actionsDigest,
571874
+ evidenceDigest,
571875
+ proposedSummary
571876
+ })
571877
+ }
571878
+ ],
571879
+ tools: [],
571880
+ temperature: 0,
571881
+ maxTokens: 700,
571882
+ timeoutMs: 3e4
571883
+ });
571884
+ verdict = parseResolutionVerdict(resp.choices?.[0]?.message?.content ?? "");
571885
+ }
571886
+ } catch {
571887
+ verdict = null;
571888
+ }
571889
+ if (!verdict)
571890
+ return { proceed: true };
571891
+ if (verdict.resolved || verdict.confidence < 0.5) {
571892
+ this._resolutionGateRejections = 0;
571893
+ return { proceed: true };
571894
+ }
571895
+ this._resolutionGateRejections++;
571896
+ const missing = verdict.missing.length ? verdict.missing.map((m2) => ` • ${m2}`).join("\n") : " • (verifier did not enumerate specific gaps; re-read the original request)";
571897
+ const feedback = [
571898
+ `[COMPLETION BLOCKED — actions do not yet resolve the original request]`,
571899
+ `Original request: ${originalGoal.slice(0, 300)}`,
571900
+ `Verifier rationale: ${verdict.rationale}`,
571901
+ `Still unaddressed:`,
571902
+ missing,
571903
+ ``,
571904
+ `Do the work that actually accomplishes the unaddressed items above, then re-verify.`,
571905
+ `Do NOT call task_complete again until each item is done AND its result is evidenced.`
571906
+ ].join("\n");
571907
+ const reason = `task resolution not verified (${verdict.missing.length} item(s) unaddressed)`;
571908
+ this.emit({
571909
+ type: "adversary_reaction",
571910
+ adversary: {
571911
+ class: "false_success",
571912
+ shortText: `Completion blocked — ${verdict.missing.length} request item(s) unresolved`,
571913
+ confidence: verdict.confidence,
571914
+ details: feedback
571915
+ },
571916
+ turn,
571917
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
571918
+ });
571919
+ this.emit({
571920
+ type: "status",
571921
+ content: `Resolution gate HELD task_complete: ${reason}`,
571922
+ turn,
571923
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
571924
+ });
571925
+ return { proceed: false, feedback, reason };
571926
+ }
571736
571927
  /**
571737
571928
  * Completion compile/verify gate (opt-in). Runs a configured shell command
571738
571929
  * (e.g. a typecheck/build) before `task_complete` is accepted, but ONLY when
@@ -571841,6 +572032,13 @@ ${input.answerText ?? ""}`.toLowerCase().trim();
571841
572032
  const _verify = await this._runCompletionVerifyGate(turn);
571842
572033
  if (!_verify.proceed)
571843
572034
  return _verify;
572035
+ const _resolution = await this._runResolutionGate(turn, proposedSummary, toolCallLog);
572036
+ if (!_resolution.proceed) {
572037
+ return {
572038
+ proceed: false,
572039
+ feedback: _resolution.feedback ?? "Completion not resolved."
572040
+ };
572041
+ }
571844
572042
  if (this._completionLedger && proposedSummary) {
571845
572043
  const _newClaims = deriveClaimsFromProposedText({
571846
572044
  text: proposedSummary,
@@ -574960,6 +575158,7 @@ Respond with your assessment, then take action.`;
574960
575158
  this._fileWritesThisRun = 0;
574961
575159
  this._backwardPassCyclesUsed = 0;
574962
575160
  this._completionVerifyRejections = 0;
575161
+ this._resolutionGateRejections = 0;
574963
575162
  this._lastBackwardPassVerdict = null;
574964
575163
  this._lastBackwardPassCritique = null;
574965
575164
  this._completionContract = null;
@@ -585783,6 +585982,15 @@ ${description}`
585783
585982
  const justSuppressed = this._thinkSuppressed && this._thinkFailStreak === _OllamaAgenticBackend._thinkFailThreshold;
585784
585983
  const shouldRetryThinkGuard = outcome !== null && effectiveThink === true && (justSuppressed || outcome === "empty_after_strip" || outcome === "unclosed_think");
585785
585984
  if (shouldRetryThinkGuard || shouldRecoverFromEmpty) {
585985
+ if (shouldRecoverFromEmpty) {
585986
+ try {
585987
+ const _native = await this.nativeOllamaChatCompletion(request);
585988
+ const _nText = String(_native.choices?.[0]?.message?.content ?? "");
585989
+ if (_nText.trim().length >= 2)
585990
+ return _native;
585991
+ } catch {
585992
+ }
585993
+ }
585786
585994
  const retryMessages = injectNoThinkDirective(requestMessages);
585787
585995
  const retryBody = {
585788
585996
  model: this.model,
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.353",
3
+ "version": "1.0.355",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.353",
9
+ "version": "1.0.355",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.353",
3
+ "version": "1.0.355",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",