npm - omnius - Versions diffs - 1.0.340 → 1.0.341 - Mend

omnius 1.0.340 → 1.0.341

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -560736,7 +560736,7 @@ var init_critic = __esm({
 function extractSubject(errorText) {
   if (!errorText)
     return null;
-  const PATTERNS = [
+  const PATTERNS2 = [
     // Quoted module / type / symbol after recognizable phrases
     /cannot find (?:module|name|type|symbol|reference|file|namespace)\s+['"`]([^'"`\n]{1,80})['"`]/i,
     /(?:undefined|unresolved)\s+(?:reference|import|symbol)\s+(?:to\s+)?['"`]([^'"`\n]{1,80})['"`]/i,
@@ -560751,7 +560751,7 @@ function extractSubject(errorText) {
     /\bcannot resolve\s+['"`]?([^'"`\n\s]{1,120})['"`]?/i,
     /\bmodule not found:?\s+['"`]?([^'"`\n\s]{1,120})['"`]?/i
   ];
-  for (const re of PATTERNS) {
+  for (const re of PATTERNS2) {
     const m2 = errorText.match(re);
     if (m2 && m2[1]) {
       const subj = m2[1].trim();
@@ -565070,6 +565070,79 @@ var init_consolidation_runtime = __esm({
   }
 });
+// packages/orchestrator/dist/completion-evidence-gate.js
+function classifyCompletionClaim(text2) { // Classifies a claim's text against PATTERNS; returns { category, requiresIndependentEvidence, requiredCheck }.
+  const t2 = String(text2 || ""); // any falsy input (null, undefined, "", 0) becomes "" before the regex tests
+  for (const p2 of PATTERNS) { // PATTERNS is assigned only inside the init_completion_evidence_gate callback; it is undefined (not iterable) until that has run
+    if (p2.re.test(t2)) { // first matching entry wins, so the table order decides the category when several patterns match
+      return { category: p2.category, requiresIndependentEvidence: true, requiredCheck: p2.check };
+    }
+  }
+  return { category: null, requiresIndependentEvidence: false, requiredCheck: "" }; // no pattern matched: null category, no check required
+}
+function detectExitCodeMisread(text2) { // Returns true when the text pairs a non-zero exit mention with a success word, or contains an exit-code-masking shell idiom.
+  const t2 = String(text2 || ""); // falsy input is treated as the empty string
+  const nonzeroExit = /\bexit(?:ed|s)?\b[^.\n]{0,20}?\b(?:code\s*)?(?:[1-9]\d*|non-?zero)\b/i; // "exit"/"exited"/"exits", then up to 20 chars (no "." or newline), then a number starting 1-9 or "nonzero"/"non-zero"
+  const successWord = /\b(success(?:ful|fully|ed)?|succeeded|done|passed|works?|working|fine|ok|complete[d]?)\b/i; // any success-sounding word, anywhere in the text
+  if (nonzeroExit.test(t2) && successWord.test(t2)) // the two regexes are tested independently; no proximity between the matches is required
+    return true; // NOTE(review): the 20-char gap can reach a later unrelated number, e.g. "exit code 0 after 3 retries" matches nonzeroExit — confirm this is acceptable
+  if (/\|\|\s*true\b|\bset \+e\b|2>\/dev\/null\s*;\s*(echo|exit 0)/i.test(t2)) // three alternatives: "|| true", "set +e", or "2>/dev/null ;" followed by "echo" or "exit 0"
+    return true;
+  return false; // neither heuristic fired
+}
+function auditCompletionClaims(claims) { // Collects claims that classify into an evidence-requiring category but whose status is not "supported"; returns { ok, blockers }.
+  const blockers = []; // one entry per unsupported claim: { claim, category, requiredCheck }
+  for (const c8 of claims || []) { // a missing/falsy claims argument is treated as an empty list; each element is read via .text and .status
+    const cls = classifyCompletionClaim(c8.text);
+    if (!cls.requiresIndependentEvidence || !cls.category) // text matched no pattern: nothing to audit for this claim
+      continue;
+    if (c8.status !== "supported") { // strict comparison: any other status value, including undefined, counts as a blocker
+      blockers.push({ claim: String(c8.text || "").slice(0, 160), category: cls.category, requiredCheck: cls.requiredCheck }); // claim text is truncated to 160 characters
+    }
+  }
+  return { ok: blockers.length === 0, blockers }; // ok is true only when no blocker was recorded
+}
+function completionEvidenceDirective() { // Returns a fixed, multi-line instruction text; takes no arguments and reads no external state.
+  return [
+    "[Evidence-gated completion — do not pretend success]",
+    "Before you claim a task is done, optimize for 'is the end state actually correct?', NOT 'did I make the change?'. Each of these claim types REQUIRES an independent check from a DIFFERENT command than the one that made the change:",
+    "- A process is 'running/started/up' → re-probe liveness after a short delay (ps/pgrep, a health endpoint, or the listening port). A PID file or a launcher log is NOT proof it stayed alive — 'started' ≠ 'running'.",
+    "- Something is 'installed/available' → verify with `command -v`/`--version`/a package query, NOT the installer's own log.",
+    "- A bug is 'fixed' → re-run the ORIGINAL failing command/test and watch it pass, not just confirm the edit landed.",
+    "- A success in simulation/mock/dry-run mode → label it as SIMULATED; never present it as the real capability.",
+    "Exit codes: a non-zero exit is NOT success; a zero exit from a wrapper / `|| true` / `set +e` is NOT proof the underlying thing worked — inspect what actually happened.",
+    "If you cannot produce the independent evidence, say so plainly and mark the item unverified or partial — never report a success state you have not verified."
+  ].join("\n"); // the eight entries become eight newline-separated lines of one string
+}
+var PATTERNS; // claim-classification table read by classifyCompletionClaim; declared here, assigned inside the callback below
+var init_completion_evidence_gate = __esm({ // __esm is defined elsewhere in the bundle — presumably it defers this callback until the init function is first called; TODO confirm
+  "packages/orchestrator/dist/completion-evidence-gate.js"() {
+    "use strict";
+    PATTERNS = [ // each entry: { category, re, check }; order matters because the classifier returns the first match
+      {
+        category: "process_liveness",
+        re: /\b(running|started|launched|spun up|brought up|is up|listening|serving|alive|daemon (is|now)|service (is|now)|server (is|now)|booted|online)\b/i, // case-insensitive, no g/y flag, so .test() keeps no lastIndex state between calls
+        check: "Independently probe liveness AFTER a short delay: ps/pgrep for the actual process, a health endpoint, or `ss -ltnp` for the port — NOT the launcher log or a PID file (a PID file can exist while the process is dead)."
+      },
+      {
+        category: "installation",
+        re: /\b(installed|available in PATH|on the PATH|set up|provisioned|configured and ready|now available|dependency (is )?(present|installed))\b/i, // the i flag also makes "PATH" match lower-case "path"
+        check: "Independently confirm presence: `command -v <bin>` / `<bin> --version` / package manager query (e.g. `apt-cache policy`, `pip show`) — NOT the installer's own success log."
+      },
+      {
+        category: "fix_confirmation",
+        re: /\b(fixed|resolved|corrected|patched|repaired|no longer (fails|errors|crashes)|works now|issue (is )?gone)\b/i,
+        check: "Re-run the ORIGINAL failing command/test and observe it now passes — an independent reproduction, not just confirming the edit was written."
+      },
+      {
+        category: "reality", // listed last, so text that also matches an earlier entry is classified under that earlier category
+        re: /\b(simulat\w+|mock\w*|stub\w*|fake|dry[- ]?run|--sim|placeholder mode|fallback mode)\b/i, // NOTE(review): the leading \b before "--sim" needs a word character directly before the dashes, so " --sim" after a space looks unmatchable — confirm
+        check: "This was a SIMULATED/mock run. Label it explicitly as simulated and do NOT present it as the real capability; verify against the real system before claiming the real success."
+      }
+    ];
+  }
+});
 // packages/orchestrator/dist/tool-batching.js
 function isConcurrencySafe(toolName, readOnlyHints) {
   if (CONCURRENT_SAFE_TOOLS.has(toolName))
@@ -568620,6 +568693,7 @@ var init_agenticRunner = __esm({
     init_dist5();
     init_exploration_fanout();
     init_consolidation_runtime();
+    init_completion_evidence_gate();
     init_tool_batching();
     init_hooks2();
     init_todo_context_chunker();
@@ -570710,6 +570784,18 @@ ${input.answerText ?? ""}`.toLowerCase().trim();
             this._completionLedger = reconcileClaimsWithEvidence(this._completionLedger);
             this._saveCompletionLedgerSafe();
           }
+          try {
+            const _audit = auditCompletionClaims(this._completionLedger.proposedClaims.map((c8) => ({ text: c8.text, status: c8.status })));
+            if (!_audit.ok) {
+              const _checks = _audit.blockers.map((b) => `• [${b.category}] "${b.claim}" → ${b.requiredCheck}`).join("\n");
+              this._completionCaveat = [
+                this._completionCaveat || "",
+                `[UNVERIFIED SUCCESS — run these independent checks before claiming done]
+${_checks}`
+              ].filter(Boolean).join("\n\n");
+            }
+          } catch {
+          }
         }
         const optOverride = this.options.backwardPassReview;
         const raw = (process.env["OMNIUS_BACKWARD_PASS"] || "on").toLowerCase();
@@ -573943,6 +574029,12 @@ Respond with your assessment, then take action.`;
         }
         const contextComposition = await this.assembleContext(task, context2);
         let systemPrompt = contextComposition.assembled;
+        try {
+          systemPrompt = `${systemPrompt}
+${completionEvidenceDirective()}`;
+        } catch {
+        }
         try {
           const _cap = getMediaCapability();
           const _tier = this.options.modelTier ?? "large";
@@ -590380,6 +590472,7 @@ __export(dist_exports3, {
   appendSteeringLedgerEntry: () => appendSteeringLedgerEntry,
   appendSteeringOutcome: () => appendSteeringOutcome,
   arcSummary: () => arcSummary,
+  auditCompletionClaims: () => auditCompletionClaims,
   buildAgentNotification: () => buildAgentNotification,
   buildAgentTypeSummary: () => buildAgentTypeSummary,
   buildCompletionScenarioDecomposition: () => buildCompletionScenarioDecomposition,
@@ -590400,6 +590493,7 @@ __export(dist_exports3, {
   chooseCheapModelRoute: () => chooseCheapModelRoute,
   claimAssertion: () => claimAssertion,
   classifyBreadth: () => classifyBreadth,
+  classifyCompletionClaim: () => classifyCompletionClaim,
   classifyHandoff: () => classifyHandoff,
   classifyOllamaProcesses: () => classifyOllamaProcesses,
   cleanForStorage: () => cleanForStorage,
@@ -590408,6 +590502,7 @@ __export(dist_exports3, {
   clearTurnState: () => clearTurnState,
   combineValidatorResults: () => combineValidatorResults,
   compilePersonalityPrompt: () => compilePersonalityPrompt,
+  completionEvidenceDirective: () => completionEvidenceDirective,
   compressFindings: () => compressFindings,
   computeArc: () => computeArc,
   computeStabilityHash: () => computeStabilityHash,
@@ -590425,6 +590520,7 @@ __export(dist_exports3, {
   decomposeSpec: () => decomposeSpec,
   deleteAgentTaskSidecar: () => deleteAgentTaskSidecar,
   deriveClaimsFromProposedText: () => deriveClaimsFromProposedText,
+  detectExitCodeMisread: () => detectExitCodeMisread,
   detectExplorationIntent: () => detectExplorationIntent,
   detectPressure: () => detectPressure,
   detectTaskType: () => detectTaskType,
@@ -590638,6 +590734,7 @@ var init_dist8 = __esm({
     init_memory_consolidation();
     init_exploration_fanout();
     init_consolidation_runtime();
+    init_completion_evidence_gate();
   }
 });

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.340",
+  "version": "1.0.341",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.340",
+      "version": "1.0.341",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.340",
+  "version": "1.0.341",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",