npm - omnius - Versions diffs - 1.0.383 → 1.0.385 - Mend

omnius 1.0.383 → 1.0.385

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/index.js CHANGED Viewed

@@ -8602,12 +8602,14 @@ __export(vision_exports, {
   MOONDREAM3_PREVIEW_HF_MODEL: () => MOONDREAM3_PREVIEW_HF_MODEL,
   VisionTool: () => VisionTool,
   analyzeImageWithVision: () => analyzeImageWithVision,
+  callOllamaVision: () => callOllamaVision,
   formatVisionPointResult: () => formatVisionPointResult,
   getVisionPointDiagnostics: () => getVisionPointDiagnostics,
   locateImagePoints: () => locateImagePoints,
   normalizeVisionModelName: () => normalizeVisionModelName,
   resetMoondreamClient: () => resetMoondreamClient,
   resolveHuggingFaceVisionModelCandidates: () => resolveHuggingFaceVisionModelCandidates,
+  resolveInstalledOllamaVisionModelAlias: () => resolveInstalledOllamaVisionModelAlias,
   resolveOllamaVisionModelCandidates: () => resolveOllamaVisionModelCandidates
 });
 import { mkdirSync as mkdirSync9, readFileSync as readFileSync12, existsSync as existsSync14, statSync as statSync7, unlinkSync as unlinkSync2, writeFileSync as writeFileSync10 } from "node:fs";
@@ -8940,7 +8942,8 @@ function resolveOllamaVisionModelCandidates(options2 = {}) {
     ollamaVisionModelName(options2.preferredModel || ""),
     process.env["OLLAMA_VISION_MODEL"] || "",
     options2.activeModelHasVision && options2.activeModel ? options2.activeModel : "",
-    DEFAULT_OLLAMA_VISION_MODEL
+    DEFAULT_OLLAMA_VISION_MODEL,
+    `${DEFAULT_OLLAMA_VISION_MODEL}:latest`
   ].map((entry) => entry.trim()).filter(Boolean);
   return [...new Set(candidates)];
 }
@@ -9191,6 +9194,26 @@ async function callOllamaVision(ollamaHost, model, prompt, imageBase64, timeoutM
   if (!res.ok && shouldAutoPullOllamaVisionModel(model)) {
     const errText = await res.text().catch(() => "");
     if (res.status === 404 || /not found|does not exist/i.test(errText)) {
+      const installedAlias = await resolveInstalledOllamaVisionModelAlias(ollamaHost, model, timeoutMs);
+      if (installedAlias && installedAlias !== model) {
+        res = await fetch(`${ollamaHost}/api/generate`, {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({
+            model: installedAlias,
+            prompt,
+            images: [imageBase64],
+            stream: false,
+            think: false,
+            options: { temperature: 0 }
+          }),
+          signal: AbortSignal.timeout(timeoutMs)
+        });
+        if (res.ok) {
+          const data2 = await res.json();
+          return typeof data2.response === "string" && data2.response.trim() ? data2.response : null;
+        }
+      }
       try {
         ensureDiskSpaceForOllamaVisionModel(model);
         pullOllamaVisionModel(model);
@@ -9217,6 +9240,33 @@ async function callOllamaVision(ollamaHost, model, prompt, imageBase64, timeoutM
   const data = await res.json();
   return typeof data.response === "string" && data.response.trim() ? data.response : null;
 }
/**
 * Find the installed Ollama model name that can stand in for `model`.
 *
 * Fetches `${ollamaHost}/api/tags` (the path is appended verbatim) and compares
 * the reported model names with the request. An exact match wins. When the
 * request carries no tag (no ":"), `<model>:latest` is tried next, then the
 * first installed name that starts with `<model>:`.
 *
 * @param {string} ollamaHost Base URL of the Ollama server.
 * @param {string} model Requested model name, with or without a tag.
 * @param {number} [timeoutMs=5000] Caller's timeout; the lookup clamps it to 1000-5000 ms.
 * @returns {Promise<string|null>} Installed name to use, or null when the name is
 *   blank, nothing matches, the server answers with a non-OK status, or the
 *   request fails.
 */
async function resolveInstalledOllamaVisionModelAlias(ollamaHost, model, timeoutMs = 5e3) {
  // A non-string model cannot match anything; answer null instead of throwing on .trim().
  const requested = typeof model === "string" ? model.trim() : "";
  if (!requested)
    return null;
  // AbortSignal.timeout() rejects NaN, which would silently disable the lookup.
  const clampedMs = Math.min(Math.max(timeoutMs, 1e3), 5e3);
  const lookupTimeoutMs = Number.isNaN(clampedMs) ? 5e3 : clampedMs;
  try {
    const res = await fetch(`${ollamaHost}/api/tags`, {
      signal: AbortSignal.timeout(lookupTimeoutMs)
    });
    if (!res.ok)
      return null;
    const data = await res.json();
    const listed = Array.isArray(data?.models) ? data.models : [];
    // Skip malformed entries individually so one bad row does not hide the valid ones.
    const names = listed
      .map((entry) => typeof entry?.name === "string" ? entry.name.trim() : "")
      .filter(Boolean);
    if (names.includes(requested))
      return requested;
    // Only an untagged request may be widened to a tagged installed variant.
    if (!requested.includes(":")) {
      const latest = `${requested}:latest`;
      if (names.includes(latest))
        return latest;
      const tagged = names.find((name) => name.startsWith(`${requested}:`));
      if (tagged)
        return tagged;
    }
  } catch {
    // Best-effort probe: network errors, timeouts and bad JSON all mean "no alias known".
    return null;
  }
  return null;
}
 function shouldAutoPullOllamaVisionModel(model) {
   if (!envFlag2(process.env["OMNIUS_OLLAMA_VISION_AUTO_PULL"], true))
     return false;
@@ -295276,6 +295326,21 @@ function getTodoSessionId() {
     return envSession;
   return "default";
 }
/**
 * Reject large flat todo lists: a list of 20 or more items must contain at
 * least one child whose `parentId` names another todo in the same list.
 *
 * Ids and parent ids are compared after trimming, so a padded id still
 * matches its children. A todo that names itself as parent does not count as
 * nesting. Entries that are not objects, or that lack string ids, are ignored.
 *
 * @param {Array<{id?: string, parentId?: string}>} todos Incoming todo items.
 * @returns {string|null} An explanatory error message, or null when the list is
 *   short enough or already nested.
 */
function validateLargeTaskDecomposition(todos) {
  if (!Array.isArray(todos) || todos.length < 20)
    return null;
  const normalizeId = (value) => typeof value === "string" ? value.trim() : "";
  const knownIds = new Set(todos.map((todo) => normalizeId(todo?.id)).filter(Boolean));
  const hasLinkedChild = todos.some((todo) => {
    const parentId = normalizeId(todo?.parentId);
    // The parent must exist in this list and must be a different todo.
    return parentId !== "" && parentId !== normalizeId(todo?.id) && knownIds.has(parentId);
  });
  if (hasLinkedChild)
    return null;
  return [
    "Large todo lists (20+ items) must be decomposed into a nested tree with stable ids and parentId links.",
    "Create parent objectives and child leaf tasks instead of a flat checklist.",
    'Canonical shape: todo_write({"todos":[{"id":"group-1","content":"Steps 01-08","status":"in_progress"},{"id":"step-01","parentId":"group-1","content":"Complete step 01 and verify evidence","status":"in_progress"}]})'
  ].join(" ");
}
 function normalizeIncomingTodos(args) {
   const repairNotes = [];
   const record = args;
@@ -295462,6 +295527,15 @@ Mark tasks complete IMMEDIATELY after finishing — don't batch. Never mark comp
               declaredArtifacts: Array.isArray(entry["declaredArtifacts"]) ? entry["declaredArtifacts"].filter((x) => typeof x === "string") : void 0
             });
           }
+          const decompositionError = validateLargeTaskDecomposition(incoming);
+          if (decompositionError) {
+            return {
+              success: false,
+              output: "",
+              error: decompositionError,
+              durationMs: performance.now() - start2
+            };
+          }
           const sessionId = typeof args["session_id"] === "string" && args["session_id"].trim() ? args["session_id"].trim() : typeof args["sessionId"] === "string" && args["sessionId"].trim() ? args["sessionId"].trim() : getTodoSessionId();
           const oldTodos = readTodos(sessionId);
           const canonicalize2 = (todos) => JSON.stringify(todos.map((t2) => ({
@@ -547133,6 +547207,23 @@ function summarizeProcessFailure(stdout, stderr) {
   }
   return parts.join("\n").slice(0, 2200);
 }
/**
 * Render an object-recognition result as human-readable text.
 *
 * When the result carries a non-empty `extra_labels` array, the CLIP candidate
 * scores lead the output and recognized matches (if any) follow in a second
 * section. Otherwise the recognized matches are listed on their own, or a
 * fixed "nothing recognized" sentence is returned.
 *
 * @param {{matches?: Array<object>, extra_labels?: Array<object>, recognized_count?: number}} result
 *   Recognition payload; only entries whose `recognized` field is truthy are listed.
 * @returns {string} Formatted report.
 */
function formatObjectRecognitionResult(result) {
  // Scores are fractions; a missing or non-numeric score prints "n/a" rather than "NaN%".
  const asPercent = (score) => typeof score === "number" && Number.isFinite(score) ? `${(score * 100).toFixed(0)}%` : "n/a";
  const allMatches = Array.isArray(result?.matches) ? result.matches : [];
  const matches = allMatches.filter((m2) => m2?.recognized);
  const matchLines = matches.map((m2) => `  ${m2.label}: ${asPercent(m2.blended_score)} (image=${asPercent(m2.image_similarity)}, text=${asPercent(m2.text_similarity)})`);
  const matchBlock = matchLines.join("\n");
  const extraLabels = Array.isArray(result?.extra_labels) ? result.extra_labels : [];
  if (extraLabels.length > 0) {
    const extraLines = extraLabels.map((s2) => `  ${s2?.label}: ${asPercent(s2?.score)}`);
    const sections = [`CLIP candidate label scores:\n${extraLines.join("\n")}`];
    if (matches.length > 0) {
      sections.push(`Persistent visual memory matches above threshold:\n${matchBlock}`);
    }
    return sections.join("\n\n");
  }
  if (matches.length === 0)
    return "No taught objects recognized in this image.";
  // Fall back to the number of listed matches when the payload omits its own count.
  const recognizedCount = typeof result.recognized_count === "number" ? result.recognized_count : matches.length;
  return `Recognized ${recognizedCount} object(s):\n${matchBlock}`;
}
 var VMEM_DIR, VENV_DIR2, VENV_PY, VENV_PIP2, VISUAL_MEMORY_ACTIONS, VisualMemoryTool;
 var init_visual_memory = __esm({
   "packages/execution/dist/tools/visual-memory.js"() {
@@ -547668,18 +547759,7 @@ print(json.dumps({
           const payload = JSON.stringify(result);
           return { success: true, output: payload, llmContent: payload, durationMs: performance.now() - start2 };
         }
-        const matches = (result.matches || []).filter((m2) => m2.recognized);
-        const lines = matches.map((m2) => `  ${m2.label}: ${(m2.blended_score * 100).toFixed(0)}% (image=${(m2.image_similarity * 100).toFixed(0)}%, text=${(m2.text_similarity * 100).toFixed(0)}%)`);
-        let output = matches.length > 0 ? `Recognized ${result.recognized_count} object(s):
-${lines.join("\n")}` : "No taught objects recognized in this image.";
-        if (result.extra_labels) {
-          const extraLines = result.extra_labels.map((s2) => `  ${s2.label}: ${(s2.score * 100).toFixed(0)}%`);
-          output += `
-CLIP label scores:
-${extraLines.join("\n")}`;
-        }
-        return { success: true, output, durationMs: performance.now() - start2 };
+        return { success: true, output: formatObjectRecognitionResult(result), durationMs: performance.now() - start2 };
       }
       // =========================================================================
       // Memory Management
@@ -569521,8 +569601,8 @@ var init_focusSupervisor = __esm({
             const directive = this.setDirective({
               turn: input.turn,
               state: ignoredManyTimes ? "verify_or_block" : "single_next_action",
-              reason: ignoredManyTimes ? `model ignored ${this.ignoredDirectiveStreak} focus directives; report incomplete or ask for help instead of trying another variant` : `model ignored prior directive ${prior.id}; ${prior.reason}`,
-              requiredNextAction: ignoredManyTimes ? "report_incomplete" : prior.requiredNextAction,
+              reason: ignoredManyTimes ? `model ignored ${this.ignoredDirectiveStreak} focus directives; take the required recovery action before trying another variant` : `model ignored prior directive ${prior.id}; ${prior.reason}`,
+              requiredNextAction: prior.requiredNextAction,
               forbiddenActionFamilies: unique2([
                 ...prior.forbiddenActionFamilies,
                 family
@@ -569626,7 +569706,7 @@ var init_focusSupervisor = __esm({
             turn: input.turn,
             state: "forced_replan",
             reason: `same ${input.toolName} failure family repeated ${next.count} times: ${next.sample}`,
-            requiredNextAction: input.toolName === "shell" ? "read_authoritative_target" : "update_todos",
+            requiredNextAction: input.toolName === "shell" ? "edit_different_target" : "update_todos",
             forbiddenActionFamilies: [actionFamily(input.toolName, input.args)]
           });
         }
@@ -572951,6 +573031,7 @@ ${parts.join("\n")}
           memoryPrefix: options2?.memoryPrefix ?? "",
           memoryPrefixHash: options2?.memoryPrefixHash ?? "",
           stateDir: options2?.stateDir ?? "",
+          surface: options2?.surface ?? "tui",
           artifactMode: options2?.artifactMode ?? "user-task",
           disablePersistentMemory: options2?.disablePersistentMemory ?? false,
           disableCodebaseMap: options2?.disableCodebaseMap ?? false,
@@ -573302,16 +573383,21 @@ ${parts.join("\n")}
       // -------------------------------------------------------------------------
       /** Infer the surface identifier from runner configuration and dynamic context. */
       _inferSurface() {
-        const ctx3 = this._stickyDynamicContext || this.options.dynamicContext || "";
-        if (/Admin Capability/i.test(ctx3) || /telegram-admin/i.test(ctx3))
-          return "telegram-admin";
-        if (/Telegram|telegram|Voice Soul/i.test(ctx3) || this.options.stateDir)
-          return "telegram-public";
-        if (/api/i.test(ctx3))
-          return "api";
-        if (/background/i.test(ctx3))
-          return "background";
-        return "tui";
+        return this.options.surface;
+      }
+      _isTelegramSurface() {
+        return this.options.surface === "telegram-public" || this.options.surface === "telegram-admin";
+      }
+      stickyDynamicContextForActiveSurface() {
+        const ctx3 = this._stickyDynamicContext.trim();
+        if (!ctx3)
+          return "";
+        if (this._isTelegramSurface())
+          return ctx3;
+        const voiceSoul = this.extractDynamicMarkdownBlock(ctx3, "## Voice Soul Context", 6e3);
+        if (!voiceSoul)
+          return "";
+        return /Telegram|telegram|Public Telegram|Admin Capability/i.test(voiceSoul) ? "" : voiceSoul;
       }
       /**
        * Build structured context via the context engine.
@@ -587072,10 +587158,11 @@ ${postCompactRestore.join("\n")}`);
 [Ephemeral skill-pack restore — current run only, do not persist]
 ${this._ephemeralSkillPackContext}
 Use skill_extract for targeted skill unpacking; do not load full skills into the main context unless necessary.` : "";
-        const stickyDynamicContextReminder = this._stickyDynamicContext ? `
+        const scopedStickyDynamicContext = this.stickyDynamicContextForActiveSurface();
+        const stickyDynamicContextReminder = scopedStickyDynamicContext ? `
 [Sticky dynamic context restore — surface/persona anchors]
-${this._stickyDynamicContext}` : "";
+${scopedStickyDynamicContext}` : "";
         const compactionMsg = {
           role: "system",
           // WO-CE-03: XML tags for structural clarity on small/medium models
@@ -587092,7 +587179,7 @@ ${fullSummary}
         this.persistCheckpoint(fullSummary);
         let narrowedHead = [...head];
         const EVIDENCE_RULE_COMPACT = `EVIDENCE RULE (PRIORITY 0): never claim something works or is true unless a tool result you saw this turn proves it. A command succeeding only means it ran — not that the intended effect happened; verify the end-state directly before claiming it. A negative, empty, or error result means failed or absent — report it, never explain it away with an untested theory. Never describe how you got a result (tool, command, or source) unless you actually used it. Do not assert relationships the output does not show. Say "I could not verify X" when it is unproven — that is the correct answer, not a guess.`;
-        const telegramPersonaHead = /Telegram|Voice Soul Context|Public Telegram voice profile/.test(this._stickyDynamicContext) ? `You are Omnius replying through Telegram. Your visible assistant text is sent to Telegram; keep it concise, scoped, and user-facing. Do not emit scratch notes, router decisions, internal status, or no_reply text. Use available tools when needed and call task_complete when the Telegram run is complete.
+        const telegramPersonaHead = this._isTelegramSurface() && /Telegram|Voice Soul Context|Public Telegram voice profile/.test(scopedStickyDynamicContext) ? `You are Omnius replying through Telegram. Your visible assistant text is sent to Telegram; keep it concise, scoped, and user-facing. Do not emit scratch notes, router decisions, internal status, or no_reply text. Use available tools when needed and call task_complete when the Telegram run is complete.
 ${EVIDENCE_RULE_COMPACT}
@@ -587247,7 +587334,12 @@ ${content.slice(0, 8e3)}
             while (trimmedRecent.length > 1 && trimmedRecent[0]?.role === "tool") {
               trimmedRecent = trimmedRecent.slice(1);
             }
-            result = [...head, compactionMsg, ...stickyToKeep, ...trimmedRecent];
+            result = [
+              ...narrowedHead,
+              compactionMsg,
+              ...stickyToKeep,
+              ...trimmedRecent
+            ];
           }
           if (trimmedRecent.length < filteredRecent.length) {
             this.emit({
@@ -680834,6 +680926,7 @@ ${conversationStream}`
         );
         const requestTimeoutMs = config.timeoutMs ?? 3e5;
         const runner = new AgenticRunner(backend, {
+          surface: isAdminDM || isAdminGroup ? "telegram-admin" : "telegram-public",
           // Admin DMs are operator-directed work sessions. A hard turn cap turns
           // active tool progress into a false "completed" Telegram panel when the
           // model has not reached task_complete yet. Public/group runs stay bounded.
@@ -719721,6 +719814,7 @@ Only tools allowed by this profile are visible and executable.`
     ].filter(Boolean).join("");
   }
   const runner = new AgenticRunner(backend, {
+    surface: "tui",
     maxTurns: realtimeEnabled ? Math.min(effectiveMaxTurns, 8) : effectiveMaxTurns,
     maxTokens: realtimeEnabled ? 512 : 16384,
     temperature: realtimeEnabled ? 0.6 : 0,
@@ -722715,6 +722809,7 @@ Respond to the scoped Telegram target when complete.`
       }
       const modelTier2 = getModelTier(currentConfig.model);
       const runner = new AgenticRunner(backend, {
+        surface: "background",
         // 0 = unlimited; halt only on task_complete or abort. Background
         // prompts may legitimately need many turns; an arbitrary cap stalls
         // them mid-task.
@@ -728675,11 +728770,17 @@ var init_serve2 = __esm({
 // packages/cli/src/commands/eval.ts
 var eval_exports = {};
 __export(eval_exports, {
-  evalCommand: () => evalCommand
+  createTempEvalRepo: () => createTempEvalRepo,
+  evalCommand: () => evalCommand,
+  expectedStatusesForEvalTask: () => expectedStatusesForEvalTask
 });
 import { tmpdir as tmpdir23 } from "node:os";
 import { mkdirSync as mkdirSync106, writeFileSync as writeFileSync90 } from "node:fs";
 import { join as join178 } from "node:path";
/**
 * Pick the statuses that count as a pass for an eval task.
 *
 * Outside live mode this is the task's own `expectedStatuses`. In live mode
 * it is `liveExpectedStatuses` when the task provides that list, otherwise
 * `expectedStatuses` with "needs_human_decision" removed.
 *
 * The function never throws: a task without a status list yields an empty
 * array (nothing passes), which keeps callers that invoke it from an error
 * handler from failing a second time.
 *
 * @param {{expectedStatuses?: string[], liveExpectedStatuses?: string[]}} task Eval task definition.
 * @param {boolean} live Whether the eval runs against a live backend.
 * @returns {string[]} Statuses accepted as a pass.
 */
function expectedStatusesForEvalTask(task, live) {
  const baseStatuses = Array.isArray(task?.expectedStatuses) ? task.expectedStatuses : [];
  if (!live)
    return baseStatuses;
  if (Array.isArray(task?.liveExpectedStatuses))
    return task.liveExpectedStatuses;
  return baseStatuses.filter((status) => status !== "needs_human_decision");
}
 async function evalCommand(opts, config) {
   const suiteName = opts.suite ?? "basic";
   const suite = SUITES[suiteName];
@@ -728693,6 +728794,10 @@ async function evalCommand(opts, config) {
   printKeyValue("Suite", suiteName, 2);
   printKeyValue("Tasks", String(suite.length), 2);
   printKeyValue("Mode", modeLabel, 2);
+  if (useLive) {
+    printKeyValue("Live pass statuses", "success, partial_success", 2);
+    printInfo("Live eval treats needs_human_decision as a failure for concrete coding tasks.");
+  }
   const evalRepoRoot = opts.repoPath ?? createTempEvalRepo();
   let rawBackend;
   if (useLive) {
@@ -728751,22 +728856,27 @@ async function evalCommand(opts, config) {
     let result;
     try {
       const report2 = await loop.run(task.request, evalRepoRoot);
-      const passed2 = task.expectedStatuses.includes(report2.status);
+      const expectedStatuses = expectedStatusesForEvalTask(task, useLive);
+      const passed2 = expectedStatuses.includes(report2.status);
       result = {
         task,
         status: report2.status,
+        expectedStatuses,
         passed: passed2,
         durationMs: Date.now() - start2
       };
       if (passed2) {
         spinner.succeed(`[${task.id}] PASS (${report2.status})`);
       } else {
-        spinner.fail(`[${task.id}] FAIL (got: ${report2.status})`);
+        spinner.fail(
+          `[${task.id}] FAIL (got: ${report2.status}; expected: ${expectedStatuses.join(", ")})`
+        );
       }
     } catch (err) {
       result = {
         task,
         status: "error",
+        expectedStatuses: expectedStatusesForEvalTask(task, useLive),
         passed: false,
         durationMs: Date.now() - start2,
         error: err instanceof Error ? err.message : String(err)
@@ -728792,7 +728902,7 @@ async function evalCommand(opts, config) {
       const icon = r2.passed ? "PASS" : "FAIL";
       printKeyValue(
         `${r2.task.id} [${icon}]`,
-        `${r2.status} (${formatDuration(r2.durationMs)})`,
+        `${r2.status} (${formatDuration(r2.durationMs)}; expected ${r2.expectedStatuses.join(", ")})`,
         2
       );
       if (r2.error) {
@@ -728810,13 +728920,79 @@ async function evalCommand(opts, config) {
 /**
  * Create a fresh fixture repository for eval runs under the OS temp directory.
  *
  * The repository holds a package.json (ESM, with a `test` script chaining the
  * four test files), five small modules under src/, and four assertion scripts
  * under tests/. Some tests do not pass against the fixtures as written
  * (tests/users.test.js imports `paginateUsers`, which src/users.js does not
  * export; tests/auth.test.js calls `authenticateUser(null)`) — presumably the
  * eval tasks are expected to make them pass; confirm against the suites.
  *
  * The directory name combines the timestamp with the process id and a random
  * suffix, so calls in the same millisecond or from parallel processes never
  * share a repository.
  *
  * @returns {string} Absolute path of the created repository.
  */
 function createTempEvalRepo() {
   const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2, 10)}`;
   const dir = join178(tmpdir23(), `omnius-eval-${uniqueSuffix}`);
   mkdirSync106(dir, { recursive: true });
   const packageJson = JSON.stringify({
     name: "eval-repo",
     version: "0.0.0",
     type: "module",
     scripts: { test: "node tests/auth.test.js && node tests/users.test.js && node tests/db.test.js && node tests/payment.test.js" }
   }, null, 2);
   // Each entry is [path relative to the repo root, file content].
   const fixtures = [
     ["package.json", packageJson],
     ["src/auth.js", [
       "export function authenticateUser(user) {",
       "  return user.active;",
       "}"
     ].join("\n")],
     ["src/users.js", [
       "export function listUsers(users) {",
       "  return users.slice();",
       "}"
     ].join("\n")],
     ["src/db.js", [
       "export function getConnection() {",
       '  return Promise.resolve({ id: "primary", open: true });',
       "}"
     ].join("\n")],
     ["src/payment.js", [
       "export function applyDiscount(amount, percent) {",
       "  return amount - amount * (percent / 100);",
       "}",
       "",
       "export function addTax(amount, taxRate) {",
       "  return amount + amount * taxRate;",
       "}"
     ].join("\n")],
     ["src/api.js", [
       "export function health() {",
       "  return { ok: true };",
       "}",
       "",
       "export function version() {",
       '  return "0.0.0";',
       "}"
     ].join("\n")],
     ["tests/auth.test.js", [
       "import assert from 'node:assert/strict';",
       "import { authenticateUser } from '../src/auth.js';",
       "assert.equal(authenticateUser({ id: 'u1', active: true }), true);",
       "assert.equal(authenticateUser({ id: 'u2', active: false }), false);",
       "assert.equal(authenticateUser(null), false);",
       "assert.equal(authenticateUser(undefined), false);"
     ].join("\n")],
     ["tests/users.test.js", [
       "import assert from 'node:assert/strict';",
       "import { paginateUsers } from '../src/users.js';",
       "const users = ['a', 'b', 'c', 'd', 'e'];",
       "assert.deepEqual(paginateUsers(users, 1, 2), { items: ['a', 'b'], page: 1, pageSize: 2, totalPages: 3, totalItems: 5 });",
       "assert.deepEqual(paginateUsers(users, 3, 2).items, ['e']);"
     ].join("\n")],
     ["tests/db.test.js", [
       "import assert from 'node:assert/strict';",
       "import { getConnection } from '../src/db.js';",
       "const conn = await getConnection();",
       "assert.deepEqual(conn, { id: 'primary', open: true });"
     ].join("\n")],
     ["tests/payment.test.js", [
       "import assert from 'node:assert/strict';",
       "import { applyDiscount, addTax } from '../src/payment.js';",
       "assert.equal(applyDiscount(100, 15), 85);",
       "assert.equal(addTax(100, 0.0825), 108.25);"
     ].join("\n")]
   ];
   for (const [relativePath, content] of fixtures) {
     writeEvalFile(dir, relativePath, content);
   }
   return dir;
 }
 /**
  * Write one fixture file below `root`, normalised to end with exactly one newline.
  *
  * Missing parent directories are created first, so nested relative paths do
  * not depend on the caller having prepared the directory tree.
  *
  * @param {string} root Repository root directory.
  * @param {string} relativePath File path relative to `root`.
  * @param {string} content File content; trailing whitespace is trimmed before the final newline is added.
  */
 function writeEvalFile(root, relativePath, content) {
   const target = join178(root, relativePath);
   // join() normalises "<target>/.." to the directory that contains the target.
   mkdirSync106(join178(target, ".."), { recursive: true });
   writeFileSync90(target, content.trimEnd() + "\n", "utf8");
 }
 var BASIC_SUITE, FULL_SUITE, SUITES;
 var init_eval = __esm({
   "packages/cli/src/commands/eval.ts"() {
@@ -728828,21 +729004,24 @@ var init_eval = __esm({
     BASIC_SUITE = [
       {
         id: "eval-001",
-        description: "Simple fix request",
-        request: "Fix the null pointer dereference in the auth module",
-        expectedStatuses: ["success", "partial_success", "needs_human_decision"]
+        description: "Boundary bug fix",
+        request: "In src/auth.js, fix authenticateUser so null or undefined users return false instead of throwing. Use tests/auth.test.js as the acceptance evidence.",
+        expectedStatuses: ["success", "partial_success", "needs_human_decision"],
+        liveExpectedStatuses: ["success", "partial_success"]
       },
       {
         id: "eval-002",
         description: "Feature addition request",
-        request: "Add pagination support to the user list endpoint",
-        expectedStatuses: ["success", "partial_success", "needs_human_decision"]
+        request: "In src/users.js, add paginateUsers(users, page, pageSize) with 1-based page indexing, stable slicing, and totalPages metadata. Use tests/users.test.js as the acceptance evidence.",
+        expectedStatuses: ["success", "partial_success", "needs_human_decision"],
+        liveExpectedStatuses: ["success", "partial_success"]
       },
       {
         id: "eval-003",
         description: "Refactor request",
-        request: "Refactor the database connection pool to use async/await",
-        expectedStatuses: ["success", "partial_success", "needs_human_decision"]
+        request: "In src/db.js, refactor getConnection to async/await while preserving the exported API behavior covered by tests/db.test.js.",
+        expectedStatuses: ["success", "partial_success", "needs_human_decision"],
+        liveExpectedStatuses: ["success", "partial_success"]
       }
     ];
     FULL_SUITE = [
@@ -728850,14 +729029,16 @@ var init_eval = __esm({
       {
         id: "eval-004",
         description: "Test generation request",
-        request: "Write unit tests for the payment processing module",
-        expectedStatuses: ["success", "partial_success", "needs_human_decision"]
+        request: "Add missing unit coverage for src/payment.js discount and tax behavior in tests/payment.test.js without changing production semantics.",
+        expectedStatuses: ["success", "partial_success", "needs_human_decision"],
+        liveExpectedStatuses: ["success", "partial_success"]
       },
       {
         id: "eval-005",
         description: "Documentation request",
-        request: "Add JSDoc comments to all exported functions in the API layer",
-        expectedStatuses: ["success", "partial_success", "needs_human_decision"]
+        request: "Add concise JSDoc comments to the exported functions in src/api.js while preserving behavior.",
+        expectedStatuses: ["success", "partial_success", "needs_human_decision"],
+        liveExpectedStatuses: ["success", "partial_success"]
       }
     ];
     SUITES = {

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.383",
+  "version": "1.0.385",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.383",
+      "version": "1.0.385",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.383",
+  "version": "1.0.385",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",