omnius 1.0.384 → 1.0.386

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -8602,12 +8602,14 @@ __export(vision_exports, {
8602
8602
  MOONDREAM3_PREVIEW_HF_MODEL: () => MOONDREAM3_PREVIEW_HF_MODEL,
8603
8603
  VisionTool: () => VisionTool,
8604
8604
  analyzeImageWithVision: () => analyzeImageWithVision,
8605
+ callOllamaVision: () => callOllamaVision,
8605
8606
  formatVisionPointResult: () => formatVisionPointResult,
8606
8607
  getVisionPointDiagnostics: () => getVisionPointDiagnostics,
8607
8608
  locateImagePoints: () => locateImagePoints,
8608
8609
  normalizeVisionModelName: () => normalizeVisionModelName,
8609
8610
  resetMoondreamClient: () => resetMoondreamClient,
8610
8611
  resolveHuggingFaceVisionModelCandidates: () => resolveHuggingFaceVisionModelCandidates,
8612
+ resolveInstalledOllamaVisionModelAlias: () => resolveInstalledOllamaVisionModelAlias,
8611
8613
  resolveOllamaVisionModelCandidates: () => resolveOllamaVisionModelCandidates
8612
8614
  });
8613
8615
  import { mkdirSync as mkdirSync9, readFileSync as readFileSync12, existsSync as existsSync14, statSync as statSync7, unlinkSync as unlinkSync2, writeFileSync as writeFileSync10 } from "node:fs";
@@ -8940,7 +8942,8 @@ function resolveOllamaVisionModelCandidates(options2 = {}) {
8940
8942
  ollamaVisionModelName(options2.preferredModel || ""),
8941
8943
  process.env["OLLAMA_VISION_MODEL"] || "",
8942
8944
  options2.activeModelHasVision && options2.activeModel ? options2.activeModel : "",
8943
- DEFAULT_OLLAMA_VISION_MODEL
8945
+ DEFAULT_OLLAMA_VISION_MODEL,
8946
+ `${DEFAULT_OLLAMA_VISION_MODEL}:latest`
8944
8947
  ].map((entry) => entry.trim()).filter(Boolean);
8945
8948
  return [...new Set(candidates)];
8946
8949
  }
@@ -9191,6 +9194,26 @@ async function callOllamaVision(ollamaHost, model, prompt, imageBase64, timeoutM
9191
9194
  if (!res.ok && shouldAutoPullOllamaVisionModel(model)) {
9192
9195
  const errText = await res.text().catch(() => "");
9193
9196
  if (res.status === 404 || /not found|does not exist/i.test(errText)) {
9197
+ const installedAlias = await resolveInstalledOllamaVisionModelAlias(ollamaHost, model, timeoutMs);
9198
+ if (installedAlias && installedAlias !== model) {
9199
+ res = await fetch(`${ollamaHost}/api/generate`, {
9200
+ method: "POST",
9201
+ headers: { "Content-Type": "application/json" },
9202
+ body: JSON.stringify({
9203
+ model: installedAlias,
9204
+ prompt,
9205
+ images: [imageBase64],
9206
+ stream: false,
9207
+ think: false,
9208
+ options: { temperature: 0 }
9209
+ }),
9210
+ signal: AbortSignal.timeout(timeoutMs)
9211
+ });
9212
+ if (res.ok) {
9213
+ const data2 = await res.json();
9214
+ return typeof data2.response === "string" && data2.response.trim() ? data2.response : null;
9215
+ }
9216
+ }
9194
9217
  try {
9195
9218
  ensureDiskSpaceForOllamaVisionModel(model);
9196
9219
  pullOllamaVisionModel(model);
@@ -9217,6 +9240,33 @@ async function callOllamaVision(ollamaHost, model, prompt, imageBase64, timeoutM
9217
9240
  const data = await res.json();
9218
9241
  return typeof data.response === "string" && data.response.trim() ? data.response : null;
9219
9242
  }
9243
+ async function resolveInstalledOllamaVisionModelAlias(ollamaHost, model, timeoutMs = 5e3) {
9244
+ const requested = model.trim();
9245
+ if (!requested)
9246
+ return null;
9247
+ try {
9248
+ const res = await fetch(`${ollamaHost}/api/tags`, {
9249
+ signal: AbortSignal.timeout(Math.min(Math.max(timeoutMs, 1e3), 5e3))
9250
+ });
9251
+ if (!res.ok)
9252
+ return null;
9253
+ const data = await res.json();
9254
+ const names = (Array.isArray(data.models) ? data.models : []).map((entry) => typeof entry.name === "string" ? entry.name.trim() : "").filter(Boolean);
9255
+ if (names.includes(requested))
9256
+ return requested;
9257
+ if (!requested.includes(":")) {
9258
+ const latest = `${requested}:latest`;
9259
+ if (names.includes(latest))
9260
+ return latest;
9261
+ const prefixMatch = names.find((name10) => name10.startsWith(`${requested}:`));
9262
+ if (prefixMatch)
9263
+ return prefixMatch;
9264
+ }
9265
+ } catch {
9266
+ return null;
9267
+ }
9268
+ return null;
9269
+ }
9220
9270
  function shouldAutoPullOllamaVisionModel(model) {
9221
9271
  if (!envFlag2(process.env["OMNIUS_OLLAMA_VISION_AUTO_PULL"], true))
9222
9272
  return false;
@@ -295276,6 +295326,50 @@ function getTodoSessionId() {
295276
295326
  return envSession;
295277
295327
  return "default";
295278
295328
  }
295329
+ function flattenNestedTodoItems(items, repairNotes, parentId) {
295330
+ const flattened = [];
295331
+ for (const item of items) {
295332
+ if (!item || typeof item !== "object" || Array.isArray(item)) {
295333
+ flattened.push(item);
295334
+ continue;
295335
+ }
295336
+ const record = item;
295337
+ const children2 = Array.isArray(record["children"]) ? record["children"] : Array.isArray(record["subtasks"]) ? record["subtasks"] : [];
295338
+ const parentAware = { ...record };
295339
+ delete parentAware["children"];
295340
+ delete parentAware["subtasks"];
295341
+ if (parentId && typeof parentAware["parentId"] !== "string") {
295342
+ parentAware["parentId"] = parentId;
295343
+ }
295344
+ flattened.push(parentAware);
295345
+ const id = typeof parentAware["id"] === "string" && parentAware["id"].trim() ? parentAware["id"].trim() : void 0;
295346
+ if (children2.length > 0) {
295347
+ if (id) {
295348
+ repairNotes.push("flattened nested children/subtasks into parentId-linked todos");
295349
+ flattened.push(...flattenNestedTodoItems(children2, repairNotes, id));
295350
+ } else {
295351
+ repairNotes.push("left nested children unattached because parent todo had no stable id");
295352
+ flattened.push(...flattenNestedTodoItems(children2, repairNotes, parentId));
295353
+ }
295354
+ }
295355
+ }
295356
+ return flattened;
295357
+ }
295358
+ function validateLargeTaskDecomposition(todos) {
295359
+ if (todos.length < 20)
295360
+ return null;
295361
+ const ids = new Set(todos.map((todo) => todo.id).filter((id) => typeof id === "string" && id.trim().length > 0));
295362
+ const childTodos = todos.filter((todo) => typeof todo.parentId === "string" && todo.parentId.trim().length > 0);
295363
+ const parentIds = new Set(childTodos.map((todo) => todo.parentId.trim()));
295364
+ const hasValidParent = [...parentIds].some((parentId) => ids.has(parentId));
295365
+ if (childTodos.length > 0 && hasValidParent)
295366
+ return null;
295367
+ return [
295368
+ "Large todo lists (20+ items) must be decomposed into a nested tree with stable ids and parentId links.",
295369
+ "Create parent objectives and child leaf tasks instead of a flat checklist.",
295370
+ 'Canonical shape: todo_write({"todos":[{"id":"group-1","content":"Steps 01-08","status":"in_progress"},{"id":"step-01","parentId":"group-1","content":"Complete step 01 and verify evidence","status":"in_progress"}]})'
295371
+ ].join(" ");
295372
+ }
295279
295373
  function normalizeIncomingTodos(args) {
295280
295374
  const repairNotes = [];
295281
295375
  const record = args;
@@ -295385,6 +295479,16 @@ Mark tasks complete IMMEDIATELY after finishing — don't batch. Never mark comp
295385
295479
  type: "array",
295386
295480
  items: { type: "string" },
295387
295481
  description: `REG-38: optional list of file paths this todo is expected to produce on disk. When you mark the todo 'completed', the supervisor inspects each declared path; missing/empty/stale files trigger a rejection with a specific gap critique. Use whenever a todo has concrete deliverables (e.g. ["src/lib/foo.ts", "tests/unit/foo.test.ts"]). Generic across stacks.`
295482
+ },
295483
+ children: {
295484
+ type: "array",
295485
+ description: "Optional nested child todos. The tool flattens children into parentId-linked todos before storing.",
295486
+ items: { type: "object" }
295487
+ },
295488
+ subtasks: {
295489
+ type: "array",
295490
+ description: "Alias for children. Use this for decomposed child work under a parent objective.",
295491
+ items: { type: "object" }
295388
295492
  }
295389
295493
  }
295390
295494
  }
@@ -295409,8 +295513,9 @@ Mark tasks complete IMMEDIATELY after finishing — don't batch. Never mark comp
295409
295513
  }
295410
295514
  const incoming = [];
295411
295515
  const repairNotes = [...normalized.repairNotes];
295412
- for (let index = 0; index < normalized.todos.length; index++) {
295413
- const raw = normalized.todos[index];
295516
+ const flattenedTodos = flattenNestedTodoItems(normalized.todos, repairNotes);
295517
+ for (let index = 0; index < flattenedTodos.length; index++) {
295518
+ const raw = flattenedTodos[index];
295414
295519
  if (!raw || typeof raw !== "object") {
295415
295520
  if (typeof raw === "string" && raw.trim()) {
295416
295521
  incoming.push({
@@ -295462,6 +295567,15 @@ Mark tasks complete IMMEDIATELY after finishing — don't batch. Never mark comp
295462
295567
  declaredArtifacts: Array.isArray(entry["declaredArtifacts"]) ? entry["declaredArtifacts"].filter((x) => typeof x === "string") : void 0
295463
295568
  });
295464
295569
  }
295570
+ const decompositionError = validateLargeTaskDecomposition(incoming);
295571
+ if (decompositionError) {
295572
+ return {
295573
+ success: false,
295574
+ output: "",
295575
+ error: decompositionError,
295576
+ durationMs: performance.now() - start2
295577
+ };
295578
+ }
295465
295579
  const sessionId = typeof args["session_id"] === "string" && args["session_id"].trim() ? args["session_id"].trim() : typeof args["sessionId"] === "string" && args["sessionId"].trim() ? args["sessionId"].trim() : getTodoSessionId();
295466
295580
  const oldTodos = readTodos(sessionId);
295467
295581
  const canonicalize2 = (todos) => JSON.stringify(todos.map((t2) => ({
@@ -547133,6 +547247,23 @@ function summarizeProcessFailure(stdout, stderr) {
547133
547247
  }
547134
547248
  return parts.join("\n").slice(0, 2200);
547135
547249
  }
547250
+ function formatObjectRecognitionResult(result) {
547251
+ const matches = (Array.isArray(result.matches) ? result.matches : []).filter((m2) => m2.recognized);
547252
+ const matchLines = matches.map((m2) => ` ${m2.label}: ${(m2.blended_score * 100).toFixed(0)}% (image=${(m2.image_similarity * 100).toFixed(0)}%, text=${(m2.text_similarity * 100).toFixed(0)}%)`);
547253
+ const extraLabels = Array.isArray(result.extra_labels) ? result.extra_labels : null;
547254
+ if (extraLabels && extraLabels.length > 0) {
547255
+ const extraLines = extraLabels.map((s2) => ` ${s2.label}: ${(s2.score * 100).toFixed(0)}%`);
547256
+ const sections = [`CLIP candidate label scores:
547257
+ ${extraLines.join("\n")}`];
547258
+ if (matches.length > 0) {
547259
+ sections.push(`Persistent visual memory matches above threshold:
547260
+ ${matchLines.join("\n")}`);
547261
+ }
547262
+ return sections.join("\n\n");
547263
+ }
547264
+ return matches.length > 0 ? `Recognized ${result.recognized_count} object(s):
547265
+ ${matchLines.join("\n")}` : "No taught objects recognized in this image.";
547266
+ }
547136
547267
  var VMEM_DIR, VENV_DIR2, VENV_PY, VENV_PIP2, VISUAL_MEMORY_ACTIONS, VisualMemoryTool;
547137
547268
  var init_visual_memory = __esm({
547138
547269
  "packages/execution/dist/tools/visual-memory.js"() {
@@ -547668,18 +547799,7 @@ print(json.dumps({
547668
547799
  const payload = JSON.stringify(result);
547669
547800
  return { success: true, output: payload, llmContent: payload, durationMs: performance.now() - start2 };
547670
547801
  }
547671
- const matches = (result.matches || []).filter((m2) => m2.recognized);
547672
- const lines = matches.map((m2) => ` ${m2.label}: ${(m2.blended_score * 100).toFixed(0)}% (image=${(m2.image_similarity * 100).toFixed(0)}%, text=${(m2.text_similarity * 100).toFixed(0)}%)`);
547673
- let output = matches.length > 0 ? `Recognized ${result.recognized_count} object(s):
547674
- ${lines.join("\n")}` : "No taught objects recognized in this image.";
547675
- if (result.extra_labels) {
547676
- const extraLines = result.extra_labels.map((s2) => ` ${s2.label}: ${(s2.score * 100).toFixed(0)}%`);
547677
- output += `
547678
-
547679
- CLIP label scores:
547680
- ${extraLines.join("\n")}`;
547681
- }
547682
- return { success: true, output, durationMs: performance.now() - start2 };
547802
+ return { success: true, output: formatObjectRecognitionResult(result), durationMs: performance.now() - start2 };
547683
547803
  }
547684
547804
  // =========================================================================
547685
547805
  // Memory Management
@@ -576297,6 +576417,26 @@ ${contentPreview}
576297
576417
  }
576298
576418
  return true;
576299
576419
  }
576420
+ _shellCommandLikelyMutatesFilesystem(rawCmd) {
576421
+ if (!rawCmd || typeof rawCmd !== "string")
576422
+ return false;
576423
+ const cmd = rawCmd.trim();
576424
+ if (!cmd)
576425
+ return false;
576426
+ if (/(^|[^&\d])(>|>>)\s*\S/.test(cmd))
576427
+ return true;
576428
+ if (/\|\s*(?:tee|dd)\b/i.test(cmd))
576429
+ return true;
576430
+ if (/\b(?:sed|gsed)\s+(?:[^\n;&|]*\s)?(?:-i|--in-place)\b/i.test(cmd))
576431
+ return true;
576432
+ if (/\bperl\s+-[A-Za-z]*i[A-Za-z]*\b/.test(cmd))
576433
+ return true;
576434
+ if (/\b(?:cp|mv|rm|mkdir|rmdir|touch|truncate|ln|install)\b/i.test(cmd))
576435
+ return true;
576436
+ if (/\b(?:python3?|node|ruby|deno|bun)\b[\s\S]{0,240}\b(?:writeFile|writeFileSync|openSync|mkdirSync|renameSync|unlinkSync|rmSync)\b/i.test(cmd))
576437
+ return true;
576438
+ return false;
576439
+ }
576300
576440
  /**
576301
576441
  * REG-5: Render the recent-failures block so the agent SEES its own error
576302
576442
  * output before deciding what to do next. Detects same-fingerprint failure
@@ -582076,6 +582216,7 @@ Respond with EXACTLY this structure before your next tool call:
582076
582216
  }
582077
582217
  }
582078
582218
  }
582219
+ const shellFilesystemMutation = tc.name === "shell" && result.success === true && this._shellCommandLikelyMutatesFilesystem(String(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? ""));
582079
582220
  const realFileMutation = this._isRealProjectMutation(tc.name, result);
582080
582221
  const realMutationPaths = realFileMutation ? this._extractToolTargetPaths(tc.name, tc.arguments, result) : [];
582081
582222
  if (realFileMutation && this._reg61PerpetualGateActive) {
@@ -582857,6 +582998,20 @@ Respond with EXACTLY this structure before your next tool call:
582857
582998
  dedupHitCount.clear();
582858
582999
  }
582859
583000
  }
583001
+ if (shellFilesystemMutation && recentToolResults.size > 0) {
583002
+ for (const key of Array.from(recentToolResults.keys())) {
583003
+ if (key.startsWith("shell:") || key.startsWith("file_read:") || key.startsWith("list_directory:") || key.startsWith("grep_search:") || key.startsWith("find_files:")) {
583004
+ recentToolResults.delete(key);
583005
+ dedupHitCount.delete(key);
583006
+ }
583007
+ }
583008
+ this._readCoverage.clear();
583009
+ this.emit({
583010
+ type: "status",
583011
+ content: "Shell filesystem mutation invalidated cached read/shell evidence",
583012
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
583013
+ });
583014
+ }
582860
583015
  if (isFileMutation && recentToolResults.size > 0) {
582861
583016
  for (const key of Array.from(recentToolResults.keys())) {
582862
583017
  if (key.startsWith("shell:"))
@@ -583254,7 +583409,7 @@ Evidence: ${evidencePreview}`.slice(0, 500);
583254
583409
  success: result.success,
583255
583410
  output: result.output ?? result.llmContent ?? "",
583256
583411
  error: result.error ?? "",
583257
- mutated: realFileMutation,
583412
+ mutated: realFileMutation || shellFilesystemMutation,
583258
583413
  isReadLike
583259
583414
  });
583260
583415
  const afterDirective = this._focusSupervisor?.snapshot().directive ?? null;
@@ -728690,11 +728845,17 @@ var init_serve2 = __esm({
728690
728845
  // packages/cli/src/commands/eval.ts
728691
728846
  var eval_exports = {};
728692
728847
  __export(eval_exports, {
728693
- evalCommand: () => evalCommand
728848
+ createTempEvalRepo: () => createTempEvalRepo,
728849
+ evalCommand: () => evalCommand,
728850
+ expectedStatusesForEvalTask: () => expectedStatusesForEvalTask
728694
728851
  });
728695
728852
  import { tmpdir as tmpdir23 } from "node:os";
728696
728853
  import { mkdirSync as mkdirSync106, writeFileSync as writeFileSync90 } from "node:fs";
728697
728854
  import { join as join178 } from "node:path";
728855
+ function expectedStatusesForEvalTask(task, live) {
728856
+ if (!live) return task.expectedStatuses;
728857
+ return task.liveExpectedStatuses ?? task.expectedStatuses.filter((status) => status !== "needs_human_decision");
728858
+ }
728698
728859
  async function evalCommand(opts, config) {
728699
728860
  const suiteName = opts.suite ?? "basic";
728700
728861
  const suite = SUITES[suiteName];
@@ -728708,6 +728869,10 @@ async function evalCommand(opts, config) {
728708
728869
  printKeyValue("Suite", suiteName, 2);
728709
728870
  printKeyValue("Tasks", String(suite.length), 2);
728710
728871
  printKeyValue("Mode", modeLabel, 2);
728872
+ if (useLive) {
728873
+ printKeyValue("Live pass statuses", "success, partial_success", 2);
728874
+ printInfo("Live eval treats needs_human_decision as a failure for concrete coding tasks.");
728875
+ }
728711
728876
  const evalRepoRoot = opts.repoPath ?? createTempEvalRepo();
728712
728877
  let rawBackend;
728713
728878
  if (useLive) {
@@ -728766,22 +728931,27 @@ async function evalCommand(opts, config) {
728766
728931
  let result;
728767
728932
  try {
728768
728933
  const report2 = await loop.run(task.request, evalRepoRoot);
728769
- const passed2 = task.expectedStatuses.includes(report2.status);
728934
+ const expectedStatuses = expectedStatusesForEvalTask(task, useLive);
728935
+ const passed2 = expectedStatuses.includes(report2.status);
728770
728936
  result = {
728771
728937
  task,
728772
728938
  status: report2.status,
728939
+ expectedStatuses,
728773
728940
  passed: passed2,
728774
728941
  durationMs: Date.now() - start2
728775
728942
  };
728776
728943
  if (passed2) {
728777
728944
  spinner.succeed(`[${task.id}] PASS (${report2.status})`);
728778
728945
  } else {
728779
- spinner.fail(`[${task.id}] FAIL (got: ${report2.status})`);
728946
+ spinner.fail(
728947
+ `[${task.id}] FAIL (got: ${report2.status}; expected: ${expectedStatuses.join(", ")})`
728948
+ );
728780
728949
  }
728781
728950
  } catch (err) {
728782
728951
  result = {
728783
728952
  task,
728784
728953
  status: "error",
728954
+ expectedStatuses: expectedStatusesForEvalTask(task, useLive),
728785
728955
  passed: false,
728786
728956
  durationMs: Date.now() - start2,
728787
728957
  error: err instanceof Error ? err.message : String(err)
@@ -728807,7 +728977,7 @@ async function evalCommand(opts, config) {
728807
728977
  const icon = r2.passed ? "PASS" : "FAIL";
728808
728978
  printKeyValue(
728809
728979
  `${r2.task.id} [${icon}]`,
728810
- `${r2.status} (${formatDuration(r2.durationMs)})`,
728980
+ `${r2.status} (${formatDuration(r2.durationMs)}; expected ${r2.expectedStatuses.join(", ")})`,
728811
728981
  2
728812
728982
  );
728813
728983
  if (r2.error) {
@@ -728825,13 +728995,79 @@ async function evalCommand(opts, config) {
728825
728995
  function createTempEvalRepo() {
728826
728996
  const dir = join178(tmpdir23(), `omnius-eval-${Date.now()}`);
728827
728997
  mkdirSync106(dir, { recursive: true });
728828
- writeFileSync90(
728829
- join178(dir, "package.json"),
728830
- JSON.stringify({ name: "eval-repo", version: "0.0.0" }, null, 2) + "\n",
728831
- "utf8"
728832
- );
728998
+ mkdirSync106(join178(dir, "src"), { recursive: true });
728999
+ mkdirSync106(join178(dir, "tests"), { recursive: true });
729000
+ writeEvalFile(dir, "package.json", JSON.stringify({
729001
+ name: "eval-repo",
729002
+ version: "0.0.0",
729003
+ type: "module",
729004
+ scripts: { test: "node tests/auth.test.js && node tests/users.test.js && node tests/db.test.js && node tests/payment.test.js" }
729005
+ }, null, 2));
729006
+ writeEvalFile(dir, "src/auth.js", [
729007
+ "export function authenticateUser(user) {",
729008
+ " return user.active;",
729009
+ "}"
729010
+ ].join("\n"));
729011
+ writeEvalFile(dir, "src/users.js", [
729012
+ "export function listUsers(users) {",
729013
+ " return users.slice();",
729014
+ "}"
729015
+ ].join("\n"));
729016
+ writeEvalFile(dir, "src/db.js", [
729017
+ "export function getConnection() {",
729018
+ ' return Promise.resolve({ id: "primary", open: true });',
729019
+ "}"
729020
+ ].join("\n"));
729021
+ writeEvalFile(dir, "src/payment.js", [
729022
+ "export function applyDiscount(amount, percent) {",
729023
+ " return amount - amount * (percent / 100);",
729024
+ "}",
729025
+ "",
729026
+ "export function addTax(amount, taxRate) {",
729027
+ " return amount + amount * taxRate;",
729028
+ "}"
729029
+ ].join("\n"));
729030
+ writeEvalFile(dir, "src/api.js", [
729031
+ "export function health() {",
729032
+ " return { ok: true };",
729033
+ "}",
729034
+ "",
729035
+ "export function version() {",
729036
+ ' return "0.0.0";',
729037
+ "}"
729038
+ ].join("\n"));
729039
+ writeEvalFile(dir, "tests/auth.test.js", [
729040
+ "import assert from 'node:assert/strict';",
729041
+ "import { authenticateUser } from '../src/auth.js';",
729042
+ "assert.equal(authenticateUser({ id: 'u1', active: true }), true);",
729043
+ "assert.equal(authenticateUser({ id: 'u2', active: false }), false);",
729044
+ "assert.equal(authenticateUser(null), false);",
729045
+ "assert.equal(authenticateUser(undefined), false);"
729046
+ ].join("\n"));
729047
+ writeEvalFile(dir, "tests/users.test.js", [
729048
+ "import assert from 'node:assert/strict';",
729049
+ "import { paginateUsers } from '../src/users.js';",
729050
+ "const users = ['a', 'b', 'c', 'd', 'e'];",
729051
+ "assert.deepEqual(paginateUsers(users, 1, 2), { items: ['a', 'b'], page: 1, pageSize: 2, totalPages: 3, totalItems: 5 });",
729052
+ "assert.deepEqual(paginateUsers(users, 3, 2).items, ['e']);"
729053
+ ].join("\n"));
729054
+ writeEvalFile(dir, "tests/db.test.js", [
729055
+ "import assert from 'node:assert/strict';",
729056
+ "import { getConnection } from '../src/db.js';",
729057
+ "const conn = await getConnection();",
729058
+ "assert.deepEqual(conn, { id: 'primary', open: true });"
729059
+ ].join("\n"));
729060
+ writeEvalFile(dir, "tests/payment.test.js", [
729061
+ "import assert from 'node:assert/strict';",
729062
+ "import { applyDiscount, addTax } from '../src/payment.js';",
729063
+ "assert.equal(applyDiscount(100, 15), 85);",
729064
+ "assert.equal(addTax(100, 0.0825), 108.25);"
729065
+ ].join("\n"));
728833
729066
  return dir;
728834
729067
  }
729068
+ function writeEvalFile(root, relativePath, content) {
729069
+ writeFileSync90(join178(root, relativePath), content.trimEnd() + "\n", "utf8");
729070
+ }
728835
729071
  var BASIC_SUITE, FULL_SUITE, SUITES;
728836
729072
  var init_eval = __esm({
728837
729073
  "packages/cli/src/commands/eval.ts"() {
@@ -728843,21 +729079,24 @@ var init_eval = __esm({
728843
729079
  BASIC_SUITE = [
728844
729080
  {
728845
729081
  id: "eval-001",
728846
- description: "Simple fix request",
728847
- request: "Fix the null pointer dereference in the auth module",
728848
- expectedStatuses: ["success", "partial_success", "needs_human_decision"]
729082
+ description: "Boundary bug fix",
729083
+ request: "In src/auth.js, fix authenticateUser so null or undefined users return false instead of throwing. Use tests/auth.test.js as the acceptance evidence.",
729084
+ expectedStatuses: ["success", "partial_success", "needs_human_decision"],
729085
+ liveExpectedStatuses: ["success", "partial_success"]
728849
729086
  },
728850
729087
  {
728851
729088
  id: "eval-002",
728852
729089
  description: "Feature addition request",
728853
- request: "Add pagination support to the user list endpoint",
728854
- expectedStatuses: ["success", "partial_success", "needs_human_decision"]
729090
+ request: "In src/users.js, add paginateUsers(users, page, pageSize) with 1-based page indexing, stable slicing, and totalPages metadata. Use tests/users.test.js as the acceptance evidence.",
729091
+ expectedStatuses: ["success", "partial_success", "needs_human_decision"],
729092
+ liveExpectedStatuses: ["success", "partial_success"]
728855
729093
  },
728856
729094
  {
728857
729095
  id: "eval-003",
728858
729096
  description: "Refactor request",
728859
- request: "Refactor the database connection pool to use async/await",
728860
- expectedStatuses: ["success", "partial_success", "needs_human_decision"]
729097
+ request: "In src/db.js, refactor getConnection to async/await while preserving the exported API behavior covered by tests/db.test.js.",
729098
+ expectedStatuses: ["success", "partial_success", "needs_human_decision"],
729099
+ liveExpectedStatuses: ["success", "partial_success"]
728861
729100
  }
728862
729101
  ];
728863
729102
  FULL_SUITE = [
@@ -728865,14 +729104,16 @@ var init_eval = __esm({
728865
729104
  {
728866
729105
  id: "eval-004",
728867
729106
  description: "Test generation request",
728868
- request: "Write unit tests for the payment processing module",
728869
- expectedStatuses: ["success", "partial_success", "needs_human_decision"]
729107
+ request: "Add missing unit coverage for src/payment.js discount and tax behavior in tests/payment.test.js without changing production semantics.",
729108
+ expectedStatuses: ["success", "partial_success", "needs_human_decision"],
729109
+ liveExpectedStatuses: ["success", "partial_success"]
728870
729110
  },
728871
729111
  {
728872
729112
  id: "eval-005",
728873
729113
  description: "Documentation request",
728874
- request: "Add JSDoc comments to all exported functions in the API layer",
728875
- expectedStatuses: ["success", "partial_success", "needs_human_decision"]
729114
+ request: "Add concise JSDoc comments to the exported functions in src/api.js while preserving behavior.",
729115
+ expectedStatuses: ["success", "partial_success", "needs_human_decision"],
729116
+ liveExpectedStatuses: ["success", "partial_success"]
728876
729117
  }
728877
729118
  ];
728878
729119
  SUITES = {
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.384",
3
+ "version": "1.0.386",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.384",
9
+ "version": "1.0.386",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.384",
3
+ "version": "1.0.386",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",