omnius 1.0.214 → 1.0.216

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -24413,7 +24413,7 @@ var EXCLUDED, MAX_ENTRIES, ListDirectoryTool;
24413
24413
  var init_list_directory = __esm({
24414
24414
  "packages/execution/dist/tools/list-directory.js"() {
24415
24415
  "use strict";
24416
- EXCLUDED = /* @__PURE__ */ new Set(["node_modules", ".git"]);
24416
+ EXCLUDED = /* @__PURE__ */ new Set(["node_modules", ".git", ".omnius"]);
24417
24417
  MAX_ENTRIES = 100;
24418
24418
  ListDirectoryTool = class {
24419
24419
  name = "list_directory";
@@ -289853,6 +289853,53 @@ function getTodoSessionId() {
289853
289853
  return envSession;
289854
289854
  return "default";
289855
289855
  }
289856
/**
 * Normalize the raw `todo_write` arguments into a todos array, repairing
 * common near-miss shapes instead of rejecting them outright.
 *
 * Shapes are tried in this order; the first match wins:
 *   1. a top-level array                         -> used as the todos array
 *   2. `{ todos: [...] }`                        -> used as-is (no repair note)
 *   3. `{ todos: { todos|items|tasks|checklist: [...] } }`
 *   4. `{ todos: { content: "..." } }`           -> wrapped in a one-item array
 *   5. `{ items|tasks|checklist|todo_items: [...] }`
 *   6. `{ todo | task }` holding an object with string `content`, or a
 *      non-blank string (trimmed)                -> wrapped in a one-item array
 *
 * Individual items are NOT validated here; the caller is expected to do that.
 *
 * @param {unknown} args Raw tool arguments. May be `null`, `undefined`, or a
 *   primitive; those yield the error result rather than throwing.
 * @returns {{ todos?: unknown[], repairNotes: string[], error: string }}
 *   On success `todos` is set and `error` is `""`. On failure `todos` is
 *   absent and `error` describes the canonical call shape. `repairNotes`
 *   lists every coercion that was applied.
 */
function normalizeIncomingTodos(args) {
  const repairNotes = [];
  const ok = (todos) => ({ todos, repairNotes, error: "" });

  if (Array.isArray(args)) {
    repairNotes.push("coerced top-level array into {todos:[...]}");
    return ok(args);
  }

  // Property lookups on null/undefined throw; fall back to an empty record so
  // such input reaches the structured error at the bottom instead.
  const record = args !== null && typeof args === "object" ? args : {};

  const direct = record["todos"];
  if (Array.isArray(direct)) {
    return ok(direct);
  }
  if (direct && typeof direct === "object") {
    for (const key of ["todos", "items", "tasks", "checklist"]) {
      if (Array.isArray(direct[key])) {
        repairNotes.push(`coerced todos.${key} into todos array`);
        return ok(direct[key]);
      }
    }
    if (typeof direct["content"] === "string") {
      repairNotes.push("wrapped single todo object in todos array");
      return ok([direct]);
    }
  }

  for (const key of ["items", "tasks", "checklist", "todo_items"]) {
    if (Array.isArray(record[key])) {
      repairNotes.push(`coerced ${key} into todos array`);
      return ok(record[key]);
    }
  }

  // Check `todo` first, then `task`. Each key is evaluated on its own so an
  // unusable `todo` (e.g. an empty string) does not hide a usable `task`.
  for (const key of ["todo", "task"]) {
    const single = record[key];
    if (single && typeof single === "object" && !Array.isArray(single)) {
      if (typeof single["content"] === "string") {
        repairNotes.push("coerced single todo/task object into todos array");
        return ok([single]);
      }
    }
    if (typeof single === "string" && single.trim()) {
      repairNotes.push("coerced single todo/task string into todos array");
      return ok([single.trim()]);
    }
  }

  return {
    repairNotes,
    error: 'todos must be an array. Correct shape: todo_write({"todos":[{"content":"Inspect files","status":"in_progress"},{"content":"Make changes","status":"pending"}]})'
  };
}
289856
289903
  var _currentSessionId, TodoWriteTool, TodoReadTool;
289857
289904
  var init_todo_write = __esm({
289858
289905
  "packages/execution/dist/tools/todo-write.js"() {
@@ -289861,7 +289908,27 @@ var init_todo_write = __esm({
289861
289908
  _currentSessionId = "";
289862
289909
  TodoWriteTool = class {
289863
289910
  name = "todo_write";
289864
- description = "Update the session task checklist. To be used proactively and often to track progress and pending tasks. Make sure that at least one task is in_progress at all times. \n\n## When to use\n1. Complex multi-step tasks — when a task requires 3 or more distinct steps or actions\n2. When the user provides multiple tasks (numbered or comma-separated)\n3. After receiving new instructions — capture user requirements as todos immediately\n4. When you start a task — mark it in_progress BEFORE beginning work. Only ONE in_progress at a time\n5. After completing a task — mark it completed and add follow-up tasks you discovered\n\n## When NOT to use\n- Single, straightforward tasks (a trivial edit, a one-line fix)\n- Conversational or informational questions\n- Tasks completable in <3 trivial steps\n\n## Task states\n- pending: not started\n- in_progress: currently working on (exactly ONE at a time)\n- completed: fully done (tests pass, code works, goal met)\n- blocked: stuck on a dependency (include blocker text)\n\nMark tasks complete IMMEDIATELY after finishing — don't batch. Never mark completed if tests are failing or implementation is partial. The user watches this list in the chat UI in real time.";
289911
+ description = `Update the session task checklist. To be used proactively and often to track progress and pending tasks. Make sure that at least one task is in_progress at all times.
289912
+
289913
+ ## When to use
289914
+ 1. Complex multi-step tasks — when a task requires 3 or more distinct steps or actions
289915
+ 2. When the user provides multiple tasks (numbered or comma-separated)
289916
+ 3. After receiving new instructions — capture user requirements as todos immediately
289917
+ 4. When you start a task — mark it in_progress BEFORE beginning work. Only ONE in_progress at a time
289918
+ 5. After completing a task — mark it completed and add follow-up tasks you discovered
289919
+
289920
+ ## When NOT to use
289921
+ - Single, straightforward tasks (a trivial edit, a one-line fix)
289922
+ - Conversational or informational questions
289923
+ - Tasks completable in <3 trivial steps
289924
+
289925
+ ## Task states
289926
+ - pending: not started
289927
+ - in_progress: currently working on (exactly ONE at a time)
289928
+ - completed: fully done (tests pass, code works, goal met)
289929
+ - blocked: stuck on a dependency (include blocker text)
289930
+
289931
+ Mark tasks complete IMMEDIATELY after finishing — don't batch. Never mark completed if tests are failing or implementation is partial. The user watches this list in the chat UI in real time. Canonical call shape: todo_write({"todos":[{"content":"Inspect files","status":"in_progress"},{"content":"Make changes","status":"pending"},{"content":"Verify results","status":"pending"}]})`;
289865
289932
  parameters = {
289866
289933
  type: "object",
289867
289934
  required: ["todos"],
@@ -289902,48 +289969,62 @@ var init_todo_write = __esm({
289902
289969
  async execute(args) {
289903
289970
  const start2 = performance.now();
289904
289971
  try {
289905
- const incomingRaw = args["todos"];
289906
- if (!Array.isArray(incomingRaw)) {
289972
+ const normalized = normalizeIncomingTodos(args);
289973
+ if (!normalized.todos) {
289907
289974
  return {
289908
289975
  success: false,
289909
289976
  output: "",
289910
- error: "todos must be an array",
289977
+ error: normalized.error,
289911
289978
  durationMs: performance.now() - start2
289912
289979
  };
289913
289980
  }
289914
289981
  const incoming = [];
289915
- for (const raw of incomingRaw) {
289982
+ const repairNotes = [...normalized.repairNotes];
289983
+ for (let index = 0; index < normalized.todos.length; index++) {
289984
+ const raw = normalized.todos[index];
289916
289985
  if (!raw || typeof raw !== "object") {
289986
+ if (typeof raw === "string" && raw.trim()) {
289987
+ incoming.push({
289988
+ content: raw.trim(),
289989
+ status: index === 0 ? "in_progress" : "pending"
289990
+ });
289991
+ repairNotes.push("coerced string todo item into {content,status}");
289992
+ continue;
289993
+ }
289917
289994
  return {
289918
289995
  success: false,
289919
289996
  output: "",
289920
- error: "each todo must be an object with content+status",
289997
+ error: 'each todo must be an object with content+status. Correct shape: {"todos":[{"content":"...","status":"in_progress"}]}',
289921
289998
  durationMs: performance.now() - start2
289922
289999
  };
289923
290000
  }
289924
290001
  const entry = raw;
289925
290002
  const content = entry["content"];
289926
290003
  const status = entry["status"];
289927
- if (typeof content !== "string" || typeof status !== "string") {
290004
+ if (typeof content !== "string") {
289928
290005
  return {
289929
290006
  success: false,
289930
290007
  output: "",
289931
- error: "todo must have string content and string status",
290008
+ error: 'todo must have string content. Correct shape: {"todos":[{"content":"...","status":"in_progress"}]}',
289932
290009
  durationMs: performance.now() - start2
289933
290010
  };
289934
290011
  }
289935
- if (!["pending", "in_progress", "completed", "blocked"].includes(status)) {
290012
+ const resolvedStatus = typeof status === "string" ? status : index === 0 ? "in_progress" : "pending";
290013
+ if (typeof status !== "string") {
290014
+ repairNotes.push("defaulted missing todo status to in_progress/pending");
290015
+ }
290016
+ if (!["pending", "in_progress", "completed", "blocked"].includes(resolvedStatus)) {
289936
290017
  return {
289937
290018
  success: false,
289938
290019
  output: "",
289939
- error: `invalid status: ${status}`,
290020
+ error: `invalid status: ${resolvedStatus}`,
289940
290021
  durationMs: performance.now() - start2
289941
290022
  };
289942
290023
  }
289943
290024
  incoming.push({
289944
290025
  id: typeof entry["id"] === "string" ? entry["id"] : void 0,
289945
290026
  content,
289946
- status,
290027
+ status: resolvedStatus,
289947
290028
  parentId: typeof entry["parentId"] === "string" ? entry["parentId"] : void 0,
289948
290029
  blocker: typeof entry["blocker"] === "string" ? entry["blocker"] : void 0,
289949
290030
  // REG-37: verification-aware planning
@@ -289986,6 +290067,16 @@ var init_todo_write = __esm({
289986
290067
  newTodos: result.newTodos,
289987
290068
  verificationNudgeNeeded
289988
290069
  };
290070
+ if (repairNotes.length > 0) {
290071
+ payload["inputRepair"] = Array.from(new Set(repairNotes));
290072
+ payload["canonicalShape"] = {
290073
+ todos: [
290074
+ { content: "Inspect files", status: "in_progress" },
290075
+ { content: "Make changes", status: "pending" },
290076
+ { content: "Verify results", status: "pending" }
290077
+ ]
290078
+ };
290079
+ }
289989
290080
  if (verificationNudgeNeeded) {
289990
290081
  payload["nudge"] = "You just closed 3+ todos without scheduling a verification step. Add a 'Verify the changes work' item and spawn a verification agent before declaring task_complete.";
289991
290082
  }
@@ -564459,10 +564550,12 @@ ${_staleSamples.join("\n")}` : ``,
564459
564550
  const turnTier = this.options.modelTier ?? "large";
564460
564551
  if (turn === 0 && !this.options.disableTodoPlanningNudges && (turnTier === "small" || turnTier === "medium")) {
564461
564552
  const goal = this._taskState.goal || "";
564462
- const wordCount2 = goal.split(/\s+/).length;
564463
- const hasMultipleActions = /\band\b.*\band\b|then.*then|also.*also/i.test(goal);
564464
- const hasMultipleFiles = /files?.*files?|\.ts.*\.ts|create.*write|modify.*create/i.test(goal);
564465
- const isComplex = wordCount2 > 40 || hasMultipleActions || hasMultipleFiles;
564553
+ const substantiveGoal = goal.replace(/\b(?:then\s+)?call\s+task_complete\b[^.?!;]*/gi, "").replace(/\b(?:observe|report|summarize|finish|complete)\b[^.?!;]*/gi, "");
564554
+ const wordCount2 = substantiveGoal.split(/\s+/).filter(Boolean).length;
564555
+ const hasMultipleActions = /\band\b.*\band\b|then.*then|also.*also/i.test(substantiveGoal);
564556
+ const hasMultipleFiles = /files?.*files?|\.ts.*\.ts|create.*write|modify.*create/i.test(substantiveGoal);
564557
+ const explicitSingleTool = /\b(exactly once|single tool|one tool|one tool call)\b/i.test(goal) || /\b(call|use)\s+(?:list_directory|file_read|grep_search|find_files|shell|web_search|web_fetch)\b/i.test(goal) && !/\b(edit|write|modify|create|fix|implement|patch|test|build|install|refactor)\b/i.test(substantiveGoal);
564558
+ const isComplex = !explicitSingleTool && (wordCount2 > 40 || hasMultipleActions || hasMultipleFiles);
564466
564559
  if (isComplex) {
564467
564560
  messages2.push({
564468
564561
  role: "user",
@@ -564471,6 +564564,7 @@ ${_staleSamples.join("\n")}` : ``,
564471
564564
  MANDATORY FIRST ACTION: Call todo_write NOW with the complete plan.
564472
564565
  Each todo item is { content: "what to do", status: "pending" | "in_progress" | "completed" | "blocked" }.
564473
564566
  Mark item 1 as in_progress, the rest as pending.
564567
+ Only count substantive work phases. Do NOT count observing a tool result, reporting findings, or calling task_complete as todo phases.
564474
564568
  Example: todo_write({todos: [{content: "read source files", status: "in_progress"}, {content: "make changes", status: "pending"}, {content: "run tests", status: "pending"}]})
564475
564569
 
564476
564570
  After EACH phase finishes, call todo_write AGAIN with item N marked completed and item N+1 marked in_progress.
@@ -564566,7 +564660,7 @@ ${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`);
564566
564660
  const isReadTask = /\bread\b|\bshow\b|\btell me\b|\bwhat is\b/i.test(taskGoal);
564567
564661
  const hints = [];
564568
564662
  if (isSimpleTask) {
564569
- hints.push("This is a simple task — if it needs only ONE tool call, skip todo_write and call the tool directly. If it needs 2+ steps, use todo_write to plan.");
564663
+ hints.push("This is a simple task — if it needs only ONE substantive tool call, skip todo_write and call the tool directly, then task_complete. Do not count reporting, observing output, or task_complete as planning steps. If it needs 2+ substantive work steps, use todo_write to plan.");
564570
564664
  }
564571
564665
  if (isSearchTask) {
564572
564666
  hints.push("SEARCH STRATEGY: Use grep_search to find what you need FIRST, THEN file_read only the specific file and lines. Do NOT read entire files hoping to find something.");
@@ -578422,6 +578516,70 @@ var init_generative_progress = __esm({
578422
578516
  }
578423
578517
  });
578424
578518
 
578519
+ // packages/cli/src/tui/tool-adapter.ts
578520
/**
 * Build a new result object containing a fixed set of fields copied from an
 * execution tool result.
 *
 * Exactly twelve properties are copied, in this order: `success`, `output`,
 * `error`, `llmContent`, `mutated`, `mutatedFiles`, `diff`, `dryRun`, `noop`,
 * `partial`, `beforeHash`, `afterHash`. Properties missing on `result` are
 * still present on the returned object with the value `undefined`; any other
 * property on `result` is dropped. Values are copied by reference.
 *
 * @param {object} result Result object returned by a tool.
 * @returns {object} Fresh object holding only the twelve fields above.
 */
function mapExecutionToolResult(result) {
  const {
    success,
    output,
    error,
    llmContent,
    mutated,
    mutatedFiles,
    diff,
    dryRun,
    noop,
    partial,
    beforeHash,
    afterHash
  } = result;
  return {
    success,
    output,
    error,
    llmContent,
    mutated,
    mutatedFiles,
    diff,
    dryRun,
    noop,
    partial,
    beforeHash,
    afterHash
  };
}
578536
/**
 * Wrap an execution tool in an adapter object whose results are passed
 * through `mapExecutionToolResult`.
 *
 * Side effect at adapt time: when `generationKindForToolName(tool.name)` is
 * truthy and the tool has a `setProgressCallback` function, a callback is
 * registered on the tool that forwards each event to
 * `options2.onProgress(tool.name, event)` if that option is set.
 *
 * The adapter always carries `name`, `aliases`, `description`, `parameters`,
 * `inputSchema`, `maxResultSizeChars` and `execute`. `prompt`,
 * `executeStream`, `validateInput`, `isConcurrencySafe` and `isReadOnly` are
 * added only when the tool defines them as functions.
 *
 * @param {object} tool Tool to adapt.
 * @param {object} [options2]
 * @param {Function} [options2.onProgress] Called as `(toolName, event)`.
 * @param {Function} [options2.execute] Called as `(tool, args, invoke)` in
 *   place of a direct `tool.execute(args)` call; `invoke()` performs that
 *   direct call. Its resolved value is what gets mapped and returned.
 * @returns {object} The adapter object.
 */
function adaptExecutionTool(tool, options2 = {}) {
  const supportsProgress =
    generationKindForToolName(tool.name) &&
    typeof tool.setProgressCallback === "function";
  if (supportsProgress) {
    tool.setProgressCallback((event) => {
      options2.onProgress?.(tool.name, event);
    });
  }

  const adapted = {
    name: tool.name,
    aliases: tool.aliases,
    description: tool.description,
    parameters: tool.parameters,
    inputSchema: tool.inputSchema,
    maxResultSizeChars: tool.maxResultSizeChars,
    async execute(args) {
      const invoke = () => tool.execute(args);
      let result;
      if (options2.execute) {
        // The custom executor decides when (or whether) to call invoke().
        result = await options2.execute(tool, args, invoke);
      } else {
        result = await invoke();
      }
      return mapExecutionToolResult(result);
    }
  };

  // Optional capabilities are attached in a fixed order and only when the
  // tool implements them. Each wrapper looks the method up on `tool` at call
  // time, so it is invoked with `tool` as its receiver.
  if (typeof tool.prompt === "function") {
    adapted.prompt = (context2) => tool.prompt(context2);
  }
  if (typeof tool.executeStream === "function") {
    adapted.executeStream = async function* (args) {
      // Re-yield every streamed value; the delegated generator's return
      // value is the final result and gets mapped like execute()'s.
      const finalResult = yield* tool.executeStream(args);
      return mapExecutionToolResult(finalResult);
    };
  }
  if (typeof tool.validateInput === "function") {
    adapted.validateInput = (args, context2) => tool.validateInput(args, context2);
  }
  if (typeof tool.isConcurrencySafe === "function") {
    adapted.isConcurrencySafe = (args) => tool.isConcurrencySafe(args);
  }
  if (typeof tool.isReadOnly === "function") {
    adapted.isReadOnly = (args) => tool.isReadOnly(args);
  }

  return adapted;
}
578576
+ var init_tool_adapter = __esm({
578577
+ "packages/cli/src/tui/tool-adapter.ts"() {
578578
+ "use strict";
578579
+ init_generative_progress();
578580
+ }
578581
+ });
578582
+
578425
578583
  // packages/cli/src/tui/runtime-verification.ts
578426
578584
  import { execFileSync as execFileSync6 } from "node:child_process";
578427
578585
  import { existsSync as existsSync92, readFileSync as readFileSync74, readdirSync as readdirSync29 } from "node:fs";
@@ -592024,15 +592182,7 @@ var init_p2p = __esm({
592024
592182
  import { EventEmitter as EventEmitter11 } from "node:events";
592025
592183
  import crypto13 from "node:crypto";
592026
592184
  function adaptTool(tool) {
592027
- return {
592028
- name: tool.name,
592029
- description: tool.description,
592030
- parameters: tool.parameters,
592031
- async execute(args) {
592032
- const result = await tool.execute(args);
592033
- return { success: result.success, output: result.output, error: result.error };
592034
- }
592035
- };
592185
+ return adaptExecutionTool(tool);
592036
592186
  }
592037
592187
  function getActivityFeed() {
592038
592188
  if (!_globalFeed) _globalFeed = new ActivityFeed();
@@ -592047,6 +592197,7 @@ var init_call_agent = __esm({
592047
592197
  "use strict";
592048
592198
  init_dist8();
592049
592199
  init_dist6();
592200
+ init_tool_adapter();
592050
592201
  ActivityFeed = class {
592051
592202
  entries = [];
592052
592203
  maxEntries = 100;
@@ -632130,15 +632281,7 @@ function computeSparsity(entries) {
632130
632281
  return Math.max(0, Math.min(1, 1 - avgOverlap));
632131
632282
  }
632132
632283
  function adaptTool2(tool) {
632133
- return {
632134
- name: tool.name,
632135
- description: tool.description,
632136
- parameters: tool.parameters,
632137
- async execute(args) {
632138
- const result = await tool.execute(args);
632139
- return { success: result.success, output: result.output, error: result.error };
632140
- }
632141
- };
632284
+ return adaptExecutionTool(tool);
632142
632285
  }
632143
632286
  var SNREngine;
632144
632287
  var init_snr_engine = __esm({
@@ -632148,6 +632291,7 @@ var init_snr_engine = __esm({
632148
632291
  init_dist6();
632149
632292
  init_project_context();
632150
632293
  init_render();
632294
+ init_tool_adapter();
632151
632295
  SNREngine = class {
632152
632296
  constructor(config, repoRoot) {
632153
632297
  this.config = config;
@@ -632608,15 +632752,7 @@ ${sections.join("\n\n")}`;
632608
632752
  }
632609
632753
  }
632610
632754
  function adaptTool3(tool) {
632611
- return {
632612
- name: tool.name,
632613
- description: tool.description,
632614
- parameters: tool.parameters,
632615
- async execute(args) {
632616
- const result = await tool.execute(args);
632617
- return { success: result.success, output: result.output, error: result.error };
632618
- }
632619
- };
632755
+ return adaptExecutionTool(tool);
632620
632756
  }
632621
632757
  function buildDreamPrompt(mode, stage, cycleNum, totalCycles, previousFindings, dreamsDir) {
632622
632758
  const modeDesc = mode === "lucid" ? "LUCID DREAM MODE: You have full implementation capability. After ideation, you will implement, test, and evaluate changes." : mode === "deep" ? "DEEP DREAM MODE: Explore deeply with multiple expansion/contraction cycles. All proposals go in .omnius/dreams/." : "DREAM MODE: Creative exploration only. All output must be written to .omnius/dreams/ directory using file_write.";
@@ -632758,6 +632894,7 @@ var init_dream_engine = __esm({
632758
632894
  init_setup();
632759
632895
  init_render();
632760
632896
  init_promptLoader3();
632897
+ init_tool_adapter();
632761
632898
  _dreamWriteContent = null;
632762
632899
  SWARM_ROLE_CONFIG = {
632763
632900
  researcher: { maxTurns: 25, temperature: 0.4 },
@@ -634546,15 +634683,7 @@ Reflect on what went well and what could improve.`;
634546
634683
  }
634547
634684
  }
634548
634685
  function adaptTool4(tool) {
634549
- return {
634550
- name: tool.name,
634551
- description: tool.description,
634552
- parameters: tool.parameters,
634553
- async execute(args) {
634554
- const result = await tool.execute(args);
634555
- return { success: result.success, output: result.output, error: result.error };
634556
- }
634557
- };
634686
+ return adaptExecutionTool(tool);
634558
634687
  }
634559
634688
  function renderDMNCycleStart(cycleNum, deliberation = false) {
634560
634689
  process.stdout.write(`
@@ -634622,6 +634751,7 @@ var init_dmn_engine = __esm({
634622
634751
  init_project_context();
634623
634752
  init_render();
634624
634753
  init_promptLoader3();
634754
+ init_tool_adapter();
634625
634755
  DMNEngine = class {
634626
634756
  constructor(config, repoRoot) {
634627
634757
  this.config = config;
@@ -642252,25 +642382,17 @@ function normalizeTelegramCallbackQuery(update2) {
642252
642382
  };
642253
642383
  }
642254
642384
  function adaptTool5(tool, todoSessionId, progress) {
642255
- const progressTool = tool;
642256
- if (generationKindForToolName(tool.name) && typeof progressTool.setProgressCallback === "function") {
642257
- progressTool.setProgressCallback((event) => {
642258
- progress?.onProgress(tool.name, event);
642259
- });
642260
- }
642261
- return {
642262
- name: tool.name,
642263
- description: tool.description,
642264
- parameters: tool.parameters,
642265
- async execute(args) {
642385
+ return adaptExecutionTool(tool, {
642386
+ onProgress: (toolName, event) => progress?.onProgress(toolName, event),
642387
+ execute: async (_tool, args, invoke) => {
642266
642388
  const previousTodoSession = todoSessionId ? getTodoSessionId() : "";
642267
642389
  if (todoSessionId && (tool.name === "todo_write" || tool.name === "todo_read")) {
642268
642390
  setTodoSessionId(todoSessionId);
642269
642391
  }
642270
642392
  try {
642271
- const result = await tool.execute(args);
642393
+ const result = await invoke();
642272
642394
  progress?.complete(tool.name, result);
642273
- return { success: result.success, output: result.output, error: result.error, llmContent: result.llmContent };
642395
+ return result;
642274
642396
  } catch (err) {
642275
642397
  progress?.complete(tool.name, {
642276
642398
  success: false,
@@ -642284,7 +642406,7 @@ function adaptTool5(tool, todoSessionId, progress) {
642284
642406
  }
642285
642407
  }
642286
642408
  }
642287
- };
642409
+ });
642288
642410
  }
642289
642411
  function telegramBotAccessSettingsFromApi(settings) {
642290
642412
  return {
@@ -642449,6 +642571,7 @@ var init_telegram_bridge = __esm({
642449
642571
  init_voice_soul();
642450
642572
  init_telegram_creative_tools();
642451
642573
  init_generative_progress();
642574
+ init_tool_adapter();
642452
642575
  init_omnius_directory();
642453
642576
  init_stimulation();
642454
642577
  init_pid_controller();
@@ -681209,42 +681332,9 @@ function getVersion4() {
681209
681332
  return "0.0.0";
681210
681333
  }
681211
681334
  function adaptTool6(tool) {
681212
- const progressTool = tool;
681213
- if (generationKindForToolName(tool.name) && typeof progressTool.setProgressCallback === "function") {
681214
- progressTool.setProgressCallback((event) => {
681215
- _generativeProgressSink?.(tool.name, event);
681216
- });
681217
- }
681218
- return {
681219
- name: tool.name,
681220
- aliases: tool.aliases,
681221
- description: tool.description,
681222
- parameters: tool.parameters,
681223
- inputSchema: tool.inputSchema,
681224
- prompt: tool.prompt,
681225
- executeStream: tool.executeStream,
681226
- validateInput: tool.validateInput,
681227
- isConcurrencySafe: tool.isConcurrencySafe,
681228
- isReadOnly: tool.isReadOnly,
681229
- maxResultSizeChars: tool.maxResultSizeChars,
681230
- async execute(args) {
681231
- const result = await tool.execute(args);
681232
- return {
681233
- success: result.success,
681234
- output: result.output,
681235
- error: result.error,
681236
- llmContent: result.llmContent,
681237
- mutated: result.mutated,
681238
- mutatedFiles: result.mutatedFiles,
681239
- diff: result.diff,
681240
- dryRun: result.dryRun,
681241
- noop: result.noop,
681242
- partial: result.partial,
681243
- beforeHash: result.beforeHash,
681244
- afterHash: result.afterHash
681245
- };
681246
- }
681247
- };
681335
+ return adaptExecutionTool(tool, {
681336
+ onProgress: (toolName, event) => _generativeProgressSink?.(toolName, event)
681337
+ });
681248
681338
  }
681249
681339
  function createTuiReminderOptions(allowActionDelivery = true) {
681250
681340
  const sessionId = process.env["OMNIUS_SESSION_ID"] || "terminal";
@@ -690440,6 +690530,7 @@ var init_interactive = __esm({
690440
690530
  init_dist8();
690441
690531
  init_dist6();
690442
690532
  init_generative_progress();
690533
+ init_tool_adapter();
690443
690534
  init_runtime_verification();
690444
690535
  init_dist();
690445
690536
  init_listen();
@@ -691679,6 +691770,7 @@ function parseCliArgs(argv) {
691679
691770
  local: { type: "boolean", short: "l" },
691680
691771
  port: { type: "string" },
691681
691772
  suite: { type: "string" },
691773
+ live: { type: "boolean" },
691682
691774
  json: { type: "boolean", short: "j" },
691683
691775
  background: { type: "boolean" },
691684
691776
  help: { type: "boolean", short: "h" },
@@ -691738,6 +691830,7 @@ function parseCliArgs(argv) {
691738
691830
  break;
691739
691831
  case "eval":
691740
691832
  result.evalSuite = typeof values.suite === "string" ? values.suite : void 0;
691833
+ result.evalLive = values.live === true;
691741
691834
  break;
691742
691835
  default:
691743
691836
  break;
@@ -691776,6 +691869,7 @@ Flags:
691776
691869
  --max-retries <n> Max retries per model request
691777
691870
  --timeout-ms <ms> Overall task timeout
691778
691871
  --suite <name> Eval suite: basic (default) or full
691872
+ --live Run eval against configured backend instead of FakeBackend
691779
691873
  --port <n> Server port (serve command, vLLM only, default: 8000)
691780
691874
  -h, --help Show this help
691781
691875
  -V, --version Show version
@@ -691801,6 +691895,7 @@ Examples:
691801
691895
  omnius serve
691802
691896
  omnius serve --backend vllm --port 9000
691803
691897
  omnius eval --suite full --verbose
691898
+ omnius eval --suite basic --live --backend ollama --model qwen3.5:9b
691804
691899
  omnius config set model qwen3.5:122b
691805
691900
  `.trim();
691806
691901
  process.stdout.write(text + "\n");
@@ -691936,7 +692031,8 @@ async function main() {
691936
692031
  {
691937
692032
  suite: parsed.evalSuite,
691938
692033
  repoPath: parsed.repoPath,
691939
- verbose: parsed.verbose
692034
+ verbose: parsed.verbose,
692035
+ live: parsed.evalLive
691940
692036
  },
691941
692037
  config
691942
692038
  );
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.214",
3
+ "version": "1.0.216",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.214",
9
+ "version": "1.0.216",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.214",
3
+ "version": "1.0.216",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -51,7 +51,7 @@ If you anticipate a large result before calling a tool, prefer narrow flags firs
51
51
  - list_directory: List files in a directory with types and sizes
52
52
  - web_search: Search the web for documentation or solutions
53
53
  - web_fetch: Fetch a web page and extract text content (for docs, MDN, w3schools.com, etc.)
54
- - todo_write / todo_read: Visible task checklist for the user. For ANY multi-step task with 3+ logical phases, your FIRST tool call must be todo_write declaring the entire plan as an array of items with status pending|in_progress|completed|blocked. After each phase completes, call todo_write again with item N marked completed and item N+1 marked in_progress. The user watches this checklist update live in the chat UI — it is your primary planning surface for long-horizon work and the user can see at a glance whether you are making progress or stuck. Use todo_write for any task naturally containing 3+ phases (build/test/ship, scrape/parse/store, plan/draft/edit, explore/refactor/verify, etc.). Do NOT use it for trivial single-step questions. Each todo accepts two OPTIONAL fields you should USE whenever the todo has objective completion criteria: `verifyCommand` (a shell command that PROVES the todo is complete — typecheck/test/build invocations etc.) and `declaredArtifacts` (a list of file paths this todo will produce). The orchestrator auto-checks both at completion-claim time; missing/unverified completions are rejected with a specific gap critique. **Worked example — emit todos in this exact shape:** `todo_write({"todos":[{"id":"p1","content":"Implement cache module","status":"in_progress","verifyCommand":"<your test command>","declaredArtifacts":["src/lib/cache.ts","tests/cache.test"]},{"id":"p2","content":"Make build pass","status":"pending","verifyCommand":"<your build command>"}]})`. Substitute placeholder strings with commands native to YOUR stack.
54
+ - todo_write / todo_read: Visible task checklist for the user. For ANY multi-step task with 3+ substantive work phases, your FIRST tool call must be todo_write declaring the entire plan as an array of items with status pending|in_progress|completed|blocked. After each phase completes, call todo_write again with item N marked completed and item N+1 marked in_progress. Do NOT count observing a tool result, reporting findings, or task_complete as phases. The user watches this checklist update live in the chat UI — it is your primary planning surface for long-horizon work and the user can see at a glance whether you are making progress or stuck. Use todo_write for any task naturally containing 3+ real work phases (build/test/ship, scrape/parse/store, plan/draft/edit, explore/refactor/verify, etc.). Do NOT use it for trivial single-step questions. Each todo accepts two OPTIONAL fields you should USE whenever the todo has objective completion criteria: `verifyCommand` (a shell command that PROVES the todo is complete — typecheck/test/build invocations etc.) and `declaredArtifacts` (a list of file paths this todo will produce). The orchestrator auto-checks both at completion-claim time; missing/unverified completions are rejected with a specific gap critique. **Worked example — emit todos in this exact shape:** `todo_write({"todos":[{"id":"p1","content":"Implement cache module","status":"in_progress","verifyCommand":"<your test command>","declaredArtifacts":["src/lib/cache.ts","tests/cache.test"]},{"id":"p2","content":"Make build pass","status":"pending","verifyCommand":"<your build command>"}]})`. Substitute placeholder strings with commands native to YOUR stack.
55
55
 
56
56
  ## Web Tool Selection
57
57
 
@@ -182,7 +182,7 @@ When you discover image files (png, jpg, gif, svg, webp, bmp) during codebase ex
182
182
 
183
183
  ## Workflow
184
184
 
185
- 0. **PLAN AT THE TOP** — for any task with 3+ logical phases, your VERY FIRST tool call must be `todo_write` with a complete checklist (each item: `{content, status}`). Mark item 1 as `in_progress`, the rest as `pending`. The user watches this checklist update live in the chat UI as you work, so they always know what step you're on. After each phase, call todo_write again to mark the finished item `completed` and the next one `in_progress`.
185
+ 0. **PLAN AT THE TOP** — for any task with 3+ substantive work phases, your VERY FIRST tool call must be `todo_write` with a complete checklist (each item: `{content, status}`). Mark item 1 as `in_progress`, the rest as `pending`. Do not count observing output, reporting findings, or task_complete as phases. The user watches this checklist update live in the chat UI as you work, so they always know what step you're on. After each phase, call todo_write again to mark the finished item `completed` and the next one `in_progress`.
186
186
  1. EXPLORE: Use find_files and grep_search to locate relevant code. Read specific files.
187
187
  2. PLAN: Determine what changes are needed based on the code you've read.
188
188
  3. IMPLEMENT: Make changes using file_edit (preferred) or file_write for new files.
@@ -11,7 +11,7 @@ You operate in two modes based on what the user needs:
11
11
  **TASK MODE** — coding tasks, file operations, technical directives:
12
12
  - Call tools iteratively until complete. NEVER write code blocks as text — only tool calls execute.
13
13
  - If you need to read a file, call file_read. If you need to run a command, call shell.
14
- - **MANDATORY: For ANY task that will take 3 or more tool calls, your VERY FIRST tool call MUST be `todo_write` declaring the complete plan.** Items have `{content, status}` where status is one of pending|in_progress|completed|blocked. Mark item 1 in_progress, the rest pending. Then re-call todo_write after each phase finishes to mark item N completed and N+1 in_progress. The user watches this checklist update live in the chat UI — without it they can't see your plan or track your progress.
14
+ - **MANDATORY: For ANY task that will take 3 or more substantive tool calls, your VERY FIRST tool call MUST be `todo_write` declaring the complete plan.** Items have `{content, status}` where status is one of pending|in_progress|completed|blocked. Mark item 1 in_progress, the rest pending. Then re-call todo_write after each phase finishes to mark item N completed and N+1 in_progress. Do NOT count observing tool output, reporting findings, or task_complete as work phases. For one-tool tasks, call the tool directly and then call task_complete. The user watches this checklist update live in the chat UI — without it they can't see your plan or track your progress.
15
15
 
16
16
  ## Instruction Hierarchy
17
17
 
@@ -41,7 +41,7 @@ Tool results over ~100KB are NOT truncated. The orchestrator saves the full payl
41
41
  - list_directory: List files in a directory
42
42
  - web_search: Search the web
43
43
  - web_fetch: Fetch a web page's text
44
- - todo_write / todo_read: Visible task checklist for the user. For ANY multi-step task with 3+ logical steps, start by calling todo_write to declare your plan, then re-call todo_write as each step transitions (mark item N "completed" + N+1 "in_progress"). The user sees this list update live in the UI — it is your primary planning surface for long-horizon work. Use it whenever the task naturally has 3+ phases (build/refactor/test/ship, scrape/parse/store/report, plan/draft/edit/publish, etc.).
44
+ - todo_write / todo_read: Visible task checklist for the user. For ANY multi-step task with 3+ substantive work steps, start by calling todo_write to declare your plan, then re-call todo_write as each step transitions (mark item N "completed" + N+1 "in_progress"). The user sees this list update live in the UI — it is your primary planning surface for long-horizon work. Use it whenever the task naturally has 3+ real work phases (build/refactor/test/ship, scrape/parse/store/report, plan/draft/edit/publish, etc.). Skip it for a single tool action followed only by reporting and task_complete.
45
45
 
46
46
  Each todo accepts two OPTIONAL fields you should USE whenever the todo has objective completion criteria:
47
47
 
@@ -105,8 +105,8 @@ Launch ALL sub_agent calls in ONE response. This saves your context window for o
105
105
 
106
106
  ## Workflow
107
107
 
108
- For tasks requiring 3+ tool calls — plan before acting:
109
- 1. LIST all steps needed before your first tool call. **For 3+ step tasks, your FIRST tool call must be `todo_write` declaring the full plan with item 1 set to status:"in_progress" and the rest "pending".** Then call todo_write again as each step finishes to mark items "completed" and the next one "in_progress". The user watches this list update live in the chat UI.
108
+ For tasks requiring 3+ substantive tool calls — plan before acting:
109
+ 1. LIST all real work steps needed before your first tool call. **For 3+ substantive-step tasks, your FIRST tool call must be `todo_write` declaring the full plan with item 1 set to status:"in_progress" and the rest "pending".** Do not count reporting, observing output, or task_complete as steps. Then call todo_write again as each step finishes to mark items "completed" and the next one "in_progress". The user watches this list update live in the chat UI.
110
110
  2. If task mentions 3+ independent modules/files: delegate each to a sub_agent (saves context)
111
111
  3. EXPLORE: Use find_files, grep_search, file_explore to understand the codebase
112
112
  - For large files (200+ lines): use file_explore(strategy='overview') then search/chunk — NEVER read entire file
@@ -34,7 +34,7 @@ File edits: Use file_write/file_edit/file_patch/batch_edit for project files, no
34
34
 
35
35
  Tool choice: Use file/search/code-graph tools for repository discovery, web_fetch/web_download/browser_action for web work, and repl_exec for multi-step data processing. Use shell when the command itself is the verifier or work product: tests, builds, package managers, git, system operations, and small native scripts. Do not hide diagnostics inside opaque shell blobs or `|| true`. Use background_run for long commands and poll with task_status/task_output.
36
36
 
37
- todo_write: visible task checklist for the user. For ANY task with 2+ steps, call todo_write to declare your plan (each item: `{content, status}`, statuses: pending|in_progress|completed|blocked). Update status as you complete each step. Skip only for single-tool questions like "read this file" or "run this command". Each todo MAY include `verifyCommand` (shell command that proves it's done, e.g. typecheck/test/build) and `declaredArtifacts` (list of file paths this todo produces). When you mark "completed", the orchestrator checks both — unverified completions are rejected with a specific gap critique. **Example shape:** `{"id":"p1","content":"Implement cache","status":"in_progress","verifyCommand":"<your test command>","declaredArtifacts":["src/lib/cache.ts"]}`. Substitute placeholders with commands native to YOUR stack.
37
+ todo_write: visible task checklist for the user. Use it for substantive multi-step work, not ceremony. For tasks with 2+ substantive work steps, call todo_write to declare your plan (each item: `{content, status}`, statuses: pending|in_progress|completed|blocked). Update status as you complete each step. Skip single-tool questions like "read this file", "list this directory", or "run this command", even if you will report findings and call task_complete afterward. Do NOT count observing a tool result, reporting findings, or task_complete as todo steps. Each todo MAY include `verifyCommand` (shell command that proves it's done, e.g. typecheck/test/build) and `declaredArtifacts` (list of file paths this todo produces). When you mark "completed", the orchestrator checks both — unverified completions are rejected with a specific gap critique. **Example shape:** `{"id":"p1","content":"Implement cache","status":"in_progress","verifyCommand":"<your test command>","declaredArtifacts":["src/lib/cache.ts"]}`. Substitute placeholders with commands native to YOUR stack.
38
38
 
39
39
  Web: web_search finds URLs, web_fetch reads them. For JS pages use web_crawl, for clicking/login use browser_action.
40
40
 
@@ -100,7 +100,7 @@ Creating new files — WRITE FIRST, refine later:
100
100
  - After writing: fill in each method, test after each one.
101
101
  - A bad first draft you can fix is better than no draft at all.
102
102
 
103
- Complex tasks (5+ steps) — DECOMPOSE before acting:
103
+ Complex tasks (5+ substantive work steps) — DECOMPOSE before acting:
104
104
  1. Call todo_write with the checklist. Mark item 1 "in_progress".
105
105
  2. Execute ONE STEP AT A TIME. After each, update todo_write status.
106
106
  3. After each file edit, VERIFY: file_read or shell test.