omnius 1.0.215 → 1.0.216
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js
CHANGED
|
@@ -24413,7 +24413,7 @@ var EXCLUDED, MAX_ENTRIES, ListDirectoryTool;
|
|
|
24413
24413
|
var init_list_directory = __esm({
|
|
24414
24414
|
"packages/execution/dist/tools/list-directory.js"() {
|
|
24415
24415
|
"use strict";
|
|
24416
|
-
EXCLUDED = /* @__PURE__ */ new Set(["node_modules", ".git"]);
|
|
24416
|
+
EXCLUDED = /* @__PURE__ */ new Set(["node_modules", ".git", ".omnius"]);
|
|
24417
24417
|
MAX_ENTRIES = 100;
|
|
24418
24418
|
ListDirectoryTool = class {
|
|
24419
24419
|
name = "list_directory";
|
|
@@ -289853,6 +289853,53 @@ function getTodoSessionId() {
|
|
|
289853
289853
|
return envSession;
|
|
289854
289854
|
return "default";
|
|
289855
289855
|
}
|
|
289856
|
+
function normalizeIncomingTodos(args) {
|
|
289857
|
+
const repairNotes = [];
|
|
289858
|
+
const record = args;
|
|
289859
|
+
if (Array.isArray(args)) {
|
|
289860
|
+
repairNotes.push("coerced top-level array into {todos:[...]}");
|
|
289861
|
+
return { todos: args, repairNotes, error: "" };
|
|
289862
|
+
}
|
|
289863
|
+
const direct = record["todos"];
|
|
289864
|
+
if (Array.isArray(direct)) {
|
|
289865
|
+
return { todos: direct, repairNotes, error: "" };
|
|
289866
|
+
}
|
|
289867
|
+
if (direct && typeof direct === "object") {
|
|
289868
|
+
const nested = direct;
|
|
289869
|
+
for (const key of ["todos", "items", "tasks", "checklist"]) {
|
|
289870
|
+
if (Array.isArray(nested[key])) {
|
|
289871
|
+
repairNotes.push(`coerced todos.${key} into todos array`);
|
|
289872
|
+
return { todos: nested[key], repairNotes, error: "" };
|
|
289873
|
+
}
|
|
289874
|
+
}
|
|
289875
|
+
if (typeof nested["content"] === "string") {
|
|
289876
|
+
repairNotes.push("wrapped single todo object in todos array");
|
|
289877
|
+
return { todos: [nested], repairNotes, error: "" };
|
|
289878
|
+
}
|
|
289879
|
+
}
|
|
289880
|
+
for (const key of ["items", "tasks", "checklist", "todo_items"]) {
|
|
289881
|
+
if (Array.isArray(record[key])) {
|
|
289882
|
+
repairNotes.push(`coerced ${key} into todos array`);
|
|
289883
|
+
return { todos: record[key], repairNotes, error: "" };
|
|
289884
|
+
}
|
|
289885
|
+
}
|
|
289886
|
+
const single = record["todo"] ?? record["task"];
|
|
289887
|
+
if (single && typeof single === "object" && !Array.isArray(single)) {
|
|
289888
|
+
const obj = single;
|
|
289889
|
+
if (typeof obj["content"] === "string") {
|
|
289890
|
+
repairNotes.push("coerced single todo/task object into todos array");
|
|
289891
|
+
return { todos: [obj], repairNotes, error: "" };
|
|
289892
|
+
}
|
|
289893
|
+
}
|
|
289894
|
+
if (typeof single === "string" && single.trim()) {
|
|
289895
|
+
repairNotes.push("coerced single todo/task string into todos array");
|
|
289896
|
+
return { todos: [single.trim()], repairNotes, error: "" };
|
|
289897
|
+
}
|
|
289898
|
+
return {
|
|
289899
|
+
repairNotes,
|
|
289900
|
+
error: 'todos must be an array. Correct shape: todo_write({"todos":[{"content":"Inspect files","status":"in_progress"},{"content":"Make changes","status":"pending"}]})'
|
|
289901
|
+
};
|
|
289902
|
+
}
|
|
289856
289903
|
var _currentSessionId, TodoWriteTool, TodoReadTool;
|
|
289857
289904
|
var init_todo_write = __esm({
|
|
289858
289905
|
"packages/execution/dist/tools/todo-write.js"() {
|
|
@@ -289861,7 +289908,27 @@ var init_todo_write = __esm({
|
|
|
289861
289908
|
_currentSessionId = "";
|
|
289862
289909
|
TodoWriteTool = class {
|
|
289863
289910
|
name = "todo_write";
|
|
289864
|
-
description =
|
|
289911
|
+
description = `Update the session task checklist. To be used proactively and often to track progress and pending tasks. Make sure that at least one task is in_progress at all times.
|
|
289912
|
+
|
|
289913
|
+
## When to use
|
|
289914
|
+
1. Complex multi-step tasks — when a task requires 3 or more distinct steps or actions
|
|
289915
|
+
2. When the user provides multiple tasks (numbered or comma-separated)
|
|
289916
|
+
3. After receiving new instructions — capture user requirements as todos immediately
|
|
289917
|
+
4. When you start a task — mark it in_progress BEFORE beginning work. Only ONE in_progress at a time
|
|
289918
|
+
5. After completing a task — mark it completed and add follow-up tasks you discovered
|
|
289919
|
+
|
|
289920
|
+
## When NOT to use
|
|
289921
|
+
- Single, straightforward tasks (a trivial edit, a one-line fix)
|
|
289922
|
+
- Conversational or informational questions
|
|
289923
|
+
- Tasks completable in <3 trivial steps
|
|
289924
|
+
|
|
289925
|
+
## Task states
|
|
289926
|
+
- pending: not started
|
|
289927
|
+
- in_progress: currently working on (exactly ONE at a time)
|
|
289928
|
+
- completed: fully done (tests pass, code works, goal met)
|
|
289929
|
+
- blocked: stuck on a dependency (include blocker text)
|
|
289930
|
+
|
|
289931
|
+
Mark tasks complete IMMEDIATELY after finishing — don't batch. Never mark completed if tests are failing or implementation is partial. The user watches this list in the chat UI in real time. Canonical call shape: todo_write({"todos":[{"content":"Inspect files","status":"in_progress"},{"content":"Make changes","status":"pending"},{"content":"Verify results","status":"pending"}]})`;
|
|
289865
289932
|
parameters = {
|
|
289866
289933
|
type: "object",
|
|
289867
289934
|
required: ["todos"],
|
|
@@ -289902,48 +289969,62 @@ var init_todo_write = __esm({
|
|
|
289902
289969
|
async execute(args) {
|
|
289903
289970
|
const start2 = performance.now();
|
|
289904
289971
|
try {
|
|
289905
|
-
const
|
|
289906
|
-
if (!
|
|
289972
|
+
const normalized = normalizeIncomingTodos(args);
|
|
289973
|
+
if (!normalized.todos) {
|
|
289907
289974
|
return {
|
|
289908
289975
|
success: false,
|
|
289909
289976
|
output: "",
|
|
289910
|
-
error:
|
|
289977
|
+
error: normalized.error,
|
|
289911
289978
|
durationMs: performance.now() - start2
|
|
289912
289979
|
};
|
|
289913
289980
|
}
|
|
289914
289981
|
const incoming = [];
|
|
289915
|
-
|
|
289982
|
+
const repairNotes = [...normalized.repairNotes];
|
|
289983
|
+
for (let index = 0; index < normalized.todos.length; index++) {
|
|
289984
|
+
const raw = normalized.todos[index];
|
|
289916
289985
|
if (!raw || typeof raw !== "object") {
|
|
289986
|
+
if (typeof raw === "string" && raw.trim()) {
|
|
289987
|
+
incoming.push({
|
|
289988
|
+
content: raw.trim(),
|
|
289989
|
+
status: index === 0 ? "in_progress" : "pending"
|
|
289990
|
+
});
|
|
289991
|
+
repairNotes.push("coerced string todo item into {content,status}");
|
|
289992
|
+
continue;
|
|
289993
|
+
}
|
|
289917
289994
|
return {
|
|
289918
289995
|
success: false,
|
|
289919
289996
|
output: "",
|
|
289920
|
-
error:
|
|
289997
|
+
error: 'each todo must be an object with content+status. Correct shape: {"todos":[{"content":"...","status":"in_progress"}]}',
|
|
289921
289998
|
durationMs: performance.now() - start2
|
|
289922
289999
|
};
|
|
289923
290000
|
}
|
|
289924
290001
|
const entry = raw;
|
|
289925
290002
|
const content = entry["content"];
|
|
289926
290003
|
const status = entry["status"];
|
|
289927
|
-
if (typeof content !== "string"
|
|
290004
|
+
if (typeof content !== "string") {
|
|
289928
290005
|
return {
|
|
289929
290006
|
success: false,
|
|
289930
290007
|
output: "",
|
|
289931
|
-
error:
|
|
290008
|
+
error: 'todo must have string content. Correct shape: {"todos":[{"content":"...","status":"in_progress"}]}',
|
|
289932
290009
|
durationMs: performance.now() - start2
|
|
289933
290010
|
};
|
|
289934
290011
|
}
|
|
289935
|
-
|
|
290012
|
+
const resolvedStatus = typeof status === "string" ? status : index === 0 ? "in_progress" : "pending";
|
|
290013
|
+
if (typeof status !== "string") {
|
|
290014
|
+
repairNotes.push("defaulted missing todo status to in_progress/pending");
|
|
290015
|
+
}
|
|
290016
|
+
if (!["pending", "in_progress", "completed", "blocked"].includes(resolvedStatus)) {
|
|
289936
290017
|
return {
|
|
289937
290018
|
success: false,
|
|
289938
290019
|
output: "",
|
|
289939
|
-
error: `invalid status: ${
|
|
290020
|
+
error: `invalid status: ${resolvedStatus}`,
|
|
289940
290021
|
durationMs: performance.now() - start2
|
|
289941
290022
|
};
|
|
289942
290023
|
}
|
|
289943
290024
|
incoming.push({
|
|
289944
290025
|
id: typeof entry["id"] === "string" ? entry["id"] : void 0,
|
|
289945
290026
|
content,
|
|
289946
|
-
status,
|
|
290027
|
+
status: resolvedStatus,
|
|
289947
290028
|
parentId: typeof entry["parentId"] === "string" ? entry["parentId"] : void 0,
|
|
289948
290029
|
blocker: typeof entry["blocker"] === "string" ? entry["blocker"] : void 0,
|
|
289949
290030
|
// REG-37: verification-aware planning
|
|
@@ -289986,6 +290067,16 @@ var init_todo_write = __esm({
|
|
|
289986
290067
|
newTodos: result.newTodos,
|
|
289987
290068
|
verificationNudgeNeeded
|
|
289988
290069
|
};
|
|
290070
|
+
if (repairNotes.length > 0) {
|
|
290071
|
+
payload["inputRepair"] = Array.from(new Set(repairNotes));
|
|
290072
|
+
payload["canonicalShape"] = {
|
|
290073
|
+
todos: [
|
|
290074
|
+
{ content: "Inspect files", status: "in_progress" },
|
|
290075
|
+
{ content: "Make changes", status: "pending" },
|
|
290076
|
+
{ content: "Verify results", status: "pending" }
|
|
290077
|
+
]
|
|
290078
|
+
};
|
|
290079
|
+
}
|
|
289989
290080
|
if (verificationNudgeNeeded) {
|
|
289990
290081
|
payload["nudge"] = "You just closed 3+ todos without scheduling a verification step. Add a 'Verify the changes work' item and spawn a verification agent before declaring task_complete.";
|
|
289991
290082
|
}
|
|
@@ -564459,10 +564550,12 @@ ${_staleSamples.join("\n")}` : ``,
|
|
|
564459
564550
|
const turnTier = this.options.modelTier ?? "large";
|
|
564460
564551
|
if (turn === 0 && !this.options.disableTodoPlanningNudges && (turnTier === "small" || turnTier === "medium")) {
|
|
564461
564552
|
const goal = this._taskState.goal || "";
|
|
564462
|
-
const
|
|
564463
|
-
const
|
|
564464
|
-
const
|
|
564465
|
-
const
|
|
564553
|
+
const substantiveGoal = goal.replace(/\b(?:then\s+)?call\s+task_complete\b[^.?!;]*/gi, "").replace(/\b(?:observe|report|summarize|finish|complete)\b[^.?!;]*/gi, "");
|
|
564554
|
+
const wordCount2 = substantiveGoal.split(/\s+/).filter(Boolean).length;
|
|
564555
|
+
const hasMultipleActions = /\band\b.*\band\b|then.*then|also.*also/i.test(substantiveGoal);
|
|
564556
|
+
const hasMultipleFiles = /files?.*files?|\.ts.*\.ts|create.*write|modify.*create/i.test(substantiveGoal);
|
|
564557
|
+
const explicitSingleTool = /\b(exactly once|single tool|one tool|one tool call)\b/i.test(goal) || /\b(call|use)\s+(?:list_directory|file_read|grep_search|find_files|shell|web_search|web_fetch)\b/i.test(goal) && !/\b(edit|write|modify|create|fix|implement|patch|test|build|install|refactor)\b/i.test(substantiveGoal);
|
|
564558
|
+
const isComplex = !explicitSingleTool && (wordCount2 > 40 || hasMultipleActions || hasMultipleFiles);
|
|
564466
564559
|
if (isComplex) {
|
|
564467
564560
|
messages2.push({
|
|
564468
564561
|
role: "user",
|
|
@@ -564471,6 +564564,7 @@ ${_staleSamples.join("\n")}` : ``,
|
|
|
564471
564564
|
MANDATORY FIRST ACTION: Call todo_write NOW with the complete plan.
|
|
564472
564565
|
Each todo item is { content: "what to do", status: "pending" | "in_progress" | "completed" | "blocked" }.
|
|
564473
564566
|
Mark item 1 as in_progress, the rest as pending.
|
|
564567
|
+
Only count substantive work phases. Do NOT count observing a tool result, reporting findings, or calling task_complete as todo phases.
|
|
564474
564568
|
Example: todo_write({todos: [{content: "read source files", status: "in_progress"}, {content: "make changes", status: "pending"}, {content: "run tests", status: "pending"}]})
|
|
564475
564569
|
|
|
564476
564570
|
After EACH phase finishes, call todo_write AGAIN with item N marked completed and item N+1 marked in_progress.
|
|
@@ -564566,7 +564660,7 @@ ${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`);
|
|
|
564566
564660
|
const isReadTask = /\bread\b|\bshow\b|\btell me\b|\bwhat is\b/i.test(taskGoal);
|
|
564567
564661
|
const hints = [];
|
|
564568
564662
|
if (isSimpleTask) {
|
|
564569
|
-
hints.push("This is a simple task — if it needs only ONE tool call, skip todo_write and call the tool directly. If it needs 2+ steps, use todo_write to plan.");
|
|
564663
|
+
hints.push("This is a simple task — if it needs only ONE substantive tool call, skip todo_write and call the tool directly, then task_complete. Do not count reporting, observing output, or task_complete as planning steps. If it needs 2+ substantive work steps, use todo_write to plan.");
|
|
564570
564664
|
}
|
|
564571
564665
|
if (isSearchTask) {
|
|
564572
564666
|
hints.push("SEARCH STRATEGY: Use grep_search to find what you need FIRST, THEN file_read only the specific file and lines. Do NOT read entire files hoping to find something.");
|
|
@@ -691676,6 +691770,7 @@ function parseCliArgs(argv) {
|
|
|
691676
691770
|
local: { type: "boolean", short: "l" },
|
|
691677
691771
|
port: { type: "string" },
|
|
691678
691772
|
suite: { type: "string" },
|
|
691773
|
+
live: { type: "boolean" },
|
|
691679
691774
|
json: { type: "boolean", short: "j" },
|
|
691680
691775
|
background: { type: "boolean" },
|
|
691681
691776
|
help: { type: "boolean", short: "h" },
|
|
@@ -691735,6 +691830,7 @@ function parseCliArgs(argv) {
|
|
|
691735
691830
|
break;
|
|
691736
691831
|
case "eval":
|
|
691737
691832
|
result.evalSuite = typeof values.suite === "string" ? values.suite : void 0;
|
|
691833
|
+
result.evalLive = values.live === true;
|
|
691738
691834
|
break;
|
|
691739
691835
|
default:
|
|
691740
691836
|
break;
|
|
@@ -691773,6 +691869,7 @@ Flags:
|
|
|
691773
691869
|
--max-retries <n> Max retries per model request
|
|
691774
691870
|
--timeout-ms <ms> Overall task timeout
|
|
691775
691871
|
--suite <name> Eval suite: basic (default) or full
|
|
691872
|
+
--live Run eval against configured backend instead of FakeBackend
|
|
691776
691873
|
--port <n> Server port (serve command, vLLM only, default: 8000)
|
|
691777
691874
|
-h, --help Show this help
|
|
691778
691875
|
-V, --version Show version
|
|
@@ -691798,6 +691895,7 @@ Examples:
|
|
|
691798
691895
|
omnius serve
|
|
691799
691896
|
omnius serve --backend vllm --port 9000
|
|
691800
691897
|
omnius eval --suite full --verbose
|
|
691898
|
+
omnius eval --suite basic --live --backend ollama --model qwen3.5:9b
|
|
691801
691899
|
omnius config set model qwen3.5:122b
|
|
691802
691900
|
`.trim();
|
|
691803
691901
|
process.stdout.write(text + "\n");
|
|
@@ -691933,7 +692031,8 @@ async function main() {
|
|
|
691933
692031
|
{
|
|
691934
692032
|
suite: parsed.evalSuite,
|
|
691935
692033
|
repoPath: parsed.repoPath,
|
|
691936
|
-
verbose: parsed.verbose
|
|
692034
|
+
verbose: parsed.verbose,
|
|
692035
|
+
live: parsed.evalLive
|
|
691937
692036
|
},
|
|
691938
692037
|
config
|
|
691939
692038
|
);
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.215",
|
|
3
|
+
"version": "1.0.216",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.215",
|
|
9
|
+
"version": "1.0.216",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED
|
@@ -51,7 +51,7 @@ If you anticipate a large result before calling a tool, prefer narrow flags firs
|
|
|
51
51
|
- list_directory: List files in a directory with types and sizes
|
|
52
52
|
- web_search: Search the web for documentation or solutions
|
|
53
53
|
- web_fetch: Fetch a web page and extract text content (for docs, MDN, w3schools.com, etc.)
|
|
54
|
-
- todo_write / todo_read: Visible task checklist for the user. For ANY multi-step task with 3+
|
|
54
|
+
- todo_write / todo_read: Visible task checklist for the user. For ANY multi-step task with 3+ substantive work phases, your FIRST tool call must be todo_write declaring the entire plan as an array of items with status pending|in_progress|completed|blocked. After each phase completes, call todo_write again with item N marked completed and item N+1 marked in_progress. Do NOT count observing a tool result, reporting findings, or task_complete as phases. The user watches this checklist update live in the chat UI — it is your primary planning surface for long-horizon work and the user can see at a glance whether you are making progress or stuck. Use todo_write for any task naturally containing 3+ real work phases (build/test/ship, scrape/parse/store, plan/draft/edit, explore/refactor/verify, etc.). Do NOT use it for trivial single-step questions. Each todo accepts two OPTIONAL fields you should USE whenever the todo has objective completion criteria: `verifyCommand` (a shell command that PROVES the todo is complete — typecheck/test/build invocations etc.) and `declaredArtifacts` (a list of file paths this todo will produce). The orchestrator auto-checks both at completion-claim time; missing/unverified completions are rejected with a specific gap critique. **Worked example — emit todos in this exact shape:** `todo_write({"todos":[{"id":"p1","content":"Implement cache module","status":"in_progress","verifyCommand":"<your test command>","declaredArtifacts":["src/lib/cache.ts","tests/cache.test"]},{"id":"p2","content":"Make build pass","status":"pending","verifyCommand":"<your build command>"}]})`. Substitute placeholder strings with commands native to YOUR stack.
|
|
55
55
|
|
|
56
56
|
## Web Tool Selection
|
|
57
57
|
|
|
@@ -182,7 +182,7 @@ When you discover image files (png, jpg, gif, svg, webp, bmp) during codebase ex
|
|
|
182
182
|
|
|
183
183
|
## Workflow
|
|
184
184
|
|
|
185
|
-
0. **PLAN AT THE TOP** — for any task with 3+
|
|
185
|
+
0. **PLAN AT THE TOP** — for any task with 3+ substantive work phases, your VERY FIRST tool call must be `todo_write` with a complete checklist (each item: `{content, status}`). Mark item 1 as `in_progress`, the rest as `pending`. Do not count observing output, reporting findings, or task_complete as phases. The user watches this checklist update live in the chat UI as you work, so they always know what step you're on. After each phase, call todo_write again to mark the finished item `completed` and the next one `in_progress`.
|
|
186
186
|
1. EXPLORE: Use find_files and grep_search to locate relevant code. Read specific files.
|
|
187
187
|
2. PLAN: Determine what changes are needed based on the code you've read.
|
|
188
188
|
3. IMPLEMENT: Make changes using file_edit (preferred) or file_write for new files.
|
|
@@ -11,7 +11,7 @@ You operate in two modes based on what the user needs:
|
|
|
11
11
|
**TASK MODE** — coding tasks, file operations, technical directives:
|
|
12
12
|
- Call tools iteratively until complete. NEVER write code blocks as text — only tool calls execute.
|
|
13
13
|
- If you need to read a file, call file_read. If you need to run a command, call shell.
|
|
14
|
-
- **MANDATORY: For ANY task that will take 3 or more tool calls, your VERY FIRST tool call MUST be `todo_write` declaring the complete plan.** Items have `{content, status}` where status is one of pending|in_progress|completed|blocked. Mark item 1 in_progress, the rest pending. Then re-call todo_write after each phase finishes to mark item N completed and N+1 in_progress. The user watches this checklist update live in the chat UI — without it they can't see your plan or track your progress.
|
|
14
|
+
- **MANDATORY: For ANY task that will take 3 or more substantive work tool calls, your VERY FIRST tool call MUST be `todo_write` declaring the complete plan.** Items have `{content, status}` where status is one of pending|in_progress|completed|blocked. Mark item 1 in_progress, the rest pending. Then re-call todo_write after each phase finishes to mark item N completed and N+1 in_progress. Do NOT count observing tool output, reporting findings, or task_complete as work phases. For one-tool tasks, call the tool directly and then task_complete. The user watches this checklist update live in the chat UI — without it they can't see your plan or track your progress.
|
|
15
15
|
|
|
16
16
|
## Instruction Hierarchy
|
|
17
17
|
|
|
@@ -41,7 +41,7 @@ Tool results over ~100KB are NOT truncated. The orchestrator saves the full payl
|
|
|
41
41
|
- list_directory: List files in a directory
|
|
42
42
|
- web_search: Search the web
|
|
43
43
|
- web_fetch: Fetch a web page's text
|
|
44
|
-
- todo_write / todo_read: Visible task checklist for the user. For ANY multi-step task with 3+
|
|
44
|
+
- todo_write / todo_read: Visible task checklist for the user. For ANY multi-step task with 3+ substantive work steps, start by calling todo_write to declare your plan, then re-call todo_write as each step transitions (mark item N "completed" + N+1 "in_progress"). The user sees this list update live in the UI — it is your primary planning surface for long-horizon work. Use it whenever the task naturally has 3+ real work phases (build/refactor/test/ship, scrape/parse/store/report, plan/draft/edit/publish, etc.). Skip it for a single tool action followed only by reporting and task_complete.
|
|
45
45
|
|
|
46
46
|
Each todo accepts two OPTIONAL fields you should USE whenever the todo has objective completion criteria:
|
|
47
47
|
|
|
@@ -105,8 +105,8 @@ Launch ALL sub_agent calls in ONE response. This saves your context window for o
|
|
|
105
105
|
|
|
106
106
|
## Workflow
|
|
107
107
|
|
|
108
|
-
For tasks requiring 3+ tool calls — plan before acting:
|
|
109
|
-
1. LIST all steps needed before your first tool call. **For 3+ step tasks, your FIRST tool call must be `todo_write` declaring the full plan with item 1 set to status:"in_progress" and the rest "pending".** Then call todo_write again as each step finishes to mark items "completed" and the next one "in_progress". The user watches this list update live in the chat UI.
|
|
108
|
+
For tasks requiring 3+ substantive work tool calls — plan before acting:
|
|
109
|
+
1. LIST all real work steps needed before your first tool call. **For 3+ substantive-step tasks, your FIRST tool call must be `todo_write` declaring the full plan with item 1 set to status:"in_progress" and the rest "pending".** Do not count reporting, observing output, or task_complete as steps. Then call todo_write again as each step finishes to mark items "completed" and the next one "in_progress". The user watches this list update live in the chat UI.
|
|
110
110
|
2. If task mentions 3+ independent modules/files: delegate each to a sub_agent (saves context)
|
|
111
111
|
3. EXPLORE: Use find_files, grep_search, file_explore to understand the codebase
|
|
112
112
|
- For large files (200+ lines): use file_explore(strategy='overview') then search/chunk — NEVER read entire file
|
|
@@ -34,7 +34,7 @@ File edits: Use file_write/file_edit/file_patch/batch_edit for project files, no
|
|
|
34
34
|
|
|
35
35
|
Tool choice: Use file/search/code-graph tools for repository discovery, web_fetch/web_download/browser_action for web work, and repl_exec for multi-step data processing. Use shell when the command itself is the verifier or work product: tests, builds, package managers, git, system operations, and small native scripts. Do not hide diagnostics inside opaque shell blobs or `|| true`. Use background_run for long commands and poll with task_status/task_output.
|
|
36
36
|
|
|
37
|
-
todo_write: visible task checklist for the user. For
|
|
37
|
+
todo_write: visible task checklist for the user. Use it for substantive multi-step work, not ceremony. For tasks with 2+ substantive work steps, call todo_write to declare your plan (each item: `{content, status}`, statuses: pending|in_progress|completed|blocked). Update status as you complete each step. Skip single-tool questions like "read this file", "list this directory", or "run this command", even if you will report findings and call task_complete afterward. Do NOT count observing a tool result, reporting findings, or task_complete as todo steps. Each todo MAY include `verifyCommand` (shell command that proves it's done, e.g. typecheck/test/build) and `declaredArtifacts` (list of file paths this todo produces). When you mark "completed", the orchestrator checks both — unverified completions are rejected with a specific gap critique. **Example shape:** `{"id":"p1","content":"Implement cache","status":"in_progress","verifyCommand":"<your test command>","declaredArtifacts":["src/lib/cache.ts"]}`. Substitute placeholders with commands native to YOUR stack.
|
|
38
38
|
|
|
39
39
|
Web: web_search finds URLs, web_fetch reads them. For JS pages use web_crawl, for clicking/login use browser_action.
|
|
40
40
|
|
|
@@ -100,7 +100,7 @@ Creating new files — WRITE FIRST, refine later:
|
|
|
100
100
|
- After writing: fill in each method, test after each one.
|
|
101
101
|
- A bad first draft you can fix is better than no draft at all.
|
|
102
102
|
|
|
103
|
-
Complex tasks (5+ steps) — DECOMPOSE before acting:
|
|
103
|
+
Complex tasks (5+ substantive work steps) — DECOMPOSE before acting:
|
|
104
104
|
1. Call todo_write with the checklist. Mark item 1 "in_progress".
|
|
105
105
|
2. Execute ONE STEP AT A TIME. After each, update todo_write status.
|
|
106
106
|
3. After each file edit, VERIFY: file_read or shell test.
|