open-agents-ai 0.187.487 → 0.187.489
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js
CHANGED
|
@@ -519538,6 +519538,35 @@ var init_agenticRunner = __esm({
|
|
|
519538
519538
|
// unresolved verification failure. Effectively gates task_complete
|
|
519539
519539
|
// suggestion behind real verification, not just self-report.
|
|
519540
519540
|
_verifyFailures = /* @__PURE__ */ new Set();
|
|
519541
|
+
// REG-37e: track whether we've already nudged the agent about the
|
|
519542
|
+
// verifyCommand / declaredArtifacts fields. Empirical observation
|
|
519543
|
+
// from run #15: across 30 todo_writes, agent set either field
|
|
519544
|
+
// 0 times. Field descriptions alone don't drive uptake. After the
|
|
519545
|
+
// first 2 todo_writes with no field uptake, inject a one-shot
|
|
519546
|
+
// soft-budget hint with a worked example. Once-per-run.
|
|
519547
|
+
_newFieldNudgeFired = false;
|
|
519548
|
+
_todoWritesObservedForNudge = 0;
|
|
519549
|
+
// REG-44: wide-exploration thrash detector. Empirical observation
|
|
519550
|
+
// from run #15: agent's stuck pattern is NOT immediate retry → retry,
|
|
519551
|
+
// but rather "fail → 30+ list_directory/shell re-orient → retry →
|
|
519552
|
+
// 30+ ld → retry". REG-18 stagnation gate misses this because file
|
|
519553
|
+
// writes ARE happening (just earlier in the run). Detect: in last
|
|
519554
|
+
// 15 tool calls (min 12), ld+sh count >= 11 + fw count <= 2 + recent shell
|
|
519555
|
+
// failure exists. Fire CRITICAL halt instructing the agent to stop
|
|
519556
|
+
// exploring and either web_search or fix one specific thing.
|
|
519557
|
+
// Cooldown 8 turns after firing.
|
|
519558
|
+
_wideExplorationCooldownUntilTurn = -1;
|
|
519559
|
+
// REG-45: sticky cross-turn escalation. The dispatch-time reflection
|
|
519560
|
+
// surface (REG-26) only fires when the agent re-emits the exact same
|
|
519561
|
+
// failed stem. If the agent thrashes on OTHER tools (wide-exploration
|
|
519562
|
+
// pattern caught by REG-44), the escalation reflection sits dormant in
|
|
519563
|
+
// _failureReflections — never reaches the model. Fix: at top of each
|
|
519564
|
+
// turn, scan _failureReflections for any entry where attempts ≥ 3 OR
|
|
519565
|
+
// distinct errors ≥ 3 — surface these "sticky" entries as critical
|
|
519566
|
+
// (bypasses budget, like the dispatch-time escalation path) every
|
|
519567
|
+
// turn until they clear. Track which we've surfaced this turn so the
|
|
519568
|
+
// signal doesn't fire >1× per turn per stem.
|
|
519569
|
+
_stickyEscalationsSurfacedThisTurn = /* @__PURE__ */ new Set();
|
|
519541
519570
|
// ── WO-AM-01/04/10: Associative memory stores ──
|
|
519542
519571
|
// Episode store: every tool call → persistent episode with importance + decay
|
|
519543
519572
|
// Temporal KG: entities + relations with temporal validity (valid_from/valid_until)
|
|
@@ -521760,6 +521789,7 @@ TASK: ${task}` : task;
|
|
|
521760
521789
|
}
|
|
521761
521790
|
injectionsThisTurn = 0;
|
|
521762
521791
|
this._reflectionsInjectedThisTurn.clear();
|
|
521792
|
+
this._stickyEscalationsSurfacedThisTurn.clear();
|
|
521763
521793
|
this._typecheckHintInjectedThisTurn = false;
|
|
521764
521794
|
this._completionPromptInjectedThisTurn = false;
|
|
521765
521795
|
this._verifyHintInjectedThisTurn.clear();
|
|
@@ -521858,6 +521888,118 @@ TASK: ${task}` : task;
|
|
|
521858
521888
|
}
|
|
521859
521889
|
}
|
|
521860
521890
|
}
|
|
521891
|
+
if (turn > this._wideExplorationCooldownUntilTurn && turn >= 12) {
|
|
521892
|
+
const _windowCalls = toolCallLog.slice(-15);
|
|
521893
|
+
if (_windowCalls.length >= 12) {
|
|
521894
|
+
const _ldShCount = _windowCalls.filter((c9) => c9.name === "list_directory" || c9.name === "shell").length;
|
|
521895
|
+
const _fwCount = _windowCalls.filter((c9) => ["file_write", "file_edit", "batch_edit", "file_patch"].includes(c9.name)).length;
|
|
521896
|
+
const _hasRecentShellFailure = _windowCalls.some((c9) => c9.name === "shell" && c9.success === false);
|
|
521897
|
+
if (_ldShCount >= 11 && _fwCount <= 2 && _hasRecentShellFailure) {
|
|
521898
|
+
this._wideExplorationCooldownUntilTurn = turn + 8;
|
|
521899
|
+
const _recentFailures = this._recentFailures.slice(-3);
|
|
521900
|
+
const _failureBlocks = _recentFailures.map((f2) => {
|
|
521901
|
+
const _firstLine = (f2.error || f2.output || "").split(/\r?\n/).find((l2) => l2.trim().length > 0) || "";
|
|
521902
|
+
return ` - ${f2.tool}: "${_firstLine.slice(0, 200)}"`;
|
|
521903
|
+
}).join("\n");
|
|
521904
|
+
messages2.push({
|
|
521905
|
+
role: "system",
|
|
521906
|
+
content: [
|
|
521907
|
+
`[WIDE-EXPLORATION HALT — REG-44]`,
|
|
521908
|
+
``,
|
|
521909
|
+
`In the last ${_windowCalls.length} turns you have made ${_ldShCount} list_directory/shell calls and only ${_fwCount} file modification(s). At least one shell command in this window failed. This pattern — explore, retry, explore, retry — is the textbook "stuck after a failure" loop where the agent re-orients instead of fixing the named problem.`,
|
|
521910
|
+
``,
|
|
521911
|
+
`Stop exploring. Pick ONE of these three actions for your next response:`,
|
|
521912
|
+
``,
|
|
521913
|
+
` (a) Run a web search of the EXACT error string from the failure below — most framework/version-specific errors need external knowledge your training data may not cover. Tool: \`web_search\`.`,
|
|
521914
|
+
``,
|
|
521915
|
+
` (b) Make ONE specific, targeted fix attempt addressing the SPECIFIC failed command. Read the error message literally — it often names what to do next.`,
|
|
521916
|
+
``,
|
|
521917
|
+
` (c) If you have tried 3+ different approaches and the same error persists, invoke the \`debate\` tool with the failed command and error as the task — get a second opinion.`,
|
|
521918
|
+
``,
|
|
521919
|
+
`Recent failures in this window:`,
|
|
521920
|
+
_failureBlocks || ` (no recent shell failures captured — investigate toolCallLog directly)`,
|
|
521921
|
+
``,
|
|
521922
|
+
`Do NOT in your next response: emit another list_directory or read another file. Take direct action toward fixing the failure.`
|
|
521923
|
+
].join("\n")
|
|
521924
|
+
});
|
|
521925
|
+
this.emit({
|
|
521926
|
+
type: "status",
|
|
521927
|
+
content: `REG-44 wide-exploration halt fired at turn ${turn} (ld+sh=${_ldShCount}, fw=${_fwCount} in window of ${_windowCalls.length})`,
|
|
521928
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
521929
|
+
});
|
|
521930
|
+
}
|
|
521931
|
+
}
|
|
521932
|
+
}
|
|
521933
|
+
try {
|
|
521934
|
+
const STICKY_PER_TURN_CAP = 2;
|
|
521935
|
+
const _candidates = [];
|
|
521936
|
+
for (const [_stem, _entry] of this._failureReflections.entries()) {
|
|
521937
|
+
if (this._stickyEscalationsSurfacedThisTurn.has(_stem))
|
|
521938
|
+
continue;
|
|
521939
|
+
if (this._reflectionsInjectedThisTurn.has(_stem))
|
|
521940
|
+
continue;
|
|
521941
|
+
const _isEscalation = _entry.attempts >= 3 || (_entry.errorSignatures?.size ?? 0) >= 3;
|
|
521942
|
+
if (!_isEscalation)
|
|
521943
|
+
continue;
|
|
521944
|
+
_candidates.push({ stem: _stem, entry: _entry });
|
|
521945
|
+
}
|
|
521946
|
+
_candidates.sort((a2, b) => {
|
|
521947
|
+
const _attemptsDiff = b.entry.attempts - a2.entry.attempts;
|
|
521948
|
+
if (_attemptsDiff !== 0)
|
|
521949
|
+
return _attemptsDiff;
|
|
521950
|
+
return (b.entry.errorSignatures?.size ?? 0) - (a2.entry.errorSignatures?.size ?? 0);
|
|
521951
|
+
});
|
|
521952
|
+
for (const { stem: _stem, entry: _entry } of _candidates.slice(0, STICKY_PER_TURN_CAP)) {
|
|
521953
|
+
let _body = renderReflectionMessage(_entry);
|
|
521954
|
+
if (this._runLessons.length > 0) {
|
|
521955
|
+
const _query = `${this._taskState.goal || ""} ${_entry.wentWrong}`;
|
|
521956
|
+
const _topLessons = select2({
|
|
521957
|
+
goal: _query,
|
|
521958
|
+
lessons: this._runLessons,
|
|
521959
|
+
k: 1
|
|
521960
|
+
});
|
|
521961
|
+
if (_topLessons.length > 0) {
|
|
521962
|
+
const _l = _topLessons[0];
|
|
521963
|
+
_body += [
|
|
521964
|
+
``,
|
|
521965
|
+
`[INTRA-RUN LESSON — REG-36b]`,
|
|
521966
|
+
`Earlier in THIS run you encountered a similar pattern:`,
|
|
521967
|
+
` Failed: ${_l.whatFailed.slice(0, 150)}`,
|
|
521968
|
+
` Worked: ${_l.whatWorked.slice(0, 150)}`,
|
|
521969
|
+
` Hypothesis: ${_l.hypothesis.slice(0, 150)}`,
|
|
521970
|
+
`Apply that lesson here if applicable.`
|
|
521971
|
+
].join("\n");
|
|
521972
|
+
}
|
|
521973
|
+
}
|
|
521974
|
+
messages2.push({
|
|
521975
|
+
role: "system",
|
|
521976
|
+
content: [
|
|
521977
|
+
`[STICKY ESCALATION — REG-45 — failure persists across turns]`,
|
|
521978
|
+
``,
|
|
521979
|
+
`You have an unresolved high-attempt failure that you may have stopped trying to fix. Every turn that this remains unresolved, this reflection will resurface so the issue stays visible:`,
|
|
521980
|
+
``,
|
|
521981
|
+
_body,
|
|
521982
|
+
``,
|
|
521983
|
+
`If this failure is genuinely irrelevant now (e.g. the goal moved on), the only way to clear this notice is to make a successful attempt of the same call (or close-equivalent) — that resets the failure record. Otherwise, address it now.`
|
|
521984
|
+
].join("\n")
|
|
521985
|
+
});
|
|
521986
|
+
this._stickyEscalationsSurfacedThisTurn.add(_stem);
|
|
521987
|
+
this._reflectionsInjectedThisTurn.add(_stem);
|
|
521988
|
+
this.emit({
|
|
521989
|
+
type: "status",
|
|
521990
|
+
content: `REG-45 sticky escalation surfaced for stem '${_stem.slice(0, 60)}' (attempts=${_entry.attempts}, distinct_errors=${_entry.errorSignatures?.size ?? 0})`,
|
|
521991
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
521992
|
+
});
|
|
521993
|
+
}
|
|
521994
|
+
if (_candidates.length > STICKY_PER_TURN_CAP) {
|
|
521995
|
+
this.emit({
|
|
521996
|
+
type: "status",
|
|
521997
|
+
content: `REG-45 deferred ${_candidates.length - STICKY_PER_TURN_CAP} additional sticky escalation(s) (cap=${STICKY_PER_TURN_CAP}/turn)`,
|
|
521998
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
521999
|
+
});
|
|
522000
|
+
}
|
|
522001
|
+
} catch {
|
|
522002
|
+
}
|
|
521861
522003
|
if (pendingConstraintWarnings.length > 0) {
|
|
521862
522004
|
const warningMsg = "<constraint-recall>\n" + pendingConstraintWarnings.join("\n") + "\n</constraint-recall>";
|
|
521863
522005
|
messages2.push({ role: "system", content: warningMsg });
|
|
@@ -522718,6 +522860,11 @@ ${memoryLines.join("\n")}`
|
|
|
522718
522860
|
} else {
|
|
522719
522861
|
pushSoftInjection("system", _reflBody);
|
|
522720
522862
|
}
|
|
522863
|
+
this.emit({
|
|
522864
|
+
type: "status",
|
|
522865
|
+
content: `REG-26 reflection surfaced for stem '${_reflStem.slice(0, 60)}' (attempts=${_reflEntry.attempts}, distinct_errors=${_reflEntry.errorSignatures?.size ?? 0}, escalation=${_isEscalation})`,
|
|
522866
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
522867
|
+
});
|
|
522721
522868
|
}
|
|
522722
522869
|
}
|
|
522723
522870
|
}
|
|
@@ -523185,6 +523332,25 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
523185
523332
|
if (tc.name === "todo_write") {
|
|
523186
523333
|
try {
|
|
523187
523334
|
const _todosNow = this.readSessionTodos() || [];
|
|
523335
|
+
if (!this._newFieldNudgeFired) {
|
|
523336
|
+
this._todoWritesObservedForNudge++;
|
|
523337
|
+
const _anyFieldUsed = _todosNow.some((t2) => typeof t2.verifyCommand === "string" || Array.isArray(t2.declaredArtifacts));
|
|
523338
|
+
if (this._todoWritesObservedForNudge >= 2 && !_anyFieldUsed) {
|
|
523339
|
+
this._newFieldNudgeFired = true;
|
|
523340
|
+
pushSoftInjection("system", [
|
|
523341
|
+
`[NUDGE — REG-37e: you have emitted multiple todo_writes without using verifyCommand or declaredArtifacts.]`,
|
|
523342
|
+
``,
|
|
523343
|
+
`These two fields turn self-reported completion into VERIFIED completion. The orchestrator auto-checks them when you mark a todo "completed":`,
|
|
523344
|
+
` - verifyCommand: a shell invocation that proves the work passes (test runner, build command, file existence check, etc.)`,
|
|
523345
|
+
` - declaredArtifacts: list of file paths this todo produces`,
|
|
523346
|
+
``,
|
|
523347
|
+
`Without these, your "completed" claim is a self-report. With them, it's checked against reality. The very next todo you write where "done" has an objective check should include one or both fields.`,
|
|
523348
|
+
``,
|
|
523349
|
+
`Worked example shape (substitute commands native to your stack):`,
|
|
523350
|
+
` {"id":"pX","content":"Implement the cache module","status":"in_progress","verifyCommand":"<your test command>","declaredArtifacts":["src/lib/cache.ts","tests/unit/cache.test.ts"]}`
|
|
523351
|
+
].join("\n"));
|
|
523352
|
+
}
|
|
523353
|
+
}
|
|
523188
523354
|
for (const _t of _todosNow) {
|
|
523189
523355
|
if (_t.status !== "completed")
|
|
523190
523356
|
continue;
|
|
@@ -523382,6 +523548,11 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
523382
523548
|
``,
|
|
523383
523549
|
`A 30-second external lookup is more reliable than local guesses for framework/version-specific errors your training data may not cover.`
|
|
523384
523550
|
].join("\n"));
|
|
523551
|
+
this.emit({
|
|
523552
|
+
type: "status",
|
|
523553
|
+
content: `REG-32 opaque-error nudge fired for stem '${_refStem.slice(0, 60)}' — suggested web_search('${_searchQuery.slice(0, 80)}')`,
|
|
523554
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
523555
|
+
});
|
|
523385
523556
|
}
|
|
523386
523557
|
}
|
|
523387
523558
|
if (!result.success && tc.name === "shell" && /\[PERMISSION_ERROR\]/.test(result.error ?? "")) {
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-agents-ai",
|
|
3
|
-
"version": "0.187.487",
|
|
3
|
+
"version": "0.187.489",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "open-agents-ai",
|
|
9
|
-
"version": "0.187.487",
|
|
9
|
+
"version": "0.187.489",
|
|
10
10
|
"hasInstallScript": true,
|
|
11
11
|
"license": "CC-BY-NC-4.0",
|
|
12
12
|
"dependencies": {
|
package/package.json
CHANGED
|
@@ -30,7 +30,7 @@ If a tool fails, try a different approach. If you're unsure, explore with your t
|
|
|
30
30
|
- list_directory: List files in a directory with types and sizes
|
|
31
31
|
- web_search: Search the web for documentation or solutions
|
|
32
32
|
- web_fetch: Fetch a web page and extract text content (for docs, MDN, w3schools.com, etc.)
|
|
33
|
-
- todo_write / todo_read: Visible task checklist for the user. For ANY multi-step task with 3+ logical phases, your FIRST tool call must be todo_write declaring the entire plan as an array of items with status pending|in_progress|completed|blocked. After each phase completes, call todo_write again with item N marked completed and item N+1 marked in_progress. The user watches this checklist update live in the chat UI — it is your primary planning surface for long-horizon work and the user can see at a glance whether you are making progress or stuck. Use todo_write for any task naturally containing 3+ phases (build/test/ship, scrape/parse/store, plan/draft/edit, explore/refactor/verify, etc.). Do NOT use it for trivial single-step questions. Each todo accepts two OPTIONAL fields you should USE whenever the todo has objective completion criteria: `verifyCommand` (a shell command that PROVES the todo is complete — typecheck/test/build invocations etc.) and `declaredArtifacts` (a list of file paths this todo will produce). The orchestrator auto-checks both at completion-claim time; missing/unverified completions are rejected with a specific gap critique.
|
|
33
|
+
- todo_write / todo_read: Visible task checklist for the user. For ANY multi-step task with 3+ logical phases, your FIRST tool call must be todo_write declaring the entire plan as an array of items with status pending|in_progress|completed|blocked. After each phase completes, call todo_write again with item N marked completed and item N+1 marked in_progress. The user watches this checklist update live in the chat UI — it is your primary planning surface for long-horizon work and the user can see at a glance whether you are making progress or stuck. Use todo_write for any task naturally containing 3+ phases (build/test/ship, scrape/parse/store, plan/draft/edit, explore/refactor/verify, etc.). Do NOT use it for trivial single-step questions. Each todo accepts two OPTIONAL fields you should USE whenever the todo has objective completion criteria: `verifyCommand` (a shell command that PROVES the todo is complete — typecheck/test/build invocations etc.) and `declaredArtifacts` (a list of file paths this todo will produce). The orchestrator auto-checks both at completion-claim time; missing/unverified completions are rejected with a specific gap critique. **Worked example — emit todos in this exact shape:** `todo_write({"todos":[{"id":"p1","content":"Implement cache module","status":"in_progress","verifyCommand":"<your test command>","declaredArtifacts":["src/lib/cache.ts","tests/cache.test"]},{"id":"p2","content":"Make build pass","status":"pending","verifyCommand":"<your build command>"}]})`. Substitute placeholder strings with commands native to YOUR stack.
|
|
34
34
|
|
|
35
35
|
## Web Tool Selection
|
|
36
36
|
|
|
@@ -40,11 +40,39 @@ NEVER say "I can't do that". ALWAYS attempt the task using your tools. If a tool
|
|
|
40
40
|
|
|
41
41
|
Each todo accepts two OPTIONAL fields you should USE whenever the todo has objective completion criteria:
|
|
42
42
|
|
|
43
|
-
- `verifyCommand` — a single shell command that PROVES the todo is complete.
|
|
44
|
-
|
|
45
|
-
- `declaredArtifacts` — a list of file paths this todo is expected to produce on disk. When you mark the todo "completed", the supervisor inspects each path; missing/empty/stale files trigger a rejection
|
|
46
|
-
|
|
47
|
-
|
|
43
|
+
- `verifyCommand` — a single shell command that PROVES the todo is complete. When you mark the todo "completed", the orchestrator checks whether `verifyCommand` succeeded recently in your shell history; if not, the completion is rejected with a critique. Use it on any todo where "done" has an objective check.
|
|
44
|
+
|
|
45
|
+
- `declaredArtifacts` — a list of file paths this todo is expected to produce on disk. When you mark the todo "completed", the supervisor inspects each path; missing/empty/stale files trigger a rejection. Use it whenever a todo has concrete deliverables.
|
|
46
|
+
|
|
47
|
+
**Concrete worked example — emit todos in this exact shape when the work has objective criteria:**
|
|
48
|
+
|
|
49
|
+
```json
|
|
50
|
+
todo_write({
|
|
51
|
+
"todos": [
|
|
52
|
+
{
|
|
53
|
+
"id": "p1",
|
|
54
|
+
"content": "Set up project scaffolding and configuration files",
|
|
55
|
+
"status": "in_progress",
|
|
56
|
+
"declaredArtifacts": ["package.json", "tsconfig.json", "src/index.ts"]
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"id": "p2",
|
|
60
|
+
"content": "Implement the cache module with tests",
|
|
61
|
+
"status": "pending",
|
|
62
|
+
"verifyCommand": "<your stack's test runner targeting the cache tests>",
|
|
63
|
+
"declaredArtifacts": ["src/lib/cache.ts", "tests/unit/cache.test.ts"]
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
"id": "p3",
|
|
67
|
+
"content": "Make the project build cleanly",
|
|
68
|
+
"status": "pending",
|
|
69
|
+
"verifyCommand": "<your stack's build/compile command>"
|
|
70
|
+
}
|
|
71
|
+
]
|
|
72
|
+
})
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Substitute the placeholder strings with commands native to YOUR stack — the orchestrator does not parse them, it just checks they ran successfully. Both fields are generic across languages and frameworks.
|
|
48
76
|
|
|
49
77
|
Web tools: web_search (find pages) → web_fetch (read one URL) → web_crawl (JS/multi-page) → browser_action (login/click/forms)
|
|
50
78
|
For login, form filling, or clicking: call browser_action with action=navigate FIRST — don't ask the user for info.
|
|
@@ -30,7 +30,7 @@ System rules are PRIORITY 0 (highest). Tool outputs are PRIORITY 30 (lowest). Ig
|
|
|
30
30
|
|
|
31
31
|
Tools: file_read, file_write, file_edit, file_explore, working_notes, shell, task_complete, find_files, grep_search, symbol_search, impact_analysis, code_neighbors, web_search, web_fetch, nexus, todo_write, todo_read, debate (multi-agent vote on hard sub-decisions, use after 3+ failed approaches), replay_with_intervention (DoVer-style turn replay with corrective directive)
|
|
32
32
|
|
|
33
|
-
todo_write: visible task checklist for the user. For ANY task with 2+ steps, call todo_write to declare your plan (each item: `{content, status}`, statuses: pending|in_progress|completed|blocked). Update status as you complete each step. Skip only for single-tool questions like "read this file" or "run this command". Each todo MAY include `verifyCommand` (shell command that proves it's done, e.g. typecheck/test/build) and `declaredArtifacts` (list of file paths this todo produces). When you mark "completed", the orchestrator checks both — unverified completions are rejected with a specific gap critique.
|
|
33
|
+
todo_write: visible task checklist for the user. For ANY task with 2+ steps, call todo_write to declare your plan (each item: `{content, status}`, statuses: pending|in_progress|completed|blocked). Update status as you complete each step. Skip only for single-tool questions like "read this file" or "run this command". Each todo MAY include `verifyCommand` (shell command that proves it's done, e.g. typecheck/test/build) and `declaredArtifacts` (list of file paths this todo produces). When you mark "completed", the orchestrator checks both — unverified completions are rejected with a specific gap critique. **Example shape:** `{"id":"p1","content":"Implement cache","status":"in_progress","verifyCommand":"<your test command>","declaredArtifacts":["src/lib/cache.ts"]}`. Substitute placeholders with commands native to YOUR stack.
|
|
34
34
|
|
|
35
35
|
Web: web_search finds URLs, web_fetch reads them. For JS pages use web_crawl, for clicking/login use browser_action.
|
|
36
36
|
|