botholomew 0.22.0 → 0.22.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.ts +24 -3
- package/src/worker/llm.ts +23 -2
- package/src/worker/prompt.ts +2 -0
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -74,9 +74,30 @@ registerPrepareCommand(program);
|
|
|
74
74
|
registerCheckUpdateCommand(program);
|
|
75
75
|
registerUpgradeCommand(program);
|
|
76
76
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
77
|
+
// Bare `botholomew` (only the global -d/--dir, or nothing) prints help on
|
|
78
|
+
// stdout and exits 0. We do this explicitly instead of via a root .action()
|
|
79
|
+
// handler, because that handler made Commander treat a mistyped command as an
|
|
80
|
+
// excess positional argument ("too many arguments. Expected 0 arguments but
|
|
81
|
+
// got 1: foo.") instead of reporting an unknown command. With no root action, a
|
|
82
|
+
// real typo now reaches Commander's unknownCommand() → "error: unknown command
|
|
83
|
+
// 'foo'" (plus a did-you-mean suggestion).
|
|
84
|
+
/**
 * Reports whether the CLI was invoked "bare": with no arguments at all, or
 * with nothing but the global `-d`/`--dir` option and its value.
 *
 * Every spelling of the directory option is recognised:
 *   - `-d <path>` / `--dir <path>` (value in the following token)
 *   - `--dir=<path>`               (long form, value attached with `=`)
 *   - `-d<path>`                   (short form, value attached directly)
 *
 * @param argv - The user-supplied arguments, i.e. `process.argv.slice(2)`.
 * @returns `true` when only the directory option (or nothing) is present;
 *   `false` as soon as any operand or any other flag is seen.
 */
function isBareInvocation(argv: string[]): boolean {
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === undefined) continue;

    if (arg === "-d" || arg === "--dir") {
      i++; // skip the option's value, which lives in the next token
      continue;
    }

    // Attached-value spellings carry their value inside the same token.
    if (arg.startsWith("--dir=")) continue;
    // `-d<path>`: the exact `-d` case was handled above, so anything longer
    // that still starts with `-d` is the short option with its value glued
    // on. `--dir…` cannot match here because its second character is `-`.
    if (arg.length > 2 && arg.startsWith("-d")) continue;

    return false; // any operand OR other flag (--help, --version, foo, …)
  }
  return true;
}
|
|
97
|
+
|
|
98
|
+
if (isBareInvocation(process.argv.slice(2))) {
|
|
99
|
+
program.help(); // outputs to stdout and exits 0
|
|
100
|
+
}
|
|
80
101
|
|
|
81
102
|
// Start background update check before parsing (non-blocking)
|
|
82
103
|
const updateNotice = maybeCheckForUpdate();
|
package/src/worker/llm.ts
CHANGED
|
@@ -114,6 +114,7 @@ export async function runAgentLoop(input: {
|
|
|
114
114
|
const maxInputTokens = await getMaxInputTokens(config.llm);
|
|
115
115
|
|
|
116
116
|
const maxTurns = config.max_turns;
|
|
117
|
+
let nudgeCount = 0;
|
|
117
118
|
for (let turn = 0; !maxTurns || turn < maxTurns; turn++) {
|
|
118
119
|
const startTime = Date.now();
|
|
119
120
|
fitToContextWindow(messages, systemPrompt, maxInputTokens);
|
|
@@ -188,9 +189,29 @@ export async function runAgentLoop(input: {
|
|
|
188
189
|
}
|
|
189
190
|
|
|
190
191
|
if (collectedToolCalls.length === 0) {
|
|
192
|
+
// An implicit tick-end (the model stopped emitting tool calls) is
|
|
193
|
+
// ambiguous evidence — it usually means the agent hit a dead end,
|
|
194
|
+
// exhausted its output budget mid-thought, or forgot to declare a
|
|
195
|
+
// terminal status. Don't treat it as success. Nudge once to give the
|
|
196
|
+
// agent a chance to recover (e.g. emit the final tool call it was about
|
|
197
|
+
// to make, or fail_task on a capability gap); fail if it still doesn't.
|
|
198
|
+
if (nudgeCount === 0) {
|
|
199
|
+
nudgeCount++;
|
|
200
|
+
const nudge =
|
|
201
|
+
"You ended your turn without calling a terminal status tool. Every tick must end with exactly one of: complete_task (only if the required deliverable truly exists — verify it), fail_task (if you are blocked or a required tool/capability is unavailable — state the gap), or wait_task (if you must wait on something external). Call the appropriate one now.";
|
|
202
|
+
messages.push({ role: "user", content: nudge });
|
|
203
|
+
await logInteraction(projectDir, threadId, {
|
|
204
|
+
role: "system",
|
|
205
|
+
kind: "status_change",
|
|
206
|
+
content:
|
|
207
|
+
"Agent ended its turn without a terminal status tool; nudging to call complete_task/fail_task/wait_task.",
|
|
208
|
+
});
|
|
209
|
+
continue;
|
|
210
|
+
}
|
|
191
211
|
return {
|
|
192
|
-
status: "
|
|
193
|
-
reason:
|
|
212
|
+
status: "failed",
|
|
213
|
+
reason:
|
|
214
|
+
"Agent ended its tick without calling a terminal status tool (complete_task/fail_task/wait_task)",
|
|
194
215
|
};
|
|
195
216
|
}
|
|
196
217
|
|
package/src/worker/prompt.ts
CHANGED
|
@@ -146,6 +146,8 @@ export async function buildSystemPrompt(
|
|
|
146
146
|
prompt += `## Instructions
|
|
147
147
|
You are Botholomew, a wise-owl worker that works through tasks. Use available tools to complete your assigned task, then call complete_task, fail_task, or wait_task. Use create_task for subtasks and update_task to refine pending tasks. Batch independent tool calls in a single response for parallel execution.
|
|
148
148
|
|
|
149
|
+
Always end your tick by calling exactly one terminal status tool — never just stop. Call complete_task ONLY if the required deliverable actually exists (verify it). If you are blocked or a required tool/capability is unavailable (e.g. no way to produce the requested output), call fail_task and state the gap — do not pretend success. If you must wait on something external, call wait_task.
|
|
150
|
+
|
|
149
151
|
When calling complete_task, write a summary that captures your key findings, decisions, and outputs. This summary becomes the task's output and is provided to any downstream tasks that depend on this one. Include specific results (data, names, paths, conclusions) rather than vague descriptions of what you did — downstream tasks will rely on this information to do their work.
|
|
150
152
|
`;
|
|
151
153
|
|