@bubblebrain-ai/bubble 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/execution-governor.d.ts +5 -13
- package/dist/agent/execution-governor.js +33 -142
- package/dist/agent/task-size.d.ts +9 -0
- package/dist/agent/task-size.js +33 -0
- package/dist/agent/tool-intent.d.ts +1 -0
- package/dist/agent/tool-intent.js +1 -1
- package/dist/agent.js +46 -2
- package/dist/main.js +57 -42
- package/dist/orchestrator/default-hooks.js +83 -84
- package/dist/orchestrator/hooks.d.ts +5 -8
- package/dist/prompt/compose.js +3 -0
- package/dist/prompt/environment.js +2 -0
- package/dist/prompt/provider-prompts/deepseek.js +1 -2
- package/dist/prompt/provider-prompts/kimi.js +1 -2
- package/dist/prompt/reminders.d.ts +21 -3
- package/dist/prompt/reminders.js +44 -17
- package/dist/prompt/runtime.js +17 -23
- package/dist/provider.d.ts +10 -1
- package/dist/provider.js +87 -34
- package/dist/slash-commands/commands.js +0 -17
- package/dist/tools/bash.d.ts +2 -1
- package/dist/tools/bash.js +1 -1
- package/dist/tools/edit-apply.js +37 -6
- package/dist/tools/edit.d.ts +2 -1
- package/dist/tools/edit.js +18 -6
- package/dist/tools/file-state.d.ts +25 -0
- package/dist/tools/file-state.js +52 -0
- package/dist/tools/index.d.ts +2 -0
- package/dist/tools/index.js +6 -4
- package/dist/tools/read.d.ts +2 -1
- package/dist/tools/read.js +5 -1
- package/dist/tools/write.d.ts +4 -3
- package/dist/tools/write.js +133 -54
- package/dist/tui/display-history.d.ts +2 -0
- package/dist/tui/run.js +115 -23
- package/dist/tui/streaming-tool-args.d.ts +15 -0
- package/dist/tui/streaming-tool-args.js +30 -0
- package/dist/tui/tool-renderers/write-preview.d.ts +1 -1
- package/dist/tui/tool-renderers/write-preview.js +9 -1
- package/dist/tui/tool-renderers/write.js +13 -7
- package/dist/types.d.ts +15 -0
- package/package.json +1 -1
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { classifyTask } from "../agent/task-classifier.js";
|
|
2
|
+
import { classifyTaskSize } from "../agent/task-size.js";
|
|
2
3
|
import { EvidenceTracker } from "../agent/evidence-tracker.js";
|
|
3
4
|
import { ExecutionGovernor } from "../agent/execution-governor.js";
|
|
4
5
|
import { arbitrateToolCall } from "../agent/tool-arbiter.js";
|
|
5
|
-
import {
|
|
6
|
+
import { buildEditRetryEscalationReminder, buildRedundantReadReminder, buildSmallTaskHint, buildTaskSummaryReminder, buildWorkflowPhaseReminder, } from "../prompt/reminders.js";
|
|
6
7
|
import { reminderForTaskType } from "../prompt/task-reminders.js";
|
|
7
8
|
import { formatCoverageSummary, resolveWorkflowPhase } from "./workflow.js";
|
|
8
9
|
export function createDefaultHooks() {
|
|
@@ -16,6 +17,13 @@ export function createDefaultHooks() {
|
|
|
16
17
|
if (taskReminder) {
|
|
17
18
|
ctx.queueReminder(taskReminder);
|
|
18
19
|
}
|
|
20
|
+
// Small-task hint: counterweight to the default protocol's exploration
|
|
21
|
+
// bias, only fires once per run on focused one-shot requests like
|
|
22
|
+
// "写个 HTML 介绍元旦". Don't issue for the same input twice.
|
|
23
|
+
if (!ctx.state.smallTaskHintSent && classifyTaskSize(ctx.input) === "small") {
|
|
24
|
+
ctx.state.smallTaskHintSent = true;
|
|
25
|
+
ctx.queueReminder(buildSmallTaskHint());
|
|
26
|
+
}
|
|
19
27
|
if (taskType === "security_investigation") {
|
|
20
28
|
ctx.state.evidenceTracker = new EvidenceTracker();
|
|
21
29
|
ctx.state.workflowPhase = "investigate";
|
|
@@ -28,18 +36,15 @@ export function createDefaultHooks() {
|
|
|
28
36
|
},
|
|
29
37
|
beforeModelCall(ctx) {
|
|
30
38
|
ctx.agent.compactResidentHistory();
|
|
31
|
-
if (ctx.state.governor) {
|
|
32
|
-
ctx.toolEntries = ctx.state.governor.filterToolDefinitions(ctx.toolEntries);
|
|
33
|
-
}
|
|
34
39
|
if (ctx.state.taskType === "security_investigation" && ctx.state.evidenceTracker && ctx.state.governor) {
|
|
35
40
|
const coverage = ctx.state.evidenceTracker.snapshot();
|
|
36
41
|
const phase = resolveWorkflowPhase({
|
|
37
42
|
coreCoverageComplete: ctx.state.evidenceTracker.isCoreCoverageComplete(),
|
|
38
|
-
searchFrozen:
|
|
43
|
+
searchFrozen: false,
|
|
39
44
|
});
|
|
40
45
|
ctx.state.workflowPhase = phase;
|
|
41
46
|
const summary = formatCoverageSummary(coverage);
|
|
42
|
-
const key = `${phase}:${ctx.state.evidenceTracker.key()}
|
|
47
|
+
const key = `${phase}:${ctx.state.evidenceTracker.key()}:0`;
|
|
43
48
|
if (ctx.state.workflowKey !== key) {
|
|
44
49
|
ctx.state.workflowKey = key;
|
|
45
50
|
ctx.queueReminder(buildWorkflowPhaseReminder({
|
|
@@ -58,10 +63,7 @@ export function createDefaultHooks() {
|
|
|
58
63
|
beforeToolCall(ctx) {
|
|
59
64
|
const arbitration = arbitrateToolCall(ctx.toolCall);
|
|
60
65
|
ctx.replaceToolCall({ ...arbitration.toolCall, ...(arbitration.note ? { arbiterNote: arbitration.note } : {}) });
|
|
61
|
-
|
|
62
|
-
if (decision?.blockedResult) {
|
|
63
|
-
ctx.blockToolCall(decision.blockedResult);
|
|
64
|
-
}
|
|
66
|
+
ctx.state.governor?.beforeToolCall(ctx.toolCall);
|
|
65
67
|
},
|
|
66
68
|
afterToolCall(ctx) {
|
|
67
69
|
if (ctx.toolCall.arbiterNote) {
|
|
@@ -75,21 +77,58 @@ export function createDefaultHooks() {
|
|
|
75
77
|
}
|
|
76
78
|
ctx.state.evidenceTracker?.observe(ctx.toolCall, ctx.result);
|
|
77
79
|
ctx.state.governor?.afterToolResult(ctx.toolCall, ctx.result);
|
|
78
|
-
if
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
80
|
+
// Edit/write retry-escalation: if the same tool with the same args
|
|
81
|
+
// failed twice in a row, models — especially thinking-heavy ones —
|
|
82
|
+
// can spiral on "identical content" / "not found" errors. Nudge them
|
|
83
|
+
// to change strategy.
|
|
84
|
+
if ((ctx.toolCall.name === "edit" || ctx.toolCall.name === "write") && ctx.result.isError) {
|
|
85
|
+
const hash = hashEditCall(ctx.toolCall);
|
|
86
|
+
const history = ctx.state.recentEditFailures ?? (ctx.state.recentEditFailures = []);
|
|
87
|
+
history.push(hash);
|
|
88
|
+
// Keep last 4 entries.
|
|
89
|
+
if (history.length > 4)
|
|
90
|
+
history.shift();
|
|
91
|
+
const len = history.length;
|
|
92
|
+
if (len >= 2 && history[len - 1] === history[len - 2] && !ctx.state.editRetryReminderSent) {
|
|
93
|
+
ctx.state.editRetryReminderSent = true;
|
|
94
|
+
const summary = ctx.result.content.split("\n")[0] || "";
|
|
95
|
+
ctx.queueReminder(buildEditRetryEscalationReminder(`Last failure: ${ctx.toolCall.name} on the same target with identical arguments. ${summary}`));
|
|
86
96
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
97
|
+
}
|
|
98
|
+
else if ((ctx.toolCall.name === "edit" || ctx.toolCall.name === "write") && !ctx.result.isError) {
|
|
99
|
+
// Successful mutation resets the dedup state so a later, unrelated
|
|
100
|
+
// failure won't fire the reminder spuriously.
|
|
101
|
+
ctx.state.recentEditFailures = [];
|
|
102
|
+
ctx.state.editRetryReminderSent = false;
|
|
103
|
+
}
|
|
104
|
+
// Redundant-Read detection: same file path read twice within this turn.
|
|
105
|
+
// Soft single-shot reminder, governor handles cumulative read budgets.
|
|
106
|
+
if (ctx.toolCall.name === "read" && !ctx.result.isError) {
|
|
107
|
+
const rawPath = ctx.toolCall.parsedArgs?.path ?? ctx.toolCall.parsedArgs?.file_path;
|
|
108
|
+
const path = typeof rawPath === "string" ? rawPath : undefined;
|
|
109
|
+
if (path) {
|
|
110
|
+
const seen = ctx.state.recentReadPaths ?? (ctx.state.recentReadPaths = []);
|
|
111
|
+
const flagged = ctx.state.redundantReadReminded ?? (ctx.state.redundantReadReminded = new Set());
|
|
112
|
+
if (seen.includes(path) && !flagged.has(path)) {
|
|
113
|
+
flagged.add(path);
|
|
114
|
+
ctx.queueReminder(buildRedundantReadReminder(path));
|
|
115
|
+
}
|
|
116
|
+
seen.push(path);
|
|
117
|
+
if (seen.length > 16)
|
|
118
|
+
seen.shift();
|
|
91
119
|
}
|
|
92
120
|
}
|
|
121
|
+
if (isCodeWriteResult(ctx.toolCall, ctx.result)) {
|
|
122
|
+
markCodeChanged(ctx.state);
|
|
123
|
+
}
|
|
124
|
+
// Removed: active verification tracking. The previous design nagged the
|
|
125
|
+
// model every turn until it ran a recognised verification command, and
|
|
126
|
+
// narrowly accepted only test/lint commands — which meant ad-hoc python
|
|
127
|
+
// checks did not count, the nag never cleared, and reasoning models
|
|
128
|
+
// (DeepSeek v4-pro with hex-blindness) spiraled trying to "prove" the
|
|
129
|
+
// edit was correct. CC's approach is the opposite: verify when there
|
|
130
|
+
// is something real to verify, say so explicitly when there isn't, and
|
|
131
|
+
// trust the model to judge. We follow that.
|
|
93
132
|
if (ctx.toolCall.name === "task") {
|
|
94
133
|
ctx.queueReminder(buildTaskSummaryReminder());
|
|
95
134
|
}
|
|
@@ -104,52 +143,17 @@ export function createDefaultHooks() {
|
|
|
104
143
|
ctx.requestTextOnlyTurn("Core security investigation evidence has been collected. Summarize the findings instead of continuing with more tool calls.");
|
|
105
144
|
return;
|
|
106
145
|
}
|
|
107
|
-
|
|
108
|
-
&& ctx.toolCalls.every((toolCall) => ["glob", "grep", "bash", "web_search", "web_fetch"].includes(toolCall.name))
|
|
109
|
-
&& ctx.toolResults.every((result) => result.status === "no_match" || result.status === "blocked");
|
|
110
|
-
if (ctx.state.governor?.snapshot().searchFrozen && allSearchResultsWereLowSignal) {
|
|
111
|
-
ctx.requestTextOnlyTurn("Search continuation has become low-yield. Summarize the strongest evidence already collected instead of continuing broad exploration.");
|
|
112
|
-
}
|
|
113
|
-
const changedThisTurn = ctx.toolResults.some((result) => result.metadata?.kind === "write" || result.metadata?.kind === "edit");
|
|
114
|
-
if (changedThisTurn && !ctx.state.verificationAttempted && !ctx.state.verificationCompleted && !ctx.state.verificationReminderQueued) {
|
|
115
|
-
ctx.state.verificationReminderQueued = true;
|
|
116
|
-
ctx.queueReminder(buildVerificationReminder("The previous turn changed files and no verification evidence has been observed yet."));
|
|
117
|
-
}
|
|
118
|
-
if (ctx.state.codeChanged && ctx.state.verificationFailed && !ctx.state.verificationFailureReminderQueued) {
|
|
119
|
-
ctx.state.verificationFailureReminderQueued = true;
|
|
120
|
-
ctx.queueReminder(buildVerificationFailureReminder("A verification command or runtime check was attempted after file changes, but it did not pass."));
|
|
121
|
-
}
|
|
122
|
-
if (ctx.state.codeChanged && ctx.state.verificationCompleted && !ctx.state.finalizeReminderQueued) {
|
|
123
|
-
ctx.state.finalizeReminderQueued = true;
|
|
124
|
-
ctx.queueReminder(buildFinalizeOpportunityReminder("A relevant verification command or runtime check passed after file changes."));
|
|
125
|
-
}
|
|
146
|
+
// Verification reminders intentionally removed. See afterToolCall.
|
|
126
147
|
},
|
|
127
|
-
afterTurn(
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
ctx.state.forceContinuationReason = "Files were changed, but the latest verification evidence failed.";
|
|
131
|
-
ctx.queueReminder(buildVerificationFailureReminder(ctx.state.forceContinuationReason));
|
|
132
|
-
return;
|
|
133
|
-
}
|
|
134
|
-
if (ctx.state.codeChanged && !ctx.state.verificationAttempted && !ctx.state.verificationCompleted && !ctx.state.finalVerificationReminderSent) {
|
|
135
|
-
ctx.state.finalVerificationReminderSent = true;
|
|
136
|
-
ctx.state.forceContinuationReason = "Files were changed but no verification evidence was observed before the final answer.";
|
|
137
|
-
ctx.queueReminder(buildVerificationReminder(ctx.state.forceContinuationReason));
|
|
138
|
-
}
|
|
148
|
+
afterTurn() {
|
|
149
|
+
// Verification force-continuation removed. The model decides whether
|
|
150
|
+
// verification is meaningful for the task, per the system prompt.
|
|
139
151
|
},
|
|
140
152
|
},
|
|
141
153
|
];
|
|
142
154
|
}
|
|
143
155
|
function markCodeChanged(state) {
|
|
144
156
|
state.codeChanged = true;
|
|
145
|
-
state.verificationAttempted = false;
|
|
146
|
-
state.verificationCompleted = false;
|
|
147
|
-
state.verificationFailed = false;
|
|
148
|
-
state.verificationReminderQueued = false;
|
|
149
|
-
state.finalVerificationReminderSent = false;
|
|
150
|
-
state.verificationFailureReminderQueued = false;
|
|
151
|
-
state.verificationFailureReminderSent = false;
|
|
152
|
-
state.finalizeReminderQueued = false;
|
|
153
157
|
}
|
|
154
158
|
function isCodeWriteResult(_toolCall, result) {
|
|
155
159
|
if (result.isError || result.status === "blocked" || result.status === "command_error") {
|
|
@@ -157,31 +161,26 @@ function isCodeWriteResult(_toolCall, result) {
|
|
|
157
161
|
}
|
|
158
162
|
return result.metadata?.kind === "write" || result.metadata?.kind === "edit";
|
|
159
163
|
}
|
|
160
|
-
function
|
|
161
|
-
|
|
162
|
-
|
|
164
|
+
function hashEditCall(toolCall) {
|
|
165
|
+
// Cheap fingerprint that identifies "same edit/write call". JSON of the
|
|
166
|
+
// sorted parsed args is good enough — we only need stable equality between
|
|
167
|
+
// identical calls, not cryptographic strength.
|
|
168
|
+
try {
|
|
169
|
+
return `${toolCall.name}:${stableStringify(toolCall.parsedArgs)}`;
|
|
170
|
+
}
|
|
171
|
+
catch {
|
|
172
|
+
return `${toolCall.name}:${toolCall.arguments}`;
|
|
163
173
|
}
|
|
164
|
-
return result.status !== "blocked" && result.status !== "command_error" && result.status !== "timeout";
|
|
165
174
|
}
|
|
166
|
-
function
|
|
167
|
-
if (
|
|
168
|
-
return
|
|
175
|
+
function stableStringify(value) {
|
|
176
|
+
if (Array.isArray(value)) {
|
|
177
|
+
return `[${value.map(stableStringify).join(",")}]`;
|
|
169
178
|
}
|
|
170
|
-
if (
|
|
171
|
-
|
|
179
|
+
if (value && typeof value === "object") {
|
|
180
|
+
const entries = Object.keys(value)
|
|
181
|
+
.sort()
|
|
182
|
+
.map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`);
|
|
183
|
+
return `{${entries.join(",")}}`;
|
|
172
184
|
}
|
|
173
|
-
|
|
174
|
-
? result.metadata.command
|
|
175
|
-
: typeof toolCall.parsedArgs.command === "string"
|
|
176
|
-
? toolCall.parsedArgs.command
|
|
177
|
-
: "";
|
|
178
|
-
return isVerificationCommand(command);
|
|
179
|
-
}
|
|
180
|
-
function isVerificationCommand(command) {
|
|
181
|
-
const normalized = command.trim().toLowerCase();
|
|
182
|
-
return /\b(npm|pnpm|yarn|bun)\s+(test|run\s+(test|build|typecheck|lint|check|tsc)|exec\s+tsc)\b/.test(normalized)
|
|
183
|
-
|| /\b(npx|pnpm\s+exec|bunx)\s+(vitest|tsc|eslint|playwright)\b/.test(normalized)
|
|
184
|
-
|| /\b(python3?|uv\s+run\s+python3?|poetry\s+run\s+python3?)\s+(-m\s+)?(pytest|unittest|ruff|mypy)\b/.test(normalized)
|
|
185
|
-
|| /\b(make|cmake)\s+(test|check)\b/.test(normalized)
|
|
186
|
-
|| /\b(vitest|tsc|pytest|ruff|mypy|ctest|cargo\s+test|go\s+test|swift\s+test|mvn\s+test|gradle\s+test|\.\/gradlew\s+test)\b/.test(normalized);
|
|
185
|
+
return JSON.stringify(value ?? null);
|
|
187
186
|
}
|
|
@@ -14,14 +14,11 @@ export interface TurnHookState {
|
|
|
14
14
|
forceTextOnlyReason?: string;
|
|
15
15
|
forceContinuationReason?: string;
|
|
16
16
|
codeChanged?: boolean;
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
verificationFailureReminderQueued?: boolean;
|
|
23
|
-
verificationFailureReminderSent?: boolean;
|
|
24
|
-
finalizeReminderQueued?: boolean;
|
|
17
|
+
smallTaskHintSent?: boolean;
|
|
18
|
+
recentEditFailures?: string[];
|
|
19
|
+
editRetryReminderSent?: boolean;
|
|
20
|
+
recentReadPaths?: string[];
|
|
21
|
+
redundantReadReminded?: Set<string>;
|
|
25
22
|
taskBudget?: {
|
|
26
23
|
total: number;
|
|
27
24
|
spent: number;
|
package/dist/prompt/compose.js
CHANGED
|
@@ -79,6 +79,9 @@ function buildGuidelines(tools, extraGuidelines) {
|
|
|
79
79
|
if (tools.includes("question")) {
|
|
80
80
|
add("When the user is explicitly discussing, brainstorming, or shaping an approach instead of asking for immediate execution, use the question tool for targeted clarification or preference choices when it would materially improve the discussion; do not use it for generic permission-to-proceed questions");
|
|
81
81
|
}
|
|
82
|
+
if (tools.includes("todo_write")) {
|
|
83
|
+
add("Use todo_write to plan any task that needs three or more concrete steps before you start. Mark each item completed as soon as it is done; do not batch updates");
|
|
84
|
+
}
|
|
82
85
|
for (const item of extraGuidelines) {
|
|
83
86
|
add(item);
|
|
84
87
|
}
|
|
@@ -15,6 +15,7 @@ export const defaultToolSnippets = {
|
|
|
15
15
|
close_agent: "Close or cancel a spawned subagent thread",
|
|
16
16
|
question: "Ask the user structured questions when clarification or preference choices would materially improve the work",
|
|
17
17
|
skill: "Load a named skill with specialized instructions and bundled resources",
|
|
18
|
+
todo_write: "Plan and track multi-step work. Mark each task completed as soon as it is done — do not batch.",
|
|
18
19
|
};
|
|
19
20
|
export const defaultToolNames = [
|
|
20
21
|
"read",
|
|
@@ -32,6 +33,7 @@ export const defaultToolNames = [
|
|
|
32
33
|
"close_agent",
|
|
33
34
|
"question",
|
|
34
35
|
"skill",
|
|
36
|
+
"todo_write",
|
|
35
37
|
];
|
|
36
38
|
export function buildEnvironmentPrompt(options = {}) {
|
|
37
39
|
const configuredProvider = options.configuredProvider ?? "unknown";
|
|
@@ -3,6 +3,5 @@ export function buildDeepSeekProviderPrompt(agentName) {
|
|
|
3
3
|
|
|
4
4
|
Prefer short plans followed by concrete tool use. Avoid broad speculation.
|
|
5
5
|
After each tool result, update your understanding before choosing the next action.
|
|
6
|
-
Do not repeat equivalent searches unless the previous result changed the search space
|
|
7
|
-
When provider/API behavior is involved, inspect serialization and request-shape code before changing generic agent logic.`;
|
|
6
|
+
Do not repeat equivalent searches unless the previous result changed the search space.`;
|
|
8
7
|
}
|
|
@@ -2,6 +2,5 @@ export function buildKimiProviderPrompt(agentName) {
|
|
|
2
2
|
return `You are ${agentName}, a terminal coding agent running on a Kimi/Moonshot model.
|
|
3
3
|
|
|
4
4
|
Keep tool use disciplined: pursue one concrete hypothesis at a time, read results carefully, and converge after evidence is sufficient.
|
|
5
|
-
Do not fan out into many parallel search directions unless the task truly requires it
|
|
6
|
-
For tool-call or reasoning-mode issues, inspect message history serialization before changing unrelated agent behavior.`;
|
|
5
|
+
Do not fan out into many parallel search directions unless the task truly requires it.`;
|
|
7
6
|
}
|
|
@@ -30,6 +30,24 @@ export declare function buildWorkflowPhaseReminder(input: {
|
|
|
30
30
|
pending: string[];
|
|
31
31
|
}): string;
|
|
32
32
|
export declare function buildTaskSummaryReminder(): string;
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
33
|
+
/**
|
|
34
|
+
* Fired when the same edit/write tool call (identical tool name + args) has
|
|
35
|
+
* just failed for the second time in a row. Models — especially thinking-heavy
|
|
36
|
+
* ones — can otherwise spiral on `No changes made: identical content` or
|
|
37
|
+
* `oldText not found` because their internal reasoning convinces them they
|
|
38
|
+
* are typing the change correctly even though the JSON args arrive identical.
|
|
39
|
+
* This nudge forces a strategy change.
|
|
40
|
+
*/
|
|
41
|
+
export declare function buildEditRetryEscalationReminder(reason: string): string;
|
|
42
|
+
/**
|
|
43
|
+
* Fired the FIRST time the model re-reads a file it already read in this turn.
|
|
44
|
+
* Soft — does not freeze the tool. The model may still re-read when context was
|
|
45
|
+
* pruned, the requested range changed, or a later mutation needs verification.
|
|
46
|
+
*/
|
|
47
|
+
export declare function buildRedundantReadReminder(path: string): string;
|
|
48
|
+
/**
|
|
49
|
+
* Injected once at task start when the user's input looks like a small,
|
|
50
|
+
* focused task (e.g. "write an HTML page about X"). Counterweight to the
|
|
51
|
+
* default protocol which biases toward thorough exploration.
|
|
52
|
+
*/
|
|
53
|
+
export declare function buildSmallTaskHint(): string;
|
package/dist/prompt/reminders.js
CHANGED
|
@@ -171,33 +171,60 @@ Treat the task output as a bounded subtask result:
|
|
|
171
171
|
- do not re-run the same exploratory search unless the subtask uncovered a concrete contradiction
|
|
172
172
|
`);
|
|
173
173
|
}
|
|
174
|
-
|
|
174
|
+
// Removed: buildVerificationReminder / buildVerificationFailureReminder.
|
|
175
|
+
// The verification reminder ladder pressured the model to run a "verification"
|
|
176
|
+
// after every file change. For models with hex-tokenization blind spots (e.g.
|
|
177
|
+
// DeepSeek v4-pro), this triggered death loops where the model wrote ad-hoc
|
|
178
|
+
// validation scripts that found the bug but could never fix it. CC trusts the
|
|
179
|
+
// model to decide when verification is meaningful; we follow that.
|
|
180
|
+
/**
|
|
181
|
+
* Fired when the same edit/write tool call (identical tool name + args) has
|
|
182
|
+
* just failed for the second time in a row. Models — especially thinking-heavy
|
|
183
|
+
* ones — can otherwise spiral on `No changes made: identical content` or
|
|
184
|
+
* `oldText not found` because their internal reasoning convinces them they
|
|
185
|
+
* are typing the change correctly even though the JSON args arrive identical.
|
|
186
|
+
* This nudge forces a strategy change.
|
|
187
|
+
*/
|
|
188
|
+
export function buildEditRetryEscalationReminder(reason) {
|
|
175
189
|
return wrapInSystemReminder(`
|
|
176
|
-
|
|
190
|
+
The same edit/write call has failed twice with identical arguments.
|
|
177
191
|
|
|
178
192
|
${reason}
|
|
179
193
|
|
|
180
|
-
|
|
181
|
-
|
|
194
|
+
Stop retrying the same call. Pick one of:
|
|
195
|
+
- Re-read the target file and compare the actual bytes to your intended oldText / newText. Trailing whitespace, unicode lookalikes, or off-by-one boundaries are common causes.
|
|
196
|
+
- If you intended to add a single character (e.g. fixing a 5-digit hex color to 6 digits), confirm that your newText string actually contains the added character before sending again.
|
|
197
|
+
- Use the write tool with overwrite=true and the full new content instead of edit — useful when the change spans many lines or the diff anchor is ambiguous. Existing files must be read or modified in this session before full-file replacement.
|
|
198
|
+
- If you cannot determine the cause, ask the user for clarification.
|
|
182
199
|
`);
|
|
183
200
|
}
|
|
184
|
-
|
|
201
|
+
/**
|
|
202
|
+
* Fired the FIRST time the model re-reads a file it already read in this turn.
|
|
203
|
+
* Soft — does not freeze the tool. The model may still re-read when context was
|
|
204
|
+
* pruned, the requested range changed, or a later mutation needs verification.
|
|
205
|
+
*/
|
|
206
|
+
export function buildRedundantReadReminder(path) {
|
|
185
207
|
return wrapInSystemReminder(`
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
${reason}
|
|
189
|
-
|
|
190
|
-
Do not finalize as complete while this failure is unresolved. Make one focused fix and rerun the most relevant verification.
|
|
191
|
-
If you cannot fix it, explain the concrete blocker and the residual risk instead of claiming success.
|
|
208
|
+
You already read ${path} earlier in this turn. If that content is still available and nothing changed, rely on it rather than re-reading.
|
|
209
|
+
It is okay to re-read when you need to recover pruned context, inspect a different range, or verify a later edit/write/bash change.
|
|
192
210
|
`);
|
|
193
211
|
}
|
|
194
|
-
|
|
212
|
+
/**
|
|
213
|
+
* Injected once at task start when the user's input looks like a small,
|
|
214
|
+
* focused task (e.g. "write an HTML page about X"). Counterweight to the
|
|
215
|
+
* default protocol which biases toward thorough exploration.
|
|
216
|
+
*/
|
|
217
|
+
export function buildSmallTaskHint() {
|
|
195
218
|
return wrapInSystemReminder(`
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
${reason}
|
|
219
|
+
This appears to be a small, focused task (short request, single deliverable, no integration ambiguity).
|
|
199
220
|
|
|
200
|
-
|
|
201
|
-
|
|
221
|
+
Prefer direct execution over exploration:
|
|
222
|
+
- If the target file path is given or obvious, use write/edit directly.
|
|
223
|
+
- Do not glob, read, or grep adjacent files unless the request explicitly references them.
|
|
224
|
+
- Do not pre-plan with todo_write for tasks that can be done in one or two tool calls.
|
|
225
|
+
- Skip the "investigate the codebase" step that applies to larger changes.
|
|
202
226
|
`);
|
|
203
227
|
}
|
|
228
|
+
// Removed: buildFinalizeOpportunityReminder. Was paired with the verification
|
|
229
|
+
// nag ladder. Without the ladder, "you can finalize now" advice is redundant —
|
|
230
|
+
// the model finalises whenever its own judgement says the task is done.
|
package/dist/prompt/runtime.js
CHANGED
|
@@ -1,30 +1,24 @@
|
|
|
1
|
+
// Compact, prose-shaped guidelines. Each line is one rule. The set is kept
|
|
2
|
+
// short on purpose: a thinking-heavy model burns reasoning tokens on every
|
|
3
|
+
// rule it has to weigh per turn, and most behaviors should be background
|
|
4
|
+
// disposition, not active checklist items. Add to this list only when an
|
|
5
|
+
// observed failure cannot be addressed by an existing rule.
|
|
1
6
|
const defaultGuidelines = [
|
|
2
|
-
"
|
|
3
|
-
"
|
|
4
|
-
"
|
|
5
|
-
"
|
|
6
|
-
"
|
|
7
|
-
"
|
|
8
|
-
"Prefer structured search tools over bash for repository searches whenever possible",
|
|
9
|
-
"Do not repeat near-identical searches when they are not producing new evidence",
|
|
10
|
-
"When investigating configuration or security questions, stop once the relevant load path, storage path, and exposure path are identified",
|
|
11
|
-
"Use spawn_agent and wait_agent for bounded investigative subproblems instead of letting the main loop churn on repeated exploratory searches",
|
|
12
|
-
"After code edits, run the narrowest meaningful verification command or explain why verification is not possible",
|
|
13
|
-
"When finishing a coding task, report what changed, where it changed, verification results, and remaining risk",
|
|
14
|
-
"Be concise in your responses",
|
|
7
|
+
"Ground decisions in the codebase: inspect relevant files, command output, or runtime state before making claims about behavior. Separate confirmed facts from inference when evidence is incomplete.",
|
|
8
|
+
"Choose the smallest coherent change. Edit only the files required for the requested change; do not refactor or improve adjacent code unprompted.",
|
|
9
|
+
"For modifications to existing code, read the file first. For brand-new files whose target path is known and does not exist, write directly without exploratory reading. Use edit for small targeted changes; use write with overwrite=true for intentional full-file replacement of an existing file. Never delete and recreate a file just to overwrite it.",
|
|
10
|
+
"Prefer structured tools (glob, grep, lsp, read) over bash for search and inspection. Do not repeat a near-identical search or re-read the same file unless new evidence changes the question.",
|
|
11
|
+
"If a tool fails, diagnose the error before switching tactics. Do not retry the identical call with identical arguments. After two equivalent failures, switch approach — re-read the file, use a different tool, rewrite the whole file with write overwrite=true, or ask the user.",
|
|
12
|
+
"Before reporting a task complete, verify it works when verification is meaningful and cheap — run the existing test, execute the script, check the output. If no test exists, the change is purely declarative (static HTML/markdown/config), or running the code is not practical, state that explicitly rather than inventing a verification step. Do not write throwaway validation scripts to prove correctness; if there is no real check to run, report the change and stop.",
|
|
15
13
|
];
|
|
16
14
|
export function buildRuntimePrompt(options = {}) {
|
|
17
|
-
const thinkingLevel = options.thinkingLevel ?? "off";
|
|
18
15
|
const guidelines = dedupe(defaultGuidelines, options.guidelines ?? []);
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
4. Edit only the necessary files.
|
|
26
|
-
5. Verify with the narrowest meaningful command or runtime check when possible.
|
|
27
|
-
6. Finish with changed files, verification results, and unresolved risk.
|
|
16
|
+
// The execution flow is stated as a single prose sentence rather than a
|
|
17
|
+
// numbered protocol. Numbered checklists prompt thinking models to walk
|
|
18
|
+
// each step explicitly in their reasoning every turn, even for trivial
|
|
19
|
+
// tasks — multiplying latency without improving quality. Prose lets the
|
|
20
|
+
// protocol act as background disposition.
|
|
21
|
+
return `Work by understanding the requested outcome, grounding decisions in the codebase, making the smallest coherent change, and verifying when possible. Scale your effort to the task: a one-file create-or-edit deserves direct execution, not extensive pre-exploration.
|
|
28
22
|
|
|
29
23
|
Guidelines:
|
|
30
24
|
${guidelines.map((item) => `- ${item}`).join("\n")}`;
|
package/dist/provider.d.ts
CHANGED
|
@@ -5,6 +5,10 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import type { Provider, ProviderMessage, StreamChunk, ThinkingLevel } from "./types.js";
|
|
7
7
|
type ReasoningContentEcho = "tool_calls" | "all";
|
|
8
|
+
export type ToolArgsMergeMode = "delta" | "snapshot";
|
|
9
|
+
export interface TranslateOpenAIStreamOptions {
|
|
10
|
+
toolArgsMergeMode?: ToolArgsMergeMode;
|
|
11
|
+
}
|
|
8
12
|
export declare function toChatCompletionsMessage(message: ProviderMessage, options?: {
|
|
9
13
|
reasoningContentEcho?: ReasoningContentEcho;
|
|
10
14
|
}): Record<string, unknown>;
|
|
@@ -17,6 +21,11 @@ export interface ProviderInstanceOptions {
|
|
|
17
21
|
}
|
|
18
22
|
export declare function createUnavailableProvider(message: string): Provider;
|
|
19
23
|
export declare function createProviderInstance(options: ProviderInstanceOptions): Provider;
|
|
24
|
+
export interface NormalizedToolArgs {
|
|
25
|
+
args: string;
|
|
26
|
+
corrupt: boolean;
|
|
27
|
+
}
|
|
28
|
+
export declare function normalizeToolArgsDetailed(raw: string): NormalizedToolArgs;
|
|
20
29
|
export declare function normalizeToolArgs(raw: string): string;
|
|
21
30
|
/**
|
|
22
31
|
* Convert an OpenAI-compatible chat-completions stream into our internal StreamChunk events.
|
|
@@ -26,5 +35,5 @@ export declare function normalizeToolArgs(raw: string): string;
|
|
|
26
35
|
* partial write previews before the tool executes. End events are still flushed
|
|
27
36
|
* in index order to keep multi-call turns deterministic.
|
|
28
37
|
*/
|
|
29
|
-
export declare function translateOpenAIStream(stream: AsyncIterable<any
|
|
38
|
+
export declare function translateOpenAIStream(stream: AsyncIterable<any>, options?: TranslateOpenAIStreamOptions): AsyncIterable<StreamChunk>;
|
|
30
39
|
export {};
|