@oh-my-pi/pi-coding-agent 14.7.2 → 14.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/package.json +7 -7
- package/src/cli/read-cli.ts +1 -2
- package/src/commands/read.ts +2 -7
- package/src/config/settings-schema.ts +0 -5
- package/src/edit/modes/hashline.ts +40 -19
- package/src/edit/modes/patch.ts +7 -5
- package/src/edit/modes/replace.ts +6 -2
- package/src/edit/notebook.ts +222 -0
- package/src/edit/read-file.ts +7 -0
- package/src/edit/renderer.ts +4 -3
- package/src/edit/streaming.ts +49 -7
- package/src/modes/components/diff.ts +54 -7
- package/src/modes/components/tool-execution.ts +3 -29
- package/src/prompts/agents/designer.md +1 -2
- package/src/prompts/agents/explore.md +2 -5
- package/src/prompts/agents/init.md +1 -4
- package/src/prompts/agents/librarian.md +1 -3
- package/src/prompts/agents/plan.md +7 -8
- package/src/prompts/agents/reviewer.md +1 -2
- package/src/prompts/ci-green-request.md +10 -10
- package/src/prompts/commands/orchestrate.md +48 -0
- package/src/prompts/memories/consolidation.md +10 -10
- package/src/prompts/memories/read-path.md +6 -6
- package/src/prompts/system/agent-creation-architect.md +54 -44
- package/src/prompts/system/custom-system-prompt.md +3 -5
- package/src/prompts/system/eager-todo.md +4 -4
- package/src/prompts/system/handoff-document.md +7 -4
- package/src/prompts/system/plan-mode-active.md +7 -3
- package/src/prompts/system/plan-mode-approved.md +5 -5
- package/src/prompts/system/summarization-system.md +2 -2
- package/src/prompts/system/system-prompt.md +53 -65
- package/src/prompts/system/title-system.md +2 -2
- package/src/prompts/system/web-search.md +16 -19
- package/src/prompts/tools/bash.md +8 -8
- package/src/prompts/tools/browser.md +4 -4
- package/src/prompts/tools/debug.md +3 -1
- package/src/prompts/tools/eval.md +13 -9
- package/src/prompts/tools/hashline.md +4 -2
- package/src/prompts/tools/image-gen.md +1 -1
- package/src/prompts/tools/read.md +1 -2
- package/src/prompts/tools/reflect.md +3 -3
- package/src/prompts/tools/render-mermaid.md +2 -2
- package/src/prompts/tools/resolve.md +2 -2
- package/src/prompts/tools/retain.md +3 -2
- package/src/prompts/tools/rewind.md +2 -2
- package/src/prompts/tools/search-tool-bm25.md +3 -4
- package/src/prompts/tools/task.md +1 -1
- package/src/prompts/tools/todo-write.md +2 -2
- package/src/task/commands.ts +5 -1
- package/src/tools/fetch.ts +6 -7
- package/src/tools/index.ts +0 -4
- package/src/tools/read.ts +18 -7
- package/src/tools/renderers.ts +0 -2
- package/src/tools/write.ts +41 -26
- package/src/tools/notebook.ts +0 -286
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { sanitizeText } from "@oh-my-pi/pi-natives";
|
|
2
2
|
import { getIndentation } from "@oh-my-pi/pi-utils";
|
|
3
3
|
import * as Diff from "diff";
|
|
4
|
-
import { theme } from "../../modes/theme/theme";
|
|
4
|
+
import { getLanguageFromPath, highlightCode, theme } from "../../modes/theme/theme";
|
|
5
5
|
import { type CodeFrameMarker, formatCodeFrameLine, replaceTabs } from "../../tools/render-utils";
|
|
6
6
|
|
|
7
7
|
/** SGR dim on / normal intensity — additive, preserves fg/bg colors. */
|
|
@@ -115,6 +115,10 @@ export function renderDiff(diffText: string, options: RenderDiffOptions = {}): s
|
|
|
115
115
|
return Math.max(width, lineNumber.length);
|
|
116
116
|
}, 0);
|
|
117
117
|
|
|
118
|
+
// Batch-highlight context (unedited) lines so consecutive lines tokenize
|
|
119
|
+
// with full multi-line context. Highlighting is a no-op when no language
|
|
120
|
+
// can be detected from the file path.
|
|
121
|
+
const contextHighlights = highlightContextLines(parsedLines, options.filePath);
|
|
118
122
|
// Track the line number rendered on the previous emitted line so we can
|
|
119
123
|
// blank out duplicate gutters. Two cases trigger this:
|
|
120
124
|
// 1. Single-line replacement (`-N` followed by `+N`) — the `+N` repeats `N`.
|
|
@@ -206,15 +210,58 @@ export function renderDiff(diffText: string, options: RenderDiffOptions = {}): s
|
|
|
206
210
|
);
|
|
207
211
|
i++;
|
|
208
212
|
} else {
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
);
|
|
213
|
+
const highlighted = contextHighlights.get(i);
|
|
214
|
+
const content =
|
|
215
|
+
highlighted !== undefined
|
|
216
|
+
? replaceTabs(highlighted, options.filePath)
|
|
217
|
+
: visualizeIndent(parsed.content, options.filePath);
|
|
218
|
+
result.push(theme.fg("toolDiffContext", formatLine(" ", parsed.lineNum, content)));
|
|
215
219
|
i++;
|
|
216
220
|
}
|
|
217
221
|
}
|
|
218
222
|
|
|
219
223
|
return result.join("\n");
|
|
220
224
|
}
|
|
225
|
+
|
|
226
|
+
/**
|
|
227
|
+
* Batch-highlight runs of consecutive context lines.
|
|
228
|
+
* Returns a map keyed by index in `parsedLines` to the highlighted content
|
|
229
|
+
* for that line. Lines whose language is unknown are not added to the map,
|
|
230
|
+
* letting callers fall back to the existing rendering path.
|
|
231
|
+
*/
|
|
232
|
+
function highlightContextLines(
|
|
233
|
+
parsedLines: Array<{ prefix: CodeFrameMarker; lineNum: string; content: string } | null>,
|
|
234
|
+
filePath: string | undefined,
|
|
235
|
+
): Map<number, string> {
|
|
236
|
+
const map = new Map<number, string>();
|
|
237
|
+
const lang = filePath ? getLanguageFromPath(filePath) : undefined;
|
|
238
|
+
if (!lang) return map;
|
|
239
|
+
|
|
240
|
+
let runIndices: number[] = [];
|
|
241
|
+
let runContents: string[] = [];
|
|
242
|
+
const flush = () => {
|
|
243
|
+
if (runContents.length === 0) return;
|
|
244
|
+
const highlighted = highlightCode(runContents.join("\n"), lang);
|
|
245
|
+
for (let k = 0; k < runIndices.length; k++) {
|
|
246
|
+
map.set(runIndices[k], highlighted[k] ?? runContents[k]);
|
|
247
|
+
}
|
|
248
|
+
runIndices = [];
|
|
249
|
+
runContents = [];
|
|
250
|
+
};
|
|
251
|
+
|
|
252
|
+
for (let j = 0; j < parsedLines.length; j++) {
|
|
253
|
+
const p = parsedLines[j];
|
|
254
|
+
// Collapse markers ("...") are emitted as context lines but are not real
|
|
255
|
+
// code; highlighting them produces nonsense (e.g. "..." → spread operator)
|
|
256
|
+
// and would also stitch together unrelated context blocks across the gap.
|
|
257
|
+
const isCollapseMarker = p?.prefix === " " && (p.content === "..." || p.content === "…");
|
|
258
|
+
if (p && p.prefix === " " && !isCollapseMarker) {
|
|
259
|
+
runIndices.push(j);
|
|
260
|
+
runContents.push(p.content);
|
|
261
|
+
} else {
|
|
262
|
+
flush();
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
flush();
|
|
266
|
+
return map;
|
|
267
|
+
}
|
|
@@ -114,7 +114,6 @@ export class ToolExecutionComponent extends Container {
|
|
|
114
114
|
// Edit preview state
|
|
115
115
|
#editMode?: EditMode;
|
|
116
116
|
#editDiffPreview?: PerFileDiffPreview[];
|
|
117
|
-
#editDiffScheduleTimer?: NodeJS.Timeout;
|
|
118
117
|
#editDiffAbort?: AbortController;
|
|
119
118
|
#editDiffLastArgsKey?: string;
|
|
120
119
|
// Cached converted images for Kitty protocol (which requires PNG), keyed by index
|
|
@@ -173,13 +172,13 @@ export class ToolExecutionComponent extends Container {
|
|
|
173
172
|
this.#editMode = resolveEditModeForTool(toolName, tool);
|
|
174
173
|
|
|
175
174
|
this.#updateDisplay();
|
|
176
|
-
this.#
|
|
175
|
+
void this.#runPreviewDiff();
|
|
177
176
|
}
|
|
178
177
|
|
|
179
178
|
updateArgs(args: any, _toolCallId?: string): void {
|
|
180
179
|
this.#args = cloneToolArgs(args);
|
|
181
180
|
this.#updateSpinnerAnimation();
|
|
182
|
-
this.#
|
|
181
|
+
void this.#runPreviewDiff();
|
|
183
182
|
this.#updateDisplay();
|
|
184
183
|
}
|
|
185
184
|
|
|
@@ -190,28 +189,7 @@ export class ToolExecutionComponent extends Container {
|
|
|
190
189
|
setArgsComplete(_toolCallId?: string): void {
|
|
191
190
|
this.#argsComplete = true;
|
|
192
191
|
this.#updateSpinnerAnimation();
|
|
193
|
-
this.#
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
/**
|
|
197
|
-
* Schedule a debounced compute of the streaming edit-diff preview.
|
|
198
|
-
* `delayMs === 0` runs immediately (used on construction and on
|
|
199
|
-
* `setArgsComplete`). All other calls coalesce to a trailing-edge timer.
|
|
200
|
-
*/
|
|
201
|
-
#schedulePreviewDiff(delayMs = 80): void {
|
|
202
|
-
if (!this.#editMode) return;
|
|
203
|
-
if (this.#editDiffScheduleTimer) {
|
|
204
|
-
clearTimeout(this.#editDiffScheduleTimer);
|
|
205
|
-
this.#editDiffScheduleTimer = undefined;
|
|
206
|
-
}
|
|
207
|
-
if (delayMs === 0) {
|
|
208
|
-
void this.#runPreviewDiff();
|
|
209
|
-
return;
|
|
210
|
-
}
|
|
211
|
-
this.#editDiffScheduleTimer = setTimeout(() => {
|
|
212
|
-
this.#editDiffScheduleTimer = undefined;
|
|
213
|
-
void this.#runPreviewDiff();
|
|
214
|
-
}, delayMs);
|
|
192
|
+
void this.#runPreviewDiff();
|
|
215
193
|
}
|
|
216
194
|
|
|
217
195
|
async #runPreviewDiff(): Promise<void> {
|
|
@@ -365,10 +343,6 @@ export class ToolExecutionComponent extends Container {
|
|
|
365
343
|
this.#spinnerInterval = undefined;
|
|
366
344
|
this.#spinnerFrame = undefined;
|
|
367
345
|
}
|
|
368
|
-
if (this.#editDiffScheduleTimer) {
|
|
369
|
-
clearTimeout(this.#editDiffScheduleTimer);
|
|
370
|
-
this.#editDiffScheduleTimer = undefined;
|
|
371
|
-
}
|
|
372
346
|
this.#editDiffAbort?.abort();
|
|
373
347
|
this.#editDiffAbort = undefined;
|
|
374
348
|
}
|
|
@@ -4,8 +4,7 @@ description: UI/UX specialist for design implementation, review, visual refineme
|
|
|
4
4
|
model: pi/designer
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
You **MAY** make file edits, create components, and run commands—and **SHOULD** do so when needed.
|
|
7
|
+
Implement and review UI designs. Edit files, create components, run commands when needed.
|
|
9
8
|
|
|
10
9
|
<strengths>
|
|
11
10
|
- Translate design intent into working UI code
|
|
@@ -29,13 +29,11 @@ output:
|
|
|
29
29
|
type: string
|
|
30
30
|
---
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
Given a task, you rapidly investigate the codebase and return structured findings another agent can use without re-reading everything.
|
|
32
|
+
Investigate the codebase rapidly. Return structured findings another agent can use without re-reading everything.
|
|
35
33
|
|
|
36
34
|
<directives>
|
|
37
35
|
- You **MUST** use tools for broad pattern matching / code search as much as possible.
|
|
38
|
-
- You **SHOULD** invoke tools in parallel
|
|
36
|
+
- You **SHOULD** invoke tools in parallel—this is a short investigation, and you are supposed to finish in a few seconds.
|
|
39
37
|
- If a search returns empty results, you **MUST** try at least one alternate strategy (different pattern, broader path, or AST search) before concluding the target doesn't exist.
|
|
40
38
|
</directives>
|
|
41
39
|
|
|
@@ -47,7 +45,6 @@ You **MUST** infer the thoroughness from the task; default to medium:
|
|
|
47
45
|
</thoroughness>
|
|
48
46
|
|
|
49
47
|
<procedure>
|
|
50
|
-
You **SHOULD** generally follow this procedure, but are allowed to adjust it as the task requires:
|
|
51
48
|
1. Locate relevant code using tools.
|
|
52
49
|
2. Read key sections (You **MUST NOT** read full files unless they're tiny)
|
|
53
50
|
3. Identify types/interfaces/key functions.
|
|
@@ -4,12 +4,9 @@ description: Generate AGENTS.md for current codebase
|
|
|
4
4
|
thinking-level: medium
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
You **MUST** launch multiple `explore` agents in parallel (via `task` tool) scanning different areas (core src, tests, configs/build, scripts/docs), then synthesize your findings into a detailed AGENTS.md file.
|
|
7
|
+
Generate AGENTS.md by launching multiple `explore` agents in parallel (via `task` tool) scanning different areas (core src, tests, configs/build, scripts/docs), then synthesize findings into a single file.
|
|
10
8
|
|
|
11
9
|
<structure>
|
|
12
|
-
You will likely need to document these sections, but only take it as a starting point and adjust it to the specific codebase:
|
|
13
10
|
- **Project Overview**: Brief description of project purpose
|
|
14
11
|
- **Architecture & Data Flow**: High-level structure, key modules, data flow
|
|
15
12
|
- **Key Directories**: Main source directories, purposes
|
|
@@ -65,7 +65,7 @@ output:
|
|
|
65
65
|
type: string
|
|
66
66
|
---
|
|
67
67
|
|
|
68
|
-
|
|
68
|
+
Answer questions about external libraries, frameworks, and APIs by reading source code and official documentation.
|
|
69
69
|
|
|
70
70
|
<critical>
|
|
71
71
|
You **MUST** ground every claim in source code or official documentation. You **MUST NOT** rely on training data for API details — it may be stale or wrong.
|
|
@@ -74,8 +74,6 @@ You **MUST** operate as read-only on the user's project. You **MUST NOT** modify
|
|
|
74
74
|
|
|
75
75
|
<procedure>
|
|
76
76
|
## 1. Classify the request
|
|
77
|
-
|
|
78
|
-
Before acting, determine what kind of question this is:
|
|
79
77
|
- **Conceptual**: "How do I use X?", "Best practice for Y?" — Prioritize types, docs, and usage examples.
|
|
80
78
|
- **Implementation**: "How does X implement Y?", "Show me the source of Z" — Clone and read the actual code.
|
|
81
79
|
- **Behavioral**: "Why does X behave this way?", "What's the default for Y?" — Read implementation, find where values are set, check tests.
|
|
@@ -7,7 +7,7 @@ model: pi/plan, pi/slow
|
|
|
7
7
|
thinking-level: high
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
Analyze the codebase and the user's request. Produce a detailed implementation plan.
|
|
11
11
|
|
|
12
12
|
## Phase 1: Understand
|
|
13
13
|
1. Parse requirements precisely
|
|
@@ -33,14 +33,13 @@ You **MUST** spawn `explore` agents for independent areas and synthesize finding
|
|
|
33
33
|
|
|
34
34
|
You **MUST** write a plan executable without re-exploration.
|
|
35
35
|
|
|
36
|
-
You will likely need to document these sections, but only take it as a starting point and adjust it to the specific request.
|
|
37
36
|
<structure>
|
|
38
|
-
**Summary**: What to build and why (one paragraph).
|
|
39
|
-
**Changes**: List concrete changes (files, functions, types), concrete as much as possible. Exact file paths/line ranges where relevant.
|
|
40
|
-
**Sequence**: List sequence and dependencies between sub-tasks, to schedule them in the best order.
|
|
41
|
-
**Edge Cases**: List edge cases and error conditions, to be aware of.
|
|
42
|
-
**Verification**: List verification steps, to be able to verify the correctness.
|
|
43
|
-
**Critical Files**: List critical files, to be able to read them and understand the codebase.
|
|
37
|
+
- **Summary**: What to build and why (one paragraph).
|
|
38
|
+
- **Changes**: List concrete changes (files, functions, types), concrete as much as possible. Exact file paths/line ranges where relevant.
|
|
39
|
+
- **Sequence**: List sequence and dependencies between sub-tasks, to schedule them in the best order.
|
|
40
|
+
- **Edge Cases**: List edge cases and error conditions, to be aware of.
|
|
41
|
+
- **Verification**: List verification steps, to be able to verify the correctness.
|
|
42
|
+
- **Critical Files**: List critical files, to be able to read them and understand the codebase.
|
|
44
43
|
</structure>
|
|
45
44
|
|
|
46
45
|
<critical>
|
|
@@ -56,8 +56,7 @@ output:
|
|
|
56
56
|
type: number
|
|
57
57
|
---
|
|
58
58
|
|
|
59
|
-
|
|
60
|
-
Your goal is to identify bugs the author would want fixed before merge.
|
|
59
|
+
Identify bugs the author would want fixed before merge.
|
|
61
60
|
|
|
62
61
|
<procedure>
|
|
63
62
|
1. Run `git diff` (or `gh pr diff <number>`) to view patch
|
|
@@ -4,24 +4,24 @@ Do not stop after a single fix attempt.
|
|
|
4
4
|
</critical>
|
|
5
5
|
|
|
6
6
|
<instruction>
|
|
7
|
-
- Prefer
|
|
7
|
+
- Prefer `github` tool with `op: run_watch` and no other arguments if available.
|
|
8
8
|
- Otherwise use `gh` cli.
|
|
9
|
-
- Use
|
|
9
|
+
- Use workflow runs for current HEAD as source of truth after each push.
|
|
10
10
|
</instruction>
|
|
11
11
|
|
|
12
12
|
<procedure>
|
|
13
|
-
1. Watch
|
|
14
|
-
2. If any run fails, inspect
|
|
15
|
-
3. Identify
|
|
16
|
-
4. Run local verification
|
|
13
|
+
1. Watch workflow runs for current HEAD commit.
|
|
14
|
+
2. If any run fails, inspect failing job output and logs.
|
|
15
|
+
3. Identify root cause and make minimal correct fix.
|
|
16
|
+
4. Run local verification if it reduces chance of another failing push.
|
|
17
17
|
5. Push the branch.
|
|
18
|
-
6. Watch
|
|
19
|
-
7. Repeat until
|
|
18
|
+
6. Watch workflow runs for new HEAD commit again.
|
|
19
|
+
7. Repeat until workflow runs for latest HEAD commit succeed.
|
|
20
20
|
</procedure>
|
|
21
21
|
|
|
22
22
|
<caution>
|
|
23
|
-
- Treat each
|
|
24
|
-
- If
|
|
23
|
+
- Treat each push as fresh CI attempt. Re-watch new HEAD immediately.
|
|
24
|
+
- If watcher output is insufficient, inspect underlying workflow or job context before changing code.
|
|
25
25
|
</caution>
|
|
26
26
|
|
|
27
27
|
{{#if headTag}}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: orchestrate
|
|
3
|
+
description: Drive a multi-phase task to completion via parallel subagents
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Task
|
|
7
|
+
|
|
8
|
+
$@
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# Orchestration Contract
|
|
13
|
+
|
|
14
|
+
You are the **orchestrator** for the task above. Read it once, then execute under the rules below. The contract overrides any default tendency to yield early, narrate, or do work yourself.
|
|
15
|
+
|
|
16
|
+
<role>
|
|
17
|
+
You decompose, dispatch, verify, and iterate. You do **not** edit code. Every file mutation goes through a `task` subagent. Your tool budget is: reading for planning, `task` for dispatch, verification (`bun check`, `bun test`, `recipe`, `lsp diagnostics`), git via `bash`, and `todo_write` for tracking.
|
|
18
|
+
</role>
|
|
19
|
+
|
|
20
|
+
<rules>
|
|
21
|
+
1. **Do not yield until everything is closed.** A phase finishing is *not* a yield point — launch the next phase in the same turn. Stop only when every requested item is verifiably done, or you hit a concrete [blocked] state that genuinely requires the user.
|
|
22
|
+
2. **Enumerate the full surface before dispatching.** If the task references audits, plans, checklists, phase lists, or file lists, expand them into a flat set of items in `todo_write`. "Most of them" or "the important ones" is failure. Re-read the source documents — do not work from memory.
|
|
23
|
+
3. **Parallelize maximally.** Every set of edits with disjoint file scope **MUST** ship as one `task` batch. Serialize only when one subagent produces a contract (types, schema, shared module) the next consumes — and state the dependency when you do.
|
|
24
|
+
4. **Each `task` assignment is self-contained.** Subagents have no shared context. Spell out: target files (≤3–5 explicit paths, no globs), the change with APIs and patterns, edge cases, and observable acceptance criteria. Do not assume they read the same plan you did.
|
|
25
|
+
5. **Verify after every phase before launching the next.** Run the appropriate gate: `bun check` for types, package-scoped `bun test` for behavior, `lsp diagnostics` for changed files. If a phase introduced breakage, dispatch fix-up subagents *before* moving on. Never declare a phase done on a red tree.
|
|
26
|
+
6. **Commit policy.** If the task asks for commits or the repo workflow expects them, commit after each green phase with a focused message. Never commit a red tree. Never commit work the user did not ask to commit.
|
|
27
|
+
7. **Respawn, do not absorb.** If a subagent returns incomplete or wrong work, spawn a corrective subagent with the specific gap — do not silently fix it yourself.
|
|
28
|
+
8. **No scope creep, no scope shrink.** Do not add work the user did not ask for. Do not relabel unfinished items as "follow-up", "v1", or "MVP" to imply completion.
|
|
29
|
+
</rules>
|
|
30
|
+
|
|
31
|
+
<workflow>
|
|
32
|
+
1. **Ingest.** Read every referenced file (audits, plans, prior agent output, current branch state). Run `git status` to see uncommitted changes.
|
|
33
|
+
2. **Plan.** Materialize the full work surface in `todo_write` as ordered phases. Within each phase, list the parallelizable units.
|
|
34
|
+
3. **Dispatch phase.** Launch all parallel `task` subagents in one call. Wait for the batch.
|
|
35
|
+
4. **Verify phase.** Run the gates. On failure, dispatch fix-up subagents and re-verify. Do not advance with a red gate.
|
|
36
|
+
5. **Commit phase** (if applicable). Focused message naming the phase.
|
|
37
|
+
6. **Advance.** Mark the phase done in `todo_write`, immediately start the next phase. No summary message between phases — keep going.
|
|
38
|
+
7. **Final verification.** When the last phase is green, run the full gate set once more and confirm every `todo_write` item is closed. Then yield with a terse status, not a recap.
|
|
39
|
+
</workflow>
|
|
40
|
+
|
|
41
|
+
<anti-patterns>
|
|
42
|
+
- Editing files yourself "because it's faster".
|
|
43
|
+
- Yielding after phase 1 with "ready to continue?".
|
|
44
|
+
- Dispatching one subagent at a time when five could run in parallel.
|
|
45
|
+
- Skipping `bun check` between phases because "the change looked safe".
|
|
46
|
+
- Marking todos done based on subagent self-reports without verifying the gate.
|
|
47
|
+
- Summarizing progress in chat instead of advancing to the next phase.
|
|
48
|
+
</anti-patterns>
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
Memory consolidation agent.
|
|
2
2
|
Memory root: memory://root
|
|
3
3
|
Input corpus (raw memories):
|
|
4
4
|
{{raw_memories}}
|
|
@@ -19,12 +19,12 @@ Produce strict JSON only with this schema — you **MUST NOT** include any other
|
|
|
19
19
|
]
|
|
20
20
|
}
|
|
21
21
|
Requirements:
|
|
22
|
-
- memory_md:
|
|
23
|
-
- memory_summary:
|
|
24
|
-
- skills: reusable
|
|
25
|
-
-
|
|
26
|
-
-
|
|
27
|
-
- scripts/templates/examples
|
|
28
|
-
-
|
|
29
|
-
-
|
|
30
|
-
-
|
|
22
|
+
- memory_md: long-term memory document.
|
|
23
|
+
- memory_summary: prompt-time memory guidance.
|
|
24
|
+
- skills: reusable playbooks. Empty array allowed.
|
|
25
|
+
- skill.name maps to skills/<name>/.
|
|
26
|
+
- skill.content maps to skills/<name>/SKILL.md.
|
|
27
|
+
- scripts/templates/examples: optional. Each entry **MUST** write to skills/<name>/<bucket>/<path>.
|
|
28
|
+
- Only include files worth keeping long-term. Omit stale assets so they are pruned.
|
|
29
|
+
- Preserve useful prior themes. Remove stale or contradictory guidance.
|
|
30
|
+
- Treat memory as advisory: current repository state wins.
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
# Memory Guidance
|
|
2
2
|
Memory root: memory://root
|
|
3
3
|
Operational rules:
|
|
4
|
-
1)
|
|
5
|
-
2) If needed,
|
|
6
|
-
3)
|
|
7
|
-
4)
|
|
8
|
-
5)
|
|
9
|
-
6)
|
|
4
|
+
1) Read `memory://root/memory_summary.md` first.
|
|
5
|
+
2) If needed, inspect `memory://root/MEMORY.md` and `memory://root/skills/<name>/SKILL.md`.
|
|
6
|
+
3) Trust memory for heuristics and process context. Trust current repo files, runtime output, and user instruction for factual state and final decisions.
|
|
7
|
+
4) When memory changes your plan, cite the artifact path (e.g. `memory://root/skills/<name>/SKILL.md`) and pair it with current-repo evidence.
|
|
8
|
+
5) If memory disagrees with repo state or user instruction, prefer repo/user. Treat memory as stale. Proceed with corrected behavior, then update/regenerate memory artifacts.
|
|
9
|
+
6) Escalate confidence only after repository verification. Memory alone **MUST NOT** be treated as sufficient proof.
|
|
10
10
|
Memory summary:
|
|
11
11
|
{{memory_summary}}
|
|
@@ -1,64 +1,74 @@
|
|
|
1
|
-
You are an
|
|
1
|
+
You are an AI agent architect. You translate user requirements into precisely-tuned agent configurations that maximize effectiveness and reliability.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Consider project-specific instructions from CLAUDE.md files when creating agents. Align new agents with established project patterns.
|
|
4
4
|
|
|
5
|
-
When a user describes what they want an agent to do
|
|
6
|
-
1. Extract
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
-
|
|
10
|
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
|
|
14
|
-
-
|
|
15
|
-
|
|
16
|
-
-
|
|
17
|
-
-
|
|
18
|
-
-
|
|
19
|
-
-
|
|
20
|
-
|
|
5
|
+
When a user describes what they want an agent to do:
|
|
6
|
+
1. Extract core intent
|
|
7
|
+
- Identify the fundamental purpose, key responsibilities, and success criteria
|
|
8
|
+
- Consider both explicit requirements and implicit needs
|
|
9
|
+
- For code-review agents, **SHOULD** assume the user wants review of recently written code, not the whole codebase, unless explicitly stated otherwise
|
|
10
|
+
2. Design expert persona
|
|
11
|
+
- Create an identity with deep domain knowledge relevant to the task
|
|
12
|
+
- The persona should guide the agent's decision-making approach
|
|
13
|
+
3. Architect comprehensive instructions
|
|
14
|
+
- Establish clear behavioral boundaries and operational parameters
|
|
15
|
+
- Provide specific methodologies and best practices for task execution
|
|
16
|
+
- Anticipate edge cases and provide guidance for handling them
|
|
17
|
+
- Incorporate user-specific requirements or preferences
|
|
18
|
+
- Define output format expectations when relevant
|
|
19
|
+
- Align with project-specific coding standards and patterns from CLAUDE.md
|
|
20
|
+
4. Optimize for performance
|
|
21
|
+
- Include decision-making frameworks appropriate to the domain
|
|
22
|
+
- Include quality control mechanisms and self-verification steps
|
|
23
|
+
- Include efficient workflow patterns
|
|
24
|
+
- Include clear escalation or fallback strategies
|
|
25
|
+
5. Create identifier
|
|
21
26
|
- **MUST** use lowercase letters, numbers, and hyphens only
|
|
22
27
|
- **SHOULD** be 2-4 words joined by hyphens
|
|
23
28
|
- **MUST** clearly indicate the agent's primary function
|
|
24
29
|
- **SHOULD** be memorable and easy to type
|
|
25
30
|
- **MUST NOT** use generic terms like "helper" or "assistant"
|
|
26
|
-
6. Example agent descriptions
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
31
|
+
6. Example agent descriptions
|
|
32
|
+
- In the `whenToUse` field, **SHOULD** include examples of when this agent **SHOULD** be used
|
|
33
|
+
- Format examples as:
|
|
34
|
+
```
|
|
35
|
+
<example>
|
|
36
|
+
Context: The user is creating a test-runner agent that should be called after a logical chunk of code is written.
|
|
37
|
+
user: "Please write a function that checks if a number is prime"
|
|
38
|
+
assistant: "Here is the relevant function: "
|
|
39
|
+
<function call omitted for brevity only for this example>
|
|
40
|
+
<commentary>
|
|
41
|
+
Since a significant piece of code was written, use the {{TASK_TOOL_NAME}} tool to launch the test-runner agent to run the tests.
|
|
42
|
+
</commentary>
|
|
43
|
+
assistant: "Now let me use the test-runner agent to run the tests"
|
|
44
|
+
</example>
|
|
45
|
+
<example>
|
|
46
|
+
Context: User is creating an agent to respond to the word "hello" with a friendly joke.
|
|
47
|
+
user: "Hello"
|
|
48
|
+
assistant: "I'm going to use the {{TASK_TOOL_NAME}} tool to launch the greeting-responder agent to respond with a friendly joke"
|
|
49
|
+
<commentary>
|
|
50
|
+
Since the user is greeting, use the greeting-responder agent to respond with a friendly joke.
|
|
51
|
+
</commentary>
|
|
52
|
+
</example>
|
|
53
|
+
```
|
|
54
|
+
- If the user mentioned or implied proactive use, **SHOULD** include proactive examples
|
|
55
|
+
- **MUST** ensure examples show the assistant using the Agent tool, not responding directly
|
|
49
56
|
|
|
50
57
|
Your output **MUST** be a valid JSON object with exactly these fields:
|
|
58
|
+
|
|
59
|
+
```json
|
|
51
60
|
{
|
|
52
61
|
"identifier": "A unique, descriptive identifier using lowercase letters, numbers, and hyphens (e.g., 'test-runner', 'api-docs-writer', 'code-formatter')",
|
|
53
|
-
"whenToUse": "A precise, actionable description starting with 'Use this agent when…' that clearly defines the triggering conditions and use cases.
|
|
62
|
+
"whenToUse": "A precise, actionable description starting with 'Use this agent when…' that clearly defines the triggering conditions and use cases. Include examples as described above.",
|
|
54
63
|
"systemPrompt": "The complete system prompt that will govern the agent's behavior, written in second person ('You are…', 'You will…') and structured for maximum clarity and effectiveness"
|
|
55
64
|
}
|
|
65
|
+
```
|
|
56
66
|
|
|
57
67
|
Key principles for your system prompts:
|
|
58
|
-
- **MUST** be specific
|
|
68
|
+
- **MUST** be specific, not generic — **MUST NOT** use vague instructions
|
|
59
69
|
- **SHOULD** include concrete examples when they would clarify behavior
|
|
60
70
|
- **MUST** balance comprehensiveness with clarity — every instruction **MUST** add value
|
|
61
|
-
- **MUST** ensure the agent has enough context to handle variations
|
|
71
|
+
- **MUST** ensure the agent has enough context to handle task variations
|
|
62
72
|
- **MUST** make the agent proactive in seeking clarification when needed
|
|
63
73
|
- **MUST** build in quality assurance and self-correction mechanisms
|
|
64
74
|
|
|
@@ -29,9 +29,8 @@ Main branch: {{git.mainBranch}}
|
|
|
29
29
|
</project>
|
|
30
30
|
{{/ifAny}}
|
|
31
31
|
{{#if skills.length}}
|
|
32
|
-
Skills are specialized knowledge.
|
|
33
|
-
|
|
34
|
-
If a skill covers your output, you **MUST** read `skill://<name>` before proceeding.
|
|
32
|
+
Skills are specialized knowledge. Scan descriptions for your task domain.
|
|
33
|
+
If a skill applies, you **MUST** read `skill://<name>` before proceeding.
|
|
35
34
|
<skills>
|
|
36
35
|
{{#list skills join="\n"}}
|
|
37
36
|
<skill name="{{name}}">
|
|
@@ -46,8 +45,7 @@ If a skill covers your output, you **MUST** read `skill://<name>` before proceed
|
|
|
46
45
|
{{/each}}
|
|
47
46
|
{{/if}}
|
|
48
47
|
{{#if rules.length}}
|
|
49
|
-
Rules are local constraints.
|
|
50
|
-
You **MUST** read `rule://<name>` when working in that domain.
|
|
48
|
+
Rules are local constraints. You **MUST** read `rule://<name>` when working in that domain.
|
|
51
49
|
<rules>
|
|
52
50
|
{{#list rules join="\n"}}
|
|
53
51
|
<rule name="{{name}}">
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
<system-reminder>
|
|
2
|
-
Before
|
|
2
|
+
Before substantive work, create a phased todo.
|
|
3
3
|
|
|
4
4
|
You **MUST** call `todo_write` first in this turn.
|
|
5
5
|
You **MUST** initialize the todo list with a single `init` op.
|
|
6
6
|
You **MUST** cover the entire request from investigation through implementation and verification — not just the next immediate step.
|
|
7
|
-
|
|
7
|
+
Task descriptions **MUST** be specific. A future turn **MUST** execute them without re-planning.
|
|
8
8
|
You **MUST** keep task `content` to a short label (5-10 words). Put file paths, implementation steps, and specifics in `details`.
|
|
9
9
|
You **MUST** keep exactly one task `in_progress` and all later tasks `pending`.
|
|
10
10
|
|
|
11
|
-
After
|
|
12
|
-
Do not
|
|
11
|
+
After `todo_write` succeeds, continue the request in the same turn.
|
|
12
|
+
Do not call `todo_write` again unless task state materially changed.
|
|
13
13
|
</system-reminder>
|
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
<critical>
|
|
2
|
-
Write a
|
|
2
|
+
Write a handoff document for another instance of yourself.
|
|
3
3
|
The handoff **MUST** be sufficient for seamless continuation without access to this conversation.
|
|
4
4
|
Output ONLY the handoff document. No preamble, no commentary, no wrapper text.
|
|
5
5
|
</critical>
|
|
6
6
|
|
|
7
7
|
<instruction>
|
|
8
8
|
Capture exact technical state, not abstractions.
|
|
9
|
-
|
|
9
|
+
- File paths, symbol names, commands run
|
|
10
|
+
- Test results, observed failures
|
|
11
|
+
- Decisions made
|
|
12
|
+
- Partial work affecting the next step
|
|
10
13
|
</instruction>
|
|
11
14
|
|
|
12
15
|
<output>
|
|
@@ -32,8 +35,8 @@ Use exactly this structure:
|
|
|
32
35
|
- **[Decision]**: [Rationale]
|
|
33
36
|
|
|
34
37
|
## Critical Context
|
|
35
|
-
-
|
|
36
|
-
-
|
|
38
|
+
- Code snippets, file paths, function/type names, error messages, data essential to continue
|
|
39
|
+
- Repository state if relevant
|
|
37
40
|
|
|
38
41
|
## Next Steps
|
|
39
42
|
1. [What should happen next]
|