ag-cortex 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/commands/test-browser.md +339 -0
- package/.agent/rules/00-constitution.md +46 -0
- package/.agent/rules/project-rules.md +49 -0
- package/.agent/skills/agent-browser/SKILL.md +223 -0
- package/.agent/skills/agent-native-architecture/SKILL.md +435 -0
- package/.agent/skills/agent-native-architecture/references/action-parity-discipline.md +409 -0
- package/.agent/skills/agent-native-architecture/references/agent-execution-patterns.md +467 -0
- package/.agent/skills/agent-native-architecture/references/agent-native-testing.md +582 -0
- package/.agent/skills/agent-native-architecture/references/architecture-patterns.md +478 -0
- package/.agent/skills/agent-native-architecture/references/dynamic-context-injection.md +338 -0
- package/.agent/skills/agent-native-architecture/references/files-universal-interface.md +301 -0
- package/.agent/skills/agent-native-architecture/references/from-primitives-to-domain-tools.md +359 -0
- package/.agent/skills/agent-native-architecture/references/mcp-tool-design.md +506 -0
- package/.agent/skills/agent-native-architecture/references/mobile-patterns.md +871 -0
- package/.agent/skills/agent-native-architecture/references/product-implications.md +443 -0
- package/.agent/skills/agent-native-architecture/references/refactoring-to-prompt-native.md +317 -0
- package/.agent/skills/agent-native-architecture/references/self-modification.md +269 -0
- package/.agent/skills/agent-native-architecture/references/shared-workspace-architecture.md +680 -0
- package/.agent/skills/agent-native-architecture/references/system-prompt-design.md +250 -0
- package/.agent/skills/agent-native-reviewer/SKILL.md +246 -0
- package/.agent/skills/andrew-kane-gem-writer/SKILL.md +184 -0
- package/.agent/skills/andrew-kane-gem-writer/references/database-adapters.md +231 -0
- package/.agent/skills/andrew-kane-gem-writer/references/module-organization.md +121 -0
- package/.agent/skills/andrew-kane-gem-writer/references/rails-integration.md +183 -0
- package/.agent/skills/andrew-kane-gem-writer/references/resources.md +119 -0
- package/.agent/skills/andrew-kane-gem-writer/references/testing-patterns.md +261 -0
- package/.agent/skills/ankane-readme-writer/SKILL.md +50 -0
- package/.agent/skills/architecture-strategist/SKILL.md +52 -0
- package/.agent/skills/best-practices-researcher/SKILL.md +100 -0
- package/.agent/skills/bug-reproduction-validator/SKILL.md +67 -0
- package/.agent/skills/code-simplicity-reviewer/SKILL.md +85 -0
- package/.agent/skills/coding-tutor/.claude-plugin/plugin.json +9 -0
- package/.agent/skills/coding-tutor/README.md +37 -0
- package/.agent/skills/coding-tutor/commands/quiz-me.md +1 -0
- package/.agent/skills/coding-tutor/commands/sync-tutorials.md +25 -0
- package/.agent/skills/coding-tutor/commands/teach-me.md +1 -0
- package/.agent/skills/coding-tutor/skills/coding-tutor/SKILL.md +214 -0
- package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/create_tutorial.py +202 -0
- package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/index_tutorials.py +203 -0
- package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/quiz_priority.py +190 -0
- package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/setup_tutorials.py +132 -0
- package/.agent/skills/compound-docs/SKILL.md +510 -0
- package/.agent/skills/compound-docs/assets/critical-pattern-template.md +34 -0
- package/.agent/skills/compound-docs/assets/resolution-template.md +93 -0
- package/.agent/skills/compound-docs/references/yaml-schema.md +65 -0
- package/.agent/skills/compound-docs/schema.yaml +176 -0
- package/.agent/skills/create-agent-skills/SKILL.md +299 -0
- package/.agent/skills/create-agent-skills/references/api-security.md +226 -0
- package/.agent/skills/create-agent-skills/references/be-clear-and-direct.md +531 -0
- package/.agent/skills/create-agent-skills/references/best-practices.md +404 -0
- package/.agent/skills/create-agent-skills/references/common-patterns.md +595 -0
- package/.agent/skills/create-agent-skills/references/core-principles.md +437 -0
- package/.agent/skills/create-agent-skills/references/executable-code.md +175 -0
- package/.agent/skills/create-agent-skills/references/iteration-and-testing.md +474 -0
- package/.agent/skills/create-agent-skills/references/official-spec.md +185 -0
- package/.agent/skills/create-agent-skills/references/recommended-structure.md +168 -0
- package/.agent/skills/create-agent-skills/references/skill-structure.md +372 -0
- package/.agent/skills/create-agent-skills/references/using-scripts.md +113 -0
- package/.agent/skills/create-agent-skills/references/using-templates.md +112 -0
- package/.agent/skills/create-agent-skills/references/workflows-and-validation.md +510 -0
- package/.agent/skills/create-agent-skills/templates/router-skill.md +73 -0
- package/.agent/skills/create-agent-skills/templates/simple-skill.md +33 -0
- package/.agent/skills/create-agent-skills/workflows/add-reference.md +96 -0
- package/.agent/skills/create-agent-skills/workflows/add-script.md +93 -0
- package/.agent/skills/create-agent-skills/workflows/add-template.md +74 -0
- package/.agent/skills/create-agent-skills/workflows/add-workflow.md +120 -0
- package/.agent/skills/create-agent-skills/workflows/audit-skill.md +138 -0
- package/.agent/skills/create-agent-skills/workflows/create-domain-expertise-skill.md +605 -0
- package/.agent/skills/create-agent-skills/workflows/create-new-skill.md +191 -0
- package/.agent/skills/create-agent-skills/workflows/get-guidance.md +121 -0
- package/.agent/skills/create-agent-skills/workflows/upgrade-to-router.md +161 -0
- package/.agent/skills/create-agent-skills/workflows/verify-skill.md +204 -0
- package/.agent/skills/data-integrity-guardian/SKILL.md +70 -0
- package/.agent/skills/data-migration-expert/SKILL.md +97 -0
- package/.agent/skills/deployment-verification-agent/SKILL.md +159 -0
- package/.agent/skills/design-implementation-reviewer/SKILL.md +85 -0
- package/.agent/skills/design-iterator/SKILL.md +197 -0
- package/.agent/skills/dhh-rails-reviewer/SKILL.md +45 -0
- package/.agent/skills/dhh-rails-style/SKILL.md +184 -0
- package/.agent/skills/dhh-rails-style/references/architecture.md +653 -0
- package/.agent/skills/dhh-rails-style/references/controllers.md +303 -0
- package/.agent/skills/dhh-rails-style/references/frontend.md +510 -0
- package/.agent/skills/dhh-rails-style/references/gems.md +266 -0
- package/.agent/skills/dhh-rails-style/references/models.md +359 -0
- package/.agent/skills/dhh-rails-style/references/testing.md +338 -0
- package/.agent/skills/dspy-ruby/SKILL.md +594 -0
- package/.agent/skills/dspy-ruby/assets/config-template.rb +359 -0
- package/.agent/skills/dspy-ruby/assets/module-template.rb +326 -0
- package/.agent/skills/dspy-ruby/assets/signature-template.rb +143 -0
- package/.agent/skills/dspy-ruby/references/core-concepts.md +265 -0
- package/.agent/skills/dspy-ruby/references/optimization.md +623 -0
- package/.agent/skills/dspy-ruby/references/providers.md +305 -0
- package/.agent/skills/every-style-editor/SKILL.md +134 -0
- package/.agent/skills/every-style-editor/references/EVERY_WRITE_STYLE.md +529 -0
- package/.agent/skills/figma-design-sync/SKILL.md +166 -0
- package/.agent/skills/file-todos/SKILL.md +251 -0
- package/.agent/skills/file-todos/assets/todo-template.md +155 -0
- package/.agent/skills/framework-docs-researcher/SKILL.md +83 -0
- package/.agent/skills/frontend-design/SKILL.md +42 -0
- package/.agent/skills/gemini-imagegen/SKILL.md +237 -0
- package/.agent/skills/gemini-imagegen/requirements.txt +2 -0
- package/.agent/skills/gemini-imagegen/scripts/compose_images.py +168 -0
- package/.agent/skills/gemini-imagegen/scripts/edit_image.py +157 -0
- package/.agent/skills/gemini-imagegen/scripts/gemini_images.py +265 -0
- package/.agent/skills/gemini-imagegen/scripts/generate_image.py +147 -0
- package/.agent/skills/gemini-imagegen/scripts/multi_turn_chat.py +215 -0
- package/.agent/skills/git-history-analyzer/SKILL.md +42 -0
- package/.agent/skills/git-worktree/SKILL.md +302 -0
- package/.agent/skills/git-worktree/scripts/worktree-manager.sh +345 -0
- package/.agent/skills/julik-frontend-races-reviewer/SKILL.md +222 -0
- package/.agent/skills/kieran-python-reviewer/SKILL.md +104 -0
- package/.agent/skills/kieran-rails-reviewer/SKILL.md +86 -0
- package/.agent/skills/kieran-typescript-reviewer/SKILL.md +95 -0
- package/.agent/skills/lint/SKILL.md +16 -0
- package/.agent/skills/pattern-recognition-specialist/SKILL.md +57 -0
- package/.agent/skills/performance-oracle/SKILL.md +110 -0
- package/.agent/skills/pr-comment-resolver/SKILL.md +69 -0
- package/.agent/skills/rclone/SKILL.md +150 -0
- package/.agent/skills/rclone/scripts/check_setup.sh +60 -0
- package/.agent/skills/repo-research-analyst/SKILL.md +113 -0
- package/.agent/skills/security-sentinel/SKILL.md +93 -0
- package/.agent/skills/skill-creator/SKILL.md +209 -0
- package/.agent/skills/skill-creator/scripts/init_skill.py +304 -0
- package/.agent/skills/skill-creator/scripts/package_skill.py +112 -0
- package/.agent/skills/skill-creator/scripts/quick_validate.py +72 -0
- package/.agent/skills/spec-flow-analyzer/SKILL.md +113 -0
- package/.agent/skills/test-agent/SKILL.md +4 -0
- package/.agent/workflows/agent-native-audit.md +277 -0
- package/.agent/workflows/ask-user-question.md +21 -0
- package/.agent/workflows/changelog.md +137 -0
- package/.agent/workflows/compound.md +202 -0
- package/.agent/workflows/create-agent-skill.md +8 -0
- package/.agent/workflows/deepen-plan-research.md +334 -0
- package/.agent/workflows/deepen-plan-synthesis.md +182 -0
- package/.agent/workflows/deepen-plan.md +79 -0
- package/.agent/workflows/feature-video.md +342 -0
- package/.agent/workflows/generate-command.md +162 -0
- package/.agent/workflows/heal-skill.md +142 -0
- package/.agent/workflows/lfg.md +20 -0
- package/.agent/workflows/plan-analysis.md +67 -0
- package/.agent/workflows/plan-next-steps.md +63 -0
- package/.agent/workflows/plan-review.md +33 -0
- package/.agent/workflows/plan-synthesis.md +106 -0
- package/.agent/workflows/plan.md +49 -0
- package/.agent/workflows/report-bug.md +150 -0
- package/.agent/workflows/reproduce-bug.md +99 -0
- package/.agent/workflows/resolve-parallel.md +34 -0
- package/.agent/workflows/resolve-pr-parallel.md +49 -0
- package/.agent/workflows/resolve-todo-parallel.md +35 -0
- package/.agent/workflows/review-analysis.md +145 -0
- package/.agent/workflows/review-synthesis.md +262 -0
- package/.agent/workflows/review.md +64 -0
- package/.agent/workflows/ship.md +90 -0
- package/.agent/workflows/test-command.md +3 -0
- package/.agent/workflows/triage.md +310 -0
- package/.agent/workflows/work.md +157 -0
- package/.agent/workflows/xcode-test.md +332 -0
- package/LICENSE +22 -0
- package/README.md +49 -0
- package/bin/ag-cortex.js +54 -0
- package/lib/core.js +165 -0
- package/package.json +31 -0
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
<overview>
|
|
2
|
+
This guide explains how to refactor existing agent code to follow prompt-native principles. The goal: move behavior from code into prompts, and simplify tools into primitives.
|
|
3
|
+
</overview>
|
|
4
|
+
|
|
5
|
+
<diagnosis>
|
|
6
|
+
## Diagnosing Non-Prompt-Native Code
|
|
7
|
+
|
|
8
|
+
Signs your agent isn't prompt-native:
|
|
9
|
+
|
|
10
|
+
**Tools that encode workflows:**
|
|
11
|
+
```typescript
|
|
12
|
+
// RED FLAG: Tool contains business logic
|
|
13
|
+
tool("process_feedback", async ({ message }) => {
|
|
14
|
+
const category = categorize(message); // Logic in code
|
|
15
|
+
const priority = calculatePriority(message); // Logic in code
|
|
16
|
+
await store(message, category, priority); // Orchestration in code
|
|
17
|
+
if (priority > 3) await notify(); // Decision in code
|
|
18
|
+
});
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
**Agent calls functions instead of figuring things out:**
|
|
22
|
+
```typescript
|
|
23
|
+
// RED FLAG: Agent is just a function caller
|
|
24
|
+
"Use process_feedback to handle incoming messages"
|
|
25
|
+
// vs.
|
|
26
|
+
"When feedback comes in, decide importance, store it, notify if high"
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
**Artificial limits on agent capability:**
|
|
30
|
+
```typescript
|
|
31
|
+
// RED FLAG: Tool prevents agent from doing what users can do
|
|
32
|
+
tool("read_file", async ({ path }) => {
|
|
33
|
+
if (!ALLOWED_PATHS.includes(path)) {
|
|
34
|
+
throw new Error("Not allowed to read this file");
|
|
35
|
+
}
|
|
36
|
+
return readFile(path);
|
|
37
|
+
});
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
**Prompts that specify HOW instead of WHAT:**
|
|
41
|
+
```markdown
|
|
42
|
+
// RED FLAG: Micromanaging the agent
|
|
43
|
+
When creating a summary:
|
|
44
|
+
1. Use exactly 3 bullet points
|
|
45
|
+
2. Each bullet must be under 20 words
|
|
46
|
+
3. Format with em-dashes for sub-points
|
|
47
|
+
4. Bold the first word of each bullet
|
|
48
|
+
```
|
|
49
|
+
</diagnosis>
|
|
50
|
+
|
|
51
|
+
<refactoring_workflow>
|
|
52
|
+
## Step-by-Step Refactoring
|
|
53
|
+
|
|
54
|
+
**Step 1: Identify workflow tools**
|
|
55
|
+
|
|
56
|
+
List all your tools. Mark any that:
|
|
57
|
+
- Have business logic (categorize, calculate, decide)
|
|
58
|
+
- Orchestrate multiple operations
|
|
59
|
+
- Make decisions on behalf of the agent
|
|
60
|
+
- Contain conditional logic (if/else based on content)
|
|
61
|
+
|
|
62
|
+
**Step 2: Extract the primitives**
|
|
63
|
+
|
|
64
|
+
For each workflow tool, identify the underlying primitives:
|
|
65
|
+
|
|
66
|
+
| Workflow Tool | Hidden Primitives |
|
|
67
|
+
|---------------|-------------------|
|
|
68
|
+
| `process_feedback` | `store_item`, `send_message` |
|
|
69
|
+
| `generate_report` | `read_file`, `write_file` |
|
|
70
|
+
| `deploy_and_notify` | `git_push`, `send_message` |
|
|
71
|
+
|
|
72
|
+
**Step 3: Move behavior to the prompt**
|
|
73
|
+
|
|
74
|
+
Take the logic from your workflow tools and express it in natural language:
|
|
75
|
+
|
|
76
|
+
```typescript
|
|
77
|
+
// Before (in code):
|
|
78
|
+
async function processFeedback(message) {
|
|
79
|
+
const priority = message.includes("crash") ? 5 :
|
|
80
|
+
message.includes("bug") ? 4 : 3;
|
|
81
|
+
await store(message, priority);
|
|
82
|
+
if (priority >= 4) await notify();
|
|
83
|
+
}
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
```markdown
|
|
87
|
+
// After (in prompt):
|
|
88
|
+
## Feedback Processing
|
|
89
|
+
|
|
90
|
+
When someone shares feedback:
|
|
91
|
+
1. Rate importance 1-5:
|
|
92
|
+
- 5: Crashes, data loss, security issues
|
|
93
|
+
- 4: Bug reports with clear reproduction steps
|
|
94
|
+
- 3: General suggestions, minor issues
|
|
95
|
+
2. Store using store_item
|
|
96
|
+
3. If importance >= 4, notify the team
|
|
97
|
+
|
|
98
|
+
Use your judgment. Context matters more than keywords.
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
**Step 4: Simplify tools to primitives**
|
|
102
|
+
|
|
103
|
+
```typescript
|
|
104
|
+
// Before: 1 workflow tool
|
|
105
|
+
tool("process_feedback", { message, category, priority }, ...complex logic...)
|
|
106
|
+
|
|
107
|
+
// After: 2 primitive tools
|
|
108
|
+
tool("store_item", { key: z.string(), value: z.any() }, ...simple storage...)
|
|
109
|
+
tool("send_message", { channel: z.string(), content: z.string() }, ...simple send...)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
**Step 5: Remove artificial limits**
|
|
113
|
+
|
|
114
|
+
```typescript
|
|
115
|
+
// Before: Limited capability
|
|
116
|
+
tool("read_file", async ({ path }) => {
|
|
117
|
+
if (!isAllowed(path)) throw new Error("Forbidden");
|
|
118
|
+
return readFile(path);
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
// After: Full capability
|
|
122
|
+
tool("read_file", async ({ path }) => {
|
|
123
|
+
return readFile(path); // Agent can read anything
|
|
124
|
+
});
|
|
125
|
+
// Use approval gates for WRITES, not artificial limits on READS
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
**Step 6: Test with outcomes, not procedures**
|
|
129
|
+
|
|
130
|
+
Instead of testing "does it call the right function?", test "does it achieve the outcome?"
|
|
131
|
+
|
|
132
|
+
```typescript
|
|
133
|
+
// Before: Testing procedure
|
|
134
|
+
expect(mockProcessFeedback).toHaveBeenCalledWith(...)
|
|
135
|
+
|
|
136
|
+
// After: Testing outcome
|
|
137
|
+
// Send feedback → Check it was stored with reasonable importance
|
|
138
|
+
// Send high-priority feedback → Check notification was sent
|
|
139
|
+
```
|
|
140
|
+
</refactoring_workflow>
|
|
141
|
+
|
|
142
|
+
<before_after>
|
|
143
|
+
## Before/After Examples
|
|
144
|
+
|
|
145
|
+
**Example 1: Feedback Processing**
|
|
146
|
+
|
|
147
|
+
Before:
|
|
148
|
+
```typescript
|
|
149
|
+
tool("handle_feedback", async ({ message, author }) => {
|
|
150
|
+
const category = detectCategory(message);
|
|
151
|
+
const priority = calculatePriority(message, category);
|
|
152
|
+
const feedbackId = await db.feedback.insert({
|
|
153
|
+
id: generateId(),
|
|
154
|
+
author,
|
|
155
|
+
message,
|
|
156
|
+
category,
|
|
157
|
+
priority,
|
|
158
|
+
timestamp: new Date().toISOString(),
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
if (priority >= 4) {
|
|
162
|
+
await discord.send(ALERT_CHANNEL, `High priority feedback from ${author}`);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
return { feedbackId, category, priority };
|
|
166
|
+
});
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
After:
|
|
170
|
+
```typescript
|
|
171
|
+
// Simple storage primitive
|
|
172
|
+
tool("store_feedback", async ({ item }) => {
|
|
173
|
+
await db.feedback.insert(item);
|
|
174
|
+
return { text: `Stored feedback ${item.id}` };
|
|
175
|
+
});
|
|
176
|
+
|
|
177
|
+
// Simple message primitive
|
|
178
|
+
tool("send_message", async ({ channel, content }) => {
|
|
179
|
+
await discord.send(channel, content);
|
|
180
|
+
return { text: "Sent" };
|
|
181
|
+
});
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
System prompt:
|
|
185
|
+
```markdown
|
|
186
|
+
## Feedback Processing
|
|
187
|
+
|
|
188
|
+
When someone shares feedback:
|
|
189
|
+
1. Generate a unique ID
|
|
190
|
+
2. Rate importance 1-5 based on impact and urgency
|
|
191
|
+
3. Store using store_feedback with the full item
|
|
192
|
+
4. If importance >= 4, send a notification to the team channel
|
|
193
|
+
|
|
194
|
+
Importance guidelines:
|
|
195
|
+
- 5: Critical (crashes, data loss, security)
|
|
196
|
+
- 4: High (detailed bug reports, blocking issues)
|
|
197
|
+
- 3: Medium (suggestions, minor bugs)
|
|
198
|
+
- 2: Low (cosmetic, edge cases)
|
|
199
|
+
- 1: Minimal (off-topic, duplicates)
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
**Example 2: Report Generation**
|
|
203
|
+
|
|
204
|
+
Before:
|
|
205
|
+
```typescript
|
|
206
|
+
tool("generate_weekly_report", async ({ startDate, endDate, format }) => {
|
|
207
|
+
const data = await fetchMetrics(startDate, endDate);
|
|
208
|
+
const summary = summarizeMetrics(data);
|
|
209
|
+
const charts = generateCharts(data);
|
|
210
|
+
|
|
211
|
+
if (format === "html") {
|
|
212
|
+
return renderHtmlReport(summary, charts);
|
|
213
|
+
} else if (format === "markdown") {
|
|
214
|
+
return renderMarkdownReport(summary, charts);
|
|
215
|
+
} else {
|
|
216
|
+
return renderPdfReport(summary, charts);
|
|
217
|
+
}
|
|
218
|
+
});
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
After:
|
|
222
|
+
```typescript
|
|
223
|
+
tool("query_metrics", async ({ start, end }) => {
|
|
224
|
+
const data = await db.metrics.query({ start, end });
|
|
225
|
+
return { text: JSON.stringify(data, null, 2) };
|
|
226
|
+
});
|
|
227
|
+
|
|
228
|
+
tool("write_file", async ({ path, content }) => {
|
|
229
|
+
writeFileSync(path, content);
|
|
230
|
+
return { text: `Wrote ${path}` };
|
|
231
|
+
});
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
System prompt:
|
|
235
|
+
```markdown
|
|
236
|
+
## Report Generation
|
|
237
|
+
|
|
238
|
+
When asked to generate a report:
|
|
239
|
+
1. Query the relevant metrics using query_metrics
|
|
240
|
+
2. Analyze the data and identify key trends
|
|
241
|
+
3. Create a clear, well-formatted report
|
|
242
|
+
4. Write it using write_file in the appropriate format
|
|
243
|
+
|
|
244
|
+
Use your judgment about format and structure. Make it useful.
|
|
245
|
+
```
|
|
246
|
+
</before_after>
|
|
247
|
+
|
|
248
|
+
<common_challenges>
|
|
249
|
+
## Common Refactoring Challenges
|
|
250
|
+
|
|
251
|
+
**"But the agent might make mistakes!"**
|
|
252
|
+
|
|
253
|
+
Yes, and you can iterate. Change the prompt to add guidance:
|
|
254
|
+
```markdown
|
|
255
|
+
// Before
|
|
256
|
+
Rate importance 1-5.
|
|
257
|
+
|
|
258
|
+
// After (if agent keeps rating too high)
|
|
259
|
+
Rate importance 1-5. Be conservative—most feedback is 2-3.
|
|
260
|
+
Only use 4-5 for truly blocking or critical issues.
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
**"The workflow is complex!"**
|
|
264
|
+
|
|
265
|
+
Complex workflows can still be expressed in prompts. The agent is smart.
|
|
266
|
+
```markdown
|
|
267
|
+
When processing video feedback:
|
|
268
|
+
1. Check if it's a Loom, YouTube, or direct link
|
|
269
|
+
2. For YouTube, pass URL directly to video analysis
|
|
270
|
+
3. For others, download first, then analyze
|
|
271
|
+
4. Extract timestamped issues
|
|
272
|
+
5. Rate based on issue density and severity
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
**"We need deterministic behavior!"**
|
|
276
|
+
|
|
277
|
+
Some operations should stay in code. That's fine. Prompt-native isn't all-or-nothing.
|
|
278
|
+
|
|
279
|
+
Keep in code:
|
|
280
|
+
- Security validation
|
|
281
|
+
- Rate limiting
|
|
282
|
+
- Audit logging
|
|
283
|
+
- Exact format requirements
|
|
284
|
+
|
|
285
|
+
Move to prompts:
|
|
286
|
+
- Categorization decisions
|
|
287
|
+
- Priority judgments
|
|
288
|
+
- Content generation
|
|
289
|
+
- Workflow orchestration
|
|
290
|
+
|
|
291
|
+
**"What about testing?"**
|
|
292
|
+
|
|
293
|
+
Test outcomes, not procedures:
|
|
294
|
+
- "Given this input, does the agent achieve the right result?"
|
|
295
|
+
- "Does stored feedback have reasonable importance ratings?"
|
|
296
|
+
- "Are notifications sent for truly high-priority items?"
|
|
297
|
+
</common_challenges>
|
|
298
|
+
|
|
299
|
+
<checklist>
|
|
300
|
+
## Refactoring Checklist
|
|
301
|
+
|
|
302
|
+
Diagnosis:
|
|
303
|
+
- [ ] Listed all tools with business logic
|
|
304
|
+
- [ ] Identified artificial limits on agent capability
|
|
305
|
+
- [ ] Found prompts that micromanage HOW
|
|
306
|
+
|
|
307
|
+
Refactoring:
|
|
308
|
+
- [ ] Extracted primitives from workflow tools
|
|
309
|
+
- [ ] Moved business logic to system prompt
|
|
310
|
+
- [ ] Removed artificial limits
|
|
311
|
+
- [ ] Simplified tool inputs to data, not decisions
|
|
312
|
+
|
|
313
|
+
Validation:
|
|
314
|
+
- [ ] Agent achieves same outcomes with primitives
|
|
315
|
+
- [ ] Behavior can be changed by editing prompts
|
|
316
|
+
- [ ] New features could be added without new tools
|
|
317
|
+
</checklist>
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
<overview>
|
|
2
|
+
Self-modification is the advanced tier of agent-native engineering: agents that can evolve their own code, prompts, and behavior. It is not required for every app, but it is a big part of the future.
|
|
3
|
+
|
|
4
|
+
This is the logical extension of "whatever the developer can do, the agent can do."
|
|
5
|
+
</overview>
|
|
6
|
+
|
|
7
|
+
<why_self_modification>
|
|
8
|
+
## Why Self-Modification?
|
|
9
|
+
|
|
10
|
+
Traditional software is static—it does what you wrote, nothing more. Self-modifying agents can:
|
|
11
|
+
|
|
12
|
+
- **Fix their own bugs** - See an error, patch the code, restart
|
|
13
|
+
- **Add new capabilities** - User asks for something new, agent implements it
|
|
14
|
+
- **Evolve behavior** - Learn from feedback and adjust prompts
|
|
15
|
+
- **Deploy themselves** - Push code, trigger builds, restart
|
|
16
|
+
|
|
17
|
+
The agent becomes a living system that improves over time, not frozen code.
|
|
18
|
+
</why_self_modification>
|
|
19
|
+
|
|
20
|
+
<capabilities>
|
|
21
|
+
## What Self-Modification Enables
|
|
22
|
+
|
|
23
|
+
**Code modification:**
|
|
24
|
+
- Read and understand source files
|
|
25
|
+
- Write fixes and new features
|
|
26
|
+
- Commit and push to version control
|
|
27
|
+
- Trigger builds and verify they pass
|
|
28
|
+
|
|
29
|
+
**Prompt evolution:**
|
|
30
|
+
- Edit the system prompt based on feedback
|
|
31
|
+
- Add new features as prompt sections
|
|
32
|
+
- Refine judgment criteria that aren't working
|
|
33
|
+
|
|
34
|
+
**Infrastructure control:**
|
|
35
|
+
- Pull latest code from upstream
|
|
36
|
+
- Merge from other branches/instances
|
|
37
|
+
- Restart after changes
|
|
38
|
+
- Roll back if something breaks
|
|
39
|
+
|
|
40
|
+
**Site/output generation:**
|
|
41
|
+
- Generate and maintain websites
|
|
42
|
+
- Create documentation
|
|
43
|
+
- Build dashboards from data
|
|
44
|
+
</capabilities>
|
|
45
|
+
|
|
46
|
+
<guardrails>
|
|
47
|
+
## Required Guardrails
|
|
48
|
+
|
|
49
|
+
Self-modification is powerful. It needs safety mechanisms.
|
|
50
|
+
|
|
51
|
+
**Approval gates for code changes:**
|
|
52
|
+
```typescript
|
|
53
|
+
tool("write_file", async ({ path, content }) => {
|
|
54
|
+
if (isCodeFile(path)) {
|
|
55
|
+
// Store for approval, don't apply immediately
|
|
56
|
+
pendingChanges.set(path, content);
|
|
57
|
+
const diff = generateDiff(path, content);
|
|
58
|
+
return { text: `Requires approval:\n\n${diff}\n\nReply "yes" to apply.` };
|
|
59
|
+
}
|
|
60
|
+
// Non-code files apply immediately
|
|
61
|
+
writeFileSync(path, content);
|
|
62
|
+
return { text: `Wrote ${path}` };
|
|
63
|
+
});
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
**Auto-commit before changes:**
|
|
67
|
+
```typescript
|
|
68
|
+
tool("self_deploy", async () => {
|
|
69
|
+
// Save current state first
|
|
70
|
+
runGit("stash"); // or commit uncommitted changes
|
|
71
|
+
|
|
72
|
+
// Then pull/merge
|
|
73
|
+
runGit("fetch origin");
|
|
74
|
+
runGit("merge origin/main --no-edit");
|
|
75
|
+
|
|
76
|
+
// Build and verify
|
|
77
|
+
runCommand("npm run build");
|
|
78
|
+
|
|
79
|
+
// Only then restart
|
|
80
|
+
scheduleRestart();
|
|
81
|
+
});
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**Build verification:**
|
|
85
|
+
```typescript
|
|
86
|
+
// Don't restart unless build passes
|
|
87
|
+
try {
|
|
88
|
+
runCommand("npm run build", { timeout: 120000 });
|
|
89
|
+
} catch (error) {
|
|
90
|
+
// Roll back the merge
|
|
91
|
+
runGit("merge --abort");
|
|
92
|
+
return { text: "Build failed, aborting deploy", isError: true };
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**Health checks after restart:**
|
|
97
|
+
```typescript
|
|
98
|
+
tool("health_check", async () => {
|
|
99
|
+
const uptime = process.uptime();
|
|
100
|
+
const buildValid = existsSync("dist/index.js");
|
|
101
|
+
const gitClean = !runGit("status --porcelain");
|
|
102
|
+
|
|
103
|
+
return {
|
|
104
|
+
text: JSON.stringify({
|
|
105
|
+
status: "healthy",
|
|
106
|
+
uptime: `${Math.floor(uptime / 60)}m`,
|
|
107
|
+
build: buildValid ? "valid" : "missing",
|
|
108
|
+
git: gitClean ? "clean" : "uncommitted changes",
|
|
109
|
+
}, null, 2),
|
|
110
|
+
};
|
|
111
|
+
});
|
|
112
|
+
```
|
|
113
|
+
</guardrails>
|
|
114
|
+
|
|
115
|
+
<git_architecture>
|
|
116
|
+
## Git-Based Self-Modification
|
|
117
|
+
|
|
118
|
+
Use git as the foundation for self-modification. It provides:
|
|
119
|
+
- Version history (rollback capability)
|
|
120
|
+
- Branching (experiment safely)
|
|
121
|
+
- Merge (sync with other instances)
|
|
122
|
+
- Push/pull (deploy and collaborate)
|
|
123
|
+
|
|
124
|
+
**Essential git tools:**
|
|
125
|
+
```typescript
|
|
126
|
+
tool("status", "Show git status", {}, ...);
|
|
127
|
+
tool("diff", "Show file changes", { path: z.string().optional() }, ...);
|
|
128
|
+
tool("log", "Show commit history", { count: z.number() }, ...);
|
|
129
|
+
tool("commit_code", "Commit code changes", { message: z.string() }, ...);
|
|
130
|
+
tool("git_push", "Push to GitHub", { branch: z.string().optional() }, ...);
|
|
131
|
+
tool("pull", "Pull from GitHub", { source: z.enum(["main", "instance"]) }, ...);
|
|
132
|
+
tool("rollback", "Revert recent commits", { commits: z.number() }, ...);
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
**Multi-instance architecture:**
|
|
136
|
+
```
|
|
137
|
+
main # Shared code
|
|
138
|
+
├── instance/bot-a # Instance A's branch
|
|
139
|
+
├── instance/bot-b # Instance B's branch
|
|
140
|
+
└── instance/bot-c # Instance C's branch
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Each instance can:
|
|
144
|
+
- Pull updates from main
|
|
145
|
+
- Push improvements back to main (via PR)
|
|
146
|
+
- Sync features from other instances
|
|
147
|
+
- Maintain instance-specific config
|
|
148
|
+
</git_architecture>
|
|
149
|
+
|
|
150
|
+
<prompt_evolution>
|
|
151
|
+
## Self-Modifying Prompts
|
|
152
|
+
|
|
153
|
+
The system prompt is a file the agent can read and write.
|
|
154
|
+
|
|
155
|
+
```typescript
|
|
156
|
+
// Agent can read its own prompt
|
|
157
|
+
tool("read_file", ...); // Can read src/prompts/system.md
|
|
158
|
+
|
|
159
|
+
// Agent can propose changes
|
|
160
|
+
tool("write_file", ...); // Can write to src/prompts/system.md (with approval)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
**System prompt as living document:**
|
|
164
|
+
```markdown
|
|
165
|
+
## Feedback Processing
|
|
166
|
+
|
|
167
|
+
When someone shares feedback:
|
|
168
|
+
1. Acknowledge warmly
|
|
169
|
+
2. Rate importance 1-5
|
|
170
|
+
3. Store using feedback tools
|
|
171
|
+
|
|
172
|
+
<!-- Note to self: Video walkthroughs should always be 4-5,
|
|
173
|
+
learned this from Dan's feedback on 2024-12-07 -->
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
The agent can:
|
|
177
|
+
- Add notes to itself
|
|
178
|
+
- Refine judgment criteria
|
|
179
|
+
- Add new feature sections
|
|
180
|
+
- Document edge cases it learned
|
|
181
|
+
</prompt_evolution>
|
|
182
|
+
|
|
183
|
+
<when_to_use>
|
|
184
|
+
## When to Implement Self-Modification
|
|
185
|
+
|
|
186
|
+
**Good candidates:**
|
|
187
|
+
- Long-running autonomous agents
|
|
188
|
+
- Agents that need to adapt to feedback
|
|
189
|
+
- Systems where behavior evolution is valuable
|
|
190
|
+
- Internal tools where rapid iteration matters
|
|
191
|
+
|
|
192
|
+
**Not necessary for:**
|
|
193
|
+
- Simple single-task agents
|
|
194
|
+
- Highly regulated environments
|
|
195
|
+
- Systems where behavior must be auditable
|
|
196
|
+
- One-off or short-lived agents
|
|
197
|
+
|
|
198
|
+
Start with a non-self-modifying prompt-native agent. Add self-modification when you need it.
|
|
199
|
+
</when_to_use>
|
|
200
|
+
|
|
201
|
+
<example_tools>
|
|
202
|
+
## Complete Self-Modification Toolset
|
|
203
|
+
|
|
204
|
+
```typescript
|
|
205
|
+
const selfMcpServer = createSdkMcpServer({
|
|
206
|
+
name: "self",
|
|
207
|
+
version: "1.0.0",
|
|
208
|
+
tools: [
|
|
209
|
+
// FILE OPERATIONS
|
|
210
|
+
tool("read_file", "Read any project file", { path: z.string() }, ...),
|
|
211
|
+
tool("write_file", "Write a file (code requires approval)", { path, content }, ...),
|
|
212
|
+
tool("list_files", "List directory contents", { path: z.string() }, ...),
|
|
213
|
+
tool("search_code", "Search for patterns", { pattern: z.string() }, ...),
|
|
214
|
+
|
|
215
|
+
// APPROVAL WORKFLOW
|
|
216
|
+
tool("apply_pending", "Apply approved changes", {}, ...),
|
|
217
|
+
tool("get_pending", "Show pending changes", {}, ...),
|
|
218
|
+
tool("clear_pending", "Discard pending changes", {}, ...),
|
|
219
|
+
|
|
220
|
+
// RESTART
|
|
221
|
+
tool("restart", "Rebuild and restart", {}, ...),
|
|
222
|
+
tool("health_check", "Check if bot is healthy", {}, ...),
|
|
223
|
+
],
|
|
224
|
+
});
|
|
225
|
+
|
|
226
|
+
const gitMcpServer = createSdkMcpServer({
|
|
227
|
+
name: "git",
|
|
228
|
+
version: "1.0.0",
|
|
229
|
+
tools: [
|
|
230
|
+
// STATUS
|
|
231
|
+
tool("status", "Show git status", {}, ...),
|
|
232
|
+
tool("diff", "Show changes", { path: z.string().optional() }, ...),
|
|
233
|
+
tool("log", "Show history", { count: z.number() }, ...),
|
|
234
|
+
|
|
235
|
+
// COMMIT & PUSH
|
|
236
|
+
tool("commit_code", "Commit code changes", { message: z.string() }, ...),
|
|
237
|
+
tool("git_push", "Push to GitHub", { branch: z.string().optional() }, ...),
|
|
238
|
+
|
|
239
|
+
// SYNC
|
|
240
|
+
tool("pull", "Pull from upstream", { source: z.enum(["main", "instance"]) }, ...),
|
|
241
|
+
tool("self_deploy", "Pull, build, restart", { source: z.enum(["main", "instance"]) }, ...),
|
|
242
|
+
|
|
243
|
+
// SAFETY
|
|
244
|
+
tool("rollback", "Revert commits", { commits: z.number() }, ...),
|
|
245
|
+
tool("health_check", "Detailed health report", {}, ...),
|
|
246
|
+
],
|
|
247
|
+
});
|
|
248
|
+
```
|
|
249
|
+
</example_tools>
|
|
250
|
+
|
|
251
|
+
<checklist>
|
|
252
|
+
## Self-Modification Checklist
|
|
253
|
+
|
|
254
|
+
Before enabling self-modification:
|
|
255
|
+
- [ ] Git-based version control set up
|
|
256
|
+
- [ ] Approval gates for code changes
|
|
257
|
+
- [ ] Build verification before restart
|
|
258
|
+
- [ ] Rollback mechanism available
|
|
259
|
+
- [ ] Health check endpoint
|
|
260
|
+
- [ ] Instance identity configured
|
|
261
|
+
|
|
262
|
+
When implementing:
|
|
263
|
+
- [ ] Agent can read all project files
|
|
264
|
+
- [ ] Agent can write files (with appropriate approval)
|
|
265
|
+
- [ ] Agent can commit and push
|
|
266
|
+
- [ ] Agent can pull updates
|
|
267
|
+
- [ ] Agent can restart itself
|
|
268
|
+
- [ ] Agent can roll back if needed
|
|
269
|
+
</checklist>
|