ag-cortex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/.agent/commands/test-browser.md +339 -0
  2. package/.agent/rules/00-constitution.md +46 -0
  3. package/.agent/rules/project-rules.md +49 -0
  4. package/.agent/skills/agent-browser/SKILL.md +223 -0
  5. package/.agent/skills/agent-native-architecture/SKILL.md +435 -0
  6. package/.agent/skills/agent-native-architecture/references/action-parity-discipline.md +409 -0
  7. package/.agent/skills/agent-native-architecture/references/agent-execution-patterns.md +467 -0
  8. package/.agent/skills/agent-native-architecture/references/agent-native-testing.md +582 -0
  9. package/.agent/skills/agent-native-architecture/references/architecture-patterns.md +478 -0
  10. package/.agent/skills/agent-native-architecture/references/dynamic-context-injection.md +338 -0
  11. package/.agent/skills/agent-native-architecture/references/files-universal-interface.md +301 -0
  12. package/.agent/skills/agent-native-architecture/references/from-primitives-to-domain-tools.md +359 -0
  13. package/.agent/skills/agent-native-architecture/references/mcp-tool-design.md +506 -0
  14. package/.agent/skills/agent-native-architecture/references/mobile-patterns.md +871 -0
  15. package/.agent/skills/agent-native-architecture/references/product-implications.md +443 -0
  16. package/.agent/skills/agent-native-architecture/references/refactoring-to-prompt-native.md +317 -0
  17. package/.agent/skills/agent-native-architecture/references/self-modification.md +269 -0
  18. package/.agent/skills/agent-native-architecture/references/shared-workspace-architecture.md +680 -0
  19. package/.agent/skills/agent-native-architecture/references/system-prompt-design.md +250 -0
  20. package/.agent/skills/agent-native-reviewer/SKILL.md +246 -0
  21. package/.agent/skills/andrew-kane-gem-writer/SKILL.md +184 -0
  22. package/.agent/skills/andrew-kane-gem-writer/references/database-adapters.md +231 -0
  23. package/.agent/skills/andrew-kane-gem-writer/references/module-organization.md +121 -0
  24. package/.agent/skills/andrew-kane-gem-writer/references/rails-integration.md +183 -0
  25. package/.agent/skills/andrew-kane-gem-writer/references/resources.md +119 -0
  26. package/.agent/skills/andrew-kane-gem-writer/references/testing-patterns.md +261 -0
  27. package/.agent/skills/ankane-readme-writer/SKILL.md +50 -0
  28. package/.agent/skills/architecture-strategist/SKILL.md +52 -0
  29. package/.agent/skills/best-practices-researcher/SKILL.md +100 -0
  30. package/.agent/skills/bug-reproduction-validator/SKILL.md +67 -0
  31. package/.agent/skills/code-simplicity-reviewer/SKILL.md +85 -0
  32. package/.agent/skills/coding-tutor/.claude-plugin/plugin.json +9 -0
  33. package/.agent/skills/coding-tutor/README.md +37 -0
  34. package/.agent/skills/coding-tutor/commands/quiz-me.md +1 -0
  35. package/.agent/skills/coding-tutor/commands/sync-tutorials.md +25 -0
  36. package/.agent/skills/coding-tutor/commands/teach-me.md +1 -0
  37. package/.agent/skills/coding-tutor/skills/coding-tutor/SKILL.md +214 -0
  38. package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/create_tutorial.py +202 -0
  39. package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/index_tutorials.py +203 -0
  40. package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/quiz_priority.py +190 -0
  41. package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/setup_tutorials.py +132 -0
  42. package/.agent/skills/compound-docs/SKILL.md +510 -0
  43. package/.agent/skills/compound-docs/assets/critical-pattern-template.md +34 -0
  44. package/.agent/skills/compound-docs/assets/resolution-template.md +93 -0
  45. package/.agent/skills/compound-docs/references/yaml-schema.md +65 -0
  46. package/.agent/skills/compound-docs/schema.yaml +176 -0
  47. package/.agent/skills/create-agent-skills/SKILL.md +299 -0
  48. package/.agent/skills/create-agent-skills/references/api-security.md +226 -0
  49. package/.agent/skills/create-agent-skills/references/be-clear-and-direct.md +531 -0
  50. package/.agent/skills/create-agent-skills/references/best-practices.md +404 -0
  51. package/.agent/skills/create-agent-skills/references/common-patterns.md +595 -0
  52. package/.agent/skills/create-agent-skills/references/core-principles.md +437 -0
  53. package/.agent/skills/create-agent-skills/references/executable-code.md +175 -0
  54. package/.agent/skills/create-agent-skills/references/iteration-and-testing.md +474 -0
  55. package/.agent/skills/create-agent-skills/references/official-spec.md +185 -0
  56. package/.agent/skills/create-agent-skills/references/recommended-structure.md +168 -0
  57. package/.agent/skills/create-agent-skills/references/skill-structure.md +372 -0
  58. package/.agent/skills/create-agent-skills/references/using-scripts.md +113 -0
  59. package/.agent/skills/create-agent-skills/references/using-templates.md +112 -0
  60. package/.agent/skills/create-agent-skills/references/workflows-and-validation.md +510 -0
  61. package/.agent/skills/create-agent-skills/templates/router-skill.md +73 -0
  62. package/.agent/skills/create-agent-skills/templates/simple-skill.md +33 -0
  63. package/.agent/skills/create-agent-skills/workflows/add-reference.md +96 -0
  64. package/.agent/skills/create-agent-skills/workflows/add-script.md +93 -0
  65. package/.agent/skills/create-agent-skills/workflows/add-template.md +74 -0
  66. package/.agent/skills/create-agent-skills/workflows/add-workflow.md +120 -0
  67. package/.agent/skills/create-agent-skills/workflows/audit-skill.md +138 -0
  68. package/.agent/skills/create-agent-skills/workflows/create-domain-expertise-skill.md +605 -0
  69. package/.agent/skills/create-agent-skills/workflows/create-new-skill.md +191 -0
  70. package/.agent/skills/create-agent-skills/workflows/get-guidance.md +121 -0
  71. package/.agent/skills/create-agent-skills/workflows/upgrade-to-router.md +161 -0
  72. package/.agent/skills/create-agent-skills/workflows/verify-skill.md +204 -0
  73. package/.agent/skills/data-integrity-guardian/SKILL.md +70 -0
  74. package/.agent/skills/data-migration-expert/SKILL.md +97 -0
  75. package/.agent/skills/deployment-verification-agent/SKILL.md +159 -0
  76. package/.agent/skills/design-implementation-reviewer/SKILL.md +85 -0
  77. package/.agent/skills/design-iterator/SKILL.md +197 -0
  78. package/.agent/skills/dhh-rails-reviewer/SKILL.md +45 -0
  79. package/.agent/skills/dhh-rails-style/SKILL.md +184 -0
  80. package/.agent/skills/dhh-rails-style/references/architecture.md +653 -0
  81. package/.agent/skills/dhh-rails-style/references/controllers.md +303 -0
  82. package/.agent/skills/dhh-rails-style/references/frontend.md +510 -0
  83. package/.agent/skills/dhh-rails-style/references/gems.md +266 -0
  84. package/.agent/skills/dhh-rails-style/references/models.md +359 -0
  85. package/.agent/skills/dhh-rails-style/references/testing.md +338 -0
  86. package/.agent/skills/dspy-ruby/SKILL.md +594 -0
  87. package/.agent/skills/dspy-ruby/assets/config-template.rb +359 -0
  88. package/.agent/skills/dspy-ruby/assets/module-template.rb +326 -0
  89. package/.agent/skills/dspy-ruby/assets/signature-template.rb +143 -0
  90. package/.agent/skills/dspy-ruby/references/core-concepts.md +265 -0
  91. package/.agent/skills/dspy-ruby/references/optimization.md +623 -0
  92. package/.agent/skills/dspy-ruby/references/providers.md +305 -0
  93. package/.agent/skills/every-style-editor/SKILL.md +134 -0
  94. package/.agent/skills/every-style-editor/references/EVERY_WRITE_STYLE.md +529 -0
  95. package/.agent/skills/figma-design-sync/SKILL.md +166 -0
  96. package/.agent/skills/file-todos/SKILL.md +251 -0
  97. package/.agent/skills/file-todos/assets/todo-template.md +155 -0
  98. package/.agent/skills/framework-docs-researcher/SKILL.md +83 -0
  99. package/.agent/skills/frontend-design/SKILL.md +42 -0
  100. package/.agent/skills/gemini-imagegen/SKILL.md +237 -0
  101. package/.agent/skills/gemini-imagegen/requirements.txt +2 -0
  102. package/.agent/skills/gemini-imagegen/scripts/compose_images.py +168 -0
  103. package/.agent/skills/gemini-imagegen/scripts/edit_image.py +157 -0
  104. package/.agent/skills/gemini-imagegen/scripts/gemini_images.py +265 -0
  105. package/.agent/skills/gemini-imagegen/scripts/generate_image.py +147 -0
  106. package/.agent/skills/gemini-imagegen/scripts/multi_turn_chat.py +215 -0
  107. package/.agent/skills/git-history-analyzer/SKILL.md +42 -0
  108. package/.agent/skills/git-worktree/SKILL.md +302 -0
  109. package/.agent/skills/git-worktree/scripts/worktree-manager.sh +345 -0
  110. package/.agent/skills/julik-frontend-races-reviewer/SKILL.md +222 -0
  111. package/.agent/skills/kieran-python-reviewer/SKILL.md +104 -0
  112. package/.agent/skills/kieran-rails-reviewer/SKILL.md +86 -0
  113. package/.agent/skills/kieran-typescript-reviewer/SKILL.md +95 -0
  114. package/.agent/skills/lint/SKILL.md +16 -0
  115. package/.agent/skills/pattern-recognition-specialist/SKILL.md +57 -0
  116. package/.agent/skills/performance-oracle/SKILL.md +110 -0
  117. package/.agent/skills/pr-comment-resolver/SKILL.md +69 -0
  118. package/.agent/skills/rclone/SKILL.md +150 -0
  119. package/.agent/skills/rclone/scripts/check_setup.sh +60 -0
  120. package/.agent/skills/repo-research-analyst/SKILL.md +113 -0
  121. package/.agent/skills/security-sentinel/SKILL.md +93 -0
  122. package/.agent/skills/skill-creator/SKILL.md +209 -0
  123. package/.agent/skills/skill-creator/scripts/init_skill.py +304 -0
  124. package/.agent/skills/skill-creator/scripts/package_skill.py +112 -0
  125. package/.agent/skills/skill-creator/scripts/quick_validate.py +72 -0
  126. package/.agent/skills/spec-flow-analyzer/SKILL.md +113 -0
  127. package/.agent/skills/test-agent/SKILL.md +4 -0
  128. package/.agent/workflows/agent-native-audit.md +277 -0
  129. package/.agent/workflows/ask-user-question.md +21 -0
  130. package/.agent/workflows/changelog.md +137 -0
  131. package/.agent/workflows/compound.md +202 -0
  132. package/.agent/workflows/create-agent-skill.md +8 -0
  133. package/.agent/workflows/deepen-plan-research.md +334 -0
  134. package/.agent/workflows/deepen-plan-synthesis.md +182 -0
  135. package/.agent/workflows/deepen-plan.md +79 -0
  136. package/.agent/workflows/feature-video.md +342 -0
  137. package/.agent/workflows/generate-command.md +162 -0
  138. package/.agent/workflows/heal-skill.md +142 -0
  139. package/.agent/workflows/lfg.md +20 -0
  140. package/.agent/workflows/plan-analysis.md +67 -0
  141. package/.agent/workflows/plan-next-steps.md +63 -0
  142. package/.agent/workflows/plan-review.md +33 -0
  143. package/.agent/workflows/plan-synthesis.md +106 -0
  144. package/.agent/workflows/plan.md +49 -0
  145. package/.agent/workflows/report-bug.md +150 -0
  146. package/.agent/workflows/reproduce-bug.md +99 -0
  147. package/.agent/workflows/resolve-parallel.md +34 -0
  148. package/.agent/workflows/resolve-pr-parallel.md +49 -0
  149. package/.agent/workflows/resolve-todo-parallel.md +35 -0
  150. package/.agent/workflows/review-analysis.md +145 -0
  151. package/.agent/workflows/review-synthesis.md +262 -0
  152. package/.agent/workflows/review.md +64 -0
  153. package/.agent/workflows/ship.md +90 -0
  154. package/.agent/workflows/test-command.md +3 -0
  155. package/.agent/workflows/triage.md +310 -0
  156. package/.agent/workflows/work.md +157 -0
  157. package/.agent/workflows/xcode-test.md +332 -0
  158. package/LICENSE +22 -0
  159. package/README.md +49 -0
  160. package/bin/ag-cortex.js +54 -0
  161. package/lib/core.js +165 -0
  162. package/package.json +31 -0
@@ -0,0 +1,317 @@
1
+ <overview>
2
+ This reference explains how to refactor existing agent code to follow prompt-native principles. The goal is to move behavior from code into prompts and to simplify tools into primitives.
3
+ </overview>
4
+
5
+ <diagnosis>
6
+ ## Diagnosing Non-Prompt-Native Code
7
+
8
+ Signs your agent isn't prompt-native:
9
+
10
+ **Tools that encode workflows:**
11
+ ```typescript
12
+ // RED FLAG: Tool contains business logic
13
+ tool("process_feedback", async ({ message }) => {
14
+ const category = categorize(message); // Logic in code
15
+ const priority = calculatePriority(message); // Logic in code
16
+ await store(message, category, priority); // Orchestration in code
17
+ if (priority > 3) await notify(); // Decision in code
18
+ });
19
+ ```
20
+
21
+ **Agent calls functions instead of figuring things out:**
22
+ ```typescript
23
+ // RED FLAG: Agent is just a function caller
24
+ "Use process_feedback to handle incoming messages"
25
+ // vs.
26
+ "When feedback comes in, decide importance, store it, notify if high"
27
+ ```
28
+
29
+ **Artificial limits on agent capability:**
30
+ ```typescript
31
+ // RED FLAG: Tool prevents agent from doing what users can do
32
+ tool("read_file", async ({ path }) => {
33
+ if (!ALLOWED_PATHS.includes(path)) {
34
+ throw new Error("Not allowed to read this file");
35
+ }
36
+ return readFile(path);
37
+ });
38
+ ```
39
+
40
+ **Prompts that specify HOW instead of WHAT:**
41
+ ```markdown
42
+ // RED FLAG: Micromanaging the agent
43
+ When creating a summary:
44
+ 1. Use exactly 3 bullet points
45
+ 2. Each bullet must be under 20 words
46
+ 3. Format with em-dashes for sub-points
47
+ 4. Bold the first word of each bullet
48
+ ```
49
+ </diagnosis>
50
+
51
+ <refactoring_workflow>
52
+ ## Step-by-Step Refactoring
53
+
54
+ **Step 1: Identify workflow tools**
55
+
56
+ List all your tools. Mark any that:
57
+ - Have business logic (categorize, calculate, decide)
58
+ - Orchestrate multiple operations
59
+ - Make decisions on behalf of the agent
60
+ - Contain conditional logic (if/else based on content)
61
+
62
+ **Step 2: Extract the primitives**
63
+
64
+ For each workflow tool, identify the underlying primitives:
65
+
66
+ | Workflow Tool | Hidden Primitives |
67
+ |---------------|-------------------|
68
+ | `process_feedback` | `store_item`, `send_message` |
69
+ | `generate_report` | `read_file`, `write_file` |
70
+ | `deploy_and_notify` | `git_push`, `send_message` |
71
+
72
+ **Step 3: Move behavior to the prompt**
73
+
74
+ Take the logic from your workflow tools and express it in natural language:
75
+
76
+ ```typescript
77
+ // Before (in code):
78
+ async function processFeedback(message) {
79
+ const priority = message.includes("crash") ? 5 :
80
+ message.includes("bug") ? 4 : 3;
81
+ await store(message, priority);
82
+ if (priority >= 4) await notify();
83
+ }
84
+ ```
85
+
86
+ ```markdown
87
+ // After (in prompt):
88
+ ## Feedback Processing
89
+
90
+ When someone shares feedback:
91
+ 1. Rate importance 1-5:
92
+ - 5: Crashes, data loss, security issues
93
+ - 4: Bug reports with clear reproduction steps
94
+ - 3: General suggestions, minor issues
95
+ 2. Store using store_item
96
+ 3. If importance >= 4, notify the team
97
+
98
+ Use your judgment. Context matters more than keywords.
99
+ ```
100
+
101
+ **Step 4: Simplify tools to primitives**
102
+
103
+ ```typescript
104
+ // Before: 1 workflow tool
105
+ tool("process_feedback", { message, category, priority }, ...complex logic...)
106
+
107
+ // After: 2 primitive tools
108
+ tool("store_item", { key: z.string(), value: z.any() }, ...simple storage...)
109
+ tool("send_message", { channel: z.string(), content: z.string() }, ...simple send...)
110
+ ```
111
+
112
+ **Step 5: Remove artificial limits**
113
+
114
+ ```typescript
115
+ // Before: Limited capability
116
+ tool("read_file", async ({ path }) => {
117
+ if (!isAllowed(path)) throw new Error("Forbidden");
118
+ return readFile(path);
119
+ });
120
+
121
+ // After: Full capability
122
+ tool("read_file", async ({ path }) => {
123
+ return readFile(path); // Agent can read anything
124
+ });
125
+ // Use approval gates for WRITES, not artificial limits on READS
126
+ ```
127
+
128
+ **Step 6: Test with outcomes, not procedures**
129
+
130
+ Instead of testing "does it call the right function?", test "does it achieve the outcome?"
131
+
132
+ ```typescript
133
+ // Before: Testing procedure
134
+ expect(mockProcessFeedback).toHaveBeenCalledWith(...)
135
+
136
+ // After: Testing outcome
137
+ // Send feedback → Check it was stored with reasonable importance
138
+ // Send high-priority feedback → Check notification was sent
139
+ ```
140
+ </refactoring_workflow>
141
+
142
+ <before_after>
143
+ ## Before/After Examples
144
+
145
+ **Example 1: Feedback Processing**
146
+
147
+ Before:
148
+ ```typescript
149
+ tool("handle_feedback", async ({ message, author }) => {
150
+ const category = detectCategory(message);
151
+ const priority = calculatePriority(message, category);
152
+ const feedbackId = await db.feedback.insert({
153
+ id: generateId(),
154
+ author,
155
+ message,
156
+ category,
157
+ priority,
158
+ timestamp: new Date().toISOString(),
159
+ });
160
+
161
+ if (priority >= 4) {
162
+ await discord.send(ALERT_CHANNEL, `High priority feedback from ${author}`);
163
+ }
164
+
165
+ return { feedbackId, category, priority };
166
+ });
167
+ ```
168
+
169
+ After:
170
+ ```typescript
171
+ // Simple storage primitive
172
+ tool("store_feedback", async ({ item }) => {
173
+ await db.feedback.insert(item);
174
+ return { text: `Stored feedback ${item.id}` };
175
+ });
176
+
177
+ // Simple message primitive
178
+ tool("send_message", async ({ channel, content }) => {
179
+ await discord.send(channel, content);
180
+ return { text: "Sent" };
181
+ });
182
+ ```
183
+
184
+ System prompt:
185
+ ```markdown
186
+ ## Feedback Processing
187
+
188
+ When someone shares feedback:
189
+ 1. Generate a unique ID
190
+ 2. Rate importance 1-5 based on impact and urgency
191
+ 3. Store using store_feedback with the full item
192
+ 4. If importance >= 4, send a notification to the team channel
193
+
194
+ Importance guidelines:
195
+ - 5: Critical (crashes, data loss, security)
196
+ - 4: High (detailed bug reports, blocking issues)
197
+ - 3: Medium (suggestions, minor bugs)
198
+ - 2: Low (cosmetic, edge cases)
199
+ - 1: Minimal (off-topic, duplicates)
200
+ ```
201
+
202
+ **Example 2: Report Generation**
203
+
204
+ Before:
205
+ ```typescript
206
+ tool("generate_weekly_report", async ({ startDate, endDate, format }) => {
207
+ const data = await fetchMetrics(startDate, endDate);
208
+ const summary = summarizeMetrics(data);
209
+ const charts = generateCharts(data);
210
+
211
+ if (format === "html") {
212
+ return renderHtmlReport(summary, charts);
213
+ } else if (format === "markdown") {
214
+ return renderMarkdownReport(summary, charts);
215
+ } else {
216
+ return renderPdfReport(summary, charts);
217
+ }
218
+ });
219
+ ```
220
+
221
+ After:
222
+ ```typescript
223
+ tool("query_metrics", async ({ start, end }) => {
224
+ const data = await db.metrics.query({ start, end });
225
+ return { text: JSON.stringify(data, null, 2) };
226
+ });
227
+
228
+ tool("write_file", async ({ path, content }) => {
229
+ writeFileSync(path, content);
230
+ return { text: `Wrote ${path}` };
231
+ });
232
+ ```
233
+
234
+ System prompt:
235
+ ```markdown
236
+ ## Report Generation
237
+
238
+ When asked to generate a report:
239
+ 1. Query the relevant metrics using query_metrics
240
+ 2. Analyze the data and identify key trends
241
+ 3. Create a clear, well-formatted report
242
+ 4. Write it using write_file in the appropriate format
243
+
244
+ Use your judgment about format and structure. Make it useful.
245
+ ```
246
+ </before_after>
247
+
248
+ <common_challenges>
249
+ ## Common Refactoring Challenges
250
+
251
+ **"But the agent might make mistakes!"**
252
+
253
+ Yes, and you can iterate. Change the prompt to add guidance:
254
+ ```markdown
255
+ // Before
256
+ Rate importance 1-5.
257
+
258
+ // After (if agent keeps rating too high)
259
+ Rate importance 1-5. Be conservative—most feedback is 2-3.
260
+ Only use 4-5 for truly blocking or critical issues.
261
+ ```
262
+
263
+ **"The workflow is complex!"**
264
+
265
+ Complex workflows can still be expressed in prompts. The agent is smart.
266
+ ```markdown
267
+ When processing video feedback:
268
+ 1. Check if it's a Loom, YouTube, or direct link
269
+ 2. For YouTube, pass URL directly to video analysis
270
+ 3. For others, download first, then analyze
271
+ 4. Extract timestamped issues
272
+ 5. Rate based on issue density and severity
273
+ ```
274
+
275
+ **"We need deterministic behavior!"**
276
+
277
+ Some operations should stay in code. That's fine. Prompt-native isn't all-or-nothing.
278
+
279
+ Keep in code:
280
+ - Security validation
281
+ - Rate limiting
282
+ - Audit logging
283
+ - Exact format requirements
284
+
285
+ Move to prompts:
286
+ - Categorization decisions
287
+ - Priority judgments
288
+ - Content generation
289
+ - Workflow orchestration
290
+
291
+ **"What about testing?"**
292
+
293
+ Test outcomes, not procedures:
294
+ - "Given this input, does the agent achieve the right result?"
295
+ - "Does stored feedback have reasonable importance ratings?"
296
+ - "Are notifications sent for truly high-priority items?"
297
+ </common_challenges>
298
+
299
+ <checklist>
300
+ ## Refactoring Checklist
301
+
302
+ Diagnosis:
303
+ - [ ] Listed all tools with business logic
304
+ - [ ] Identified artificial limits on agent capability
305
+ - [ ] Found prompts that micromanage HOW
306
+
307
+ Refactoring:
308
+ - [ ] Extracted primitives from workflow tools
309
+ - [ ] Moved business logic to system prompt
310
+ - [ ] Removed artificial limits
311
+ - [ ] Simplified tool inputs to data, not decisions
312
+
313
+ Validation:
314
+ - [ ] Agent achieves same outcomes with primitives
315
+ - [ ] Behavior can be changed by editing prompts
316
+ - [ ] New features could be added without new tools
317
+ </checklist>
@@ -0,0 +1,269 @@
1
+ <overview>
2
+ Self-modification is the advanced tier of agent-native engineering: agents that can evolve their own code, prompts, and behavior. It is not required for every app, but it is a big part of the future.
3
+
4
+ This is the logical extension of "whatever the developer can do, the agent can do."
5
+ </overview>
6
+
7
+ <why_self_modification>
8
+ ## Why Self-Modification?
9
+
10
+ Traditional software is static—it does what you wrote, nothing more. Self-modifying agents can:
11
+
12
+ - **Fix their own bugs** - See an error, patch the code, restart
13
+ - **Add new capabilities** - User asks for something new, agent implements it
14
+ - **Evolve behavior** - Learn from feedback and adjust prompts
15
+ - **Deploy themselves** - Push code, trigger builds, restart
16
+
17
+ The agent becomes a living system that improves over time, not frozen code.
18
+ </why_self_modification>
19
+
20
+ <capabilities>
21
+ ## What Self-Modification Enables
22
+
23
+ **Code modification:**
24
+ - Read and understand source files
25
+ - Write fixes and new features
26
+ - Commit and push to version control
27
+ - Trigger builds and verify they pass
28
+
29
+ **Prompt evolution:**
30
+ - Edit the system prompt based on feedback
31
+ - Add new features as prompt sections
32
+ - Refine judgment criteria that aren't working
33
+
34
+ **Infrastructure control:**
35
+ - Pull latest code from upstream
36
+ - Merge from other branches/instances
37
+ - Restart after changes
38
+ - Roll back if something breaks
39
+
40
+ **Site/output generation:**
41
+ - Generate and maintain websites
42
+ - Create documentation
43
+ - Build dashboards from data
44
+ </capabilities>
45
+
46
+ <guardrails>
47
+ ## Required Guardrails
48
+
49
+ Self-modification is powerful. It needs safety mechanisms.
50
+
51
+ **Approval gates for code changes:**
52
+ ```typescript
53
+ tool("write_file", async ({ path, content }) => {
54
+ if (isCodeFile(path)) {
55
+ // Store for approval, don't apply immediately
56
+ pendingChanges.set(path, content);
57
+ const diff = generateDiff(path, content);
58
+ return { text: `Requires approval:\n\n${diff}\n\nReply "yes" to apply.` };
59
+ }
60
+ // Non-code files apply immediately
61
+ writeFileSync(path, content);
62
+ return { text: `Wrote ${path}` };
63
+ });
64
+ ```
65
+
66
+ **Save current state (stash or commit) before changes:**
67
+ ```typescript
68
+ tool("self_deploy", async () => {
69
+ // Save current state first
70
+ runGit("stash"); // or commit uncommitted changes
71
+
72
+ // Then pull/merge
73
+ runGit("fetch origin");
74
+ runGit("merge origin/main --no-edit");
75
+
76
+ // Build and verify
77
+ runCommand("npm run build");
78
+
79
+ // Only then restart
80
+ scheduleRestart();
81
+ });
82
+ ```
83
+
84
+ **Build verification:**
85
+ ```typescript
86
+ // Don't restart unless build passes
87
+ try {
88
+ runCommand("npm run build", { timeout: 120000 });
89
+ } catch (error) {
90
+ // Rollback the merge
91
+ runGit("merge --abort");
92
+ return { text: "Build failed, aborting deploy", isError: true };
93
+ }
94
+ ```
95
+
96
+ **Health checks after restart:**
97
+ ```typescript
98
+ tool("health_check", async () => {
99
+ const uptime = process.uptime();
100
+ const buildValid = existsSync("dist/index.js");
101
+ const gitClean = !runGit("status --porcelain");
102
+
103
+ return {
104
+ text: JSON.stringify({
105
+ status: "healthy",
106
+ uptime: `${Math.floor(uptime / 60)}m`,
107
+ build: buildValid ? "valid" : "missing",
108
+ git: gitClean ? "clean" : "uncommitted changes",
109
+ }, null, 2),
110
+ };
111
+ });
112
+ ```
113
+ </guardrails>
114
+
115
+ <git_architecture>
116
+ ## Git-Based Self-Modification
117
+
118
+ Use git as the foundation for self-modification. It provides:
119
+ - Version history (rollback capability)
120
+ - Branching (experiment safely)
121
+ - Merge (sync with other instances)
122
+ - Push/pull (deploy and collaborate)
123
+
124
+ **Essential git tools:**
125
+ ```typescript
126
+ tool("status", "Show git status", {}, ...);
127
+ tool("diff", "Show file changes", { path: z.string().optional() }, ...);
128
+ tool("log", "Show commit history", { count: z.number() }, ...);
129
+ tool("commit_code", "Commit code changes", { message: z.string() }, ...);
130
+ tool("git_push", "Push to GitHub", { branch: z.string().optional() }, ...);
131
+ tool("pull", "Pull from GitHub", { source: z.enum(["main", "instance"]) }, ...);
132
+ tool("rollback", "Revert recent commits", { commits: z.number() }, ...);
133
+ ```
134
+
135
+ **Multi-instance architecture:**
136
+ ```
137
+ main # Shared code
138
+ ├── instance/bot-a # Instance A's branch
139
+ ├── instance/bot-b # Instance B's branch
140
+ └── instance/bot-c # Instance C's branch
141
+ ```
142
+
143
+ Each instance can:
144
+ - Pull updates from main
145
+ - Push improvements back to main (via PR)
146
+ - Sync features from other instances
147
+ - Maintain instance-specific config
148
+ </git_architecture>
149
+
150
+ <prompt_evolution>
151
+ ## Self-Modifying Prompts
152
+
153
+ The system prompt is a file the agent can read and write.
154
+
155
+ ```typescript
156
+ // Agent can read its own prompt
157
+ tool("read_file", ...); // Can read src/prompts/system.md
158
+
159
+ // Agent can propose changes
160
+ tool("write_file", ...); // Can write to src/prompts/system.md (with approval)
161
+ ```
162
+
163
+ **System prompt as living document:**
164
+ ```markdown
165
+ ## Feedback Processing
166
+
167
+ When someone shares feedback:
168
+ 1. Acknowledge warmly
169
+ 2. Rate importance 1-5
170
+ 3. Store using feedback tools
171
+
172
+ <!-- Note to self: Video walkthroughs should always be 4-5,
173
+ learned this from Dan's feedback on 2024-12-07 -->
174
+ ```
175
+
176
+ The agent can:
177
+ - Add notes to itself
178
+ - Refine judgment criteria
179
+ - Add new feature sections
180
+ - Document edge cases it learned
181
+ </prompt_evolution>
182
+
183
+ <when_to_use>
184
+ ## When to Implement Self-Modification
185
+
186
+ **Good candidates:**
187
+ - Long-running autonomous agents
188
+ - Agents that need to adapt to feedback
189
+ - Systems where behavior evolution is valuable
190
+ - Internal tools where rapid iteration matters
191
+
192
+ **Not necessary (or not appropriate) for:**
193
+ - Simple single-task agents
194
+ - Highly regulated environments
195
+ - Systems where behavior must be auditable
196
+ - One-off or short-lived agents
197
+
198
+ Start with a non-self-modifying prompt-native agent. Add self-modification when you need it.
199
+ </when_to_use>
200
+
201
+ <example_tools>
202
+ ## Complete Self-Modification Toolset
203
+
204
+ ```typescript
205
+ const selfMcpServer = createSdkMcpServer({
206
+ name: "self",
207
+ version: "1.0.0",
208
+ tools: [
209
+ // FILE OPERATIONS
210
+ tool("read_file", "Read any project file", { path: z.string() }, ...),
211
+ tool("write_file", "Write a file (code requires approval)", { path, content }, ...),
212
+ tool("list_files", "List directory contents", { path: z.string() }, ...),
213
+ tool("search_code", "Search for patterns", { pattern: z.string() }, ...),
214
+
215
+ // APPROVAL WORKFLOW
216
+ tool("apply_pending", "Apply approved changes", {}, ...),
217
+ tool("get_pending", "Show pending changes", {}, ...),
218
+ tool("clear_pending", "Discard pending changes", {}, ...),
219
+
220
+ // RESTART
221
+ tool("restart", "Rebuild and restart", {}, ...),
222
+ tool("health_check", "Check if bot is healthy", {}, ...),
223
+ ],
224
+ });
225
+
226
+ const gitMcpServer = createSdkMcpServer({
227
+ name: "git",
228
+ version: "1.0.0",
229
+ tools: [
230
+ // STATUS
231
+ tool("status", "Show git status", {}, ...),
232
+ tool("diff", "Show changes", { path: z.string().optional() }, ...),
233
+ tool("log", "Show history", { count: z.number() }, ...),
234
+
235
+ // COMMIT & PUSH
236
+ tool("commit_code", "Commit code changes", { message: z.string() }, ...),
237
+ tool("git_push", "Push to GitHub", { branch: z.string().optional() }, ...),
238
+
239
+ // SYNC
240
+ tool("pull", "Pull from upstream", { source: z.enum(["main", "instance"]) }, ...),
241
+ tool("self_deploy", "Pull, build, restart", { source: z.enum(["main", "instance"]) }, ...),
242
+
243
+ // SAFETY
244
+ tool("rollback", "Revert commits", { commits: z.number() }, ...),
245
+ tool("health_check", "Detailed health report", {}, ...),
246
+ ],
247
+ });
248
+ ```
249
+ </example_tools>
250
+
251
+ <checklist>
252
+ ## Self-Modification Checklist
253
+
254
+ Before enabling self-modification:
255
+ - [ ] Git-based version control set up
256
+ - [ ] Approval gates for code changes
257
+ - [ ] Build verification before restart
258
+ - [ ] Rollback mechanism available
259
+ - [ ] Health check endpoint
260
+ - [ ] Instance identity configured
261
+
262
+ When implementing:
263
+ - [ ] Agent can read all project files
264
+ - [ ] Agent can write files (with appropriate approval)
265
+ - [ ] Agent can commit and push
266
+ - [ ] Agent can pull updates
267
+ - [ ] Agent can restart itself
268
+ - [ ] Agent can roll back if needed
269
+ </checklist>