@codeharbor/agent-playbook 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/README.md +4 -2
  2. package/package.json +5 -2
  3. package/skills/api-designer/README.md +36 -0
  4. package/skills/api-designer/SKILL.md +232 -0
  5. package/skills/api-designer/references/graphql-patterns.md +12 -0
  6. package/skills/api-designer/references/rest-patterns.md +17 -0
  7. package/skills/api-designer/scripts/generate_api.py +87 -0
  8. package/skills/api-designer/scripts/validate_api.py +48 -0
  9. package/skills/api-documenter/README.md +41 -0
  10. package/skills/api-documenter/SKILL.md +209 -0
  11. package/skills/api-documenter/references/examples/README.md +3 -0
  12. package/skills/api-documenter/references/examples/openapi-example.yaml +10 -0
  13. package/skills/api-documenter/references/openapi-template.yaml +5 -0
  14. package/skills/api-documenter/scripts/generate_openapi.py +84 -0
  15. package/skills/api-documenter/scripts/validate_openapi.py +45 -0
  16. package/skills/architecting-solutions/README.md +22 -0
  17. package/skills/architecting-solutions/SKILL.md +459 -0
  18. package/skills/auto-trigger/README.md +23 -0
  19. package/skills/auto-trigger/SKILL.md +183 -0
  20. package/skills/code-reviewer/README.md +59 -0
  21. package/skills/code-reviewer/SKILL.md +220 -0
  22. package/skills/code-reviewer/references/checklist.md +80 -0
  23. package/skills/code-reviewer/references/patterns.md +226 -0
  24. package/skills/code-reviewer/references/security.md +88 -0
  25. package/skills/code-reviewer/scripts/review_checklist.py +191 -0
  26. package/skills/commit-helper/README.md +58 -0
  27. package/skills/commit-helper/SKILL.md +159 -0
  28. package/skills/commit-helper/references/conventional-commits.md +68 -0
  29. package/skills/commit-helper/references/examples.md +125 -0
  30. package/skills/commit-helper/references/scopes.md +49 -0
  31. package/skills/commit-helper/scripts/validate_commit.py +70 -0
  32. package/skills/create-pr/README.md +182 -0
  33. package/skills/create-pr/SKILL.md +340 -0
  34. package/skills/debugger/README.md +53 -0
  35. package/skills/debugger/SKILL.md +239 -0
  36. package/skills/debugger/references/checklist.md +7 -0
  37. package/skills/debugger/references/errors.md +6 -0
  38. package/skills/debugger/references/patterns.md +5 -0
  39. package/skills/debugger/scripts/debug_report.py +77 -0
  40. package/skills/deployment-engineer/README.md +40 -0
  41. package/skills/deployment-engineer/SKILL.md +242 -0
  42. package/skills/deployment-engineer/references/kubernetes.md +23 -0
  43. package/skills/deployment-engineer/references/monitoring.md +14 -0
  44. package/skills/deployment-engineer/references/pipelines.md +12 -0
  45. package/skills/deployment-engineer/scripts/generate_deploy.py +72 -0
  46. package/skills/deployment-engineer/scripts/validate_deploy.py +46 -0
  47. package/skills/documentation-engineer/README.md +41 -0
  48. package/skills/documentation-engineer/SKILL.md +164 -0
  49. package/skills/documentation-engineer/references/api-template.md +22 -0
  50. package/skills/documentation-engineer/references/readme-template.md +25 -0
  51. package/skills/documentation-engineer/references/style-guide.md +13 -0
  52. package/skills/documentation-engineer/scripts/generate_docs.py +68 -0
  53. package/skills/documentation-engineer/scripts/validate_docs.py +46 -0
  54. package/skills/figma-designer/README.md +222 -0
  55. package/skills/figma-designer/SKILL.md +407 -0
  56. package/skills/figma-designer/references/example-output.md +86 -0
  57. package/skills/performance-engineer/README.md +42 -0
  58. package/skills/performance-engineer/SKILL.md +236 -0
  59. package/skills/performance-engineer/references/checklist.md +6 -0
  60. package/skills/performance-engineer/references/monitoring.md +5 -0
  61. package/skills/performance-engineer/references/optimization.md +7 -0
  62. package/skills/performance-engineer/scripts/perf_report.py +64 -0
  63. package/skills/performance-engineer/scripts/profile.py +63 -0
  64. package/skills/planning-with-files/README.md +27 -0
  65. package/skills/planning-with-files/SKILL.md +103 -0
  66. package/skills/prd-implementation-precheck/README.md +97 -0
  67. package/skills/prd-implementation-precheck/SKILL.md +112 -0
  68. package/skills/prd-planner/README.md +102 -0
  69. package/skills/prd-planner/SKILL.md +449 -0
  70. package/skills/prd-planner/references/edge-case-analysis.md +111 -0
  71. package/skills/qa-expert/README.md +37 -0
  72. package/skills/qa-expert/SKILL.md +225 -0
  73. package/skills/qa-expert/references/gates.md +11 -0
  74. package/skills/qa-expert/references/metrics.md +6 -0
  75. package/skills/qa-expert/references/strategy.md +11 -0
  76. package/skills/qa-expert/scripts/coverage_analysis.py +61 -0
  77. package/skills/qa-expert/scripts/generate_test_plan.py +68 -0
  78. package/skills/refactoring-specialist/README.md +37 -0
  79. package/skills/refactoring-specialist/SKILL.md +283 -0
  80. package/skills/refactoring-specialist/references/checklist.md +6 -0
  81. package/skills/refactoring-specialist/references/smells.md +6 -0
  82. package/skills/refactoring-specialist/references/techniques.md +6 -0
  83. package/skills/security-auditor/README.md +48 -0
  84. package/skills/security-auditor/SKILL.md +256 -0
  85. package/skills/security-auditor/references/checklist.md +7 -0
  86. package/skills/security-auditor/references/owasp.md +12 -0
  87. package/skills/security-auditor/references/remediation.md +7 -0
  88. package/skills/security-auditor/scripts/find_secrets.py +58 -0
  89. package/skills/security-auditor/scripts/security_audit.py +64 -0
  90. package/skills/self-improving-agent/README.md +136 -0
  91. package/skills/self-improving-agent/SKILL.md +407 -0
  92. package/skills/self-improving-agent/hooks/post-bash.sh +10 -0
  93. package/skills/self-improving-agent/hooks/pre-tool.sh +10 -0
  94. package/skills/self-improving-agent/hooks/session-end.sh +4 -0
  95. package/skills/self-improving-agent/memory/semantic-patterns.json +288 -0
  96. package/skills/self-improving-agent/references/appendix.md +131 -0
  97. package/skills/self-improving-agent/templates/correction-template.md +11 -0
  98. package/skills/self-improving-agent/templates/pattern-template.md +15 -0
  99. package/skills/self-improving-agent/templates/validation-template.md +14 -0
  100. package/skills/session-logger/README.md +50 -0
  101. package/skills/session-logger/SKILL.md +156 -0
  102. package/skills/skill-router/README.md +155 -0
  103. package/skills/skill-router/SKILL.md +215 -0
  104. package/skills/test-automator/README.md +41 -0
  105. package/skills/test-automator/SKILL.md +202 -0
  106. package/skills/test-automator/references/best-practices.md +6 -0
  107. package/skills/test-automator/references/examples/README.md +3 -0
  108. package/skills/test-automator/references/examples/unit-test-example.md +8 -0
  109. package/skills/test-automator/references/mocking.md +5 -0
  110. package/skills/test-automator/scripts/coverage_report.py +59 -0
  111. package/skills/test-automator/scripts/generate_test.py +66 -0
  112. package/skills/workflow-orchestrator/README.md +20 -0
  113. package/skills/workflow-orchestrator/SKILL.md +342 -0
  114. package/src/cli.js +107 -20
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env python3
2
+ # Template generator for security audit.
3
+
4
+ from pathlib import Path
5
+ import argparse
6
+ import textwrap
7
+
8
+
9
def write_output(path: Path, content: str, force: bool) -> bool:
    """Write ``content`` to ``path`` as UTF-8 unless that would clobber a file.

    Args:
        path: Destination file; missing parent directories are created.
        content: Text to write.
        force: When true, an existing ``path`` is overwritten.

    Returns:
        True if the file was written; False if ``path`` already existed and
        ``force`` was not set (a notice is printed and nothing is written).
    """
    already_there = path.exists()
    if force or not already_there:
        # Create the directory chain first so write_text cannot fail on a
        # missing parent.
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, encoding="utf-8")
        return True

    print(f"{path} already exists (use --force to overwrite)")
    return False
16
+
17
+
18
def main() -> int:
    """Parse CLI options and write a security-audit report skeleton.

    Options: ``--output`` (destination path, default ``security-audit.md``),
    ``--name`` (system or scope name), ``--owner`` (owning team) and
    ``--force`` (overwrite an existing file).

    Returns:
        0 when the report was written, 1 when ``write_output`` declined to
        write (it returns False if the file exists and ``--force`` is absent).
    """
    parser = argparse.ArgumentParser(description="Generate a security audit report.")
    parser.add_argument("--output", default="security-audit.md", help="Output file path")
    parser.add_argument("--name", default="example", help="System or scope name")
    parser.add_argument("--owner", default="team", help="Owning team")
    parser.add_argument("--force", action="store_true", help="Overwrite existing file")
    args = parser.parse_args()

    # Dedent the static template BEFORE substituting user-supplied values.
    # Interpolating first (f-string) lets a multi-line --name/--owner value
    # with unindented lines erase the common margin, so dedent would strip
    # nothing and the whole report would come out indented.
    template = textwrap.dedent(
        """\
        # Security Audit

        ## Scope
        {name}

        ## Ownership
        - Owner: {owner}
        - Security contact: TBD

        ## Threat Model
        - Assets
        - Entry points
        - Trust boundaries

        ## Findings
        | Severity | Issue | Impact | Recommendation |
        | --- | --- | --- | --- |
        | High | TBD | TBD | TBD |

        ## Remediation Plan
        - Immediate fixes
        - Long-term hardening

        ## Evidence
        - Logs, scans, and screenshots
        """
    )
    # Normalise to exactly one trailing newline, as before.
    content = template.format(name=args.name, owner=args.owner).strip() + "\n"

    output = Path(args.output)
    if not write_output(output, content, args.force):
        return 1
    print(f"Wrote {output}")
    return 0
61
+
62
+
63
# Script entry point: when run directly, exit the process using main()'s
# return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,136 @@
1
+ # Self-Improving Agent
2
+
3
+ A universal self-improvement system that learns from ALL skill experiences and continuously updates the codebase.
4
+
5
+ ## Overview
6
+
7
+ This agent learns from **every skill interaction** to achieve true lifelong learning. It implements a complete feedback loop with multi-memory architecture, self-correction, and evolution markers.
8
+
9
+ ## Key Features
10
+
11
+ - **Multi-Memory Architecture**: Semantic + Episodic + Working memory
12
+ - **Universal Learning**: Learns from ALL skills, not just PRDs
13
+ - **Pattern Extraction**: Converts experiences into reusable patterns
14
+ - **Self-Correction**: Fixes skill guidance when errors occur
15
+ - **Self-Validation**: Periodically verifies skill accuracy
16
+ - **Automatic Updates**: Updates related skills based on learned patterns
17
+ - **Confidence Tracking**: Measures pattern reliability over time
18
+ - **Human-in-the-Loop**: Collects feedback to validate improvements
19
+
20
+ ## Memory System
21
+
22
+ ```
23
+ ~/.claude/memory/
24
+ ├── semantic/ # Patterns, rules, best practices
25
+ ├── episodic/ # Specific experiences and episodes
26
+ └── working/ # Current session context
27
+ ```
28
+
29
+ ## How It Works
30
+
31
+ ```
32
+ Any Skill Completes
33
+
34
+ Extract Experience → Identify Patterns → Update Skills → Consolidate Memory
35
+ ↓ ↓ ↓ ↓
36
+ What happened? What can we reuse? Which skills? Track metrics
37
+ ```
38
+
39
+ ## Installation
40
+
41
+ ```bash
42
+ ln -s ~/path/to/agent-playbook/skills/self-improving-agent ~/.claude/skills/self-improving-agent
43
+ ```
44
+
45
+ ## Hooks (Optional)
46
+
47
+ Wire hooks to capture errors and session-end signals (replace `${SKILLS_DIR}` with the actual path to your skills directory):
48
+
49
+ ```json
50
+ {
51
+ "hooks": {
52
+ "PreToolUse": [
53
+ {
54
+ "matcher": "Bash|Write|Edit",
55
+ "hooks": [
56
+ { "type": "command", "command": "bash ${SKILLS_DIR}/self-improving-agent/hooks/pre-tool.sh \"$TOOL_NAME\" \"$TOOL_INPUT\"" }
57
+ ]
58
+ }
59
+ ],
60
+ "PostToolUse": [
61
+ {
62
+ "matcher": "Bash",
63
+ "hooks": [
64
+ { "type": "command", "command": "bash ${SKILLS_DIR}/self-improving-agent/hooks/post-bash.sh \"$TOOL_OUTPUT\" \"$EXIT_CODE\"" }
65
+ ]
66
+ }
67
+ ],
68
+ "Stop": [
69
+ {
70
+ "matcher": "",
71
+ "hooks": [
72
+ { "type": "command", "command": "bash ${SKILLS_DIR}/self-improving-agent/hooks/session-end.sh" }
73
+ ]
74
+ }
75
+ ]
76
+ }
77
+ }
78
+ ```
79
+
80
+ ## Triggering
81
+
82
+ ### Automatic
83
+ After ANY skill completes:
84
+ - prd-planner
85
+ - code-reviewer
86
+ - debugger
87
+ - refactoring-specialist
88
+ - etc.
89
+
90
+ ### Manual
91
+ ```
92
+ "自我进化"
93
+ "self-improve"
94
+ "分析今天的经验"
95
+ "总结这次教训"
96
+ ```
97
+
98
+ ## Example Learning
99
+
100
+ ### Episode
101
+ ```yaml
102
+ Skill: debugger
103
+ Situation: Form submission doesn't refresh data
104
+ Root Cause: Empty callback function
105
+ Pattern: Always verify callbacks have implementations
106
+ Confidence: 0.95 → Updates: debugger, prd-implementation-precheck
107
+ ```
108
+
109
+ ### Skill Update
110
+ ```markdown
111
+ ## Auto-Update (2025-01-11)
112
+
113
+ ### Pattern Added
114
+ **Callback Verification**: Always verify that callback functions
115
+ passed as props are not empty and actually execute logic.
116
+
117
+ **Source**: Episode ep-2025-01-11-003 (3 occurrences)
118
+ **Action**: Added to debugger checklist
119
+ ```
120
+
121
+ ## Research Basis
122
+
123
+ - [SimpleMem: Efficient Lifelong Memory](https://arxiv.org/html/2601.02553v1)
124
+ - [ACM Memory Mechanisms Survey](https://dl.acm.org/doi/10.1145/3748302)
125
+ - [Lifelong Learning of LLM Agents](https://arxiv.org/html/2501.07278v1)
126
+
127
+ ## Templates
128
+
129
+ Reusable templates live in `skills/self-improving-agent/templates`:
130
+ - `pattern-template.md`
131
+ - `correction-template.md`
132
+ - `validation-template.md`
133
+
134
+ ## License
135
+
136
+ MIT
@@ -0,0 +1,407 @@
1
+ ---
2
+ name: self-improving-agent
3
+ description: A universal self-improving agent that learns from ALL skill experiences. Uses multi-memory architecture (semantic + episodic + working) to continuously evolve the codebase. Auto-triggers on skill completion/error with hooks-based self-correction.
4
+ allowed-tools: Read, Write, Edit, Bash, Grep, Glob, WebSearch
5
+ hooks:
6
+ before_start:
7
+ - trigger: session-logger
8
+ mode: auto
9
+ context: "Start {skill_name}"
10
+ after_complete:
11
+ - trigger: create-pr
12
+ mode: ask_first
13
+ condition: skills_modified
14
+ reason: "Submit improvements to repository"
15
+ - trigger: session-logger
16
+ mode: auto
17
+ context: "Self-improvement cycle complete"
18
+ # Note: on_error intentionally only logs to session to avoid infinite recursion
19
+ # Self-correction is triggered by other skills (debugger, code-reviewer) completing their work
20
+ on_error:
21
+ - trigger: session-logger
22
+ mode: auto
23
+ context: "Error captured in {skill_name}"
24
+ ---
25
+
26
+ # Self-Improving Agent
27
+
28
+ > "An AI agent that learns from every interaction, accumulating patterns and insights to continuously improve its own capabilities." — Based on 2025 lifelong learning research
29
+
30
+ ## Overview
31
+
32
+ This is a **universal self-improvement system** that learns from ALL skill experiences, not just PRDs. It implements a complete feedback loop with:
33
+
34
+ - **Multi-Memory Architecture**: Semantic + Episodic + Working memory
35
+ - **Self-Correction**: Detects and fixes skill guidance errors
36
+ - **Self-Validation**: Periodically verifies skill accuracy
37
+ - **Hooks Integration**: Auto-triggers on skill events (before_start, after_complete, on_error)
38
+ - **Evolution Markers**: Traceable changes with source attribution
39
+
40
+ ## Research-Based Design
41
+
42
+ Based on 2025 research:
43
+
44
+ | Research | Key Insight | Application |
45
+ |----------|-------------|-------------|
46
+ | [SimpleMem](https://arxiv.org/html/2601.02553v1) | Efficient lifelong memory | Pattern accumulation system |
47
+ | [Multi-Memory Survey](https://dl.acm.org/doi/10.1145/3748302) | Semantic + Episodic memory | World knowledge + experiences |
48
+ | [Lifelong Learning](https://arxiv.org/html/2501.07278v1) | Continuous task stream learning | Learn from every skill use |
49
+ | [Evo-Memory](https://shothota.medium.com/evo-memory-deepminds-new-benchmark) | Test-time lifelong learning | Real-time adaptation |
50
+
51
+ ## The Self-Improvement Loop
52
+
53
+ ```
54
+ ┌─────────────────────────────────────────────────────────────────┐
55
+ │ UNIVERSAL SELF-IMPROVEMENT │
56
+ ├─────────────────────────────────────────────────────────────────┤
57
+ │ │
58
+ │ Skill Event → Extract Experience → Abstract Pattern → Update │
59
+ │ │ │ │ │ │
60
+ │ ▼ ▼ ▼ ▼ │
61
+ │ ┌─────────────────────────────────────────────────────┐ │
62
+ │ │ MULTI-MEMORY SYSTEM │ │
63
+ │ ├─────────────────────────────────────────────────────┤ │
64
+ │ │ Semantic Memory │ Episodic Memory │ Working Memory │ │
65
+ │ │ (Patterns/Rules) │ (Experiences) │ (Current) │ │
66
+ │ │ memory/semantic/ │ memory/episodic/ │ memory/working/│ │
67
+ │ └─────────────────────────────────────────────────────┘ │
68
+ │ │
69
+ │ ┌─────────────────────────────────────────────────────┐ │
70
+ │ │ FEEDBACK LOOP │ │
71
+ │ │ User Feedback → Confidence Update → Pattern Adapt │ │
72
+ │ └─────────────────────────────────────────────────────┘ │
73
+ │ │
74
+ └─────────────────────────────────────────────────────────────────┘
75
+ ```
76
+
77
+ ## When This Activates
78
+
79
+ ### Automatic Triggers (via hooks)
80
+
81
+ | Event | Trigger | Action |
82
+ |-------|---------|--------|
83
+ | **before_start** | Any skill starts | Log session start |
84
+ | **after_complete** | Any skill completes | Extract patterns, update skills |
85
+ | **on_error** | Bash returns non-zero exit | Log error context to the session only; self-correction runs later, when another skill (e.g. debugger, code-reviewer) completes |
86
+
87
+ ### Manual Triggers
88
+
89
+ - User says "自我进化", "self-improve", "从经验中学习"
90
+ - User says "分析今天的经验", "总结教训"
91
+ - User asks to improve a specific skill
92
+
93
+ ## Evolution Priority Matrix
94
+
95
+ Trigger evolution when new reusable knowledge appears:
96
+
97
+ | Trigger | Target Skill | Priority | Action |
98
+ |---------|--------------|----------|--------|
99
+ | New PRD pattern discovered | prd-planner | High | Add to quality checklist |
100
+ | Architecture tradeoff clarified | architecting-solutions | High | Add to decision patterns |
101
+ | API design rule learned | api-designer | High | Update template |
102
+ | Debugging fix discovered | debugger | High | Add to anti-patterns |
103
+ | Review checklist gap | code-reviewer | High | Add checklist item |
104
+ | Perf/security insight | performance-engineer, security-auditor | High | Add to patterns |
105
+ | UI/UX spec issue | prd-planner, architecting-solutions | High | Add visual spec requirements |
106
+ | React/state pattern | debugger, refactoring-specialist | Medium | Add to patterns |
107
+ | Test strategy improvement | test-automator, qa-expert | Medium | Update approach |
108
+ | CI/deploy fix | deployment-engineer | Medium | Add to troubleshooting |
109
+
110
+ ## Multi-Memory Architecture
111
+
112
+ ### 1. Semantic Memory (`memory/semantic-patterns.json`)
113
+
114
+ Stores **abstract patterns and rules** reusable across contexts:
115
+
116
+ ```json
117
+ {
118
+ "patterns": {
119
+ "pattern_id": {
120
+ "id": "pat-2025-01-11-001",
121
+ "name": "Pattern Name",
122
+ "source": "user_feedback|implementation_review|retrospective",
123
+ "confidence": 0.95,
124
+ "applications": 5,
125
+ "created": "2025-01-11",
126
+ "category": "prd_structure|react_patterns|async_patterns|...",
127
+ "pattern": "One-line summary",
128
+ "problem": "What problem does this solve?",
129
+ "solution": { ... },
130
+ "quality_rules": [ ... ],
131
+ "target_skills": [ ... ]
132
+ }
133
+ }
134
+ }
135
+ ```
136
+
137
+ ### 2. Episodic Memory (`memory/episodic/`)
138
+
139
+ Stores **specific experiences and what happened**:
140
+
141
+ ```
142
+ memory/episodic/
143
+ ├── 2025/
144
+ │ ├── 2025-01-11-prd-creation.json
145
+ │ ├── 2025-01-11-debug-session.json
146
+ │ └── 2025-01-12-refactoring.json
147
+ ```
148
+
149
+ ```json
150
+ {
151
+ "id": "ep-2025-01-11-001",
152
+ "timestamp": "2025-01-11T10:30:00Z",
153
+ "skill": "debugger",
154
+ "situation": "User reported data not refreshing after form submission",
155
+ "root_cause": "Empty callback in onRefresh prop",
156
+ "solution": "Implement actual refresh logic in callback",
157
+ "lesson": "Always verify callbacks are not empty functions",
158
+ "related_pattern": "callback_verification",
159
+ "user_feedback": {
160
+ "rating": 8,
161
+ "comments": "This was exactly the issue"
162
+ }
163
+ }
164
+ ```
165
+
166
+ ### 3. Working Memory (`memory/working/`)
167
+
168
+ Stores **current session context**:
169
+
170
+ ```
171
+ memory/working/
172
+ ├── current_session.json # Active session data
173
+ ├── last_error.json # Error context for self-correction
174
+ └── session_end.json # Session end marker
175
+ ```
176
+
177
+ ## Self-Improvement Process
178
+
179
+ ### Phase 1: Experience Extraction
180
+
181
+ After any skill completes, extract:
182
+
183
+ ```yaml
184
+ What happened:
185
+ skill_used: {which skill}
186
+ task: {what was being done}
187
+ outcome: {success|partial|failure}
188
+
189
+ Key Insights:
190
+ what_went_well: [what worked]
191
+ what_went_wrong: [what didn't work]
192
+ root_cause: {underlying issue if applicable}
193
+
194
+ User Feedback:
195
+ rating: {1-10 if provided}
196
+ comments: {specific feedback}
197
+ ```
198
+
199
+ ### Phase 2: Pattern Abstraction
200
+
201
+ Convert experiences to reusable patterns:
202
+
203
+ | Concrete Experience | Abstract Pattern | Target Skill |
204
+ |--------------------|------------------|--------------|
205
+ | "User forgot to save PRD notes" | "Always persist thinking to files" | prd-planner |
206
+ | "Code review missed SQL injection" | "Add security checklist item" | code-reviewer |
207
+ | "Callback was empty, didn't work" | "Verify callback implementations" | debugger |
208
+ | "Net APY position ambiguous" | "UI specs need exact relative positions" | prd-planner |
209
+
210
+ **Abstraction Rules:**
211
+
212
+ ```yaml
213
+ If experience_repeats 3+ times:
214
+ pattern_level: critical
215
+ action: Add to skill's "Critical Mistakes" section
216
+
217
+ If solution_was_effective:
218
+ pattern_level: best_practice
219
+ action: Add to skill's "Best Practices" section
220
+
221
+ If user_rating >= 7:
222
+ pattern_level: strength
223
+ action: Reinforce this approach
224
+
225
+ If user_rating <= 4:
226
+ pattern_level: weakness
227
+ action: Add to "What to Avoid" section
228
+ ```
229
+
230
+ ### Phase 3: Skill Updates
231
+
232
+ Update the appropriate skill files with **evolution markers**:
233
+
234
+ ```markdown
235
+ <!-- Evolution: 2025-01-12 | source: ep-2025-01-12-001 | skill: debugger -->
236
+
237
+ ## Pattern Added (2025-01-12)
238
+
239
+ **Pattern**: Always verify callbacks are not empty functions
240
+
241
+ **Source**: Episode ep-2025-01-12-001
242
+
243
+ **Confidence**: 0.95
244
+
245
+ ### Updated Checklist
246
+ - [ ] Verify all callbacks have implementations
247
+ - [ ] Test callback execution paths
248
+ ```
249
+
250
+ **Correction Markers** (when fixing wrong guidance):
251
+
252
+ ````markdown
253
+ <!-- Correction: 2025-01-12 | was: "Use callback chain" | reason: caused stale refresh -->
254
+
255
+ ## Corrected Guidance
256
+
257
+ Use direct state monitoring instead of callback chains:
258
+ ```typescript
259
+ // ✅ Do: Direct state monitoring
260
+ const prevPendingCount = usePrevious(pendingCount);
261
+ ```
262
+ ````
263
+
264
+ ### Phase 4: Memory Consolidation
265
+
266
+ 1. **Update semantic memory** (`memory/semantic-patterns.json`)
267
+ 2. **Store episodic memory** (`memory/episodic/YYYY/YYYY-MM-DD-{skill}.json`)
268
+ 3. **Update pattern confidence** based on applications/feedback
269
+ 4. **Prune outdated patterns** (low confidence, no recent applications)
270
+
271
+ ## Self-Correction (on_error hook)
272
+
273
+ Triggered when:
274
+ - Bash command returns non-zero exit code
275
+ - Tests fail after following skill guidance
276
+ - User reports the guidance produced incorrect results
277
+
278
+ **Process:**
279
+
280
+ ```markdown
281
+ ## Self-Correction Workflow
282
+
283
+ 1. Detect Error
284
+ - Capture error context from working/last_error.json
285
+ - Identify which skill guidance was followed
286
+
287
+ 2. Verify Root Cause
288
+ - Was the skill guidance incorrect?
289
+ - Was the guidance misinterpreted?
290
+ - Was the guidance incomplete?
291
+
292
+ 3. Apply Correction
293
+ - Update skill file with corrected guidance
294
+ - Add correction marker with reason
295
+ - Update related patterns in semantic memory
296
+
297
+ 4. Validate Fix
298
+ - Test the corrected guidance
299
+ - Ask user to verify
300
+ ```
301
+
302
+ **Example:**
303
+
304
+ ```markdown
305
+ <!-- Correction: 2025-01-12 | was: "useMemo for claimable ids" | reason: stale data at click time -->
306
+
307
+ ## Self-Correction: Click-Time Computation
308
+
309
+ **Issue**: Using useMemo for claimable IDs caused stale data
310
+ **Fix**: Compute at click time for always-fresh data
311
+ **Pattern**: click_time_vs_open_time_computation
312
+ ```
313
+
314
+ ## Self-Validation
315
+
316
+ Use the validation template (`templates/validation-template.md`; see also `references/appendix.md`) when reviewing updates.
317
+
318
+ ## Hooks Integration
319
+
320
+ ### Wiring Hooks in Claude Code Settings
321
+
322
+ Add to Claude Code settings (`~/.claude/settings.json`):
323
+
324
+ ```json
325
+ {
326
+ "hooks": {
327
+ "PreToolUse": [
328
+ {
329
+ "matcher": "Bash|Write|Edit",
330
+ "hooks": [
331
+ {
332
+ "type": "command",
333
+ "command": "bash ${SKILLS_DIR}/self-improving-agent/hooks/pre-tool.sh \"$TOOL_NAME\" \"$TOOL_INPUT\""
334
+ }
335
+ ]
336
+ }
337
+ ],
338
+ "PostToolUse": [
339
+ {
340
+ "matcher": "Bash",
341
+ "hooks": [
342
+ {
343
+ "type": "command",
344
+ "command": "bash ${SKILLS_DIR}/self-improving-agent/hooks/post-bash.sh \"$TOOL_OUTPUT\" \"$EXIT_CODE\""
345
+ }
346
+ ]
347
+ }
348
+ ],
349
+ "Stop": [
350
+ {
351
+ "matcher": "",
352
+ "hooks": [
353
+ {
354
+ "type": "command",
355
+ "command": "bash ${SKILLS_DIR}/self-improving-agent/hooks/session-end.sh"
356
+ }
357
+ ]
358
+ }
359
+ ]
360
+ }
361
+ }
362
+ ```
363
+
364
+ Replace `${SKILLS_DIR}` with your actual skills path.
365
+
366
+ ## Additional References
367
+
368
+ See `references/appendix.md` for memory structure, workflow diagrams, metrics, feedback templates, and research links.
369
+
370
+ ## Best Practices
371
+
372
+ ### DO
373
+
374
+ - ✅ Learn from EVERY skill interaction
375
+ - ✅ Extract patterns at the right abstraction level
376
+ - ✅ Update multiple related skills
377
+ - ✅ Track confidence and application counts
378
+ - ✅ Ask for user feedback on improvements
379
+ - ✅ Use evolution/correction markers for traceability
380
+ - ✅ Validate guidance before applying broadly
381
+
382
+ ### DON'T
383
+
384
+ - ❌ Over-generalize from single experiences
385
+ - ❌ Update skills without confidence tracking
386
+ - ❌ Ignore negative feedback
387
+ - ❌ Make changes that break existing functionality
388
+ - ❌ Create contradictory patterns
389
+ - ❌ Update skills without understanding context
390
+
391
+ ## Quick Start
392
+
393
+ After any skill completes, this agent automatically:
394
+
395
+ 1. **Analyzes** what happened
396
+ 2. **Extracts** patterns and insights
397
+ 3. **Updates** relevant skill files
398
+ 4. **Logs** to memory for future reference
399
+ 5. **Reports** summary to user
400
+
401
+ ## References
402
+
403
+ - [SimpleMem: Efficient Lifelong Memory for LLM Agents](https://arxiv.org/html/2601.02553v1)
404
+ - [A Survey on the Memory Mechanism of Large Language Model Agents](https://dl.acm.org/doi/10.1145/3748302)
405
+ - [Lifelong Learning of LLM based Agents](https://arxiv.org/html/2501.07278v1)
406
+ - [Evo-Memory: DeepMind's Benchmark](https://shothota.medium.com/evo-memory-deepminds-new-benchmark)
407
+ - [Let's Build a Self-Improving AI Agent](https://medium.com/@nomannayeem/lets-build-a-self-improving-ai-agent-that-learns-from-your-feedback-722d2ce9c2d9)
@@ -0,0 +1,10 @@
1
#!/usr/bin/env bash
# PostToolUse hook: reports a tool call's exit status and, when non-empty,
# its captured output on stderr.
# Usage: post-bash.sh [TOOL_OUTPUT] [EXIT_CODE]
set -euo pipefail

readonly log_tag="[self-improving-agent]"
readonly captured_output="${1:-}"   # first argument; empty when omitted
readonly reported_status="${2:-0}"  # second argument; defaults to 0 when omitted

# Everything this hook prints goes to stderr.
{
  printf '%s PostToolUse: exit=%s\n' "${log_tag}" "${reported_status}"
  if [[ -n "${captured_output}" ]]; then
    printf '%s Output: %s\n' "${log_tag}" "${captured_output}"
  fi
} >&2
@@ -0,0 +1,10 @@
1
#!/usr/bin/env bash
# PreToolUse hook: reports the tool name and, when non-empty, its input on
# stderr.
# Usage: pre-tool.sh [TOOL_NAME] [TOOL_INPUT]
set -euo pipefail

readonly log_tag="[self-improving-agent]"
readonly invoked_tool="${1:-unknown}"  # first argument; "unknown" when omitted
readonly raw_input="${2:-}"            # second argument; empty when omitted

# Everything this hook prints goes to stderr.
{
  printf '%s PreToolUse: %s\n' "${log_tag}" "${invoked_tool}"
  if [[ -n "${raw_input}" ]]; then
    printf '%s Input: %s\n' "${log_tag}" "${raw_input}"
  fi
} >&2
@@ -0,0 +1,4 @@
1
#!/usr/bin/env bash
# Stop hook: writes a single session-end marker line to stderr.
set -euo pipefail

printf '%s\n' "[self-improving-agent] Session ended" >&2