@soleri/forge 5.5.0 → 5.6.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/scaffolder.js +82 -5
- package/dist/scaffolder.js.map +1 -1
- package/dist/skills/skills/brain-debrief.md +214 -0
- package/dist/skills/skills/brainstorming.md +180 -0
- package/dist/skills/skills/code-patrol.md +178 -0
- package/dist/skills/skills/context-resume.md +146 -0
- package/dist/skills/skills/executing-plans.md +216 -0
- package/dist/skills/skills/fix-and-learn.md +167 -0
- package/dist/skills/skills/health-check.md +231 -0
- package/dist/skills/skills/knowledge-harvest.md +185 -0
- package/dist/skills/skills/onboard-me.md +198 -0
- package/dist/skills/skills/retrospective.md +205 -0
- package/dist/skills/skills/second-opinion.md +149 -0
- package/dist/skills/skills/systematic-debugging.md +241 -0
- package/dist/skills/skills/test-driven-development.md +281 -0
- package/dist/skills/skills/vault-capture.md +170 -0
- package/dist/skills/skills/vault-navigator.md +140 -0
- package/dist/skills/skills/verification-before-completion.md +182 -0
- package/dist/skills/skills/writing-plans.md +215 -0
- package/dist/templates/entry-point.js +8 -0
- package/dist/templates/entry-point.js.map +1 -1
- package/dist/templates/test-facades.js +35 -6
- package/dist/templates/test-facades.js.map +1 -1
- package/package.json +1 -1
- package/src/__tests__/scaffolder.test.ts +2 -2
- package/src/scaffolder.ts +82 -5
- package/src/skills/brain-debrief.md +47 -19
- package/src/skills/brainstorming.md +19 -9
- package/src/skills/code-patrol.md +21 -19
- package/src/skills/context-resume.md +14 -11
- package/src/skills/executing-plans.md +30 -15
- package/src/skills/fix-and-learn.md +17 -14
- package/src/skills/health-check.md +29 -23
- package/src/skills/knowledge-harvest.md +27 -20
- package/src/skills/onboard-me.md +16 -15
- package/src/skills/retrospective.md +34 -18
- package/src/skills/second-opinion.md +16 -9
- package/src/skills/systematic-debugging.md +40 -29
- package/src/skills/test-driven-development.md +45 -30
- package/src/skills/vault-capture.md +31 -15
- package/src/skills/vault-navigator.md +24 -13
- package/src/skills/verification-before-completion.md +38 -26
- package/src/skills/writing-plans.md +21 -13
- package/src/templates/entry-point.ts +8 -0
- package/src/templates/test-facades.ts +35 -6
|
@@ -45,37 +45,43 @@ Skip any step = lying, not verifying
|
|
|
45
45
|
After passing all verification commands, run system diagnostics:
|
|
46
46
|
|
|
47
47
|
### Health Check
|
|
48
|
+
|
|
48
49
|
```
|
|
49
50
|
YOUR_AGENT_core op:admin_health
|
|
50
51
|
```
|
|
52
|
+
|
|
51
53
|
Catches issues tests might miss — vault corruption, stale caches, configuration drift.
|
|
52
54
|
|
|
53
55
|
### Full Diagnostic
|
|
56
|
+
|
|
54
57
|
```
|
|
55
58
|
YOUR_AGENT_core op:admin_diagnostic
|
|
56
59
|
```
|
|
60
|
+
|
|
57
61
|
Comprehensive system check — module status, database integrity, cache health, configuration validity.
|
|
58
62
|
|
|
59
63
|
### Vault Analytics
|
|
64
|
+
|
|
60
65
|
```
|
|
61
66
|
YOUR_AGENT_core op:admin_vault_analytics
|
|
62
67
|
```
|
|
68
|
+
|
|
63
69
|
Verify knowledge quality metrics — are capture rates healthy? Any degradation?
|
|
64
70
|
|
|
65
71
|
If any check reports problems, address them before claiming completion.
|
|
66
72
|
|
|
67
73
|
## Common Failures
|
|
68
74
|
|
|
69
|
-
| Claim | Requires | Not Sufficient |
|
|
70
|
-
|
|
71
|
-
| Tests pass | Test command output: 0 failures | Previous run, "should pass" |
|
|
72
|
-
| Linter clean | Linter output: 0 errors | Partial check, extrapolation |
|
|
73
|
-
| Build succeeds | Build command: exit 0 | Linter passing, logs look good |
|
|
74
|
-
| Bug fixed | Test original symptom: passes | Code changed, assumed fixed |
|
|
75
|
-
| Regression test works | Red-green cycle verified | Test passes once |
|
|
76
|
-
| Agent completed | VCS diff shows changes | Agent reports "success" |
|
|
77
|
-
| Requirements met | Line-by-line checklist | Tests passing |
|
|
78
|
-
| Agent healthy | `admin_diagnostic` clean | "No errors in logs" |
|
|
75
|
+
| Claim | Requires | Not Sufficient |
|
|
76
|
+
| --------------------- | ------------------------------- | ------------------------------ |
|
|
77
|
+
| Tests pass | Test command output: 0 failures | Previous run, "should pass" |
|
|
78
|
+
| Linter clean | Linter output: 0 errors | Partial check, extrapolation |
|
|
79
|
+
| Build succeeds | Build command: exit 0 | Linter passing, logs look good |
|
|
80
|
+
| Bug fixed | Test original symptom: passes | Code changed, assumed fixed |
|
|
81
|
+
| Regression test works | Red-green cycle verified | Test passes once |
|
|
82
|
+
| Agent completed | VCS diff shows changes | Agent reports "success" |
|
|
83
|
+
| Requirements met | Line-by-line checklist | Tests passing |
|
|
84
|
+
| Agent healthy | `admin_diagnostic` clean | "No errors in logs" |
|
|
79
85
|
|
|
80
86
|
## Red Flags - STOP
|
|
81
87
|
|
|
@@ -90,44 +96,49 @@ If any check reports problems, address them before claiming completion.
|
|
|
90
96
|
|
|
91
97
|
## Rationalization Prevention
|
|
92
98
|
|
|
93
|
-
| Excuse | Reality |
|
|
94
|
-
|
|
95
|
-
| "Should work now" | RUN the verification |
|
|
96
|
-
| "I'm confident" | Confidence ≠ evidence |
|
|
97
|
-
| "Just this once" | No exceptions |
|
|
98
|
-
| "Linter passed" | Linter ≠ compiler |
|
|
99
|
-
| "Agent said success" | Verify independently |
|
|
100
|
-
| "I'm tired" | Exhaustion ≠ excuse |
|
|
101
|
-
| "Partial check is enough" | Partial proves nothing |
|
|
102
|
-
| "Different words so rule doesn't apply" | Spirit over letter |
|
|
99
|
+
| Excuse | Reality |
|
|
100
|
+
| --------------------------------------- | ---------------------- |
|
|
101
|
+
| "Should work now" | RUN the verification |
|
|
102
|
+
| "I'm confident" | Confidence ≠ evidence |
|
|
103
|
+
| "Just this once" | No exceptions |
|
|
104
|
+
| "Linter passed" | Linter ≠ compiler |
|
|
105
|
+
| "Agent said success" | Verify independently |
|
|
106
|
+
| "I'm tired" | Exhaustion ≠ excuse |
|
|
107
|
+
| "Partial check is enough" | Partial proves nothing |
|
|
108
|
+
| "Different words so rule doesn't apply" | Spirit over letter |
|
|
103
109
|
|
|
104
110
|
## Key Patterns
|
|
105
111
|
|
|
106
112
|
**Tests:**
|
|
113
|
+
|
|
107
114
|
```
|
|
108
115
|
[Run test command] [See: 34/34 pass] "All tests pass"
|
|
109
116
|
NOT: "Should pass now" / "Looks correct"
|
|
110
117
|
```
|
|
111
118
|
|
|
112
119
|
**Regression tests (TDD Red-Green):**
|
|
120
|
+
|
|
113
121
|
```
|
|
114
122
|
Write -> Run (pass) -> Revert fix -> Run (MUST FAIL) -> Restore -> Run (pass)
|
|
115
123
|
NOT: "I've written a regression test" (without red-green verification)
|
|
116
124
|
```
|
|
117
125
|
|
|
118
126
|
**Build:**
|
|
127
|
+
|
|
119
128
|
```
|
|
120
129
|
[Run build] [See: exit 0] "Build passes"
|
|
121
130
|
NOT: "Linter passed" (linter doesn't check compilation)
|
|
122
131
|
```
|
|
123
132
|
|
|
124
133
|
**Requirements:**
|
|
134
|
+
|
|
125
135
|
```
|
|
126
136
|
Re-read plan -> Create checklist -> Verify each -> Report gaps or completion
|
|
127
137
|
NOT: "Tests pass, phase complete"
|
|
128
138
|
```
|
|
129
139
|
|
|
130
140
|
**Agent delegation:**
|
|
141
|
+
|
|
131
142
|
```
|
|
132
143
|
Agent reports success -> Check VCS diff -> Verify changes -> Report actual state
|
|
133
144
|
NOT: Trust agent report
|
|
@@ -149,6 +160,7 @@ This ensures the next session has context about what was verified and completed.
|
|
|
149
160
|
## When To Apply
|
|
150
161
|
|
|
151
162
|
**ALWAYS before:**
|
|
163
|
+
|
|
152
164
|
- ANY variation of success/completion claims
|
|
153
165
|
- ANY expression of satisfaction
|
|
154
166
|
- ANY positive statement about work state
|
|
@@ -162,9 +174,9 @@ Run the command. Read the output. THEN claim the result. This is non-negotiable.
|
|
|
162
174
|
|
|
163
175
|
## Agent Tools Reference
|
|
164
176
|
|
|
165
|
-
| Op | When to Use |
|
|
166
|
-
|
|
167
|
-
| `admin_health` | Quick system health check |
|
|
168
|
-
| `admin_diagnostic` | Comprehensive system diagnostic |
|
|
169
|
-
| `admin_vault_analytics` | Knowledge quality metrics |
|
|
170
|
-
| `session_capture` | Persist verified completion context |
|
|
177
|
+
| Op | When to Use |
|
|
178
|
+
| ----------------------- | ----------------------------------- |
|
|
179
|
+
| `admin_health` | Quick system health check |
|
|
180
|
+
| `admin_diagnostic` | Comprehensive system diagnostic |
|
|
181
|
+
| `admin_vault_analytics` | Knowledge quality metrics |
|
|
182
|
+
| `session_capture` | Persist verified completion context |
|
|
@@ -22,6 +22,7 @@ Assume they are a skilled developer, but know almost nothing about our toolset o
|
|
|
22
22
|
**Never write a plan from scratch.** Always search for existing knowledge first.
|
|
23
23
|
|
|
24
24
|
### 1. Vault First
|
|
25
|
+
|
|
25
26
|
Check the vault for relevant implementation patterns:
|
|
26
27
|
|
|
27
28
|
```
|
|
@@ -30,6 +31,7 @@ YOUR_AGENT_core op:search_intelligent
|
|
|
30
31
|
```
|
|
31
32
|
|
|
32
33
|
Look for:
|
|
34
|
+
|
|
33
35
|
- **Implementation patterns** — proven approaches for similar features
|
|
34
36
|
- **Anti-patterns** — approaches that failed and should be avoided
|
|
35
37
|
- **Testing patterns** — how similar features were tested
|
|
@@ -48,13 +50,16 @@ YOUR_AGENT_core op:vault_tags
|
|
|
48
50
|
```
|
|
49
51
|
|
|
50
52
|
### 2. Web Search Second
|
|
53
|
+
|
|
51
54
|
If the vault doesn't have implementation guidance, search the web:
|
|
55
|
+
|
|
52
56
|
- **Libraries and tools** — is there a package that does this already?
|
|
53
57
|
- **Reference implementations** — how did other projects solve this?
|
|
54
58
|
- **API documentation** — official docs for libraries you'll use
|
|
55
59
|
- **Known issues** — pitfalls others ran into
|
|
56
60
|
|
|
57
61
|
### 3. Then Write the Plan
|
|
62
|
+
|
|
58
63
|
Incorporate vault insights and web findings into the plan. Reference specific vault entries and documentation links when they inform a step. A plan informed by existing knowledge is dramatically better than one written from first principles.
|
|
59
64
|
|
|
60
65
|
## Create a Tracked Plan
|
|
@@ -123,6 +128,7 @@ This generates individual tasks from the plan steps, ready for execution trackin
|
|
|
123
128
|
## Bite-Sized Task Granularity
|
|
124
129
|
|
|
125
130
|
**Each step is one action (2-5 minutes):**
|
|
131
|
+
|
|
126
132
|
- "Write the failing test" - step
|
|
127
133
|
- "Run it to make sure it fails" - step
|
|
128
134
|
- "Implement the minimal code to make the test pass" - step
|
|
@@ -150,6 +156,7 @@ This generates individual tasks from the plan steps, ready for execution trackin
|
|
|
150
156
|
## Task Structure
|
|
151
157
|
|
|
152
158
|
Each task uses this format:
|
|
159
|
+
|
|
153
160
|
- Files: Create / Modify / Test paths
|
|
154
161
|
- Step 1: Write the failing test (with code)
|
|
155
162
|
- Step 2: Run test to verify it fails (with expected output)
|
|
@@ -158,6 +165,7 @@ Each task uses this format:
|
|
|
158
165
|
- Step 5: Commit (with exact git commands)
|
|
159
166
|
|
|
160
167
|
## Remember
|
|
168
|
+
|
|
161
169
|
- Exact file paths always
|
|
162
170
|
- Complete code in plan (not "add validation")
|
|
163
171
|
- Exact commands with expected output
|
|
@@ -192,16 +200,16 @@ Which approach?"
|
|
|
192
200
|
|
|
193
201
|
## Agent Tools Reference
|
|
194
202
|
|
|
195
|
-
| Op | When to Use |
|
|
196
|
-
|
|
197
|
-
| `search_intelligent` | Find relevant patterns before planning |
|
|
198
|
-
| `brain_strengths` | Check proven approaches |
|
|
199
|
-
| `vault_domains` / `vault_tags` | Browse knowledge landscape |
|
|
200
|
-
| `create_plan` | Create tracked, persistent plan |
|
|
201
|
-
| `plan_grade` | Grade plan quality |
|
|
202
|
-
| `plan_auto_improve` | Auto-fix plan weaknesses |
|
|
203
|
-
| `plan_meets_grade` | Verify grade target reached |
|
|
204
|
-
| `plan_iterate` | Iterate on draft with feedback |
|
|
205
|
-
| `plan_split` | Split plan into trackable tasks |
|
|
206
|
-
| `approve_plan` | Lock in approved plan |
|
|
207
|
-
| `plan_stats` | Overview of plan metrics |
|
|
203
|
+
| Op | When to Use |
|
|
204
|
+
| ------------------------------ | -------------------------------------- |
|
|
205
|
+
| `search_intelligent` | Find relevant patterns before planning |
|
|
206
|
+
| `brain_strengths` | Check proven approaches |
|
|
207
|
+
| `vault_domains` / `vault_tags` | Browse knowledge landscape |
|
|
208
|
+
| `create_plan` | Create tracked, persistent plan |
|
|
209
|
+
| `plan_grade` | Grade plan quality |
|
|
210
|
+
| `plan_auto_improve` | Auto-fix plan weaknesses |
|
|
211
|
+
| `plan_meets_grade` | Verify grade target reached |
|
|
212
|
+
| `plan_iterate` | Iterate on draft with feedback |
|
|
213
|
+
| `plan_split` | Split plan into trackable tasks |
|
|
214
|
+
| `approve_plan` | Lock in approved plan |
|
|
215
|
+
| `plan_stats` | Overview of plan metrics |
|
|
@@ -22,6 +22,7 @@ import {
|
|
|
22
22
|
createCoreOps,
|
|
23
23
|
createDomainFacades,
|
|
24
24
|
registerAllFacades,
|
|
25
|
+
seedDefaultPlaybooks,
|
|
25
26
|
} from '@soleri/core';
|
|
26
27
|
import type { OpDefinition } from '@soleri/core';
|
|
27
28
|
import { z } from 'zod';
|
|
@@ -39,6 +40,13 @@ async function main(): Promise<void> {
|
|
|
39
40
|
});
|
|
40
41
|
|
|
41
42
|
const tag = PERSONA.name.toLowerCase();
|
|
43
|
+
|
|
44
|
+
// Seed built-in playbooks (idempotent)
|
|
45
|
+
const seedResult = seedDefaultPlaybooks(runtime.vault);
|
|
46
|
+
if (seedResult.seeded > 0) {
|
|
47
|
+
console.error(\`[\${tag}] Seeded \${seedResult.seeded} built-in playbooks\`);
|
|
48
|
+
}
|
|
49
|
+
|
|
42
50
|
const stats = runtime.vault.stats();
|
|
43
51
|
console.error(\`[\${tag}] Vault: \${stats.totalEntries} entries, Brain: \${runtime.brain.getVocabularySize()} terms\`);
|
|
44
52
|
|
|
@@ -200,6 +200,8 @@ ${domainDescribes}
|
|
|
200
200
|
expect(opNames).toContain('brain_feedback');
|
|
201
201
|
expect(opNames).toContain('brain_feedback_stats');
|
|
202
202
|
expect(opNames).toContain('brain_reset_extracted');
|
|
203
|
+
// Brain decay report (#89)
|
|
204
|
+
expect(opNames).toContain('brain_decay_report');
|
|
203
205
|
// Agent-specific ops (5)
|
|
204
206
|
expect(opNames).toContain('health');
|
|
205
207
|
expect(opNames).toContain('identity');
|
|
@@ -230,7 +232,7 @@ ${domainDescribes}
|
|
|
230
232
|
expect(opNames).toContain('governance_stats');
|
|
231
233
|
expect(opNames).toContain('governance_expire');
|
|
232
234
|
expect(opNames).toContain('governance_dashboard');
|
|
233
|
-
// Planning Extra ops (
|
|
235
|
+
// Planning Extra ops (13)
|
|
234
236
|
expect(opNames).toContain('plan_iterate');
|
|
235
237
|
expect(opNames).toContain('plan_split');
|
|
236
238
|
expect(opNames).toContain('plan_reconcile');
|
|
@@ -240,6 +242,10 @@ ${domainDescribes}
|
|
|
240
242
|
expect(opNames).toContain('plan_archive');
|
|
241
243
|
expect(opNames).toContain('plan_list_tasks');
|
|
242
244
|
expect(opNames).toContain('plan_stats');
|
|
245
|
+
expect(opNames).toContain('plan_execution_metrics');
|
|
246
|
+
expect(opNames).toContain('plan_record_task_metrics');
|
|
247
|
+
expect(opNames).toContain('plan_submit_deliverable');
|
|
248
|
+
expect(opNames).toContain('plan_verify_deliverables');
|
|
243
249
|
// Memory Extra ops (8)
|
|
244
250
|
expect(opNames).toContain('memory_delete');
|
|
245
251
|
expect(opNames).toContain('memory_stats');
|
|
@@ -262,6 +268,10 @@ ${domainDescribes}
|
|
|
262
268
|
expect(opNames).toContain('vault_seed');
|
|
263
269
|
expect(opNames).toContain('vault_backup');
|
|
264
270
|
expect(opNames).toContain('vault_age_report');
|
|
271
|
+
// #89: Bi-temporal
|
|
272
|
+
expect(opNames).toContain('vault_set_temporal');
|
|
273
|
+
expect(opNames).toContain('vault_find_expiring');
|
|
274
|
+
expect(opNames).toContain('vault_find_expired');
|
|
265
275
|
// Admin ops (8)
|
|
266
276
|
expect(opNames).toContain('admin_health');
|
|
267
277
|
expect(opNames).toContain('admin_tool_list');
|
|
@@ -271,7 +281,7 @@ ${domainDescribes}
|
|
|
271
281
|
expect(opNames).toContain('admin_version');
|
|
272
282
|
expect(opNames).toContain('admin_reset_cache');
|
|
273
283
|
expect(opNames).toContain('admin_diagnostic');
|
|
274
|
-
// Loop ops (
|
|
284
|
+
// Loop ops (8)
|
|
275
285
|
expect(opNames).toContain('loop_start');
|
|
276
286
|
expect(opNames).toContain('loop_iterate');
|
|
277
287
|
expect(opNames).toContain('loop_status');
|
|
@@ -279,6 +289,7 @@ ${domainDescribes}
|
|
|
279
289
|
expect(opNames).toContain('loop_history');
|
|
280
290
|
expect(opNames).toContain('loop_is_active');
|
|
281
291
|
expect(opNames).toContain('loop_complete');
|
|
292
|
+
expect(opNames).toContain('loop_anomaly_check');
|
|
282
293
|
// Orchestrate ops (5)
|
|
283
294
|
expect(opNames).toContain('orchestrate_plan');
|
|
284
295
|
expect(opNames).toContain('orchestrate_execute');
|
|
@@ -296,7 +307,7 @@ ${domainDescribes}
|
|
|
296
307
|
expect(opNames).toContain('plan_latest_check');
|
|
297
308
|
expect(opNames).toContain('plan_meets_grade');
|
|
298
309
|
expect(opNames).toContain('plan_auto_improve');
|
|
299
|
-
// Admin Extra ops (
|
|
310
|
+
// Admin Extra ops (11)
|
|
300
311
|
expect(opNames).toContain('admin_telemetry');
|
|
301
312
|
expect(opNames).toContain('admin_telemetry_recent');
|
|
302
313
|
expect(opNames).toContain('admin_telemetry_reset');
|
|
@@ -307,11 +318,14 @@ ${domainDescribes}
|
|
|
307
318
|
expect(opNames).toContain('admin_env');
|
|
308
319
|
expect(opNames).toContain('admin_gc');
|
|
309
320
|
expect(opNames).toContain('admin_export_config');
|
|
310
|
-
|
|
321
|
+
expect(opNames).toContain('admin_hot_reload');
|
|
322
|
+
// Curator Extra ops (4 + 1 hybrid)
|
|
311
323
|
expect(opNames).toContain('curator_entry_history');
|
|
312
324
|
expect(opNames).toContain('curator_record_snapshot');
|
|
313
325
|
expect(opNames).toContain('curator_queue_stats');
|
|
314
326
|
expect(opNames).toContain('curator_enrich');
|
|
327
|
+
// #36: Hybrid contradiction detection
|
|
328
|
+
expect(opNames).toContain('curator_hybrid_contradictions');
|
|
315
329
|
// Project ops (12)
|
|
316
330
|
expect(opNames).toContain('project_get');
|
|
317
331
|
expect(opNames).toContain('project_list');
|
|
@@ -329,8 +343,23 @@ ${domainDescribes}
|
|
|
329
343
|
expect(opNames).toContain('memory_promote_to_global');
|
|
330
344
|
expect(opNames).toContain('memory_configure');
|
|
331
345
|
expect(opNames).toContain('memory_cross_project_search');
|
|
332
|
-
//
|
|
333
|
-
expect(
|
|
346
|
+
// Playbook ops (5)
|
|
347
|
+
expect(opNames).toContain('playbook_list');
|
|
348
|
+
expect(opNames).toContain('playbook_get');
|
|
349
|
+
expect(opNames).toContain('playbook_create');
|
|
350
|
+
expect(opNames).toContain('playbook_match');
|
|
351
|
+
expect(opNames).toContain('playbook_seed');
|
|
352
|
+
// Cognee Sync ops (3)
|
|
353
|
+
expect(opNames).toContain('cognee_sync_status');
|
|
354
|
+
expect(opNames).toContain('cognee_sync_drain');
|
|
355
|
+
expect(opNames).toContain('cognee_sync_reconcile');
|
|
356
|
+
// Intake ops (4)
|
|
357
|
+
expect(opNames).toContain('intake_ingest_book');
|
|
358
|
+
expect(opNames).toContain('intake_process');
|
|
359
|
+
expect(opNames).toContain('intake_status');
|
|
360
|
+
expect(opNames).toContain('intake_preview');
|
|
361
|
+
// Total: 208 (203 core + 5 agent-specific)
|
|
362
|
+
expect(facade.ops.length).toBe(208);
|
|
334
363
|
});
|
|
335
364
|
|
|
336
365
|
it('search should query across all domains with ranked results', async () => {
|