@soleri/forge 5.5.0 → 5.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/scaffolder.js +82 -5
  2. package/dist/scaffolder.js.map +1 -1
  3. package/dist/skills/skills/brain-debrief.md +214 -0
  4. package/dist/skills/skills/brainstorming.md +180 -0
  5. package/dist/skills/skills/code-patrol.md +178 -0
  6. package/dist/skills/skills/context-resume.md +146 -0
  7. package/dist/skills/skills/executing-plans.md +216 -0
  8. package/dist/skills/skills/fix-and-learn.md +167 -0
  9. package/dist/skills/skills/health-check.md +231 -0
  10. package/dist/skills/skills/knowledge-harvest.md +185 -0
  11. package/dist/skills/skills/onboard-me.md +198 -0
  12. package/dist/skills/skills/retrospective.md +205 -0
  13. package/dist/skills/skills/second-opinion.md +149 -0
  14. package/dist/skills/skills/systematic-debugging.md +241 -0
  15. package/dist/skills/skills/test-driven-development.md +281 -0
  16. package/dist/skills/skills/vault-capture.md +170 -0
  17. package/dist/skills/skills/vault-navigator.md +140 -0
  18. package/dist/skills/skills/verification-before-completion.md +182 -0
  19. package/dist/skills/skills/writing-plans.md +215 -0
  20. package/dist/templates/entry-point.js +8 -0
  21. package/dist/templates/entry-point.js.map +1 -1
  22. package/dist/templates/test-facades.js +35 -6
  23. package/dist/templates/test-facades.js.map +1 -1
  24. package/package.json +1 -1
  25. package/src/__tests__/scaffolder.test.ts +2 -2
  26. package/src/scaffolder.ts +82 -5
  27. package/src/skills/brain-debrief.md +47 -19
  28. package/src/skills/brainstorming.md +19 -9
  29. package/src/skills/code-patrol.md +21 -19
  30. package/src/skills/context-resume.md +14 -11
  31. package/src/skills/executing-plans.md +30 -15
  32. package/src/skills/fix-and-learn.md +17 -14
  33. package/src/skills/health-check.md +29 -23
  34. package/src/skills/knowledge-harvest.md +27 -20
  35. package/src/skills/onboard-me.md +16 -15
  36. package/src/skills/retrospective.md +34 -18
  37. package/src/skills/second-opinion.md +16 -9
  38. package/src/skills/systematic-debugging.md +40 -29
  39. package/src/skills/test-driven-development.md +45 -30
  40. package/src/skills/vault-capture.md +31 -15
  41. package/src/skills/vault-navigator.md +24 -13
  42. package/src/skills/verification-before-completion.md +38 -26
  43. package/src/skills/writing-plans.md +21 -13
  44. package/src/templates/entry-point.ts +8 -0
  45. package/src/templates/test-facades.ts +35 -6
@@ -45,37 +45,43 @@ Skip any step = lying, not verifying
45
45
  After passing all verification commands, run system diagnostics:
46
46
 
47
47
  ### Health Check
48
+
48
49
  ```
49
50
  YOUR_AGENT_core op:admin_health
50
51
  ```
52
+
51
53
  Catches issues tests might miss — vault corruption, stale caches, configuration drift.
52
54
 
53
55
  ### Full Diagnostic
56
+
54
57
  ```
55
58
  YOUR_AGENT_core op:admin_diagnostic
56
59
  ```
60
+
57
61
  Comprehensive system check — module status, database integrity, cache health, configuration validity.
58
62
 
59
63
  ### Vault Analytics
64
+
60
65
  ```
61
66
  YOUR_AGENT_core op:admin_vault_analytics
62
67
  ```
68
+
63
69
  Verify knowledge quality metrics — are capture rates healthy? Any degradation?
64
70
 
65
71
  If any check reports problems, address them before claiming completion.
66
72
 
67
73
  ## Common Failures
68
74
 
69
- | Claim | Requires | Not Sufficient |
70
- |-------|----------|----------------|
71
- | Tests pass | Test command output: 0 failures | Previous run, "should pass" |
72
- | Linter clean | Linter output: 0 errors | Partial check, extrapolation |
73
- | Build succeeds | Build command: exit 0 | Linter passing, logs look good |
74
- | Bug fixed | Test original symptom: passes | Code changed, assumed fixed |
75
- | Regression test works | Red-green cycle verified | Test passes once |
76
- | Agent completed | VCS diff shows changes | Agent reports "success" |
77
- | Requirements met | Line-by-line checklist | Tests passing |
78
- | Agent healthy | `admin_diagnostic` clean | "No errors in logs" |
75
+ | Claim | Requires | Not Sufficient |
76
+ | --------------------- | ------------------------------- | ------------------------------ |
77
+ | Tests pass | Test command output: 0 failures | Previous run, "should pass" |
78
+ | Linter clean | Linter output: 0 errors | Partial check, extrapolation |
79
+ | Build succeeds | Build command: exit 0 | Linter passing, logs look good |
80
+ | Bug fixed | Test original symptom: passes | Code changed, assumed fixed |
81
+ | Regression test works | Red-green cycle verified | Test passes once |
82
+ | Agent completed | VCS diff shows changes | Agent reports "success" |
83
+ | Requirements met | Line-by-line checklist | Tests passing |
84
+ | Agent healthy | `admin_diagnostic` clean | "No errors in logs" |
79
85
 
80
86
  ## Red Flags - STOP
81
87
 
@@ -90,44 +96,49 @@ If any check reports problems, address them before claiming completion.
90
96
 
91
97
  ## Rationalization Prevention
92
98
 
93
- | Excuse | Reality |
94
- |--------|---------|
95
- | "Should work now" | RUN the verification |
96
- | "I'm confident" | Confidence ≠ evidence |
97
- | "Just this once" | No exceptions |
98
- | "Linter passed" | Linter ≠ compiler |
99
- | "Agent said success" | Verify independently |
100
- | "I'm tired" | Exhaustion ≠ excuse |
101
- | "Partial check is enough" | Partial proves nothing |
102
- | "Different words so rule doesn't apply" | Spirit over letter |
99
+ | Excuse | Reality |
100
+ | --------------------------------------- | ---------------------- |
101
+ | "Should work now" | RUN the verification |
102
+ | "I'm confident" | Confidence ≠ evidence |
103
+ | "Just this once" | No exceptions |
104
+ | "Linter passed" | Linter ≠ compiler |
105
+ | "Agent said success" | Verify independently |
106
+ | "I'm tired" | Exhaustion ≠ excuse |
107
+ | "Partial check is enough" | Partial proves nothing |
108
+ | "Different words so rule doesn't apply" | Spirit over letter |
103
109
 
104
110
  ## Key Patterns
105
111
 
106
112
  **Tests:**
113
+
107
114
  ```
108
115
  [Run test command] [See: 34/34 pass] "All tests pass"
109
116
  NOT: "Should pass now" / "Looks correct"
110
117
  ```
111
118
 
112
119
  **Regression tests (TDD Red-Green):**
120
+
113
121
  ```
114
122
  Write -> Run (pass) -> Revert fix -> Run (MUST FAIL) -> Restore -> Run (pass)
115
123
  NOT: "I've written a regression test" (without red-green verification)
116
124
  ```
117
125
 
118
126
  **Build:**
127
+
119
128
  ```
120
129
  [Run build] [See: exit 0] "Build passes"
121
130
  NOT: "Linter passed" (linter doesn't check compilation)
122
131
  ```
123
132
 
124
133
  **Requirements:**
134
+
125
135
  ```
126
136
  Re-read plan -> Create checklist -> Verify each -> Report gaps or completion
127
137
  NOT: "Tests pass, phase complete"
128
138
  ```
129
139
 
130
140
  **Agent delegation:**
141
+
131
142
  ```
132
143
  Agent reports success -> Check VCS diff -> Verify changes -> Report actual state
133
144
  NOT: Trust agent report
@@ -149,6 +160,7 @@ This ensures the next session has context about what was verified and completed.
149
160
  ## When To Apply
150
161
 
151
162
  **ALWAYS before:**
163
+
152
164
  - ANY variation of success/completion claims
153
165
  - ANY expression of satisfaction
154
166
  - ANY positive statement about work state
@@ -162,9 +174,9 @@ Run the command. Read the output. THEN claim the result. This is non-negotiable.
162
174
 
163
175
  ## Agent Tools Reference
164
176
 
165
- | Op | When to Use |
166
- |----|-------------|
167
- | `admin_health` | Quick system health check |
168
- | `admin_diagnostic` | Comprehensive system diagnostic |
169
- | `admin_vault_analytics` | Knowledge quality metrics |
170
- | `session_capture` | Persist verified completion context |
177
+ | Op | When to Use |
178
+ | ----------------------- | ----------------------------------- |
179
+ | `admin_health` | Quick system health check |
180
+ | `admin_diagnostic` | Comprehensive system diagnostic |
181
+ | `admin_vault_analytics` | Knowledge quality metrics |
182
+ | `session_capture` | Persist verified completion context |
@@ -22,6 +22,7 @@ Assume they are a skilled developer, but know almost nothing about our toolset o
22
22
  **Never write a plan from scratch.** Always search for existing knowledge first.
23
23
 
24
24
  ### 1. Vault First
25
+
25
26
  Check the vault for relevant implementation patterns:
26
27
 
27
28
  ```
@@ -30,6 +31,7 @@ YOUR_AGENT_core op:search_intelligent
30
31
  ```
31
32
 
32
33
  Look for:
34
+
33
35
  - **Implementation patterns** — proven approaches for similar features
34
36
  - **Anti-patterns** — approaches that failed and should be avoided
35
37
  - **Testing patterns** — how similar features were tested
@@ -48,13 +50,16 @@ YOUR_AGENT_core op:vault_tags
48
50
  ```
49
51
 
50
52
  ### 2. Web Search Second
53
+
51
54
  If the vault doesn't have implementation guidance, search the web:
55
+
52
56
  - **Libraries and tools** — is there a package that does this already?
53
57
  - **Reference implementations** — how did other projects solve this?
54
58
  - **API documentation** — official docs for libraries you'll use
55
59
  - **Known issues** — pitfalls others ran into
56
60
 
57
61
  ### 3. Then Write the Plan
62
+
58
63
  Incorporate vault insights and web findings into the plan. Reference specific vault entries and documentation links when they inform a step. A plan informed by existing knowledge is dramatically better than one written from first principles.
59
64
 
60
65
  ## Create a Tracked Plan
@@ -123,6 +128,7 @@ This generates individual tasks from the plan steps, ready for execution trackin
123
128
  ## Bite-Sized Task Granularity
124
129
 
125
130
  **Each step is one action (2-5 minutes):**
131
+
126
132
  - "Write the failing test" - step
127
133
  - "Run it to make sure it fails" - step
128
134
  - "Implement the minimal code to make the test pass" - step
@@ -150,6 +156,7 @@ This generates individual tasks from the plan steps, ready for execution trackin
150
156
  ## Task Structure
151
157
 
152
158
  Each task uses this format:
159
+
153
160
  - Files: Create / Modify / Test paths
154
161
  - Step 1: Write the failing test (with code)
155
162
  - Step 2: Run test to verify it fails (with expected output)
@@ -158,6 +165,7 @@ Each task uses this format:
158
165
  - Step 5: Commit (with exact git commands)
159
166
 
160
167
  ## Remember
168
+
161
169
  - Exact file paths always
162
170
  - Complete code in plan (not "add validation")
163
171
  - Exact commands with expected output
@@ -192,16 +200,16 @@ Which approach?"
192
200
 
193
201
  ## Agent Tools Reference
194
202
 
195
- | Op | When to Use |
196
- |----|-------------|
197
- | `search_intelligent` | Find relevant patterns before planning |
198
- | `brain_strengths` | Check proven approaches |
199
- | `vault_domains` / `vault_tags` | Browse knowledge landscape |
200
- | `create_plan` | Create tracked, persistent plan |
201
- | `plan_grade` | Grade plan quality |
202
- | `plan_auto_improve` | Auto-fix plan weaknesses |
203
- | `plan_meets_grade` | Verify grade target reached |
204
- | `plan_iterate` | Iterate on draft with feedback |
205
- | `plan_split` | Split plan into trackable tasks |
206
- | `approve_plan` | Lock in approved plan |
207
- | `plan_stats` | Overview of plan metrics |
203
+ | Op | When to Use |
204
+ | ------------------------------ | -------------------------------------- |
205
+ | `search_intelligent` | Find relevant patterns before planning |
206
+ | `brain_strengths` | Check proven approaches |
207
+ | `vault_domains` / `vault_tags` | Browse knowledge landscape |
208
+ | `create_plan` | Create tracked, persistent plan |
209
+ | `plan_grade` | Grade plan quality |
210
+ | `plan_auto_improve` | Auto-fix plan weaknesses |
211
+ | `plan_meets_grade` | Verify grade target reached |
212
+ | `plan_iterate` | Iterate on draft with feedback |
213
+ | `plan_split` | Split plan into trackable tasks |
214
+ | `approve_plan` | Lock in approved plan |
215
+ | `plan_stats` | Overview of plan metrics |
@@ -22,6 +22,7 @@ import {
22
22
  createCoreOps,
23
23
  createDomainFacades,
24
24
  registerAllFacades,
25
+ seedDefaultPlaybooks,
25
26
  } from '@soleri/core';
26
27
  import type { OpDefinition } from '@soleri/core';
27
28
  import { z } from 'zod';
@@ -39,6 +40,13 @@ async function main(): Promise<void> {
39
40
  });
40
41
 
41
42
  const tag = PERSONA.name.toLowerCase();
43
+
44
+ // Seed built-in playbooks (idempotent)
45
+ const seedResult = seedDefaultPlaybooks(runtime.vault);
46
+ if (seedResult.seeded > 0) {
47
+ console.error(\`[\${tag}] Seeded \${seedResult.seeded} built-in playbooks\`);
48
+ }
49
+
42
50
  const stats = runtime.vault.stats();
43
51
  console.error(\`[\${tag}] Vault: \${stats.totalEntries} entries, Brain: \${runtime.brain.getVocabularySize()} terms\`);
44
52
 
@@ -200,6 +200,8 @@ ${domainDescribes}
200
200
  expect(opNames).toContain('brain_feedback');
201
201
  expect(opNames).toContain('brain_feedback_stats');
202
202
  expect(opNames).toContain('brain_reset_extracted');
203
+ // Brain decay report (#89)
204
+ expect(opNames).toContain('brain_decay_report');
203
205
  // Agent-specific ops (5)
204
206
  expect(opNames).toContain('health');
205
207
  expect(opNames).toContain('identity');
@@ -230,7 +232,7 @@ ${domainDescribes}
230
232
  expect(opNames).toContain('governance_stats');
231
233
  expect(opNames).toContain('governance_expire');
232
234
  expect(opNames).toContain('governance_dashboard');
233
- // Planning Extra ops (9)
235
+ // Planning Extra ops (13)
234
236
  expect(opNames).toContain('plan_iterate');
235
237
  expect(opNames).toContain('plan_split');
236
238
  expect(opNames).toContain('plan_reconcile');
@@ -240,6 +242,10 @@ ${domainDescribes}
240
242
  expect(opNames).toContain('plan_archive');
241
243
  expect(opNames).toContain('plan_list_tasks');
242
244
  expect(opNames).toContain('plan_stats');
245
+ expect(opNames).toContain('plan_execution_metrics');
246
+ expect(opNames).toContain('plan_record_task_metrics');
247
+ expect(opNames).toContain('plan_submit_deliverable');
248
+ expect(opNames).toContain('plan_verify_deliverables');
243
249
  // Memory Extra ops (8)
244
250
  expect(opNames).toContain('memory_delete');
245
251
  expect(opNames).toContain('memory_stats');
@@ -262,6 +268,10 @@ ${domainDescribes}
262
268
  expect(opNames).toContain('vault_seed');
263
269
  expect(opNames).toContain('vault_backup');
264
270
  expect(opNames).toContain('vault_age_report');
271
+ // #89: Bi-temporal
272
+ expect(opNames).toContain('vault_set_temporal');
273
+ expect(opNames).toContain('vault_find_expiring');
274
+ expect(opNames).toContain('vault_find_expired');
265
275
  // Admin ops (8)
266
276
  expect(opNames).toContain('admin_health');
267
277
  expect(opNames).toContain('admin_tool_list');
@@ -271,7 +281,7 @@ ${domainDescribes}
271
281
  expect(opNames).toContain('admin_version');
272
282
  expect(opNames).toContain('admin_reset_cache');
273
283
  expect(opNames).toContain('admin_diagnostic');
274
- // Loop ops (7)
284
+ // Loop ops (8)
275
285
  expect(opNames).toContain('loop_start');
276
286
  expect(opNames).toContain('loop_iterate');
277
287
  expect(opNames).toContain('loop_status');
@@ -279,6 +289,7 @@ ${domainDescribes}
279
289
  expect(opNames).toContain('loop_history');
280
290
  expect(opNames).toContain('loop_is_active');
281
291
  expect(opNames).toContain('loop_complete');
292
+ expect(opNames).toContain('loop_anomaly_check');
282
293
  // Orchestrate ops (5)
283
294
  expect(opNames).toContain('orchestrate_plan');
284
295
  expect(opNames).toContain('orchestrate_execute');
@@ -296,7 +307,7 @@ ${domainDescribes}
296
307
  expect(opNames).toContain('plan_latest_check');
297
308
  expect(opNames).toContain('plan_meets_grade');
298
309
  expect(opNames).toContain('plan_auto_improve');
299
- // Admin Extra ops (10)
310
+ // Admin Extra ops (11)
300
311
  expect(opNames).toContain('admin_telemetry');
301
312
  expect(opNames).toContain('admin_telemetry_recent');
302
313
  expect(opNames).toContain('admin_telemetry_reset');
@@ -307,11 +318,14 @@ ${domainDescribes}
307
318
  expect(opNames).toContain('admin_env');
308
319
  expect(opNames).toContain('admin_gc');
309
320
  expect(opNames).toContain('admin_export_config');
310
- // Curator Extra ops (4)
321
+ expect(opNames).toContain('admin_hot_reload');
322
+ // Curator Extra ops (4 + 1 hybrid)
311
323
  expect(opNames).toContain('curator_entry_history');
312
324
  expect(opNames).toContain('curator_record_snapshot');
313
325
  expect(opNames).toContain('curator_queue_stats');
314
326
  expect(opNames).toContain('curator_enrich');
327
+ // #36: Hybrid contradiction detection
328
+ expect(opNames).toContain('curator_hybrid_contradictions');
315
329
  // Project ops (12)
316
330
  expect(opNames).toContain('project_get');
317
331
  expect(opNames).toContain('project_list');
@@ -329,8 +343,23 @@ ${domainDescribes}
329
343
  expect(opNames).toContain('memory_promote_to_global');
330
344
  expect(opNames).toContain('memory_configure');
331
345
  expect(opNames).toContain('memory_cross_project_search');
332
- // Total: 152 (147 core + 5 agent-specific)
333
- expect(facade.ops.length).toBe(152);
346
+ // Playbook ops (5)
347
+ expect(opNames).toContain('playbook_list');
348
+ expect(opNames).toContain('playbook_get');
349
+ expect(opNames).toContain('playbook_create');
350
+ expect(opNames).toContain('playbook_match');
351
+ expect(opNames).toContain('playbook_seed');
352
+ // Cognee Sync ops (3)
353
+ expect(opNames).toContain('cognee_sync_status');
354
+ expect(opNames).toContain('cognee_sync_drain');
355
+ expect(opNames).toContain('cognee_sync_reconcile');
356
+ // Intake ops (4)
357
+ expect(opNames).toContain('intake_ingest_book');
358
+ expect(opNames).toContain('intake_process');
359
+ expect(opNames).toContain('intake_status');
360
+ expect(opNames).toContain('intake_preview');
361
+ // Total: 208 (203 core + 5 agent-specific)
362
+ expect(facade.ops.length).toBe(208);
334
363
  });
335
364
 
336
365
  it('search should query across all domains with ranked results', async () => {