cognitive-core 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/dist/surfacing/skill-publisher.d.ts +3 -3
  2. package/dist/surfacing/skill-publisher.d.ts.map +1 -1
  3. package/dist/surfacing/skill-publisher.js +90 -41
  4. package/dist/surfacing/skill-publisher.js.map +1 -1
  5. package/dist/surfacing/sqlite-storage-adapter.d.ts +2 -2
  6. package/dist/surfacing/sqlite-storage-adapter.d.ts.map +1 -1
  7. package/dist/surfacing/sqlite-storage-adapter.js +15 -10
  8. package/dist/surfacing/sqlite-storage-adapter.js.map +1 -1
  9. package/dist/types/playbook.d.ts +11 -0
  10. package/dist/types/playbook.d.ts.map +1 -1
  11. package/dist/types/playbook.js +2 -0
  12. package/dist/types/playbook.js.map +1 -1
  13. package/dist/workspace/templates/index.d.ts +1 -0
  14. package/dist/workspace/templates/index.d.ts.map +1 -1
  15. package/dist/workspace/templates/index.js +2 -0
  16. package/dist/workspace/templates/index.js.map +1 -1
  17. package/dist/workspace/templates/skill-enrichment.d.ts +48 -0
  18. package/dist/workspace/templates/skill-enrichment.d.ts.map +1 -0
  19. package/dist/workspace/templates/skill-enrichment.js +175 -0
  20. package/dist/workspace/templates/skill-enrichment.js.map +1 -0
  21. package/package.json +4 -4
  22. package/src/surfacing/skill-publisher.ts +116 -49
  23. package/src/surfacing/sqlite-storage-adapter.ts +14 -12
  24. package/src/types/playbook.ts +15 -0
  25. package/src/workspace/templates/index.ts +7 -0
  26. package/src/workspace/templates/skill-enrichment.ts +275 -0
  27. package/tests/integration/ranking-driven-loadout-e2e.test.ts +185 -0
  28. package/tests/integration/skill-publishing-filesystem-e2e.test.ts +216 -0
  29. package/tests/surfacing/skill-publisher.test.ts +86 -18
  30. package/tests/surfacing/sqlite-storage-adapter.test.ts +0 -9
@@ -0,0 +1,275 @@ package/src/workspace/templates/skill-enrichment.ts
1
+ /**
2
+ * Skill Enrichment Template
3
+ *
4
+ * Agent-in-the-loop step between playbook extraction and skill publishing.
5
+ * Given a Playbook and its procedurally generated SKILL.md skeleton, an agent
6
+ * enriches the skill with narrative prose, worked examples, cross-references,
7
+ * and a trigger-rich description — things a procedural converter cannot produce.
8
+ *
9
+ * Complexity routing:
10
+ * - Simple playbooks (<=3 tactics, low complexity) → heuristic: keep procedural output as-is
11
+ * - Standard/complex playbooks → agent enrichment
12
+ */
13
+
14
+ import type { WorkspaceHandle } from 'agent-workspace';
15
+ import type { Playbook } from '../../types/index.js';
16
+ import { convertPlaybookToSkill } from '../../surfacing/skill-publisher.js';
17
+ import type {
18
+ AgenticTaskTemplate,
19
+ AnalysisComplexity,
20
+ ResourceSpec,
21
+ } from '../types.js';
22
+ import type { ComputeRequirements } from '../../runtime/compute-provider.js';
23
+
24
+ // ============================================================
25
+ // Input / Output Types
26
+ // ============================================================
27
+
28
/**
 * Everything the enrichment task needs to turn one playbook into a skill.
 *
 * Only `playbook` is mandatory; the remaining fields are optional hints that
 * are written into the agent workspace when present.
 */
export interface SkillEnrichmentInput {
  /** Playbook that will be enriched into a skill. */
  playbook: Playbook;

  /** Existing hand-written SKILL.md whose style the agent should imitate. */
  styleReference?: string;

  /** Names and descriptions of related skills, used for cross-references. */
  relatedSkills?: { name: string; description: string }[];

  /** Free-form domain context (e.g. "swarmkit ecosystem", "TypeScript tooling"). */
  domainContext?: string;
}
38
+
39
/**
 * Result of the enrichment step, either produced by the agent or by the
 * heuristic fallback.
 */
export interface SkillEnrichmentOutput {
  /** Trigger-rich SKILL.md frontmatter description (under 1024 chars). */
  description: string;

  /** Markdown body that replaces the procedurally generated instructions. */
  instructions: string;

  /** Worked examples generated by the agent. */
  examples: {
    title: string;
    scenario: string;
    steps: string;
  }[];

  /** Cross-references to related skills. */
  seeAlso: {
    name: string;
    relation: string;
  }[];

  /** The agent's notes on what the playbook is missing. */
  gaps: string[];
}
58
+
59
+ // ============================================================
60
+ // Template Implementation
61
+ // ============================================================
62
+
63
/**
 * View an arbitrary parsed-JSON value as a key/value record.
 *
 * Non-object values (including `null` and `undefined`) become an empty record,
 * so property lookups on them yield `undefined` instead of throwing.
 */
function asRecord(value: unknown): Record<string, unknown> {
  return typeof value === 'object' && value !== null
    ? (value as Record<string, unknown>)
    : {};
}

/**
 * Agentic task template that enriches a procedurally generated SKILL.md.
 *
 * Simple playbooks are routed to `heuristicFallback`, which returns the
 * procedural conversion unchanged; everything else is handed to an agent that
 * reads the files written by `prepareWorkspace` and writes
 * `output/enrichment.json`, which `collectOutput` then normalizes.
 */
export const skillEnrichmentTemplate: AgenticTaskTemplate<
  SkillEnrichmentInput,
  SkillEnrichmentOutput
> = {
  taskType: 'skill-enrichment',
  domain: 'skill-publishing',
  description: 'Enrich a procedurally generated SKILL.md with narrative prose, examples, and cross-references',

  /**
   * Route the playbook by size and declared complexity.
   *
   * @returns `'heuristic'` for at most 3 tactics with no steps and a
   *   non-complex playbook, `'thorough'` for complex playbooks or more than
   *   8 tactics, `'standard'` otherwise.
   */
  assessComplexity(input: SkillEnrichmentInput): AnalysisComplexity {
    const pb = input.playbook;
    const tacticCount = pb.guidance.tactics.length;
    const hasSteps = (pb.guidance.steps?.length ?? 0) > 0;
    const isComplex = pb.complexity === 'complex';

    if (tacticCount <= 3 && !hasSteps && !isComplex) return 'heuristic';
    if (isComplex || tacticCount > 8) return 'thorough';
    return 'standard';
  },

  /**
   * Produce an output without an agent: the procedural description and
   * instructions are kept as-is, and every related skill becomes a
   * "Related: ..." cross-reference. No examples or gaps are reported.
   */
  async heuristicFallback(input: SkillEnrichmentInput): Promise<SkillEnrichmentOutput> {
    const skill = convertPlaybookToSkill(input.playbook);
    return {
      description: skill.description,
      instructions: skill.instructions,
      examples: [],
      seeAlso: (input.relatedSkills ?? []).map((r) => ({
        name: r.name,
        relation: `Related: ${r.description}`,
      })),
      gaps: [],
    };
  },

  /**
   * Write the agent's input files: the playbook as JSON, the procedural
   * skeleton (body and description), and any optional style reference,
   * related skills, and domain context that were supplied.
   */
  async prepareWorkspace(
    input: SkillEnrichmentInput,
    handle: WorkspaceHandle,
  ): Promise<void> {
    const { playbook } = input;

    // Write the playbook as structured JSON. Only a subset of the evolution
    // record (version, counters, refinements) is exposed to the agent.
    await handle.writeJson('input', 'playbook.json', {
      name: playbook.name,
      applicability: playbook.applicability,
      guidance: playbook.guidance,
      verification: playbook.verification,
      complexity: playbook.complexity,
      confidence: playbook.confidence,
      evolution: {
        version: playbook.evolution.version,
        successCount: playbook.evolution.successCount,
        failureCount: playbook.evolution.failureCount,
        refinements: playbook.evolution.refinements,
      },
      provenance: playbook.provenance,
      userInvocable: playbook.userInvocable,
      publishMetadata: playbook.publishMetadata,
    });

    // Write the procedural skeleton for the agent to improve upon
    const skill = convertPlaybookToSkill(playbook);
    await handle.writeRaw('input', 'skeleton.md', skill.instructions);
    await handle.writeRaw('input', 'skeleton-description.txt', skill.description);

    // Style reference (if provided)
    if (input.styleReference) {
      await handle.writeRaw('input', 'style-reference.md', input.styleReference);
    }

    // Related skills for cross-referencing
    if (input.relatedSkills && input.relatedSkills.length > 0) {
      await handle.writeJson('input', 'related-skills.json', input.relatedSkills);
    }

    // Domain context
    if (input.domainContext) {
      await handle.writeRaw('input', 'domain-context.txt', input.domainContext);
    }
  },

  /**
   * Build the agent prompt. The optional inputs are listed (and the style
   * guideline appended) under the same conditions `prepareWorkspace` uses to
   * write the corresponding files, so the prompt never names a missing file.
   */
  buildTaskPrompt(input: SkillEnrichmentInput): string {
    const parts: string[] = [
      `Enrich the skill "${input.playbook.name}" into a high-quality SKILL.md that an AI agent will load and follow.`,
      '',
      '## Inputs',
      '',
      '- `input/playbook.json` — the structured playbook (applicability, guidance, verification)',
      '- `input/skeleton.md` — procedurally generated markdown body (your starting point)',
      '- `input/skeleton-description.txt` — procedurally generated frontmatter description',
    ];

    if (input.styleReference) {
      parts.push('- `input/style-reference.md` — a hand-written SKILL.md to match in tone and structure');
    }
    if (input.relatedSkills && input.relatedSkills.length > 0) {
      parts.push('- `input/related-skills.json` — related skills to cross-reference');
    }
    if (input.domainContext) {
      parts.push('- `input/domain-context.txt` — domain context for accurate terminology');
    }

    parts.push(
      '',
      '## What to produce',
      '',
      'Read all inputs, then write `output/enrichment.json` with this schema:',
      '',
      '```json',
      '{',
      ' "description": "Trigger-rich frontmatter description, under 1024 chars. Structure: [What it does] + [Use when ...trigger phrases...]. Include concrete phrases users would say.",',
      ' "instructions": "Full markdown body. Must include:\\n- ## When to use (bulleted)\\n- ## When not to use (bulleted)\\n- ## Workflow (numbered tactics expanded into full prose — explain WHY each step matters, not just WHAT to do)\\n- ## Inputs / ## Outputs\\n- ## Verification (bulleted success/failure indicators)\\n- ## Edge cases (narrative, with recovery paths)\\n- ## Examples (2-3 worked scenarios with concrete commands)\\n- ## See also (cross-references)",',
      ' "examples": [{"title": "...", "scenario": "...", "steps": "..."}],',
      ' "seeAlso": [{"name": "skill-name", "relation": "why it is related"}],',
      ' "gaps": ["anything missing from the playbook that the skill should cover but cannot without more data"]',
      '}',
      '```',
      '',
      '## Guidelines',
      '',
      '- The skeleton is your starting point — improve it, do not discard it.',
      '- Expand each tactic in the Workflow section into a paragraph that explains the rationale, not just the command.',
      '- Generate 2-3 realistic examples with concrete tool calls (MCP tools, CLI commands, or library calls as appropriate for the domain).',
      '- Cross-reference related skills by name in the See Also section.',
      '- Flag gaps honestly — if the playbook is missing triggers, anti-patterns, or verification criteria, say so in the gaps array.',
      '- Keep the description under 1024 characters. It must include trigger phrases users would actually say.',
      '- Do not invent capabilities the playbook does not describe. Enrich what is there; do not fabricate.',
    );

    if (input.styleReference) {
      parts.push(
        '- Match the tone, section structure, and level of detail from the style reference.',
      );
    }

    return parts.join('\n');
  },

  /** This template declares no skills. */
  getSkills() { return []; },

  /** Declare the domain context as a file resource when one was supplied. */
  getResources(input: SkillEnrichmentInput): ResourceSpec[] {
    const resources: ResourceSpec[] = [];
    if (input.domainContext) {
      resources.push({
        type: 'file',
        path: 'domain-context.txt',
        source: input.domainContext,
        description: 'Domain context for accurate terminology',
      });
    }
    return resources;
  },

  outputConfig: {
    files: [
      {
        path: 'enrichment.json',
        format: 'json' as const,
        required: true,
        description: 'Enriched skill content with description, instructions, examples, cross-refs, and gaps',
      },
    ],
  },

  /**
   * Read `output/enrichment.json` and normalize it into a well-typed output.
   *
   * Normalization is best-effort per field: a missing or wrongly typed field
   * falls back to an empty string / empty array, the description is truncated
   * to 1024 characters, and list entries that are not objects (including
   * `null`) become entries with empty strings rather than aborting the whole
   * collection.
   *
   * @throws Error when the file parses to `null`/`undefined`, i.e. there is no
   *   JSON value to normalize at all.
   */
  async collectOutput(handle: WorkspaceHandle): Promise<SkillEnrichmentOutput> {
    const parsed: unknown = await handle.readJson('output', 'enrichment.json');
    if (parsed === null || parsed === undefined) {
      // Previously surfaced as "Cannot read properties of null"; name the file instead.
      throw new Error(
        'skill-enrichment: output/enrichment.json is empty or null; expected a JSON object',
      );
    }

    const raw = asRecord(parsed);
    const text = (value: unknown): string => String(value ?? '');
    const records = (value: unknown): Record<string, unknown>[] =>
      Array.isArray(value) ? value.map((entry) => asRecord(entry)) : [];

    return {
      description: typeof raw.description === 'string'
        ? raw.description.slice(0, 1024)
        : '',
      instructions: typeof raw.instructions === 'string'
        ? raw.instructions
        : '',
      examples: records(raw.examples).map((e) => ({
        title: text(e.title),
        scenario: text(e.scenario),
        steps: text(e.steps),
      })),
      seeAlso: records(raw.seeAlso).map((s) => ({
        name: text(s.name),
        relation: text(s.relation),
      })),
      gaps: Array.isArray(raw.gaps)
        ? (raw.gaps as unknown[]).map((gap) => String(gap))
        : [],
    };
  },

  /** Intentionally a no-op. */
  async processOutput(): Promise<void> {
    // Caller handles merging the enriched output back into the Skill
    // and publishing via SkillPublisher.
  },

  computeRequirements: {
    mode: 'local',
    complexity: 'standard',
  },

  /**
   * Compute requirements for a run: always local, with a 300 s timeout for
   * `'thorough'` runs and 180 s for everything else.
   */
  getComputeRequirements(
    _input: SkillEnrichmentInput,
    complexity: AnalysisComplexity,
  ): ComputeRequirements {
    return {
      mode: 'local',
      complexity,
      timeout: complexity === 'thorough' ? 300_000 : 180_000,
    };
  },

  agentType: 'claude-code',
  timeout: 180_000,
  captureToolCalls: true,
};
@@ -0,0 +1,185 @@ package/tests/integration/ranking-driven-loadout-e2e.test.ts
1
+ /**
2
+ * E2E: ranking-driven loadout — the canonical replacement for skill-tree's
3
+ * removed `LoadoutCriteria.minSuccessRate`.
4
+ *
5
+ * The 0.2 deprecation moved live ranking responsibility upstream of the
6
+ * `include: [...]` list. This test proves that pattern works end-to-end:
7
+ *
8
+ * 1. Build playbooks with different evolution states.
9
+ * 2. Publish all of them as skills via SkillPublisher.
10
+ * 3. Rank the playbooks externally by `getPlaybookSuccessRate`.
11
+ * 4. Pass the top-N IDs to skill-tree's LoadoutCompiler via `include`.
12
+ * 5. Verify the compiled loadout matches the ranking — no metric
13
+ * filter inside skill-tree, no snapshot drift.
14
+ *
15
+ * Real components only:
16
+ * - cognitive-core's Playbook + SkillPublisher
17
+ * - skill-tree's createSkillBank + LoadoutCompiler
18
+ * - real filesystem storage in a temp dir
19
+ */
20
+
21
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest';
22
+ import * as fs from 'node:fs/promises';
23
+ import * as path from 'node:path';
24
+ import * as os from 'node:os';
25
+
26
+ import {
27
+ createPlaybook,
28
+ getPlaybookSuccessRate,
29
+ type Playbook,
30
+ } from '../../src/types/playbook.js';
31
+ import { SkillPublisher } from '../../src/surfacing/skill-publisher.js';
32
+ import { createSkillBank, LoadoutCompiler } from 'skill-tree';
33
+
34
/** Scratch directory for the current test; recreated for every test case. */
let testDir: string;

// Give each test its own freshly created temp directory.
beforeEach(async () => {
  const prefix = path.join(os.tmpdir(), 'cc-rank-loadout-e2e-');
  testDir = await fs.mkdtemp(prefix);
});

// Remove the directory and everything in it once the test is done.
afterEach(async () => {
  await fs.rm(testDir, { force: true, recursive: true });
});
43
+
44
/**
 * Build a minimal playbook whose only interesting property is its track
 * record.
 *
 * @param id - Used as both the playbook id and its name.
 * @param successCount - Recorded successes in the evolution record.
 * @param failureCount - Recorded failures in the evolution record.
 * @param overrides - Guidance fields that replace the generated defaults.
 */
function pb(
  id: string,
  successCount: number,
  failureCount: number,
  overrides: Partial<Playbook['guidance']> = {},
): Playbook {
  const situation = `Situation for ${id}`;
  const strategy = `Strategy for ${id}`;

  return createPlaybook({
    id,
    name: id,
    applicability: {
      situations: [situation],
      triggers: [],
      antiPatterns: [],
      domains: ['testing'],
    },
    // Defaults first, so caller-supplied guidance fields win.
    guidance: { strategy, tactics: [], ...overrides },
    verification: { successIndicators: [], failureIndicators: [] },
    evolution: {
      version: '1.0.0',
      createdFrom: ['session-x'],
      failures: [],
      refinements: [],
      successCount,
      failureCount,
    },
  });
}
75
+
76
describe('ranking-driven loadout e2e (cognitive-core ranks → skill-tree compiles)', () => {
  type SkillBank = ReturnType<typeof createSkillBank>;

  /**
   * Open a skill bank on the per-test directory, run `body` against it, and
   * always shut the bank down afterwards — even when an assertion throws —
   * so no bank is still open when the temp directory is removed.
   */
  async function withBank(body: (bank: SkillBank) => Promise<void>): Promise<void> {
    const bank = createSkillBank({ storage: { basePath: testDir } });
    await bank.initialize();
    try {
      await body(bank);
    } finally {
      await bank.shutdown();
    }
  }

  /** Rank a copy of the playbooks by live success rate, highest first. */
  const rankBySuccessRate = (playbooks: Playbook[]): Playbook[] =>
    [...playbooks].sort(
      (a, b) => getPlaybookSuccessRate(b) - getPlaybookSuccessRate(a),
    );

  it('top-N by successRate flows through skill-tree compile via include list', async () => {
    await withBank(async (bank) => {
      const publisher = new SkillPublisher(bank.getStorage());

      // Build playbooks with disparate success rates
      const playbooks: Playbook[] = [
        pb('hot-skill', 18, 2), // 0.90
        pb('warm-skill', 12, 8), // 0.60
        pb('cold-skill', 1, 9), // 0.10
        pb('lukewarm-skill', 6, 4), // 0.60
        pb('zero-skill', 0, 0), // 0.00 (untested)
      ];

      // Publish every playbook
      for (const p of playbooks) {
        const result = await publisher.publishPlaybook(p);
        expect(result.published).toBe(true);
      }

      // Rank externally — the new pattern that replaces the removed
      // LoadoutCriteria.minSuccessRate filter
      const topThreeIds = rankBySuccessRate(playbooks)
        .slice(0, 3)
        .map((p) => p.id);
      expect(topThreeIds).toContain('hot-skill');
      expect(topThreeIds).not.toContain('zero-skill');
      expect(topThreeIds).not.toContain('cold-skill');

      // Pass top-N to skill-tree's compiler via include + maxSkills cap.
      //
      // skill-tree 0.2 makes `include` a presence guarantee — included
      // skills are always in the result, in the order listed. Combine
      // with `maxSkills: include.length` to express "exactly these N
      // skills" (the documented restrict-to-list pattern).
      const compiler = new LoadoutCompiler(bank.getStorage());
      const selected = await compiler.compile({
        include: topThreeIds,
        maxSkills: topThreeIds.length,
      });

      // include preserves order; we passed top-1, top-2, top-3
      expect(selected.map((s) => s.id)).toEqual(topThreeIds);
    });
  });

  it('include preserves caller-specified order from the ranking', async () => {
    await withBank(async (bank) => {
      const publisher = new SkillPublisher(bank.getStorage());

      const playbooks: Playbook[] = [
        pb('alpha', 5, 0), // 1.00
        pb('beta', 3, 1), // 0.75
        pb('gamma', 2, 2), // 0.50
      ];
      for (const p of playbooks) {
        await publisher.publishPlaybook(p);
      }

      const orderedIds = rankBySuccessRate(playbooks).map((p) => p.id);

      // skill-tree 0.2 preserves include order — included skills appear
      // at the front in the order listed. This makes include a viable
      // surface for caller-driven ranking.
      const compiler = new LoadoutCompiler(bank.getStorage());
      const selected = await compiler.compile({ include: orderedIds });

      expect(selected.map((s) => s.id)).toEqual(orderedIds);
    });
  });

  it('demonstrates why the old minSuccessRate filter was lossy', async () => {
    // The old skill-tree had `LoadoutCriteria.minSuccessRate` which
    // filtered against `Skill.metrics.successRate`, a SNAPSHOT taken at
    // publish time. If the playbook evolved after publish (more
    // successes/failures), the stored snapshot would drift from reality.
    //
    // Now: rank live, then pass IDs. No snapshot, no drift.
    await withBank(async (bank) => {
      const publisher = new SkillPublisher(bank.getStorage());

      const playbook = pb('evolving', 5, 5); // 0.50 at publish
      await publisher.publishPlaybook(playbook);

      // Drift: more successes after publish — playbook's live rate
      // diverges from anything we could have captured at publish time
      playbook.evolution.successCount = 50;
      playbook.evolution.failureCount = 5;
      const liveRate = getPlaybookSuccessRate(playbook);
      expect(liveRate).toBeCloseTo(50 / 55, 4); // ~0.909

      // Re-publishing IS the way to refresh; the stored skill content is
      // a frozen snapshot, but we're not relying on metric values inside
      // that snapshot anymore.
      const compiler = new LoadoutCompiler(bank.getStorage());
      const selected = await compiler.compile({ include: ['evolving'] });
      expect(selected).toHaveLength(1);
      expect((selected[0] as Record<string, unknown>).metrics).toBeUndefined();

      // The "is this skill effective" decision happens upstream — the
      // include caller (cognitive-core consumer) knows liveRate ~0.909
      // and would include this skill in a top-N selection.
      expect(liveRate).toBeGreaterThan(0.7);
    });
  });
});
@@ -0,0 +1,216 @@ package/tests/integration/skill-publishing-filesystem-e2e.test.ts
1
+ /**
2
+ * E2E: cognitive-core publishing to skill-tree's FilesystemStorageAdapter.
3
+ *
4
+ * Validates the cross-repo handshake after the skill-tree 0.2 metrics
5
+ * deprecation. Uses real components throughout — no mocks:
6
+ * - real Playbook objects with evolution mutations
7
+ * - real cognitive-core SkillPublisher
8
+ * - real skill-tree FilesystemStorageAdapter (file-backed in temp dir)
9
+ * - real on-disk SKILL.md + .skilltree.json sidecar inspection
10
+ *
11
+ * The contract verified:
12
+ * 1. The published Skill has NO `metrics` field (Phase 2/3 invariant).
13
+ * 2. The on-disk artifacts contain the playbook content but no metric
14
+ * JSON in the sidecar.
15
+ * 3. Reopening the bank and retrieving via getSkill returns the same
16
+ * content with no metrics.
17
+ * 4. Live evolution data still lives on the playbook side — recording
18
+ * success after publish updates the playbook but does NOT update the
19
+ * stored skill (republish required).
20
+ */
21
+
22
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest';
23
+ import * as fs from 'node:fs/promises';
24
+ import * as fss from 'node:fs';
25
+ import * as path from 'node:path';
26
+ import * as os from 'node:os';
27
+
28
+ import {
29
+ createPlaybook,
30
+ getPlaybookSuccessRate,
31
+ type Playbook,
32
+ } from '../../src/types/playbook.js';
33
+ import { SkillPublisher, convertPlaybookToSkill } from '../../src/surfacing/skill-publisher.js';
34
+ import { createSkillBank } from 'skill-tree';
35
+
36
/** Scratch directory for the current test; recreated for every test case. */
let testDir: string;

// Give each test its own freshly created temp directory.
beforeEach(async () => {
  const prefix = path.join(os.tmpdir(), 'cc-publish-fs-e2e-');
  testDir = await fs.mkdtemp(prefix);
});

// Remove the directory and everything in it once the test is done.
afterEach(async () => {
  await fs.rm(testDir, { force: true, recursive: true });
});
45
+
46
/**
 * Build a realistic playbook fixture, optionally customised per test.
 *
 * Nested sections (`applicability`, `guidance`, `verification`, `evolution`)
 * are merged field-by-field with the defaults, so a test can override a
 * single nested field without restating the rest. Top-level fields that have
 * no default here are passed through unchanged.
 *
 * @param overrides - Partial playbook whose fields take precedence over the
 *   fixture defaults.
 */
function makePlaybook(overrides: Partial<Playbook> = {}): Playbook {
  return createPlaybook({
    // Spread first: the explicit keys below must win, otherwise the raw
    // override objects would replace the merged sections and an explicit
    // `undefined` would erase the `??` defaults.
    ...overrides,
    id: overrides.id ?? 'pb-test',
    name: overrides.name ?? 'fix-typescript-imports',
    applicability: {
      situations: ['TypeScript ESM build emits "Cannot find module" errors'],
      triggers: ['build error', 'esm migration'],
      antiPatterns: ['CommonJS-only project'],
      domains: ['typescript', 'esm'],
      ...overrides.applicability,
    },
    guidance: {
      strategy: 'Add .js extensions to all relative imports',
      tactics: ['Use codemod or sed -i', 'Verify with tsc --noEmit'],
      ...overrides.guidance,
    },
    verification: {
      successIndicators: ['tsc --noEmit passes', 'tests run'],
      failureIndicators: [],
      ...overrides.verification,
    },
    evolution: {
      version: '1.0.0',
      createdFrom: ['session-init'],
      failures: [],
      refinements: [],
      successCount: 8,
      failureCount: 2,
      lastUsed: new Date('2025-06-01T12:00:00Z'),
      ...overrides.evolution,
    },
    confidence: overrides.confidence ?? 0.85,
  });
}
81
+
82
describe('cognitive-core → skill-tree filesystem publish e2e', () => {
  type SkillBank = ReturnType<typeof createSkillBank>;

  /**
   * Open a skill bank on the per-test directory, run `body` against it, and
   * always shut the bank down afterwards — even when an assertion throws.
   *
   * Tests that "reopen" the bank call this twice, so the writing bank is
   * fully shut down before the reading bank is created on the same directory.
   */
  async function withBank(body: (bank: SkillBank) => Promise<void>): Promise<void> {
    const bank = createSkillBank({ storage: { basePath: testDir } });
    await bank.initialize();
    try {
      await body(bank);
    } finally {
      await bank.shutdown();
    }
  }

  it('publishes a playbook to filesystem storage with no metrics field on disk', async () => {
    await withBank(async (bank) => {
      const publisher = new SkillPublisher(bank.getStorage());

      const playbook = makePlaybook();
      const result = await publisher.publishPlaybook(playbook);
      expect(result.published).toBe(true);
      expect(result.skillId).toBe(playbook.id);

      // Inspect the on-disk artifacts
      const skillsRoot = path.join(testDir, '.skilltree', 'skills', playbook.id);
      expect(fss.existsSync(skillsRoot)).toBe(true);

      const skillMd = await fs.readFile(path.join(skillsRoot, 'SKILL.md'), 'utf-8');
      // Frontmatter must NOT carry any metric fields
      expect(skillMd).not.toMatch(/^successRate:/m);
      expect(skillMd).not.toMatch(/^usageCount:/m);
      expect(skillMd).not.toMatch(/^feedbackScores:/m);
      expect(skillMd).not.toMatch(/^lastUsed:/m);
      // Body should carry the playbook's strategy
      expect(skillMd).toContain('Add .js extensions');

      // Sidecar carries source/lineage but no metrics block
      const sidecar = JSON.parse(
        await fs.readFile(path.join(skillsRoot, '.skilltree.json'), 'utf-8'),
      );
      expect(sidecar.metrics).toBeUndefined();
      expect(sidecar.source?.type).toBe('extracted');
    });
  });

  it('roundtrips through getSkill without exposing metrics', async () => {
    const playbook = makePlaybook({ id: 'pb-roundtrip' });

    await withBank(async (writer) => {
      await new SkillPublisher(writer.getStorage()).publishPlaybook(playbook);
    });

    // Reopen via SkillBank and retrieve
    await withBank(async (reader) => {
      const skill = await reader.getSkill('pb-roundtrip');

      expect(skill).not.toBeNull();
      expect(skill!.id).toBe('pb-roundtrip');
      expect(skill!.name).toBe('fix-typescript-imports');
      expect(skill!.instructions).toContain('Add .js extensions');
      // Skill-tree 0.2 has no metrics on the type at all
      expect((skill as Record<string, unknown>).metrics).toBeUndefined();
    });
  });

  it('keeps live evolution data on the playbook after publish (no skill-side mutation on recordSuccess)', async () => {
    const playbook = makePlaybook({ id: 'pb-live-evolution' });

    await withBank(async (writer) => {
      await new SkillPublisher(writer.getStorage()).publishPlaybook(playbook);
    });

    const initialRate = getPlaybookSuccessRate(playbook);
    expect(initialRate).toBe(0.8); // 8 / (8 + 2)

    // Mutate playbook (this would normally happen via PlaybookLibrary.recordSuccess)
    playbook.evolution.successCount += 5;
    playbook.evolution.lastUsed = new Date();
    playbook.confidence = 0.92;

    const newRate = getPlaybookSuccessRate(playbook);
    expect(newRate).toBeCloseTo(13 / 15, 4);

    // Without re-publishing, the stored skill is unchanged — it never
    // tracked the metric values in the first place
    await withBank(async (reader) => {
      const skill = await reader.getSkill('pb-live-evolution');
      expect(skill).not.toBeNull();
      expect((skill as Record<string, unknown>).metrics).toBeUndefined();
      // Content is unchanged (same playbook strategy text)
      expect(skill!.instructions).toContain('Add .js extensions');
    });
  });

  it('republishing after evolution updates updates the on-disk skill content', async () => {
    const playbook = makePlaybook({ id: 'pb-republish' });

    await withBank(async (writer) => {
      const publisher = new SkillPublisher(writer.getStorage());
      await publisher.publishPlaybook(playbook);

      // Evolve the playbook — add a refinement and bump version
      playbook.evolution.version = '1.1.0';
      playbook.evolution.refinements.push({
        context: 'mixed CJS/ESM project',
        addition: 'Run tsc with --moduleResolution bundler',
        addedAt: new Date(),
        source: 'success',
      });
      playbook.guidance.strategy = 'Add .js extensions; for bundler-resolution projects, prefer the bundler moduleResolution flag';

      const update = await publisher.publishPlaybook(playbook);
      expect(update.published).toBe(true);
      expect(update.isUpdate).toBe(true);
      expect(update.version).toBe('1.1.0');
    });

    await withBank(async (reader) => {
      const skill = await reader.getSkill('pb-republish');
      // Fail with a clear assertion rather than a TypeError if the skill is missing.
      expect(skill).not.toBeNull();
      expect(skill!.version).toBe('1.1.0');
      expect(skill!.instructions).toContain('bundler moduleResolution');
      expect(skill!.instructions).toContain('Refinements:');
    });
  });

  it('convertPlaybookToSkill does not emit a metrics field at the type level', () => {
    const playbook = makePlaybook({ id: 'pb-shape' });
    const skill = convertPlaybookToSkill(playbook);

    // Belt-and-suspenders: even at the conversion boundary there should
    // be no metrics field. (TypeScript already enforces this; the
    // runtime check guards against accidental object-spread regressions.)
    expect((skill as Record<string, unknown>).metrics).toBeUndefined();
    expect(skill.id).toBe('pb-shape');
    expect(skill.name).toBe('fix-typescript-imports');
    expect(skill.author).toBe('cognitive-core');
    expect(skill.tags).toEqual(['typescript', 'esm']);
    expect(skill.serving?.tokenEstimate).toBeGreaterThan(0);
  });
});