npm - cognitive-core - Versions diffs - 0.2.3 → 0.2.5 - Mend

cognitive-core 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/dist/surfacing/skill-publisher.d.ts +3 -3
package/dist/surfacing/skill-publisher.d.ts.map +1 -1
package/dist/surfacing/skill-publisher.js +90 -41
package/dist/surfacing/skill-publisher.js.map +1 -1
package/dist/surfacing/sqlite-storage-adapter.d.ts +2 -2
package/dist/surfacing/sqlite-storage-adapter.d.ts.map +1 -1
package/dist/surfacing/sqlite-storage-adapter.js +15 -10
package/dist/surfacing/sqlite-storage-adapter.js.map +1 -1
package/dist/types/playbook.d.ts +11 -0
package/dist/types/playbook.d.ts.map +1 -1
package/dist/types/playbook.js +2 -0
package/dist/types/playbook.js.map +1 -1
package/dist/workspace/templates/index.d.ts +1 -0
package/dist/workspace/templates/index.d.ts.map +1 -1
package/dist/workspace/templates/index.js +2 -0
package/dist/workspace/templates/index.js.map +1 -1
package/dist/workspace/templates/skill-enrichment.d.ts +48 -0
package/dist/workspace/templates/skill-enrichment.d.ts.map +1 -0
package/dist/workspace/templates/skill-enrichment.js +175 -0
package/dist/workspace/templates/skill-enrichment.js.map +1 -0
package/package.json +4 -4
package/src/surfacing/skill-publisher.ts +116 -49
package/src/surfacing/sqlite-storage-adapter.ts +14 -12
package/src/types/playbook.ts +15 -0
package/src/workspace/templates/index.ts +7 -0
package/src/workspace/templates/skill-enrichment.ts +275 -0
package/tests/integration/ranking-driven-loadout-e2e.test.ts +185 -0
package/tests/integration/skill-publishing-filesystem-e2e.test.ts +216 -0
package/tests/surfacing/skill-publisher.test.ts +86 -18
package/tests/surfacing/sqlite-storage-adapter.test.ts +0 -9

package/src/workspace/templates/skill-enrichment.ts ADDED Viewed

@@ -0,0 +1,275 @@
+/**
+ * Skill Enrichment Template
+ *
+ * Agent-in-the-loop step between playbook extraction and skill publishing.
+ * Given a Playbook and its procedurally generated SKILL.md skeleton, an agent
+ * enriches the skill with narrative prose, worked examples, cross-references,
+ * and a trigger-rich description — things a procedural converter cannot produce.
+ *
+ * Complexity routing:
+ * - Simple playbooks (<=3 tactics, no explicit steps, not marked complex) → heuristic: keep procedural output as-is
+ * - Standard/complex playbooks → agent enrichment
+ */
+import type { WorkspaceHandle } from 'agent-workspace';
+import type { Playbook } from '../../types/index.js';
+import { convertPlaybookToSkill } from '../../surfacing/skill-publisher.js';
+import type {
+  AgenticTaskTemplate,
+  AnalysisComplexity,
+  ResourceSpec,
+} from '../types.js';
+import type { ComputeRequirements } from '../../runtime/compute-provider.js';
+// ============================================================
+// Input / Output Types
+// ============================================================
+/**
+ * Input for the skill-enrichment task.
+ * Only `playbook` is required; the other fields are optional hints that are
+ * handed to the agent when present.
+ */
+export interface SkillEnrichmentInput {
+  /** Source playbook that the skill is derived from. */
+  playbook: Playbook;
+  /** Existing hand-written SKILL.md text to use as a style reference, if any. */
+  styleReference?: string;
+  /** Related skills (name + description) that may be cross-referenced. */
+  relatedSkills?: { name: string; description: string }[];
+  /** Free-form domain context, e.g. "swarmkit ecosystem" or "TypeScript tooling". */
+  domainContext?: string;
+}
+/**
+ * Result of the skill-enrichment task: the pieces that replace or extend the
+ * procedurally generated skill content.
+ */
+export interface SkillEnrichmentOutput {
+  /** Trigger-rich SKILL.md frontmatter description (kept under 1024 chars). */
+  description: string;
+  /** Markdown body that replaces the procedural instructions. */
+  instructions: string;
+  /** Worked examples produced by the agent. */
+  examples: {
+    title: string;
+    scenario: string;
+    steps: string;
+  }[];
+  /** Cross-references to related skills. */
+  seeAlso: {
+    name: string;
+    relation: string;
+  }[];
+  /** The agent's notes on what the playbook is missing. */
+  gaps: string[];
+}
+// ============================================================
+// Template Implementation
+// ============================================================
+/**
+ * Agentic task template that enriches a playbook's procedurally generated
+ * SKILL.md skeleton with a better description, prose instructions, worked
+ * examples, cross-references, and a list of gaps.
+ *
+ * The agent path in this template is: `prepareWorkspace` writes the inputs,
+ * `buildTaskPrompt` describes the job, and `collectOutput` reads back
+ * `output/enrichment.json`. `heuristicFallback` produces a result without an
+ * agent (presumably used by the task runner for the 'heuristic' route —
+ * confirm against the runner).
+ */
+export const skillEnrichmentTemplate: AgenticTaskTemplate<
+  SkillEnrichmentInput,
+  SkillEnrichmentOutput
+> = {
+  taskType: 'skill-enrichment',
+  domain: 'skill-publishing',
+  description: 'Enrich a procedurally generated SKILL.md with narrative prose, examples, and cross-references',
+  /**
+   * Pick the processing route for a playbook.
+   *
+   * @returns 'heuristic' when the playbook has at most 3 tactics, no steps,
+   *   and is not marked complex; 'thorough' when it is marked complex or has
+   *   more than 8 tactics; 'standard' otherwise.
+   */
+  assessComplexity(input: SkillEnrichmentInput): AnalysisComplexity {
+    const pb = input.playbook;
+    const tacticCount = pb.guidance.tactics.length;
+    const hasSteps = (pb.guidance.steps?.length ?? 0) > 0;
+    const isComplex = pb.complexity === 'complex';
+    if (tacticCount <= 3 && !hasSteps && !isComplex) return 'heuristic';
+    if (isComplex || tacticCount > 8) return 'thorough';
+    return 'standard';
+  },
+  /**
+   * Produce an output without running an agent: the procedural description
+   * and instructions are passed through unchanged, related skills become
+   * `seeAlso` entries, and no examples or gaps are reported.
+   */
+  async heuristicFallback(input: SkillEnrichmentInput): Promise<SkillEnrichmentOutput> {
+    const skill = convertPlaybookToSkill(input.playbook);
+    return {
+      description: skill.description,
+      instructions: skill.instructions,
+      examples: [],
+      seeAlso: (input.relatedSkills ?? []).map((r) => ({
+        name: r.name,
+        relation: `Related: ${r.description}`,
+      })),
+      gaps: [],
+    };
+  },
+  /**
+   * Write the agent's inputs into the workspace `input` area: the playbook
+   * as JSON, the procedural skeleton and description, and — only when
+   * provided — the style reference, related skills, and domain context.
+   */
+  async prepareWorkspace(
+    input: SkillEnrichmentInput,
+    handle: WorkspaceHandle,
+  ): Promise<void> {
+    // Write the playbook as structured JSON
+    await handle.writeJson('input', 'playbook.json', {
+      name: input.playbook.name,
+      applicability: input.playbook.applicability,
+      guidance: input.playbook.guidance,
+      verification: input.playbook.verification,
+      complexity: input.playbook.complexity,
+      confidence: input.playbook.confidence,
+      // Only a subset of the evolution record is exposed to the agent.
+      evolution: {
+        version: input.playbook.evolution.version,
+        successCount: input.playbook.evolution.successCount,
+        failureCount: input.playbook.evolution.failureCount,
+        refinements: input.playbook.evolution.refinements,
+      },
+      provenance: input.playbook.provenance,
+      userInvocable: input.playbook.userInvocable,
+      publishMetadata: input.playbook.publishMetadata,
+    });
+    // Write the procedural skeleton for the agent to improve upon
+    const skill = convertPlaybookToSkill(input.playbook);
+    await handle.writeRaw('input', 'skeleton.md', skill.instructions);
+    await handle.writeRaw('input', 'skeleton-description.txt', skill.description);
+    // Style reference (if provided)
+    if (input.styleReference) {
+      await handle.writeRaw('input', 'style-reference.md', input.styleReference);
+    }
+    // Related skills for cross-referencing
+    if (input.relatedSkills && input.relatedSkills.length > 0) {
+      await handle.writeJson('input', 'related-skills.json', input.relatedSkills);
+    }
+    // Domain context
+    if (input.domainContext) {
+      await handle.writeRaw('input', 'domain-context.txt', input.domainContext);
+    }
+  },
+  /**
+   * Build the agent prompt. The listed input files mirror what
+   * `prepareWorkspace` writes, so optional files are only mentioned when the
+   * corresponding input field is present.
+   */
+  buildTaskPrompt(input: SkillEnrichmentInput): string {
+    const parts: string[] = [
+      `Enrich the skill "${input.playbook.name}" into a high-quality SKILL.md that an AI agent will load and follow.`,
+      '',
+      '## Inputs',
+      '',
+      '- `input/playbook.json` — the structured playbook (applicability, guidance, verification)',
+      '- `input/skeleton.md` — procedurally generated markdown body (your starting point)',
+      '- `input/skeleton-description.txt` — procedurally generated frontmatter description',
+    ];
+    if (input.styleReference) {
+      parts.push('- `input/style-reference.md` — a hand-written SKILL.md to match in tone and structure');
+    }
+    if (input.relatedSkills && input.relatedSkills.length > 0) {
+      parts.push('- `input/related-skills.json` — related skills to cross-reference');
+    }
+    if (input.domainContext) {
+      parts.push('- `input/domain-context.txt` — domain context for accurate terminology');
+    }
+    parts.push(
+      '',
+      '## What to produce',
+      '',
+      'Read all inputs, then write `output/enrichment.json` with this schema:',
+      '',
+      '```json',
+      '{',
+      '  "description": "Trigger-rich frontmatter description, under 1024 chars. Structure: [What it does] + [Use when ...trigger phrases...]. Include concrete phrases users would say.",',
+      '  "instructions": "Full markdown body. Must include:\\n- ## When to use (bulleted)\\n- ## When not to use (bulleted)\\n- ## Workflow (numbered tactics expanded into full prose — explain WHY each step matters, not just WHAT to do)\\n- ## Inputs / ## Outputs\\n- ## Verification (bulleted success/failure indicators)\\n- ## Edge cases (narrative, with recovery paths)\\n- ## Examples (2-3 worked scenarios with concrete commands)\\n- ## See also (cross-references)",',
+      '  "examples": [{"title": "...", "scenario": "...", "steps": "..."}],',
+      '  "seeAlso": [{"name": "skill-name", "relation": "why it is related"}],',
+      '  "gaps": ["anything missing from the playbook that the skill should cover but cannot without more data"]',
+      '}',
+      '```',
+      '',
+      '## Guidelines',
+      '',
+      '- The skeleton is your starting point — improve it, do not discard it.',
+      '- Expand each tactic in the Workflow section into a paragraph that explains the rationale, not just the command.',
+      '- Generate 2-3 realistic examples with concrete tool calls (MCP tools, CLI commands, or library calls as appropriate for the domain).',
+      '- Cross-reference related skills by name in the See Also section.',
+      '- Flag gaps honestly — if the playbook is missing triggers, anti-patterns, or verification criteria, say so in the gaps array.',
+      '- Keep the description under 1024 characters. It must include trigger phrases users would actually say.',
+      '- Do not invent capabilities the playbook does not describe. Enrich what is there; do not fabricate.',
+    );
+    if (input.styleReference) {
+      parts.push(
+        '- Match the tone, section structure, and level of detail from the style reference.',
+      );
+    }
+    return parts.join('\n');
+  },
+  /** This template loads no skills for the agent. */
+  getSkills() { return []; },
+  /**
+   * Declare extra resources for the task: a single file resource carrying
+   * the domain context when one was supplied, otherwise none.
+   */
+  getResources(input: SkillEnrichmentInput): ResourceSpec[] {
+    const resources: ResourceSpec[] = [];
+    if (input.domainContext) {
+      resources.push({
+        type: 'file',
+        path: 'domain-context.txt',
+        source: input.domainContext,
+        description: 'Domain context for accurate terminology',
+      });
+    }
+    return resources;
+  },
+  outputConfig: {
+    files: [
+      {
+        path: 'enrichment.json',
+        format: 'json' as const,
+        required: true,
+        description: 'Enriched skill content with description, instructions, examples, cross-refs, and gaps',
+      },
+    ],
+  },
+  /**
+   * Read `output/enrichment.json` and coerce it into a well-formed
+   * `SkillEnrichmentOutput`.
+   *
+   * The file is agent-written and therefore untrusted, so nothing here
+   * assumes its shape:
+   * - a root that is not an object (e.g. `null`) yields an all-empty result;
+   * - `description` is capped at 1024 characters;
+   * - non-string `description`/`instructions` become '';
+   * - `examples`/`seeAlso` entries that are not objects (e.g. `null`) are
+   *   dropped instead of crashing the collection step;
+   * - every remaining field is stringified, with missing fields becoming ''.
+   */
+  async collectOutput(handle: WorkspaceHandle): Promise<SkillEnrichmentOutput> {
+    const isRecord = (value: unknown): value is Record<string, unknown> =>
+      typeof value === 'object' && value !== null;
+    const recordsOf = (value: unknown): Array<Record<string, unknown>> =>
+      Array.isArray(value) ? value.filter(isRecord) : [];
+    const parsed: unknown = await handle.readJson('output', 'enrichment.json');
+    // Property reads on `null` or `undefined` throw, so fall back to an empty record.
+    const raw: Record<string, unknown> = isRecord(parsed) ? parsed : {};
+    return {
+      description: typeof raw.description === 'string'
+        ? raw.description.slice(0, 1024)
+        : '',
+      instructions: typeof raw.instructions === 'string'
+        ? raw.instructions
+        : '',
+      examples: recordsOf(raw.examples).map((e) => ({
+        title: String(e.title ?? ''),
+        scenario: String(e.scenario ?? ''),
+        steps: String(e.steps ?? ''),
+      })),
+      seeAlso: recordsOf(raw.seeAlso).map((s) => ({
+        name: String(s.name ?? ''),
+        relation: String(s.relation ?? ''),
+      })),
+      gaps: Array.isArray(raw.gaps)
+        ? (raw.gaps as Array<unknown>).map(String)
+        : [],
+    };
+  },
+  /** Intentionally a no-op; see the comment in the body. */
+  async processOutput(): Promise<void> {
+    // Caller handles merging the enriched output back into the Skill
+    // and publishing via SkillPublisher.
+  },
+  computeRequirements: {
+    mode: 'local',
+    complexity: 'standard',
+  },
+  /**
+   * Compute requirements for a given route: always local, with a longer
+   * timeout for 'thorough' runs (300_000 vs 180_000 — presumably
+   * milliseconds; confirm against ComputeRequirements).
+   */
+  getComputeRequirements(
+    _input: SkillEnrichmentInput,
+    complexity: AnalysisComplexity,
+  ): ComputeRequirements {
+    return {
+      mode: 'local',
+      complexity,
+      timeout: complexity === 'thorough' ? 300_000 : 180_000,
+    };
+  },
+  agentType: 'claude-code',
+  timeout: 180_000,
+  captureToolCalls: true,
+};

package/tests/integration/ranking-driven-loadout-e2e.test.ts ADDED Viewed

@@ -0,0 +1,185 @@
+/**
+ * E2E: ranking-driven loadout — the canonical replacement for skill-tree's
+ * removed `LoadoutCriteria.minSuccessRate`.
+ *
+ * The 0.2 deprecation moved live ranking responsibility upstream of the
+ * `include: [...]` list. This test proves that pattern works end-to-end:
+ *
+ *   1. Build playbooks with different evolution states.
+ *   2. Publish all of them as skills via SkillPublisher.
+ *   3. Rank the playbooks externally by `getPlaybookSuccessRate`.
+ *   4. Pass the top-N IDs to skill-tree's LoadoutCompiler via `include`.
+ *   5. Verify the compiled loadout matches the ranking — no metric
+ *      filter inside skill-tree, no snapshot drift.
+ *
+ * Real components only:
+ *   - cognitive-core's Playbook + SkillPublisher
+ *   - skill-tree's createSkillBank + LoadoutCompiler
+ *   - real filesystem storage in a temp dir
+ */
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'node:fs/promises';
+import * as path from 'node:path';
+import * as os from 'node:os';
+import {
+  createPlaybook,
+  getPlaybookSuccessRate,
+  type Playbook,
+} from '../../src/types/playbook.js';
+import { SkillPublisher } from '../../src/surfacing/skill-publisher.js';
+import { createSkillBank, LoadoutCompiler } from 'skill-tree';
+// Scratch directory for the current test: created before and removed after each test.
+let testDir: string;
+beforeEach(async () => {
+  const prefix = path.join(os.tmpdir(), 'cc-rank-loadout-e2e-');
+  testDir = await fs.mkdtemp(prefix);
+});
+afterEach(async () => {
+  const removeOptions = { recursive: true, force: true };
+  await fs.rm(testDir, removeOptions);
+});
+/**
+ * Build a minimal playbook fixture for ranking tests.
+ *
+ * @param id - Used as both the playbook id and its name.
+ * @param successCount - Recorded successes placed in `evolution`.
+ * @param failureCount - Recorded failures placed in `evolution`.
+ * @param overrides - Guidance fields spread over the default guidance.
+ * @returns The playbook produced by `createPlaybook`.
+ */
+function pb(
+  id: string,
+  successCount: number,
+  failureCount: number,
+  overrides: Partial<Playbook['guidance']> = {},
+): Playbook {
+  const situation = `Situation for ${id}`;
+  const strategy = `Strategy for ${id}`;
+  const playbook = createPlaybook({
+    id,
+    name: id,
+    applicability: {
+      situations: [situation],
+      triggers: [],
+      antiPatterns: [],
+      domains: ['testing'],
+    },
+    // Defaults first so any caller-supplied guidance field wins.
+    guidance: {
+      strategy,
+      tactics: [],
+      ...overrides,
+    },
+    verification: { successIndicators: [], failureIndicators: [] },
+    evolution: {
+      version: '1.0.0',
+      createdFrom: ['session-x'],
+      failures: [],
+      refinements: [],
+      successCount,
+      failureCount,
+    },
+  });
+  return playbook;
+}
+/**
+ * Suite: rank playbooks by their live success rate, then hand the resulting
+ * IDs to skill-tree's LoadoutCompiler through `include`.
+ * Every test builds its own skill bank rooted at the per-test `testDir`.
+ *
+ * NOTE(review): none of these tests shut the bank down — confirm that
+ * skill-tree's storage needs no explicit shutdown here.
+ */
+describe('ranking-driven loadout e2e (cognitive-core ranks → skill-tree compiles)', () => {
+  // Publishes five playbooks, ranks them, and expects the compiled loadout
+  // to equal the top three IDs in ranked order.
+  it('top-N by successRate flows through skill-tree compile via include list', async () => {
+    const bank = createSkillBank({ storage: { basePath: testDir } });
+    await bank.initialize();
+    const publisher = new SkillPublisher(bank.getStorage());
+    // Build playbooks with disparate success rates
+    // NOTE(review): the rates in the trailing comments assume
+    // getPlaybookSuccessRate is successes / (successes + failures), and 0
+    // when nothing has been recorded — confirm against its implementation.
+    // 'warm-skill' and 'lukewarm-skill' tie at 0.60.
+    const playbooks: Playbook[] = [
+      pb('hot-skill', 18, 2),       // 0.90
+      pb('warm-skill', 12, 8),      // 0.60
+      pb('cold-skill', 1, 9),       // 0.10
+      pb('lukewarm-skill', 6, 4),   // 0.60
+      pb('zero-skill', 0, 0),       // 0.00 (untested)
+    ];
+    // Publish every playbook
+    for (const p of playbooks) {
+      const result = await publisher.publishPlaybook(p);
+      expect(result.published).toBe(true);
+    }
+    // Rank externally — the new pattern that replaces the removed
+    // LoadoutCriteria.minSuccessRate filter
+    // The array is copied before sorting so `playbooks` keeps its order.
+    const ranked = [...playbooks].sort(
+      (a, b) => getPlaybookSuccessRate(b) - getPlaybookSuccessRate(a),
+    );
+    const topThreeIds = ranked.slice(0, 3).map((p) => p.id);
+    expect(topThreeIds).toContain('hot-skill');
+    expect(topThreeIds).not.toContain('zero-skill');
+    expect(topThreeIds).not.toContain('cold-skill');
+    // Pass top-N to skill-tree's compiler via include + maxSkills cap.
+    //
+    // skill-tree 0.2 makes `include` a presence guarantee — included
+    // skills are always in the result, in the order listed. Combine
+    // with `maxSkills: include.length` to express "exactly these N
+    // skills" (the documented restrict-to-list pattern).
+    const compiler = new LoadoutCompiler(bank.getStorage());
+    const selected = await compiler.compile({
+      include: topThreeIds,
+      maxSkills: topThreeIds.length,
+    });
+    // include preserves order; we passed top-1, top-2, top-3
+    expect(selected.map((s) => s.id)).toEqual(topThreeIds);
+  });
+  // Same flow without a maxSkills cap: all three published skills are
+  // included and must come back in ranked order.
+  it('include preserves caller-specified order from the ranking', async () => {
+    const bank = createSkillBank({ storage: { basePath: testDir } });
+    await bank.initialize();
+    const publisher = new SkillPublisher(bank.getStorage());
+    const playbooks: Playbook[] = [
+      pb('alpha', 5, 0),  // 1.00
+      pb('beta', 3, 1),   // 0.75
+      pb('gamma', 2, 2),  // 0.50
+    ];
+    for (const p of playbooks) {
+      await publisher.publishPlaybook(p);
+    }
+    const ranked = [...playbooks].sort(
+      (a, b) => getPlaybookSuccessRate(b) - getPlaybookSuccessRate(a),
+    );
+    const orderedIds = ranked.map((p) => p.id);
+    // skill-tree 0.2 preserves include order — included skills appear
+    // at the front in the order listed. This makes include a viable
+    // surface for caller-driven ranking.
+    const compiler = new LoadoutCompiler(bank.getStorage());
+    const selected = await compiler.compile({ include: orderedIds });
+    expect(selected.map((s) => s.id)).toEqual(orderedIds);
+  });
+  it('demonstrates why the old minSuccessRate filter was lossy', async () => {
+    // The old skill-tree had `LoadoutCriteria.minSuccessRate` which
+    // filtered against `Skill.metrics.successRate`, a SNAPSHOT taken at
+    // publish time. If the playbook evolved after publish (more
+    // successes/failures), the stored snapshot would drift from reality.
+    //
+    // Now: rank live, then pass IDs. No snapshot, no drift.
+    const bank = createSkillBank({ storage: { basePath: testDir } });
+    await bank.initialize();
+    const publisher = new SkillPublisher(bank.getStorage());
+    const playbook = pb('evolving', 5, 5); // 0.50 at publish
+    await publisher.publishPlaybook(playbook);
+    // Drift: more successes after publish — playbook's live rate
+    // diverges from anything we could have captured at publish time
+    // The in-memory playbook is mutated directly; nothing is re-published.
+    playbook.evolution.successCount = 50;
+    playbook.evolution.failureCount = 5;
+    const liveRate = getPlaybookSuccessRate(playbook);
+    expect(liveRate).toBeCloseTo(50 / 55, 4); // ~0.909
+    // Re-publishing IS the way to refresh; the stored skill content is
+    // a frozen snapshot, but we're not relying on metric values inside
+    // that snapshot anymore.
+    const compiler = new LoadoutCompiler(bank.getStorage());
+    const selected = await compiler.compile({ include: ['evolving'] });
+    expect(selected).toHaveLength(1);
+    // The cast lets the test probe a `metrics` key at runtime.
+    expect((selected[0] as Record<string, unknown>).metrics).toBeUndefined();
+    // The "is this skill effective" decision happens upstream — the
+    // include caller (cognitive-core consumer) knows liveRate ~0.909
+    // and would include this skill in a top-N selection.
+    expect(liveRate).toBeGreaterThan(0.7);
+  });
+});

package/tests/integration/skill-publishing-filesystem-e2e.test.ts ADDED Viewed

@@ -0,0 +1,216 @@
+/**
+ * E2E: cognitive-core publishing to skill-tree's FilesystemStorageAdapter.
+ *
+ * Validates the cross-repo handshake after the skill-tree 0.2 metrics
+ * deprecation. Uses real components throughout — no mocks:
+ *   - real Playbook objects with evolution mutations
+ *   - real cognitive-core SkillPublisher
+ *   - real skill-tree FilesystemStorageAdapter (file-backed in temp dir)
+ *   - real on-disk SKILL.md + .skilltree.json sidecar inspection
+ *
+ * The contract verified:
+ *   1. The published Skill has NO `metrics` field (Phase 2/3 invariant).
+ *   2. The on-disk artifacts contain the playbook content but no metric
+ *      JSON in the sidecar.
+ *   3. Reopening the bank and retrieving via getSkill returns the same
+ *      content with no metrics.
+ *   4. Live evolution data still lives on the playbook side — recording
+ *      success after publish updates the playbook but does NOT update the
+ *      stored skill (republish required).
+ */
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'node:fs/promises';
+import * as fss from 'node:fs';
+import * as path from 'node:path';
+import * as os from 'node:os';
+import {
+  createPlaybook,
+  getPlaybookSuccessRate,
+  type Playbook,
+} from '../../src/types/playbook.js';
+import { SkillPublisher, convertPlaybookToSkill } from '../../src/surfacing/skill-publisher.js';
+import { createSkillBank } from 'skill-tree';
+// Scratch directory for the current test: created before and removed after each test.
+let testDir: string;
+beforeEach(async () => {
+  const prefix = path.join(os.tmpdir(), 'cc-publish-fs-e2e-');
+  testDir = await fs.mkdtemp(prefix);
+});
+afterEach(async () => {
+  const removeOptions = { recursive: true, force: true };
+  await fs.rm(testDir, removeOptions);
+});
+/**
+ * Build a realistic playbook fixture ("fix-typescript-imports") with
+ * per-section defaults that callers can selectively override.
+ *
+ * Nested sections (`applicability`, `guidance`, `verification`, `evolution`)
+ * are merged field-by-field over the defaults. The handled keys are pulled
+ * out of `overrides` first so that spreading the remainder cannot replace a
+ * merged section wholesale or overwrite a default with an explicit
+ * `undefined`.
+ *
+ * @param overrides - Partial playbook; any other top-level keys are passed
+ *   through to `createPlaybook` unchanged.
+ * @returns The playbook produced by `createPlaybook`.
+ */
+function makePlaybook(overrides: Partial<Playbook> = {}): Playbook {
+  const {
+    id,
+    name,
+    applicability,
+    guidance,
+    verification,
+    evolution,
+    confidence,
+    ...rest
+  } = overrides;
+  return createPlaybook({
+    // Pass-through keys go first; the explicitly handled keys below win.
+    ...rest,
+    id: id ?? 'pb-test',
+    name: name ?? 'fix-typescript-imports',
+    applicability: {
+      situations: ['TypeScript ESM build emits "Cannot find module" errors'],
+      triggers: ['build error', 'esm migration'],
+      antiPatterns: ['CommonJS-only project'],
+      domains: ['typescript', 'esm'],
+      ...applicability,
+    },
+    guidance: {
+      strategy: 'Add .js extensions to all relative imports',
+      tactics: ['Use codemod or sed -i', 'Verify with tsc --noEmit'],
+      ...guidance,
+    },
+    verification: {
+      successIndicators: ['tsc --noEmit passes', 'tests run'],
+      failureIndicators: [],
+      ...verification,
+    },
+    evolution: {
+      version: '1.0.0',
+      createdFrom: ['session-init'],
+      failures: [],
+      refinements: [],
+      successCount: 8,
+      failureCount: 2,
+      lastUsed: new Date('2025-06-01T12:00:00Z'),
+      ...evolution,
+    },
+    confidence: confidence ?? 0.85,
+  });
+}
+/**
+ * Suite: publish playbooks through SkillPublisher into a file-backed skill
+ * bank under `testDir`, then inspect the on-disk files and the skills read
+ * back through a second bank opened on the same directory.
+ *
+ * NOTE(review): several tests open a first bank (`bank0`) that is never
+ * shut down, and `bank.shutdown()` is skipped when an assertion throws —
+ * confirm that skill-tree's storage tolerates this.
+ */
+describe('cognitive-core → skill-tree filesystem publish e2e', () => {
+  it('publishes a playbook to filesystem storage with no metrics field on disk', async () => {
+    const bank = createSkillBank({ storage: { basePath: testDir } });
+    await bank.initialize();
+    const publisher = new SkillPublisher(bank.getStorage());
+    const playbook = makePlaybook();
+    const result = await publisher.publishPlaybook(playbook);
+    expect(result.published).toBe(true);
+    expect(result.skillId).toBe(playbook.id);
+    // Inspect the on-disk artifacts
+    // The test expects skills under <testDir>/.skilltree/skills/<playbook id>/.
+    const skillsRoot = path.join(testDir, '.skilltree', 'skills', playbook.id);
+    expect(fss.existsSync(skillsRoot)).toBe(true);
+    const skillMd = await fs.readFile(path.join(skillsRoot, 'SKILL.md'), 'utf-8');
+    // Frontmatter must NOT carry any metric fields
+    // The `m` flag makes `^` match at the start of any line in the file.
+    expect(skillMd).not.toMatch(/^successRate:/m);
+    expect(skillMd).not.toMatch(/^usageCount:/m);
+    expect(skillMd).not.toMatch(/^feedbackScores:/m);
+    expect(skillMd).not.toMatch(/^lastUsed:/m);
+    // Body should carry the playbook's strategy
+    expect(skillMd).toContain('Add .js extensions');
+    // Sidecar carries source/lineage but no metrics block
+    const sidecar = JSON.parse(
+      await fs.readFile(path.join(skillsRoot, '.skilltree.json'), 'utf-8'),
+    );
+    expect(sidecar.metrics).toBeUndefined();
+    expect(sidecar.source?.type).toBe('extracted');
+    await bank.shutdown();
+  });
+  it('roundtrips through getSkill without exposing metrics', async () => {
+    // bank0 publishes; a second bank on the same directory reads back.
+    const bank0 = createSkillBank({ storage: { basePath: testDir } });
+    await bank0.initialize();
+    const publisher = new SkillPublisher(bank0.getStorage());
+    const playbook = makePlaybook({ id: 'pb-roundtrip' });
+    await publisher.publishPlaybook(playbook);
+    // Reopen via SkillBank and retrieve
+    const bank = createSkillBank({ storage: { basePath: testDir } });
+    await bank.initialize();
+    const skill = await bank.getSkill('pb-roundtrip');
+    expect(skill).not.toBeNull();
+    expect(skill!.id).toBe('pb-roundtrip');
+    expect(skill!.name).toBe('fix-typescript-imports');
+    expect(skill!.instructions).toContain('Add .js extensions');
+    // Skill-tree 0.2 has no metrics on the type at all
+    expect((skill as Record<string, unknown>).metrics).toBeUndefined();
+    await bank.shutdown();
+  });
+  it('keeps live evolution data on the playbook after publish (no skill-side mutation on recordSuccess)', async () => {
+    const bank0 = createSkillBank({ storage: { basePath: testDir } });
+    await bank0.initialize();
+    const publisher = new SkillPublisher(bank0.getStorage());
+    const playbook = makePlaybook({ id: 'pb-live-evolution' });
+    await publisher.publishPlaybook(playbook);
+    const initialRate = getPlaybookSuccessRate(playbook);
+    expect(initialRate).toBe(0.8); // 8 / (8 + 2)
+    // Mutate playbook (this would normally happen via PlaybookLibrary.recordSuccess)
+    playbook.evolution.successCount += 5;
+    playbook.evolution.lastUsed = new Date();
+    playbook.confidence = 0.92;
+    const newRate = getPlaybookSuccessRate(playbook);
+    expect(newRate).toBeCloseTo(13 / 15, 4);
+    // Without re-publishing, the stored skill is unchanged — it never
+    // tracked the metric values in the first place
+    const bank = createSkillBank({ storage: { basePath: testDir } });
+    await bank.initialize();
+    const skill = await bank.getSkill('pb-live-evolution');
+    expect(skill).not.toBeNull();
+    expect((skill as Record<string, unknown>).metrics).toBeUndefined();
+    // Content is unchanged (same playbook strategy text)
+    expect(skill!.instructions).toContain('Add .js extensions');
+    await bank.shutdown();
+  });
+  it('republishing after evolution updates updates the on-disk skill content', async () => {
+    const bank0 = createSkillBank({ storage: { basePath: testDir } });
+    await bank0.initialize();
+    const publisher = new SkillPublisher(bank0.getStorage());
+    const playbook = makePlaybook({ id: 'pb-republish' });
+    await publisher.publishPlaybook(playbook);
+    // Evolve the playbook — add a refinement and bump version
+    playbook.evolution.version = '1.1.0';
+    playbook.evolution.refinements.push({
+      context: 'mixed CJS/ESM project',
+      addition: 'Run tsc with --moduleResolution bundler',
+      addedAt: new Date(),
+      source: 'success',
+    });
+    playbook.guidance.strategy = 'Add .js extensions; for bundler-resolution projects, prefer the bundler moduleResolution flag';
+    // Second publish of the same id is expected to be reported as an update.
+    const update = await publisher.publishPlaybook(playbook);
+    expect(update.published).toBe(true);
+    expect(update.isUpdate).toBe(true);
+    expect(update.version).toBe('1.1.0');
+    const bank = createSkillBank({ storage: { basePath: testDir } });
+    await bank.initialize();
+    const skill = await bank.getSkill('pb-republish');
+    expect(skill!.version).toBe('1.1.0');
+    expect(skill!.instructions).toContain('bundler moduleResolution');
+    expect(skill!.instructions).toContain('Refinements:');
+    await bank.shutdown();
+  });
+  // Pure conversion check: no bank or filesystem involved.
+  it('convertPlaybookToSkill does not emit a metrics field at the type level', () => {
+    const playbook = makePlaybook({ id: 'pb-shape' });
+    const skill = convertPlaybookToSkill(playbook);
+    // Belt-and-suspenders: even at the conversion boundary there should
+    // be no metrics field. (TypeScript already enforces this; the
+    // runtime check guards against accidental object-spread regressions.)
+    expect((skill as Record<string, unknown>).metrics).toBeUndefined();
+    expect(skill.id).toBe('pb-shape');
+    expect(skill.name).toBe('fix-typescript-imports');
+    expect(skill.author).toBe('cognitive-core');
+    expect(skill.tags).toEqual(['typescript', 'esm']);
+    expect(skill.serving?.tokenEstimate).toBeGreaterThan(0);
+  });
+});