@interleavelove/keating 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/README.md +274 -0
  2. package/bin/keating.js +31 -0
  3. package/dist/src/cli/main.js +165 -0
  4. package/dist/src/core/animation.js +372 -0
  5. package/dist/src/core/benchmark.js +238 -0
  6. package/dist/src/core/config.js +81 -0
  7. package/dist/src/core/evolution.js +224 -0
  8. package/dist/src/core/learner-state.js +88 -0
  9. package/dist/src/core/lesson-plan.js +155 -0
  10. package/dist/src/core/map.js +89 -0
  11. package/dist/src/core/paths.js +69 -0
  12. package/dist/src/core/pi-agent.js +58 -0
  13. package/dist/src/core/policy.js +53 -0
  14. package/dist/src/core/project.js +189 -0
  15. package/dist/src/core/prompt-evolution.js +337 -0
  16. package/dist/src/core/random.js +19 -0
  17. package/dist/src/core/self-improve.js +419 -0
  18. package/dist/src/core/topics.js +620 -0
  19. package/dist/src/core/types.js +1 -0
  20. package/dist/src/core/util.js +28 -0
  21. package/dist/src/core/verification.js +162 -0
  22. package/dist/src/pi/hyperteacher-extension.js +180 -0
  23. package/dist/src/runtime/pi.js +118 -0
  24. package/dist/test/animation.test.js +43 -0
  25. package/dist/test/config.test.js +36 -0
  26. package/dist/test/evolution.test.js +39 -0
  27. package/dist/test/fuzz.test.js +37 -0
  28. package/dist/test/hyperteacher-extension.test.js +122 -0
  29. package/dist/test/lesson-plan.test.js +35 -0
  30. package/dist/test/pipeline.test.js +57 -0
  31. package/dist/test/prompt-evolution.test.js +89 -0
  32. package/package.json +58 -0
  33. package/pi/prompts/bridge.md +14 -0
  34. package/pi/prompts/diagnose.md +15 -0
  35. package/pi/prompts/improve.md +39 -0
  36. package/pi/prompts/learn.md +21 -0
  37. package/pi/prompts/quiz.md +14 -0
  38. package/pi/skills/adaptive-teaching/SKILL.md +33 -0
  39. package/scripts/install/install.sh +307 -0
@@ -0,0 +1,162 @@
1
+ import { createHash } from "node:crypto";
2
+ import { readFile, writeFile } from "node:fs/promises";
3
+ import * as dotenv from "dotenv";
4
+ import { piComplete, piCompleteJson } from "./pi-agent.js";
5
+ dotenv.config();
/**
 * Derives a short fingerprint of a topic's teachable content.
 *
 * Only `slug`, `summary`, `formalCore`, `misconceptions` and `examples` feed
 * the digest (serialized as JSON in that key order), so changes to any other
 * topic field leave the fingerprint untouched.
 *
 * @param {object} topic - Topic record exposing the five fields listed above.
 * @returns {string} First 16 hex characters of the SHA-256 digest.
 */
export function topicContentHash(topic) {
    const { slug, summary, formalCore, misconceptions, examples } = topic;
    const serialized = JSON.stringify({ slug, summary, formalCore, misconceptions, examples });
    const hasher = createHash("sha256");
    hasher.update(serialized);
    return hasher.digest("hex").substring(0, 16);
}
/**
 * Flattens a topic into the list of labelled statements to be fact-checked.
 *
 * Claims are emitted in a fixed order: the summary, every formal-core
 * statement, every misconception, and finally every example whose `length`
 * is greater than 20 (shorter examples are skipped).
 *
 * A list field that is `null` or `undefined` is treated as empty, so a
 * partially filled topic yields the claims it does have instead of throwing.
 *
 * @param {object} topic - Topic with `summary` and optional iterable
 *   `formalCore`, `misconceptions` and `examples` fields.
 * @returns {string[]} Labelled claim strings.
 */
export function extractClaims(topic) {
    const claims = [`Summary: ${topic.summary}`];
    for (const statement of topic.formalCore ?? []) {
        claims.push(`Formal claim: ${statement}`);
    }
    for (const misconception of topic.misconceptions ?? []) {
        claims.push(`Misconception assertion: ${misconception}`);
    }
    for (const example of topic.examples ?? []) {
        if (example.length > 20) {
            claims.push(`Example claim: ${example}`);
        }
    }
    return claims;
}
/**
 * Renders the Markdown verification checklist for a topic.
 *
 * The checklist lists every claim produced by `extractClaims(topic)`. When a
 * verification `result` is supplied, the entry at the same index in
 * `result.claims` pre-fills that claim's status, source and correction;
 * otherwise placeholders are emitted for a human or agent to complete.
 *
 * A `result` without a `claims` array is rendered with placeholders and zero
 * counts, and a non-numeric `overallConfidence` is treated as 0, so a
 * partially populated result never aborts rendering.
 *
 * @param {object} topic - Topic providing `title` and `domain` plus the
 *   fields read by `extractClaims` and `topicContentHash`.
 * @param {object} [result] - Optional verification result with `claims`
 *   (objects carrying `status`, `source`, `correction`) and `overallConfidence`.
 * @returns {string} The checklist as newline-joined Markdown.
 */
export function buildVerificationChecklist(topic, result) {
    const claims = extractClaims(topic);
    const verifiedClaims = result?.claims ?? [];
    const countWithStatus = (status) => verifiedClaims.filter((entry) => entry.status === status).length;

    const rawConfidence = Number(result?.overallConfidence);
    const confidence = Number.isFinite(rawConfidence) ? rawConfidence : 0;
    // Without any result the header shows a bare "0"; with one it shows one decimal.
    const confidencePercent = result ? (confidence * 100).toFixed(1) : 0;
    let confidenceLabel = "low";
    if (result && confidence > 0.8) {
        confidenceLabel = "high";
    }
    else if (result && confidence > 0.5) {
        confidenceLabel = "medium";
    }

    const lines = [
        `# Verification Checklist: ${topic.title}`,
        "",
        `**Domain**: ${topic.domain}`,
        `**Content hash**: ${topicContentHash(topic)}`,
        `**Generated**: ${new Date().toISOString()}`,
        `**Overall Confidence**: ${confidencePercent}%`,
        "",
        "## Instructions",
        "",
        "Before teaching this topic, verify each claim below. For each claim:",
        "1. Search for an authoritative source (textbook, peer-reviewed paper, official documentation, legal text).",
        "2. Mark the claim as CONFIRMED, UNCONFIRMED, or CORRECTED.",
        "3. If corrected, write the accurate version.",
        "4. Do NOT teach unverified claims as settled facts.",
        "",
        "## Claims to Verify",
        ""
    ];
    claims.forEach((claim, index) => {
        // Results are matched to claims purely by position.
        const verified = verifiedClaims[index];
        lines.push(`### Claim ${index + 1}`);
        lines.push("");
        lines.push(`> ${claim}`);
        lines.push("");
        lines.push(`- [${verified?.status === "confirmed" ? "x" : " "}] **Status**: _${verified?.status?.toUpperCase() || "CONFIRMED / UNCONFIRMED / CORRECTED"}_`);
        lines.push(`- **Source**: _${verified?.source || "[cite source here]"}_`);
        lines.push(`- **Correction** (if any): _${verified?.correction || "[write corrected claim here]"}_`);
        lines.push("");
    });
    lines.push("## Verification Summary");
    lines.push("");
    lines.push(`- **Total claims**: ${claims.length}`);
    lines.push(`- **Confirmed**: ${countWithStatus("confirmed")}`);
    lines.push(`- **Unconfirmed**: ${countWithStatus("unconfirmed")}`);
    lines.push(`- **Corrected**: ${countWithStatus("corrected")}`);
    lines.push(`- **Overall confidence**: ${confidenceLabel}`);
    lines.push("");
    lines.push("---");
    lines.push("*This checklist was auto-generated by Keating using Chain-of-Verification (CoVe) and cross-consistency checks. The teaching agent must complete it before presenting this material.*");
    return lines.join("\n");
}
/**
 * Reads the verification cache from disk.
 *
 * Loading is best-effort: a missing, unreadable or malformed file yields an
 * empty cache rather than an error. A file that parses to something other
 * than a plain object (`null`, an array, a scalar) is also treated as empty,
 * because callers index the cache by topic slug.
 *
 * @param {string} cachePath - Path of the JSON cache file.
 * @returns {Promise<object>} The parsed cache object, or `{}`.
 */
export async function loadVerificationCache(cachePath) {
    let parsed;
    try {
        parsed = JSON.parse(await readFile(cachePath, "utf8"));
    }
    catch {
        // Deliberate fallback: an absent or corrupt cache just means nothing is cached.
        return {};
    }
    const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isRecord ? parsed : {};
}
/**
 * Writes the verification cache to disk as UTF-8 JSON with two-space indentation.
 *
 * @param {string} cachePath - Destination file path.
 * @param {object} cache - Cache object to serialize.
 * @returns {Promise<void>} Settles once the file has been written.
 */
export async function saveVerificationCache(cachePath, cache) {
    const serialized = JSON.stringify(cache, null, 2);
    await writeFile(cachePath, serialized, "utf8");
}
/**
 * Looks up the cached verification result for a topic.
 *
 * An entry only counts when its stored `contentHash` equals the topic's
 * current content hash; otherwise it is ignored.
 *
 * @param {object} topic - Topic whose `slug` keys the cache.
 * @param {object} cache - Cache object indexed by topic slug.
 * @returns {object|null} The matching cache entry, or `null` when there is
 *   no entry or its hash no longer matches.
 */
export function verificationStatus(topic, cache) {
    const entry = cache[topic.slug];
    const isCurrent = Boolean(entry) && entry.contentHash === topicContentHash(topic);
    return isCurrent ? entry : null;
}
/**
 * Builds a placeholder verification result in which nothing is verified yet.
 *
 * Every claim extracted from the topic is listed with status "unconfirmed"
 * and the overall confidence is 0.
 *
 * @param {object} topic - Topic to build the pending result for.
 * @returns {object} Result with `topic`, `contentHash`, `claims`,
 *   `overallConfidence` and an ISO-8601 `checkedAt` timestamp.
 */
export function buildPendingVerificationResult(topic) {
    const pendingClaims = [];
    for (const claim of extractClaims(topic)) {
        pendingClaims.push({ claim, status: "unconfirmed" });
    }
    return {
        topic: topic.slug,
        contentHash: topicContentHash(topic),
        claims: pendingClaims,
        overallConfidence: 0,
        checkedAt: new Date().toISOString()
    };
}
/**
 * Maps a model-supplied status onto one of the three statuses the checklist
 * understands. Anything unrecognized is downgraded to "unconfirmed" so an
 * odd answer can never be counted as a confirmation.
 */
function normalizeVerdictStatus(rawStatus) {
    const status = typeof rawStatus === "string" ? rawStatus.trim().toLowerCase() : "";
    return status === "confirmed" || status === "corrected" ? status : "unconfirmed";
}
/**
 * Coerces a model-supplied confidence into a finite number within [0, 1];
 * missing or non-numeric values count as 0.
 */
function normalizeVerdictConfidence(rawConfidence) {
    const value = typeof rawConfidence === "number" ? rawConfidence : Number.parseFloat(rawConfidence);
    if (!Number.isFinite(value)) {
        return 0;
    }
    return Math.min(1, Math.max(0, value));
}
/**
 * Automated Chain-of-Verification (CoVe) with Consistency Checking.
 * Inspired by EdinburghNLP Hallucination Detection research.
 * 1. Generates verification questions for each claim.
 * 2. Answers those questions independently (the "Blackbox" cross-check).
 * 3. Checks for consistency between independent answers and the claim.
 *
 * Claims are processed one at a time and every model call is awaited in
 * sequence. The model's JSON verdict is not trusted blindly: its status is
 * normalized to "confirmed" / "unconfirmed" / "corrected" and its confidence
 * is clamped to [0, 1], so the aggregate confidence is always a finite number.
 *
 * @param {string} cwd - Working directory forwarded to the Pi completion helpers.
 * @param {object} topic - Topic whose extracted claims are verified.
 * @returns {Promise<object>} Result with `topic`, `contentHash`, `claims`
 *   (`claim`, `status`, `source`, `correction`), the mean `overallConfidence`
 *   and an ISO-8601 `checkedAt` timestamp.
 */
export async function runCoveVerification(cwd, topic) {
    const claims = extractClaims(topic);
    const verifiedClaims = [];
    let totalConfidence = 0;
    for (const claim of claims) {
        // Phase 1: Question Generation
        const qGenPrompt = `Given this claim, generate 3 specific, granular questions that would verify its factual accuracy.
Claim: "${claim}"
Questions (one per line):`;
        const questionsText = await piComplete(cwd, qGenPrompt, { thinking: "low" });
        const questions = questionsText.trim().split("\n").filter((q) => q.trim());
        // Phase 2: Independent Answering (the "Blackbox" cross-check)
        const answers = [];
        for (const q of questions) {
            const a = await piComplete(cwd, `Answer this factual question accurately and concisely: ${q}`, { thinking: "medium" });
            answers.push(a.trim());
        }
        // Phase 3: Consistency Analysis (Hallucination Detection)
        const finalPrompt = `Original Claim: "${claim}"
Supporting Research:
${questions.map((q, i) => `Q: ${q}\nA: ${answers[i]}`).join("\n")}

Is the original claim CONFIRMED, UNCONFIRMED, or CORRECTED based ONLY on the Supporting Research?
Provide a JSON response:
{
"status": "confirmed" | "unconfirmed" | "corrected",
"reasoning": "string",
"correction": "string | null",
"confidence": number (0 to 1)
}`;
        const data = await piCompleteJson(cwd, finalPrompt, { thinking: "high" });
        // A non-object reply is handled like an empty verdict instead of throwing mid-run.
        const verdict = data !== null && typeof data === "object" ? data : {};
        verifiedClaims.push({
            claim,
            status: normalizeVerdictStatus(verdict.status),
            source: verdict.reasoning,
            correction: verdict.correction || undefined
        });
        totalConfidence += normalizeVerdictConfidence(verdict.confidence);
    }
    return {
        topic: topic.slug,
        contentHash: topicContentHash(topic),
        claims: verifiedClaims,
        overallConfidence: claims.length > 0 ? totalConfidence / claims.length : 0,
        checkedAt: new Date().toISOString()
    };
}
@@ -0,0 +1,180 @@
1
+ import { relative } from "node:path";
2
+ import { animateTopicArtifact, benchPolicyArtifact, currentPolicySummary, ensureProjectScaffold, evolvePolicyArtifact, evolvePromptArtifact, improveArtifact, improveHistory, listArtifacts, mapTopicArtifact, planTopicArtifact, verifyTopicArtifact } from "../core/project.js";
3
+ import { learnerStatePath } from "../core/paths.js";
4
+ import { loadLearnerState, recordFeedback, saveLearnerState } from "../core/learner-state.js";
/**
 * Normalizes raw command arguments into one trimmed string.
 *
 * Arrays are joined with single spaces; any other value is stringified, with
 * `null` and `undefined` becoming the empty string.
 *
 * @param {unknown} args - Raw arguments handed to a command handler.
 * @returns {string} The trimmed argument text.
 */
function topicFromArgs(args) {
    if (Array.isArray(args)) {
        return args.join(" ").trim();
    }
    const text = String(args ?? "");
    return text.trim();
}
/**
 * Shows an informational notification through the command context's UI.
 *
 * @param {object} ctx - Command context exposing `ui.notify`.
 * @param {string} message - Text to display.
 * @returns {void}
 */
function info(ctx, message) {
    const { ui } = ctx;
    ui.notify(message, "info");
}
/**
 * Pi extension entry point: registers the Keating slash commands and a
 * `session_start` hook on the supplied Pi API object.
 *
 * Every handler receives the raw command arguments and a context exposing
 * `cwd` and `ui` (`notify`, `setEditorText`, `select`). Commands that produce
 * an artifact put a `read <path>` line into the editor and post a short
 * notification.
 *
 * @param {object} pi - Pi extension API; only `registerCommand(name, spec)`
 *   and `on(event, listener)` are used here.
 * @returns {void}
 */
export default function hyperteacher(pi) {
    // A Map is used instead of a plain object so that words such as
    // "constructor" or "__proto__" cannot resolve to inherited members and
    // slip past the "unknown signal" check in /feedback.
    const feedbackSignals = new Map([
        ["up", "thumbs-up"],
        ["down", "thumbs-down"],
        ["confused", "confused"]
    ]);
    pi.registerCommand("plan", {
        description: "Generate a deterministic lesson plan artifact for a topic.",
        handler: async (args, ctx) => {
            const topic = topicFromArgs(args);
            if (!topic) {
                info(ctx, "Usage: /plan <topic>");
                return;
            }
            const artifact = await planTopicArtifact(ctx.cwd, topic);
            ctx.ui.setEditorText(`read ${relative(ctx.cwd, artifact.planPath)}`);
            info(ctx, `Wrote ${relative(ctx.cwd, artifact.planPath)}`);
        }
    });
    pi.registerCommand("map", {
        description: "Generate a Mermaid lesson map and render it with oxdraw when available.",
        handler: async (args, ctx) => {
            const topic = topicFromArgs(args);
            if (!topic) {
                info(ctx, "Usage: /map <topic>");
                return;
            }
            const artifact = await mapTopicArtifact(ctx.cwd, topic);
            const outputs = [relative(ctx.cwd, artifact.mmdPath)];
            // The SVG path is only reported when the artifact carries one.
            if (artifact.svgPath) {
                outputs.push(relative(ctx.cwd, artifact.svgPath));
            }
            ctx.ui.setEditorText(`read ${outputs[0]}`);
            info(ctx, `Generated ${outputs.join(" and ")}`);
        }
    });
    pi.registerCommand("animate", {
        description: "Generate a manim-web animation bundle for a topic.",
        handler: async (args, ctx) => {
            const topic = topicFromArgs(args);
            if (!topic) {
                info(ctx, "Usage: /animate <topic>");
                return;
            }
            const artifact = await animateTopicArtifact(ctx.cwd, topic);
            ctx.ui.setEditorText(`read ${relative(ctx.cwd, artifact.storyboardPath)}`);
            info(ctx, `Generated ${relative(ctx.cwd, artifact.playerPath)}, ${relative(ctx.cwd, artifact.scenePath)}, and ${relative(ctx.cwd, artifact.manifestPath)}`);
        }
    });
    pi.registerCommand("bench", {
        description: "Run the synthetic learner benchmark suite against the current teaching policy.",
        handler: async (args, ctx) => {
            // The topic is optional: an empty argument is passed on as undefined.
            const topic = topicFromArgs(args) || undefined;
            const artifact = await benchPolicyArtifact(ctx.cwd, topic);
            ctx.ui.setEditorText(`read ${relative(ctx.cwd, artifact.reportPath)}`);
            info(ctx, `Benchmark score ${artifact.overallScore.toFixed(2)} saved to ${relative(ctx.cwd, artifact.reportPath)}${artifact.tracePath ? ` with trace ${relative(ctx.cwd, artifact.tracePath)}` : ""}`);
        }
    });
    pi.registerCommand("evolve", {
        description: "Mutate and benchmark teaching policies, then keep the strongest safe candidate.",
        handler: async (args, ctx) => {
            const topic = topicFromArgs(args) || undefined;
            const artifact = await evolvePolicyArtifact(ctx.cwd, topic);
            ctx.ui.setEditorText(`read ${relative(ctx.cwd, artifact.reportPath)}`);
            info(ctx, `Policy evolved to ${artifact.bestScore.toFixed(2)} and saved to ${relative(ctx.cwd, artifact.policyPath)}${artifact.tracePath ? ` with trace ${relative(ctx.cwd, artifact.tracePath)}` : ""}`);
        }
    });
    pi.registerCommand("prompt-evolve", {
        description: "Evolve a prompt template using prompt-learning feedback and PROSPER-style selection.",
        handler: async (args, ctx) => {
            // Falls back to the "learn" prompt when no name is given.
            const promptName = topicFromArgs(args) || "learn";
            const artifact = await evolvePromptArtifact(ctx.cwd, promptName);
            ctx.ui.setEditorText(`read ${relative(ctx.cwd, artifact.reportPath)}`);
            info(ctx, `Prompt ${promptName} evolved to ${artifact.bestScore.toFixed(2)} and saved to ${relative(ctx.cwd, artifact.evolvedPromptPath)}`);
        }
    });
    pi.registerCommand("policy", {
        description: "Show the active hyperteacher policy.",
        handler: async (_args, ctx) => {
            ctx.ui.setEditorText(await currentPolicySummary(ctx.cwd));
            info(ctx, "Loaded current policy into the editor.");
        }
    });
    pi.registerCommand("outputs", {
        description: "Browse Keating plans, maps, benchmark reports, and evolution logs.",
        handler: async (_args, ctx) => {
            const artifacts = await listArtifacts(ctx.cwd);
            if (artifacts.length === 0) {
                info(ctx, "No artifacts yet. Use /plan, /map, /bench, or /evolve first.");
                return;
            }
            const selected = await ctx.ui.select("Keating Outputs", artifacts.map((artifact) => artifact.label));
            // Nothing is loaded when the selection matches no label (e.g. the picker was dismissed).
            const artifact = artifacts.find((entry) => entry.label === selected);
            if (artifact) {
                ctx.ui.setEditorText(`read ${artifact.path}`);
            }
        }
    });
    pi.registerCommand("verify", {
        description: "Generate a fact-checking checklist for a topic before teaching it.",
        handler: async (args, ctx) => {
            const topic = topicFromArgs(args);
            if (!topic) {
                info(ctx, "Usage: /verify <topic>");
                return;
            }
            const result = await verifyTopicArtifact(ctx.cwd, topic);
            if (result.alreadyVerified) {
                info(ctx, `Already verified: ${relative(ctx.cwd, result.checklistPath)}`);
            }
            else {
                ctx.ui.setEditorText(`read ${relative(ctx.cwd, result.checklistPath)}`);
                info(ctx, `Verification checklist generated. Complete it before teaching this topic.`);
            }
        }
    });
    pi.registerCommand("feedback", {
        description: "Record feedback on the current teaching session (up, down, confused).",
        handler: async (args, ctx) => {
            // The whole argument string is lower-cased, so the stored topic is lower-case too.
            const [keyword, ...topicWords] = topicFromArgs(args).toLowerCase().split(/\s+/);
            const signal = feedbackSignals.get(keyword);
            if (!signal) {
                info(ctx, "Usage: /feedback <up|down|confused> [topic]");
                return;
            }
            const topic = topicWords.join(" ") || "general";
            const statePath = learnerStatePath(ctx.cwd);
            const state = await loadLearnerState(statePath);
            recordFeedback(state, topic, signal);
            await saveLearnerState(statePath, state);
            info(ctx, `Recorded ${signal} feedback for "${topic}".`);
        }
    });
    pi.registerCommand("improve", {
        description: "Generate a self-improvement proposal by diagnosing benchmark weaknesses.",
        handler: async (args, ctx) => {
            const sub = topicFromArgs(args).toLowerCase();
            if (sub === "history") {
                const md = await improveHistory(ctx.cwd);
                ctx.ui.setEditorText(md);
                info(ctx, "Loaded improvement history into the editor.");
                return;
            }
            info(ctx, "Running benchmark and diagnosing weaknesses...");
            const artifact = await improveArtifact(ctx.cwd);
            ctx.ui.setEditorText(`read ${relative(ctx.cwd, artifact.proposalPath)}`);
            info(ctx, `Improvement proposal ${artifact.proposal.id} targets ${artifact.proposal.targets.map((t) => t.file).join(", ")}`);
        }
    });
    pi.registerCommand("trace", {
        description: "Browse persisted benchmark and evolution traces.",
        handler: async (args, ctx) => {
            const query = topicFromArgs(args).toLowerCase();
            // An empty query keeps every artifact; otherwise match on path or label, case-insensitively.
            const artifacts = (await listArtifacts(ctx.cwd)).filter((artifact) => !query ? true : artifact.path.toLowerCase().includes(query) || artifact.label.toLowerCase().includes(query));
            if (artifacts.length === 0) {
                info(ctx, "No matching trace artifacts. Use /bench or /evolve first.");
                return;
            }
            const selected = await ctx.ui.select("Keating Traces", artifacts.map((artifact) => artifact.label));
            const artifact = artifacts.find((entry) => entry.label === selected);
            if (artifact) {
                ctx.ui.setEditorText(`read ${artifact.path}`);
            }
        }
    });
    pi.on("session_start", async (_event, ctx) => {
        await ensureProjectScaffold(ctx.cwd);
        info(ctx, "Keating loaded: use /plan, /map, /animate, /verify, /bench, /evolve, /prompt-evolve, /improve, /trace, /feedback, or /policy.");
    });
}
@@ -0,0 +1,118 @@
1
+ import { existsSync, readFileSync } from "node:fs";
2
+ import { readdir } from "node:fs/promises";
3
+ import { join, resolve } from "node:path";
4
+ import { spawn, spawnSync } from "node:child_process";
5
+ import { homedir } from "node:os";
6
+ import { loadKeatingConfig, mergePiDefaults } from "../core/config.js";
7
+ import { ensureProjectScaffold } from "../core/project.js";
8
+ import { sessionsDir } from "../core/paths.js";
/**
 * Looks for a standalone `pi` executable by running `which pi`.
 *
 * @returns {{kind: "binary", command: string}|null} The trimmed path printed
 *   by `which` when it exits with status 0 and prints something, else `null`.
 */
function resolveStandalonePi() {
    const { status, stdout } = spawnSync("which", ["pi"], { encoding: "utf8" });
    // stdout is only inspected after a successful exit.
    const located = status === 0 ? stdout.trim() : "";
    return located ? { kind: "binary", command: located } : null;
}
/**
 * Orders directory names newest-first, comparing embedded digit runs as
 * numbers so that "0.10.0" ranks above "0.9.0".
 */
function compareNamesNewestFirst(left, right) {
    return right.localeCompare(left, "en", { numeric: true });
}
/**
 * Searches `~/.local/share/feynman/<name>/app` for a bundled Pi CLI.
 *
 * Sub-directories are tried in descending order and the first one that
 * contains `node_modules/@mariozechner/pi-coding-agent/dist/cli.js` wins.
 * NOTE(review): the descending order assumes the directory names are
 * version-like, so the highest name is the newest install — confirm against
 * the Feynman installer. A plain lexicographic sort would rank "0.9.0" above
 * "0.10.0", hence the numeric-aware comparison.
 *
 * A missing or unreadable base directory is treated as "nothing installed".
 *
 * @returns {Promise<{kind: "embedded-feynman", command: string, cliPath: string}|null>}
 *   Descriptor that runs the CLI with the current Node executable, or `null`.
 */
async function resolveEmbeddedPi() {
    const base = join(homedir(), ".local", "share", "feynman");
    // Best-effort probe: any readdir failure just means there is no embedded runtime.
    const entries = await readdir(base, { withFileTypes: true }).catch(() => []);
    const dirs = entries
        .filter((entry) => entry.isDirectory())
        .map((entry) => entry.name)
        .sort(compareNamesNewestFirst);
    for (const name of dirs) {
        const cliPath = join(base, name, "app", "node_modules", "@mariozechner", "pi-coding-agent", "dist", "cli.js");
        if (existsSync(cliPath)) {
            return {
                kind: "embedded-feynman",
                command: process.execPath,
                cliPath
            };
        }
    }
    return null;
}
/**
 * Probes for the available Pi runtimes and picks one according to the
 * project's `pi.runtimePreference` setting.
 *
 * - "embedded-only": the embedded runtime, or `null`.
 * - "prefer-standalone": the standalone binary, falling back to the embedded one.
 * - "standalone-only" and any other value: the standalone binary, or `null`.
 *
 * @param {string} cwd - Project directory whose Keating config is loaded.
 * @returns {Promise<object>} Report with `selected`, `standalone`, `embedded`
 *   and the `preference` that was applied.
 */
export async function detectPiRuntime(cwd) {
    const config = await loadKeatingConfig(cwd);
    const standalone = resolveStandalonePi();
    const embedded = await resolveEmbeddedPi();
    const preference = config.pi.runtimePreference;
    let selected;
    if (preference === "embedded-only") {
        selected = embedded;
    }
    else if (preference === "prefer-standalone") {
        selected = standalone ?? embedded;
    }
    else {
        selected = standalone;
    }
    return { selected, standalone, embedded, preference };
}
/**
 * Launches the selected Pi runtime for a project and waits for it to exit.
 *
 * The child inherits stdio and runs with the Keating extension, prompt
 * templates, skills and the contents of `SYSTEM.md` wired in via CLI flags,
 * followed by the caller's own arguments. `PI_SKIP_VERSION_CHECK` defaults to
 * "1" unless it is already set in the environment.
 *
 * @param {string} cwd - Project directory (also the child's working directory).
 * @param {string[]} args - Extra CLI arguments appended after Keating's own.
 * @returns {Promise<number>} The child's exit code. A child terminated by a
 *   signal has no exit code and is reported as 1, so a killed session is
 *   never mistaken for a successful one.
 * @throws {Error} When no runtime matches the configured preference, when the
 *   built extension file is missing, or when `SYSTEM.md` cannot be read.
 */
export async function launchPi(cwd, args) {
    await ensureProjectScaffold(cwd);
    const config = await loadKeatingConfig(cwd);
    const report = await detectPiRuntime(cwd);
    const runtime = report.selected;
    if (!runtime) {
        if (report.preference === "standalone-only") {
            throw new Error("Keating is configured for a fresh standalone Pi install, but no `pi` binary was found on PATH. Install `@mariozechner/pi-coding-agent` or change `pi.runtimePreference` in keating.config.json.");
        }
        throw new Error("Could not find a Pi runtime matching the current Keating runtime preference.");
    }
    const extensionPath = resolve(cwd, "dist", "src", "pi", "hyperteacher-extension.js");
    if (!existsSync(extensionPath)) {
        throw new Error(`Missing built extension: ${extensionPath}. Run npm run build first.`);
    }
    const promptDir = resolve(cwd, "pi", "prompts");
    const skillsDir = resolve(cwd, "pi", "skills");
    const systemPrompt = readFileSync(resolve(cwd, "SYSTEM.md"), "utf8");
    const sharedArgs = mergePiDefaults(config, [
        "--session-dir",
        sessionsDir(cwd),
        "--extension",
        extensionPath,
        "--prompt-template",
        promptDir,
        "--skill",
        skillsDir,
        "--append-system-prompt",
        systemPrompt,
        "--tools",
        "read,bash,edit,write,grep,find,ls",
        ...args
    ]);
    // The standalone binary is executed directly; the embedded runtime runs
    // its CLI script through the Node executable held in `runtime.command`.
    const childArgs = runtime.kind === "binary" ? sharedArgs : [runtime.cliPath, ...sharedArgs];
    const child = spawn(runtime.command, childArgs, {
        cwd,
        stdio: "inherit",
        env: {
            ...process.env,
            PI_SKIP_VERSION_CHECK: process.env.PI_SKIP_VERSION_CHECK ?? "1"
        }
    });
    return await new Promise((resolvePromise, reject) => {
        child.on("error", reject);
        // `code` is null when the child was terminated by a signal.
        child.on("exit", (code, signal) => resolvePromise(code ?? (signal ? 1 : 0)));
    });
}
@@ -0,0 +1,43 @@
1
+ import test from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { animationSceneSource, buildAnimationManifest } from "../src/core/animation.js";
4
+ import { lessonPlanToMermaid } from "../src/core/map.js";
5
+ import { DEFAULT_POLICY, clampPolicy } from "../src/core/policy.js";
6
+ import { Prng } from "../src/core/random.js";
// Property-style check: 80 seeded random policies and topics must all yield a
// well-formed Mermaid map, scene module and animation manifest.
test("visual generators preserve meaning-map and animation invariants across randomized topics", () => {
    const rng = new Prng(4242);
    let round = 0;
    while (round < 80) {
        // Draw order matters: the seeded generator is consumed field by field.
        const overrides = {
            name: `visual-${round}`,
            analogyDensity: rng.next(),
            socraticRatio: rng.next(),
            formalism: rng.next(),
            retrievalPractice: rng.next(),
            exerciseCount: rng.int(1, 5),
            diagramBias: rng.next(),
            reflectionBias: rng.next(),
            interdisciplinaryBias: rng.next(),
            challengeRate: rng.next()
        };
        const policy = clampPolicy({ ...DEFAULT_POLICY, ...overrides });
        // Every fourth round reuses the canonical "derivative" topic and draws no extra number.
        let topic = "derivative";
        if (round % 4 !== 0) {
            topic = `concept ${round} ${Math.floor(rng.next() * 1000)}`;
        }
        const diagram = lessonPlanToMermaid(topic, policy);
        const manifest = buildAnimationManifest(topic, policy);
        const sceneSource = animationSceneSource(topic, policy, "./vendor/manim-web.js");
        assert.ok(diagram.startsWith("graph TD"));
        assert.ok(diagram.includes('subgraph pedagogy["Teaching Loop"]'));
        assert.ok(diagram.includes('subgraph meaning["Meaning Map"]'));
        assert.ok(diagram.includes('subgraph friction["Misconceptions And Practice"]'));
        assert.ok(diagram.includes('subgraph transfer["Transfer Hooks"]'));
        assert.ok(sceneSource.includes('from "./vendor/manim-web.js"'));
        assert.ok(sceneSource.includes("export async function construct(scene)"));
        assert.ok(manifest.rationale.length >= 4);
        assert.equal(manifest.focusMoments.length, 4);
        round += 1;
    }
});
// Each canonical topic is expected to map to its own scene kind under the default policy.
test("canonical topics select distinct animation grammars", () => {
    const expectedKinds = [
        ["derivative", "function-graph"],
        ["entropy", "distribution-bars"],
        ["bayes", "belief-update"],
        ["stoicism", "concept-card"]
    ];
    for (const [topic, sceneKind] of expectedKinds) {
        assert.equal(buildAnimationManifest(topic, DEFAULT_POLICY).sceneKind, sceneKind);
    }
});
@@ -0,0 +1,36 @@
1
+ import test from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { mkdtemp, readFile, writeFile } from "node:fs/promises";
4
+ import { join } from "node:path";
5
+ import { tmpdir } from "node:os";
6
+ import { DEFAULT_KEATING_CONFIG, configPath, ensureConfig, loadKeatingConfig, mergePiDefaults } from "../src/core/config.js";
// A fresh directory must end up with the default config both on disk and when loaded.
test("ensureConfig creates the default config and loadKeatingConfig reads it", async () => {
    const projectDir = await mkdtemp(join(tmpdir(), "keating-config-"));
    await ensureConfig(projectDir);
    const loaded = await loadKeatingConfig(projectDir);
    const onDiskText = await readFile(configPath(projectDir), "utf8");
    const onDisk = JSON.parse(onDiskText);
    assert.deepEqual(loaded, DEFAULT_KEATING_CONFIG);
    assert.deepEqual(onDisk, DEFAULT_KEATING_CONFIG);
});
// Configured defaults are prepended as flags, but flags the user already passed are left alone.
test("mergePiDefaults injects model defaults but preserves explicit user overrides", async () => {
    const projectDir = await mkdtemp(join(tmpdir(), "keating-config-override-"));
    const customConfig = {
        pi: {
            runtimePreference: "prefer-standalone",
            defaultProvider: "anthropic",
            defaultModel: "anthropic/claude-sonnet-4-5",
            defaultThinking: "high"
        }
    };
    await writeFile(configPath(projectDir), JSON.stringify(customConfig, null, 2), "utf8");
    const config = await loadKeatingConfig(projectDir);
    const withDefaults = mergePiDefaults(config, ["hello"]);
    assert.deepEqual(withDefaults, [
        "--thinking",
        "high",
        "--model",
        "anthropic/claude-sonnet-4-5",
        "--provider",
        "anthropic",
        "hello"
    ]);
    const explicitArgs = ["--model", "openai/gpt-5", "--provider", "openai", "--thinking", "low", "hello"];
    // Snapshot the expectation before the call so it cannot be affected by the function under test.
    const expectedUnchanged = [...explicitArgs];
    assert.deepEqual(mergePiDefaults(config, explicitArgs), expectedUnchanged);
});
@@ -0,0 +1,39 @@
1
+ import test from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { mkdtemp, readFile } from "node:fs/promises";
4
+ import { join } from "node:path";
5
+ import { tmpdir } from "node:os";
6
+ import { runBenchmarkSuite } from "../src/core/benchmark.js";
7
+ import { evolvePolicy, noveltyScore } from "../src/core/evolution.js";
8
+ import { DEFAULT_POLICY } from "../src/core/policy.js";
// One seeded evolution run: the winner and every accepted candidate must score at
// least the baseline, accepted candidates need novelty >= 0.05, and the archive
// written to disk must name the winning policy as current.
test("accepted evolution candidates never underperform the current best by construction", async () => {
    const scratchDir = await mkdtemp(join(tmpdir(), "keating-evolution-"));
    const archivePath = join(scratchDir, "archive.json");
    const run = await evolvePolicy(archivePath, DEFAULT_POLICY, undefined, 20, 1234);
    const baselineScore = run.baseline.overallScore;
    assert.ok(run.best.overallScore >= baselineScore);
    run.acceptedCandidates.forEach((candidate) => {
        assert.ok(candidate.novelty >= 0.05);
        assert.ok(candidate.benchmark.overallScore >= baselineScore);
    });
    const archiveText = await readFile(archivePath, "utf8");
    const archive = JSON.parse(archiveText);
    assert.equal(archive.currentPolicy.name, run.best.policy.name);
});
// Checks the three anchor cases of noveltyScore: identical policy, changed policy, empty archive.
test("noveltyScore uses Euclidean distance in parameter space", () => {
    const archived = { ...DEFAULT_POLICY };
    const sameAsArchived = { ...DEFAULT_POLICY };
    const shifted = { ...DEFAULT_POLICY, analogyDensity: 0.1, formalism: 0.1 };
    // A policy equal to the archived one has no novelty.
    assert.equal(noveltyScore([archived], sameAsArchived), 0);
    // Changing parameters must produce a strictly positive score.
    const shiftedNovelty = noveltyScore([archived], shifted);
    assert.ok(shiftedNovelty > 0, `expected positive novelty, got ${shiftedNovelty}`);
    // With nothing archived, the score is 1.
    assert.equal(noveltyScore([], shifted), 1);
});
// Two runs with the same policy, topic and seed must produce deeply equal reports.
test("benchmark suite remains deterministic for fixed policy and seed", () => {
    const seed = 99;
    const firstRun = runBenchmarkSuite(DEFAULT_POLICY, "derivative", seed);
    const secondRun = runBenchmarkSuite(DEFAULT_POLICY, "derivative", seed);
    assert.deepEqual(firstRun, secondRun);
});
@@ -0,0 +1,37 @@
1
+ import test from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { runBenchmarkSuite } from "../src/core/benchmark.js";
4
+ import { buildLessonPlan } from "../src/core/lesson-plan.js";
5
+ import { lessonPlanToMermaid } from "../src/core/map.js";
6
+ import { DEFAULT_POLICY, clampPolicy } from "../src/core/policy.js";
7
+ import { Prng } from "../src/core/random.js";
// Fuzz check: 200 seeded random policies and topics must give a lesson plan with at
// least six phases, a Mermaid graph, a score in [0, 100] and mean confusion in [0, 1].
test("fuzzed topics and policies stay bounded and renderable", () => {
    const rng = new Prng(2026);
    let round = 0;
    while (round < 200) {
        // Draw order matters: the seeded generator is consumed field by field.
        const overrides = {
            name: `fuzz-${round}`,
            analogyDensity: rng.next(),
            socraticRatio: rng.next(),
            formalism: rng.next(),
            retrievalPractice: rng.next(),
            exerciseCount: rng.int(1, 5),
            diagramBias: rng.next(),
            reflectionBias: rng.next(),
            interdisciplinaryBias: rng.next(),
            challengeRate: rng.next()
        };
        const policy = clampPolicy({ ...DEFAULT_POLICY, ...overrides });
        const topic = `topic ${round} ${Math.floor(rng.next() * 1000)}`;
        const plan = buildLessonPlan(topic, policy);
        const diagram = lessonPlanToMermaid(topic, policy);
        // The benchmark seed is the 1-based round number.
        const report = runBenchmarkSuite(policy, topic, round + 1);
        assert.ok(plan.phases.length >= 6);
        assert.ok(diagram.startsWith("graph TD"));
        assert.ok(report.overallScore >= 0);
        assert.ok(report.overallScore <= 100);
        report.topicBenchmarks.forEach((entry) => {
            assert.ok(entry.meanConfusion >= 0);
            assert.ok(entry.meanConfusion <= 1);
        });
        round += 1;
    }
});