@interleavelove/keating 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +274 -0
  2. package/bin/keating.js +31 -0
  3. package/dist/src/cli/main.js +165 -0
  4. package/dist/src/core/animation.js +372 -0
  5. package/dist/src/core/benchmark.js +238 -0
  6. package/dist/src/core/config.js +81 -0
  7. package/dist/src/core/evolution.js +224 -0
  8. package/dist/src/core/learner-state.js +88 -0
  9. package/dist/src/core/lesson-plan.js +155 -0
  10. package/dist/src/core/map.js +89 -0
  11. package/dist/src/core/paths.js +69 -0
  12. package/dist/src/core/pi-agent.js +58 -0
  13. package/dist/src/core/policy.js +53 -0
  14. package/dist/src/core/project.js +189 -0
  15. package/dist/src/core/prompt-evolution.js +337 -0
  16. package/dist/src/core/random.js +19 -0
  17. package/dist/src/core/self-improve.js +419 -0
  18. package/dist/src/core/topics.js +620 -0
  19. package/dist/src/core/types.js +1 -0
  20. package/dist/src/core/util.js +28 -0
  21. package/dist/src/core/verification.js +162 -0
  22. package/dist/src/pi/hyperteacher-extension.js +180 -0
  23. package/dist/src/runtime/pi.js +118 -0
  24. package/dist/test/animation.test.js +43 -0
  25. package/dist/test/config.test.js +36 -0
  26. package/dist/test/evolution.test.js +39 -0
  27. package/dist/test/fuzz.test.js +37 -0
  28. package/dist/test/hyperteacher-extension.test.js +122 -0
  29. package/dist/test/lesson-plan.test.js +35 -0
  30. package/dist/test/pipeline.test.js +57 -0
  31. package/dist/test/prompt-evolution.test.js +89 -0
  32. package/package.json +58 -0
  33. package/pi/prompts/bridge.md +14 -0
  34. package/pi/prompts/diagnose.md +15 -0
  35. package/pi/prompts/improve.md +39 -0
  36. package/pi/prompts/learn.md +21 -0
  37. package/pi/prompts/quiz.md +14 -0
  38. package/pi/skills/adaptive-teaching/SKILL.md +33 -0
  39. package/scripts/install/install.sh +307 -0
@@ -0,0 +1,53 @@
1
+ import { readFile, writeFile } from "node:fs/promises";
2
+ import { clamp } from "./util.js";
3
+ export const DEFAULT_POLICY = {
4
+ name: "keating-default",
5
+ analogyDensity: 0.72,
6
+ socraticRatio: 0.66,
7
+ formalism: 0.64,
8
+ retrievalPractice: 0.74,
9
+ exerciseCount: 3,
10
+ diagramBias: 0.7,
11
+ reflectionBias: 0.68,
12
+ interdisciplinaryBias: 0.62,
13
+ challengeRate: 0.58
14
+ };
15
+ export function clampPolicy(policy) {
16
+ return {
17
+ ...policy,
18
+ analogyDensity: clamp(policy.analogyDensity),
19
+ socraticRatio: clamp(policy.socraticRatio),
20
+ formalism: clamp(policy.formalism),
21
+ retrievalPractice: clamp(policy.retrievalPractice),
22
+ exerciseCount: Math.min(5, Math.max(1, Math.round(policy.exerciseCount))),
23
+ diagramBias: clamp(policy.diagramBias),
24
+ reflectionBias: clamp(policy.reflectionBias),
25
+ interdisciplinaryBias: clamp(policy.interdisciplinaryBias),
26
+ challengeRate: clamp(policy.challengeRate)
27
+ };
28
+ }
29
+ export function policySignature(policy) {
30
+ return [
31
+ policy.analogyDensity.toFixed(2),
32
+ policy.socraticRatio.toFixed(2),
33
+ policy.formalism.toFixed(2),
34
+ policy.retrievalPractice.toFixed(2),
35
+ String(policy.exerciseCount),
36
+ policy.diagramBias.toFixed(2),
37
+ policy.reflectionBias.toFixed(2),
38
+ policy.interdisciplinaryBias.toFixed(2),
39
+ policy.challengeRate.toFixed(2)
40
+ ].join("|");
41
+ }
42
+ export async function loadPolicy(filePath) {
43
+ try {
44
+ const content = await readFile(filePath, "utf8");
45
+ return clampPolicy(JSON.parse(content));
46
+ }
47
+ catch {
48
+ return DEFAULT_POLICY;
49
+ }
50
+ }
51
+ export async function savePolicy(filePath, policy) {
52
+ await writeFile(filePath, `${JSON.stringify(clampPolicy(policy), null, 2)}\n`, "utf8");
53
+ }
@@ -0,0 +1,189 @@
1
+ import { readdir, stat, writeFile } from "node:fs/promises";
2
+ import { join, relative } from "node:path";
3
+ import { loadKeatingConfig } from "./config.js";
4
+ import { writeLessonAnimation } from "./animation.js";
5
+ import { benchmarkToMarkdown, runBenchmarkSuite } from "./benchmark.js";
6
+ import { evolutionToMarkdown, evolvePolicy } from "./evolution.js";
7
+ import { buildLessonPlan, lessonPlanToMarkdown } from "./lesson-plan.js";
8
+ import { writeLessonMap } from "./map.js";
9
+ import { writePromptEvolutionArtifacts } from "./prompt-evolution.js";
10
+ import { animationsDir, benchmarksDir, currentPolicyPath, ensureKeatingDirs, evolutionDir, mapsDir, plansDir, policyArchivePath, promptEvolutionDir, tracesDir, verificationsDir, verificationCachePath } from "./paths.js";
11
+ import { ensureConfig } from "./config.js";
12
+ import { DEFAULT_POLICY, loadPolicy, savePolicy } from "./policy.js";
13
+ import { resolveTopic } from "./topics.js";
14
+ import { slugify } from "./util.js";
15
+ import { buildPendingVerificationResult, buildVerificationChecklist, loadVerificationCache, runCoveVerification, saveVerificationCache, verificationStatus } from "./verification.js";
16
+ import { generateImprovementArtifact, loadImprovementArchive, improvementHistoryToMarkdown, evaluateImprovement, acceptImprovement, rejectImprovement } from "./self-improve.js";
17
+ export async function ensureProjectScaffold(cwd) {
18
+ await ensureKeatingDirs(cwd);
19
+ await ensureConfig(cwd);
20
+ const policy = await loadPolicy(currentPolicyPath(cwd));
21
+ await savePolicy(currentPolicyPath(cwd), policy ?? DEFAULT_POLICY);
22
+ }
23
+ export async function planTopicArtifact(cwd, topicName) {
24
+ await ensureProjectScaffold(cwd);
25
+ const policy = await loadPolicy(currentPolicyPath(cwd));
26
+ const plan = buildLessonPlan(topicName, policy);
27
+ const planPath = join(plansDir(cwd), `${slugify(topicName)}.md`);
28
+ await writeFile(planPath, lessonPlanToMarkdown(plan), "utf8");
29
+ return { planPath };
30
+ }
31
+ export async function mapTopicArtifact(cwd, topicName) {
32
+ await ensureProjectScaffold(cwd);
33
+ const policy = await loadPolicy(currentPolicyPath(cwd));
34
+ return writeLessonMap(cwd, topicName, policy);
35
+ }
36
+ export async function animateTopicArtifact(cwd, topicName) {
37
+ await ensureProjectScaffold(cwd);
38
+ const policy = await loadPolicy(currentPolicyPath(cwd));
39
+ return writeLessonAnimation(cwd, topicName, policy);
40
+ }
41
+ export async function benchPolicyArtifact(cwd, focusTopic) {
42
+ await ensureProjectScaffold(cwd);
43
+ const config = await loadKeatingConfig(cwd);
44
+ const policy = await loadPolicy(currentPolicyPath(cwd));
45
+ const result = runBenchmarkSuite(policy, focusTopic, 20260401, config.debug.traceTopLearners);
46
+ const fileName = focusTopic ? `${slugify(focusTopic)}.md` : "core-suite.md";
47
+ const reportPath = join(benchmarksDir(cwd), fileName);
48
+ await writeFile(reportPath, benchmarkToMarkdown(result), "utf8");
49
+ const tracePath = config.debug.persistTraces
50
+ ? join(tracesDir(cwd), `${focusTopic ? slugify(focusTopic) : "core-suite"}-benchmark.json`)
51
+ : null;
52
+ if (tracePath) {
53
+ await writeFile(tracePath, `${JSON.stringify(result, null, 2)}\n`, "utf8");
54
+ }
55
+ return { reportPath, tracePath, overallScore: result.overallScore };
56
+ }
57
+ export async function evolvePolicyArtifact(cwd, focusTopic) {
58
+ await ensureProjectScaffold(cwd);
59
+ const config = await loadKeatingConfig(cwd);
60
+ const policyPath = currentPolicyPath(cwd);
61
+ const basePolicy = await loadPolicy(policyPath);
62
+ const run = await evolvePolicy(policyArchivePath(cwd), basePolicy, focusTopic);
63
+ await savePolicy(policyPath, run.best.policy);
64
+ const fileName = focusTopic ? `${slugify(focusTopic)}.md` : "latest.md";
65
+ const reportPath = join(evolutionDir(cwd), fileName);
66
+ await writeFile(reportPath, evolutionToMarkdown(run), "utf8");
67
+ const tracePath = config.debug.persistTraces
68
+ ? join(tracesDir(cwd), `${focusTopic ? slugify(focusTopic) : "latest"}-evolution.json`)
69
+ : null;
70
+ if (tracePath) {
71
+ await writeFile(tracePath, `${JSON.stringify(run, null, 2)}\n`, "utf8");
72
+ }
73
+ return { reportPath, tracePath, bestScore: run.best.overallScore, policyPath };
74
+ }
75
+ export async function evolvePromptArtifact(cwd, promptName = "learn") {
76
+ await ensureProjectScaffold(cwd);
77
+ return writePromptEvolutionArtifacts(cwd, promptName);
78
+ }
79
+ export async function verifyTopicArtifact(cwd, topicName, useLLM = true) {
80
+ await ensureProjectScaffold(cwd);
81
+ const topic = resolveTopic(topicName);
82
+ const cachePath = verificationCachePath(cwd);
83
+ const cache = await loadVerificationCache(cachePath);
84
+ const existing = verificationStatus(topic, cache);
85
+ if (existing && existing.claims.every((c) => c.status !== "unconfirmed")) {
86
+ return {
87
+ checklistPath: join(verificationsDir(cwd), `${topic.slug}.md`),
88
+ alreadyVerified: true,
89
+ result: existing
90
+ };
91
+ }
92
+ let result;
93
+ if (useLLM) {
94
+ try {
95
+ result = await runCoveVerification(cwd, topic);
96
+ cache[topic.slug] = result;
97
+ await saveVerificationCache(cachePath, cache);
98
+ }
99
+ catch (error) {
100
+ console.warn(`CoVe verification failed for ${topic.slug}, falling back to manual checklist:`, error);
101
+ result = buildPendingVerificationResult(topic);
102
+ }
103
+ }
104
+ else {
105
+ result = buildPendingVerificationResult(topic);
106
+ }
107
+ const checklist = buildVerificationChecklist(topic, result);
108
+ const checklistPath = join(verificationsDir(cwd), `${topic.slug}.md`);
109
+ await writeFile(checklistPath, checklist, "utf8");
110
+ return { checklistPath, alreadyVerified: false, result };
111
+ }
112
+ export async function improveArtifact(cwd) {
113
+ await ensureProjectScaffold(cwd);
114
+ return generateImprovementArtifact(cwd);
115
+ }
116
+ export async function improveAccept(cwd, proposalId) {
117
+ await ensureProjectScaffold(cwd);
118
+ const evaluation = await evaluateImprovement(cwd, 0);
119
+ await acceptImprovement(cwd, proposalId, evaluation.afterScore);
120
+ return { afterScore: evaluation.afterScore, delta: evaluation.delta };
121
+ }
122
+ export async function improveReject(cwd, proposalId, snapshots) {
123
+ await ensureProjectScaffold(cwd);
124
+ await rejectImprovement(cwd, proposalId, snapshots);
125
+ }
126
+ export async function improveHistory(cwd) {
127
+ await ensureProjectScaffold(cwd);
128
+ const archive = await loadImprovementArchive(cwd);
129
+ return improvementHistoryToMarkdown(archive);
130
+ }
131
+ export async function currentPolicySummary(cwd) {
132
+ await ensureProjectScaffold(cwd);
133
+ const policy = await loadPolicy(currentPolicyPath(cwd));
134
+ return [
135
+ `Policy: ${policy.name}`,
136
+ `analogyDensity=${policy.analogyDensity.toFixed(2)}`,
137
+ `socraticRatio=${policy.socraticRatio.toFixed(2)}`,
138
+ `formalism=${policy.formalism.toFixed(2)}`,
139
+ `retrievalPractice=${policy.retrievalPractice.toFixed(2)}`,
140
+ `exerciseCount=${policy.exerciseCount}`,
141
+ `diagramBias=${policy.diagramBias.toFixed(2)}`,
142
+ `reflectionBias=${policy.reflectionBias.toFixed(2)}`,
143
+ `interdisciplinaryBias=${policy.interdisciplinaryBias.toFixed(2)}`,
144
+ `challengeRate=${policy.challengeRate.toFixed(2)}`
145
+ ].join("\n");
146
+ }
147
+ export async function listArtifacts(cwd) {
148
+ await ensureProjectScaffold(cwd);
149
+ const roots = [
150
+ plansDir(cwd),
151
+ mapsDir(cwd),
152
+ animationsDir(cwd),
153
+ benchmarksDir(cwd),
154
+ evolutionDir(cwd),
155
+ promptEvolutionDir(cwd),
156
+ tracesDir(cwd),
157
+ verificationsDir(cwd)
158
+ ];
159
+ const artifacts = [];
160
+ async function collectFiles(root) {
161
+ const entries = await readdir(root, { withFileTypes: true }).catch(() => []);
162
+ const files = [];
163
+ for (const entry of entries) {
164
+ if (entry.name === "_vendor")
165
+ continue;
166
+ const fullPath = join(root, entry.name);
167
+ if (entry.isDirectory()) {
168
+ files.push(...(await collectFiles(fullPath)));
169
+ }
170
+ else if (entry.isFile()) {
171
+ files.push(fullPath);
172
+ }
173
+ }
174
+ return files;
175
+ }
176
+ for (const root of roots) {
177
+ for (const fullPath of await collectFiles(root)) {
178
+ const info = await stat(fullPath);
179
+ artifacts.push({
180
+ label: `${relative(cwd, fullPath)} (${Math.round(info.size / 1024) || 1}KB)`,
181
+ path: relative(cwd, fullPath),
182
+ mtime: info.mtimeMs
183
+ });
184
+ }
185
+ }
186
+ return artifacts
187
+ .sort((left, right) => right.mtime - left.mtime)
188
+ .map((artifact) => ({ label: artifact.label, path: artifact.path }));
189
+ }
@@ -0,0 +1,337 @@
1
+ import { readFile, writeFile } from "node:fs/promises";
2
+ import { join } from "node:path";
3
+ import * as dotenv from "dotenv";
4
+ import { promptEvolutionArchivePath, promptEvolutionDir } from "./paths.js";
5
+ import { piComplete, piCompleteJson } from "./pi-agent.js";
6
+ dotenv.config();
7
+ const KEATING_QUOTE = "Boys, you must strive to find your own voice. Because the longer you wait to begin, the less likely you are to find it at all.";
8
+ let piAvailabilityCache = null;
9
+ function promptNameFromPath(promptPath) {
10
+ const fileName = promptPath.split(/[\\/]/).pop() ?? promptPath;
11
+ return fileName.replace(/\.md$/, "");
12
+ }
13
+ function parsePromptBody(raw) {
14
+ if (!raw.startsWith("---"))
15
+ return raw.trim();
16
+ const parts = raw.split("\n---\n");
17
+ if (parts.length < 2)
18
+ return raw.trim();
19
+ return parts.slice(1).join("\n---\n").trim();
20
+ }
21
+ function parseFrontMatter(raw) {
22
+ if (!raw.startsWith("---"))
23
+ return "";
24
+ const parts = raw.split("\n---\n");
25
+ if (parts.length < 2)
26
+ return "";
27
+ return `${parts[0]}\n---`.trim();
28
+ }
29
+ function clamp01(value) {
30
+ return Math.max(0, Math.min(1, value));
31
+ }
32
+ function keywordScore(body, keywords, base, bonus) {
33
+ const hits = keywords.filter((keyword) => body.includes(keyword)).length;
34
+ return clamp01(base + hits * bonus);
35
+ }
36
+ function heuristicPromptEvaluation(promptPath, prompt) {
37
+ const body = parsePromptBody(prompt).toLowerCase();
38
+ const objectives = {
39
+ voice_divergence: keywordScore(body, ["own words", "own language", "personal context", "say it again"], 0.35, 0.18),
40
+ diagnosis: keywordScore(body, ["diagnostic", "prerequisite", "misconception", "assumption check"], 0.4, 0.16),
41
+ verification: keywordScore(body, ["verify", "verification", "source", "unverified", "check claim"], 0.2, 0.18),
42
+ retrieval: keywordScore(body, ["retrieval", "reconstruct", "without looking", "recall", "practice"], 0.35, 0.18),
43
+ transfer: keywordScore(body, ["transfer", "bridge", "other domain", "practical consequence", "new setting"], 0.3, 0.18),
44
+ structure: keywordScore(body, ["diagnose", "intuition", "formal", "misconception", "example", "retrieval", "reflection"], 0.45, 0.09)
45
+ };
46
+ const feedback = [];
47
+ if (objectives.voice_divergence < 0.7)
48
+ feedback.push("Add an explicit requirement that the learner restate the idea in their own words.");
49
+ if (objectives.diagnosis < 0.7)
50
+ feedback.push("Strengthen diagnosis of prerequisite gaps and misconceptions before teaching.");
51
+ if (objectives.verification < 0.7)
52
+ feedback.push("Include a step that distinguishes verified claims from claims that still need checking.");
53
+ if (objectives.retrieval < 0.7)
54
+ feedback.push("Add a retrieval checkpoint that requires reconstruction rather than agreement.");
55
+ if (objectives.transfer < 0.7)
56
+ feedback.push("Bridge the concept into a different domain or practical context before ending.");
57
+ if (objectives.structure < 0.7)
58
+ feedback.push("Make the lesson loop explicit so the workflow is easy to follow and evaluate.");
59
+ const score = objectives.voice_divergence * 14 +
60
+ objectives.diagnosis * 20 +
61
+ objectives.verification * 18 +
62
+ objectives.retrieval * 18 +
63
+ objectives.transfer * 16 +
64
+ objectives.structure * 14;
65
+ return {
66
+ promptPath,
67
+ promptName: promptNameFromPath(promptPath),
68
+ score,
69
+ objectives,
70
+ feedback
71
+ };
72
+ }
73
+ function heuristicCandidatePrompt(basePrompt) {
74
+ const frontMatter = parseFrontMatter(basePrompt);
75
+ const body = parsePromptBody(basePrompt).trimEnd();
76
+ const additions = [
77
+ '4a. If the learner echoes your phrasing, stop and ask them to explain the idea again in their own words.',
78
+ '4b. Separate missing prerequisite, misconception, and partial intuition before choosing the next teaching move.',
79
+ '5a. Add one short retrieval checkpoint that the learner must answer without relying on your wording.',
80
+ '6a. Bridge the idea into a new domain, personal example, or practical consequence before ending.',
81
+ '6b. Mark any factual claim that still needs verification instead of presenting it as settled.'
82
+ ].filter((line) => !body.includes(line));
83
+ const evolvedBody = additions.length === 0 ? `${body}\n7a. Keep the learner cognitively active at every step.` : `${body}\n${additions.join("\n")}`;
84
+ return frontMatter ? `${frontMatter}\n${evolvedBody}\n` : `${evolvedBody}\n`;
85
+ }
86
+ export async function evaluatePromptContent(cwd, promptPath, prompt) {
87
+ const body = parsePromptBody(prompt);
88
+ const evalPrompt = `Evaluate the following teaching prompt template based on Keating's hyperteacher philosophy.
89
+ Keating philosophy:
90
+ - AI must not be a surrogate for thought.
91
+ - Measuring success by the learner's independent articulation (finding their "voice").
92
+ - Rejecting "surface agreement" (rote echoes or simple "yes/no" answers).
93
+ - The "diagnose -> intuition -> formal core -> misconception repair -> example -> retrieval -> reflection" loop.
94
+
95
+ Evaluate these objectives on a scale of 0 to 1 (floating point):
96
+ 1. voice_divergence: How well does the prompt force the learner to use their own language? Penalize prompts that allow the learner to echo the AI's explanation.
97
+ 2. diagnosis: Checking prerequisites and misconceptions before teaching.
98
+ 3. verification: Refusing to teach unverified claims as settled truth.
99
+ 4. retrieval: Using reconstruction and practice instead of passive agreement.
100
+ 5. transfer: Bridging the concept to other domains or practical consequences.
101
+ 6. structure: Having an explicit, logical teaching workflow.
102
+
103
+ Provide the evaluation in JSON format:
104
+ {
105
+ "objectives": {
106
+ "voice_divergence": number,
107
+ "diagnosis": number,
108
+ "verification": number,
109
+ "retrieval": number,
110
+ "transfer": number,
111
+ "structure": number
112
+ },
113
+ "feedback": string[]
114
+ }
115
+
116
+ Prompt Content:
117
+ """
118
+ ${body}
119
+ """
120
+ `;
121
+ if (piAvailabilityCache === false) {
122
+ return heuristicPromptEvaluation(promptPath, prompt);
123
+ }
124
+ let data;
125
+ try {
126
+ data = await piCompleteJson(cwd, evalPrompt, { thinking: "low" });
127
+ piAvailabilityCache = true;
128
+ }
129
+ catch {
130
+ piAvailabilityCache = false;
131
+ const heuristic = heuristicPromptEvaluation(promptPath, prompt);
132
+ return heuristic;
133
+ }
134
+ const objectives = data.objectives;
135
+ const score = objectives.voice_divergence * 14 +
136
+ objectives.diagnosis * 20 +
137
+ objectives.verification * 18 +
138
+ objectives.retrieval * 18 +
139
+ objectives.transfer * 16 +
140
+ objectives.structure * 14;
141
+ return {
142
+ promptPath,
143
+ promptName: promptNameFromPath(promptPath),
144
+ score,
145
+ objectives,
146
+ feedback: data.feedback
147
+ };
148
+ }
149
+ async function generateCandidatePrompt(cwd, basePrompt, evaluation, iteration) {
150
+ const frontMatter = parseFrontMatter(basePrompt);
151
+ const body = parsePromptBody(basePrompt);
152
+ const generationPrompt = `You are Keating's prompt-learning agent. Your goal is to evolve a teaching prompt template so that it prevents "surface agreement" and forces the learner to find their own voice.
153
+
154
+ Original Prompt:
155
+ """
156
+ ${body}
157
+ """
158
+
159
+ Feedback:
160
+ ${evaluation.feedback.map(f => `- ${f}`).join("\n")}
161
+
162
+ Mandate:
163
+ 1. PUSH FOR DIVERGENCE: Add instructions that force the learner to re-explain the concept using their own analogies or personal context.
164
+ 2. REJECT ECHOES: Instruct the teacher to identify if the learner is just repeating words from the previous explanation and, if so, ask them to "say it again, but as if you're explaining it to someone else entirely."
165
+ 3. ADDRESS FEEDBACK: Address the specific gaps in diagnosis, verification, and structure.
166
+
167
+ Evolved Prompt Body (no code blocks, no frontmatter):`;
168
+ if (piAvailabilityCache === false) {
169
+ return heuristicCandidatePrompt(basePrompt);
170
+ }
171
+ let evolvedBody;
172
+ try {
173
+ evolvedBody = await piComplete(cwd, generationPrompt, { thinking: "medium" });
174
+ piAvailabilityCache = true;
175
+ }
176
+ catch {
177
+ piAvailabilityCache = false;
178
+ return heuristicCandidatePrompt(basePrompt);
179
+ }
180
+ return frontMatter ? `${frontMatter}\n${evolvedBody}\n` : `${evolvedBody}\n`;
181
+ }
182
+ function objectiveVector(candidate) {
183
+ const { objectives } = candidate.evaluation;
184
+ return [
185
+ objectives.voice_divergence,
186
+ objectives.diagnosis,
187
+ objectives.verification,
188
+ objectives.retrieval,
189
+ objectives.transfer,
190
+ objectives.structure
191
+ ];
192
+ }
193
+ function pairwisePreference(left, right) {
194
+ const leftVector = objectiveVector(left);
195
+ const rightVector = objectiveVector(right);
196
+ let wins = 0;
197
+ let losses = 0;
198
+ for (let index = 0; index < leftVector.length; index += 1) {
199
+ if (leftVector[index] > rightVector[index])
200
+ wins += 1;
201
+ if (leftVector[index] < rightVector[index])
202
+ losses += 1;
203
+ }
204
+ const aggregateDelta = left.evaluation.score - right.evaluation.score;
205
+ return wins - losses + aggregateDelta / 25;
206
+ }
207
+ export function prosperStyleWinner(candidates) {
208
+ let best = candidates[0];
209
+ let bestScore = -Infinity;
210
+ for (const candidate of candidates) {
211
+ const score = candidates.reduce((sum, opponent) => {
212
+ if (candidate === opponent)
213
+ return sum;
214
+ return sum + pairwisePreference(candidate, opponent);
215
+ }, 0);
216
+ candidate.preferenceScore = score;
217
+ if (score > bestScore) {
218
+ best = candidate;
219
+ bestScore = score;
220
+ }
221
+ }
222
+ return best;
223
+ }
224
+ async function loadArchive(archivePath) {
225
+ try {
226
+ const raw = await readFile(archivePath, "utf8");
227
+ return JSON.parse(raw);
228
+ }
229
+ catch {
230
+ return { winners: [] };
231
+ }
232
+ }
233
+ async function saveArchive(archivePath, archive) {
234
+ await writeFile(archivePath, `${JSON.stringify(archive, null, 2)}\n`, "utf8");
235
+ }
236
+ export async function evolvePrompt(cwd, promptName = "learn", iterations = 4, evaluator = evaluatePromptContent, generator = generateCandidatePrompt) {
237
+ const promptPath = join(cwd, "pi", "prompts", `${promptName}.md`);
238
+ const prompt = await readFile(promptPath, "utf8");
239
+ const baseline = await evaluator(cwd, promptPath, prompt);
240
+ const candidates = [];
241
+ for (let iteration = 1; iteration <= iterations; iteration += 1) {
242
+ const candidatePrompt = await generator(cwd, prompt, baseline, iteration);
243
+ const evaluation = await evaluator(cwd, promptPath, candidatePrompt);
244
+ candidates.push({
245
+ iteration,
246
+ label: `${promptName}-candidate-${iteration}`,
247
+ prompt: candidatePrompt,
248
+ evaluation,
249
+ parentLabel: promptName,
250
+ accepted: false,
251
+ preferenceScore: 0
252
+ });
253
+ }
254
+ const best = prosperStyleWinner(candidates);
255
+ for (const candidate of candidates) {
256
+ candidate.accepted = candidate.label === best.label && candidate.evaluation.score >= baseline.score;
257
+ }
258
+ const archivePath = promptEvolutionArchivePath(cwd);
259
+ const archive = await loadArchive(archivePath);
260
+ archive.winners.push({
261
+ promptName,
262
+ label: best.label,
263
+ score: best.evaluation.score,
264
+ objectives: best.evaluation.objectives,
265
+ updatedAt: new Date().toISOString()
266
+ });
267
+ await saveArchive(archivePath, archive);
268
+ return {
269
+ promptPath,
270
+ promptName,
271
+ baseline,
272
+ best,
273
+ exploredCandidates: candidates,
274
+ acceptedCandidates: candidates.filter((candidate) => candidate.accepted)
275
+ };
276
+ }
277
+ export async function writePromptEvolutionArtifacts(cwd, promptName = "learn") {
278
+ const run = await evolvePrompt(cwd, promptName);
279
+ const reportPath = join(promptEvolutionDir(cwd), `${promptName}.md`);
280
+ const evolvedPromptPath = join(promptEvolutionDir(cwd), `${promptName}.evolved.md`);
281
+ await writeFile(reportPath, promptEvolutionToMarkdown(run), "utf8");
282
+ await writeFile(evolvedPromptPath, run.best.prompt, "utf8");
283
+ return {
284
+ reportPath,
285
+ evolvedPromptPath,
286
+ bestScore: run.best.evaluation.score,
287
+ promptPath: run.promptPath
288
+ };
289
+ }
290
+ export function promptEvolutionToMarkdown(run) {
291
+ const objectiveList = (objectives) => [
292
+ `voice_divergence=${objectives.voice_divergence.toFixed(2)}`,
293
+ `diagnosis=${objectives.diagnosis.toFixed(2)}`,
294
+ `verification=${objectives.verification.toFixed(2)}`,
295
+ `retrieval=${objectives.retrieval.toFixed(2)}`,
296
+ `transfer=${objectives.transfer.toFixed(2)}`,
297
+ `structure=${objectives.structure.toFixed(2)}`
298
+ ];
299
+ const lines = [
300
+ `# Prompt Evolution Report: ${run.promptName}`,
301
+ "",
302
+ `- Source prompt: ${run.promptPath}`,
303
+ `- Baseline score: ${run.baseline.score.toFixed(2)}`,
304
+ `- Best candidate: ${run.best.label}`,
305
+ `- Best candidate score: ${run.best.evaluation.score.toFixed(2)}`,
306
+ `- PROSPER-style preference score: ${run.best.preferenceScore.toFixed(2)}`,
307
+ "",
308
+ "## Baseline Feedback",
309
+ ""
310
+ ];
311
+ if (run.baseline.feedback.length === 0) {
312
+ lines.push("- No major prompt-learning gaps detected.");
313
+ }
314
+ else {
315
+ for (const item of run.baseline.feedback) {
316
+ lines.push(`- ${item}`);
317
+ }
318
+ }
319
+ lines.push("");
320
+ lines.push("## Candidate Comparison");
321
+ lines.push("");
322
+ for (const candidate of run.exploredCandidates) {
323
+ lines.push(`### ${candidate.label}`);
324
+ lines.push(`- score: ${candidate.evaluation.score.toFixed(2)}`);
325
+ lines.push(`- preference: ${candidate.preferenceScore.toFixed(2)}`);
326
+ lines.push(`- accepted: ${candidate.accepted ? "yes" : "no"}`);
327
+ lines.push(`- objectives: ${objectiveList(candidate.evaluation.objectives).join(", ")}`);
328
+ lines.push("");
329
+ }
330
+ lines.push("## Recommended Prompt");
331
+ lines.push("");
332
+ lines.push("```md");
333
+ lines.push(run.best.prompt.trimEnd());
334
+ lines.push("```");
335
+ lines.push("");
336
+ return `${lines.join("\n")}\n`;
337
+ }
@@ -0,0 +1,19 @@
1
+ export class Prng {
2
+ state;
3
+ constructor(seed) {
4
+ this.state = seed >>> 0;
5
+ }
6
+ next() {
7
+ this.state += 0x6d2b79f5;
8
+ let value = this.state;
9
+ value = Math.imul(value ^ (value >>> 15), value | 1);
10
+ value ^= value + Math.imul(value ^ (value >>> 7), value | 61);
11
+ return ((value ^ (value >>> 14)) >>> 0) / 4294967296;
12
+ }
13
+ pick(items) {
14
+ return items[Math.floor(this.next() * items.length)];
15
+ }
16
+ int(min, max) {
17
+ return Math.floor(this.next() * (max - min + 1)) + min;
18
+ }
19
+ }