keating 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +334 -0
- package/SYSTEM.md +33 -0
- package/bin/keating.js +31 -0
- package/dist/src/cli/main.js +357 -0
- package/dist/src/cli/setup.js +197 -0
- package/dist/src/cli/web.js +84 -0
- package/dist/src/core/animation.js +304 -0
- package/dist/src/core/ax-optimizer.js +81 -0
- package/dist/src/core/ax-prompt-learner.js +59 -0
- package/dist/src/core/ax-trial.js +181 -0
- package/dist/src/core/benchmark.js +253 -0
- package/dist/src/core/commands.js +57 -0
- package/dist/src/core/config.js +120 -0
- package/dist/src/core/engagement.js +235 -0
- package/dist/src/core/env.js +9 -0
- package/dist/src/core/evolution.js +242 -0
- package/dist/src/core/flashcards.js +133 -0
- package/dist/src/core/learner-state.js +108 -0
- package/dist/src/core/lesson-plan.js +155 -0
- package/dist/src/core/map-elites.js +228 -0
- package/dist/src/core/map.js +89 -0
- package/dist/src/core/mastery.js +207 -0
- package/dist/src/core/paths.js +100 -0
- package/dist/src/core/pi-agent.js +82 -0
- package/dist/src/core/policy.js +79 -0
- package/dist/src/core/project.js +337 -0
- package/dist/src/core/projects.js +281 -0
- package/dist/src/core/prompt-evolution.js +344 -0
- package/dist/src/core/quiz.js +194 -0
- package/dist/src/core/random.js +19 -0
- package/dist/src/core/self-improve.js +425 -0
- package/dist/src/core/speech.js +54 -0
- package/dist/src/core/terminal.js +117 -0
- package/dist/src/core/theme.js +101 -0
- package/dist/src/core/topics.js +620 -0
- package/dist/src/core/types.js +1 -0
- package/dist/src/core/util.js +30 -0
- package/dist/src/core/verification.js +162 -0
- package/dist/src/pi/hyperteacher-extension.js +573 -0
- package/dist/src/runtime/pi.js +343 -0
- package/package.json +78 -0
- package/pi/prompts/bridge.md +14 -0
- package/pi/prompts/diagnose.md +15 -0
- package/pi/prompts/improve.md +39 -0
- package/pi/prompts/learn.md +21 -0
- package/pi/prompts/quiz.md +14 -0
- package/pi/skills/adaptive-teaching/SKILL.md +33 -0
- package/scripts/install/install.sh +308 -0
- package/web/dist/.well-known/llms.txt +44 -0
- package/web/dist/apple-touch-icon.svg +10 -0
- package/web/dist/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
- package/web/dist/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
- package/web/dist/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
- package/web/dist/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
- package/web/dist/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
- package/web/dist/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
- package/web/dist/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
- package/web/dist/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
- package/web/dist/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
- package/web/dist/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
- package/web/dist/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
- package/web/dist/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
- package/web/dist/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
- package/web/dist/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
- package/web/dist/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
- package/web/dist/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
- package/web/dist/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
- package/web/dist/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
- package/web/dist/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
- package/web/dist/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
- package/web/dist/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
- package/web/dist/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
- package/web/dist/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
- package/web/dist/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
- package/web/dist/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
- package/web/dist/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
- package/web/dist/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
- package/web/dist/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
- package/web/dist/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
- package/web/dist/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
- package/web/dist/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
- package/web/dist/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
- package/web/dist/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
- package/web/dist/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
- package/web/dist/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
- package/web/dist/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
- package/web/dist/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
- package/web/dist/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
- package/web/dist/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
- package/web/dist/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
- package/web/dist/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
- package/web/dist/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
- package/web/dist/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
- package/web/dist/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
- package/web/dist/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
- package/web/dist/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
- package/web/dist/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
- package/web/dist/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
- package/web/dist/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
- package/web/dist/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
- package/web/dist/assets/_baseFor-B_cjfMB6.js +1 -0
- package/web/dist/assets/anthropic-BT6Vfzb1.js +36 -0
- package/web/dist/assets/arc-x2nTilpc.js +1 -0
- package/web/dist/assets/architecture-YZFGNWBL-B1hlUWjX.js +1 -0
- package/web/dist/assets/architectureDiagram-Q4EWVU46-CMApWFyw.js +36 -0
- package/web/dist/assets/array-B9UHiPd-.js +1 -0
- package/web/dist/assets/azure-openai-responses-CommX3YJ.js +1 -0
- package/web/dist/assets/blockDiagram-DXYQGD6D-DOQbsNRY.js +132 -0
- package/web/dist/assets/c4Diagram-AHTNJAMY-VFfRZWWA.js +10 -0
- package/web/dist/assets/channel-KY2Tg8Ba.js +1 -0
- package/web/dist/assets/chunk-2KRD3SAO-B-AqvS0u.js +1 -0
- package/web/dist/assets/chunk-336JU56O-DlYgPyl6.js +2 -0
- package/web/dist/assets/chunk-426QAEUC-CsVoBkfR.js +1 -0
- package/web/dist/assets/chunk-4BX2VUAB-0Z13aFAn.js +1 -0
- package/web/dist/assets/chunk-4TB4RGXK-DqC0Zwm7.js +206 -0
- package/web/dist/assets/chunk-55IACEB6-CWE_u-IY.js +1 -0
- package/web/dist/assets/chunk-5FUZZQ4R-CApli0xX.js +62 -0
- package/web/dist/assets/chunk-5PVQY5BW-Cbzhfhln.js +2 -0
- package/web/dist/assets/chunk-67CJDMHE-Cx7uJS4d.js +1 -0
- package/web/dist/assets/chunk-7N4EOEYR-CYPNsFus.js +1 -0
- package/web/dist/assets/chunk-AA7GKIK3-rU0uhR_u.js +1 -0
- package/web/dist/assets/chunk-BSJP7CBP-5VmcfR4-.js +1 -0
- package/web/dist/assets/chunk-Bj-mKKzh.js +1 -0
- package/web/dist/assets/chunk-CIAEETIT-CHJ-L8H1.js +1 -0
- package/web/dist/assets/chunk-EDXVE4YY-DZHAJjMI.js +1 -0
- package/web/dist/assets/chunk-ENJZ2VHE-DbUDFa7w.js +10 -0
- package/web/dist/assets/chunk-FMBD7UC4-BsYE5e_h.js +15 -0
- package/web/dist/assets/chunk-FOC6F5B3-Cm6aoTv7.js +1 -0
- package/web/dist/assets/chunk-ICPOFSXX-C5eNZ4L6.js +123 -0
- package/web/dist/assets/chunk-K5T4RW27-R7dAJ4rq.js +94 -0
- package/web/dist/assets/chunk-KGLVRYIC-MO99YZXL.js +1 -0
- package/web/dist/assets/chunk-LIHQZDEY-DUJ656sT.js +1 -0
- package/web/dist/assets/chunk-ORNJ4GCN-DXuuEC1n.js +1 -0
- package/web/dist/assets/chunk-OYMX7WX6-pJlEprWq.js +231 -0
- package/web/dist/assets/chunk-QZHKN3VN-_pQxbbiW.js +1 -0
- package/web/dist/assets/chunk-U2HBQHQK-Mh_l9PLe.js +70 -0
- package/web/dist/assets/chunk-X2U36JSP-BOeiJW0w.js +1 -0
- package/web/dist/assets/chunk-XPW4576I-fQ9SDvr_.js +32 -0
- package/web/dist/assets/chunk-YZCP3GAM-eboO4P5S.js +1 -0
- package/web/dist/assets/chunk-ZZ45TVLE-Cky0eqlr.js +1 -0
- package/web/dist/assets/classDiagram-6PBFFD2Q-DEPsZSU3.js +1 -0
- package/web/dist/assets/classDiagram-v2-HSJHXN6E-DhmIOEpX.js +1 -0
- package/web/dist/assets/clone-DeTzYqo8.js +1 -0
- package/web/dist/assets/cose-bilkent-S5V4N54A-N4zWUJ7C.js +1 -0
- package/web/dist/assets/cytoscape.esm-BBMd0vGm.js +321 -0
- package/web/dist/assets/dagre-IpK1aoMm.js +1 -0
- package/web/dist/assets/dagre-KV5264BT-DCytJuju.js +4 -0
- package/web/dist/assets/defaultLocale-5eAKkKJC.js +1 -0
- package/web/dist/assets/diagram-5BDNPKRD-Cv4miBae.js +10 -0
- package/web/dist/assets/diagram-G4DWMVQ6-CtICKUFi.js +24 -0
- package/web/dist/assets/diagram-MMDJMWI5-Cn7aGorh.js +43 -0
- package/web/dist/assets/diagram-TYMM5635-CCUWDPsC.js +24 -0
- package/web/dist/assets/dist-Dm98VvTW.js +1 -0
- package/web/dist/assets/env-api-keys-BNlMKqxw.js +1 -0
- package/web/dist/assets/erDiagram-SMLLAGMA-uT88sBlT.js +85 -0
- package/web/dist/assets/event-stream-D33K9rpL.js +1 -0
- package/web/dist/assets/flatten-C-u5nd5-.js +1 -0
- package/web/dist/assets/flowDiagram-DWJPFMVM-Bl3O7S1m.js +162 -0
- package/web/dist/assets/ganttDiagram-T4ZO3ILL-B1FhwV45.js +292 -0
- package/web/dist/assets/gitGraph-7Q5UKJZL-Bc_7vzer.js +1 -0
- package/web/dist/assets/gitGraphDiagram-UUTBAWPF-DfW6svMS.js +106 -0
- package/web/dist/assets/github-copilot-headers-L39QqneT.js +1 -0
- package/web/dist/assets/google-BdYNeCP_.js +1 -0
- package/web/dist/assets/google-gemini-cli-DpxAL3K4.js +2 -0
- package/web/dist/assets/google-shared-DyQdgtsI.js +2 -0
- package/web/dist/assets/google-vertex-CKRybaXj.js +1 -0
- package/web/dist/assets/graphlib-CMTVFyOZ.js +1 -0
- package/web/dist/assets/hash-kZ2KD_no.js +1 -0
- package/web/dist/assets/index-Bdb7P7gx.css +2 -0
- package/web/dist/assets/index-DNxepp8B.js +2891 -0
- package/web/dist/assets/info-OMHHGYJF-BGcxeaZt.js +1 -0
- package/web/dist/assets/infoDiagram-42DDH7IO-BbES7X_c.js +2 -0
- package/web/dist/assets/init-DlZdxViB.js +1 -0
- package/web/dist/assets/isEmpty-DssUW35f.js +1 -0
- package/web/dist/assets/ishikawaDiagram-UXIWVN3A-DxQ28rho.js +70 -0
- package/web/dist/assets/journeyDiagram-VCZTEJTY-D0X8qQ0P.js +139 -0
- package/web/dist/assets/json-parse-C6tSeIxX.js +2 -0
- package/web/dist/assets/kanban-definition-6JOO6SKY-DWYfSlpl.js +89 -0
- package/web/dist/assets/katex-CyM-5LlM.js +265 -0
- package/web/dist/assets/line-CuHce5JG.js +1 -0
- package/web/dist/assets/linear-Ca0Vkwuj.js +1 -0
- package/web/dist/assets/mermaid-parser.core-Cy4iY_Dy.js +4 -0
- package/web/dist/assets/mermaid.core-6PGkQdYc.js +11 -0
- package/web/dist/assets/mindmap-definition-QFDTVHPH-BBnKdtQh.js +96 -0
- package/web/dist/assets/mistral-BWaUMIgd.js +7 -0
- package/web/dist/assets/openai-D4NSaQIs.js +16 -0
- package/web/dist/assets/openai-codex-responses-CHBgKhmb.js +7 -0
- package/web/dist/assets/openai-completions-kcXmmaHI.js +5 -0
- package/web/dist/assets/openai-responses-Cqq3H3p3.js +1 -0
- package/web/dist/assets/openai-responses-shared-CTNuo9ci.js +10 -0
- package/web/dist/assets/ordinal-_K3x1fkz.js +1 -0
- package/web/dist/assets/ort-wasm-simd-threaded.jsep-B0T3yYHD.wasm +0 -0
- package/web/dist/assets/packet-4T2RLAQJ-D35ZLSBH.js +1 -0
- package/web/dist/assets/path-6uRLdFF7.js +1 -0
- package/web/dist/assets/pdf.worker.min-Cpi8b8z3.mjs +28 -0
- package/web/dist/assets/pie-ZZUOXDRM-DRoETpJX.js +1 -0
- package/web/dist/assets/pieDiagram-DEJITSTG-DfMjfTQz.js +30 -0
- package/web/dist/assets/preload-helper-DSXbuxSR.js +1 -0
- package/web/dist/assets/quadrantDiagram-34T5L4WZ-DfBSEept.js +7 -0
- package/web/dist/assets/radar-PYXPWWZC-DLKxRJ0V.js +1 -0
- package/web/dist/assets/reduce-836A2NiQ.js +1 -0
- package/web/dist/assets/requirementDiagram-MS252O5E-BPkxJQkz.js +84 -0
- package/web/dist/assets/rough.esm-Djo4Abte.js +1 -0
- package/web/dist/assets/sankeyDiagram-XADWPNL6-He3x9tNT.js +10 -0
- package/web/dist/assets/sequenceDiagram-FGHM5R23-DfCDpvrT.js +157 -0
- package/web/dist/assets/src-DdOdIreR.js +1 -0
- package/web/dist/assets/stateDiagram-FHFEXIEX-fuww6347.js +1 -0
- package/web/dist/assets/stateDiagram-v2-QKLJ7IA2-U6voafO3.js +1 -0
- package/web/dist/assets/timeline-definition-GMOUNBTQ-BWunHgBC.js +120 -0
- package/web/dist/assets/transform-messages-CqKEdRVp.js +1 -0
- package/web/dist/assets/transformers.web-DKUtmSAi.js +2818 -0
- package/web/dist/assets/treeView-SZITEDCU-BCx0xSAm.js +1 -0
- package/web/dist/assets/treemap-W4RFUUIX-2CvghWJK.js +1 -0
- package/web/dist/assets/vennDiagram-DHZGUBPP-CBXRutSP.js +34 -0
- package/web/dist/assets/wardley-RL74JXVD-BkPL_mhd.js +1 -0
- package/web/dist/assets/wardleyDiagram-NUSXRM2D-DTcVscPH.js +20 -0
- package/web/dist/assets/web-CMKYLKbT.js +10 -0
- package/web/dist/assets/xychartDiagram-5P7HB3ND-CZLgX9Fe.js +7 -0
- package/web/dist/favicon.svg +10 -0
- package/web/dist/index.html +104 -0
- package/web/dist/keating-metaharness.pdf +10557 -3
- package/web/dist/llms.txt +44 -0
- package/web/dist/logo.png +0 -0
- package/web/dist/manifest.webmanifest +1 -0
- package/web/dist/og-image.png +0 -0
- package/web/dist/pwa-192x192.svg +10 -0
- package/web/dist/pwa-512x512.svg +10 -0
- package/web/dist/registerSW.js +1 -0
- package/web/dist/robots.txt +8 -0
- package/web/dist/sitemap.xml +39 -0
- package/web/dist/sw.js +1 -0
- package/web/dist/tapes/doctor.mp4 +0 -0
- package/web/dist/tapes/feedback-flow.mp4 +0 -0
- package/web/dist/tapes/improve-flow.mp4 +0 -0
- package/web/dist/tapes/intro.mp4 +0 -0
- package/web/dist/tapes/learning-flow.mp4 +0 -0
- package/web/dist/tapes/session-flow.mp4 +0 -0
- package/web/dist/tapes/teacher-flow.mp4 +0 -0
- package/web/dist/tapes/tests.mp4 +0 -0
- package/web/dist/workbox-66610c77.js +1 -0
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Quiz Engine — generates question sets, workbooks, and answer keys.
|
|
3
|
+
*/
|
|
4
|
+
import { resolveTopic } from "./topics.js";
|
|
5
|
+
import { Prng } from "./random.js";
|
|
6
|
+
// ─── Question Generation ──────────────────────────────────────────────────
|
|
7
|
+
/**
 * Build a recall-level short-answer question that asks for the topic's definition.
 *
 * @param {{slug: string, title: string, summary: string}} topic - Topic fields read by this builder.
 * @param {unknown} _prng - Not used here; present so every question builder takes the same arguments.
 * @param {number} idx - Sequence number embedded in the question id.
 * @returns {object} Question record with id, type, level, question, correctAnswer, explanation and rubric.
 */
function makeRecallQ(topic, _prng, idx) {
    const { slug, title, summary } = topic;
    const question = {
        id: `${slug}-r${idx}`,
        type: "short_answer",
        level: "recall",
    };
    question.question = `Define "${title}" in your own words.`;
    // The topic summary doubles as the model answer and as the explanation body.
    question.correctAnswer = summary;
    question.explanation = `The core definition: ${summary}`;
    question.rubric = "0-1pt: vague. 2pts: captures essence. 3pts: precise, mentions nuance.";
    return question;
}
|
|
18
|
+
/**
 * Build a comprehension-level short-answer question around one of the topic's intuitions.
 *
 * @param {{slug: string, title: string, intuition: string[]}} topic - Topic fields read by this builder.
 * @param {{int: (min: number, max: number) => number}} prng - Source of the index used to choose the intuition.
 * @param {number} idx - Sequence number embedded in the question id.
 * @returns {object} Question record with id, type, level, question, correctAnswer, explanation and rubric.
 */
function makeComprehensionQ(topic, prng, idx) {
    const intuitions = topic.intuition;
    const chosenIndex = prng.int(0, intuitions.length - 1);
    // Falls back to a generic phrase when the chosen slot holds nothing (e.g. an empty list).
    const hook = intuitions[chosenIndex] ?? "the concept";
    const { slug, title } = topic;
    const question = `Explain the intuition: "${hook}". Why is this a helpful way to think about ${title}?`;
    const correctAnswer = `Because it maps ${title} to something concrete before formal notation.`;
    return {
        id: `${slug}-c${idx}`,
        type: "short_answer",
        level: "comprehension",
        question,
        correctAnswer,
        explanation: `Intuition first, formalism second: ${hook}`,
        rubric: "2pts: explains the metaphor. 3pts: identifies when it breaks down.",
    };
}
|
|
31
|
+
/**
 * Build an application-level short-answer question around one of the topic's examples.
 *
 * @param {{slug: string, title: string, examples: string[]}} topic - Topic fields read by this builder.
 * @param {{int: (min: number, max: number) => number}} prng - Source of the index used to choose the example.
 * @param {number} idx - Sequence number embedded in the question id.
 * @returns {object} Question record with id, type, level, question, correctAnswer, explanation and rubric.
 */
function makeApplicationQ(topic, prng, idx) {
    const examples = topic.examples;
    const chosenIndex = prng.int(0, examples.length - 1);
    // A generic placeholder stands in when the chosen slot holds nothing.
    const example = examples[chosenIndex] ?? `an example involving ${topic.title}`;
    const result = {
        id: `${topic.slug}-a${idx}`,
        type: "short_answer",
        level: "application",
    };
    result.question = `Work through this example: ${example}. Show your reasoning step-by-step.`;
    result.correctAnswer = `Follow the mechanics demonstrated in ${topic.title}.`;
    result.explanation = `Application grounds abstract knowledge: ${example}`;
    result.rubric = "2pts: attempts solution. 3pts: correct mechanics. 4pts: correct + clear reasoning.";
    return result;
}
|
|
44
|
+
/**
 * Build a multiple-choice "which statement is FALSE" question from one of the topic's misconceptions.
 *
 * The misconception is always the first option; the other three options are
 * fixed placeholder sentences that mention the topic title.
 *
 * @param {{slug: string, title: string, misconceptions: string[]}} topic - Topic fields read by this builder.
 * @param {{int: (min: number, max: number) => number}} prng - Source of the index used to choose the misconception.
 * @param {number} idx - Sequence number embedded in the question id.
 * @returns {object} Question record with id, type, level, question, options, correctAnswer and explanation (no rubric).
 */
function makeMisconceptionQ(topic, prng, idx) {
    const misconceptions = topic.misconceptions;
    const chosenIndex = prng.int(0, misconceptions.length - 1);
    const misconception = misconceptions[chosenIndex] ?? "a common misconception";
    const { slug, title } = topic;
    // The false statement is rendered once and reused as both option and answer.
    const falseStatement = `${misconception}`;
    const options = [
        falseStatement,
        `This is a correct statement about ${title}.`,
        `Another correct property of ${title}.`,
        `A third correct property of ${title}.`,
    ];
    return {
        id: `${slug}-m${idx}`,
        type: "multiple_choice",
        level: "comprehension",
        question: `Which of the following statements about ${title} is FALSE?`,
        options,
        correctAnswer: falseStatement,
        explanation: `"${misconception}" is a known misconception. The other statements are generally true.`,
    };
}
|
|
62
|
+
/**
 * Build a transfer-level short-answer question that bridges the topic into another domain.
 *
 * @param {{slug: string, title: string, interdisciplinaryHooks: string[]}} topic - Topic fields read by this builder.
 * @param {{int: (min: number, max: number) => number}} prng - Source of the index used to choose the target domain.
 * @param {number} idx - Sequence number embedded in the question id.
 * @returns {object} Question record with id, type, level, question, correctAnswer, explanation and rubric.
 */
function makeTransferQ(topic, prng, idx) {
    const domains = topic.interdisciplinaryHooks;
    const chosenIndex = prng.int(0, domains.length - 1);
    // A generic domain name stands in when the chosen slot holds nothing.
    const domain = domains[chosenIndex] ?? "a new domain";
    const prompt = `How could ${topic.title} be applied or analogized in ${domain}? Construct an explicit bridge.`;
    return {
        id: `${topic.slug}-t${idx}`,
        type: "short_answer",
        level: "transfer",
        question: prompt,
        correctAnswer: "A valid analogy that preserves structural relationships and acknowledges boundary conditions.",
        explanation: `Transfer requires mapping invariants, not surface features, to ${domain}.`,
        rubric: "2pts: superficial analogy. 3pts: structural mapping. 4pts: mapping + awareness of limits.",
    };
}
|
|
75
|
+
/**
 * Generate an eight-question quiz for a topic.
 *
 * The quiz contains, in order: two recall questions, one comprehension
 * question, one misconception (multiple-choice) question, two application
 * questions and two transfer questions.
 *
 * @param {string} topicName - Name passed to `resolveTopic`.
 * @param {number} [seed=42] - Seed for the `Prng` shared by all question builders.
 * @returns {{topic: string, slug: string, generatedAt: string, questions: object[], answerKey: Map<string, string>, totalPoints: number}}
 *   The quiz, a question-id to answer map, and a point total that counts 3
 *   for every question with a rubric and 1 for every question without one.
 */
export function generateQuiz(topicName, seed = 42) {
    const topic = resolveTopic(topicName);
    const prng = new Prng(seed);
    // Builders run strictly in this order because they share one PRNG stream.
    const blueprint = [
        [makeRecallQ, 1],
        [makeRecallQ, 2],
        [makeComprehensionQ, 1],
        [makeMisconceptionQ, 1],
        [makeApplicationQ, 1],
        [makeApplicationQ, 2],
        [makeTransferQ, 1],
        [makeTransferQ, 2],
    ];
    const questions = blueprint.map(([build, idx]) => build(topic, prng, idx));
    const answerKey = new Map(questions.map((q) => [q.id, q.correctAnswer]));
    const generatedAt = new Date().toISOString();
    let totalPoints = 0;
    for (const q of questions) {
        totalPoints += q.rubric ? 3 : 1;
    }
    return {
        topic: topic.title,
        slug: topic.slug,
        generatedAt,
        questions,
        answerKey,
        totalPoints,
    };
}
|
|
104
|
+
// ─── Workbook ─────────────────────────────────────────────────────────────
|
|
105
|
+
/**
 * Generate a three-part workbook (Foundation, Application, Transfer) for a topic.
 *
 * The workbook regroups the questions produced by `generateQuiz` for the same
 * topic name and seed; it does not create any questions of its own.
 *
 * @param {string} topicName - Name forwarded to `generateQuiz`.
 * @param {number} [seed=42] - Seed forwarded to `generateQuiz`.
 * @returns {{topic: string, slug: string, sections: {title: string, instructions: string, questions: object[]}[], generatedAt: string}}
 *   Workbook with one section per part and an ISO-8601 creation timestamp.
 */
export function generateWorkbook(topicName, seed = 42) {
    // The quiz already carries the resolved title and slug, so the topic is not
    // resolved a second time and no separate PRNG is constructed here.
    const quiz = generateQuiz(topicName, seed);
    const questionsAt = (...levels) => quiz.questions.filter((q) => levels.includes(q.level));
    const sections = [
        {
            title: "Part A: Foundation",
            instructions: "Answer every question without looking at notes. Retrieval practice beats re-reading.",
            questions: questionsAt("recall", "comprehension"),
        },
        {
            title: "Part B: Application",
            instructions: "Work each example out on paper or in a text editor. Show all steps.",
            questions: questionsAt("application"),
        },
        {
            title: "Part C: Transfer",
            instructions: "Bridge this concept into unfamiliar territory. If your analogy feels too easy, it is probably too shallow.",
            questions: questionsAt("transfer"),
        },
    ];
    return {
        topic: quiz.topic,
        slug: quiz.slug,
        sections,
        generatedAt: new Date().toISOString(),
    };
}
|
|
133
|
+
// ─── Markdown output ──────────────────────────────────────────────────────
|
|
134
|
+
/**
 * Render a quiz as Markdown (questions only, no answers).
 *
 * Each question gets a level-2 heading, its text, lettered options (A, B, ...)
 * when it has any, and its rubric when it has one.
 *
 * @param {{topic: string, generatedAt: string, questions: object[]}} quiz - Quiz to render.
 * @returns {string} Markdown text ending with a newline.
 */
export function quizToMarkdown(quiz) {
    const header = [`# Quiz: ${quiz.topic}`, `> Generated: ${quiz.generatedAt}`, ""];
    const body = [];
    for (const q of quiz.questions) {
        body.push(`## ${q.id} — ${q.level} (${q.type})`, q.question);
        if (q.options) {
            // 65 is the char code of "A": options are lettered A, B, C, ...
            const lettered = Array.from(q.options, (option, i) => `${String.fromCharCode(65 + i)}. ${option}`);
            body.push("", ...lettered);
        }
        body.push("");
        if (q.rubric) {
            body.push(`*Rubric:* ${q.rubric}`);
        }
        body.push("");
    }
    return `${[...header, ...body].join("\n")}\n`;
}
|
|
156
|
+
/**
 * Render the answer key of a quiz as Markdown.
 *
 * Each question contributes a level-2 heading with its id, followed by its
 * answer and its explanation.
 *
 * @param {{topic: string, questions: object[]}} quiz - Quiz whose answers are rendered.
 * @returns {string} Markdown text ending with a newline.
 */
export function quizAnswerKeyToMarkdown(quiz) {
    const out = [`# Answer Key: ${quiz.topic}`, ""];
    for (const { id, correctAnswer, explanation } of quiz.questions) {
        out.push(
            `## ${id}`,
            `**Answer:** ${correctAnswer}`,
            `**Explanation:** ${explanation}`,
            "",
        );
    }
    return `${out.join("\n")}\n`;
}
|
|
169
|
+
/**
 * Render a workbook as Markdown.
 *
 * Every section starts with a horizontal rule, a heading and its instructions.
 * Every question is followed by an "answer space" marker and a run of blank
 * lines for the learner to write in.
 *
 * @param {{topic: string, sections: {title: string, instructions: string, questions: object[]}[]}} wb - Workbook to render.
 * @returns {string} Markdown text ending with a newline.
 */
export function workbookToMarkdown(wb) {
    const out = [
        `# Workbook: ${wb.topic}`,
        "> Self-paced. Do Part A in one sitting, Part B after a break, Part C tomorrow.",
        "",
    ];
    for (const { title, instructions, questions } of wb.sections) {
        out.push("---", `# ${title}`, instructions, "");
        for (const q of questions) {
            out.push(`### ${q.id} — ${q.level}`, q.question);
            if (q.options) {
                // 65 is the char code of "A": options are lettered A, B, C, ...
                out.push(...Array.from(q.options, (option, i) => `${String.fromCharCode(65 + i)}. ${option}`));
            }
            // The last entry is a single element holding four newlines: blank writing space.
            out.push("", "*Space for answer:*", "\n\n\n\n");
        }
    }
    return `${out.join("\n")}\n`;
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * Small seeded pseudo-random number generator.
 *
 * The same seed always yields the same sequence. The mixing steps in `next`
 * are those of the mulberry32 generator. Not suitable for security-sensitive
 * randomness.
 */
export class Prng {
    /** Current generator state, always an unsigned 32-bit integer. */
    state;
    /**
     * @param {number} seed - Any number; it is coerced to an unsigned 32-bit integer.
     */
    constructor(seed) {
        this.state = seed >>> 0;
    }
    /**
     * Advance the generator by one step.
     *
     * @returns {number} A value in the half-open range [0, 1).
     */
    next() {
        // Wrap the state to 32 bits on every step. Without the wrap the state
        // grows without bound and, once it passes 2^53, loses the low bits the
        // bitwise mixing below depends on. While the unwrapped sum is still
        // exact the bitwise operators see the same 32 bits either way, so the
        // output sequence is unchanged.
        this.state = (this.state + 0x6d2b79f5) >>> 0;
        let value = this.state;
        value = Math.imul(value ^ (value >>> 15), value | 1);
        value ^= value + Math.imul(value ^ (value >>> 7), value | 61);
        // 4294967296 is 2^32: scales the unsigned 32-bit result into [0, 1).
        return ((value ^ (value >>> 14)) >>> 0) / 4294967296;
    }
    /**
     * Choose one element of an array.
     *
     * @param {Array} items - Candidates to choose from.
     * @returns {*} One element of `items`, or `undefined` when `items` is empty.
     */
    pick(items) {
        return items[Math.floor(this.next() * items.length)];
    }
    /**
     * Draw an integer from an inclusive range.
     *
     * @param {number} min - Lower bound (inclusive).
     * @param {number} max - Upper bound (inclusive).
     * @returns {number} An integer between `min` and `max` when `min <= max`.
     */
    int(min, max) {
        return Math.floor(this.next() * (max - min + 1)) + min;
    }
}
|
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { runBenchmarkSuite } from "./benchmark.js";
|
|
4
|
+
import { loadPolicy } from "./policy.js";
|
|
5
|
+
import { currentPolicyPath, stateDir, outputsDir } from "./paths.js";
|
|
6
|
+
import { mean } from "./util.js";
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
// Mutable source files the meta-evolution loop is allowed to touch
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
/**
 * Lookup table from a short area key to the source file associated with it.
 * `diagnoseFromBenchmark` reads entries from this table to fill the `file`
 * field of each weakness it reports.
 */
const MUTABLE_SOURCES = {
    "lesson-plan": "src/core/lesson-plan.ts",
    "benchmark-weights": "src/core/benchmark.ts",
    "animation": "src/core/animation.ts",
    "topics": "src/core/topics.ts",
    "map": "src/core/map.ts",
    "policy-defaults": "src/core/policy.ts"
};
|
|
18
|
+
/**
 * Files that must never be modified by self-improvement. Checked at proposal time.
 *
 * NOTE(review): the check that consumes this set is not in the visible part of
 * this module; confirm where it is enforced before relying on it.
 */
export const IMMUTABLE_SOURCES = new Set([
    "src/core/self-improve.ts",
    "src/core/types.ts",
    "src/core/config.ts",
    "src/core/paths.ts",
    "src/core/random.ts"
]);
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// Archive persistence
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
/**
 * Location of the improvement archive JSON file.
 *
 * @param {string} cwd - Directory passed to `stateDir`.
 * @returns {string} Path of `improvement-archive.json` inside the state directory.
 */
export function improvementArchivePath(cwd) {
    const stateRoot = stateDir(cwd);
    return join(stateRoot, "improvement-archive.json");
}
|
|
32
|
+
/**
 * Location of the directory that holds improvement outputs.
 *
 * @param {string} cwd - Directory passed to `outputsDir`.
 * @returns {string} Path of the `improvements` folder inside the outputs directory.
 */
export function improvementsDir(cwd) {
    const outputsRoot = outputsDir(cwd);
    return join(outputsRoot, "improvements");
}
|
|
35
|
+
/**
 * Location of the directory that holds snapshots.
 *
 * @param {string} cwd - Directory passed to `stateDir`.
 * @returns {string} Path of the `snapshots` folder inside the state directory.
 */
export function snapshotsDir(cwd) {
    const stateRoot = stateDir(cwd);
    return join(stateRoot, "snapshots");
}
|
|
38
|
+
/**
 * Read the improvement archive from disk.
 *
 * Any failure while locating, reading or parsing the file (including a missing
 * file or invalid JSON) yields a fresh, empty archive instead of an error.
 *
 * @param {string} cwd - Directory passed to `improvementArchivePath`.
 * @returns {Promise<{attempts: Array, totalAccepted: number, totalRejected: number, cumulativeImprovement: number}>}
 *   The parsed archive, or an empty one.
 */
export async function loadImprovementArchive(cwd) {
    let archive;
    try {
        const archiveFile = improvementArchivePath(cwd);
        const text = await readFile(archiveFile, "utf8");
        archive = JSON.parse(text);
    }
    catch {
        // Best effort: fall back to an empty archive on any failure.
        archive = {
            attempts: [],
            totalAccepted: 0,
            totalRejected: 0,
            cumulativeImprovement: 0,
        };
    }
    return archive;
}
|
|
47
|
+
/**
 * Write the improvement archive to disk as JSON indented with two spaces.
 *
 * @param {string} cwd - Directory passed to `improvementArchivePath`.
 * @param {object} archive - Archive to serialize.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function saveImprovementArchive(cwd, archive) {
    const archiveFile = improvementArchivePath(cwd);
    const serialized = JSON.stringify(archive, null, 2);
    await writeFile(archiveFile, serialized, "utf8");
}
|
|
50
|
+
/**
 * Turn a benchmark result into a list of weaknesses.
 *
 * Checks, in order: the lowest-scoring topic (reported when its mean score is
 * below 55), the suite-wide means of confusion (> 0.3), transfer (< 0.35) and
 * engagement (< 0.45), and then each topic's dominant weakness.
 *
 * @param {{topicBenchmarks: object[], overallScore: number}} result - Benchmark suite result.
 * @returns {{area: string, metric: string, value: number, file: string, region: string, explanation: string}[]}
 *   Weaknesses in the order they were detected.
 */
function diagnoseFromBenchmark(result) {
    const found = [];
    const benchmarks = result.topicBenchmarks;
    const averageOf = (field) => mean(benchmarks.map((entry) => entry[field]));

    // Lowest-scoring topic: sort a copy ascending and take the head.
    const byScoreAscending = [...benchmarks].sort((left, right) => left.meanScore - right.meanScore);
    const weakest = byScoreAscending[0];
    if (weakest && weakest.meanScore < 55) {
        const { slug, title } = weakest.topic;
        found.push({
            area: `topic:${slug}`,
            metric: "meanScore",
            value: weakest.meanScore,
            file: MUTABLE_SOURCES.topics,
            region: `Topic definition for "${slug}"`,
            explanation: `Topic "${title}" scores ${weakest.meanScore.toFixed(1)}, well below the suite average of ${result.overallScore.toFixed(1)}. Its topic definition may need richer intuition, better misconceptions, or more targeted exercises.`
        });
    }

    // Suite-wide averages compared against fixed thresholds.
    const allConfusion = averageOf("meanConfusion");
    if (allConfusion > 0.3) {
        found.push({
            area: "simulation:overload",
            metric: "meanConfusion",
            value: allConfusion,
            file: MUTABLE_SOURCES["benchmark-weights"],
            region: "simulateTeaching overload calculation",
            explanation: `Mean confusion across all topics is ${allConfusion.toFixed(2)} (target < 0.3). The overload formula or its interaction with policy parameters may be miscalibrated.`
        });
    }
    const allTransfer = averageOf("meanTransfer");
    if (allTransfer < 0.35) {
        found.push({
            area: "simulation:transfer",
            metric: "meanTransfer",
            value: allTransfer,
            file: MUTABLE_SOURCES["lesson-plan"],
            region: "Transfer and Reflection phase",
            explanation: `Mean transfer is ${allTransfer.toFixed(2)} (target > 0.35). The lesson plan's transfer phase or interdisciplinary hooks may need strengthening.`
        });
    }
    const allEngagement = averageOf("meanEngagement");
    if (allEngagement < 0.45) {
        found.push({
            area: "simulation:engagement",
            metric: "meanEngagement",
            value: allEngagement,
            file: MUTABLE_SOURCES["lesson-plan"],
            region: "Socratic and practice phases",
            explanation: `Mean engagement is ${allEngagement.toFixed(2)} (target > 0.45). Lesson phases may need more interactive elements, stronger Socratic scaffolding, or better diagram integration.`
        });
    }

    // Per-topic findings keyed on each topic's dominant weakness.
    for (const tb of benchmarks) {
        switch (tb.dominantWeakness) {
            case "overload":
                if (tb.meanConfusion > 0.35) {
                    found.push({
                        area: `topic-overload:${tb.topic.slug}`,
                        metric: "confusion",
                        value: tb.meanConfusion,
                        file: MUTABLE_SOURCES["lesson-plan"],
                        region: `Domain-specific guidance for "${tb.topic.domain}" topics`,
                        explanation: `Topic "${tb.topic.title}" (${tb.topic.domain}) causes excessive overload (confusion ${tb.meanConfusion.toFixed(2)}). The domain-specific lesson customization may need adjustment.`
                    });
                }
                break;
            case "diagramFit":
                // The reported value here is the topic's mean score.
                found.push({
                    area: `visual:${tb.topic.slug}`,
                    metric: "diagramFit",
                    value: tb.meanScore,
                    file: MUTABLE_SOURCES.animation,
                    region: `Scene generator for ${tb.topic.domain} domain`,
                    explanation: `Topic "${tb.topic.title}" has weak diagram fit. The animation scene for this domain may need richer visual representation.`
                });
                break;
            default:
                break;
        }
    }
    return found;
}
|
|
123
|
+
// ---------------------------------------------------------------------------
|
|
124
|
+
// Proposal generation
|
|
125
|
+
// ---------------------------------------------------------------------------
|
|
126
|
+
/** Number of proposal ids handed out so far by this module instance. */
let proposalCounter = 0;
/**
 * Create a proposal id of the form `improve-<base36 timestamp>-<sequence>`.
 *
 * @returns {string} A new id; the sequence part increases by one on every call.
 */
function generateProposalId() {
    proposalCounter += 1;
    const stamp = Date.now().toString(36);
    return ["improve", stamp, proposalCounter].join("-");
}
|
|
132
|
+
/**
 * Run the benchmark suite with the current policy and build a pending
 * improvement proposal that targets the most severe weaknesses found.
 *
 * @param {string} cwd - Directory used to locate the policy and passed to the benchmark suite.
 * @returns {Promise<{id: string, timestamp: string, targets: object[], hypothesis: string, instructions: *, baselineScore: number, status: string}>}
 *   A proposal with up to three targets and status "pending".
 */
export async function generateImprovementProposal(cwd) {
    const policy = await loadPolicy(currentPolicyPath(cwd));
    const benchmark = await runBenchmarkSuite(cwd, policy);
    const weaknesses = diagnoseFromBenchmark(benchmark);
    // Weakness values are not directly comparable: confusion is worse when
    // higher, transfer/engagement/score are worse when lower, and scores use a
    // larger scale. Sorting on the raw value therefore does not rank by
    // severity, so each value is first mapped to "higher means worse".
    // NOTE(review): assumes confusion/transfer/engagement lie in 0..1 and
    // scores in 0..100, as the thresholds in diagnoseFromBenchmark suggest.
    const severityOf = ({ metric, value }) => {
        switch (metric) {
            case "meanConfusion":
            case "confusion":
                return value;
            case "meanTransfer":
            case "meanEngagement":
                return 1 - value;
            case "meanScore":
            case "diagramFit":
                return 1 - value / 100;
            default:
                return 0;
        }
    };
    // Prioritize: pick the top 3 weaknesses by severity (most severe first).
    const sorted = [...weaknesses].sort((a, b) => severityOf(b) - severityOf(a));
    const targets = sorted.slice(0, 3).map(w => ({
        file: w.file,
        region: w.region,
        weakness: w.area,
        metric: w.metric,
        currentValue: w.value,
        rationale: w.explanation
    }));
    const hypothesis = targets.length > 0
        ? `Improving ${targets.map(t => t.weakness).join(", ")} should raise the overall benchmark score from ${benchmark.overallScore.toFixed(2)} by addressing the identified weak areas.`
        : `The benchmark score is ${benchmark.overallScore.toFixed(2)} with no severe weaknesses detected. Consider exploring novel teaching strategies.`;
    const instructions = await buildImprovementInstructions(cwd, targets, benchmark);
    return {
        id: generateProposalId(),
        timestamp: new Date().toISOString(),
        targets,
        hypothesis,
        instructions,
        baselineScore: benchmark.overallScore,
        status: "pending"
    };
}
|
|
160
|
+
/**
 * Render the markdown brief for the self-improvement agent: baseline numbers,
 * knowledge context, safety rules, one section per target, and the
 * evaluation protocol.
 *
 * @param {string} _cwd - Unused; kept so the call signature stays stable.
 * @param {Array<object>} targets - Entries providing `weakness`, `file`, `region`,
 *   `metric`, numeric `currentValue` and `rationale`.
 * @param {object} benchmark - Provides numeric `overallScore` and `weakestTopic`.
 * @returns {Promise<string>} Newline-joined markdown ending with a trailing newline.
 */
async function buildImprovementInstructions(_cwd, targets, benchmark) {
    const baseline = benchmark.overallScore.toFixed(2);
    const preamble = [
        "# Self-Improvement Instructions",
        "",
        "You are Keating's meta-evolution agent. Your task is to modify Keating's own source code",
        "to improve teaching effectiveness as measured by the benchmark suite.",
        "",
        "## Current Baseline",
        "",
        `- Overall score: ${baseline}`,
        `- Weakest topic: ${benchmark.weakestTopic}`,
        "",
        "## Knowledge Context",
        "",
        "Where applicable, leverage these Ax optimization traces to inform your changes:",
        "- **GEPA Pareto Front**: Check `.keating/state/gepa-optimized.json` for multi-objective hyperparameter tuning traces.",
        "- **ACE Playbook**: Check `.keating/state/ace-playbook.json` for the latest prompt-learning strategy updates.",
        "",
        "## Safety Rules",
        "",
        "1. ONLY modify files listed in the targets below. Do not touch types, config, paths, or this self-improvement module.",
        "2. After making changes, run `bun test ./test/*.test.ts` to verify no tests break.",
        "3. Run `bun src/cli/main.ts bench` to measure the impact.",
        "4. If the benchmark score decreases or tests fail, ROLLBACK all changes using the snapshots.",
        "5. Keep changes small and focused. One logical change per target.",
        "6. Do not change function signatures that are imported by other modules.",
        "7. Add a comment `// [self-improve] <proposal-id>` near each changed region.",
        "",
        "## Targets",
        ""
    ];
    const targetSections = [];
    for (const [index, target] of targets.entries()) {
        targetSections.push(
            `### Target ${index + 1}: ${target.weakness}`,
            "",
            `- **File**: ${target.file}`,
            `- **Region**: ${target.region}`,
            `- **Metric**: ${target.metric} = ${target.currentValue.toFixed(2)}`,
            `- **Rationale**: ${target.rationale}`,
            "",
            "**Suggested approach**: Read the file, understand the region, and make a targeted change",
            "that addresses the diagnosed weakness. Think about what the benchmark simulation actually",
            "measures and how your code change will flow through to improve the metric.",
            ""
        );
    }
    const protocol = [
        "## Evaluation Protocol",
        "",
        "After applying changes:",
        "1. Run `bun x tsc -p tsconfig.json` — must compile clean",
        "2. Run `bun test ./test/*.test.ts` — all tests must pass",
        "3. Run `bun src/cli/main.ts bench` — record the new overall score",
        `4. If new score > ${baseline}, the change is accepted`,
        `5. If new score <= ${baseline}, rollback using the snapshots`,
        "6. Record the result using `/improve accept` or `/improve reject`",
        ""
    ];
    return [...preamble, ...targetSections, ...protocol].join("\n");
}
|
|
217
|
+
// ---------------------------------------------------------------------------
|
|
218
|
+
// Snapshot: save current state of mutable files before changes
|
|
219
|
+
// ---------------------------------------------------------------------------
|
|
220
|
+
/**
 * Capture the current content of every path listed in MUTABLE_SOURCES so it
 * can be restored later.
 *
 * Each readable file is copied into `<snapshotsDir(cwd)>/<proposalId>/` under
 * a flattened name (every `/` in the relative path becomes `__`) and is also
 * returned in memory.
 *
 * @param {string} cwd - Directory the MUTABLE_SOURCES paths are resolved against.
 * @param {string} proposalId - Name of the per-proposal snapshot directory.
 * @returns {Promise<Array<{file: string, relativePath: string, content: string, snapshotAt: string}>>}
 *   One entry per file that existed and was saved.
 * @throws Any read error other than ENOENT, and any error writing the
 *   snapshot copy. A snapshot that could not be saved must not be reported
 *   as taken.
 */
export async function snapshotMutableSources(cwd, proposalId) {
    const snapDir = join(snapshotsDir(cwd), proposalId);
    await mkdir(snapDir, { recursive: true });
    const snapshots = [];
    for (const relativePath of Object.values(MUTABLE_SOURCES)) {
        const fullPath = join(cwd, relativePath);
        let content;
        try {
            content = await readFile(fullPath, "utf8");
        }
        catch (err) {
            // Only a missing source is skipped: there is nothing to restore for it.
            // Other failures (permissions, I/O) would silently drop the file from
            // the rollback set, so they are surfaced to the caller.
            if (err?.code === "ENOENT") {
                continue;
            }
            throw err;
        }
        await writeFile(join(snapDir, relativePath.replace(/\//g, "__")), content, "utf8");
        // Recorded only after the on-disk copy succeeded.
        snapshots.push({
            file: fullPath,
            relativePath,
            content,
            snapshotAt: new Date().toISOString()
        });
    }
    return snapshots;
}
|
|
243
|
+
// ---------------------------------------------------------------------------
|
|
244
|
+
// Rollback: restore files from snapshot
|
|
245
|
+
// ---------------------------------------------------------------------------
|
|
246
|
+
/**
 * Restore snapshotted files by writing each captured `content` back to its
 * recorded `file` path as UTF-8, one file at a time in iteration order.
 *
 * @param {Iterable<{file: string, content: string}>} snapshots - Entries to restore.
 * @returns {Promise<void>}
 */
export async function rollbackFromSnapshots(snapshots) {
    for (const { file, content } of snapshots) {
        await writeFile(file, content, "utf8");
    }
}
|
|
251
|
+
// ---------------------------------------------------------------------------
|
|
252
|
+
// Evaluate: compare before/after benchmark scores
|
|
253
|
+
// ---------------------------------------------------------------------------
|
|
254
|
+
/**
 * Re-run the benchmark suite with the current policy and compare the result
 * against a previously recorded baseline.
 *
 * @param {string} cwd - Project root passed to the policy and benchmark helpers.
 * @param {number} baselineScore - Score measured before the change.
 * @returns {Promise<{afterScore: number, improved: boolean, delta: number}>}
 *   `improved` is true only for a strictly positive `delta`.
 */
export async function evaluateImprovement(cwd, baselineScore) {
    const activePolicy = await loadPolicy(currentPolicyPath(cwd));
    const { overallScore: afterScore } = await runBenchmarkSuite(cwd, activePolicy);
    const delta = afterScore - baselineScore;
    const improved = delta > 0;
    return { afterScore, improved, delta };
}
|
|
264
|
+
// ---------------------------------------------------------------------------
|
|
265
|
+
// Record: persist an improvement attempt to the archive
|
|
266
|
+
// ---------------------------------------------------------------------------
|
|
267
|
+
/**
 * Append an attempt to the improvement archive, update the running totals
 * and save the archive.
 *
 * A truthy `attempt.accepted` bumps `totalAccepted` and adds
 * `attempt.scoreDelta` (treated as 0 when null/undefined) to
 * `cumulativeImprovement`; anything else bumps `totalRejected`.
 *
 * @param {string} cwd - Project root used to locate the archive.
 * @param {object} attempt - The attempt record to append.
 * @returns {Promise<void>}
 */
export async function recordAttempt(cwd, attempt) {
    const archive = await loadImprovementArchive(cwd);
    archive.attempts.push(attempt);
    if (!attempt.accepted) {
        archive.totalRejected += 1;
    }
    else {
        archive.totalAccepted += 1;
        archive.cumulativeImprovement += attempt.scoreDelta ?? 0;
    }
    await saveImprovementArchive(cwd, archive);
}
|
|
279
|
+
/**
 * Generate a proposal, snapshot the mutable sources, and write the proposal
 * as a markdown file that also lists the snapshots and recent attempts.
 *
 * The file is written to `<improvementsDir(cwd)>/<proposal.id>.md`.
 *
 * @param {string} cwd - Project root.
 * @returns {Promise<{proposalPath: string, proposal: object, snapshots: Array<object>}>}
 */
export async function generateImprovementArtifact(cwd) {
    const dir = improvementsDir(cwd);
    await mkdir(dir, { recursive: true });
    const proposal = await generateImprovementProposal(cwd);
    const snapshots = await snapshotMutableSources(cwd, proposal.id);
    // Write the proposal as a markdown artifact the agent can read and execute
    const proposalPath = join(dir, `${proposal.id}.md`);
    const content = [
        `# Improvement Proposal: ${proposal.id}`,
        "",
        `**Timestamp**: ${proposal.timestamp}`,
        `**Baseline score**: ${proposal.baselineScore.toFixed(2)}`,
        `**Status**: ${proposal.status}`,
        "",
        `## Hypothesis`,
        "",
        proposal.hypothesis,
        "",
        `## Snapshotted Files`,
        "",
        // String length counts UTF-16 code units; report the UTF-8 byte size the
        // label promises, which differs for any non-ASCII content.
        ...snapshots.map(s => `- ${s.relativePath} (${Buffer.byteLength(s.content, "utf8")} bytes)`),
        "",
        proposal.instructions,
        "",
        "## Archive Context",
        ""
    ].join("\n");
    // Append prior attempt summaries (last five) for the agent's learning
    const archive = await loadImprovementArchive(cwd);
    const history = archive.attempts.slice(-5).map(a => {
        const status = a.accepted ? "ACCEPTED" : "REJECTED";
        const delta = a.scoreDelta != null ? ` (delta: ${a.scoreDelta.toFixed(2)})` : "";
        const targets = a.proposal.targets.map(t => t.weakness).join(", ");
        return `- ${a.proposal.id}: ${status}${delta} — targeted ${targets}`;
    });
    const fullContent = history.length > 0
        ? content + "Recent attempts (learn from these):\n\n" + history.join("\n") + "\n"
        : content + "No prior improvement attempts. This is the first run.\n";
    await writeFile(proposalPath, fullContent, "utf8");
    return { proposalPath, proposal, snapshots };
}
|
|
320
|
+
// ---------------------------------------------------------------------------
|
|
321
|
+
// Accept / Reject helpers
|
|
322
|
+
// ---------------------------------------------------------------------------
|
|
323
|
+
/**
 * Mark a proposal as accepted in the improvement archive.
 *
 * If an attempt with this proposal id is already archived it is updated in
 * place and the archive is saved. Otherwise a placeholder attempt is recorded
 * through `recordAttempt`.
 *
 * @param {string} cwd - Project root used to locate the archive.
 * @param {string} proposalId - Id of the proposal being accepted.
 * @param {number} afterScore - Benchmark score measured after the change.
 * @returns {Promise<void>}
 */
export async function acceptImprovement(cwd, proposalId, afterScore) {
    const archive = await loadImprovementArchive(cwd);
    const existing = archive.attempts.find(a => a.proposal.id === proposalId);
    if (existing) {
        // Every path in this module that sets status "accepted" also counts the
        // attempt once. Accepting it again must replace that contribution, not
        // add a second one.
        if (existing.proposal.status === "accepted") {
            archive.cumulativeImprovement -= existing.scoreDelta ?? 0;
        }
        else {
            archive.totalAccepted += 1;
        }
        existing.accepted = true;
        existing.afterScore = afterScore;
        existing.scoreDelta = afterScore - existing.baselineScore;
        existing.proposal.status = "accepted";
        existing.completedAt = new Date().toISOString();
        archive.cumulativeImprovement += existing.scoreDelta;
        await saveImprovementArchive(cwd, archive);
        return;
    }
    // If not found in archive, create a new entry
    const proposal = {
        id: proposalId,
        timestamp: new Date().toISOString(),
        targets: [],
        hypothesis: "Accepted externally",
        instructions: "",
        baselineScore: afterScore,
        status: "accepted"
    };
    await recordAttempt(cwd, {
        proposal,
        snapshots: [],
        // The real baseline is unknown here. Use the same placeholder as the
        // proposal above so the two agree and a later re-accept computes its
        // delta against this score rather than against 0.
        baselineScore: afterScore,
        afterScore,
        scoreDelta: null,
        accepted: true,
        rollbackPerformed: false,
        completedAt: new Date().toISOString()
    });
}
|
|
358
|
+
/**
 * Roll back the given snapshots and mark a proposal as rejected in the
 * improvement archive.
 *
 * The rollback always runs first. If an attempt with this proposal id is
 * already archived it is updated in place and the archive is saved. Otherwise
 * a placeholder attempt with status "rolled-back" is recorded through
 * `recordAttempt`.
 *
 * @param {string} cwd - Project root used to locate the archive.
 * @param {string} proposalId - Id of the proposal being rejected.
 * @param {Array<object>} snapshots - Snapshots to restore before recording.
 * @returns {Promise<void>}
 */
export async function rejectImprovement(cwd, proposalId, snapshots) {
    await rollbackFromSnapshots(snapshots);
    const archive = await loadImprovementArchive(cwd);
    const existing = archive.attempts.find(a => a.proposal.id === proposalId);
    if (existing) {
        // Every path in this module that sets status "rejected" or "rolled-back"
        // also counts the attempt once, so rejecting it again must not bump the
        // total a second time.
        const priorStatus = existing.proposal.status;
        const alreadyCounted = priorStatus === "rejected" || priorStatus === "rolled-back";
        existing.accepted = false;
        existing.proposal.status = "rejected";
        existing.rollbackPerformed = true;
        existing.completedAt = new Date().toISOString();
        if (!alreadyCounted) {
            archive.totalRejected += 1;
        }
        await saveImprovementArchive(cwd, archive);
        return;
    }
    // Not in the archive yet: record a placeholder entry for the rollback.
    await recordAttempt(cwd, {
        proposal: {
            id: proposalId,
            timestamp: new Date().toISOString(),
            targets: [],
            hypothesis: "Rejected and rolled back",
            instructions: "",
            baselineScore: 0,
            status: "rolled-back"
        },
        snapshots,
        baselineScore: 0,
        afterScore: null,
        scoreDelta: null,
        accepted: false,
        rollbackPerformed: true,
        completedAt: new Date().toISOString()
    });
}
|
|
390
|
+
// ---------------------------------------------------------------------------
|
|
391
|
+
// Markdown report of improvement history
|
|
392
|
+
// ---------------------------------------------------------------------------
|
|
393
|
+
/**
 * Render the improvement archive as a markdown report: a summary of the
 * totals followed by one section per attempt.
 *
 * @param {object} archive - Provides `attempts`, `totalAccepted`,
 *   `totalRejected` and numeric `cumulativeImprovement`.
 * @returns {string} Newline-joined markdown.
 */
export function improvementHistoryToMarkdown(archive) {
    const { attempts } = archive;
    const summary = [
        "# Self-Improvement History",
        "",
        `- Total attempts: ${attempts.length}`,
        `- Accepted: ${archive.totalAccepted}`,
        `- Rejected: ${archive.totalRejected}`,
        `- Cumulative score improvement: ${archive.cumulativeImprovement.toFixed(2)}`,
        ""
    ];
    if (attempts.length === 0) {
        return [
            ...summary,
            "No improvement attempts yet. Run `/improve` to start the self-improvement loop."
        ].join("\n");
    }
    const report = [...summary, "## Attempts", ""];
    for (const attempt of attempts) {
        const { proposal } = attempt;
        let status = "REJECTED";
        if (attempt.accepted) {
            status = "ACCEPTED";
        }
        else if (attempt.rollbackPerformed) {
            status = "ROLLED BACK";
        }
        report.push(`### ${proposal.id} — ${status}`, "", `- Baseline: ${attempt.baselineScore.toFixed(2)}`);
        if (attempt.afterScore != null) {
            // A missing delta is shown as +0.00.
            const delta = attempt.scoreDelta ?? 0;
            const sign = delta >= 0 ? "+" : "";
            report.push(`- After: ${attempt.afterScore.toFixed(2)}`, `- Delta: ${sign}${delta.toFixed(2)}`);
        }
        report.push(`- Hypothesis: ${proposal.hypothesis}`);
        if (proposal.targets.length > 0) {
            const locations = proposal.targets.map(target => `${target.file}:${target.region}`);
            report.push(`- Targets: ${locations.join(", ")}`);
        }
        report.push("");
    }
    return report.join("\n");
}
|