@percepta/kaizen 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. package/README.md +54 -126
  2. package/agent/claude-command.md +23 -0
  3. package/agent/evals.md +41 -0
  4. package/agent/overview.md +53 -0
  5. package/agent/variant-builder.md +22 -0
  6. package/agent/views.md +51 -0
  7. package/dashboard/.next/standalone/package.json +1 -1
  8. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/BUILD_ID +1 -1
  9. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/build-manifest.json +22 -22
  10. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/prerender-manifest.json +3 -3
  11. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/routes-manifest.json +36 -10
  12. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/169.js +1 -0
  13. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/588.js +8 -0
  14. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/middleware-build-manifest.js +1 -1
  15. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/404.html +1 -1
  16. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/500.html +1 -1
  17. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/benchmarks.html +1 -1
  18. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/benchmarks.js.nft.json +1 -1
  19. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data/[[...path]].html +1 -0
  20. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data/[[...path]].js.nft.json +1 -0
  21. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/eval.html +1 -1
  22. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/eval.js.nft.json +1 -1
  23. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments/[[...path]].html +1 -0
  24. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments/[[...path]].js.nft.json +1 -0
  25. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/ideas.html +1 -1
  26. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/ideas.js.nft.json +1 -1
  27. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-action.js +1 -0
  28. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-action.js.nft.json +1 -0
  29. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-item.js +1 -1
  30. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-item.js.nft.json +1 -1
  31. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-mutation.js +1 -0
  32. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-mutation.js.nft.json +1 -0
  33. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset.js +1 -1
  34. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset.js.nft.json +1 -1
  35. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-datasets.js +1 -1
  36. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-datasets.js.nft.json +1 -1
  37. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-trace-memberships.js +1 -0
  38. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-trace-memberships.js.nft.json +1 -0
  39. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-trace.js +1 -1
  40. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-trace.js.nft.json +1 -1
  41. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-traces.js +1 -0
  42. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-traces.js.nft.json +1 -0
  43. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/linear-ideas.js +2 -2
  44. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/linear-ideas.js.nft.json +1 -1
  45. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-events.js +1 -1
  46. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-events.js.nft.json +1 -1
  47. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-failures.js +1 -1
  48. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-failures.js.nft.json +1 -1
  49. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-traces.js +1 -1
  50. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-traces.js.nft.json +1 -1
  51. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/runs.js +2 -2
  52. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/runs.js.nft.json +1 -1
  53. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/systems.js +2 -2
  54. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/systems.js.nft.json +1 -1
  55. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/trace-renderer.js +1 -1
  56. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/trace-renderer.js.nft.json +1 -1
  57. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/index.html +1 -1
  58. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/index.js.nft.json +1 -1
  59. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages-manifest.json +10 -6
  60. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/9JQIPpJv6qWldYoYMHZAl/_buildManifest.js +1 -0
  61. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/53-795fe9d662eaacfe.js +8 -0
  62. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/{benchmarks-559dc9df52db3af4.js → benchmarks-bc38d751890170d0.js} +1 -1
  63. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/data/[[...path]]-8afe5a733bdde0f4.js +1 -0
  64. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/{eval-3c911ea8744631fd.js → eval-ab900515b5b18b4d.js} +1 -1
  65. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/experiments/[[...path]]-7198800378ce98dc.js +1 -0
  66. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/{ideas-6829a271003150a9.js → ideas-d8fd592d7cd21bb9.js} +1 -1
  67. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/{index-1d8b6719f49e4ae0.js → index-842f5332939fc510.js} +1 -1
  68. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/css/d97fcd1d34ebab98.css +1 -0
  69. package/dashboard/.next/standalone/packages/kaizen/package.json +8 -3
  70. package/dashboard/.next/standalone/packages/kaizen/shared/workspace-paths.js +84 -0
  71. package/dist/commands/create-view.js +58 -0
  72. package/dist/commands/create-view.js.map +1 -0
  73. package/dist/commands/guide.js +66 -0
  74. package/dist/commands/guide.js.map +1 -0
  75. package/dist/commands/ideas.js +4 -8
  76. package/dist/commands/ideas.js.map +1 -1
  77. package/dist/commands/init-system.js +22 -20
  78. package/dist/commands/init-system.js.map +1 -1
  79. package/dist/commands/init.js +28 -64
  80. package/dist/commands/init.js.map +1 -1
  81. package/dist/commands/log.js +5 -11
  82. package/dist/commands/log.js.map +1 -1
  83. package/dist/commands/rebuild.js +7 -9
  84. package/dist/commands/rebuild.js.map +1 -1
  85. package/dist/commands/run.js +5 -9
  86. package/dist/commands/run.js.map +1 -1
  87. package/dist/commands/studio.js +3 -3
  88. package/dist/commands/studio.js.map +1 -1
  89. package/dist/index.js +17 -21
  90. package/dist/index.js.map +1 -1
  91. package/dist/lib/cli.js +20 -0
  92. package/dist/lib/cli.js.map +1 -0
  93. package/dist/lib/events.js.map +1 -1
  94. package/dist/lib/fs-utils.js +3 -27
  95. package/dist/lib/fs-utils.js.map +1 -1
  96. package/dist/lib/leaderboard.js +1 -1
  97. package/dist/lib/leaderboard.js.map +1 -1
  98. package/dist/lib/paths.js +3 -3
  99. package/dist/lib/paths.js.map +1 -1
  100. package/dist/lib/promotion.js.map +1 -1
  101. package/dist/lib/run-dir.js +1 -1
  102. package/dist/lib/run-dir.js.map +1 -1
  103. package/dist/lib/runner.js +6 -5
  104. package/dist/lib/runner.js.map +1 -1
  105. package/dist/lib/system.js +4 -2
  106. package/dist/lib/system.js.map +1 -1
  107. package/dist/package.js +6 -3
  108. package/dist/shared/view-types.d.ts +67 -0
  109. package/dist/shared/view-types.d.ts.map +1 -0
  110. package/dist/shared/workspace-paths.js +84 -0
  111. package/dist/shared/workspace-paths.js.map +1 -0
  112. package/dist/types.d.ts +3 -30
  113. package/dist/types.d.ts.map +1 -1
  114. package/package.json +8 -3
  115. package/shared/view-types.d.ts +69 -0
  116. package/shared/view-types.js +1 -0
  117. package/shared/workspace-paths.d.ts +19 -0
  118. package/shared/workspace-paths.js +84 -0
  119. package/templates/system/eval.py +13 -6
  120. package/templates/system/eval.ts +11 -5
  121. package/templates/system/rubric.md +1 -1
  122. package/templates/system/system.md +6 -5
  123. package/templates/view/dataset-item.tsx +63 -0
  124. package/templates/view/trace.tsx +10 -0
  125. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/715.js +0 -6
  126. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data.html +0 -1
  127. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data.js.nft.json +0 -1
  128. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments.html +0 -1
  129. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments.js.nft.json +0 -1
  130. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/YpQ-I4VL-aEdQrM5uN7_3/_buildManifest.js +0 -1
  131. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/673-ed4be46027ae7a37.js +0 -6
  132. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/data-644e4280b4c86fe0.js +0 -1
  133. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/experiments-42f31600c2bb47ad.js +0 -1
  134. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/css/b18a6732b96168e1.css +0 -1
  135. package/dist/lib/env.js +0 -2
  136. package/dist/shared/env.js +0 -4
  137. package/templates/workspace/.claude/agents/variant-builder.md +0 -51
  138. package/templates/workspace/.claude/commands/kaizen.md +0 -65
  139. /package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/{YpQ-I4VL-aEdQrM5uN7_3 → 9JQIPpJv6qWldYoYMHZAl}/_ssgManifest.js +0 -0
@@ -1,22 +1,20 @@
1
- import { parseFlags, strFlag } from "../lib/parse-args.js";
2
1
  import { resolveStateDir, workspaceRoot } from "../lib/paths.js";
3
- import { findTerminal } from "../lib/events.js";
2
+ import { requireKaizenWorkspace } from "../lib/cli.js";
3
+ import { parseFlags, strFlag } from "../lib/parse-args.js";
4
4
  import { readJsonIfExists, readNdjson, writeJsonAtomic } from "../lib/run-dir.js";
5
- import { join } from "node:path";
5
+ import { findTerminal } from "../lib/events.js";
6
6
  import { existsSync, readdirSync } from "node:fs";
7
+ import { join } from "node:path";
7
8
  //#region src/commands/rebuild.ts
8
9
  async function runRebuild(argv) {
9
10
  const { flags } = parseFlags(argv);
10
11
  const root = workspaceRoot();
11
- const stateDir = resolveStateDir(root, strFlag(flags, "state-dir"));
12
- if (!existsSync(join(root, "kaizen.config.ts"))) {
13
- process.stderr.write(`no kaizen.config.ts in ${root}. run \`kaizen init\` first.\n`);
14
- return 1;
15
- }
12
+ const stateDir = resolveStateDir(root);
13
+ if (!requireKaizenWorkspace(root)) return 1;
16
14
  const single = strFlag(flags, "system");
17
15
  const runsRoot = join(stateDir, "runs");
18
16
  if (!existsSync(runsRoot)) {
19
- process.stdout.write("nothing to rebuild (no .kaizen/runs/).\n");
17
+ process.stdout.write("nothing to rebuild (no kaizen/.kaizen/runs/).\n");
20
18
  return 0;
21
19
  }
22
20
  const systems = single ? [single] : readdirSync(runsRoot);
@@ -1 +1 @@
1
- {"version":3,"file":"rebuild.js","names":[],"sources":["../../src/commands/rebuild.ts"],"sourcesContent":["import { existsSync, readdirSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { type Event, findTerminal } from \"../lib/events.js\";\nimport type { ManifestFile, StateFile } from \"../lib/leaderboard.js\";\nimport { parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { resolveStateDir, workspaceRoot } from \"../lib/paths.js\";\nimport {\n readJsonIfExists,\n readNdjson,\n writeJsonAtomic,\n} from \"../lib/run-dir.js\";\n\nexport async function runRebuild(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const root = workspaceRoot();\n const stateDir = resolveStateDir(root, strFlag(flags, \"state-dir\"));\n\n if (!existsSync(join(root, \"kaizen.config.ts\"))) {\n process.stderr.write(\n `no kaizen.config.ts in ${root}. run \\`kaizen init\\` first.\\n`,\n );\n return 1;\n }\n\n const single = strFlag(flags, \"system\");\n const runsRoot = join(stateDir, \"runs\");\n if (!existsSync(runsRoot)) {\n process.stdout.write(\"nothing to rebuild (no .kaizen/runs/).\\n\");\n return 0;\n }\n\n const systems = single ? [single] : readdirSync(runsRoot);\n let touched = 0;\n let scanned = 0;\n for (const system of systems) {\n const pdir = join(runsRoot, system);\n if (!existsSync(pdir)) continue;\n for (const runId of readdirSync(pdir)) {\n const dir = join(pdir, runId);\n const manifest = readJsonIfExists<ManifestFile>(\n join(dir, \"manifest.json\"),\n );\n if (!manifest) continue;\n scanned++;\n const events = readNdjson<Event>(join(dir, \"events.jsonl\"));\n const newState = deriveState(manifest, events);\n const oldState = readJsonIfExists<StateFile>(join(dir, \"state.json\"));\n if (JSON.stringify(oldState) === JSON.stringify(newState)) continue;\n writeJsonAtomic(join(dir, \"state.json\"), newState);\n touched++;\n }\n }\n process.stdout.write(\n `rebuild: scanned ${scanned} run(s), updated ${touched} state.json file(s).\\n`,\n );\n return 0;\n}\n\nfunction deriveState(\n manifest: ManifestFile,\n events: Event[],\n): StateFile & {\n system: string;\n variant: string;\n eval_version: number;\n dataset_version: string;\n started_at: string;\n updated_at: string;\n} {\n let nTotal: number | null = null;\n let nDone = 0;\n let score: number | null = null;\n let promoted: boolean | null = null;\n let lastTs: number | null = null;\n for (const e of events) {\n if (e.type === \"start\") nTotal = e.n;\n else if (e.type === \"item\") nDone++;\n else if (e.type === \"complete\") score = e.score;\n else if (e.type === \"promotion\")\n promoted = (e as { promoted?: boolean }).promoted ?? null;\n if (typeof e.ts === \"number\") lastTs = e.ts;\n }\n const terminal = findTerminal(events);\n let status: StateFile[\"status\"] = \"running\";\n if (terminal) {\n if (terminal.type === \"complete\") status = \"complete\";\n else if (terminal.type === \"crashed\") status = \"crashed\";\n else if (terminal.type === \"aborted\") status = \"aborted\";\n }\n const endedAt =\n terminal && lastTs ? new Date(lastTs * 1000).toISOString() : null;\n return {\n run_id: manifest.run_id,\n system: manifest.system,\n variant: manifest.variant,\n status,\n score,\n n_total: nTotal,\n n_done: nDone,\n promoted,\n started_at: manifest.started_at,\n updated_at: endedAt ?? new Date().toISOString(),\n ended_at: endedAt,\n eval_version: manifest.eval_version,\n dataset_version: manifest.dataset_version,\n };\n}\n"],"mappings":";;;;;;;AAYA,eAAsB,WAAW,MAAiC;CAChE,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,eAAe;CAC5B,MAAM,WAAW,gBAAgB,MAAM,QAAQ,OAAO,YAAY,CAAC;AAEnE,KAAI,CAAC,WAAW,KAAK,MAAM,mBAAmB,CAAC,EAAE;AAC/C,UAAQ,OAAO,MACb,0BAA0B,KAAK,gCAChC;AACD,SAAO;;CAGT,MAAM,SAAS,QAAQ,OAAO,SAAS;CACvC,MAAM,WAAW,KAAK,UAAU,OAAO;AACvC,KAAI,CAAC,WAAW,SAAS,EAAE;AACzB,UAAQ,OAAO,MAAM,2CAA2C;AAChE,SAAO;;CAGT,MAAM,UAAU,SAAS,CAAC,OAAO,GAAG,YAAY,SAAS;CACzD,IAAI,UAAU;CACd,IAAI,UAAU;AACd,MAAK,MAAM,UAAU,SAAS;EAC5B,MAAM,OAAO,KAAK,UAAU,OAAO;AACnC,MAAI,CAAC,WAAW,KAAK,CAAE;AACvB,OAAK,MAAM,SAAS,YAAY,KAAK,EAAE;GACrC,MAAM,MAAM,KAAK,MAAM,MAAM;GAC7B,MAAM,WAAW,iBACf,KAAK,KAAK,gBAAgB,CAC3B;AACD,OAAI,CAAC,SAAU;AACf;GAEA,MAAM,WAAW,YAAY,UADd,WAAkB,KAAK,KAAK,eAAe,CACb,CAAC;GAC9C,MAAM,WAAW,iBAA4B,KAAK,KAAK,aAAa,CAAC;AACrE,OAAI,KAAK,UAAU,SAAS,KAAK,KAAK,UAAU,SAAS,CAAE;AAC3D,mBAAgB,KAAK,KAAK,aAAa,EAAE,SAAS;AAClD;;;AAGJ,SAAQ,OAAO,MACb,oBAAoB,QAAQ,mBAAmB,QAAQ,wBACxD;AACD,QAAO;;AAGT,SAAS,YACP,UACA,QAQA;CACA,IAAI,SAAwB;CAC5B,IAAI,QAAQ;CACZ,IAAI,QAAuB;CAC3B,IAAI,WAA2B;CAC/B,IAAI,SAAwB;AAC5B,MAAK,MAAM,KAAK,QAAQ;AACtB,MAAI,EAAE,SAAS,QAAS,UAAS,EAAE;WAC1B,EAAE,SAAS,OAAQ;WACnB,EAAE,SAAS,WAAY,SAAQ,EAAE;WACjC,EAAE,SAAS,YAClB,YAAY,EAA6B,YAAY;AACvD,MAAI,OAAO,EAAE,OAAO,SAAU,UAAS,EAAE;;CAE3C,MAAM,WAAW,aAAa,OAAO;CACrC,IAAI,SAA8B;AAClC,KAAI;MACE,SAAS,SAAS,WAAY,UAAS;WAClC,SAAS,SAAS,UAAW,UAAS;WACtC,SAAS,SAAS,UAAW,UAAS;;CAEjD,MAAM,UACJ,YAAY,0BAAS,IAAI,KAAK,SAAS,IAAK,EAAC,aAAa,GAAG;AAC/D,QAAO;EACL,QAAQ,SAAS;EACjB,QAAQ,SAAS;EACjB,SAAS,SAAS;EAClB;EACA;EACA,SAAS;EACT,QAAQ;EACR;EACA,YAAY,SAAS;EACrB,YAAY,4BAAW,IAAI,MAAM,EAAC,aAAa;EAC/C,UAAU;EACV,cAAc,SAAS;EACvB,iBAAiB,SAAS;EAC3B"}
1
+ {"version":3,"file":"rebuild.js","names":[],"sources":["../../src/commands/rebuild.ts"],"sourcesContent":["import { existsSync, readdirSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { requireKaizenWorkspace } from \"../lib/cli.js\";\nimport { type Event, findTerminal } from \"../lib/events.js\";\nimport type { ManifestFile, StateFile } from \"../lib/leaderboard.js\";\nimport { parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { resolveStateDir, workspaceRoot } from \"../lib/paths.js\";\nimport {\n readJsonIfExists,\n readNdjson,\n writeJsonAtomic,\n} from \"../lib/run-dir.js\";\n\nexport async function runRebuild(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const root = workspaceRoot();\n const stateDir = resolveStateDir(root);\n\n if (!requireKaizenWorkspace(root)) return 1;\n\n const single = strFlag(flags, \"system\");\n const runsRoot = join(stateDir, \"runs\");\n if (!existsSync(runsRoot)) {\n process.stdout.write(\"nothing to rebuild (no kaizen/.kaizen/runs/).\\n\");\n return 0;\n }\n\n const systems = single ? [single] : readdirSync(runsRoot);\n let touched = 0;\n let scanned = 0;\n for (const system of systems) {\n const pdir = join(runsRoot, system);\n if (!existsSync(pdir)) continue;\n for (const runId of readdirSync(pdir)) {\n const dir = join(pdir, runId);\n const manifest = readJsonIfExists<ManifestFile>(\n join(dir, \"manifest.json\"),\n );\n if (!manifest) continue;\n scanned++;\n const events = readNdjson<Event>(join(dir, \"events.jsonl\"));\n const newState = deriveState(manifest, events);\n const oldState = readJsonIfExists<StateFile>(join(dir, \"state.json\"));\n if (JSON.stringify(oldState) === JSON.stringify(newState)) continue;\n writeJsonAtomic(join(dir, \"state.json\"), newState);\n touched++;\n }\n }\n process.stdout.write(\n `rebuild: scanned ${scanned} run(s), updated ${touched} state.json file(s).\\n`,\n );\n return 0;\n}\n\nfunction deriveState(\n manifest: ManifestFile,\n events: Event[],\n): StateFile & {\n system: string;\n variant: string;\n eval_version: number;\n dataset_version: string;\n started_at: string;\n updated_at: string;\n} {\n let nTotal: number | null = null;\n let nDone = 0;\n let score: number | null = null;\n let promoted: boolean | null = null;\n let lastTs: number | null = null;\n for (const e of events) {\n if (e.type === \"start\") nTotal = e.n;\n else if (e.type === \"item\") nDone++;\n else if (e.type === \"complete\") score = e.score;\n else if (e.type === \"promotion\")\n promoted = (e as { promoted?: boolean }).promoted ?? null;\n if (typeof e.ts === \"number\") lastTs = e.ts;\n }\n const terminal = findTerminal(events);\n let status: StateFile[\"status\"] = \"running\";\n if (terminal) {\n if (terminal.type === \"complete\") status = \"complete\";\n else if (terminal.type === \"crashed\") status = \"crashed\";\n else if (terminal.type === \"aborted\") status = \"aborted\";\n }\n const endedAt =\n terminal && lastTs ? new Date(lastTs * 1000).toISOString() : null;\n return {\n run_id: manifest.run_id,\n system: manifest.system,\n variant: manifest.variant,\n status,\n score,\n n_total: nTotal,\n n_done: nDone,\n promoted,\n started_at: manifest.started_at,\n updated_at: endedAt ?? new Date().toISOString(),\n ended_at: endedAt,\n eval_version: manifest.eval_version,\n dataset_version: manifest.dataset_version,\n };\n}\n"],"mappings":";;;;;;;;AAaA,eAAsB,WAAW,MAAiC;CAChE,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,eAAe;CAC5B,MAAM,WAAW,gBAAgB,KAAK;AAEtC,KAAI,CAAC,uBAAuB,KAAK,CAAE,QAAO;CAE1C,MAAM,SAAS,QAAQ,OAAO,SAAS;CACvC,MAAM,WAAW,KAAK,UAAU,OAAO;AACvC,KAAI,CAAC,WAAW,SAAS,EAAE;AACzB,UAAQ,OAAO,MAAM,kDAAkD;AACvE,SAAO;;CAGT,MAAM,UAAU,SAAS,CAAC,OAAO,GAAG,YAAY,SAAS;CACzD,IAAI,UAAU;CACd,IAAI,UAAU;AACd,MAAK,MAAM,UAAU,SAAS;EAC5B,MAAM,OAAO,KAAK,UAAU,OAAO;AACnC,MAAI,CAAC,WAAW,KAAK,CAAE;AACvB,OAAK,MAAM,SAAS,YAAY,KAAK,EAAE;GACrC,MAAM,MAAM,KAAK,MAAM,MAAM;GAC7B,MAAM,WAAW,iBACf,KAAK,KAAK,gBAAgB,CAC3B;AACD,OAAI,CAAC,SAAU;AACf;GAEA,MAAM,WAAW,YAAY,UADd,WAAkB,KAAK,KAAK,eAAe,CACb,CAAC;GAC9C,MAAM,WAAW,iBAA4B,KAAK,KAAK,aAAa,CAAC;AACrE,OAAI,KAAK,UAAU,SAAS,KAAK,KAAK,UAAU,SAAS,CAAE;AAC3D,mBAAgB,KAAK,KAAK,aAAa,EAAE,SAAS;AAClD;;;AAGJ,SAAQ,OAAO,MACb,oBAAoB,QAAQ,mBAAmB,QAAQ,wBACxD;AACD,QAAO;;AAGT,SAAS,YACP,UACA,QAQA;CACA,IAAI,SAAwB;CAC5B,IAAI,QAAQ;CACZ,IAAI,QAAuB;CAC3B,IAAI,WAA2B;CAC/B,IAAI,SAAwB;AAC5B,MAAK,MAAM,KAAK,QAAQ;AACtB,MAAI,EAAE,SAAS,QAAS,UAAS,EAAE;WAC1B,EAAE,SAAS,OAAQ;WACnB,EAAE,SAAS,WAAY,SAAQ,EAAE;WACjC,EAAE,SAAS,YAClB,YAAY,EAA6B,YAAY;AACvD,MAAI,OAAO,EAAE,OAAO,SAAU,UAAS,EAAE;;CAE3C,MAAM,WAAW,aAAa,OAAO;CACrC,IAAI,SAA8B;AAClC,KAAI;MACE,SAAS,SAAS,WAAY,UAAS;WAClC,SAAS,SAAS,UAAW,UAAS;WACtC,SAAS,SAAS,UAAW,UAAS;;CAEjD,MAAM,UACJ,YAAY,0BAAS,IAAI,KAAK,SAAS,IAAK,EAAC,aAAa,GAAG;AAC/D,QAAO;EACL,QAAQ,SAAS;EACjB,QAAQ,SAAS;EACjB,SAAS,SAAS;EAClB;EACA;EACA,SAAS;EACT,QAAQ;EACR;EACA,YAAY,SAAS;EACrB,YAAY,4BAAW,IAAI,MAAM,EAAC,aAAa;EAC/C,UAAU;EACV,cAAc,SAAS;EACvB,iBAAiB,SAAS;EAC3B"}
@@ -1,19 +1,16 @@
1
+ import { workspaceRoot } from "../lib/paths.js";
2
+ import { requireKaizenWorkspace } from "../lib/cli.js";
1
3
  import { boolFlag, parseFlags, strFlag } from "../lib/parse-args.js";
2
- import { resolveStateDir, workspaceRoot } from "../lib/paths.js";
3
4
  import { runExperiment } from "../lib/runner.js";
4
5
  import { normalizeLinearIssue } from "../shared/linear-issue.js";
5
- import { dirname, join, resolve } from "node:path";
6
- import { existsSync, readFileSync } from "node:fs";
6
+ import { readFileSync } from "node:fs";
7
+ import { dirname, resolve } from "node:path";
7
8
  import { fileURLToPath } from "node:url";
8
9
  //#region src/commands/run.ts
9
10
  async function runRun(argv) {
10
11
  const { flags } = parseFlags(argv);
11
12
  const root = workspaceRoot();
12
- const stateDir = resolveStateDir(root, strFlag(flags, "state-dir"));
13
- if (!existsSync(join(root, "kaizen.config.ts"))) {
14
- process.stderr.write(`no kaizen.config.ts in ${root}. run \`kaizen init\` first.\n`);
15
- return 1;
16
- }
13
+ if (!requireKaizenWorkspace(root)) return 1;
17
14
  const system = strFlag(flags, "system");
18
15
  const variant = strFlag(flags, "variant");
19
16
  if (!system) {
@@ -42,7 +39,6 @@ async function runRun(argv) {
42
39
  }
43
40
  const result = await runExperiment({
44
41
  workspace: root,
45
- stateDir,
46
42
  systemId: system,
47
43
  variant,
48
44
  parent,
@@ -1 +1 @@
1
- {"version":3,"file":"run.js","names":[],"sources":["../../src/commands/run.ts"],"sourcesContent":["import { existsSync, readFileSync } from \"node:fs\";\nimport { dirname, join, resolve } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport { normalizeLinearIssue } from \"../../shared/linear-issue.js\";\nimport { boolFlag, parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { resolveStateDir, workspaceRoot } from \"../lib/paths.js\";\nimport { runExperiment } from \"../lib/runner.js\";\n\nexport async function runRun(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const root = workspaceRoot();\n const stateDir = resolveStateDir(root, strFlag(flags, \"state-dir\"));\n\n if (!existsSync(join(root, \"kaizen.config.ts\"))) {\n process.stderr.write(\n `no kaizen.config.ts in ${root}. run \\`kaizen init\\` first.\\n`,\n );\n return 1;\n }\n\n const system = strFlag(flags, \"system\");\n const variant = strFlag(flags, \"variant\");\n if (!system) {\n process.stderr.write(\"kaizen run: --system <id> is required\\n\");\n return 1;\n }\n if (!variant) {\n process.stderr.write(\"kaizen run: --variant <id> is required\\n\");\n return 1;\n }\n\n const parent = strFlag(flags, \"parent\") ?? null;\n const hypothesis = strFlag(flags, \"hypothesis\") ?? \"\";\n const diagnostic = boolFlag(flags, \"diagnostic\");\n const noAutoPromote = boolFlag(flags, \"no-auto-promote\");\n const maxItemsFlag = strFlag(flags, \"max-items\");\n const maxItems = maxItemsFlag ? Number(maxItemsFlag) : null;\n if (maxItems !== null && (!Number.isFinite(maxItems) || maxItems <= 0)) {\n process.stderr.write(\n `kaizen run: --max-items must be a positive integer\\n`,\n );\n return 1;\n }\n const linearIssueInput =\n strFlag(flags, \"idea\") ??\n strFlag(flags, \"linear-issue\") ??\n strFlag(flags, \"linear-ticket\") ??\n null;\n const linearIssue = normalizeLinearIssue(linearIssueInput);\n if (linearIssueInput && !linearIssue) {\n process.stderr.write(\n \"kaizen run: --idea must be a Linear issue id like KZN-123 or a Linear issue URL\\n\",\n );\n return 1;\n }\n\n const result = await runExperiment({\n workspace: root,\n stateDir,\n systemId: system,\n variant,\n parent,\n hypothesis,\n diagnostic,\n noAutoPromote,\n maxItems,\n kaizenVersion: getKaizenVersion(),\n linearIssue,\n });\n\n // Single summary line on stdout — the agent reads this.\n const score = result.score === null ? \"null\" : result.score.toFixed(4);\n const promoted = result.promoted === null ? \"null\" : String(result.promoted);\n process.stdout.write(\n `score=${score} run_id=${result.runId} status=${result.status} promoted=${promoted}\\n`,\n );\n return result.exitCode;\n}\n\nfunction getKaizenVersion(): string {\n try {\n const here = dirname(fileURLToPath(import.meta.url));\n // src/commands -> package root\n const pkgPath = resolve(here, \"..\", \"..\", \"package.json\");\n return (\n JSON.parse(readFileSync(pkgPath, \"utf-8\")).version ?? \"0.0.0-unknown\"\n );\n } catch {\n return \"0.0.0-unknown\";\n }\n}\n"],"mappings":";;;;;;;;AAQA,eAAsB,OAAO,MAAiC;CAC5D,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,eAAe;CAC5B,MAAM,WAAW,gBAAgB,MAAM,QAAQ,OAAO,YAAY,CAAC;AAEnE,KAAI,CAAC,WAAW,KAAK,MAAM,mBAAmB,CAAC,EAAE;AAC/C,UAAQ,OAAO,MACb,0BAA0B,KAAK,gCAChC;AACD,SAAO;;CAGT,MAAM,SAAS,QAAQ,OAAO,SAAS;CACvC,MAAM,UAAU,QAAQ,OAAO,UAAU;AACzC,KAAI,CAAC,QAAQ;AACX,UAAQ,OAAO,MAAM,0CAA0C;AAC/D,SAAO;;AAET,KAAI,CAAC,SAAS;AACZ,UAAQ,OAAO,MAAM,2CAA2C;AAChE,SAAO;;CAGT,MAAM,SAAS,QAAQ,OAAO,SAAS,IAAI;CAC3C,MAAM,aAAa,QAAQ,OAAO,aAAa,IAAI;CACnD,MAAM,aAAa,SAAS,OAAO,aAAa;CAChD,MAAM,gBAAgB,SAAS,OAAO,kBAAkB;CACxD,MAAM,eAAe,QAAQ,OAAO,YAAY;CAChD,MAAM,WAAW,eAAe,OAAO,aAAa,GAAG;AACvD,KAAI,aAAa,SAAS,CAAC,OAAO,SAAS,SAAS,IAAI,YAAY,IAAI;AACtE,UAAQ,OAAO,MACb,uDACD;AACD,SAAO;;CAET,MAAM,mBACJ,QAAQ,OAAO,OAAO,IACtB,QAAQ,OAAO,eAAe,IAC9B,QAAQ,OAAO,gBAAgB,IAC/B;CACF,MAAM,cAAc,qBAAqB,iBAAiB;AAC1D,KAAI,oBAAoB,CAAC,aAAa;AACpC,UAAQ,OAAO,MACb,oFACD;AACD,SAAO;;CAGT,MAAM,SAAS,MAAM,cAAc;EACjC,WAAW;EACX;EACA,UAAU;EACV;EACA;EACA;EACA;EACA;EACA;EACA,eAAe,kBAAkB;EACjC;EACD,CAAC;CAGF,MAAM,QAAQ,OAAO,UAAU,OAAO,SAAS,OAAO,MAAM,QAAQ,EAAE;CACtE,MAAM,WAAW,OAAO,aAAa,OAAO,SAAS,OAAO,OAAO,SAAS;AAC5E,SAAQ,OAAO,MACb,SAAS,MAAM,UAAU,OAAO,MAAM,UAAU,OAAO,OAAO,YAAY,SAAS,IACpF;AACD,QAAO,OAAO;;AAGhB,SAAS,mBAA2B;AAClC,KAAI;EAGF,MAAM,UAAU,QAFH,QAAQ,cAAc,OAAO,KAAK,IAAI,CAEvB,EAAE,MAAM,MAAM,eAAe;AACzD,SACE,KAAK,MAAM,aAAa,SAAS,QAAQ,CAAC,CAAC,WAAW;SAElD;AACN,SAAO"}
1
+ {"version":3,"file":"run.js","names":[],"sources":["../../src/commands/run.ts"],"sourcesContent":["import { readFileSync } from \"node:fs\";\nimport { dirname, resolve } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport { normalizeLinearIssue } from \"../../shared/linear-issue.js\";\nimport { requireKaizenWorkspace } from \"../lib/cli.js\";\nimport { boolFlag, parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { workspaceRoot } from \"../lib/paths.js\";\nimport { runExperiment } from \"../lib/runner.js\";\n\nexport async function runRun(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const root = workspaceRoot();\n\n if (!requireKaizenWorkspace(root)) return 1;\n\n const system = strFlag(flags, \"system\");\n const variant = strFlag(flags, \"variant\");\n if (!system) {\n process.stderr.write(\"kaizen run: --system <id> is required\\n\");\n return 1;\n }\n if (!variant) {\n process.stderr.write(\"kaizen run: --variant <id> is required\\n\");\n return 1;\n }\n\n const parent = strFlag(flags, \"parent\") ?? null;\n const hypothesis = strFlag(flags, \"hypothesis\") ?? \"\";\n const diagnostic = boolFlag(flags, \"diagnostic\");\n const noAutoPromote = boolFlag(flags, \"no-auto-promote\");\n const maxItemsFlag = strFlag(flags, \"max-items\");\n const maxItems = maxItemsFlag ? Number(maxItemsFlag) : null;\n if (maxItems !== null && (!Number.isFinite(maxItems) || maxItems <= 0)) {\n process.stderr.write(\n `kaizen run: --max-items must be a positive integer\\n`,\n );\n return 1;\n }\n const linearIssueInput =\n strFlag(flags, \"idea\") ??\n strFlag(flags, \"linear-issue\") ??\n strFlag(flags, \"linear-ticket\") ??\n null;\n const linearIssue = normalizeLinearIssue(linearIssueInput);\n if (linearIssueInput && !linearIssue) {\n process.stderr.write(\n \"kaizen run: --idea must be a Linear issue id like KZN-123 or a Linear issue URL\\n\",\n );\n return 1;\n }\n\n const result = await runExperiment({\n workspace: root,\n systemId: system,\n variant,\n parent,\n hypothesis,\n diagnostic,\n noAutoPromote,\n maxItems,\n kaizenVersion: getKaizenVersion(),\n linearIssue,\n });\n\n // Single summary line on stdout — the agent reads this.\n const score = result.score === null ? \"null\" : result.score.toFixed(4);\n const promoted = result.promoted === null ? \"null\" : String(result.promoted);\n process.stdout.write(\n `score=${score} run_id=${result.runId} status=${result.status} promoted=${promoted}\\n`,\n );\n return result.exitCode;\n}\n\nfunction getKaizenVersion(): string {\n try {\n const here = dirname(fileURLToPath(import.meta.url));\n // src/commands -> package root\n const pkgPath = resolve(here, \"..\", \"..\", \"package.json\");\n return (\n JSON.parse(readFileSync(pkgPath, \"utf-8\")).version ?? \"0.0.0-unknown\"\n );\n } catch {\n return \"0.0.0-unknown\";\n }\n}\n"],"mappings":";;;;;;;;;AASA,eAAsB,OAAO,MAAiC;CAC5D,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,eAAe;AAE5B,KAAI,CAAC,uBAAuB,KAAK,CAAE,QAAO;CAE1C,MAAM,SAAS,QAAQ,OAAO,SAAS;CACvC,MAAM,UAAU,QAAQ,OAAO,UAAU;AACzC,KAAI,CAAC,QAAQ;AACX,UAAQ,OAAO,MAAM,0CAA0C;AAC/D,SAAO;;AAET,KAAI,CAAC,SAAS;AACZ,UAAQ,OAAO,MAAM,2CAA2C;AAChE,SAAO;;CAGT,MAAM,SAAS,QAAQ,OAAO,SAAS,IAAI;CAC3C,MAAM,aAAa,QAAQ,OAAO,aAAa,IAAI;CACnD,MAAM,aAAa,SAAS,OAAO,aAAa;CAChD,MAAM,gBAAgB,SAAS,OAAO,kBAAkB;CACxD,MAAM,eAAe,QAAQ,OAAO,YAAY;CAChD,MAAM,WAAW,eAAe,OAAO,aAAa,GAAG;AACvD,KAAI,aAAa,SAAS,CAAC,OAAO,SAAS,SAAS,IAAI,YAAY,IAAI;AACtE,UAAQ,OAAO,MACb,uDACD;AACD,SAAO;;CAET,MAAM,mBACJ,QAAQ,OAAO,OAAO,IACtB,QAAQ,OAAO,eAAe,IAC9B,QAAQ,OAAO,gBAAgB,IAC/B;CACF,MAAM,cAAc,qBAAqB,iBAAiB;AAC1D,KAAI,oBAAoB,CAAC,aAAa;AACpC,UAAQ,OAAO,MACb,oFACD;AACD,SAAO;;CAGT,MAAM,SAAS,MAAM,cAAc;EACjC,WAAW;EACX,UAAU;EACV;EACA;EACA;EACA;EACA;EACA;EACA,eAAe,kBAAkB;EACjC;EACD,CAAC;CAGF,MAAM,QAAQ,OAAO,UAAU,OAAO,SAAS,OAAO,MAAM,QAAQ,EAAE;CACtE,MAAM,WAAW,OAAO,aAAa,OAAO,SAAS,OAAO,OAAO,SAAS;AAC5E,SAAQ,OAAO,MACb,SAAS,MAAM,UAAU,OAAO,MAAM,UAAU,OAAO,OAAO,YAAY,SAAS,IACpF;AACD,QAAO,OAAO;;AAGhB,SAAS,mBAA2B;AAClC,KAAI;EAGF,MAAM,UAAU,QAFH,QAAQ,cAAc,OAAO,KAAK,IAAI,CAEvB,EAAE,MAAM,MAAM,eAAe;AACzD,SACE,KAAK,MAAM,aAAa,SAAS,QAAQ,CAAC,CAAC,WAAW;SAElD;AACN,SAAO"}
@@ -1,14 +1,14 @@
1
- import { parseFlags, strFlag } from "../lib/parse-args.js";
2
1
  import { packageRoot, resolveStateDir, workspaceRoot } from "../lib/paths.js";
3
- import { join } from "node:path";
2
+ import { parseFlags, strFlag } from "../lib/parse-args.js";
4
3
  import { existsSync } from "node:fs";
4
+ import { join } from "node:path";
5
5
  import { spawn } from "node:child_process";
6
6
  //#region src/commands/studio.ts
7
7
  async function runStudio(argv) {
8
8
  const { flags } = parseFlags(argv);
9
9
  const port = strFlag(flags, "port") ?? "6789";
10
10
  const root = workspaceRoot();
11
- const stateDir = resolveStateDir(root, strFlag(flags, "state-dir"));
11
+ const stateDir = resolveStateDir(root);
12
12
  const dashboardRoot = join(packageRoot(), "dashboard", ".next", "standalone");
13
13
  const serverPath = join(dashboardRoot, "packages", "kaizen", "dashboard", "server.js");
14
14
  if (!existsSync(serverPath)) {
@@ -1 +1 @@
1
- {"version":3,"file":"studio.js","names":[],"sources":["../../src/commands/studio.ts"],"sourcesContent":["import { spawn } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { packageRoot, resolveStateDir, workspaceRoot } from \"../lib/paths.js\";\n\nexport async function runStudio(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const port = strFlag(flags, \"port\") ?? \"6789\";\n const root = workspaceRoot();\n const stateDir = resolveStateDir(root, strFlag(flags, \"state-dir\"));\n const dashboardRoot = join(packageRoot(), \"dashboard\", \".next\", \"standalone\");\n const serverPath = join(\n dashboardRoot,\n \"packages\",\n \"kaizen\",\n \"dashboard\",\n \"server.js\",\n );\n\n if (!existsSync(serverPath)) {\n process.stderr.write(\n `kaizen studio: bundled dashboard not found at ${serverPath}.\\n` +\n \"Build the package first with `pnpm --filter @percepta/kaizen build`.\\n\",\n );\n return 1;\n }\n\n const env = {\n ...process.env,\n PORT: port,\n KAIZEN_WORKSPACE: root,\n KAIZEN_STATE_DIR: stateDir,\n };\n\n process.stdout.write(`kaizen studio: ${root}\\n`);\n process.stdout.write(`state: ${stateDir}\\n`);\n process.stdout.write(`url: http://localhost:${port}\\n`);\n\n const next = spawn(process.execPath, [serverPath], {\n cwd: dashboardRoot,\n env,\n stdio: \"inherit\",\n });\n\n const shutdown = () => next.kill(\"SIGTERM\");\n process.on(\"SIGINT\", shutdown);\n process.on(\"SIGTERM\", shutdown);\n\n return await new Promise<number>((resolve) => {\n next.on(\"exit\", (code) => resolve(code ?? 0));\n });\n}\n"],"mappings":";;;;;;AAMA,eAAsB,UAAU,MAAiC;CAC/D,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,QAAQ,OAAO,OAAO,IAAI;CACvC,MAAM,OAAO,eAAe;CAC5B,MAAM,WAAW,gBAAgB,MAAM,QAAQ,OAAO,YAAY,CAAC;CACnE,MAAM,gBAAgB,KAAK,aAAa,EAAE,aAAa,SAAS,aAAa;CAC7E,MAAM,aAAa,KACjB,eACA,YACA,UACA,aACA,YACD;AAED,KAAI,CAAC,WAAW,WAAW,EAAE;AAC3B,UAAQ,OAAO,MACb,iDAAiD,WAAW;EAE7D;AACD,SAAO;;CAGT,MAAM,MAAM;EACV,GAAG,QAAQ;EACX,MAAM;EACN,kBAAkB;EAClB,kBAAkB;EACnB;AAED,SAAQ,OAAO,MAAM,kBAAkB,KAAK,IAAI;AAChD,SAAQ,OAAO,MAAM,UAAU,SAAS,IAAI;AAC5C,SAAQ,OAAO,MAAM,yBAAyB,KAAK,IAAI;CAEvD,MAAM,OAAO,MAAM,QAAQ,UAAU,CAAC,WAAW,EAAE;EACjD,KAAK;EACL;EACA,OAAO;EACR,CAAC;CAEF,MAAM,iBAAiB,KAAK,KAAK,UAAU;AAC3C,SAAQ,GAAG,UAAU,SAAS;AAC9B,SAAQ,GAAG,WAAW,SAAS;AAE/B,QAAO,MAAM,IAAI,SAAiB,YAAY;AAC5C,OAAK,GAAG,SAAS,SAAS,QAAQ,QAAQ,EAAE,CAAC;GAC7C"}
1
+ {"version":3,"file":"studio.js","names":[],"sources":["../../src/commands/studio.ts"],"sourcesContent":["import { spawn } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { packageRoot, resolveStateDir, workspaceRoot } from \"../lib/paths.js\";\n\nexport async function runStudio(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const port = strFlag(flags, \"port\") ?? \"6789\";\n const root = workspaceRoot();\n const stateDir = resolveStateDir(root);\n const dashboardRoot = join(packageRoot(), \"dashboard\", \".next\", \"standalone\");\n const serverPath = join(\n dashboardRoot,\n \"packages\",\n \"kaizen\",\n \"dashboard\",\n \"server.js\",\n );\n\n if (!existsSync(serverPath)) {\n process.stderr.write(\n `kaizen studio: bundled dashboard not found at ${serverPath}.\\n` +\n \"Build the package first with `pnpm --filter @percepta/kaizen build`.\\n\",\n );\n return 1;\n }\n\n const env = {\n ...process.env,\n PORT: port,\n KAIZEN_WORKSPACE: root,\n KAIZEN_STATE_DIR: stateDir,\n };\n\n process.stdout.write(`kaizen studio: ${root}\\n`);\n process.stdout.write(`state: ${stateDir}\\n`);\n process.stdout.write(`url: http://localhost:${port}\\n`);\n\n const next = spawn(process.execPath, [serverPath], {\n cwd: dashboardRoot,\n env,\n stdio: \"inherit\",\n });\n\n const shutdown = () => next.kill(\"SIGTERM\");\n process.on(\"SIGINT\", shutdown);\n process.on(\"SIGTERM\", shutdown);\n\n return await new Promise<number>((resolve) => {\n next.on(\"exit\", (code) => resolve(code ?? 0));\n });\n}\n"],"mappings":";;;;;;AAMA,eAAsB,UAAU,MAAiC;CAC/D,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,QAAQ,OAAO,OAAO,IAAI;CACvC,MAAM,OAAO,eAAe;CAC5B,MAAM,WAAW,gBAAgB,KAAK;CACtC,MAAM,gBAAgB,KAAK,aAAa,EAAE,aAAa,SAAS,aAAa;CAC7E,MAAM,aAAa,KACjB,eACA,YACA,UACA,aACA,YACD;AAED,KAAI,CAAC,WAAW,WAAW,EAAE;AAC3B,UAAQ,OAAO,MACb,iDAAiD,WAAW;EAE7D;AACD,SAAO;;CAGT,MAAM,MAAM;EACV,GAAG,QAAQ;EACX,MAAM;EACN,kBAAkB;EAClB,kBAAkB;EACnB;AAED,SAAQ,OAAO,MAAM,kBAAkB,KAAK,IAAI;AAChD,SAAQ,OAAO,MAAM,UAAU,SAAS,IAAI;AAC5C,SAAQ,OAAO,MAAM,yBAAyB,KAAK,IAAI;CAEvD,MAAM,OAAO,MAAM,QAAQ,UAAU,CAAC,WAAW,EAAE;EACjD,KAAK;EACL;EACA,OAAO;EACR,CAAC;CAEF,MAAM,iBAAiB,KAAK,KAAK,UAAU;AAC3C,SAAQ,GAAG,UAAU,SAAS;AAC9B,SAAQ,GAAG,WAAW,SAAS;AAE/B,QAAO,MAAM,IAAI,SAAiB,YAAY;AAC5C,OAAK,GAAG,SAAS,SAAS,QAAQ,QAAQ,EAAE,CAAC;GAC7C"}
package/dist/index.js CHANGED
@@ -1,3 +1,5 @@
1
+ import { runCreateView } from "./commands/create-view.js";
2
+ import { runGuide } from "./commands/guide.js";
1
3
  import { runIdeas } from "./commands/ideas.js";
2
4
  import { runInitSystem } from "./commands/init-system.js";
3
5
  import { runInit } from "./commands/init.js";
@@ -8,33 +10,32 @@ import { runStudio } from "./commands/studio.js";
8
10
  //#region src/index.ts
9
11
  const HELP = `kaizen — automated AI researcher
10
12
 
11
- human-facing:
13
+ workspace:
12
14
  kaizen init scaffold a Kaizen workspace (run once per repo)
15
+ kaizen guide [topic] print package-owned agent instructions
16
+ topics: overview, evals, views,
17
+ variant-builder, claude-command, all
13
18
  kaizen create system [name] add a system definition + starter eval
14
19
  [--eval-language py|ts]
15
- kaizen studio [--port 6789] [--state-dir <dir>]
20
+ kaizen create view <system> add kaizen/systems/<system>/<type>.tsx
21
+ --type trace|dataset-item
22
+ kaizen studio [--port 6789]
16
23
  open the local Kaizen dashboard against this workspace
17
24
  kaizen ideas --system <s> [--json] [--limit <n>]
18
25
  list Linear issues labeled Kaizen for a system
19
26
 
20
27
  agent-facing (the inner loop):
21
28
  kaizen run --system <s> --variant <v> [--parent <id>] [--hypothesis "..."]
22
- [--idea <KZN-123>] [--diagnostic]
23
- [--no-auto-promote] [--state-dir <dir>]
29
+ [--idea <KZN-123>] [--diagnostic] [--no-auto-promote]
24
30
  record one run; emits score on stdout
25
- kaizen log [--system <s>] [-n <count>] [--json] [--state-dir <dir>]
31
+ kaizen log [--system <s>] [-n <count>] [--json]
26
32
  promoted baseline + chronological run history
27
33
 
28
34
  recovery (rare):
29
- kaizen rebuild [--system <s>] [--state-dir <dir>]
35
+ kaizen rebuild [--system <s>]
30
36
  re-derive state.json from events.jsonl
31
37
 
32
38
  kaizen --version print version
33
-
34
- compatibility:
35
- kaizen init system <name> alias for kaizen create system <name>
36
- kaizen get ideas --system <s> alias for kaizen ideas --system <s>
37
-
38
39
  `;
39
40
  async function main(argv) {
40
41
  const [cmd, sub, ...rest] = argv;
@@ -49,26 +50,21 @@ async function main(argv) {
49
50
  }
50
51
  switch (cmd) {
51
52
  case "create":
52
- if (!sub) return runInitSystem([]);
53
53
  if (sub === "system") return runInitSystem(rest);
54
- return runInitSystem([sub, ...rest].filter(Boolean));
55
- case "init":
56
- if (sub === "system") return runInitSystem(rest);
57
- return runInit([sub, ...rest].filter(Boolean));
54
+ if (sub === "view") return runCreateView(rest);
55
+ process.stderr.write(`unknown create target: ${sub ?? ""}\n\n${HELP}`);
56
+ return 1;
57
+ case "init": return runInit([sub, ...rest].filter(Boolean));
58
+ case "guide": return runGuide([sub, ...rest].filter(Boolean));
58
59
  case "run": return runRun([sub, ...rest].filter(Boolean));
59
60
  case "log": return runLog([sub, ...rest].filter(Boolean));
60
61
  case "rebuild": return runRebuild([sub, ...rest].filter(Boolean));
61
62
  case "studio": return runStudio([sub, ...rest].filter(Boolean));
62
63
  case "ideas": return runIdeas([sub, ...rest].filter(Boolean));
63
- case "get":
64
- if (sub === "ideas") return runIdeas(rest);
65
- break;
66
64
  default:
67
65
  process.stderr.write(`unknown command: ${cmd}\n\n${HELP}`);
68
66
  return 1;
69
67
  }
70
- process.stderr.write(`unknown command: ${cmd} ${sub ?? ""}\n\n${HELP}`);
71
- return 1;
72
68
  }
73
69
  main(process.argv.slice(2)).then((code) => process.exit(code), (err) => {
74
70
  process.stderr.write(`error: ${err?.message ?? err}\n`);
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","names":[],"sources":["../src/index.ts"],"sourcesContent":["import { runIdeas } from \"./commands/ideas.js\";\nimport { runInitSystem } from \"./commands/init-system.js\";\nimport { runInit } from \"./commands/init.js\";\nimport { runLog } from \"./commands/log.js\";\nimport { runRebuild } from \"./commands/rebuild.js\";\nimport { runRun } from \"./commands/run.js\";\nimport { runStudio } from \"./commands/studio.js\";\n\nconst HELP = `kaizen — automated AI researcher\n\nhuman-facing:\n kaizen init scaffold a Kaizen workspace (run once per repo)\n kaizen create system [name] add a system definition + starter eval\n [--eval-language py|ts]\n kaizen studio [--port 6789] [--state-dir <dir>]\n open the local Kaizen dashboard against this workspace\n kaizen ideas --system <s> [--json] [--limit <n>]\n list Linear issues labeled Kaizen for a system\n\nagent-facing (the inner loop):\n kaizen run --system <s> --variant <v> [--parent <id>] [--hypothesis \"...\"]\n [--idea <KZN-123>] [--diagnostic]\n [--no-auto-promote] [--state-dir <dir>]\n record one run; emits score on stdout\n kaizen log [--system <s>] [-n <count>] [--json] [--state-dir <dir>]\n promoted baseline + chronological run history\n\nrecovery (rare):\n kaizen rebuild [--system <s>] [--state-dir <dir>]\n re-derive state.json from events.jsonl\n\n kaizen --version print version\n\ncompatibility:\n kaizen init system <name> alias for kaizen create system <name>\n kaizen get ideas --system <s> alias for kaizen ideas --system <s>\n\n`;\n\nasync function main(argv: string[]): Promise<number> {\n const [cmd, sub, ...rest] = argv;\n\n if (!cmd || cmd === \"--help\" || cmd === \"-h\" || cmd === \"help\") {\n process.stdout.write(HELP);\n return 0;\n }\n if (cmd === \"--version\" || cmd === \"-v\") {\n const pkg = await import(\"../package.json\", { with: { type: \"json\" } });\n process.stdout.write(`${pkg.default.version}\\n`);\n return 0;\n }\n\n switch (cmd) {\n case \"create\":\n if (!sub) {\n return runInitSystem([]);\n }\n if (sub === \"system\") {\n return runInitSystem(rest);\n }\n return runInitSystem([sub, ...rest].filter(Boolean));\n case \"init\":\n if (sub === \"system\") {\n return runInitSystem(rest);\n }\n return runInit([sub, ...rest].filter(Boolean));\n case \"run\":\n return runRun([sub, ...rest].filter(Boolean));\n case \"log\":\n return runLog([sub, ...rest].filter(Boolean));\n case \"rebuild\":\n return runRebuild([sub, ...rest].filter(Boolean));\n case \"studio\":\n return runStudio([sub, ...rest].filter(Boolean));\n case \"ideas\":\n return runIdeas([sub, ...rest].filter(Boolean));\n case \"get\":\n if (sub === \"ideas\") {\n return runIdeas(rest);\n }\n break;\n default:\n process.stderr.write(`unknown command: ${cmd}\\n\\n${HELP}`);\n return 1;\n }\n process.stderr.write(`unknown command: ${cmd} ${sub ?? \"\"}\\n\\n${HELP}`);\n return 1;\n}\n\nmain(process.argv.slice(2)).then(\n (code) => process.exit(code),\n (err) => {\n process.stderr.write(`error: ${err?.message ?? err}\\n`);\n if (process.env.KAIZEN_DEBUG) process.stderr.write(`${err?.stack}\\n`);\n process.exit(1);\n },\n);\n"],"mappings":";;;;;;;;AAQA,MAAM,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA+Bb,eAAe,KAAK,MAAiC;CACnD,MAAM,CAAC,KAAK,KAAK,GAAG,QAAQ;AAE5B,KAAI,CAAC,OAAO,QAAQ,YAAY,QAAQ,QAAQ,QAAQ,QAAQ;AAC9D,UAAQ,OAAO,MAAM,KAAK;AAC1B,SAAO;;AAET,KAAI,QAAQ,eAAe,QAAQ,MAAM;EACvC,MAAM,MAAM,MAAM,OAAO,mBAAmB,EAAE,MAAM,EAAE,MAAM,QAAQ,EAAE;AACtE,UAAQ,OAAO,MAAM,GAAG,IAAI,QAAQ,QAAQ,IAAI;AAChD,SAAO;;AAGT,SAAQ,KAAR;EACE,KAAK;AACH,OAAI,CAAC,IACH,QAAO,cAAc,EAAE,CAAC;AAE1B,OAAI,QAAQ,SACV,QAAO,cAAc,KAAK;AAE5B,UAAO,cAAc,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EACtD,KAAK;AACH,OAAI,QAAQ,SACV,QAAO,cAAc,KAAK;AAE5B,UAAO,QAAQ,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAChD,KAAK,MACH,QAAO,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAC/C,KAAK,MACH,QAAO,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAC/C,KAAK,UACH,QAAO,WAAW,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EACnD,KAAK,SACH,QAAO,UAAU,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAClD,KAAK,QACH,QAAO,SAAS,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EACjD,KAAK;AACH,OAAI,QAAQ,QACV,QAAO,SAAS,KAAK;AAEvB;EACF;AACE,WAAQ,OAAO,MAAM,oBAAoB,IAAI,MAAM,OAAO;AAC1D,UAAO;;AAEX,SAAQ,OAAO,MAAM,oBAAoB,IAAI,GAAG,OAAO,GAAG,MAAM,OAAO;AACvE,QAAO;;AAGT,KAAK,QAAQ,KAAK,MAAM,EAAE,CAAC,CAAC,MACzB,SAAS,QAAQ,KAAK,KAAK,GAC3B,QAAQ;AACP,SAAQ,OAAO,MAAM,UAAU,KAAK,WAAW,IAAI,IAAI;AACvD,KAAI,QAAQ,IAAI,aAAc,SAAQ,OAAO,MAAM,GAAG,KAAK,MAAM,IAAI;AACrE,SAAQ,KAAK,EAAE;EAElB"}
1
+ {"version":3,"file":"index.js","names":[],"sources":["../src/index.ts"],"sourcesContent":["import { runCreateView } from \"./commands/create-view.js\";\nimport { runGuide } from \"./commands/guide.js\";\nimport { runIdeas } from \"./commands/ideas.js\";\nimport { runInitSystem } from \"./commands/init-system.js\";\nimport { runInit } from \"./commands/init.js\";\nimport { runLog } from \"./commands/log.js\";\nimport { runRebuild } from \"./commands/rebuild.js\";\nimport { runRun } from \"./commands/run.js\";\nimport { runStudio } from \"./commands/studio.js\";\n\nconst HELP = `kaizen — automated AI researcher\n\nworkspace:\n kaizen init scaffold a Kaizen workspace (run once per repo)\n kaizen guide [topic] print package-owned agent instructions\n topics: overview, evals, views,\n variant-builder, claude-command, all\n kaizen create system [name] add a system definition + starter eval\n [--eval-language py|ts]\n kaizen create view <system> add kaizen/systems/<system>/<type>.tsx\n --type trace|dataset-item\n kaizen studio [--port 6789]\n open the local Kaizen dashboard against this workspace\n kaizen ideas --system <s> [--json] [--limit <n>]\n list Linear issues labeled Kaizen for a system\n\nagent-facing (the inner loop):\n kaizen run --system <s> --variant <v> [--parent <id>] [--hypothesis \"...\"]\n [--idea <KZN-123>] [--diagnostic] [--no-auto-promote]\n record one run; emits score on stdout\n kaizen log [--system <s>] [-n <count>] [--json]\n promoted baseline + chronological run history\n\nrecovery (rare):\n kaizen rebuild [--system <s>]\n re-derive state.json from events.jsonl\n\n kaizen --version print version\n`;\n\nasync function main(argv: string[]): Promise<number> {\n const [cmd, sub, ...rest] = argv;\n\n if (!cmd || cmd === \"--help\" || cmd === \"-h\" || cmd === \"help\") {\n process.stdout.write(HELP);\n return 0;\n }\n if (cmd === \"--version\" || cmd === \"-v\") {\n const pkg = await import(\"../package.json\", { with: { type: \"json\" } });\n process.stdout.write(`${pkg.default.version}\\n`);\n return 0;\n }\n\n switch (cmd) {\n case \"create\":\n if (sub === \"system\") {\n return runInitSystem(rest);\n }\n if (sub === \"view\") {\n return runCreateView(rest);\n }\n process.stderr.write(`unknown create target: ${sub ?? \"\"}\\n\\n${HELP}`);\n return 1;\n case \"init\":\n return runInit([sub, ...rest].filter(Boolean));\n case \"guide\":\n return runGuide([sub, ...rest].filter(Boolean));\n case \"run\":\n return runRun([sub, ...rest].filter(Boolean));\n case \"log\":\n return runLog([sub, ...rest].filter(Boolean));\n case \"rebuild\":\n return runRebuild([sub, ...rest].filter(Boolean));\n case \"studio\":\n return runStudio([sub, ...rest].filter(Boolean));\n case \"ideas\":\n return runIdeas([sub, ...rest].filter(Boolean));\n default:\n process.stderr.write(`unknown command: ${cmd}\\n\\n${HELP}`);\n return 1;\n }\n}\n\nmain(process.argv.slice(2)).then(\n (code) => process.exit(code),\n (err) => {\n process.stderr.write(`error: ${err?.message ?? err}\\n`);\n if (process.env.KAIZEN_DEBUG) process.stderr.write(`${err?.stack}\\n`);\n process.exit(1);\n },\n);\n"],"mappings":";;;;;;;;;;AAUA,MAAM,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA8Bb,eAAe,KAAK,MAAiC;CACnD,MAAM,CAAC,KAAK,KAAK,GAAG,QAAQ;AAE5B,KAAI,CAAC,OAAO,QAAQ,YAAY,QAAQ,QAAQ,QAAQ,QAAQ;AAC9D,UAAQ,OAAO,MAAM,KAAK;AAC1B,SAAO;;AAET,KAAI,QAAQ,eAAe,QAAQ,MAAM;EACvC,MAAM,MAAM,MAAM,OAAO,mBAAmB,EAAE,MAAM,EAAE,MAAM,QAAQ,EAAE;AACtE,UAAQ,OAAO,MAAM,GAAG,IAAI,QAAQ,QAAQ,IAAI;AAChD,SAAO;;AAGT,SAAQ,KAAR;EACE,KAAK;AACH,OAAI,QAAQ,SACV,QAAO,cAAc,KAAK;AAE5B,OAAI,QAAQ,OACV,QAAO,cAAc,KAAK;AAE5B,WAAQ,OAAO,MAAM,0BAA0B,OAAO,GAAG,MAAM,OAAO;AACtE,UAAO;EACT,KAAK,OACH,QAAO,QAAQ,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAChD,KAAK,QACH,QAAO,SAAS,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EACjD,KAAK,MACH,QAAO,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAC/C,KAAK,MACH,QAAO,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAC/C,KAAK,UACH,QAAO,WAAW,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EACnD,KAAK,SACH,QAAO,UAAU,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAClD,KAAK,QACH,QAAO,SAAS,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EACjD;AACE,WAAQ,OAAO,MAAM,oBAAoB,IAAI,MAAM,OAAO;AAC1D,UAAO;;;AAIb,KAAK,QAAQ,KAAK,MAAM,EAAE,CAAC,CAAC,MACzB,SAAS,QAAQ,KAAK,KAAK,GAC3B,QAAQ;AACP,SAAQ,OAAO,MAAM,UAAU,KAAK,WAAW,IAAI,IAAI;AACvD,KAAI,QAAQ,IAAI,aAAc,SAAQ,OAAO,MAAM,GAAG,KAAK,MAAM,IAAI;AACrE,SAAQ,KAAK,EAAE;EAElB"}
@@ -0,0 +1,20 @@
1
+ import { kaizenConfigPath } from "../shared/workspace-paths.js";
2
+ import "./paths.js";
3
+ import { existsSync } from "node:fs";
4
+ //#region src/lib/cli.ts
5
+ function requireKaizenWorkspace(root) {
6
+ if (existsSync(kaizenConfigPath(root))) return true;
7
+ process.stderr.write(`no kaizen/config.ts in ${root}. run \`kaizen init\` first.\n`);
8
+ return false;
9
+ }
10
+ function isSystemId(value) {
11
+ return /^[a-z][a-z0-9-]*$/.test(value);
12
+ }
13
+ function pad(s, w) {
14
+ if (s.length >= w) return s.slice(0, w - 1) + " ";
15
+ return s + " ".repeat(w - s.length);
16
+ }
17
+ //#endregion
18
+ export { isSystemId, pad, requireKaizenWorkspace };
19
+
20
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.js","names":[],"sources":["../../src/lib/cli.ts"],"sourcesContent":["import { existsSync } from \"node:fs\";\nimport { kaizenConfigPath } from \"./paths.js\";\n\nexport function requireKaizenWorkspace(root: string): boolean {\n if (existsSync(kaizenConfigPath(root))) return true;\n process.stderr.write(\n `no kaizen/config.ts in ${root}. run \\`kaizen init\\` first.\\n`,\n );\n return false;\n}\n\nexport function isSystemId(value: string): boolean {\n return /^[a-z][a-z0-9-]*$/.test(value);\n}\n\nexport function pad(s: string, w: number): string {\n if (s.length >= w) return s.slice(0, w - 1) + \" \";\n return s + \" \".repeat(w - s.length);\n}\n"],"mappings":";;;;AAGA,SAAgB,uBAAuB,MAAuB;AAC5D,KAAI,WAAW,iBAAiB,KAAK,CAAC,CAAE,QAAO;AAC/C,SAAQ,OAAO,MACb,0BAA0B,KAAK,gCAChC;AACD,QAAO;;AAGT,SAAgB,WAAW,OAAwB;AACjD,QAAO,oBAAoB,KAAK,MAAM;;AAGxC,SAAgB,IAAI,GAAW,GAAmB;AAChD,KAAI,EAAE,UAAU,EAAG,QAAO,EAAE,MAAM,GAAG,IAAI,EAAE,GAAG;AAC9C,QAAO,IAAI,IAAI,OAAO,IAAI,EAAE,OAAO"}
@@ -1 +1 @@
1
- {"version":3,"file":"events.js","names":[],"sources":["../../src/lib/events.ts"],"sourcesContent":["// Event schema for the NDJSON contract between eval scripts and the runner.\n\nexport interface BaseEvent {\n type: string;\n ts?: number;\n}\n\nexport interface StartEvent extends BaseEvent {\n type: \"start\";\n n: number;\n eval_version: number;\n dataset_version: string;\n variant?: string;\n}\n\nexport interface ItemEvent extends BaseEvent {\n type: \"item\";\n id: string;\n score: number;\n breakdown?: Record<string, number>;\n trace_id?: string | null;\n why?: string;\n subgroup?: Record<string, string>;\n}\n\nexport interface ProgressEvent extends BaseEvent {\n type: \"progress\";\n done: number;\n total: number;\n}\n\nexport interface CompleteEvent extends BaseEvent {\n type: \"complete\";\n score: number;\n breakdown?: Record<string, number>;\n n: number;\n worst_traces?: Array<{ id: string; score: number; trace_id?: string | null }>;\n}\n\nexport interface ErrorEvent extends BaseEvent {\n type: \"error\";\n message: string;\n}\n\n// Synthesized by the runner, not emitted by the eval.\nexport interface CrashedEvent extends BaseEvent {\n type: \"crashed\";\n exit_code: number | null;\n signal?: string | null;\n stderr_tail?: string;\n reason: string;\n}\n\nexport interface AbortedEvent extends BaseEvent {\n type: \"aborted\";\n reason: string;\n}\n\nexport interface PromotionEvent extends BaseEvent {\n type: \"promotion\";\n promoted: boolean;\n rule:\n | \"auto\"\n | \"force\"\n | \"no_baseline\"\n | \"version_mismatch\"\n | \"stat_insufficient\"\n | \"subgroup_regression\";\n ci_low?: number;\n ci_high?: number;\n mean_delta?: number;\n n_compared?: number;\n parent_run_id?: string;\n details?: string;\n}\n\nexport type Event =\n | StartEvent\n | ItemEvent\n | ProgressEvent\n | CompleteEvent\n | ErrorEvent\n | CrashedEvent\n | AbortedEvent\n | PromotionEvent;\n\nexport function validateEvent(value: unknown): Event {\n if (!value || typeof value !== \"object\") {\n throw new Error(\"event must be an object\");\n }\n const e = value as Record<string, unknown>;\n if (typeof e.type !== \"string\") {\n throw new Error(\"event.type must be a string\");\n }\n\n switch (e.type) {\n case \"start\":\n requireNumber(e, \"n\");\n requireNumber(e, \"eval_version\");\n requireString(e, \"dataset_version\");\n break;\n case \"item\":\n requireString(e, \"id\");\n requireNumber(e, \"score\");\n break;\n case \"progress\":\n requireNumber(e, \"done\");\n requireNumber(e, \"total\");\n break;\n case \"complete\":\n requireNumber(e, \"score\");\n requireNumber(e, \"n\");\n break;\n case \"error\":\n requireString(e, \"message\");\n break;\n case \"crashed\":\n requireString(e, \"reason\");\n break;\n case \"aborted\":\n requireString(e, \"reason\");\n break;\n case \"promotion\":\n if (typeof e.promoted !== \"boolean\")\n throw new Error(\"promotion.promoted must be boolean\");\n requireString(e, \"rule\");\n break;\n default:\n throw new Error(`unknown event type: ${e.type}`);\n }\n\n if (e.ts !== undefined && typeof e.ts !== \"number\") {\n throw new Error(`${e.type}.ts must be a number when present`);\n }\n return e as unknown as Event;\n}\n\nfunction requireString(e: Record<string, unknown>, key: string): void {\n if (typeof e[key] !== \"string\" || e[key] === \"\") {\n throw new Error(`${String(e.type)}.${key} must be a non-empty string`);\n }\n}\n\nfunction requireNumber(e: Record<string, unknown>, key: string): void {\n if (typeof e[key] !== \"number\" || !Number.isFinite(e[key])) {\n throw new Error(`${String(e.type)}.${key} must be a finite number`);\n }\n}\n\nexport type TerminalType = \"complete\" | \"crashed\" | \"aborted\";\n\nexport function isTerminal(\n e: Event,\n): e is CompleteEvent | CrashedEvent | AbortedEvent {\n return e.type === \"complete\" || e.type === \"crashed\" || e.type === \"aborted\";\n}\n\nexport function findTerminal(\n events: Event[],\n): CompleteEvent | CrashedEvent | AbortedEvent | null {\n for (let i = events.length - 1; i >= 0; i--) {\n if (isTerminal(events[i]))\n return events[i] as CompleteEvent | CrashedEvent | AbortedEvent;\n }\n return null;\n}\n\n/** Stream-mode NDJSON parser. Feed `push(chunk)` from a Readable; receive parsed events via `onEvent`. */\nexport class NdjsonReader {\n private buffer = \"\";\n public constructor(\n private onEvent: (e: Event) => void,\n private onParseError?: (line: string, err: unknown) => void,\n ) {}\n\n public push(chunk: string | Buffer): void {\n this.buffer += typeof chunk === \"string\" ? chunk : chunk.toString(\"utf-8\");\n let nl: number;\n while ((nl = this.buffer.indexOf(\"\\n\")) !== -1) {\n const line = this.buffer.slice(0, nl);\n this.buffer = this.buffer.slice(nl + 1);\n if (!line.trim()) continue;\n try {\n this.onEvent(validateEvent(JSON.parse(line)));\n } catch (err) {\n this.onParseError?.(line, err);\n }\n }\n }\n\n /** Flush any remaining unterminated content as a best-effort parse. */\n public end(): void {\n const tail = this.buffer.trim();\n this.buffer = \"\";\n if (!tail) return;\n try {\n this.onEvent(validateEvent(JSON.parse(tail)));\n } catch (err) {\n this.onParseError?.(tail, err);\n }\n }\n}\n"],"mappings":";AAsFA,SAAgB,cAAc,OAAuB;AACnD,KAAI,CAAC,SAAS,OAAO,UAAU,SAC7B,OAAM,IAAI,MAAM,0BAA0B;CAE5C,MAAM,IAAI;AACV,KAAI,OAAO,EAAE,SAAS,SACpB,OAAM,IAAI,MAAM,8BAA8B;AAGhD,SAAQ,EAAE,MAAV;EACE,KAAK;AACH,iBAAc,GAAG,IAAI;AACrB,iBAAc,GAAG,eAAe;AAChC,iBAAc,GAAG,kBAAkB;AACnC;EACF,KAAK;AACH,iBAAc,GAAG,KAAK;AACtB,iBAAc,GAAG,QAAQ;AACzB;EACF,KAAK;AACH,iBAAc,GAAG,OAAO;AACxB,iBAAc,GAAG,QAAQ;AACzB;EACF,KAAK;AACH,iBAAc,GAAG,QAAQ;AACzB,iBAAc,GAAG,IAAI;AACrB;EACF,KAAK;AACH,iBAAc,GAAG,UAAU;AAC3B;EACF,KAAK;AACH,iBAAc,GAAG,SAAS;AAC1B;EACF,KAAK;AACH,iBAAc,GAAG,SAAS;AAC1B;EACF,KAAK;AACH,OAAI,OAAO,EAAE,aAAa,UACxB,OAAM,IAAI,MAAM,qCAAqC;AACvD,iBAAc,GAAG,OAAO;AACxB;EACF,QACE,OAAM,IAAI,MAAM,uBAAuB,EAAE,OAAO;;AAGpD,KAAI,EAAE,OAAO,KAAA,KAAa,OAAO,EAAE,OAAO,SACxC,OAAM,IAAI,MAAM,GAAG,EAAE,KAAK,mCAAmC;AAE/D,QAAO;;AAGT,SAAS,cAAc,GAA4B,KAAmB;AACpE,KAAI,OAAO,EAAE,SAAS,YAAY,EAAE,SAAS,GAC3C,OAAM,IAAI,MAAM,GAAG,OAAO,EAAE,KAAK,CAAC,GAAG,IAAI,6BAA6B;;AAI1E,SAAS,cAAc,GAA4B,KAAmB;AACpE,KAAI,OAAO,EAAE,SAAS,YAAY,CAAC,OAAO,SAAS,EAAE,KAAK,CACxD,OAAM,IAAI,MAAM,GAAG,OAAO,EAAE,KAAK,CAAC,GAAG,IAAI,0BAA0B;;AAMvE,SAAgB,WACd,GACkD;AAClD,QAAO,EAAE,SAAS,cAAc,EAAE,SAAS,aAAa,EAAE,SAAS;;AAGrE,SAAgB,aACd,QACoD;AACpD,MAAK,IAAI,IAAI,OAAO,SAAS,GAAG,KAAK,GAAG,IACtC,KAAI,WAAW,OAAO,GAAG,CACvB,QAAO,OAAO;AAElB,QAAO;;;AAIT,IAAa,eAAb,MAA0B;CACxB,SAAiB;CACjB,YACE,SACA,cACA;AAFQ,OAAA,UAAA;AACA,OAAA,eAAA;;CAGV,KAAY,OAA8B;AACxC,OAAK,UAAU,OAAO,UAAU,WAAW,QAAQ,MAAM,SAAS,QAAQ;EAC1E,IAAI;AACJ,UAAQ,KAAK,KAAK,OAAO,QAAQ,KAAK,MAAM,IAAI;GAC9C,MAAM,OAAO,KAAK,OAAO,MAAM,GAAG,GAAG;AACrC,QAAK,SAAS,KAAK,OAAO,MAAM,KAAK,EAAE;AACvC,OAAI,CAAC,KAAK,MAAM,CAAE;AAClB,OAAI;AACF,SAAK,QAAQ,cAAc,KAAK,MAAM,KAAK,CAAC,CAAC;YACtC,KAAK;AACZ,SAAK,eAAe,MAAM,IAAI;;;;;CAMpC,MAAmB;EACjB,MAAM,OAAO,KAAK,OAAO,MAAM;AAC/B,OAAK,SAAS;AACd,MAAI,CAAC,KAAM;AACX,MAAI;AACF,QAAK,QAAQ,cAAc,KAAK,MAAM,KAAK,CAAC,CAAC;WACtC,KAAK;AACZ,QAAK,eAAe,MAAM,IAAI"}
1
+ {"version":3,"file":"events.js","names":[],"sources":["../../src/lib/events.ts"],"sourcesContent":["// Event schema for the NDJSON contract between eval scripts and the runner.\n\ninterface BaseEvent {\n type: string;\n ts?: number;\n}\n\ninterface StartEvent extends BaseEvent {\n type: \"start\";\n n: number;\n eval_version: number;\n dataset_version: string;\n variant?: string;\n}\n\nexport interface ItemEvent extends BaseEvent {\n type: \"item\";\n id: string;\n score: number;\n breakdown?: Record<string, number>;\n trace_id?: string | null;\n why?: string;\n subgroup?: Record<string, string>;\n}\n\ninterface ProgressEvent extends BaseEvent {\n type: \"progress\";\n done: number;\n total: number;\n}\n\nexport interface CompleteEvent extends BaseEvent {\n type: \"complete\";\n score: number;\n breakdown?: Record<string, number>;\n n: number;\n worst_traces?: Array<{ id: string; score: number; trace_id?: string | null }>;\n}\n\ninterface ErrorEvent extends BaseEvent {\n type: \"error\";\n message: string;\n}\n\n// Synthesized by the runner, not emitted by the eval.\nexport interface CrashedEvent extends BaseEvent {\n type: \"crashed\";\n exit_code: number | null;\n signal?: string | null;\n stderr_tail?: string;\n reason: string;\n}\n\ninterface AbortedEvent extends BaseEvent {\n type: \"aborted\";\n reason: string;\n}\n\nexport interface PromotionEvent extends BaseEvent {\n type: \"promotion\";\n promoted: boolean;\n rule:\n | \"auto\"\n | \"force\"\n | \"no_baseline\"\n | \"version_mismatch\"\n | \"stat_insufficient\"\n | \"subgroup_regression\";\n ci_low?: number;\n ci_high?: number;\n mean_delta?: number;\n n_compared?: number;\n parent_run_id?: string;\n details?: string;\n}\n\nexport type Event =\n | StartEvent\n | ItemEvent\n | ProgressEvent\n | CompleteEvent\n | ErrorEvent\n | CrashedEvent\n | AbortedEvent\n | PromotionEvent;\n\nexport function validateEvent(value: unknown): Event {\n if (!value || typeof value !== \"object\") {\n throw new Error(\"event must be an object\");\n }\n const e = value as Record<string, unknown>;\n if (typeof e.type !== \"string\") {\n throw new Error(\"event.type must be a string\");\n }\n\n switch (e.type) {\n case \"start\":\n requireNumber(e, \"n\");\n requireNumber(e, \"eval_version\");\n requireString(e, \"dataset_version\");\n break;\n case \"item\":\n requireString(e, \"id\");\n requireNumber(e, \"score\");\n break;\n case \"progress\":\n requireNumber(e, \"done\");\n requireNumber(e, \"total\");\n break;\n case \"complete\":\n requireNumber(e, \"score\");\n requireNumber(e, \"n\");\n break;\n case \"error\":\n requireString(e, \"message\");\n break;\n case \"crashed\":\n requireString(e, \"reason\");\n break;\n case \"aborted\":\n requireString(e, \"reason\");\n break;\n case \"promotion\":\n if (typeof e.promoted !== \"boolean\")\n throw new Error(\"promotion.promoted must be boolean\");\n requireString(e, \"rule\");\n break;\n default:\n throw new Error(`unknown event type: ${e.type}`);\n }\n\n if (e.ts !== undefined && typeof e.ts !== \"number\") {\n throw new Error(`${e.type}.ts must be a number when present`);\n }\n return e as unknown as Event;\n}\n\nfunction requireString(e: Record<string, unknown>, key: string): void {\n if (typeof e[key] !== \"string\" || e[key] === \"\") {\n throw new Error(`${String(e.type)}.${key} must be a non-empty string`);\n }\n}\n\nfunction requireNumber(e: Record<string, unknown>, key: string): void {\n if (typeof e[key] !== \"number\" || !Number.isFinite(e[key])) {\n throw new Error(`${String(e.type)}.${key} must be a finite number`);\n }\n}\n\nfunction isTerminal(\n e: Event,\n): e is CompleteEvent | CrashedEvent | AbortedEvent {\n return e.type === \"complete\" || e.type === \"crashed\" || e.type === \"aborted\";\n}\n\nexport function findTerminal(\n events: Event[],\n): CompleteEvent | CrashedEvent | AbortedEvent | null {\n for (let i = events.length - 1; i >= 0; i--) {\n if (isTerminal(events[i]))\n return events[i] as CompleteEvent | CrashedEvent | AbortedEvent;\n }\n return null;\n}\n\n/** Stream-mode NDJSON parser. Feed `push(chunk)` from a Readable; receive parsed events via `onEvent`. */\nexport class NdjsonReader {\n private buffer = \"\";\n public constructor(\n private onEvent: (e: Event) => void,\n private onParseError?: (line: string, err: unknown) => void,\n ) {}\n\n public push(chunk: string | Buffer): void {\n this.buffer += typeof chunk === \"string\" ? chunk : chunk.toString(\"utf-8\");\n let nl: number;\n while ((nl = this.buffer.indexOf(\"\\n\")) !== -1) {\n const line = this.buffer.slice(0, nl);\n this.buffer = this.buffer.slice(nl + 1);\n if (!line.trim()) continue;\n try {\n this.onEvent(validateEvent(JSON.parse(line)));\n } catch (err) {\n this.onParseError?.(line, err);\n }\n }\n }\n\n /** Flush any remaining unterminated content as a best-effort parse. */\n public end(): void {\n const tail = this.buffer.trim();\n this.buffer = \"\";\n if (!tail) return;\n try {\n this.onEvent(validateEvent(JSON.parse(tail)));\n } catch (err) {\n this.onParseError?.(tail, err);\n }\n }\n}\n"],"mappings":";AAsFA,SAAgB,cAAc,OAAuB;AACnD,KAAI,CAAC,SAAS,OAAO,UAAU,SAC7B,OAAM,IAAI,MAAM,0BAA0B;CAE5C,MAAM,IAAI;AACV,KAAI,OAAO,EAAE,SAAS,SACpB,OAAM,IAAI,MAAM,8BAA8B;AAGhD,SAAQ,EAAE,MAAV;EACE,KAAK;AACH,iBAAc,GAAG,IAAI;AACrB,iBAAc,GAAG,eAAe;AAChC,iBAAc,GAAG,kBAAkB;AACnC;EACF,KAAK;AACH,iBAAc,GAAG,KAAK;AACtB,iBAAc,GAAG,QAAQ;AACzB;EACF,KAAK;AACH,iBAAc,GAAG,OAAO;AACxB,iBAAc,GAAG,QAAQ;AACzB;EACF,KAAK;AACH,iBAAc,GAAG,QAAQ;AACzB,iBAAc,GAAG,IAAI;AACrB;EACF,KAAK;AACH,iBAAc,GAAG,UAAU;AAC3B;EACF,KAAK;AACH,iBAAc,GAAG,SAAS;AAC1B;EACF,KAAK;AACH,iBAAc,GAAG,SAAS;AAC1B;EACF,KAAK;AACH,OAAI,OAAO,EAAE,aAAa,UACxB,OAAM,IAAI,MAAM,qCAAqC;AACvD,iBAAc,GAAG,OAAO;AACxB;EACF,QACE,OAAM,IAAI,MAAM,uBAAuB,EAAE,OAAO;;AAGpD,KAAI,EAAE,OAAO,KAAA,KAAa,OAAO,EAAE,OAAO,SACxC,OAAM,IAAI,MAAM,GAAG,EAAE,KAAK,mCAAmC;AAE/D,QAAO;;AAGT,SAAS,cAAc,GAA4B,KAAmB;AACpE,KAAI,OAAO,EAAE,SAAS,YAAY,EAAE,SAAS,GAC3C,OAAM,IAAI,MAAM,GAAG,OAAO,EAAE,KAAK,CAAC,GAAG,IAAI,6BAA6B;;AAI1E,SAAS,cAAc,GAA4B,KAAmB;AACpE,KAAI,OAAO,EAAE,SAAS,YAAY,CAAC,OAAO,SAAS,EAAE,KAAK,CACxD,OAAM,IAAI,MAAM,GAAG,OAAO,EAAE,KAAK,CAAC,GAAG,IAAI,0BAA0B;;AAIvE,SAAS,WACP,GACkD;AAClD,QAAO,EAAE,SAAS,cAAc,EAAE,SAAS,aAAa,EAAE,SAAS;;AAGrE,SAAgB,aACd,QACoD;AACpD,MAAK,IAAI,IAAI,OAAO,SAAS,GAAG,KAAK,GAAG,IACtC,KAAI,WAAW,OAAO,GAAG,CACvB,QAAO,OAAO;AAElB,QAAO;;;AAIT,IAAa,eAAb,MAA0B;CACxB,SAAiB;CACjB,YACE,SACA,cACA;AAFQ,OAAA,UAAA;AACA,OAAA,eAAA;;CAGV,KAAY,OAA8B;AACxC,OAAK,UAAU,OAAO,UAAU,WAAW,QAAQ,MAAM,SAAS,QAAQ;EAC1E,IAAI;AACJ,UAAQ,KAAK,KAAK,OAAO,QAAQ,KAAK,MAAM,IAAI;GAC9C,MAAM,OAAO,KAAK,OAAO,MAAM,GAAG,GAAG;AACrC,QAAK,SAAS,KAAK,OAAO,MAAM,KAAK,EAAE;AACvC,OAAI,CAAC,KAAK,MAAM,CAAE;AAClB,OAAI;AACF,SAAK,QAAQ,cAAc,KAAK,MAAM,KAAK,CAAC,CAAC;YACtC,KAAK;AACZ,SAAK,eAAe,MAAM,IAAI;;;;;CAMpC,MAAmB;EACjB,MAAM,OAAO,KAAK,OAAO,MAAM;AAC/B,OAAK,SAAS;AACd,MAAI,CAAC,KAAM;AACX,MAAI;AACF,QAAK,QAAQ,cAAc,KAAK,MAAM,KAAK,CAAC,CAAC;WACtC,KAAK;AACZ,QAAK,eAAe,MAAM,IAAI"}
@@ -1,5 +1,5 @@
1
- import { dirname, join, relative } from "node:path";
2
- import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs";
1
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
2
+ import { dirname } from "node:path";
3
3
  //#region src/lib/fs-utils.ts
4
4
  function ensureDir(p) {
5
5
  mkdirSync(p, { recursive: true });
@@ -10,30 +10,6 @@ function writeFileSafely(p, contents, opts = {}) {
10
10
  writeFileSync(p, contents);
11
11
  return true;
12
12
  }
13
- function copyTreeIfMissing(srcDir, destDir, vars = {}) {
14
- const written = [];
15
- if (!existsSync(srcDir)) return written;
16
- const stack = [srcDir];
17
- while (stack.length > 0) {
18
- const current = stack.pop();
19
- for (const entry of readdirSync(current)) {
20
- const srcPath = join(current, entry);
21
- const rel = relative(srcDir, srcPath);
22
- const destPath = join(destDir, rel);
23
- if (statSync(srcPath).isDirectory()) {
24
- ensureDir(destPath);
25
- stack.push(srcPath);
26
- } else {
27
- if (existsSync(destPath)) continue;
28
- const contents = applyVars(readFileSync(srcPath, "utf-8"), vars);
29
- ensureDir(dirname(destPath));
30
- writeFileSync(destPath, contents);
31
- written.push(rel);
32
- }
33
- }
34
- }
35
- return written;
36
- }
37
13
  function applyVars(text, vars) {
38
14
  return text.replace(/\{\{\s*(\w+)\s*\}\}/g, (m, key) => key in vars ? vars[key] : m);
39
15
  }
@@ -45,6 +21,6 @@ function appendIfMissing(path, line) {
45
21
  return true;
46
22
  }
47
23
  //#endregion
48
- export { appendIfMissing, applyVars, copyTreeIfMissing, ensureDir, writeFileSafely };
24
+ export { appendIfMissing, applyVars, ensureDir, writeFileSafely };
49
25
 
50
26
  //# sourceMappingURL=fs-utils.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"fs-utils.js","names":[],"sources":["../../src/lib/fs-utils.ts"],"sourcesContent":["import {\n existsSync,\n mkdirSync,\n readFileSync,\n readdirSync,\n statSync,\n writeFileSync,\n} from \"node:fs\";\nimport { dirname, join, relative } from \"node:path\";\n\nexport function ensureDir(p: string): void {\n mkdirSync(p, { recursive: true });\n}\n\nexport function writeFileSafely(\n p: string,\n contents: string,\n opts: { overwrite?: boolean } = {},\n): boolean {\n if (existsSync(p) && !opts.overwrite) return false;\n ensureDir(dirname(p));\n writeFileSync(p, contents);\n return true;\n}\n\nexport function copyTreeIfMissing(\n srcDir: string,\n destDir: string,\n vars: Record<string, string> = {},\n): string[] {\n const written: string[] = [];\n if (!existsSync(srcDir)) return written;\n const stack: string[] = [srcDir];\n while (stack.length > 0) {\n const current = stack.pop()!;\n for (const entry of readdirSync(current)) {\n const srcPath = join(current, entry);\n const rel = relative(srcDir, srcPath);\n const destPath = join(destDir, rel);\n if (statSync(srcPath).isDirectory()) {\n ensureDir(destPath);\n stack.push(srcPath);\n } else {\n if (existsSync(destPath)) continue;\n const contents = applyVars(readFileSync(srcPath, \"utf-8\"), vars);\n ensureDir(dirname(destPath));\n writeFileSync(destPath, contents);\n written.push(rel);\n }\n }\n }\n return written;\n}\n\nexport function applyVars(text: string, vars: Record<string, string>): string {\n return text.replace(/\\{\\{\\s*(\\w+)\\s*\\}\\}/g, (m, key) =>\n key in vars ? vars[key] : m,\n );\n}\n\nexport function appendIfMissing(path: string, line: string): boolean {\n const existing = existsSync(path) ? readFileSync(path, \"utf-8\") : \"\";\n if (existing.split(\"\\n\").some((l) => l.trim() === line.trim())) return false;\n ensureDir(dirname(path));\n const sep = existing.length === 0 || existing.endsWith(\"\\n\") ? \"\" : \"\\n\";\n writeFileSync(path, existing + sep + line + \"\\n\");\n return true;\n}\n"],"mappings":";;;AAUA,SAAgB,UAAU,GAAiB;AACzC,WAAU,GAAG,EAAE,WAAW,MAAM,CAAC;;AAGnC,SAAgB,gBACd,GACA,UACA,OAAgC,EAAE,EACzB;AACT,KAAI,WAAW,EAAE,IAAI,CAAC,KAAK,UAAW,QAAO;AAC7C,WAAU,QAAQ,EAAE,CAAC;AACrB,eAAc,GAAG,SAAS;AAC1B,QAAO;;AAGT,SAAgB,kBACd,QACA,SACA,OAA+B,EAAE,EACvB;CACV,MAAM,UAAoB,EAAE;AAC5B,KAAI,CAAC,WAAW,OAAO,CAAE,QAAO;CAChC,MAAM,QAAkB,CAAC,OAAO;AAChC,QAAO,MAAM,SAAS,GAAG;EACvB,MAAM,UAAU,MAAM,KAAK;AAC3B,OAAK,MAAM,SAAS,YAAY,QAAQ,EAAE;GACxC,MAAM,UAAU,KAAK,SAAS,MAAM;GACpC,MAAM,MAAM,SAAS,QAAQ,QAAQ;GACrC,MAAM,WAAW,KAAK,SAAS,IAAI;AACnC,OAAI,SAAS,QAAQ,CAAC,aAAa,EAAE;AACnC,cAAU,SAAS;AACnB,UAAM,KAAK,QAAQ;UACd;AACL,QAAI,WAAW,SAAS,CAAE;IAC1B,MAAM,WAAW,UAAU,aAAa,SAAS,QAAQ,EAAE,KAAK;AAChE,cAAU,QAAQ,SAAS,CAAC;AAC5B,kBAAc,UAAU,SAAS;AACjC,YAAQ,KAAK,IAAI;;;;AAIvB,QAAO;;AAGT,SAAgB,UAAU,MAAc,MAAsC;AAC5E,QAAO,KAAK,QAAQ,yBAAyB,GAAG,QAC9C,OAAO,OAAO,KAAK,OAAO,EAC3B;;AAGH,SAAgB,gBAAgB,MAAc,MAAuB;CACnE,MAAM,WAAW,WAAW,KAAK,GAAG,aAAa,MAAM,QAAQ,GAAG;AAClE,KAAI,SAAS,MAAM,KAAK,CAAC,MAAM,MAAM,EAAE,MAAM,KAAK,KAAK,MAAM,CAAC,CAAE,QAAO;AACvE,WAAU,QAAQ,KAAK,CAAC;AAExB,eAAc,MAAM,YADR,SAAS,WAAW,KAAK,SAAS,SAAS,KAAK,GAAG,KAAK,QAC/B,OAAO,KAAK;AACjD,QAAO"}
1
+ {"version":3,"file":"fs-utils.js","names":[],"sources":["../../src/lib/fs-utils.ts"],"sourcesContent":["import { existsSync, mkdirSync, readFileSync, writeFileSync } from \"node:fs\";\nimport { dirname } from \"node:path\";\n\nexport function ensureDir(p: string): void {\n mkdirSync(p, { recursive: true });\n}\n\nexport function writeFileSafely(\n p: string,\n contents: string,\n opts: { overwrite?: boolean } = {},\n): boolean {\n if (existsSync(p) && !opts.overwrite) return false;\n ensureDir(dirname(p));\n writeFileSync(p, contents);\n return true;\n}\n\nexport function applyVars(text: string, vars: Record<string, string>): string {\n return text.replace(/\\{\\{\\s*(\\w+)\\s*\\}\\}/g, (m, key) =>\n key in vars ? vars[key] : m,\n );\n}\n\nexport function appendIfMissing(path: string, line: string): boolean {\n const existing = existsSync(path) ? readFileSync(path, \"utf-8\") : \"\";\n if (existing.split(\"\\n\").some((l) => l.trim() === line.trim())) return false;\n ensureDir(dirname(path));\n const sep = existing.length === 0 || existing.endsWith(\"\\n\") ? \"\" : \"\\n\";\n writeFileSync(path, existing + sep + line + \"\\n\");\n return true;\n}\n"],"mappings":";;;AAGA,SAAgB,UAAU,GAAiB;AACzC,WAAU,GAAG,EAAE,WAAW,MAAM,CAAC;;AAGnC,SAAgB,gBACd,GACA,UACA,OAAgC,EAAE,EACzB;AACT,KAAI,WAAW,EAAE,IAAI,CAAC,KAAK,UAAW,QAAO;AAC7C,WAAU,QAAQ,EAAE,CAAC;AACrB,eAAc,GAAG,SAAS;AAC1B,QAAO;;AAGT,SAAgB,UAAU,MAAc,MAAsC;AAC5E,QAAO,KAAK,QAAQ,yBAAyB,GAAG,QAC9C,OAAO,OAAO,KAAK,OAAO,EAC3B;;AAGH,SAAgB,gBAAgB,MAAc,MAAuB;CACnE,MAAM,WAAW,WAAW,KAAK,GAAG,aAAa,MAAM,QAAQ,GAAG;AAClE,KAAI,SAAS,MAAM,KAAK,CAAC,MAAM,MAAM,EAAE,MAAM,KAAK,KAAK,MAAM,CAAC,CAAE,QAAO;AACvE,WAAU,QAAQ,KAAK,CAAC;AAExB,eAAc,MAAM,YADR,SAAS,WAAW,KAAK,SAAS,SAAS,KAAK,GAAG,KAAK,QAC/B,OAAO,KAAK;AACjD,QAAO"}
@@ -1,6 +1,6 @@
1
1
  import { readJsonIfExists, systemRunsDir } from "./run-dir.js";
2
- import { join } from "node:path";
3
2
  import { existsSync, readdirSync, statSync } from "node:fs";
3
+ import { join } from "node:path";
4
4
  //#region src/lib/leaderboard.ts
5
5
  function listRuns(stateDir, systemId) {
6
6
  const dir = systemRunsDir(stateDir, systemId);
@@ -1 +1 @@
1
- {"version":3,"file":"leaderboard.js","names":[],"sources":["../../src/lib/leaderboard.ts"],"sourcesContent":["import { existsSync, readdirSync, statSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { findTerminal, type Event } from \"./events.js\";\nimport { systemRunsDir, readJsonIfExists, readNdjson } from \"./run-dir.js\";\n\nexport interface RunSummary {\n run_id: string;\n system: string;\n variant: string;\n parent_id: string | null;\n hypothesis: string;\n status: \"complete\" | \"crashed\" | \"aborted\" | \"running\";\n score: number | null;\n n: number | null;\n promoted: boolean | null;\n started_at: string;\n ended_at: string | null;\n eval_version: number;\n dataset_version: string;\n events_path: string;\n state_path: string;\n manifest_path: string;\n linear_issue_id?: string | null;\n linear_issue_url?: string | null;\n}\n\nexport interface ManifestFile {\n run_id: string;\n system: string;\n variant: string;\n parent_id: string | null;\n hypothesis: string;\n git_sha?: string | null;\n git_branch?: string | null;\n worktree_root?: string | null;\n git_common_dir?: string | null;\n eval_version: number;\n dataset_version: string;\n started_at: string;\n host?: string;\n kaizen_version?: string;\n state_dir?: string;\n diagnostic?: boolean;\n linear_issue_id?: string | null;\n linear_issue_url?: string | null;\n}\n\nexport interface StateFile {\n run_id: string;\n status: RunSummary[\"status\"];\n score: number | null;\n n_total?: number | null;\n n_done?: number;\n promoted?: boolean | null;\n ended_at?: string | null;\n}\n\nexport function listRuns(stateDir: string, systemId: string): RunSummary[] {\n const dir = systemRunsDir(stateDir, systemId);\n if (!existsSync(dir)) return [];\n const out: RunSummary[] = [];\n for (const entry of readdirSync(dir)) {\n const runDir = join(dir, entry);\n if (!statSync(runDir).isDirectory()) continue;\n const manifest_path = join(runDir, \"manifest.json\");\n const state_path = join(runDir, \"state.json\");\n const events_path = join(runDir, \"events.jsonl\");\n const manifest = readJsonIfExists<ManifestFile>(manifest_path);\n if (!manifest) continue;\n const state = readJsonIfExists<StateFile>(state_path);\n out.push({\n run_id: manifest.run_id,\n system: manifest.system,\n variant: manifest.variant,\n parent_id: manifest.parent_id,\n hypothesis: manifest.hypothesis,\n status: state?.status ?? \"running\",\n score: state?.score ?? null,\n n: state?.n_total ?? null,\n promoted: state?.promoted ?? null,\n started_at: manifest.started_at,\n ended_at: state?.ended_at ?? null,\n eval_version: manifest.eval_version,\n dataset_version: manifest.dataset_version,\n events_path,\n state_path,\n manifest_path,\n linear_issue_id: manifest.linear_issue_id ?? null,\n linear_issue_url: manifest.linear_issue_url ?? null,\n });\n }\n // Newest first\n out.sort((a, b) => (b.started_at < a.started_at ? -1 : 1));\n return out;\n}\n\n/**\n * Promoted baseline = the most recent run that was promoted, under matching versions.\n * Falls back to the highest-scored complete run if nothing has been promoted yet\n * (e.g. the very first run before promotion semantics kick in).\n */\nexport function currentBaseline(\n runs: RunSummary[],\n evalVersion: number,\n datasetVersion: string,\n): RunSummary | null {\n const eligible = runs.filter(\n (r) =>\n r.status === \"complete\" &&\n r.score !== null &&\n r.eval_version === evalVersion &&\n r.dataset_version === datasetVersion,\n );\n if (eligible.length === 0) return null;\n // listRuns sorts newest-first; find the newest with promoted: true.\n const promoted = eligible.find((r) => r.promoted === true);\n if (promoted) return promoted;\n // Fallback: highest-scored eligible run (handles legacy/imported runs without promotion data).\n let best = eligible[0];\n for (const r of eligible) {\n if (r.score! > best.score!) best = r;\n }\n return best;\n}\n\n/** Recover the score from events.jsonl if state.json was missing or pre-terminal. */\nexport function deriveScoreFromEvents(eventsPath: string): number | null {\n const events = readNdjson<Event>(eventsPath);\n const term = findTerminal(events);\n if (!term || term.type !== \"complete\") return null;\n return term.score;\n}\n"],"mappings":";;;;AAyDA,SAAgB,SAAS,UAAkB,UAAgC;CACzE,MAAM,MAAM,cAAc,UAAU,SAAS;AAC7C,KAAI,CAAC,WAAW,IAAI,CAAE,QAAO,EAAE;CAC/B,MAAM,MAAoB,EAAE;AAC5B,MAAK,MAAM,SAAS,YAAY,IAAI,EAAE;EACpC,MAAM,SAAS,KAAK,KAAK,MAAM;AAC/B,MAAI,CAAC,SAAS,OAAO,CAAC,aAAa,CAAE;EACrC,MAAM,gBAAgB,KAAK,QAAQ,gBAAgB;EACnD,MAAM,aAAa,KAAK,QAAQ,aAAa;EAC7C,MAAM,cAAc,KAAK,QAAQ,eAAe;EAChD,MAAM,WAAW,iBAA+B,cAAc;AAC9D,MAAI,CAAC,SAAU;EACf,MAAM,QAAQ,iBAA4B,WAAW;AACrD,MAAI,KAAK;GACP,QAAQ,SAAS;GACjB,QAAQ,SAAS;GACjB,SAAS,SAAS;GAClB,WAAW,SAAS;GACpB,YAAY,SAAS;GACrB,QAAQ,OAAO,UAAU;GACzB,OAAO,OAAO,SAAS;GACvB,GAAG,OAAO,WAAW;GACrB,UAAU,OAAO,YAAY;GAC7B,YAAY,SAAS;GACrB,UAAU,OAAO,YAAY;GAC7B,cAAc,SAAS;GACvB,iBAAiB,SAAS;GAC1B;GACA;GACA;GACA,iBAAiB,SAAS,mBAAmB;GAC7C,kBAAkB,SAAS,oBAAoB;GAChD,CAAC;;AAGJ,KAAI,MAAM,GAAG,MAAO,EAAE,aAAa,EAAE,aAAa,KAAK,EAAG;AAC1D,QAAO;;;;;;;AAQT,SAAgB,gBACd,MACA,aACA,gBACmB;CACnB,MAAM,WAAW,KAAK,QACnB,MACC,EAAE,WAAW,cACb,EAAE,UAAU,QACZ,EAAE,iBAAiB,eACnB,EAAE,oBAAoB,eACzB;AACD,KAAI,SAAS,WAAW,EAAG,QAAO;CAElC,MAAM,WAAW,SAAS,MAAM,MAAM,EAAE,aAAa,KAAK;AAC1D,KAAI,SAAU,QAAO;CAErB,IAAI,OAAO,SAAS;AACpB,MAAK,MAAM,KAAK,SACd,KAAI,EAAE,QAAS,KAAK,MAAQ,QAAO;AAErC,QAAO"}
1
+ {"version":3,"file":"leaderboard.js","names":[],"sources":["../../src/lib/leaderboard.ts"],"sourcesContent":["import { existsSync, readdirSync, statSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { systemRunsDir, readJsonIfExists } from \"./run-dir.js\";\n\nexport interface RunSummary {\n run_id: string;\n system: string;\n variant: string;\n parent_id: string | null;\n hypothesis: string;\n status: \"complete\" | \"crashed\" | \"aborted\" | \"running\";\n score: number | null;\n n: number | null;\n promoted: boolean | null;\n started_at: string;\n ended_at: string | null;\n eval_version: number;\n dataset_version: string;\n events_path: string;\n state_path: string;\n manifest_path: string;\n linear_issue_id?: string | null;\n linear_issue_url?: string | null;\n}\n\nexport interface ManifestFile {\n run_id: string;\n system: string;\n variant: string;\n parent_id: string | null;\n hypothesis: string;\n git_sha?: string | null;\n git_branch?: string | null;\n worktree_root?: string | null;\n git_common_dir?: string | null;\n eval_version: number;\n dataset_version: string;\n started_at: string;\n host?: string;\n kaizen_version?: string;\n state_dir?: string;\n diagnostic?: boolean;\n linear_issue_id?: string | null;\n linear_issue_url?: string | null;\n}\n\nexport interface StateFile {\n run_id: string;\n status: RunSummary[\"status\"];\n score: number | null;\n n_total?: number | null;\n n_done?: number;\n promoted?: boolean | null;\n ended_at?: string | null;\n}\n\nexport function listRuns(stateDir: string, systemId: string): RunSummary[] {\n const dir = systemRunsDir(stateDir, systemId);\n if (!existsSync(dir)) return [];\n const out: RunSummary[] = [];\n for (const entry of readdirSync(dir)) {\n const runDir = join(dir, entry);\n if (!statSync(runDir).isDirectory()) continue;\n const manifest_path = join(runDir, \"manifest.json\");\n const state_path = join(runDir, \"state.json\");\n const events_path = join(runDir, \"events.jsonl\");\n const manifest = readJsonIfExists<ManifestFile>(manifest_path);\n if (!manifest) continue;\n const state = readJsonIfExists<StateFile>(state_path);\n out.push({\n run_id: manifest.run_id,\n system: manifest.system,\n variant: manifest.variant,\n parent_id: manifest.parent_id,\n hypothesis: manifest.hypothesis,\n status: state?.status ?? \"running\",\n score: state?.score ?? null,\n n: state?.n_total ?? null,\n promoted: state?.promoted ?? null,\n started_at: manifest.started_at,\n ended_at: state?.ended_at ?? null,\n eval_version: manifest.eval_version,\n dataset_version: manifest.dataset_version,\n events_path,\n state_path,\n manifest_path,\n linear_issue_id: manifest.linear_issue_id ?? null,\n linear_issue_url: manifest.linear_issue_url ?? null,\n });\n }\n // Newest first\n out.sort((a, b) => (b.started_at < a.started_at ? -1 : 1));\n return out;\n}\n\n/**\n * Promoted baseline = the most recent run that was promoted, under matching versions.\n * Falls back to the highest-scored complete run if nothing has been promoted yet\n * (e.g. the very first run before promotion semantics kick in).\n */\nexport function currentBaseline(\n runs: RunSummary[],\n evalVersion: number,\n datasetVersion: string,\n): RunSummary | null {\n const eligible = runs.filter(\n (r) =>\n r.status === \"complete\" &&\n r.score !== null &&\n r.eval_version === evalVersion &&\n r.dataset_version === datasetVersion,\n );\n if (eligible.length === 0) return null;\n // listRuns sorts newest-first; find the newest with promoted: true.\n const promoted = eligible.find((r) => r.promoted === true);\n if (promoted) return promoted;\n // Fallback: highest-scored eligible run when promotion data is unavailable.\n let best = eligible[0];\n for (const r of eligible) {\n if (r.score! > best.score!) best = r;\n }\n return best;\n}\n"],"mappings":";;;;AAwDA,SAAgB,SAAS,UAAkB,UAAgC;CACzE,MAAM,MAAM,cAAc,UAAU,SAAS;AAC7C,KAAI,CAAC,WAAW,IAAI,CAAE,QAAO,EAAE;CAC/B,MAAM,MAAoB,EAAE;AAC5B,MAAK,MAAM,SAAS,YAAY,IAAI,EAAE;EACpC,MAAM,SAAS,KAAK,KAAK,MAAM;AAC/B,MAAI,CAAC,SAAS,OAAO,CAAC,aAAa,CAAE;EACrC,MAAM,gBAAgB,KAAK,QAAQ,gBAAgB;EACnD,MAAM,aAAa,KAAK,QAAQ,aAAa;EAC7C,MAAM,cAAc,KAAK,QAAQ,eAAe;EAChD,MAAM,WAAW,iBAA+B,cAAc;AAC9D,MAAI,CAAC,SAAU;EACf,MAAM,QAAQ,iBAA4B,WAAW;AACrD,MAAI,KAAK;GACP,QAAQ,SAAS;GACjB,QAAQ,SAAS;GACjB,SAAS,SAAS;GAClB,WAAW,SAAS;GACpB,YAAY,SAAS;GACrB,QAAQ,OAAO,UAAU;GACzB,OAAO,OAAO,SAAS;GACvB,GAAG,OAAO,WAAW;GACrB,UAAU,OAAO,YAAY;GAC7B,YAAY,SAAS;GACrB,UAAU,OAAO,YAAY;GAC7B,cAAc,SAAS;GACvB,iBAAiB,SAAS;GAC1B;GACA;GACA;GACA,iBAAiB,SAAS,mBAAmB;GAC7C,kBAAkB,SAAS,oBAAoB;GAChD,CAAC;;AAGJ,KAAI,MAAM,GAAG,MAAO,EAAE,aAAa,EAAE,aAAa,KAAK,EAAG;AAC1D,QAAO;;;;;;;AAQT,SAAgB,gBACd,MACA,aACA,gBACmB;CACnB,MAAM,WAAW,KAAK,QACnB,MACC,EAAE,WAAW,cACb,EAAE,UAAU,QACZ,EAAE,iBAAiB,eACnB,EAAE,oBAAoB,eACzB;AACD,KAAI,SAAS,WAAW,EAAG,QAAO;CAElC,MAAM,WAAW,SAAS,MAAM,MAAM,EAAE,aAAa,KAAK;AAC1D,KAAI,SAAU,QAAO;CAErB,IAAI,OAAO,SAAS;AACpB,MAAK,MAAM,KAAK,SACd,KAAI,EAAE,QAAS,KAAK,MAAQ,QAAO;AAErC,QAAO"}
package/dist/lib/paths.js CHANGED
@@ -1,3 +1,4 @@
1
+ import { resolveKaizenStateDir } from "../shared/workspace-paths.js";
1
2
  import { dirname, resolve } from "node:path";
2
3
  import { fileURLToPath } from "node:url";
3
4
  //#region src/lib/paths.ts
@@ -12,9 +13,8 @@ function workspaceRoot() {
12
13
  const raw = process.env.KAIZEN_WORKSPACE;
13
14
  return raw ? resolve(raw) : process.cwd();
14
15
  }
15
- function resolveStateDir(workspace, explicit) {
16
- const raw = explicit ?? process.env.KAIZEN_STATE_DIR;
17
- return raw ? resolve(workspace, raw) : resolve(workspace, ".kaizen");
16
+ function resolveStateDir(workspace) {
17
+ return resolveKaizenStateDir(workspace);
18
18
  }
19
19
  //#endregion
20
20
  export { packageRoot, resolveStateDir, templatesDir, workspaceRoot };
@@ -1 +1 @@
1
- {"version":3,"file":"paths.js","names":[],"sources":["../../src/lib/paths.ts"],"sourcesContent":["import { dirname, resolve } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\n\nconst HERE = dirname(fileURLToPath(import.meta.url));\n\nexport function packageRoot(): string {\n // src/lib/paths.ts -> package root\n return resolve(HERE, \"..\", \"..\");\n}\n\nexport function templatesDir(): string {\n return resolve(packageRoot(), \"templates\");\n}\n\nexport function workspaceRoot(): string {\n const raw = process.env.KAIZEN_WORKSPACE;\n return raw ? resolve(raw) : process.cwd();\n}\n\nexport function resolveStateDir(workspace: string, explicit?: string): string {\n const raw = explicit ?? process.env.KAIZEN_STATE_DIR;\n return raw ? resolve(workspace, raw) : resolve(workspace, \".kaizen\");\n}\n"],"mappings":";;;AAGA,MAAM,OAAO,QAAQ,cAAc,OAAO,KAAK,IAAI,CAAC;AAEpD,SAAgB,cAAsB;AAEpC,QAAO,QAAQ,MAAM,MAAM,KAAK;;AAGlC,SAAgB,eAAuB;AACrC,QAAO,QAAQ,aAAa,EAAE,YAAY;;AAG5C,SAAgB,gBAAwB;CACtC,MAAM,MAAM,QAAQ,IAAI;AACxB,QAAO,MAAM,QAAQ,IAAI,GAAG,QAAQ,KAAK;;AAG3C,SAAgB,gBAAgB,WAAmB,UAA2B;CAC5E,MAAM,MAAM,YAAY,QAAQ,IAAI;AACpC,QAAO,MAAM,QAAQ,WAAW,IAAI,GAAG,QAAQ,WAAW,UAAU"}
1
+ {"version":3,"file":"paths.js","names":[],"sources":["../../src/lib/paths.ts"],"sourcesContent":["import { dirname, resolve } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport {\n defaultKaizenStateDir,\n kaizenConfigPath,\n kaizenDir,\n kaizenSystemDir,\n kaizenSystemPath,\n kaizenSystemsDir,\n primaryWorktreeRoot,\n resolveKaizenStateDir,\n} from \"../../shared/workspace-paths.js\";\n\nconst HERE = dirname(fileURLToPath(import.meta.url));\n\nexport function packageRoot(): string {\n // src/lib/paths.ts -> package root\n return resolve(HERE, \"..\", \"..\");\n}\n\nexport function templatesDir(): string {\n return resolve(packageRoot(), \"templates\");\n}\n\nexport function workspaceRoot(): string {\n const raw = process.env.KAIZEN_WORKSPACE;\n return raw ? resolve(raw) : process.cwd();\n}\n\nexport {\n defaultKaizenStateDir,\n kaizenConfigPath,\n kaizenDir,\n kaizenSystemDir,\n kaizenSystemPath,\n kaizenSystemsDir,\n primaryWorktreeRoot,\n};\n\nexport function resolveStateDir(workspace: string): string {\n return resolveKaizenStateDir(workspace);\n}\n"],"mappings":";;;;AAaA,MAAM,OAAO,QAAQ,cAAc,OAAO,KAAK,IAAI,CAAC;AAEpD,SAAgB,cAAsB;AAEpC,QAAO,QAAQ,MAAM,MAAM,KAAK;;AAGlC,SAAgB,eAAuB;AACrC,QAAO,QAAQ,aAAa,EAAE,YAAY;;AAG5C,SAAgB,gBAAwB;CACtC,MAAM,MAAM,QAAQ,IAAI;AACxB,QAAO,MAAM,QAAQ,IAAI,GAAG,QAAQ,KAAK;;AAa3C,SAAgB,gBAAgB,WAA2B;AACzD,QAAO,sBAAsB,UAAU"}
@@ -1 +1 @@
1
- {"version":3,"file":"promotion.js","names":[],"sources":["../../src/lib/promotion.ts"],"sourcesContent":["import { bootstrapMeanCI, pairDeltas } from \"./bootstrap.js\";\nimport type { Event, ItemEvent, PromotionEvent } from \"./events.js\";\nimport { readNdjson } from \"./run-dir.js\";\n\nexport interface BaselineSummary {\n run_id: string;\n variant: string;\n score: number;\n events_path: string; // path to events.jsonl, used to lazily read item scores\n}\n\nexport type PromotionDecision = Omit<PromotionEvent, \"type\" | \"ts\">;\n\n/** Read item scores from a run's events.jsonl. Returns Map<item_id, score>. */\nexport function readItemScores(eventsPath: string): Map<string, number> {\n const events = readNdjson<Event>(eventsPath);\n const scores = new Map<string, number>();\n for (const e of events) {\n if (e.type === \"item\") {\n const it = e;\n scores.set(it.id, it.score);\n }\n }\n return scores;\n}\n\nexport interface DecideOpts {\n /** Set of subgroup keys to guard against regressions. */\n subgroupKeys?: string[];\n /** Bootstrap resamples; default 10k. */\n resamples?: number;\n /** Seed for determinism. */\n seed?: number;\n}\n\n/**\n * Decide auto-promotion given the current run's item scores and an optional baseline.\n * Returns a structured decision suitable for emitting as a `promotion` event.\n */\nexport function decidePromotion(\n currentScores: Map<string, number>,\n currentItemEvents: ItemEvent[],\n baseline: BaselineSummary | null,\n opts: DecideOpts = {},\n): PromotionDecision {\n if (!baseline) {\n return {\n promoted: true,\n rule: \"no_baseline\",\n n_compared: 0,\n details: \"no prior baseline; this run becomes the baseline by default\",\n };\n }\n\n const baselineScores = readItemScores(baseline.events_path);\n const { deltas, matchedIds } = pairDeltas(currentScores, baselineScores);\n if (deltas.length < 5) {\n return {\n promoted: false,\n rule: \"stat_insufficient\",\n n_compared: deltas.length,\n parent_run_id: baseline.run_id,\n details: `only ${deltas.length} items paired with baseline ${baseline.run_id}; need ≥5 to bootstrap`,\n };\n }\n\n const ci = bootstrapMeanCI(deltas, {\n resamples: opts.resamples,\n seed: opts.seed,\n });\n if (!(ci.ci_low > 0)) {\n return {\n promoted: false,\n rule: \"stat_insufficient\",\n ci_low: ci.ci_low,\n ci_high: ci.ci_high,\n mean_delta: ci.mean_delta,\n n_compared: ci.n,\n parent_run_id: baseline.run_id,\n details: `95% CI on mean delta is [${ci.ci_low.toFixed(4)}, ${ci.ci_high.toFixed(4)}]; lower bound is not > 0`,\n };\n }\n\n // Headline gate passed. Now check subgroup guards.\n if (opts.subgroupKeys && opts.subgroupKeys.length > 0) {\n const subgroupOf = (id: string, key: string): string | undefined => {\n for (const ev of currentItemEvents) {\n if (ev.id === id && ev.subgroup) return ev.subgroup[key];\n }\n return undefined;\n };\n for (const key of opts.subgroupKeys) {\n const groups = new Map<string, number[]>();\n for (let i = 0; i < matchedIds.length; i++) {\n const sg = subgroupOf(matchedIds[i], key);\n if (sg === undefined) continue;\n if (!groups.has(sg)) groups.set(sg, []);\n groups.get(sg)!.push(deltas[i]);\n }\n for (const [sg, sgDeltas] of groups) {\n if (sgDeltas.length < 5) continue; // not enough to test; ignore\n const sgCi = bootstrapMeanCI(sgDeltas, {\n resamples: opts.resamples,\n seed: opts.seed,\n });\n if (sgCi.ci_high < 0) {\n return {\n promoted: false,\n rule: \"subgroup_regression\",\n ci_low: ci.ci_low,\n ci_high: ci.ci_high,\n mean_delta: ci.mean_delta,\n n_compared: ci.n,\n parent_run_id: baseline.run_id,\n details: `subgroup ${key}=${sg} regressed: 95% CI [${sgCi.ci_low.toFixed(4)}, ${sgCi.ci_high.toFixed(4)}]`,\n };\n }\n }\n }\n }\n\n return {\n promoted: true,\n rule: \"auto\",\n ci_low: ci.ci_low,\n ci_high: ci.ci_high,\n mean_delta: ci.mean_delta,\n n_compared: ci.n,\n parent_run_id: baseline.run_id,\n };\n}\n"],"mappings":";;;;AAcA,SAAgB,eAAe,YAAyC;CACtE,MAAM,SAAS,WAAkB,WAAW;CAC5C,MAAM,yBAAS,IAAI,KAAqB;AACxC,MAAK,MAAM,KAAK,OACd,KAAI,EAAE,SAAS,QAAQ;EACrB,MAAM,KAAK;AACX,SAAO,IAAI,GAAG,IAAI,GAAG,MAAM;;AAG/B,QAAO;;;;;;AAgBT,SAAgB,gBACd,eACA,mBACA,UACA,OAAmB,EAAE,EACF;AACnB,KAAI,CAAC,SACH,QAAO;EACL,UAAU;EACV,MAAM;EACN,YAAY;EACZ,SAAS;EACV;CAIH,MAAM,EAAE,QAAQ,eAAe,WAAW,eADnB,eAAe,SAAS,YACwB,CAAC;AACxE,KAAI,OAAO,SAAS,EAClB,QAAO;EACL,UAAU;EACV,MAAM;EACN,YAAY,OAAO;EACnB,eAAe,SAAS;EACxB,SAAS,QAAQ,OAAO,OAAO,8BAA8B,SAAS,OAAO;EAC9E;CAGH,MAAM,KAAK,gBAAgB,QAAQ;EACjC,WAAW,KAAK;EAChB,MAAM,KAAK;EACZ,CAAC;AACF,KAAI,EAAE,GAAG,SAAS,GAChB,QAAO;EACL,UAAU;EACV,MAAM;EACN,QAAQ,GAAG;EACX,SAAS,GAAG;EACZ,YAAY,GAAG;EACf,YAAY,GAAG;EACf,eAAe,SAAS;EACxB,SAAS,4BAA4B,GAAG,OAAO,QAAQ,EAAE,CAAC,IAAI,GAAG,QAAQ,QAAQ,EAAE,CAAC;EACrF;AAIH,KAAI,KAAK,gBAAgB,KAAK,aAAa,SAAS,GAAG;EACrD,MAAM,cAAc,IAAY,QAAoC;AAClE,QAAK,MAAM,MAAM,kBACf,KAAI,GAAG,OAAO,MAAM,GAAG,SAAU,QAAO,GAAG,SAAS;;AAIxD,OAAK,MAAM,OAAO,KAAK,cAAc;GACnC,MAAM,yBAAS,IAAI,KAAuB;AAC1C,QAAK,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;IAC1C,MAAM,KAAK,WAAW,WAAW,IAAI,IAAI;AACzC,QAAI,OAAO,KAAA,EAAW;AACtB,QAAI,CAAC,OAAO,IAAI,GAAG,CAAE,QAAO,IAAI,IAAI,EAAE,CAAC;AACvC,WAAO,IAAI,GAAG,CAAE,KAAK,OAAO,GAAG;;AAEjC,QAAK,MAAM,CAAC,IAAI,aAAa,QAAQ;AACnC,QAAI,SAAS,SAAS,EAAG;IACzB,MAAM,OAAO,gBAAgB,UAAU;KACrC,WAAW,KAAK;KAChB,MAAM,KAAK;KACZ,CAAC;AACF,QAAI,KAAK,UAAU,EACjB,QAAO;KACL,UAAU;KACV,MAAM;KACN,QAAQ,GAAG;KACX,SAAS,GAAG;KACZ,YAAY,GAAG;KACf,YAAY,GAAG;KACf,eAAe,SAAS;KACxB,SAAS,YAAY,IAAI,GAAG,GAAG,sBAAsB,KAAK,OAAO,QAAQ,EAAE,CAAC,IAAI,KAAK,QAAQ,QAAQ,EAAE,CAAC;KACzG;;;;AAMT,QAAO;EACL,UAAU;EACV,MAAM;EACN,QAAQ,GAAG;EACX,SAAS,GAAG;EACZ,YAAY,GAAG;EACf,YAAY,GAAG;EACf,eAAe,SAAS;EACzB"}
1
+ {"version":3,"file":"promotion.js","names":[],"sources":["../../src/lib/promotion.ts"],"sourcesContent":["import { bootstrapMeanCI, pairDeltas } from \"./bootstrap.js\";\nimport type { Event, ItemEvent, PromotionEvent } from \"./events.js\";\nimport { readNdjson } from \"./run-dir.js\";\n\nexport interface BaselineSummary {\n run_id: string;\n variant: string;\n score: number;\n events_path: string; // path to events.jsonl, used to lazily read item scores\n}\n\nexport type PromotionDecision = Omit<PromotionEvent, \"type\" | \"ts\">;\n\n/** Read item scores from a run's events.jsonl. Returns Map<item_id, score>. */\nfunction readItemScores(eventsPath: string): Map<string, number> {\n const events = readNdjson<Event>(eventsPath);\n const scores = new Map<string, number>();\n for (const e of events) {\n if (e.type === \"item\") {\n const it = e;\n scores.set(it.id, it.score);\n }\n }\n return scores;\n}\n\nexport interface DecideOpts {\n /** Set of subgroup keys to guard against regressions. */\n subgroupKeys?: string[];\n /** Bootstrap resamples; default 10k. */\n resamples?: number;\n /** Seed for determinism. */\n seed?: number;\n}\n\n/**\n * Decide auto-promotion given the current run's item scores and an optional baseline.\n * Returns a structured decision suitable for emitting as a `promotion` event.\n */\nexport function decidePromotion(\n currentScores: Map<string, number>,\n currentItemEvents: ItemEvent[],\n baseline: BaselineSummary | null,\n opts: DecideOpts = {},\n): PromotionDecision {\n if (!baseline) {\n return {\n promoted: true,\n rule: \"no_baseline\",\n n_compared: 0,\n details: \"no prior baseline; this run becomes the baseline by default\",\n };\n }\n\n const baselineScores = readItemScores(baseline.events_path);\n const { deltas, matchedIds } = pairDeltas(currentScores, baselineScores);\n if (deltas.length < 5) {\n return {\n promoted: false,\n rule: \"stat_insufficient\",\n n_compared: deltas.length,\n parent_run_id: baseline.run_id,\n details: `only ${deltas.length} items paired with baseline ${baseline.run_id}; need ≥5 to bootstrap`,\n };\n }\n\n const ci = bootstrapMeanCI(deltas, {\n resamples: opts.resamples,\n seed: opts.seed,\n });\n if (!(ci.ci_low > 0)) {\n return {\n promoted: false,\n rule: \"stat_insufficient\",\n ci_low: ci.ci_low,\n ci_high: ci.ci_high,\n mean_delta: ci.mean_delta,\n n_compared: ci.n,\n parent_run_id: baseline.run_id,\n details: `95% CI on mean delta is [${ci.ci_low.toFixed(4)}, ${ci.ci_high.toFixed(4)}]; lower bound is not > 0`,\n };\n }\n\n // Headline gate passed. Now check subgroup guards.\n if (opts.subgroupKeys && opts.subgroupKeys.length > 0) {\n const subgroupOf = (id: string, key: string): string | undefined => {\n for (const ev of currentItemEvents) {\n if (ev.id === id && ev.subgroup) return ev.subgroup[key];\n }\n return undefined;\n };\n for (const key of opts.subgroupKeys) {\n const groups = new Map<string, number[]>();\n for (let i = 0; i < matchedIds.length; i++) {\n const sg = subgroupOf(matchedIds[i], key);\n if (sg === undefined) continue;\n if (!groups.has(sg)) groups.set(sg, []);\n groups.get(sg)!.push(deltas[i]);\n }\n for (const [sg, sgDeltas] of groups) {\n if (sgDeltas.length < 5) continue; // not enough to test; ignore\n const sgCi = bootstrapMeanCI(sgDeltas, {\n resamples: opts.resamples,\n seed: opts.seed,\n });\n if (sgCi.ci_high < 0) {\n return {\n promoted: false,\n rule: \"subgroup_regression\",\n ci_low: ci.ci_low,\n ci_high: ci.ci_high,\n mean_delta: ci.mean_delta,\n n_compared: ci.n,\n parent_run_id: baseline.run_id,\n details: `subgroup ${key}=${sg} regressed: 95% CI [${sgCi.ci_low.toFixed(4)}, ${sgCi.ci_high.toFixed(4)}]`,\n };\n }\n }\n }\n }\n\n return {\n promoted: true,\n rule: \"auto\",\n ci_low: ci.ci_low,\n ci_high: ci.ci_high,\n mean_delta: ci.mean_delta,\n n_compared: ci.n,\n parent_run_id: baseline.run_id,\n };\n}\n"],"mappings":";;;;AAcA,SAAS,eAAe,YAAyC;CAC/D,MAAM,SAAS,WAAkB,WAAW;CAC5C,MAAM,yBAAS,IAAI,KAAqB;AACxC,MAAK,MAAM,KAAK,OACd,KAAI,EAAE,SAAS,QAAQ;EACrB,MAAM,KAAK;AACX,SAAO,IAAI,GAAG,IAAI,GAAG,MAAM;;AAG/B,QAAO;;;;;;AAgBT,SAAgB,gBACd,eACA,mBACA,UACA,OAAmB,EAAE,EACF;AACnB,KAAI,CAAC,SACH,QAAO;EACL,UAAU;EACV,MAAM;EACN,YAAY;EACZ,SAAS;EACV;CAIH,MAAM,EAAE,QAAQ,eAAe,WAAW,eADnB,eAAe,SAAS,YACwB,CAAC;AACxE,KAAI,OAAO,SAAS,EAClB,QAAO;EACL,UAAU;EACV,MAAM;EACN,YAAY,OAAO;EACnB,eAAe,SAAS;EACxB,SAAS,QAAQ,OAAO,OAAO,8BAA8B,SAAS,OAAO;EAC9E;CAGH,MAAM,KAAK,gBAAgB,QAAQ;EACjC,WAAW,KAAK;EAChB,MAAM,KAAK;EACZ,CAAC;AACF,KAAI,EAAE,GAAG,SAAS,GAChB,QAAO;EACL,UAAU;EACV,MAAM;EACN,QAAQ,GAAG;EACX,SAAS,GAAG;EACZ,YAAY,GAAG;EACf,YAAY,GAAG;EACf,eAAe,SAAS;EACxB,SAAS,4BAA4B,GAAG,OAAO,QAAQ,EAAE,CAAC,IAAI,GAAG,QAAQ,QAAQ,EAAE,CAAC;EACrF;AAIH,KAAI,KAAK,gBAAgB,KAAK,aAAa,SAAS,GAAG;EACrD,MAAM,cAAc,IAAY,QAAoC;AAClE,QAAK,MAAM,MAAM,kBACf,KAAI,GAAG,OAAO,MAAM,GAAG,SAAU,QAAO,GAAG,SAAS;;AAIxD,OAAK,MAAM,OAAO,KAAK,cAAc;GACnC,MAAM,yBAAS,IAAI,KAAuB;AAC1C,QAAK,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;IAC1C,MAAM,KAAK,WAAW,WAAW,IAAI,IAAI;AACzC,QAAI,OAAO,KAAA,EAAW;AACtB,QAAI,CAAC,OAAO,IAAI,GAAG,CAAE,QAAO,IAAI,IAAI,EAAE,CAAC;AACvC,WAAO,IAAI,GAAG,CAAE,KAAK,OAAO,GAAG;;AAEjC,QAAK,MAAM,CAAC,IAAI,aAAa,QAAQ;AACnC,QAAI,SAAS,SAAS,EAAG;IACzB,MAAM,OAAO,gBAAgB,UAAU;KACrC,WAAW,KAAK;KAChB,MAAM,KAAK;KACZ,CAAC;AACF,QAAI,KAAK,UAAU,EACjB,QAAO;KACL,UAAU;KACV,MAAM;KACN,QAAQ,GAAG;KACX,SAAS,GAAG;KACZ,YAAY,GAAG;KACf,YAAY,GAAG;KACf,eAAe,SAAS;KACxB,SAAS,YAAY,IAAI,GAAG,GAAG,sBAAsB,KAAK,OAAO,QAAQ,EAAE,CAAC,IAAI,KAAK,QAAQ,QAAQ,EAAE,CAAC;KACzG;;;;AAMT,QAAO;EACL,UAAU;EACV,MAAM;EACN,QAAQ,GAAG;EACX,SAAS,GAAG;EACZ,YAAY,GAAG;EACf,YAAY,GAAG;EACf,eAAe,SAAS;EACzB"}
@@ -1,5 +1,5 @@
1
- import { dirname, join } from "node:path";
2
1
  import { appendFileSync, closeSync, existsSync, fsyncSync, mkdirSync, openSync, readFileSync, renameSync, unlinkSync, writeFileSync } from "node:fs";
2
+ import { dirname, join } from "node:path";
3
3
  import { randomBytes } from "node:crypto";
4
4
  //#region src/lib/run-dir.ts
5
5
  function systemRunsDir(stateDir, systemId) {
@@ -1 +1 @@
1
- {"version":3,"file":"run-dir.js","names":[],"sources":["../../src/lib/run-dir.ts"],"sourcesContent":["import { randomBytes } from \"node:crypto\";\nimport {\n appendFileSync,\n closeSync,\n existsSync,\n fsyncSync,\n mkdirSync,\n openSync,\n readFileSync,\n renameSync,\n unlinkSync,\n writeFileSync,\n} from \"node:fs\";\nimport { dirname, join } from \"node:path\";\n\nexport const KAIZEN_DIR = \".kaizen\";\n\nexport function kaizenDir(workspace: string): string {\n return join(workspace, KAIZEN_DIR);\n}\n\nexport function systemRunsDir(stateDir: string, systemId: string): string {\n return join(stateDir, \"runs\", systemId);\n}\n\nexport function runDir(\n stateDir: string,\n systemId: string,\n runId: string,\n): string {\n return join(systemRunsDir(stateDir, systemId), runId);\n}\n\nexport function hypothesesPath(stateDir: string, systemId: string): string {\n return join(stateDir, \"hypotheses\", `${systemId}.jsonl`);\n}\n\nexport function generateRunId(): string {\n // r-<4 hex chars><timestamp suffix>: short, sortable-ish, collision-resistant enough for our scale.\n const ts = Date.now().toString(36);\n const rand = randomBytes(2).toString(\"hex\");\n return `r-${rand}${ts.slice(-6)}`;\n}\n\nexport function ensureDir(p: string): void {\n mkdirSync(p, { recursive: true });\n}\n\n/** Atomic write: write to a temp file in the same dir, fsync, rename over the target. */\nexport function writeFileAtomic(\n path: string,\n contents: string | Uint8Array,\n): void {\n ensureDir(dirname(path));\n const tmp = `${path}.tmp.${process.pid}.${randomBytes(3).toString(\"hex\")}`;\n const fd = openSync(tmp, \"w\", 0o644);\n try {\n writeFileSync(fd, contents);\n fsyncSync(fd);\n } finally {\n closeSync(fd);\n }\n renameSync(tmp, path);\n}\n\nexport function writeJsonAtomic(path: string, value: unknown): void {\n writeFileAtomic(path, JSON.stringify(value, null, 2) + \"\\n\");\n}\n\n/** Append a single NDJSON line. POSIX guarantees writes ≤ PIPE_BUF with O_APPEND are atomic. */\nexport function appendNdjsonLine(path: string, value: unknown): void {\n ensureDir(dirname(path));\n appendFileSync(path, JSON.stringify(value) + \"\\n\");\n}\n\nexport function readJson<T>(path: string): T {\n return JSON.parse(readFileSync(path, \"utf-8\")) as T;\n}\n\nexport function readJsonIfExists<T>(path: string): T | null {\n if (!existsSync(path)) return null;\n return readJson<T>(path);\n}\n\n/** Read an NDJSON file as an array of parsed objects. Tolerates a trailing partial line. */\nexport function readNdjson<T>(path: string): T[] {\n if (!existsSync(path)) return [];\n const text = readFileSync(path, \"utf-8\");\n const out: T[] = [];\n for (const line of text.split(\"\\n\")) {\n if (!line) continue;\n try {\n out.push(JSON.parse(line) as T);\n } catch {\n // Last line may be a partial write if a process is still appending. Skip silently.\n }\n }\n return out;\n}\n\n// --- lock file ---\n\nexport interface LockInfo {\n pid: number;\n startedAt: string;\n}\n\nexport function lockPath(runDirPath: string): string {\n return join(runDirPath, \".lock\");\n}\n\nexport function writeLock(runDirPath: string): void {\n const info: LockInfo = {\n pid: process.pid,\n startedAt: new Date().toISOString(),\n };\n writeJsonAtomic(lockPath(runDirPath), info);\n}\n\nexport function readLock(runDirPath: string): LockInfo | null {\n return readJsonIfExists<LockInfo>(lockPath(runDirPath));\n}\n\nexport function clearLock(runDirPath: string): void {\n const p = lockPath(runDirPath);\n if (existsSync(p)) unlinkSync(p);\n}\n\n/** True if the PID in the lock file is still alive on this machine. */\nexport function isPidAlive(pid: number): boolean {\n try {\n process.kill(pid, 0);\n return true;\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n return code === \"EPERM\"; // exists but we can't signal — still alive\n }\n}\n"],"mappings":";;;;AAqBA,SAAgB,cAAc,UAAkB,UAA0B;AACxE,QAAO,KAAK,UAAU,QAAQ,SAAS;;AAGzC,SAAgB,OACd,UACA,UACA,OACQ;AACR,QAAO,KAAK,cAAc,UAAU,SAAS,EAAE,MAAM;;AAGvD,SAAgB,eAAe,UAAkB,UAA0B;AACzE,QAAO,KAAK,UAAU,cAAc,GAAG,SAAS,QAAQ;;AAG1D,SAAgB,gBAAwB;CAEtC,MAAM,KAAK,KAAK,KAAK,CAAC,SAAS,GAAG;AAElC,QAAO,KADM,YAAY,EAAE,CAAC,SAAS,MACrB,GAAG,GAAG,MAAM,GAAG;;AAGjC,SAAgB,UAAU,GAAiB;AACzC,WAAU,GAAG,EAAE,WAAW,MAAM,CAAC;;;AAInC,SAAgB,gBACd,MACA,UACM;AACN,WAAU,QAAQ,KAAK,CAAC;CACxB,MAAM,MAAM,GAAG,KAAK,OAAO,QAAQ,IAAI,GAAG,YAAY,EAAE,CAAC,SAAS,MAAM;CACxE,MAAM,KAAK,SAAS,KAAK,KAAK,IAAM;AACpC,KAAI;AACF,gBAAc,IAAI,SAAS;AAC3B,YAAU,GAAG;WACL;AACR,YAAU,GAAG;;AAEf,YAAW,KAAK,KAAK;;AAGvB,SAAgB,gBAAgB,MAAc,OAAsB;AAClE,iBAAgB,MAAM,KAAK,UAAU,OAAO,MAAM,EAAE,GAAG,KAAK;;;AAI9D,SAAgB,iBAAiB,MAAc,OAAsB;AACnE,WAAU,QAAQ,KAAK,CAAC;AACxB,gBAAe,MAAM,KAAK,UAAU,MAAM,GAAG,KAAK;;AAGpD,SAAgB,SAAY,MAAiB;AAC3C,QAAO,KAAK,MAAM,aAAa,MAAM,QAAQ,CAAC;;AAGhD,SAAgB,iBAAoB,MAAwB;AAC1D,KAAI,CAAC,WAAW,KAAK,CAAE,QAAO;AAC9B,QAAO,SAAY,KAAK;;;AAI1B,SAAgB,WAAc,MAAmB;AAC/C,KAAI,CAAC,WAAW,KAAK,CAAE,QAAO,EAAE;CAChC,MAAM,OAAO,aAAa,MAAM,QAAQ;CACxC,MAAM,MAAW,EAAE;AACnB,MAAK,MAAM,QAAQ,KAAK,MAAM,KAAK,EAAE;AACnC,MAAI,CAAC,KAAM;AACX,MAAI;AACF,OAAI,KAAK,KAAK,MAAM,KAAK,CAAM;UACzB;;AAIV,QAAO;;AAUT,SAAgB,SAAS,YAA4B;AACnD,QAAO,KAAK,YAAY,QAAQ;;AAGlC,SAAgB,UAAU,YAA0B;CAClD,MAAM,OAAiB;EACrB,KAAK,QAAQ;EACb,4BAAW,IAAI,MAAM,EAAC,aAAa;EACpC;AACD,iBAAgB,SAAS,WAAW,EAAE,KAAK;;AAG7C,SAAgB,SAAS,YAAqC;AAC5D,QAAO,iBAA2B,SAAS,WAAW,CAAC;;AAGzD,SAAgB,UAAU,YAA0B;CAClD,MAAM,IAAI,SAAS,WAAW;AAC9B,KAAI,WAAW,EAAE,CAAE,YAAW,EAAE;;;AAIlC,SAAgB,WAAW,KAAsB;AAC/C,KAAI;AACF,UAAQ,KAAK,KAAK,EAAE;AACpB,SAAO;UACA,KAAK;AAEZ,SADc,IAA8B,SAC5B"}
1
+ {"version":3,"file":"run-dir.js","names":[],"sources":["../../src/lib/run-dir.ts"],"sourcesContent":["import { randomBytes } from \"node:crypto\";\nimport {\n appendFileSync,\n closeSync,\n existsSync,\n fsyncSync,\n mkdirSync,\n openSync,\n readFileSync,\n renameSync,\n unlinkSync,\n writeFileSync,\n} from \"node:fs\";\nimport { dirname, join } from \"node:path\";\n\nexport function systemRunsDir(stateDir: string, systemId: string): string {\n return join(stateDir, \"runs\", systemId);\n}\n\nexport function runDir(\n stateDir: string,\n systemId: string,\n runId: string,\n): string {\n return join(systemRunsDir(stateDir, systemId), runId);\n}\n\nexport function hypothesesPath(stateDir: string, systemId: string): string {\n return join(stateDir, \"hypotheses\", `${systemId}.jsonl`);\n}\n\nexport function generateRunId(): string {\n // r-<4 hex chars><timestamp suffix>: short, sortable-ish, collision-resistant enough for our scale.\n const ts = Date.now().toString(36);\n const rand = randomBytes(2).toString(\"hex\");\n return `r-${rand}${ts.slice(-6)}`;\n}\n\nexport function ensureDir(p: string): void {\n mkdirSync(p, { recursive: true });\n}\n\n/** Atomic write: write to a temp file in the same dir, fsync, rename over the target. */\nfunction writeFileAtomic(path: string, contents: string | Uint8Array): void {\n ensureDir(dirname(path));\n const tmp = `${path}.tmp.${process.pid}.${randomBytes(3).toString(\"hex\")}`;\n const fd = openSync(tmp, \"w\", 0o644);\n try {\n writeFileSync(fd, contents);\n fsyncSync(fd);\n } finally {\n closeSync(fd);\n }\n renameSync(tmp, path);\n}\n\nexport function writeJsonAtomic(path: string, value: unknown): void {\n writeFileAtomic(path, JSON.stringify(value, null, 2) + \"\\n\");\n}\n\n/** Append a single NDJSON line. POSIX guarantees writes ≤ PIPE_BUF with O_APPEND are atomic. */\nexport function appendNdjsonLine(path: string, value: unknown): void {\n ensureDir(dirname(path));\n appendFileSync(path, JSON.stringify(value) + \"\\n\");\n}\n\nfunction readJson<T>(path: string): T {\n return JSON.parse(readFileSync(path, \"utf-8\")) as T;\n}\n\nexport function readJsonIfExists<T>(path: string): T | null {\n if (!existsSync(path)) return null;\n return readJson<T>(path);\n}\n\n/** Read an NDJSON file as an array of parsed objects. Tolerates a trailing partial line. */\nexport function readNdjson<T>(path: string): T[] {\n if (!existsSync(path)) return [];\n const text = readFileSync(path, \"utf-8\");\n const out: T[] = [];\n for (const line of text.split(\"\\n\")) {\n if (!line) continue;\n try {\n out.push(JSON.parse(line) as T);\n } catch {\n // Last line may be a partial write if a process is still appending. Skip silently.\n }\n }\n return out;\n}\n\n// --- lock file ---\n\nexport interface LockInfo {\n pid: number;\n startedAt: string;\n}\n\nfunction lockPath(runDirPath: string): string {\n return join(runDirPath, \".lock\");\n}\n\nexport function writeLock(runDirPath: string): void {\n const info: LockInfo = {\n pid: process.pid,\n startedAt: new Date().toISOString(),\n };\n writeJsonAtomic(lockPath(runDirPath), info);\n}\n\nexport function readLock(runDirPath: string): LockInfo | null {\n return readJsonIfExists<LockInfo>(lockPath(runDirPath));\n}\n\nexport function clearLock(runDirPath: string): void {\n const p = lockPath(runDirPath);\n if (existsSync(p)) unlinkSync(p);\n}\n\n/** True if the PID in the lock file is still alive on this machine. */\nexport function isPidAlive(pid: number): boolean {\n try {\n process.kill(pid, 0);\n return true;\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n return code === \"EPERM\"; // exists but we can't signal — still alive\n }\n}\n"],"mappings":";;;;AAeA,SAAgB,cAAc,UAAkB,UAA0B;AACxE,QAAO,KAAK,UAAU,QAAQ,SAAS;;AAGzC,SAAgB,OACd,UACA,UACA,OACQ;AACR,QAAO,KAAK,cAAc,UAAU,SAAS,EAAE,MAAM;;AAGvD,SAAgB,eAAe,UAAkB,UAA0B;AACzE,QAAO,KAAK,UAAU,cAAc,GAAG,SAAS,QAAQ;;AAG1D,SAAgB,gBAAwB;CAEtC,MAAM,KAAK,KAAK,KAAK,CAAC,SAAS,GAAG;AAElC,QAAO,KADM,YAAY,EAAE,CAAC,SAAS,MACrB,GAAG,GAAG,MAAM,GAAG;;AAGjC,SAAgB,UAAU,GAAiB;AACzC,WAAU,GAAG,EAAE,WAAW,MAAM,CAAC;;;AAInC,SAAS,gBAAgB,MAAc,UAAqC;AAC1E,WAAU,QAAQ,KAAK,CAAC;CACxB,MAAM,MAAM,GAAG,KAAK,OAAO,QAAQ,IAAI,GAAG,YAAY,EAAE,CAAC,SAAS,MAAM;CACxE,MAAM,KAAK,SAAS,KAAK,KAAK,IAAM;AACpC,KAAI;AACF,gBAAc,IAAI,SAAS;AAC3B,YAAU,GAAG;WACL;AACR,YAAU,GAAG;;AAEf,YAAW,KAAK,KAAK;;AAGvB,SAAgB,gBAAgB,MAAc,OAAsB;AAClE,iBAAgB,MAAM,KAAK,UAAU,OAAO,MAAM,EAAE,GAAG,KAAK;;;AAI9D,SAAgB,iBAAiB,MAAc,OAAsB;AACnE,WAAU,QAAQ,KAAK,CAAC;AACxB,gBAAe,MAAM,KAAK,UAAU,MAAM,GAAG,KAAK;;AAGpD,SAAS,SAAY,MAAiB;AACpC,QAAO,KAAK,MAAM,aAAa,MAAM,QAAQ,CAAC;;AAGhD,SAAgB,iBAAoB,MAAwB;AAC1D,KAAI,CAAC,WAAW,KAAK,CAAE,QAAO;AAC9B,QAAO,SAAY,KAAK;;;AAI1B,SAAgB,WAAc,MAAmB;AAC/C,KAAI,CAAC,WAAW,KAAK,CAAE,QAAO,EAAE;CAChC,MAAM,OAAO,aAAa,MAAM,QAAQ;CACxC,MAAM,MAAW,EAAE;AACnB,MAAK,MAAM,QAAQ,KAAK,MAAM,KAAK,EAAE;AACnC,MAAI,CAAC,KAAM;AACX,MAAI;AACF,OAAI,KAAK,KAAK,MAAM,KAAK,CAAM;UACzB;;AAIV,QAAO;;AAUT,SAAS,SAAS,YAA4B;AAC5C,QAAO,KAAK,YAAY,QAAQ;;AAGlC,SAAgB,UAAU,YAA0B;CAClD,MAAM,OAAiB;EACrB,KAAK,QAAQ;EACb,4BAAW,IAAI,MAAM,EAAC,aAAa;EACpC;AACD,iBAAgB,SAAS,WAAW,EAAE,KAAK;;AAG7C,SAAgB,SAAS,YAAqC;AAC5D,QAAO,iBAA2B,SAAS,WAAW,CAAC;;AAGzD,SAAgB,UAAU,YAA0B;CAClD,MAAM,IAAI,SAAS,WAAW;AAC9B,KAAI,WAAW,EAAE,CAAE,YAAW,EAAE;;;AAIlC,SAAgB,WAAW,KAAsB;AAC/C,KAAI;AACF,UAAQ,KAAK,KAAK,EAAE;AACpB,SAAO;UACA,KAAK;AAEZ,SADc,IAA8B,SAC5B"}
@@ -1,22 +1,23 @@
1
+ import { resolveStateDir } from "./paths.js";
1
2
  import { loadSystem, resolveEvalPath } from "./system.js";
2
- import { NdjsonReader } from "./events.js";
3
3
  import { appendNdjsonLine, clearLock, ensureDir, generateRunId, hypothesesPath, isPidAlive, readJsonIfExists, readLock, runDir, systemRunsDir, writeJsonAtomic, writeLock } from "./run-dir.js";
4
4
  import { currentBaseline, listRuns } from "./leaderboard.js";
5
+ import { NdjsonReader } from "./events.js";
5
6
  import { decidePromotion } from "./promotion.js";
6
- import { extname, join } from "node:path";
7
7
  import { createWriteStream, existsSync, writeFileSync } from "node:fs";
8
- import { arch, hostname, platform } from "node:os";
8
+ import { extname, join } from "node:path";
9
9
  import { execSync, spawn } from "node:child_process";
10
10
  import { createRequire } from "node:module";
11
+ import { arch, hostname, platform } from "node:os";
11
12
  //#region src/lib/runner.ts
12
13
  const FAILURE_K = 10;
13
14
  const require = createRequire(import.meta.url);
14
15
  async function runExperiment(opts) {
15
- const stateDir = opts.stateDir ?? join(opts.workspace, ".kaizen");
16
+ const stateDir = resolveStateDir(opts.workspace);
16
17
  reapStaleRuns(stateDir, opts.systemId);
17
18
  const system = loadSystem(opts.workspace, opts.systemId);
18
19
  const evalAbs = resolveEvalPath(opts.workspace, system);
19
- if (!existsSync(evalAbs)) throw new Error(`eval script not found at ${evalAbs} (declared as run_eval: ${system.frontmatter.run_eval} in systems/${opts.systemId}.md)`);
20
+ if (!existsSync(evalAbs)) throw new Error(`eval script not found at ${evalAbs} (declared as run_eval: ${system.frontmatter.run_eval} in kaizen/systems/${opts.systemId}/system.md)`);
20
21
  const interp = detectInterpreter(evalAbs);
21
22
  const runId = generateRunId();
22
23
  const dir = runDir(stateDir, opts.systemId, runId);