@percepta/kaizen 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/README.md +54 -126
  2. package/agent/claude-command.md +23 -0
  3. package/agent/evals.md +41 -0
  4. package/agent/overview.md +53 -0
  5. package/agent/variant-builder.md +22 -0
  6. package/agent/views.md +51 -0
  7. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/BUILD_ID +1 -1
  8. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/build-manifest.json +22 -22
  9. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/prerender-manifest.json +3 -3
  10. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/routes-manifest.json +30 -10
  11. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/27.js +1 -0
  12. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/516.js +8 -0
  13. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/913.js +1 -0
  14. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/middleware-build-manifest.js +1 -1
  15. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/404.html +1 -1
  16. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/500.html +1 -1
  17. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/benchmarks.html +1 -1
  18. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/benchmarks.js.nft.json +1 -1
  19. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data/[[...path]].html +1 -0
  20. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data/[[...path]].js.nft.json +1 -0
  21. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/eval.html +1 -1
  22. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/eval.js.nft.json +1 -1
  23. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments/[[...path]].html +1 -0
  24. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments/[[...path]].js.nft.json +1 -0
  25. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/ideas.html +1 -1
  26. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/ideas.js.nft.json +1 -1
  27. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-action.js +1 -0
  28. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-action.js.nft.json +1 -0
  29. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-item.js +1 -1
  30. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-item.js.nft.json +1 -1
  31. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-mutation.js +1 -0
  32. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-mutation.js.nft.json +1 -0
  33. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset.js +1 -1
  34. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset.js.nft.json +1 -1
  35. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-datasets.js +1 -1
  36. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-datasets.js.nft.json +1 -1
  37. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-trace.js +1 -1
  38. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-trace.js.nft.json +1 -1
  39. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-traces.js +1 -0
  40. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-traces.js.nft.json +1 -0
  41. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/linear-ideas.js +2 -2
  42. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/linear-ideas.js.nft.json +1 -1
  43. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-events.js +1 -1
  44. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-events.js.nft.json +1 -1
  45. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-failures.js +1 -1
  46. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-failures.js.nft.json +1 -1
  47. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-traces.js +1 -1
  48. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-traces.js.nft.json +1 -1
  49. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/runs.js +2 -2
  50. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/runs.js.nft.json +1 -1
  51. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/systems.js +2 -2
  52. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/systems.js.nft.json +1 -1
  53. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/trace-renderer.js +1 -1
  54. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/trace-renderer.js.nft.json +1 -1
  55. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/index.html +1 -1
  56. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/index.js.nft.json +1 -1
  57. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages-manifest.json +8 -5
  58. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/SCF0o7YxElB9rzWaOohsA/_buildManifest.js +1 -0
  59. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/253-85c76c34f33c9604.js +8 -0
  60. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/{benchmarks-559dc9df52db3af4.js → benchmarks-30a17b7659010b8c.js} +1 -1
  61. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/data/[[...path]]-e5f4083fe9ffe429.js +1 -0
  62. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/{eval-3c911ea8744631fd.js → eval-160237a604b47416.js} +1 -1
  63. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/experiments/[[...path]]-91e47a4893093600.js +1 -0
  64. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/{ideas-6829a271003150a9.js → ideas-96e58e4624952e26.js} +1 -1
  65. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/{index-1d8b6719f49e4ae0.js → index-d3306bb6f5d7d235.js} +1 -1
  66. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/css/cd3873236eb77caa.css +1 -0
  67. package/dashboard/.next/standalone/packages/kaizen/package.json +5 -3
  68. package/dashboard/.next/standalone/packages/kaizen/shared/workspace-paths.js +84 -0
  69. package/dist/commands/create-view.js +58 -0
  70. package/dist/commands/create-view.js.map +1 -0
  71. package/dist/commands/guide.js +66 -0
  72. package/dist/commands/guide.js.map +1 -0
  73. package/dist/commands/ideas.js +4 -8
  74. package/dist/commands/ideas.js.map +1 -1
  75. package/dist/commands/init-system.js +22 -20
  76. package/dist/commands/init-system.js.map +1 -1
  77. package/dist/commands/init.js +28 -64
  78. package/dist/commands/init.js.map +1 -1
  79. package/dist/commands/log.js +5 -11
  80. package/dist/commands/log.js.map +1 -1
  81. package/dist/commands/rebuild.js +7 -9
  82. package/dist/commands/rebuild.js.map +1 -1
  83. package/dist/commands/run.js +5 -9
  84. package/dist/commands/run.js.map +1 -1
  85. package/dist/commands/studio.js +3 -3
  86. package/dist/commands/studio.js.map +1 -1
  87. package/dist/index.js +17 -21
  88. package/dist/index.js.map +1 -1
  89. package/dist/lib/cli.js +20 -0
  90. package/dist/lib/cli.js.map +1 -0
  91. package/dist/lib/events.js.map +1 -1
  92. package/dist/lib/fs-utils.js +3 -27
  93. package/dist/lib/fs-utils.js.map +1 -1
  94. package/dist/lib/leaderboard.js +1 -1
  95. package/dist/lib/leaderboard.js.map +1 -1
  96. package/dist/lib/paths.js +3 -3
  97. package/dist/lib/paths.js.map +1 -1
  98. package/dist/lib/promotion.js.map +1 -1
  99. package/dist/lib/run-dir.js +1 -1
  100. package/dist/lib/run-dir.js.map +1 -1
  101. package/dist/lib/runner.js +6 -5
  102. package/dist/lib/runner.js.map +1 -1
  103. package/dist/lib/system.js +4 -2
  104. package/dist/lib/system.js.map +1 -1
  105. package/dist/package.js +5 -3
  106. package/dist/shared/view-types.d.ts +67 -0
  107. package/dist/shared/view-types.d.ts.map +1 -0
  108. package/dist/shared/workspace-paths.js +84 -0
  109. package/dist/shared/workspace-paths.js.map +1 -0
  110. package/dist/types.d.ts +3 -30
  111. package/dist/types.d.ts.map +1 -1
  112. package/package.json +5 -3
  113. package/shared/view-types.d.ts +69 -0
  114. package/shared/view-types.js +1 -0
  115. package/shared/workspace-paths.d.ts +19 -0
  116. package/shared/workspace-paths.js +84 -0
  117. package/templates/system/eval.py +13 -6
  118. package/templates/system/eval.ts +11 -5
  119. package/templates/system/rubric.md +1 -1
  120. package/templates/system/system.md +6 -5
  121. package/templates/view/dataset-item.tsx +63 -0
  122. package/templates/view/trace.tsx +10 -0
  123. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/715.js +0 -6
  124. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data.html +0 -1
  125. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data.js.nft.json +0 -1
  126. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments.html +0 -1
  127. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments.js.nft.json +0 -1
  128. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/YpQ-I4VL-aEdQrM5uN7_3/_buildManifest.js +0 -1
  129. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/673-ed4be46027ae7a37.js +0 -6
  130. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/data-644e4280b4c86fe0.js +0 -1
  131. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/experiments-42f31600c2bb47ad.js +0 -1
  132. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/css/b18a6732b96168e1.css +0 -1
  133. package/dist/lib/env.js +0 -2
  134. package/dist/shared/env.js +0 -4
  135. package/templates/workspace/.claude/agents/variant-builder.md +0 -51
  136. package/templates/workspace/.claude/commands/kaizen.md +0 -65
  137. /package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/{YpQ-I4VL-aEdQrM5uN7_3 → SCF0o7YxElB9rzWaOohsA}/_ssgManifest.js +0 -0
@@ -1,39 +1,33 @@
1
+ import { kaizenConfigPath, kaizenDir, kaizenSystemsDir } from "../shared/workspace-paths.js";
2
+ import { resolveStateDir, workspaceRoot } from "../lib/paths.js";
3
+ import { appendIfMissing, ensureDir, writeFileSafely } from "../lib/fs-utils.js";
1
4
  import { boolFlag, parseFlags, strFlag } from "../lib/parse-args.js";
2
- import { templatesDir, workspaceRoot } from "../lib/paths.js";
3
- import { appendIfMissing, copyTreeIfMissing, ensureDir, writeFileSafely } from "../lib/fs-utils.js";
4
5
  import { prompt } from "../lib/prompt.js";
6
+ import { existsSync } from "node:fs";
5
7
  import { join } from "node:path";
6
- import { existsSync, readFileSync, writeFileSync } from "node:fs";
7
8
  //#region src/commands/init.ts
8
9
  async function runInit(argv) {
9
- const { flags } = parseFlags(argv);
10
+ const { positional, flags } = parseFlags(argv);
11
+ if (positional.length > 0) {
12
+ process.stderr.write(`kaizen init does not take positional arguments. Run \`kaizen create system <name>\` to add a system.\n`);
13
+ return 1;
14
+ }
10
15
  const force = boolFlag(flags, "force");
11
16
  const root = workspaceRoot();
12
- const configPath = join(root, "kaizen.config.ts");
17
+ const configPath = kaizenConfigPath(root);
18
+ const stateDir = resolveStateDir(root);
13
19
  if (existsSync(configPath) && !force) {
14
- process.stdout.write(`kaizen.config.ts already exists at ${configPath}\nre-run with --force to overwrite, or run \`kaizen create system <name>\` to add a system.\n`);
20
+ process.stdout.write(`kaizen/config.ts already exists at ${configPath}\nre-run with --force to overwrite, or run \`kaizen create system <name>\` to add a system.\n`);
15
21
  return 1;
16
22
  }
17
23
  process.stdout.write("kaizen init — scaffolding a workspace\n\n");
18
- const customer = strFlag(flags, "customer") ?? await prompt("customer slug (e.g. tc, cbh, janus)", inferCustomerSlug(root));
19
- const vars = {
20
- customer,
21
- customer_name: strFlag(flags, "customer-name") ?? await prompt("customer display name", customer),
22
- langfuse_host: strFlag(flags, "langfuse-host") ?? await prompt("langfuse host URL", "https://langfuse.example.com")
23
- };
24
+ const vars = { customer_name: strFlag(flags, "customer-name") ?? strFlag(flags, "customer") ?? await prompt("customer display name", inferCustomerName(root)) };
25
+ ensureDir(kaizenDir(root));
24
26
  writeFileSafely(configPath, renderConfig(vars), { overwrite: force });
25
- for (const dir of [
26
- "systems",
27
- "rubrics",
28
- "eval",
29
- "views"
30
- ]) {
31
- ensureDir(join(root, dir));
32
- writeFileSafely(join(root, dir, ".gitkeep"), "");
33
- }
34
- ensureDir(join(root, ".kaizen"));
35
- const claudeWritten = copyTreeIfMissing(join(templatesDir(), "workspace", ".claude"), join(root, ".claude"), vars);
36
- writeFileSafely(join(root, ".kaizen", ".gitignore"), [
27
+ ensureDir(kaizenSystemsDir(root));
28
+ writeFileSafely(join(kaizenSystemsDir(root), ".gitkeep"), "");
29
+ ensureDir(stateDir);
30
+ writeFileSafely(join(stateDir, ".gitignore"), [
37
31
  "# Kaizen-managed run state. Runs are large and per-machine; commit only the durable summaries.",
38
32
  "runs/",
39
33
  "dist/",
@@ -41,25 +35,27 @@ async function runInit(argv) {
41
35
  ""
42
36
  ].join("\n"));
43
37
  appendIfMissing(join(root, ".gitignore"), "# Kaizen");
44
- appendIfMissing(join(root, ".gitignore"), ".kaizen/runs/");
45
- appendIfMissing(join(root, ".gitignore"), ".kaizen/dist/");
46
- appendClaudeSection(join(root, "CLAUDE.md"), vars);
38
+ appendIfMissing(join(root, ".gitignore"), "kaizen/.kaizen/runs/");
39
+ appendIfMissing(join(root, ".gitignore"), "kaizen/.kaizen/dist/");
47
40
  process.stdout.write("\nscaffolded:\n");
48
41
  process.stdout.write(` ${rel(root, configPath)}\n`);
49
- process.stdout.write(` systems/, rubrics/, eval/, views/\n`);
50
- process.stdout.write(` .kaizen/.gitignore\n`);
51
- for (const f of claudeWritten) process.stdout.write(` .claude/${f}\n`);
42
+ process.stdout.write(` kaizen/systems/\n`);
43
+ process.stdout.write(` ${rel(root, join(stateDir, ".gitignore"))}\n`);
44
+ process.stdout.write(` package-owned agent guide available via \`kaizen guide\`\n`);
52
45
  process.stdout.write([
53
46
  "",
54
47
  "next:",
48
+ " kaizen guide # print agent instructions from the installed package",
55
49
  " kaizen create system <name> # add your first system",
50
+ " kaizen create view <name> --type trace",
51
+ " kaizen create view <name> --type dataset-item",
56
52
  " kaizen studio # open the dashboard",
57
53
  ""
58
54
  ].join("\n"));
59
55
  return 0;
60
56
  }
61
- function inferCustomerSlug(root) {
62
- return (root.split("/").pop() ?? "").replace(/[^a-z0-9-]/gi, "").toLowerCase() || "customer";
57
+ function inferCustomerName(root) {
58
+ return (root.split("/").pop() ?? "") || "Customer";
63
59
  }
64
60
  function rel(root, p) {
65
61
  return p.startsWith(root) ? p.slice(root.length + 1) : p;
@@ -69,45 +65,13 @@ function renderConfig(vars) {
69
65
 
70
66
  const config: KaizenConfig = {
71
67
  customer: {
72
- slug: ${JSON.stringify(vars.customer)},
73
68
  name: ${JSON.stringify(vars.customer_name)},
74
69
  },
75
- langfuse: {
76
- host: ${JSON.stringify(vars.langfuse_host)},
77
- // Read keys from env. Never commit credentials.
78
- publicKeyEnv: "LANGFUSE_PUBLIC_KEY",
79
- secretKeyEnv: "LANGFUSE_SECRET_KEY",
80
- },
81
- studio: {
82
- port: 6789,
83
- },
84
70
  };
85
71
 
86
72
  export default config;
87
73
  `;
88
74
  }
89
- function appendClaudeSection(path, vars) {
90
- const cur = existsSync(path) ? readFileSync(path, "utf-8") : "";
91
- if (cur.includes("## Kaizen")) return;
92
- const section = [
93
- "## Kaizen",
94
- "",
95
- "This repo uses Kaizen — an automated AI researcher — to evaluate and improve systems.",
96
- "",
97
- "- System definitions live in `systems/*.md`.",
98
- "- Each system has an eval script under `eval/` that emits NDJSON events.",
99
- "- Real Langfuse-backed eval scripts should also link each dataset item to the fresh run trace in a Langfuse dataset run and write the primary metric as a trace score.",
100
- "- Record runs via the supervisor: `kaizen run --system <s> --variant <v>`.",
101
- "- Open the dashboard with `kaizen studio` (default port 6789).",
102
- "- Custom per-system UIs go in `views/<system>/index.tsx`.",
103
- "- Run state lives under `.kaizen/runs/` (gitignored).",
104
- "- The `/kaizen` slash command and the `variant-builder` agent are in `.claude/`.",
105
- "",
106
- `Customer: ${vars.customer_name} (slug: \`${vars.customer}\`)`,
107
- ""
108
- ].join("\n");
109
- writeFileSync(path, cur + (cur.length === 0 || cur.endsWith("\n") ? "" : "\n") + (cur.length > 0 ? "\n" : "") + section);
110
- }
111
75
  //#endregion
112
76
  export { runInit };
113
77
 
@@ -1 +1 @@
1
- {"version":3,"file":"init.js","names":[],"sources":["../../src/commands/init.ts"],"sourcesContent":["import { existsSync, readFileSync, writeFileSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport {\n appendIfMissing,\n copyTreeIfMissing,\n ensureDir,\n writeFileSafely,\n} from \"../lib/fs-utils.js\";\nimport { boolFlag, parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { templatesDir, workspaceRoot } from \"../lib/paths.js\";\nimport { prompt } from \"../lib/prompt.js\";\n\nexport async function runInit(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const force = boolFlag(flags, \"force\");\n const root = workspaceRoot();\n const configPath = join(root, \"kaizen.config.ts\");\n\n if (existsSync(configPath) && !force) {\n process.stdout.write(\n `kaizen.config.ts already exists at ${configPath}\\n` +\n `re-run with --force to overwrite, or run \\`kaizen create system <name>\\` to add a system.\\n`,\n );\n return 1;\n }\n\n process.stdout.write(\"kaizen init — scaffolding a workspace\\n\\n\");\n\n const customer =\n strFlag(flags, \"customer\") ??\n (await prompt(\n \"customer slug (e.g. tc, cbh, janus)\",\n inferCustomerSlug(root),\n ));\n const customerName =\n strFlag(flags, \"customer-name\") ??\n (await prompt(\"customer display name\", customer));\n const langfuseHost =\n strFlag(flags, \"langfuse-host\") ??\n (await prompt(\"langfuse host URL\", \"https://langfuse.example.com\"));\n\n const vars: Record<string, string> = {\n customer,\n customer_name: customerName,\n langfuse_host: langfuseHost,\n };\n\n // 1) kaizen.config.ts\n writeFileSafely(configPath, renderConfig(vars), { overwrite: force });\n\n // 2) directory tree\n for (const dir of [\"systems\", \"rubrics\", \"eval\", \"views\"]) {\n ensureDir(join(root, dir));\n writeFileSafely(join(root, dir, \".gitkeep\"), \"\");\n }\n ensureDir(join(root, \".kaizen\"));\n\n // 3) copy .claude templates (skill + agent)\n const claudeWritten = copyTreeIfMissing(\n join(templatesDir(), \"workspace\", \".claude\"),\n join(root, \".claude\"),\n vars,\n );\n\n // 4) .kaizen/.gitignore — ignore bulky generated state but keep durable summaries.\n writeFileSafely(\n join(root, \".kaizen\", \".gitignore\"),\n [\n \"# Kaizen-managed run state. Runs are large and per-machine; commit only the durable summaries.\",\n \"runs/\",\n \"dist/\",\n \"!hypotheses/\",\n \"\",\n ].join(\"\\n\"),\n );\n\n // 5) project .gitignore additions\n appendIfMissing(join(root, \".gitignore\"), \"# Kaizen\");\n appendIfMissing(join(root, \".gitignore\"), \".kaizen/runs/\");\n appendIfMissing(join(root, \".gitignore\"), \".kaizen/dist/\");\n\n // 6) CLAUDE.md note (append a section if not already present)\n appendClaudeSection(join(root, \"CLAUDE.md\"), vars);\n\n process.stdout.write(\"\\nscaffolded:\\n\");\n process.stdout.write(` ${rel(root, configPath)}\\n`);\n process.stdout.write(` systems/, rubrics/, eval/, views/\\n`);\n process.stdout.write(` .kaizen/.gitignore\\n`);\n for (const f of claudeWritten) process.stdout.write(` .claude/${f}\\n`);\n\n process.stdout.write(\n [\n \"\",\n \"next:\",\n \" kaizen create system <name> # add your first system\",\n \" kaizen studio # open the dashboard\",\n \"\",\n ].join(\"\\n\"),\n );\n\n return 0;\n}\n\nfunction inferCustomerSlug(root: string): string {\n const name = root.split(\"/\").pop() ?? \"\";\n return name.replace(/[^a-z0-9-]/gi, \"\").toLowerCase() || \"customer\";\n}\n\nfunction rel(root: string, p: string): string {\n return p.startsWith(root) ? p.slice(root.length + 1) : p;\n}\n\nfunction renderConfig(vars: Record<string, string>): string {\n // JSON.stringify escapes quotes/backslashes/newlines correctly so user input\n // with characters like O'Brien or paths with backslashes don't produce\n // invalid TypeScript.\n return `import type { KaizenConfig } from \"@percepta/kaizen\";\n\nconst config: KaizenConfig = {\n customer: {\n slug: ${JSON.stringify(vars.customer)},\n name: ${JSON.stringify(vars.customer_name)},\n },\n langfuse: {\n host: ${JSON.stringify(vars.langfuse_host)},\n // Read keys from env. Never commit credentials.\n publicKeyEnv: \"LANGFUSE_PUBLIC_KEY\",\n secretKeyEnv: \"LANGFUSE_SECRET_KEY\",\n },\n studio: {\n port: 6789,\n },\n};\n\nexport default config;\n`;\n}\n\nfunction appendClaudeSection(path: string, vars: Record<string, string>): void {\n const cur = existsSync(path) ? readFileSync(path, \"utf-8\") : \"\";\n if (cur.includes(\"## Kaizen\")) return; // already present, leave alone\n const section = [\n \"## Kaizen\",\n \"\",\n \"This repo uses Kaizen — an automated AI researcher — to evaluate and improve systems.\",\n \"\",\n \"- System definitions live in `systems/*.md`.\",\n \"- Each system has an eval script under `eval/` that emits NDJSON events.\",\n \"- Real Langfuse-backed eval scripts should also link each dataset item to the fresh run trace in a Langfuse dataset run and write the primary metric as a trace score.\",\n \"- Record runs via the supervisor: `kaizen run --system <s> --variant <v>`.\",\n \"- Open the dashboard with `kaizen studio` (default port 6789).\",\n \"- Custom per-system UIs go in `views/<system>/index.tsx`.\",\n \"- Run state lives under `.kaizen/runs/` (gitignored).\",\n \"- The `/kaizen` slash command and the `variant-builder` agent are in `.claude/`.\",\n \"\",\n `Customer: ${vars.customer_name} (slug: \\`${vars.customer}\\`)`,\n \"\",\n ].join(\"\\n\");\n const sep = cur.length === 0 || cur.endsWith(\"\\n\") ? \"\" : \"\\n\";\n writeFileSync(path, cur + sep + (cur.length > 0 ? \"\\n\" : \"\") + section);\n}\n"],"mappings":";;;;;;;AAYA,eAAsB,QAAQ,MAAiC;CAC7D,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,QAAQ,SAAS,OAAO,QAAQ;CACtC,MAAM,OAAO,eAAe;CAC5B,MAAM,aAAa,KAAK,MAAM,mBAAmB;AAEjD,KAAI,WAAW,WAAW,IAAI,CAAC,OAAO;AACpC,UAAQ,OAAO,MACb,sCAAsC,WAAW,+FAElD;AACD,SAAO;;AAGT,SAAQ,OAAO,MAAM,4CAA4C;CAEjE,MAAM,WACJ,QAAQ,OAAO,WAAW,IACzB,MAAM,OACL,uCACA,kBAAkB,KAAK,CACxB;CAQH,MAAM,OAA+B;EACnC;EACA,eARA,QAAQ,OAAO,gBAAgB,IAC9B,MAAM,OAAO,yBAAyB,SAAS;EAQhD,eANA,QAAQ,OAAO,gBAAgB,IAC9B,MAAM,OAAO,qBAAqB,+BAA+B;EAMnE;AAGD,iBAAgB,YAAY,aAAa,KAAK,EAAE,EAAE,WAAW,OAAO,CAAC;AAGrE,MAAK,MAAM,OAAO;EAAC;EAAW;EAAW;EAAQ;EAAQ,EAAE;AACzD,YAAU,KAAK,MAAM,IAAI,CAAC;AAC1B,kBAAgB,KAAK,MAAM,KAAK,WAAW,EAAE,GAAG;;AAElD,WAAU,KAAK,MAAM,UAAU,CAAC;CAGhC,MAAM,gBAAgB,kBACpB,KAAK,cAAc,EAAE,aAAa,UAAU,EAC5C,KAAK,MAAM,UAAU,EACrB,KACD;AAGD,iBACE,KAAK,MAAM,WAAW,aAAa,EACnC;EACE;EACA;EACA;EACA;EACA;EACD,CAAC,KAAK,KAAK,CACb;AAGD,iBAAgB,KAAK,MAAM,aAAa,EAAE,WAAW;AACrD,iBAAgB,KAAK,MAAM,aAAa,EAAE,gBAAgB;AAC1D,iBAAgB,KAAK,MAAM,aAAa,EAAE,gBAAgB;AAG1D,qBAAoB,KAAK,MAAM,YAAY,EAAE,KAAK;AAElD,SAAQ,OAAO,MAAM,kBAAkB;AACvC,SAAQ,OAAO,MAAM,KAAK,IAAI,MAAM,WAAW,CAAC,IAAI;AACpD,SAAQ,OAAO,MAAM,wCAAwC;AAC7D,SAAQ,OAAO,MAAM,yBAAyB;AAC9C,MAAK,MAAM,KAAK,cAAe,SAAQ,OAAO,MAAM,aAAa,EAAE,IAAI;AAEvE,SAAQ,OAAO,MACb;EACE;EACA;EACA;EACA;EACA;EACD,CAAC,KAAK,KAAK,CACb;AAED,QAAO;;AAGT,SAAS,kBAAkB,MAAsB;AAE/C,SADa,KAAK,MAAM,IAAI,CAAC,KAAK,IAAI,IAC1B,QAAQ,gBAAgB,GAAG,CAAC,aAAa,IAAI;;AAG3D,SAAS,IAAI,MAAc,GAAmB;AAC5C,QAAO,EAAE,WAAW,KAAK,GAAG,EAAE,MAAM,KAAK,SAAS,EAAE,GAAG;;AAGzD,SAAS,aAAa,MAAsC;AAI1D,QAAO;;;;YAIG,KAAK,UAAU,KAAK,SAAS,CAAC;YAC9B,KAAK,UAAU,KAAK,cAAc,CAAC;;;YAGnC,KAAK,UAAU,KAAK,cAAc,CAAC;;;;;;;;;;;;;AAc/C,SAAS,oBAAoB,MAAc,MAAoC;CAC7E,MAAM,MAAM,WAAW,KAAK,GAAG,aAAa,MAAM,QAAQ,GAAG;AAC7D,KAAI,IAAI,SAAS,YAAY,CAAE;CAC/B,MAAM,UAAU;EACd;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA,aAAa,KAAK,cAAc,YAAY,KAAK,SAAS;EAC1D;EACD,CAAC,KAAK,KAAK;AAEZ,eAAc,MAAM,OADR,IAAI,WAAW,KAAK,IAAI,SAAS,KAAK,GAAG,KAAK,SACzB,IAAI,SAAS,IAAI,OAAO,MAAM,QAAQ"}
1
+ {"version":3,"file":"init.js","names":[],"sources":["../../src/commands/init.ts"],"sourcesContent":["import { existsSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport {\n appendIfMissing,\n ensureDir,\n writeFileSafely,\n} from \"../lib/fs-utils.js\";\nimport { boolFlag, parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport {\n kaizenConfigPath,\n kaizenDir,\n kaizenSystemsDir,\n resolveStateDir,\n workspaceRoot,\n} from \"../lib/paths.js\";\nimport { prompt } from \"../lib/prompt.js\";\n\nexport async function runInit(argv: string[]): Promise<number> {\n const { positional, flags } = parseFlags(argv);\n if (positional.length > 0) {\n process.stderr.write(\n `kaizen init does not take positional arguments. Run \\`kaizen create system <name>\\` to add a system.\\n`,\n );\n return 1;\n }\n const force = boolFlag(flags, \"force\");\n const root = workspaceRoot();\n const configPath = kaizenConfigPath(root);\n const stateDir = resolveStateDir(root);\n\n if (existsSync(configPath) && !force) {\n process.stdout.write(\n `kaizen/config.ts already exists at ${configPath}\\n` +\n `re-run with --force to overwrite, or run \\`kaizen create system <name>\\` to add a system.\\n`,\n );\n return 1;\n }\n\n process.stdout.write(\"kaizen init — scaffolding a workspace\\n\\n\");\n\n const customerName =\n strFlag(flags, \"customer-name\") ??\n strFlag(flags, \"customer\") ??\n (await prompt(\"customer display name\", inferCustomerName(root)));\n\n const vars: Record<string, string> = {\n customer_name: customerName,\n };\n\n // 1) kaizen/config.ts\n ensureDir(kaizenDir(root));\n writeFileSafely(configPath, renderConfig(vars), { overwrite: force });\n\n // 2) directory tree\n ensureDir(kaizenSystemsDir(root));\n writeFileSafely(join(kaizenSystemsDir(root), \".gitkeep\"), \"\");\n ensureDir(stateDir);\n\n // 3) kaizen/.kaizen/.gitignore — ignore bulky generated state but keep durable summaries.\n writeFileSafely(\n join(stateDir, \".gitignore\"),\n [\n \"# Kaizen-managed run state. Runs are large and per-machine; commit only the durable summaries.\",\n \"runs/\",\n \"dist/\",\n \"!hypotheses/\",\n \"\",\n ].join(\"\\n\"),\n );\n\n // 4) project .gitignore additions\n appendIfMissing(join(root, \".gitignore\"), \"# Kaizen\");\n appendIfMissing(join(root, \".gitignore\"), \"kaizen/.kaizen/runs/\");\n appendIfMissing(join(root, \".gitignore\"), \"kaizen/.kaizen/dist/\");\n\n process.stdout.write(\"\\nscaffolded:\\n\");\n process.stdout.write(` ${rel(root, configPath)}\\n`);\n process.stdout.write(` kaizen/systems/\\n`);\n process.stdout.write(` ${rel(root, join(stateDir, \".gitignore\"))}\\n`);\n process.stdout.write(\n ` package-owned agent guide available via \\`kaizen guide\\`\\n`,\n );\n\n process.stdout.write(\n [\n \"\",\n \"next:\",\n \" kaizen guide # print agent instructions from the installed package\",\n \" kaizen create system <name> # add your first system\",\n \" kaizen create view <name> --type trace\",\n \" kaizen create view <name> --type dataset-item\",\n \" kaizen studio # open the dashboard\",\n \"\",\n ].join(\"\\n\"),\n );\n\n return 0;\n}\n\nfunction inferCustomerName(root: string): string {\n const name = root.split(\"/\").pop() ?? \"\";\n return name || \"Customer\";\n}\n\nfunction rel(root: string, p: string): string {\n return p.startsWith(root) ? p.slice(root.length + 1) : p;\n}\n\nfunction renderConfig(vars: Record<string, string>): string {\n // JSON.stringify escapes quotes/backslashes/newlines correctly so user input\n // with characters like O'Brien or paths with backslashes don't produce\n // invalid TypeScript.\n return `import type { KaizenConfig } from \"@percepta/kaizen\";\n\nconst config: KaizenConfig = {\n customer: {\n name: ${JSON.stringify(vars.customer_name)},\n },\n};\n\nexport default config;\n`;\n}\n"],"mappings":";;;;;;;;AAiBA,eAAsB,QAAQ,MAAiC;CAC7D,MAAM,EAAE,YAAY,UAAU,WAAW,KAAK;AAC9C,KAAI,WAAW,SAAS,GAAG;AACzB,UAAQ,OAAO,MACb,yGACD;AACD,SAAO;;CAET,MAAM,QAAQ,SAAS,OAAO,QAAQ;CACtC,MAAM,OAAO,eAAe;CAC5B,MAAM,aAAa,iBAAiB,KAAK;CACzC,MAAM,WAAW,gBAAgB,KAAK;AAEtC,KAAI,WAAW,WAAW,IAAI,CAAC,OAAO;AACpC,UAAQ,OAAO,MACb,sCAAsC,WAAW,+FAElD;AACD,SAAO;;AAGT,SAAQ,OAAO,MAAM,4CAA4C;CAOjE,MAAM,OAA+B,EACnC,eALA,QAAQ,OAAO,gBAAgB,IAC/B,QAAQ,OAAO,WAAW,IACzB,MAAM,OAAO,yBAAyB,kBAAkB,KAAK,CAAC,EAIhE;AAGD,WAAU,UAAU,KAAK,CAAC;AAC1B,iBAAgB,YAAY,aAAa,KAAK,EAAE,EAAE,WAAW,OAAO,CAAC;AAGrE,WAAU,iBAAiB,KAAK,CAAC;AACjC,iBAAgB,KAAK,iBAAiB,KAAK,EAAE,WAAW,EAAE,GAAG;AAC7D,WAAU,SAAS;AAGnB,iBACE,KAAK,UAAU,aAAa,EAC5B;EACE;EACA;EACA;EACA;EACA;EACD,CAAC,KAAK,KAAK,CACb;AAGD,iBAAgB,KAAK,MAAM,aAAa,EAAE,WAAW;AACrD,iBAAgB,KAAK,MAAM,aAAa,EAAE,uBAAuB;AACjE,iBAAgB,KAAK,MAAM,aAAa,EAAE,uBAAuB;AAEjE,SAAQ,OAAO,MAAM,kBAAkB;AACvC,SAAQ,OAAO,MAAM,KAAK,IAAI,MAAM,WAAW,CAAC,IAAI;AACpD,SAAQ,OAAO,MAAM,sBAAsB;AAC3C,SAAQ,OAAO,MAAM,KAAK,IAAI,MAAM,KAAK,UAAU,aAAa,CAAC,CAAC,IAAI;AACtE,SAAQ,OAAO,MACb,+DACD;AAED,SAAQ,OAAO,MACb;EACE;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACD,CAAC,KAAK,KAAK,CACb;AAED,QAAO;;AAGT,SAAS,kBAAkB,MAAsB;AAE/C,SADa,KAAK,MAAM,IAAI,CAAC,KAAK,IAAI,OACvB;;AAGjB,SAAS,IAAI,MAAc,GAAmB;AAC5C,QAAO,EAAE,WAAW,KAAK,GAAG,EAAE,MAAM,KAAK,SAAS,EAAE,GAAG;;AAGzD,SAAS,aAAa,MAAsC;AAI1D,QAAO;;;;YAIG,KAAK,UAAU,KAAK,cAAc,CAAC"}
@@ -1,18 +1,16 @@
1
- import { boolFlag, parseFlags, strFlag } from "../lib/parse-args.js";
2
1
  import { resolveStateDir, workspaceRoot } from "../lib/paths.js";
2
+ import { pad, requireKaizenWorkspace } from "../lib/cli.js";
3
+ import { boolFlag, parseFlags, strFlag } from "../lib/parse-args.js";
3
4
  import { currentBaseline, listRuns } from "../lib/leaderboard.js";
4
5
  import { reapStaleRuns } from "../lib/runner.js";
5
- import { join } from "node:path";
6
6
  import { existsSync, readdirSync, statSync } from "node:fs";
7
+ import { join } from "node:path";
7
8
  //#region src/commands/log.ts
8
9
  async function runLog(argv) {
9
10
  const { flags } = parseFlags(argv);
10
11
  const root = workspaceRoot();
11
- const stateDir = resolveStateDir(root, strFlag(flags, "state-dir"));
12
- if (!existsSync(join(root, "kaizen.config.ts"))) {
13
- process.stderr.write(`no kaizen.config.ts in ${root}. run \`kaizen init\` first.\n`);
14
- return 1;
15
- }
12
+ const stateDir = resolveStateDir(root);
13
+ if (!requireKaizenWorkspace(root)) return 1;
16
14
  const single = strFlag(flags, "system");
17
15
  const n = Number(strFlag(flags, "n") ?? "10");
18
16
  const json = boolFlag(flags, "json");
@@ -102,10 +100,6 @@ function printSystem(stateDir, systemId, n) {
102
100
  process.stdout.write(pad(String(idx), 4) + pad(r.run_id, 14) + pad(r.variant, 22) + pad(score, 8) + pad(r.status, 11) + pad(promoted, 9) + pad(r.started_at.replace("T", " ").slice(0, 19), 21) + (r.hypothesis || "") + "\n");
103
101
  }
104
102
  }
105
- function pad(s, w) {
106
- if (s.length >= w) return s.slice(0, w - 1) + " ";
107
- return s + " ".repeat(w - s.length);
108
- }
109
103
  //#endregion
110
104
  export { runLog };
111
105
 
@@ -1 +1 @@
1
- {"version":3,"file":"log.js","names":[],"sources":["../../src/commands/log.ts"],"sourcesContent":["import { existsSync, readdirSync, statSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport {\n type RunSummary,\n currentBaseline,\n listRuns,\n} from \"../lib/leaderboard.js\";\nimport { boolFlag, parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { resolveStateDir, workspaceRoot } from \"../lib/paths.js\";\nimport { reapStaleRuns } from \"../lib/runner.js\";\n\nexport async function runLog(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const root = workspaceRoot();\n const stateDir = resolveStateDir(root, strFlag(flags, \"state-dir\"));\n\n if (!existsSync(join(root, \"kaizen.config.ts\"))) {\n process.stderr.write(\n `no kaizen.config.ts in ${root}. run \\`kaizen init\\` first.\\n`,\n );\n return 1;\n }\n\n const single = strFlag(flags, \"system\");\n const n = Number(strFlag(flags, \"n\") ?? \"10\");\n const json = boolFlag(flags, \"json\");\n\n const systems = single ? [single] : discoverSystems(stateDir);\n if (systems.length === 0) {\n process.stdout.write(\"no systems found.\\n\");\n return 0;\n }\n\n if (json) {\n const out = systems.map((s) => buildSystemReport(stateDir, s, n));\n process.stdout.write(JSON.stringify(out, null, 2) + \"\\n\");\n return 0;\n }\n\n for (let i = 0; i < systems.length; i++) {\n if (i > 0) process.stdout.write(\"\\n\");\n printSystem(stateDir, systems[i], n);\n }\n return 0;\n}\n\nfunction discoverSystems(stateDir: string): string[] {\n const runsRoot = join(stateDir, \"runs\");\n if (!existsSync(runsRoot)) return [];\n return readdirSync(runsRoot).filter((d) => {\n try {\n return statSync(join(runsRoot, d)).isDirectory();\n } catch {\n return false;\n }\n });\n}\n\ninterface SystemReport {\n system: string;\n baseline: {\n run_id: string;\n variant: string;\n score: number;\n n: number | null;\n eval_version: number;\n dataset_version: string;\n } | null;\n totals: {\n all: number;\n complete: number;\n crashed: number;\n aborted: number;\n running: number;\n };\n recent: RunSummary[];\n}\n\nfunction buildSystemReport(\n stateDir: string,\n systemId: string,\n n: number,\n): SystemReport {\n reapStaleRuns(stateDir, systemId);\n const runs = listRuns(stateDir, systemId);\n const baseline = pickBaseline(runs);\n const totals = countByStatus(runs);\n const recent = runs.slice(0, n);\n return {\n system: systemId,\n baseline: baseline\n ? {\n run_id: baseline.run_id,\n variant: baseline.variant,\n score: baseline.score!,\n n: baseline.n,\n eval_version: baseline.eval_version,\n dataset_version: baseline.dataset_version,\n }\n : null,\n totals,\n recent,\n };\n}\n\nfunction pickBaseline(runs: RunSummary[]): RunSummary | null {\n // Use the eval/dataset versions of the most recent complete run as the \"current\" anchor.\n const newest = runs.find((r) => r.status === \"complete\");\n if (!newest) return null;\n return currentBaseline(runs, newest.eval_version, newest.dataset_version);\n}\n\nfunction countByStatus(runs: RunSummary[]) {\n const c = {\n all: runs.length,\n complete: 0,\n crashed: 0,\n aborted: 0,\n running: 0,\n };\n for (const r of runs) {\n if (r.status === \"complete\") c.complete++;\n else if (r.status === \"crashed\") c.crashed++;\n else if (r.status === \"aborted\") c.aborted++;\n else if (r.status === \"running\") c.running++;\n }\n return c;\n}\n\nfunction printSystem(stateDir: string, systemId: string, n: number): void {\n const report = buildSystemReport(stateDir, systemId, n);\n const baseHeader = report.baseline\n ? `promoted baseline: ${report.baseline.run_id} ${report.baseline.variant} (score ${report.baseline.score.toFixed(4)}, n=${report.baseline.n ?? \"?\"}, eval_v=${report.baseline.eval_version}, dataset=${report.baseline.dataset_version})`\n : \"promoted baseline: <none>\";\n process.stdout.write(`system: ${systemId} ${baseHeader}\\n`);\n const t = report.totals;\n process.stdout.write(\n `total runs: ${t.all} completed: ${t.complete} crashed: ${t.crashed} aborted: ${t.aborted} running: ${t.running}\\n`,\n );\n if (report.recent.length === 0) {\n process.stdout.write(\"(no runs yet)\\n\");\n return;\n }\n process.stdout.write(\"\\n\");\n process.stdout.write(\n pad(\"#\", 4) +\n pad(\"run_id\", 14) +\n pad(\"variant\", 22) +\n pad(\"score\", 8) +\n pad(\"status\", 11) +\n pad(\"promoted\", 9) +\n pad(\"started_at\", 21) +\n \"hypothesis\\n\",\n );\n for (let i = 0; i < report.recent.length; i++) {\n const r = report.recent[i];\n const idx = report.totals.all - i;\n const score = r.score === null ? \"—\" : r.score.toFixed(3);\n const promoted = r.promoted === null ? \"—\" : r.promoted ? \"yes\" : \"no\";\n process.stdout.write(\n pad(String(idx), 4) +\n pad(r.run_id, 14) +\n pad(r.variant, 22) +\n pad(score, 8) +\n pad(r.status, 11) +\n pad(promoted, 9) +\n pad(r.started_at.replace(\"T\", \" \").slice(0, 19), 21) +\n (r.hypothesis || \"\") +\n \"\\n\",\n );\n }\n}\n\nfunction pad(s: string, w: number): string {\n if (s.length >= w) return s.slice(0, w - 1) + \" \";\n return s + \" \".repeat(w - s.length);\n}\n"],"mappings":";;;;;;;AAWA,eAAsB,OAAO,MAAiC;CAC5D,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,eAAe;CAC5B,MAAM,WAAW,gBAAgB,MAAM,QAAQ,OAAO,YAAY,CAAC;AAEnE,KAAI,CAAC,WAAW,KAAK,MAAM,mBAAmB,CAAC,EAAE;AAC/C,UAAQ,OAAO,MACb,0BAA0B,KAAK,gCAChC;AACD,SAAO;;CAGT,MAAM,SAAS,QAAQ,OAAO,SAAS;CACvC,MAAM,IAAI,OAAO,QAAQ,OAAO,IAAI,IAAI,KAAK;CAC7C,MAAM,OAAO,SAAS,OAAO,OAAO;CAEpC,MAAM,UAAU,SAAS,CAAC,OAAO,GAAG,gBAAgB,SAAS;AAC7D,KAAI,QAAQ,WAAW,GAAG;AACxB,UAAQ,OAAO,MAAM,sBAAsB;AAC3C,SAAO;;AAGT,KAAI,MAAM;EACR,MAAM,MAAM,QAAQ,KAAK,MAAM,kBAAkB,UAAU,GAAG,EAAE,CAAC;AACjE,UAAQ,OAAO,MAAM,KAAK,UAAU,KAAK,MAAM,EAAE,GAAG,KAAK;AACzD,SAAO;;AAGT,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,MAAI,IAAI,EAAG,SAAQ,OAAO,MAAM,KAAK;AACrC,cAAY,UAAU,QAAQ,IAAI,EAAE;;AAEtC,QAAO;;AAGT,SAAS,gBAAgB,UAA4B;CACnD,MAAM,WAAW,KAAK,UAAU,OAAO;AACvC,KAAI,CAAC,WAAW,SAAS,CAAE,QAAO,EAAE;AACpC,QAAO,YAAY,SAAS,CAAC,QAAQ,MAAM;AACzC,MAAI;AACF,UAAO,SAAS,KAAK,UAAU,EAAE,CAAC,CAAC,aAAa;UAC1C;AACN,UAAO;;GAET;;AAuBJ,SAAS,kBACP,UACA,UACA,GACc;AACd,eAAc,UAAU,SAAS;CACjC,MAAM,OAAO,SAAS,UAAU,SAAS;CACzC,MAAM,WAAW,aAAa,KAAK;CACnC,MAAM,SAAS,cAAc,KAAK;CAClC,MAAM,SAAS,KAAK,MAAM,GAAG,EAAE;AAC/B,QAAO;EACL,QAAQ;EACR,UAAU,WACN;GACE,QAAQ,SAAS;GACjB,SAAS,SAAS;GAClB,OAAO,SAAS;GAChB,GAAG,SAAS;GACZ,cAAc,SAAS;GACvB,iBAAiB,SAAS;GAC3B,GACD;EACJ;EACA;EACD;;AAGH,SAAS,aAAa,MAAuC;CAE3D,MAAM,SAAS,KAAK,MAAM,MAAM,EAAE,WAAW,WAAW;AACxD,KAAI,CAAC,OAAQ,QAAO;AACpB,QAAO,gBAAgB,MAAM,OAAO,cAAc,OAAO,gBAAgB;;AAG3E,SAAS,cAAc,MAAoB;CACzC,MAAM,IAAI;EACR,KAAK,KAAK;EACV,UAAU;EACV,SAAS;EACT,SAAS;EACT,SAAS;EACV;AACD,MAAK,MAAM,KAAK,KACd,KAAI,EAAE,WAAW,WAAY,GAAE;UACtB,EAAE,WAAW,UAAW,GAAE;UAC1B,EAAE,WAAW,UAAW,GAAE;UAC1B,EAAE,WAAW,UAAW,GAAE;AAErC,QAAO;;AAGT,SAAS,YAAY,UAAkB,UAAkB,GAAiB;CACxE,MAAM,SAAS,kBAAkB,UAAU,UAAU,EAAE;CACvD,MAAM,aAAa,OAAO,WACtB,sBAAsB,OAAO,SAAS,OAAO,GAAG,OAAO,SAAS,QAAQ,UAAU,OAAO,SAAS,MAAM,QAAQ,EAAE,CAAC,MAAM,OAAO,SAAS,KAAK,IAAI,WAAW,OAAO,SAAS,aAAa,YAAY,OAAO,SAAS,gBAAgB,KACtO;AACJ,SAAQ,OAAO,MAAM,WAAW,SAAS,KAAK,WAAW,IAAI;CAC7D,MAAM,IAAI,OAAO;AACjB,SAAQ,OAAO,MACb,eAAe,EAAE,IAAI,gBAAgB,EAAE,SAAS,cAAc,EAAE,QAAQ,cAAc,EAAE,QAAQ,cAAc,EAAE,QAAQ,IACzH;AACD,KAAI,OAAO,OAAO,WAAW,GAAG;AAC9B,UAAQ,OAAO,MAAM,kBAAkB;AACvC;;AAEF,SAAQ,OAAO,MAAM,KAAK;AAC1B,SAAQ,OAAO,MACb,IAAI,KAAK,EAAE,GACT,IAAI,UAAU,GAAG,GACjB,IAAI,WAAW,GAAG,GAClB,IAAI,SAAS,EAAE,GACf,IAAI,UAAU,GAAG,GACjB,IAAI,YAAY,EAAE,GAClB,IAAI,cAAc,GAAG,GACrB,eACH;AACD,MAAK,IAAI,IAAI,GAAG,IAAI,OAAO,OAAO,QAAQ,KAAK;EAC7C,MAAM,IAAI,OAAO,OAAO;EACxB,MAAM,MAAM,OAAO,OAAO,MAAM;EAChC,MAAM,QAAQ,EAAE,UAAU,OAAO,MAAM,EAAE,MAAM,QAAQ,EAAE;EACzD,MAAM,WAAW,EAAE,aAAa,OAAO,MAAM,EAAE,WAAW,QAAQ;AAClE,UAAQ,OAAO,MACb,IAAI,OAAO,IAAI,EAAE,EAAE,GACjB,IAAI,EAAE,QAAQ,GAAG,GACjB,IAAI,EAAE,SAAS,GAAG,GAClB,IAAI,OAAO,EAAE,GACb,IAAI,EAAE,QAAQ,GAAG,GACjB,IAAI,UAAU,EAAE,GAChB,IAAI,EAAE,WAAW,QAAQ,KAAK,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,GAAG,IACnD,EAAE,cAAc,MACjB,KACH;;;AAIL,SAAS,IAAI,GAAW,GAAmB;AACzC,KAAI,EAAE,UAAU,EAAG,QAAO,EAAE,MAAM,GAAG,IAAI,EAAE,GAAG;AAC9C,QAAO,IAAI,IAAI,OAAO,IAAI,EAAE,OAAO"}
1
+ {"version":3,"file":"log.js","names":[],"sources":["../../src/commands/log.ts"],"sourcesContent":["import { existsSync, readdirSync, statSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { pad, requireKaizenWorkspace } from \"../lib/cli.js\";\nimport {\n type RunSummary,\n currentBaseline,\n listRuns,\n} from \"../lib/leaderboard.js\";\nimport { boolFlag, parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { resolveStateDir, workspaceRoot } from \"../lib/paths.js\";\nimport { reapStaleRuns } from \"../lib/runner.js\";\n\nexport async function runLog(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const root = workspaceRoot();\n const stateDir = resolveStateDir(root);\n\n if (!requireKaizenWorkspace(root)) return 1;\n\n const single = strFlag(flags, \"system\");\n const n = Number(strFlag(flags, \"n\") ?? \"10\");\n const json = boolFlag(flags, \"json\");\n\n const systems = single ? [single] : discoverSystems(stateDir);\n if (systems.length === 0) {\n process.stdout.write(\"no systems found.\\n\");\n return 0;\n }\n\n if (json) {\n const out = systems.map((s) => buildSystemReport(stateDir, s, n));\n process.stdout.write(JSON.stringify(out, null, 2) + \"\\n\");\n return 0;\n }\n\n for (let i = 0; i < systems.length; i++) {\n if (i > 0) process.stdout.write(\"\\n\");\n printSystem(stateDir, systems[i], n);\n }\n return 0;\n}\n\nfunction discoverSystems(stateDir: string): string[] {\n const runsRoot = join(stateDir, \"runs\");\n if (!existsSync(runsRoot)) return [];\n return readdirSync(runsRoot).filter((d) => {\n try {\n return statSync(join(runsRoot, d)).isDirectory();\n } catch {\n return false;\n }\n });\n}\n\ninterface SystemReport {\n system: string;\n baseline: {\n run_id: string;\n variant: string;\n score: number;\n n: number | null;\n eval_version: number;\n dataset_version: string;\n } | null;\n totals: {\n all: number;\n complete: number;\n crashed: number;\n aborted: number;\n running: number;\n };\n recent: RunSummary[];\n}\n\nfunction buildSystemReport(\n stateDir: string,\n systemId: string,\n n: number,\n): SystemReport {\n reapStaleRuns(stateDir, systemId);\n const runs = listRuns(stateDir, systemId);\n const baseline = pickBaseline(runs);\n const totals = countByStatus(runs);\n const recent = runs.slice(0, n);\n return {\n system: systemId,\n baseline: baseline\n ? {\n run_id: baseline.run_id,\n variant: baseline.variant,\n score: baseline.score!,\n n: baseline.n,\n eval_version: baseline.eval_version,\n dataset_version: baseline.dataset_version,\n }\n : null,\n totals,\n recent,\n };\n}\n\nfunction pickBaseline(runs: RunSummary[]): RunSummary | null {\n // Use the eval/dataset versions of the most recent complete run as the \"current\" anchor.\n const newest = runs.find((r) => r.status === \"complete\");\n if (!newest) return null;\n return currentBaseline(runs, newest.eval_version, newest.dataset_version);\n}\n\nfunction countByStatus(runs: RunSummary[]) {\n const c = {\n all: runs.length,\n complete: 0,\n crashed: 0,\n aborted: 0,\n running: 0,\n };\n for (const r of runs) {\n if (r.status === \"complete\") c.complete++;\n else if (r.status === \"crashed\") c.crashed++;\n else if (r.status === \"aborted\") c.aborted++;\n else if (r.status === \"running\") c.running++;\n }\n return c;\n}\n\nfunction printSystem(stateDir: string, systemId: string, n: number): void {\n const report = buildSystemReport(stateDir, systemId, n);\n const baseHeader = report.baseline\n ? `promoted baseline: ${report.baseline.run_id} ${report.baseline.variant} (score ${report.baseline.score.toFixed(4)}, n=${report.baseline.n ?? \"?\"}, eval_v=${report.baseline.eval_version}, dataset=${report.baseline.dataset_version})`\n : \"promoted baseline: <none>\";\n process.stdout.write(`system: ${systemId} ${baseHeader}\\n`);\n const t = report.totals;\n process.stdout.write(\n `total runs: ${t.all} completed: ${t.complete} crashed: ${t.crashed} aborted: ${t.aborted} running: ${t.running}\\n`,\n );\n if (report.recent.length === 0) {\n process.stdout.write(\"(no runs yet)\\n\");\n return;\n }\n process.stdout.write(\"\\n\");\n process.stdout.write(\n pad(\"#\", 4) +\n pad(\"run_id\", 14) +\n pad(\"variant\", 22) +\n pad(\"score\", 8) +\n pad(\"status\", 11) +\n pad(\"promoted\", 9) +\n pad(\"started_at\", 21) +\n \"hypothesis\\n\",\n );\n for (let i = 0; i < report.recent.length; i++) {\n const r = report.recent[i];\n const idx = report.totals.all - i;\n const score = r.score === null ? \"—\" : r.score.toFixed(3);\n const promoted = r.promoted === null ? \"—\" : r.promoted ? \"yes\" : \"no\";\n process.stdout.write(\n pad(String(idx), 4) +\n pad(r.run_id, 14) +\n pad(r.variant, 22) +\n pad(score, 8) +\n pad(r.status, 11) +\n pad(promoted, 9) +\n pad(r.started_at.replace(\"T\", \" \").slice(0, 19), 21) +\n (r.hypothesis || \"\") +\n \"\\n\",\n );\n }\n}\n"],"mappings":";;;;;;;;AAYA,eAAsB,OAAO,MAAiC;CAC5D,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,eAAe;CAC5B,MAAM,WAAW,gBAAgB,KAAK;AAEtC,KAAI,CAAC,uBAAuB,KAAK,CAAE,QAAO;CAE1C,MAAM,SAAS,QAAQ,OAAO,SAAS;CACvC,MAAM,IAAI,OAAO,QAAQ,OAAO,IAAI,IAAI,KAAK;CAC7C,MAAM,OAAO,SAAS,OAAO,OAAO;CAEpC,MAAM,UAAU,SAAS,CAAC,OAAO,GAAG,gBAAgB,SAAS;AAC7D,KAAI,QAAQ,WAAW,GAAG;AACxB,UAAQ,OAAO,MAAM,sBAAsB;AAC3C,SAAO;;AAGT,KAAI,MAAM;EACR,MAAM,MAAM,QAAQ,KAAK,MAAM,kBAAkB,UAAU,GAAG,EAAE,CAAC;AACjE,UAAQ,OAAO,MAAM,KAAK,UAAU,KAAK,MAAM,EAAE,GAAG,KAAK;AACzD,SAAO;;AAGT,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,MAAI,IAAI,EAAG,SAAQ,OAAO,MAAM,KAAK;AACrC,cAAY,UAAU,QAAQ,IAAI,EAAE;;AAEtC,QAAO;;AAGT,SAAS,gBAAgB,UAA4B;CACnD,MAAM,WAAW,KAAK,UAAU,OAAO;AACvC,KAAI,CAAC,WAAW,SAAS,CAAE,QAAO,EAAE;AACpC,QAAO,YAAY,SAAS,CAAC,QAAQ,MAAM;AACzC,MAAI;AACF,UAAO,SAAS,KAAK,UAAU,EAAE,CAAC,CAAC,aAAa;UAC1C;AACN,UAAO;;GAET;;AAuBJ,SAAS,kBACP,UACA,UACA,GACc;AACd,eAAc,UAAU,SAAS;CACjC,MAAM,OAAO,SAAS,UAAU,SAAS;CACzC,MAAM,WAAW,aAAa,KAAK;CACnC,MAAM,SAAS,cAAc,KAAK;CAClC,MAAM,SAAS,KAAK,MAAM,GAAG,EAAE;AAC/B,QAAO;EACL,QAAQ;EACR,UAAU,WACN;GACE,QAAQ,SAAS;GACjB,SAAS,SAAS;GAClB,OAAO,SAAS;GAChB,GAAG,SAAS;GACZ,cAAc,SAAS;GACvB,iBAAiB,SAAS;GAC3B,GACD;EACJ;EACA;EACD;;AAGH,SAAS,aAAa,MAAuC;CAE3D,MAAM,SAAS,KAAK,MAAM,MAAM,EAAE,WAAW,WAAW;AACxD,KAAI,CAAC,OAAQ,QAAO;AACpB,QAAO,gBAAgB,MAAM,OAAO,cAAc,OAAO,gBAAgB;;AAG3E,SAAS,cAAc,MAAoB;CACzC,MAAM,IAAI;EACR,KAAK,KAAK;EACV,UAAU;EACV,SAAS;EACT,SAAS;EACT,SAAS;EACV;AACD,MAAK,MAAM,KAAK,KACd,KAAI,EAAE,WAAW,WAAY,GAAE;UACtB,EAAE,WAAW,UAAW,GAAE;UAC1B,EAAE,WAAW,UAAW,GAAE;UAC1B,EAAE,WAAW,UAAW,GAAE;AAErC,QAAO;;AAGT,SAAS,YAAY,UAAkB,UAAkB,GAAiB;CACxE,MAAM,SAAS,kBAAkB,UAAU,UAAU,EAAE;CACvD,MAAM,aAAa,OAAO,WACtB,sBAAsB,OAAO,SAAS,OAAO,GAAG,OAAO,SAAS,QAAQ,UAAU,OAAO,SAAS,MAAM,QAAQ,EAAE,CAAC,MAAM,OAAO,SAAS,KAAK,IAAI,WAAW,OAAO,SAAS,aAAa,YAAY,OAAO,SAAS,gBAAgB,KACtO;AACJ,SAAQ,OAAO,MAAM,WAAW,SAAS,KAAK,WAAW,IAAI;CAC7D,MAAM,IAAI,OAAO;AACjB,SAAQ,OAAO,MACb,eAAe,EAAE,IAAI,gBAAgB,EAAE,SAAS,cAAc,EAAE,QAAQ,cAAc,EAAE,QAAQ,cAAc,EAAE,QAAQ,IACzH;AACD,KAAI,OAAO,OAAO,WAAW,GAAG;AAC9B,UAAQ,OAAO,MAAM,kBAAkB;AACvC;;AAEF,SAAQ,OAAO,MAAM,KAAK;AAC1B,SAAQ,OAAO,MACb,IAAI,KAAK,EAAE,GACT,IAAI,UAAU,GAAG,GACjB,IAAI,WAAW,GAAG,GAClB,IAAI,SAAS,EAAE,GACf,IAAI,UAAU,GAAG,GACjB,IAAI,YAAY,EAAE,GAClB,IAAI,cAAc,GAAG,GACrB,eACH;AACD,MAAK,IAAI,IAAI,GAAG,IAAI,OAAO,OAAO,QAAQ,KAAK;EAC7C,MAAM,IAAI,OAAO,OAAO;EACxB,MAAM,MAAM,OAAO,OAAO,MAAM;EAChC,MAAM,QAAQ,EAAE,UAAU,OAAO,MAAM,EAAE,MAAM,QAAQ,EAAE;EACzD,MAAM,WAAW,EAAE,aAAa,OAAO,MAAM,EAAE,WAAW,QAAQ;AAClE,UAAQ,OAAO,MACb,IAAI,OAAO,IAAI,EAAE,EAAE,GACjB,IAAI,EAAE,QAAQ,GAAG,GACjB,IAAI,EAAE,SAAS,GAAG,GAClB,IAAI,OAAO,EAAE,GACb,IAAI,EAAE,QAAQ,GAAG,GACjB,IAAI,UAAU,EAAE,GAChB,IAAI,EAAE,WAAW,QAAQ,KAAK,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,GAAG,IACnD,EAAE,cAAc,MACjB,KACH"}
@@ -1,22 +1,20 @@
1
- import { parseFlags, strFlag } from "../lib/parse-args.js";
2
1
  import { resolveStateDir, workspaceRoot } from "../lib/paths.js";
3
- import { findTerminal } from "../lib/events.js";
2
+ import { requireKaizenWorkspace } from "../lib/cli.js";
3
+ import { parseFlags, strFlag } from "../lib/parse-args.js";
4
4
  import { readJsonIfExists, readNdjson, writeJsonAtomic } from "../lib/run-dir.js";
5
- import { join } from "node:path";
5
+ import { findTerminal } from "../lib/events.js";
6
6
  import { existsSync, readdirSync } from "node:fs";
7
+ import { join } from "node:path";
7
8
  //#region src/commands/rebuild.ts
8
9
  async function runRebuild(argv) {
9
10
  const { flags } = parseFlags(argv);
10
11
  const root = workspaceRoot();
11
- const stateDir = resolveStateDir(root, strFlag(flags, "state-dir"));
12
- if (!existsSync(join(root, "kaizen.config.ts"))) {
13
- process.stderr.write(`no kaizen.config.ts in ${root}. run \`kaizen init\` first.\n`);
14
- return 1;
15
- }
12
+ const stateDir = resolveStateDir(root);
13
+ if (!requireKaizenWorkspace(root)) return 1;
16
14
  const single = strFlag(flags, "system");
17
15
  const runsRoot = join(stateDir, "runs");
18
16
  if (!existsSync(runsRoot)) {
19
- process.stdout.write("nothing to rebuild (no .kaizen/runs/).\n");
17
+ process.stdout.write("nothing to rebuild (no kaizen/.kaizen/runs/).\n");
20
18
  return 0;
21
19
  }
22
20
  const systems = single ? [single] : readdirSync(runsRoot);
@@ -1 +1 @@
1
- {"version":3,"file":"rebuild.js","names":[],"sources":["../../src/commands/rebuild.ts"],"sourcesContent":["import { existsSync, readdirSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { type Event, findTerminal } from \"../lib/events.js\";\nimport type { ManifestFile, StateFile } from \"../lib/leaderboard.js\";\nimport { parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { resolveStateDir, workspaceRoot } from \"../lib/paths.js\";\nimport {\n readJsonIfExists,\n readNdjson,\n writeJsonAtomic,\n} from \"../lib/run-dir.js\";\n\nexport async function runRebuild(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const root = workspaceRoot();\n const stateDir = resolveStateDir(root, strFlag(flags, \"state-dir\"));\n\n if (!existsSync(join(root, \"kaizen.config.ts\"))) {\n process.stderr.write(\n `no kaizen.config.ts in ${root}. run \\`kaizen init\\` first.\\n`,\n );\n return 1;\n }\n\n const single = strFlag(flags, \"system\");\n const runsRoot = join(stateDir, \"runs\");\n if (!existsSync(runsRoot)) {\n process.stdout.write(\"nothing to rebuild (no .kaizen/runs/).\\n\");\n return 0;\n }\n\n const systems = single ? [single] : readdirSync(runsRoot);\n let touched = 0;\n let scanned = 0;\n for (const system of systems) {\n const pdir = join(runsRoot, system);\n if (!existsSync(pdir)) continue;\n for (const runId of readdirSync(pdir)) {\n const dir = join(pdir, runId);\n const manifest = readJsonIfExists<ManifestFile>(\n join(dir, \"manifest.json\"),\n );\n if (!manifest) continue;\n scanned++;\n const events = readNdjson<Event>(join(dir, \"events.jsonl\"));\n const newState = deriveState(manifest, events);\n const oldState = readJsonIfExists<StateFile>(join(dir, \"state.json\"));\n if (JSON.stringify(oldState) === JSON.stringify(newState)) continue;\n writeJsonAtomic(join(dir, \"state.json\"), newState);\n touched++;\n }\n }\n process.stdout.write(\n `rebuild: scanned ${scanned} run(s), updated ${touched} state.json file(s).\\n`,\n );\n return 0;\n}\n\nfunction deriveState(\n manifest: ManifestFile,\n events: Event[],\n): StateFile & {\n system: string;\n variant: string;\n eval_version: number;\n dataset_version: string;\n started_at: string;\n updated_at: string;\n} {\n let nTotal: number | null = null;\n let nDone = 0;\n let score: number | null = null;\n let promoted: boolean | null = null;\n let lastTs: number | null = null;\n for (const e of events) {\n if (e.type === \"start\") nTotal = e.n;\n else if (e.type === \"item\") nDone++;\n else if (e.type === \"complete\") score = e.score;\n else if (e.type === \"promotion\")\n promoted = (e as { promoted?: boolean }).promoted ?? null;\n if (typeof e.ts === \"number\") lastTs = e.ts;\n }\n const terminal = findTerminal(events);\n let status: StateFile[\"status\"] = \"running\";\n if (terminal) {\n if (terminal.type === \"complete\") status = \"complete\";\n else if (terminal.type === \"crashed\") status = \"crashed\";\n else if (terminal.type === \"aborted\") status = \"aborted\";\n }\n const endedAt =\n terminal && lastTs ? new Date(lastTs * 1000).toISOString() : null;\n return {\n run_id: manifest.run_id,\n system: manifest.system,\n variant: manifest.variant,\n status,\n score,\n n_total: nTotal,\n n_done: nDone,\n promoted,\n started_at: manifest.started_at,\n updated_at: endedAt ?? new Date().toISOString(),\n ended_at: endedAt,\n eval_version: manifest.eval_version,\n dataset_version: manifest.dataset_version,\n };\n}\n"],"mappings":";;;;;;;AAYA,eAAsB,WAAW,MAAiC;CAChE,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,eAAe;CAC5B,MAAM,WAAW,gBAAgB,MAAM,QAAQ,OAAO,YAAY,CAAC;AAEnE,KAAI,CAAC,WAAW,KAAK,MAAM,mBAAmB,CAAC,EAAE;AAC/C,UAAQ,OAAO,MACb,0BAA0B,KAAK,gCAChC;AACD,SAAO;;CAGT,MAAM,SAAS,QAAQ,OAAO,SAAS;CACvC,MAAM,WAAW,KAAK,UAAU,OAAO;AACvC,KAAI,CAAC,WAAW,SAAS,EAAE;AACzB,UAAQ,OAAO,MAAM,2CAA2C;AAChE,SAAO;;CAGT,MAAM,UAAU,SAAS,CAAC,OAAO,GAAG,YAAY,SAAS;CACzD,IAAI,UAAU;CACd,IAAI,UAAU;AACd,MAAK,MAAM,UAAU,SAAS;EAC5B,MAAM,OAAO,KAAK,UAAU,OAAO;AACnC,MAAI,CAAC,WAAW,KAAK,CAAE;AACvB,OAAK,MAAM,SAAS,YAAY,KAAK,EAAE;GACrC,MAAM,MAAM,KAAK,MAAM,MAAM;GAC7B,MAAM,WAAW,iBACf,KAAK,KAAK,gBAAgB,CAC3B;AACD,OAAI,CAAC,SAAU;AACf;GAEA,MAAM,WAAW,YAAY,UADd,WAAkB,KAAK,KAAK,eAAe,CACb,CAAC;GAC9C,MAAM,WAAW,iBAA4B,KAAK,KAAK,aAAa,CAAC;AACrE,OAAI,KAAK,UAAU,SAAS,KAAK,KAAK,UAAU,SAAS,CAAE;AAC3D,mBAAgB,KAAK,KAAK,aAAa,EAAE,SAAS;AAClD;;;AAGJ,SAAQ,OAAO,MACb,oBAAoB,QAAQ,mBAAmB,QAAQ,wBACxD;AACD,QAAO;;AAGT,SAAS,YACP,UACA,QAQA;CACA,IAAI,SAAwB;CAC5B,IAAI,QAAQ;CACZ,IAAI,QAAuB;CAC3B,IAAI,WAA2B;CAC/B,IAAI,SAAwB;AAC5B,MAAK,MAAM,KAAK,QAAQ;AACtB,MAAI,EAAE,SAAS,QAAS,UAAS,EAAE;WAC1B,EAAE,SAAS,OAAQ;WACnB,EAAE,SAAS,WAAY,SAAQ,EAAE;WACjC,EAAE,SAAS,YAClB,YAAY,EAA6B,YAAY;AACvD,MAAI,OAAO,EAAE,OAAO,SAAU,UAAS,EAAE;;CAE3C,MAAM,WAAW,aAAa,OAAO;CACrC,IAAI,SAA8B;AAClC,KAAI;MACE,SAAS,SAAS,WAAY,UAAS;WAClC,SAAS,SAAS,UAAW,UAAS;WACtC,SAAS,SAAS,UAAW,UAAS;;CAEjD,MAAM,UACJ,YAAY,0BAAS,IAAI,KAAK,SAAS,IAAK,EAAC,aAAa,GAAG;AAC/D,QAAO;EACL,QAAQ,SAAS;EACjB,QAAQ,SAAS;EACjB,SAAS,SAAS;EAClB;EACA;EACA,SAAS;EACT,QAAQ;EACR;EACA,YAAY,SAAS;EACrB,YAAY,4BAAW,IAAI,MAAM,EAAC,aAAa;EAC/C,UAAU;EACV,cAAc,SAAS;EACvB,iBAAiB,SAAS;EAC3B"}
1
+ {"version":3,"file":"rebuild.js","names":[],"sources":["../../src/commands/rebuild.ts"],"sourcesContent":["import { existsSync, readdirSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { requireKaizenWorkspace } from \"../lib/cli.js\";\nimport { type Event, findTerminal } from \"../lib/events.js\";\nimport type { ManifestFile, StateFile } from \"../lib/leaderboard.js\";\nimport { parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { resolveStateDir, workspaceRoot } from \"../lib/paths.js\";\nimport {\n readJsonIfExists,\n readNdjson,\n writeJsonAtomic,\n} from \"../lib/run-dir.js\";\n\nexport async function runRebuild(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const root = workspaceRoot();\n const stateDir = resolveStateDir(root);\n\n if (!requireKaizenWorkspace(root)) return 1;\n\n const single = strFlag(flags, \"system\");\n const runsRoot = join(stateDir, \"runs\");\n if (!existsSync(runsRoot)) {\n process.stdout.write(\"nothing to rebuild (no kaizen/.kaizen/runs/).\\n\");\n return 0;\n }\n\n const systems = single ? [single] : readdirSync(runsRoot);\n let touched = 0;\n let scanned = 0;\n for (const system of systems) {\n const pdir = join(runsRoot, system);\n if (!existsSync(pdir)) continue;\n for (const runId of readdirSync(pdir)) {\n const dir = join(pdir, runId);\n const manifest = readJsonIfExists<ManifestFile>(\n join(dir, \"manifest.json\"),\n );\n if (!manifest) continue;\n scanned++;\n const events = readNdjson<Event>(join(dir, \"events.jsonl\"));\n const newState = deriveState(manifest, events);\n const oldState = readJsonIfExists<StateFile>(join(dir, \"state.json\"));\n if (JSON.stringify(oldState) === JSON.stringify(newState)) continue;\n writeJsonAtomic(join(dir, \"state.json\"), newState);\n touched++;\n }\n }\n process.stdout.write(\n `rebuild: scanned ${scanned} run(s), updated ${touched} state.json file(s).\\n`,\n );\n return 0;\n}\n\nfunction deriveState(\n manifest: ManifestFile,\n events: Event[],\n): StateFile & {\n system: string;\n variant: string;\n eval_version: number;\n dataset_version: string;\n started_at: string;\n updated_at: string;\n} {\n let nTotal: number | null = null;\n let nDone = 0;\n let score: number | null = null;\n let promoted: boolean | null = null;\n let lastTs: number | null = null;\n for (const e of events) {\n if (e.type === \"start\") nTotal = e.n;\n else if (e.type === \"item\") nDone++;\n else if (e.type === \"complete\") score = e.score;\n else if (e.type === \"promotion\")\n promoted = (e as { promoted?: boolean }).promoted ?? null;\n if (typeof e.ts === \"number\") lastTs = e.ts;\n }\n const terminal = findTerminal(events);\n let status: StateFile[\"status\"] = \"running\";\n if (terminal) {\n if (terminal.type === \"complete\") status = \"complete\";\n else if (terminal.type === \"crashed\") status = \"crashed\";\n else if (terminal.type === \"aborted\") status = \"aborted\";\n }\n const endedAt =\n terminal && lastTs ? new Date(lastTs * 1000).toISOString() : null;\n return {\n run_id: manifest.run_id,\n system: manifest.system,\n variant: manifest.variant,\n status,\n score,\n n_total: nTotal,\n n_done: nDone,\n promoted,\n started_at: manifest.started_at,\n updated_at: endedAt ?? new Date().toISOString(),\n ended_at: endedAt,\n eval_version: manifest.eval_version,\n dataset_version: manifest.dataset_version,\n };\n}\n"],"mappings":";;;;;;;;AAaA,eAAsB,WAAW,MAAiC;CAChE,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,eAAe;CAC5B,MAAM,WAAW,gBAAgB,KAAK;AAEtC,KAAI,CAAC,uBAAuB,KAAK,CAAE,QAAO;CAE1C,MAAM,SAAS,QAAQ,OAAO,SAAS;CACvC,MAAM,WAAW,KAAK,UAAU,OAAO;AACvC,KAAI,CAAC,WAAW,SAAS,EAAE;AACzB,UAAQ,OAAO,MAAM,kDAAkD;AACvE,SAAO;;CAGT,MAAM,UAAU,SAAS,CAAC,OAAO,GAAG,YAAY,SAAS;CACzD,IAAI,UAAU;CACd,IAAI,UAAU;AACd,MAAK,MAAM,UAAU,SAAS;EAC5B,MAAM,OAAO,KAAK,UAAU,OAAO;AACnC,MAAI,CAAC,WAAW,KAAK,CAAE;AACvB,OAAK,MAAM,SAAS,YAAY,KAAK,EAAE;GACrC,MAAM,MAAM,KAAK,MAAM,MAAM;GAC7B,MAAM,WAAW,iBACf,KAAK,KAAK,gBAAgB,CAC3B;AACD,OAAI,CAAC,SAAU;AACf;GAEA,MAAM,WAAW,YAAY,UADd,WAAkB,KAAK,KAAK,eAAe,CACb,CAAC;GAC9C,MAAM,WAAW,iBAA4B,KAAK,KAAK,aAAa,CAAC;AACrE,OAAI,KAAK,UAAU,SAAS,KAAK,KAAK,UAAU,SAAS,CAAE;AAC3D,mBAAgB,KAAK,KAAK,aAAa,EAAE,SAAS;AAClD;;;AAGJ,SAAQ,OAAO,MACb,oBAAoB,QAAQ,mBAAmB,QAAQ,wBACxD;AACD,QAAO;;AAGT,SAAS,YACP,UACA,QAQA;CACA,IAAI,SAAwB;CAC5B,IAAI,QAAQ;CACZ,IAAI,QAAuB;CAC3B,IAAI,WAA2B;CAC/B,IAAI,SAAwB;AAC5B,MAAK,MAAM,KAAK,QAAQ;AACtB,MAAI,EAAE,SAAS,QAAS,UAAS,EAAE;WAC1B,EAAE,SAAS,OAAQ;WACnB,EAAE,SAAS,WAAY,SAAQ,EAAE;WACjC,EAAE,SAAS,YAClB,YAAY,EAA6B,YAAY;AACvD,MAAI,OAAO,EAAE,OAAO,SAAU,UAAS,EAAE;;CAE3C,MAAM,WAAW,aAAa,OAAO;CACrC,IAAI,SAA8B;AAClC,KAAI;MACE,SAAS,SAAS,WAAY,UAAS;WAClC,SAAS,SAAS,UAAW,UAAS;WACtC,SAAS,SAAS,UAAW,UAAS;;CAEjD,MAAM,UACJ,YAAY,0BAAS,IAAI,KAAK,SAAS,IAAK,EAAC,aAAa,GAAG;AAC/D,QAAO;EACL,QAAQ,SAAS;EACjB,QAAQ,SAAS;EACjB,SAAS,SAAS;EAClB;EACA;EACA,SAAS;EACT,QAAQ;EACR;EACA,YAAY,SAAS;EACrB,YAAY,4BAAW,IAAI,MAAM,EAAC,aAAa;EAC/C,UAAU;EACV,cAAc,SAAS;EACvB,iBAAiB,SAAS;EAC3B"}
@@ -1,19 +1,16 @@
1
+ import { workspaceRoot } from "../lib/paths.js";
2
+ import { requireKaizenWorkspace } from "../lib/cli.js";
1
3
  import { boolFlag, parseFlags, strFlag } from "../lib/parse-args.js";
2
- import { resolveStateDir, workspaceRoot } from "../lib/paths.js";
3
4
  import { runExperiment } from "../lib/runner.js";
4
5
  import { normalizeLinearIssue } from "../shared/linear-issue.js";
5
- import { dirname, join, resolve } from "node:path";
6
- import { existsSync, readFileSync } from "node:fs";
6
+ import { readFileSync } from "node:fs";
7
+ import { dirname, resolve } from "node:path";
7
8
  import { fileURLToPath } from "node:url";
8
9
  //#region src/commands/run.ts
9
10
  async function runRun(argv) {
10
11
  const { flags } = parseFlags(argv);
11
12
  const root = workspaceRoot();
12
- const stateDir = resolveStateDir(root, strFlag(flags, "state-dir"));
13
- if (!existsSync(join(root, "kaizen.config.ts"))) {
14
- process.stderr.write(`no kaizen.config.ts in ${root}. run \`kaizen init\` first.\n`);
15
- return 1;
16
- }
13
+ if (!requireKaizenWorkspace(root)) return 1;
17
14
  const system = strFlag(flags, "system");
18
15
  const variant = strFlag(flags, "variant");
19
16
  if (!system) {
@@ -42,7 +39,6 @@ async function runRun(argv) {
42
39
  }
43
40
  const result = await runExperiment({
44
41
  workspace: root,
45
- stateDir,
46
42
  systemId: system,
47
43
  variant,
48
44
  parent,
@@ -1 +1 @@
1
- {"version":3,"file":"run.js","names":[],"sources":["../../src/commands/run.ts"],"sourcesContent":["import { existsSync, readFileSync } from \"node:fs\";\nimport { dirname, join, resolve } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport { normalizeLinearIssue } from \"../../shared/linear-issue.js\";\nimport { boolFlag, parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { resolveStateDir, workspaceRoot } from \"../lib/paths.js\";\nimport { runExperiment } from \"../lib/runner.js\";\n\nexport async function runRun(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const root = workspaceRoot();\n const stateDir = resolveStateDir(root, strFlag(flags, \"state-dir\"));\n\n if (!existsSync(join(root, \"kaizen.config.ts\"))) {\n process.stderr.write(\n `no kaizen.config.ts in ${root}. run \\`kaizen init\\` first.\\n`,\n );\n return 1;\n }\n\n const system = strFlag(flags, \"system\");\n const variant = strFlag(flags, \"variant\");\n if (!system) {\n process.stderr.write(\"kaizen run: --system <id> is required\\n\");\n return 1;\n }\n if (!variant) {\n process.stderr.write(\"kaizen run: --variant <id> is required\\n\");\n return 1;\n }\n\n const parent = strFlag(flags, \"parent\") ?? null;\n const hypothesis = strFlag(flags, \"hypothesis\") ?? \"\";\n const diagnostic = boolFlag(flags, \"diagnostic\");\n const noAutoPromote = boolFlag(flags, \"no-auto-promote\");\n const maxItemsFlag = strFlag(flags, \"max-items\");\n const maxItems = maxItemsFlag ? Number(maxItemsFlag) : null;\n if (maxItems !== null && (!Number.isFinite(maxItems) || maxItems <= 0)) {\n process.stderr.write(\n `kaizen run: --max-items must be a positive integer\\n`,\n );\n return 1;\n }\n const linearIssueInput =\n strFlag(flags, \"idea\") ??\n strFlag(flags, \"linear-issue\") ??\n strFlag(flags, \"linear-ticket\") ??\n null;\n const linearIssue = normalizeLinearIssue(linearIssueInput);\n if (linearIssueInput && !linearIssue) {\n process.stderr.write(\n \"kaizen run: --idea must be a Linear issue id like KZN-123 or a Linear issue URL\\n\",\n );\n return 1;\n }\n\n const result = await runExperiment({\n workspace: root,\n stateDir,\n systemId: system,\n variant,\n parent,\n hypothesis,\n diagnostic,\n noAutoPromote,\n maxItems,\n kaizenVersion: getKaizenVersion(),\n linearIssue,\n });\n\n // Single summary line on stdout — the agent reads this.\n const score = result.score === null ? \"null\" : result.score.toFixed(4);\n const promoted = result.promoted === null ? \"null\" : String(result.promoted);\n process.stdout.write(\n `score=${score} run_id=${result.runId} status=${result.status} promoted=${promoted}\\n`,\n );\n return result.exitCode;\n}\n\nfunction getKaizenVersion(): string {\n try {\n const here = dirname(fileURLToPath(import.meta.url));\n // src/commands -> package root\n const pkgPath = resolve(here, \"..\", \"..\", \"package.json\");\n return (\n JSON.parse(readFileSync(pkgPath, \"utf-8\")).version ?? \"0.0.0-unknown\"\n );\n } catch {\n return \"0.0.0-unknown\";\n }\n}\n"],"mappings":";;;;;;;;AAQA,eAAsB,OAAO,MAAiC;CAC5D,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,eAAe;CAC5B,MAAM,WAAW,gBAAgB,MAAM,QAAQ,OAAO,YAAY,CAAC;AAEnE,KAAI,CAAC,WAAW,KAAK,MAAM,mBAAmB,CAAC,EAAE;AAC/C,UAAQ,OAAO,MACb,0BAA0B,KAAK,gCAChC;AACD,SAAO;;CAGT,MAAM,SAAS,QAAQ,OAAO,SAAS;CACvC,MAAM,UAAU,QAAQ,OAAO,UAAU;AACzC,KAAI,CAAC,QAAQ;AACX,UAAQ,OAAO,MAAM,0CAA0C;AAC/D,SAAO;;AAET,KAAI,CAAC,SAAS;AACZ,UAAQ,OAAO,MAAM,2CAA2C;AAChE,SAAO;;CAGT,MAAM,SAAS,QAAQ,OAAO,SAAS,IAAI;CAC3C,MAAM,aAAa,QAAQ,OAAO,aAAa,IAAI;CACnD,MAAM,aAAa,SAAS,OAAO,aAAa;CAChD,MAAM,gBAAgB,SAAS,OAAO,kBAAkB;CACxD,MAAM,eAAe,QAAQ,OAAO,YAAY;CAChD,MAAM,WAAW,eAAe,OAAO,aAAa,GAAG;AACvD,KAAI,aAAa,SAAS,CAAC,OAAO,SAAS,SAAS,IAAI,YAAY,IAAI;AACtE,UAAQ,OAAO,MACb,uDACD;AACD,SAAO;;CAET,MAAM,mBACJ,QAAQ,OAAO,OAAO,IACtB,QAAQ,OAAO,eAAe,IAC9B,QAAQ,OAAO,gBAAgB,IAC/B;CACF,MAAM,cAAc,qBAAqB,iBAAiB;AAC1D,KAAI,oBAAoB,CAAC,aAAa;AACpC,UAAQ,OAAO,MACb,oFACD;AACD,SAAO;;CAGT,MAAM,SAAS,MAAM,cAAc;EACjC,WAAW;EACX;EACA,UAAU;EACV;EACA;EACA;EACA;EACA;EACA;EACA,eAAe,kBAAkB;EACjC;EACD,CAAC;CAGF,MAAM,QAAQ,OAAO,UAAU,OAAO,SAAS,OAAO,MAAM,QAAQ,EAAE;CACtE,MAAM,WAAW,OAAO,aAAa,OAAO,SAAS,OAAO,OAAO,SAAS;AAC5E,SAAQ,OAAO,MACb,SAAS,MAAM,UAAU,OAAO,MAAM,UAAU,OAAO,OAAO,YAAY,SAAS,IACpF;AACD,QAAO,OAAO;;AAGhB,SAAS,mBAA2B;AAClC,KAAI;EAGF,MAAM,UAAU,QAFH,QAAQ,cAAc,OAAO,KAAK,IAAI,CAEvB,EAAE,MAAM,MAAM,eAAe;AACzD,SACE,KAAK,MAAM,aAAa,SAAS,QAAQ,CAAC,CAAC,WAAW;SAElD;AACN,SAAO"}
1
+ {"version":3,"file":"run.js","names":[],"sources":["../../src/commands/run.ts"],"sourcesContent":["import { readFileSync } from \"node:fs\";\nimport { dirname, resolve } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport { normalizeLinearIssue } from \"../../shared/linear-issue.js\";\nimport { requireKaizenWorkspace } from \"../lib/cli.js\";\nimport { boolFlag, parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { workspaceRoot } from \"../lib/paths.js\";\nimport { runExperiment } from \"../lib/runner.js\";\n\nexport async function runRun(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const root = workspaceRoot();\n\n if (!requireKaizenWorkspace(root)) return 1;\n\n const system = strFlag(flags, \"system\");\n const variant = strFlag(flags, \"variant\");\n if (!system) {\n process.stderr.write(\"kaizen run: --system <id> is required\\n\");\n return 1;\n }\n if (!variant) {\n process.stderr.write(\"kaizen run: --variant <id> is required\\n\");\n return 1;\n }\n\n const parent = strFlag(flags, \"parent\") ?? null;\n const hypothesis = strFlag(flags, \"hypothesis\") ?? \"\";\n const diagnostic = boolFlag(flags, \"diagnostic\");\n const noAutoPromote = boolFlag(flags, \"no-auto-promote\");\n const maxItemsFlag = strFlag(flags, \"max-items\");\n const maxItems = maxItemsFlag ? Number(maxItemsFlag) : null;\n if (maxItems !== null && (!Number.isFinite(maxItems) || maxItems <= 0)) {\n process.stderr.write(\n `kaizen run: --max-items must be a positive integer\\n`,\n );\n return 1;\n }\n const linearIssueInput =\n strFlag(flags, \"idea\") ??\n strFlag(flags, \"linear-issue\") ??\n strFlag(flags, \"linear-ticket\") ??\n null;\n const linearIssue = normalizeLinearIssue(linearIssueInput);\n if (linearIssueInput && !linearIssue) {\n process.stderr.write(\n \"kaizen run: --idea must be a Linear issue id like KZN-123 or a Linear issue URL\\n\",\n );\n return 1;\n }\n\n const result = await runExperiment({\n workspace: root,\n systemId: system,\n variant,\n parent,\n hypothesis,\n diagnostic,\n noAutoPromote,\n maxItems,\n kaizenVersion: getKaizenVersion(),\n linearIssue,\n });\n\n // Single summary line on stdout — the agent reads this.\n const score = result.score === null ? \"null\" : result.score.toFixed(4);\n const promoted = result.promoted === null ? \"null\" : String(result.promoted);\n process.stdout.write(\n `score=${score} run_id=${result.runId} status=${result.status} promoted=${promoted}\\n`,\n );\n return result.exitCode;\n}\n\nfunction getKaizenVersion(): string {\n try {\n const here = dirname(fileURLToPath(import.meta.url));\n // src/commands -> package root\n const pkgPath = resolve(here, \"..\", \"..\", \"package.json\");\n return (\n JSON.parse(readFileSync(pkgPath, \"utf-8\")).version ?? \"0.0.0-unknown\"\n );\n } catch {\n return \"0.0.0-unknown\";\n }\n}\n"],"mappings":";;;;;;;;;AASA,eAAsB,OAAO,MAAiC;CAC5D,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,eAAe;AAE5B,KAAI,CAAC,uBAAuB,KAAK,CAAE,QAAO;CAE1C,MAAM,SAAS,QAAQ,OAAO,SAAS;CACvC,MAAM,UAAU,QAAQ,OAAO,UAAU;AACzC,KAAI,CAAC,QAAQ;AACX,UAAQ,OAAO,MAAM,0CAA0C;AAC/D,SAAO;;AAET,KAAI,CAAC,SAAS;AACZ,UAAQ,OAAO,MAAM,2CAA2C;AAChE,SAAO;;CAGT,MAAM,SAAS,QAAQ,OAAO,SAAS,IAAI;CAC3C,MAAM,aAAa,QAAQ,OAAO,aAAa,IAAI;CACnD,MAAM,aAAa,SAAS,OAAO,aAAa;CAChD,MAAM,gBAAgB,SAAS,OAAO,kBAAkB;CACxD,MAAM,eAAe,QAAQ,OAAO,YAAY;CAChD,MAAM,WAAW,eAAe,OAAO,aAAa,GAAG;AACvD,KAAI,aAAa,SAAS,CAAC,OAAO,SAAS,SAAS,IAAI,YAAY,IAAI;AACtE,UAAQ,OAAO,MACb,uDACD;AACD,SAAO;;CAET,MAAM,mBACJ,QAAQ,OAAO,OAAO,IACtB,QAAQ,OAAO,eAAe,IAC9B,QAAQ,OAAO,gBAAgB,IAC/B;CACF,MAAM,cAAc,qBAAqB,iBAAiB;AAC1D,KAAI,oBAAoB,CAAC,aAAa;AACpC,UAAQ,OAAO,MACb,oFACD;AACD,SAAO;;CAGT,MAAM,SAAS,MAAM,cAAc;EACjC,WAAW;EACX,UAAU;EACV;EACA;EACA;EACA;EACA;EACA;EACA,eAAe,kBAAkB;EACjC;EACD,CAAC;CAGF,MAAM,QAAQ,OAAO,UAAU,OAAO,SAAS,OAAO,MAAM,QAAQ,EAAE;CACtE,MAAM,WAAW,OAAO,aAAa,OAAO,SAAS,OAAO,OAAO,SAAS;AAC5E,SAAQ,OAAO,MACb,SAAS,MAAM,UAAU,OAAO,MAAM,UAAU,OAAO,OAAO,YAAY,SAAS,IACpF;AACD,QAAO,OAAO;;AAGhB,SAAS,mBAA2B;AAClC,KAAI;EAGF,MAAM,UAAU,QAFH,QAAQ,cAAc,OAAO,KAAK,IAAI,CAEvB,EAAE,MAAM,MAAM,eAAe;AACzD,SACE,KAAK,MAAM,aAAa,SAAS,QAAQ,CAAC,CAAC,WAAW;SAElD;AACN,SAAO"}
@@ -1,14 +1,14 @@
1
- import { parseFlags, strFlag } from "../lib/parse-args.js";
2
1
  import { packageRoot, resolveStateDir, workspaceRoot } from "../lib/paths.js";
3
- import { join } from "node:path";
2
+ import { parseFlags, strFlag } from "../lib/parse-args.js";
4
3
  import { existsSync } from "node:fs";
4
+ import { join } from "node:path";
5
5
  import { spawn } from "node:child_process";
6
6
  //#region src/commands/studio.ts
7
7
  async function runStudio(argv) {
8
8
  const { flags } = parseFlags(argv);
9
9
  const port = strFlag(flags, "port") ?? "6789";
10
10
  const root = workspaceRoot();
11
- const stateDir = resolveStateDir(root, strFlag(flags, "state-dir"));
11
+ const stateDir = resolveStateDir(root);
12
12
  const dashboardRoot = join(packageRoot(), "dashboard", ".next", "standalone");
13
13
  const serverPath = join(dashboardRoot, "packages", "kaizen", "dashboard", "server.js");
14
14
  if (!existsSync(serverPath)) {
@@ -1 +1 @@
1
- {"version":3,"file":"studio.js","names":[],"sources":["../../src/commands/studio.ts"],"sourcesContent":["import { spawn } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { packageRoot, resolveStateDir, workspaceRoot } from \"../lib/paths.js\";\n\nexport async function runStudio(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const port = strFlag(flags, \"port\") ?? \"6789\";\n const root = workspaceRoot();\n const stateDir = resolveStateDir(root, strFlag(flags, \"state-dir\"));\n const dashboardRoot = join(packageRoot(), \"dashboard\", \".next\", \"standalone\");\n const serverPath = join(\n dashboardRoot,\n \"packages\",\n \"kaizen\",\n \"dashboard\",\n \"server.js\",\n );\n\n if (!existsSync(serverPath)) {\n process.stderr.write(\n `kaizen studio: bundled dashboard not found at ${serverPath}.\\n` +\n \"Build the package first with `pnpm --filter @percepta/kaizen build`.\\n\",\n );\n return 1;\n }\n\n const env = {\n ...process.env,\n PORT: port,\n KAIZEN_WORKSPACE: root,\n KAIZEN_STATE_DIR: stateDir,\n };\n\n process.stdout.write(`kaizen studio: ${root}\\n`);\n process.stdout.write(`state: ${stateDir}\\n`);\n process.stdout.write(`url: http://localhost:${port}\\n`);\n\n const next = spawn(process.execPath, [serverPath], {\n cwd: dashboardRoot,\n env,\n stdio: \"inherit\",\n });\n\n const shutdown = () => next.kill(\"SIGTERM\");\n process.on(\"SIGINT\", shutdown);\n process.on(\"SIGTERM\", shutdown);\n\n return await new Promise<number>((resolve) => {\n next.on(\"exit\", (code) => resolve(code ?? 0));\n });\n}\n"],"mappings":";;;;;;AAMA,eAAsB,UAAU,MAAiC;CAC/D,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,QAAQ,OAAO,OAAO,IAAI;CACvC,MAAM,OAAO,eAAe;CAC5B,MAAM,WAAW,gBAAgB,MAAM,QAAQ,OAAO,YAAY,CAAC;CACnE,MAAM,gBAAgB,KAAK,aAAa,EAAE,aAAa,SAAS,aAAa;CAC7E,MAAM,aAAa,KACjB,eACA,YACA,UACA,aACA,YACD;AAED,KAAI,CAAC,WAAW,WAAW,EAAE;AAC3B,UAAQ,OAAO,MACb,iDAAiD,WAAW;EAE7D;AACD,SAAO;;CAGT,MAAM,MAAM;EACV,GAAG,QAAQ;EACX,MAAM;EACN,kBAAkB;EAClB,kBAAkB;EACnB;AAED,SAAQ,OAAO,MAAM,kBAAkB,KAAK,IAAI;AAChD,SAAQ,OAAO,MAAM,UAAU,SAAS,IAAI;AAC5C,SAAQ,OAAO,MAAM,yBAAyB,KAAK,IAAI;CAEvD,MAAM,OAAO,MAAM,QAAQ,UAAU,CAAC,WAAW,EAAE;EACjD,KAAK;EACL;EACA,OAAO;EACR,CAAC;CAEF,MAAM,iBAAiB,KAAK,KAAK,UAAU;AAC3C,SAAQ,GAAG,UAAU,SAAS;AAC9B,SAAQ,GAAG,WAAW,SAAS;AAE/B,QAAO,MAAM,IAAI,SAAiB,YAAY;AAC5C,OAAK,GAAG,SAAS,SAAS,QAAQ,QAAQ,EAAE,CAAC;GAC7C"}
1
+ {"version":3,"file":"studio.js","names":[],"sources":["../../src/commands/studio.ts"],"sourcesContent":["import { spawn } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { parseFlags, strFlag } from \"../lib/parse-args.js\";\nimport { packageRoot, resolveStateDir, workspaceRoot } from \"../lib/paths.js\";\n\nexport async function runStudio(argv: string[]): Promise<number> {\n const { flags } = parseFlags(argv);\n const port = strFlag(flags, \"port\") ?? \"6789\";\n const root = workspaceRoot();\n const stateDir = resolveStateDir(root);\n const dashboardRoot = join(packageRoot(), \"dashboard\", \".next\", \"standalone\");\n const serverPath = join(\n dashboardRoot,\n \"packages\",\n \"kaizen\",\n \"dashboard\",\n \"server.js\",\n );\n\n if (!existsSync(serverPath)) {\n process.stderr.write(\n `kaizen studio: bundled dashboard not found at ${serverPath}.\\n` +\n \"Build the package first with `pnpm --filter @percepta/kaizen build`.\\n\",\n );\n return 1;\n }\n\n const env = {\n ...process.env,\n PORT: port,\n KAIZEN_WORKSPACE: root,\n KAIZEN_STATE_DIR: stateDir,\n };\n\n process.stdout.write(`kaizen studio: ${root}\\n`);\n process.stdout.write(`state: ${stateDir}\\n`);\n process.stdout.write(`url: http://localhost:${port}\\n`);\n\n const next = spawn(process.execPath, [serverPath], {\n cwd: dashboardRoot,\n env,\n stdio: \"inherit\",\n });\n\n const shutdown = () => next.kill(\"SIGTERM\");\n process.on(\"SIGINT\", shutdown);\n process.on(\"SIGTERM\", shutdown);\n\n return await new Promise<number>((resolve) => {\n next.on(\"exit\", (code) => resolve(code ?? 0));\n });\n}\n"],"mappings":";;;;;;AAMA,eAAsB,UAAU,MAAiC;CAC/D,MAAM,EAAE,UAAU,WAAW,KAAK;CAClC,MAAM,OAAO,QAAQ,OAAO,OAAO,IAAI;CACvC,MAAM,OAAO,eAAe;CAC5B,MAAM,WAAW,gBAAgB,KAAK;CACtC,MAAM,gBAAgB,KAAK,aAAa,EAAE,aAAa,SAAS,aAAa;CAC7E,MAAM,aAAa,KACjB,eACA,YACA,UACA,aACA,YACD;AAED,KAAI,CAAC,WAAW,WAAW,EAAE;AAC3B,UAAQ,OAAO,MACb,iDAAiD,WAAW;EAE7D;AACD,SAAO;;CAGT,MAAM,MAAM;EACV,GAAG,QAAQ;EACX,MAAM;EACN,kBAAkB;EAClB,kBAAkB;EACnB;AAED,SAAQ,OAAO,MAAM,kBAAkB,KAAK,IAAI;AAChD,SAAQ,OAAO,MAAM,UAAU,SAAS,IAAI;AAC5C,SAAQ,OAAO,MAAM,yBAAyB,KAAK,IAAI;CAEvD,MAAM,OAAO,MAAM,QAAQ,UAAU,CAAC,WAAW,EAAE;EACjD,KAAK;EACL;EACA,OAAO;EACR,CAAC;CAEF,MAAM,iBAAiB,KAAK,KAAK,UAAU;AAC3C,SAAQ,GAAG,UAAU,SAAS;AAC9B,SAAQ,GAAG,WAAW,SAAS;AAE/B,QAAO,MAAM,IAAI,SAAiB,YAAY;AAC5C,OAAK,GAAG,SAAS,SAAS,QAAQ,QAAQ,EAAE,CAAC;GAC7C"}
package/dist/index.js CHANGED
@@ -1,3 +1,5 @@
1
+ import { runCreateView } from "./commands/create-view.js";
2
+ import { runGuide } from "./commands/guide.js";
1
3
  import { runIdeas } from "./commands/ideas.js";
2
4
  import { runInitSystem } from "./commands/init-system.js";
3
5
  import { runInit } from "./commands/init.js";
@@ -8,33 +10,32 @@ import { runStudio } from "./commands/studio.js";
8
10
  //#region src/index.ts
9
11
  const HELP = `kaizen — automated AI researcher
10
12
 
11
- human-facing:
13
+ workspace:
12
14
  kaizen init scaffold a Kaizen workspace (run once per repo)
15
+ kaizen guide [topic] print package-owned agent instructions
16
+ topics: overview, evals, views,
17
+ variant-builder, claude-command, all
13
18
  kaizen create system [name] add a system definition + starter eval
14
19
  [--eval-language py|ts]
15
- kaizen studio [--port 6789] [--state-dir <dir>]
20
+ kaizen create view <system> add kaizen/systems/<system>/<type>.tsx
21
+ --type trace|dataset-item
22
+ kaizen studio [--port 6789]
16
23
  open the local Kaizen dashboard against this workspace
17
24
  kaizen ideas --system <s> [--json] [--limit <n>]
18
25
  list Linear issues labeled Kaizen for a system
19
26
 
20
27
  agent-facing (the inner loop):
21
28
  kaizen run --system <s> --variant <v> [--parent <id>] [--hypothesis "..."]
22
- [--idea <KZN-123>] [--diagnostic]
23
- [--no-auto-promote] [--state-dir <dir>]
29
+ [--idea <KZN-123>] [--diagnostic] [--no-auto-promote]
24
30
  record one run; emits score on stdout
25
- kaizen log [--system <s>] [-n <count>] [--json] [--state-dir <dir>]
31
+ kaizen log [--system <s>] [-n <count>] [--json]
26
32
  promoted baseline + chronological run history
27
33
 
28
34
  recovery (rare):
29
- kaizen rebuild [--system <s>] [--state-dir <dir>]
35
+ kaizen rebuild [--system <s>]
30
36
  re-derive state.json from events.jsonl
31
37
 
32
38
  kaizen --version print version
33
-
34
- compatibility:
35
- kaizen init system <name> alias for kaizen create system <name>
36
- kaizen get ideas --system <s> alias for kaizen ideas --system <s>
37
-
38
39
  `;
39
40
  async function main(argv) {
40
41
  const [cmd, sub, ...rest] = argv;
@@ -49,26 +50,21 @@ async function main(argv) {
49
50
  }
50
51
  switch (cmd) {
51
52
  case "create":
52
- if (!sub) return runInitSystem([]);
53
53
  if (sub === "system") return runInitSystem(rest);
54
- return runInitSystem([sub, ...rest].filter(Boolean));
55
- case "init":
56
- if (sub === "system") return runInitSystem(rest);
57
- return runInit([sub, ...rest].filter(Boolean));
54
+ if (sub === "view") return runCreateView(rest);
55
+ process.stderr.write(`unknown create target: ${sub ?? ""}\n\n${HELP}`);
56
+ return 1;
57
+ case "init": return runInit([sub, ...rest].filter(Boolean));
58
+ case "guide": return runGuide([sub, ...rest].filter(Boolean));
58
59
  case "run": return runRun([sub, ...rest].filter(Boolean));
59
60
  case "log": return runLog([sub, ...rest].filter(Boolean));
60
61
  case "rebuild": return runRebuild([sub, ...rest].filter(Boolean));
61
62
  case "studio": return runStudio([sub, ...rest].filter(Boolean));
62
63
  case "ideas": return runIdeas([sub, ...rest].filter(Boolean));
63
- case "get":
64
- if (sub === "ideas") return runIdeas(rest);
65
- break;
66
64
  default:
67
65
  process.stderr.write(`unknown command: ${cmd}\n\n${HELP}`);
68
66
  return 1;
69
67
  }
70
- process.stderr.write(`unknown command: ${cmd} ${sub ?? ""}\n\n${HELP}`);
71
- return 1;
72
68
  }
73
69
  main(process.argv.slice(2)).then((code) => process.exit(code), (err) => {
74
70
  process.stderr.write(`error: ${err?.message ?? err}\n`);
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","names":[],"sources":["../src/index.ts"],"sourcesContent":["import { runIdeas } from \"./commands/ideas.js\";\nimport { runInitSystem } from \"./commands/init-system.js\";\nimport { runInit } from \"./commands/init.js\";\nimport { runLog } from \"./commands/log.js\";\nimport { runRebuild } from \"./commands/rebuild.js\";\nimport { runRun } from \"./commands/run.js\";\nimport { runStudio } from \"./commands/studio.js\";\n\nconst HELP = `kaizen — automated AI researcher\n\nhuman-facing:\n kaizen init scaffold a Kaizen workspace (run once per repo)\n kaizen create system [name] add a system definition + starter eval\n [--eval-language py|ts]\n kaizen studio [--port 6789] [--state-dir <dir>]\n open the local Kaizen dashboard against this workspace\n kaizen ideas --system <s> [--json] [--limit <n>]\n list Linear issues labeled Kaizen for a system\n\nagent-facing (the inner loop):\n kaizen run --system <s> --variant <v> [--parent <id>] [--hypothesis \"...\"]\n [--idea <KZN-123>] [--diagnostic]\n [--no-auto-promote] [--state-dir <dir>]\n record one run; emits score on stdout\n kaizen log [--system <s>] [-n <count>] [--json] [--state-dir <dir>]\n promoted baseline + chronological run history\n\nrecovery (rare):\n kaizen rebuild [--system <s>] [--state-dir <dir>]\n re-derive state.json from events.jsonl\n\n kaizen --version print version\n\ncompatibility:\n kaizen init system <name> alias for kaizen create system <name>\n kaizen get ideas --system <s> alias for kaizen ideas --system <s>\n\n`;\n\nasync function main(argv: string[]): Promise<number> {\n const [cmd, sub, ...rest] = argv;\n\n if (!cmd || cmd === \"--help\" || cmd === \"-h\" || cmd === \"help\") {\n process.stdout.write(HELP);\n return 0;\n }\n if (cmd === \"--version\" || cmd === \"-v\") {\n const pkg = await import(\"../package.json\", { with: { type: \"json\" } });\n process.stdout.write(`${pkg.default.version}\\n`);\n return 0;\n }\n\n switch (cmd) {\n case \"create\":\n if (!sub) {\n return runInitSystem([]);\n }\n if (sub === \"system\") {\n return runInitSystem(rest);\n }\n return runInitSystem([sub, ...rest].filter(Boolean));\n case \"init\":\n if (sub === \"system\") {\n return runInitSystem(rest);\n }\n return runInit([sub, ...rest].filter(Boolean));\n case \"run\":\n return runRun([sub, ...rest].filter(Boolean));\n case \"log\":\n return runLog([sub, ...rest].filter(Boolean));\n case \"rebuild\":\n return runRebuild([sub, ...rest].filter(Boolean));\n case \"studio\":\n return runStudio([sub, ...rest].filter(Boolean));\n case \"ideas\":\n return runIdeas([sub, ...rest].filter(Boolean));\n case \"get\":\n if (sub === \"ideas\") {\n return runIdeas(rest);\n }\n break;\n default:\n process.stderr.write(`unknown command: ${cmd}\\n\\n${HELP}`);\n return 1;\n }\n process.stderr.write(`unknown command: ${cmd} ${sub ?? \"\"}\\n\\n${HELP}`);\n return 1;\n}\n\nmain(process.argv.slice(2)).then(\n (code) => process.exit(code),\n (err) => {\n process.stderr.write(`error: ${err?.message ?? err}\\n`);\n if (process.env.KAIZEN_DEBUG) process.stderr.write(`${err?.stack}\\n`);\n process.exit(1);\n },\n);\n"],"mappings":";;;;;;;;AAQA,MAAM,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA+Bb,eAAe,KAAK,MAAiC;CACnD,MAAM,CAAC,KAAK,KAAK,GAAG,QAAQ;AAE5B,KAAI,CAAC,OAAO,QAAQ,YAAY,QAAQ,QAAQ,QAAQ,QAAQ;AAC9D,UAAQ,OAAO,MAAM,KAAK;AAC1B,SAAO;;AAET,KAAI,QAAQ,eAAe,QAAQ,MAAM;EACvC,MAAM,MAAM,MAAM,OAAO,mBAAmB,EAAE,MAAM,EAAE,MAAM,QAAQ,EAAE;AACtE,UAAQ,OAAO,MAAM,GAAG,IAAI,QAAQ,QAAQ,IAAI;AAChD,SAAO;;AAGT,SAAQ,KAAR;EACE,KAAK;AACH,OAAI,CAAC,IACH,QAAO,cAAc,EAAE,CAAC;AAE1B,OAAI,QAAQ,SACV,QAAO,cAAc,KAAK;AAE5B,UAAO,cAAc,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EACtD,KAAK;AACH,OAAI,QAAQ,SACV,QAAO,cAAc,KAAK;AAE5B,UAAO,QAAQ,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAChD,KAAK,MACH,QAAO,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAC/C,KAAK,MACH,QAAO,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAC/C,KAAK,UACH,QAAO,WAAW,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EACnD,KAAK,SACH,QAAO,UAAU,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAClD,KAAK,QACH,QAAO,SAAS,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EACjD,KAAK;AACH,OAAI,QAAQ,QACV,QAAO,SAAS,KAAK;AAEvB;EACF;AACE,WAAQ,OAAO,MAAM,oBAAoB,IAAI,MAAM,OAAO;AAC1D,UAAO;;AAEX,SAAQ,OAAO,MAAM,oBAAoB,IAAI,GAAG,OAAO,GAAG,MAAM,OAAO;AACvE,QAAO;;AAGT,KAAK,QAAQ,KAAK,MAAM,EAAE,CAAC,CAAC,MACzB,SAAS,QAAQ,KAAK,KAAK,GAC3B,QAAQ;AACP,SAAQ,OAAO,MAAM,UAAU,KAAK,WAAW,IAAI,IAAI;AACvD,KAAI,QAAQ,IAAI,aAAc,SAAQ,OAAO,MAAM,GAAG,KAAK,MAAM,IAAI;AACrE,SAAQ,KAAK,EAAE;EAElB"}
1
+ {"version":3,"file":"index.js","names":[],"sources":["../src/index.ts"],"sourcesContent":["import { runCreateView } from \"./commands/create-view.js\";\nimport { runGuide } from \"./commands/guide.js\";\nimport { runIdeas } from \"./commands/ideas.js\";\nimport { runInitSystem } from \"./commands/init-system.js\";\nimport { runInit } from \"./commands/init.js\";\nimport { runLog } from \"./commands/log.js\";\nimport { runRebuild } from \"./commands/rebuild.js\";\nimport { runRun } from \"./commands/run.js\";\nimport { runStudio } from \"./commands/studio.js\";\n\nconst HELP = `kaizen — automated AI researcher\n\nworkspace:\n kaizen init scaffold a Kaizen workspace (run once per repo)\n kaizen guide [topic] print package-owned agent instructions\n topics: overview, evals, views,\n variant-builder, claude-command, all\n kaizen create system [name] add a system definition + starter eval\n [--eval-language py|ts]\n kaizen create view <system> add kaizen/systems/<system>/<type>.tsx\n --type trace|dataset-item\n kaizen studio [--port 6789]\n open the local Kaizen dashboard against this workspace\n kaizen ideas --system <s> [--json] [--limit <n>]\n list Linear issues labeled Kaizen for a system\n\nagent-facing (the inner loop):\n kaizen run --system <s> --variant <v> [--parent <id>] [--hypothesis \"...\"]\n [--idea <KZN-123>] [--diagnostic] [--no-auto-promote]\n record one run; emits score on stdout\n kaizen log [--system <s>] [-n <count>] [--json]\n promoted baseline + chronological run history\n\nrecovery (rare):\n kaizen rebuild [--system <s>]\n re-derive state.json from events.jsonl\n\n kaizen --version print version\n`;\n\nasync function main(argv: string[]): Promise<number> {\n const [cmd, sub, ...rest] = argv;\n\n if (!cmd || cmd === \"--help\" || cmd === \"-h\" || cmd === \"help\") {\n process.stdout.write(HELP);\n return 0;\n }\n if (cmd === \"--version\" || cmd === \"-v\") {\n const pkg = await import(\"../package.json\", { with: { type: \"json\" } });\n process.stdout.write(`${pkg.default.version}\\n`);\n return 0;\n }\n\n switch (cmd) {\n case \"create\":\n if (sub === \"system\") {\n return runInitSystem(rest);\n }\n if (sub === \"view\") {\n return runCreateView(rest);\n }\n process.stderr.write(`unknown create target: ${sub ?? \"\"}\\n\\n${HELP}`);\n return 1;\n case \"init\":\n return runInit([sub, ...rest].filter(Boolean));\n case \"guide\":\n return runGuide([sub, ...rest].filter(Boolean));\n case \"run\":\n return runRun([sub, ...rest].filter(Boolean));\n case \"log\":\n return runLog([sub, ...rest].filter(Boolean));\n case \"rebuild\":\n return runRebuild([sub, ...rest].filter(Boolean));\n case \"studio\":\n return runStudio([sub, ...rest].filter(Boolean));\n case \"ideas\":\n return runIdeas([sub, ...rest].filter(Boolean));\n default:\n process.stderr.write(`unknown command: ${cmd}\\n\\n${HELP}`);\n return 1;\n }\n}\n\nmain(process.argv.slice(2)).then(\n (code) => process.exit(code),\n (err) => {\n process.stderr.write(`error: ${err?.message ?? err}\\n`);\n if (process.env.KAIZEN_DEBUG) process.stderr.write(`${err?.stack}\\n`);\n process.exit(1);\n },\n);\n"],"mappings":";;;;;;;;;;AAUA,MAAM,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA8Bb,eAAe,KAAK,MAAiC;CACnD,MAAM,CAAC,KAAK,KAAK,GAAG,QAAQ;AAE5B,KAAI,CAAC,OAAO,QAAQ,YAAY,QAAQ,QAAQ,QAAQ,QAAQ;AAC9D,UAAQ,OAAO,MAAM,KAAK;AAC1B,SAAO;;AAET,KAAI,QAAQ,eAAe,QAAQ,MAAM;EACvC,MAAM,MAAM,MAAM,OAAO,mBAAmB,EAAE,MAAM,EAAE,MAAM,QAAQ,EAAE;AACtE,UAAQ,OAAO,MAAM,GAAG,IAAI,QAAQ,QAAQ,IAAI;AAChD,SAAO;;AAGT,SAAQ,KAAR;EACE,KAAK;AACH,OAAI,QAAQ,SACV,QAAO,cAAc,KAAK;AAE5B,OAAI,QAAQ,OACV,QAAO,cAAc,KAAK;AAE5B,WAAQ,OAAO,MAAM,0BAA0B,OAAO,GAAG,MAAM,OAAO;AACtE,UAAO;EACT,KAAK,OACH,QAAO,QAAQ,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAChD,KAAK,QACH,QAAO,SAAS,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EACjD,KAAK,MACH,QAAO,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAC/C,KAAK,MACH,QAAO,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAC/C,KAAK,UACH,QAAO,WAAW,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EACnD,KAAK,SACH,QAAO,UAAU,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EAClD,KAAK,QACH,QAAO,SAAS,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,QAAQ,CAAC;EACjD;AACE,WAAQ,OAAO,MAAM,oBAAoB,IAAI,MAAM,OAAO;AAC1D,UAAO;;;AAIb,KAAK,QAAQ,KAAK,MAAM,EAAE,CAAC,CAAC,MACzB,SAAS,QAAQ,KAAK,KAAK,GAC3B,QAAQ;AACP,SAAQ,OAAO,MAAM,UAAU,KAAK,WAAW,IAAI,IAAI;AACvD,KAAI,QAAQ,IAAI,aAAc,SAAQ,OAAO,MAAM,GAAG,KAAK,MAAM,IAAI;AACrE,SAAQ,KAAK,EAAE;EAElB"}
@@ -0,0 +1,20 @@
1
+ import { kaizenConfigPath } from "../shared/workspace-paths.js";
2
+ import "./paths.js";
3
+ import { existsSync } from "node:fs";
4
+ //#region src/lib/cli.ts
5
+ function requireKaizenWorkspace(root) {
6
+ if (existsSync(kaizenConfigPath(root))) return true;
7
+ process.stderr.write(`no kaizen/config.ts in ${root}. run \`kaizen init\` first.\n`);
8
+ return false;
9
+ }
10
+ function isSystemId(value) {
11
+ return /^[a-z][a-z0-9-]*$/.test(value);
12
+ }
13
+ function pad(s, w) {
14
+ if (s.length >= w) return s.slice(0, w - 1) + " ";
15
+ return s + " ".repeat(w - s.length);
16
+ }
17
+ //#endregion
18
+ export { isSystemId, pad, requireKaizenWorkspace };
19
+
20
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.js","names":[],"sources":["../../src/lib/cli.ts"],"sourcesContent":["import { existsSync } from \"node:fs\";\nimport { kaizenConfigPath } from \"./paths.js\";\n\nexport function requireKaizenWorkspace(root: string): boolean {\n if (existsSync(kaizenConfigPath(root))) return true;\n process.stderr.write(\n `no kaizen/config.ts in ${root}. run \\`kaizen init\\` first.\\n`,\n );\n return false;\n}\n\nexport function isSystemId(value: string): boolean {\n return /^[a-z][a-z0-9-]*$/.test(value);\n}\n\nexport function pad(s: string, w: number): string {\n if (s.length >= w) return s.slice(0, w - 1) + \" \";\n return s + \" \".repeat(w - s.length);\n}\n"],"mappings":";;;;AAGA,SAAgB,uBAAuB,MAAuB;AAC5D,KAAI,WAAW,iBAAiB,KAAK,CAAC,CAAE,QAAO;AAC/C,SAAQ,OAAO,MACb,0BAA0B,KAAK,gCAChC;AACD,QAAO;;AAGT,SAAgB,WAAW,OAAwB;AACjD,QAAO,oBAAoB,KAAK,MAAM;;AAGxC,SAAgB,IAAI,GAAW,GAAmB;AAChD,KAAI,EAAE,UAAU,EAAG,QAAO,EAAE,MAAM,GAAG,IAAI,EAAE,GAAG;AAC9C,QAAO,IAAI,IAAI,OAAO,IAAI,EAAE,OAAO"}
@@ -1 +1 @@
1
- {"version":3,"file":"events.js","names":[],"sources":["../../src/lib/events.ts"],"sourcesContent":["// Event schema for the NDJSON contract between eval scripts and the runner.\n\nexport interface BaseEvent {\n type: string;\n ts?: number;\n}\n\nexport interface StartEvent extends BaseEvent {\n type: \"start\";\n n: number;\n eval_version: number;\n dataset_version: string;\n variant?: string;\n}\n\nexport interface ItemEvent extends BaseEvent {\n type: \"item\";\n id: string;\n score: number;\n breakdown?: Record<string, number>;\n trace_id?: string | null;\n why?: string;\n subgroup?: Record<string, string>;\n}\n\nexport interface ProgressEvent extends BaseEvent {\n type: \"progress\";\n done: number;\n total: number;\n}\n\nexport interface CompleteEvent extends BaseEvent {\n type: \"complete\";\n score: number;\n breakdown?: Record<string, number>;\n n: number;\n worst_traces?: Array<{ id: string; score: number; trace_id?: string | null }>;\n}\n\nexport interface ErrorEvent extends BaseEvent {\n type: \"error\";\n message: string;\n}\n\n// Synthesized by the runner, not emitted by the eval.\nexport interface CrashedEvent extends BaseEvent {\n type: \"crashed\";\n exit_code: number | null;\n signal?: string | null;\n stderr_tail?: string;\n reason: string;\n}\n\nexport interface AbortedEvent extends BaseEvent {\n type: \"aborted\";\n reason: string;\n}\n\nexport interface PromotionEvent extends BaseEvent {\n type: \"promotion\";\n promoted: boolean;\n rule:\n | \"auto\"\n | \"force\"\n | \"no_baseline\"\n | \"version_mismatch\"\n | \"stat_insufficient\"\n | \"subgroup_regression\";\n ci_low?: number;\n ci_high?: number;\n mean_delta?: number;\n n_compared?: number;\n parent_run_id?: string;\n details?: string;\n}\n\nexport type Event =\n | StartEvent\n | ItemEvent\n | ProgressEvent\n | CompleteEvent\n | ErrorEvent\n | CrashedEvent\n | AbortedEvent\n | PromotionEvent;\n\nexport function validateEvent(value: unknown): Event {\n if (!value || typeof value !== \"object\") {\n throw new Error(\"event must be an object\");\n }\n const e = value as Record<string, unknown>;\n if (typeof e.type !== \"string\") {\n throw new Error(\"event.type must be a string\");\n }\n\n switch (e.type) {\n case \"start\":\n requireNumber(e, \"n\");\n requireNumber(e, \"eval_version\");\n requireString(e, \"dataset_version\");\n break;\n case \"item\":\n requireString(e, \"id\");\n requireNumber(e, \"score\");\n break;\n case \"progress\":\n requireNumber(e, \"done\");\n requireNumber(e, \"total\");\n break;\n case \"complete\":\n requireNumber(e, \"score\");\n requireNumber(e, \"n\");\n break;\n case \"error\":\n requireString(e, \"message\");\n break;\n case \"crashed\":\n requireString(e, \"reason\");\n break;\n case \"aborted\":\n requireString(e, \"reason\");\n break;\n case \"promotion\":\n if (typeof e.promoted !== \"boolean\")\n throw new Error(\"promotion.promoted must be boolean\");\n requireString(e, \"rule\");\n break;\n default:\n throw new Error(`unknown event type: ${e.type}`);\n }\n\n if (e.ts !== undefined && typeof e.ts !== \"number\") {\n throw new Error(`${e.type}.ts must be a number when present`);\n }\n return e as unknown as Event;\n}\n\nfunction requireString(e: Record<string, unknown>, key: string): void {\n if (typeof e[key] !== \"string\" || e[key] === \"\") {\n throw new Error(`${String(e.type)}.${key} must be a non-empty string`);\n }\n}\n\nfunction requireNumber(e: Record<string, unknown>, key: string): void {\n if (typeof e[key] !== \"number\" || !Number.isFinite(e[key])) {\n throw new Error(`${String(e.type)}.${key} must be a finite number`);\n }\n}\n\nexport type TerminalType = \"complete\" | \"crashed\" | \"aborted\";\n\nexport function isTerminal(\n e: Event,\n): e is CompleteEvent | CrashedEvent | AbortedEvent {\n return e.type === \"complete\" || e.type === \"crashed\" || e.type === \"aborted\";\n}\n\nexport function findTerminal(\n events: Event[],\n): CompleteEvent | CrashedEvent | AbortedEvent | null {\n for (let i = events.length - 1; i >= 0; i--) {\n if (isTerminal(events[i]))\n return events[i] as CompleteEvent | CrashedEvent | AbortedEvent;\n }\n return null;\n}\n\n/** Stream-mode NDJSON parser. Feed `push(chunk)` from a Readable; receive parsed events via `onEvent`. */\nexport class NdjsonReader {\n private buffer = \"\";\n public constructor(\n private onEvent: (e: Event) => void,\n private onParseError?: (line: string, err: unknown) => void,\n ) {}\n\n public push(chunk: string | Buffer): void {\n this.buffer += typeof chunk === \"string\" ? chunk : chunk.toString(\"utf-8\");\n let nl: number;\n while ((nl = this.buffer.indexOf(\"\\n\")) !== -1) {\n const line = this.buffer.slice(0, nl);\n this.buffer = this.buffer.slice(nl + 1);\n if (!line.trim()) continue;\n try {\n this.onEvent(validateEvent(JSON.parse(line)));\n } catch (err) {\n this.onParseError?.(line, err);\n }\n }\n }\n\n /** Flush any remaining unterminated content as a best-effort parse. */\n public end(): void {\n const tail = this.buffer.trim();\n this.buffer = \"\";\n if (!tail) return;\n try {\n this.onEvent(validateEvent(JSON.parse(tail)));\n } catch (err) {\n this.onParseError?.(tail, err);\n }\n }\n}\n"],"mappings":";AAsFA,SAAgB,cAAc,OAAuB;AACnD,KAAI,CAAC,SAAS,OAAO,UAAU,SAC7B,OAAM,IAAI,MAAM,0BAA0B;CAE5C,MAAM,IAAI;AACV,KAAI,OAAO,EAAE,SAAS,SACpB,OAAM,IAAI,MAAM,8BAA8B;AAGhD,SAAQ,EAAE,MAAV;EACE,KAAK;AACH,iBAAc,GAAG,IAAI;AACrB,iBAAc,GAAG,eAAe;AAChC,iBAAc,GAAG,kBAAkB;AACnC;EACF,KAAK;AACH,iBAAc,GAAG,KAAK;AACtB,iBAAc,GAAG,QAAQ;AACzB;EACF,KAAK;AACH,iBAAc,GAAG,OAAO;AACxB,iBAAc,GAAG,QAAQ;AACzB;EACF,KAAK;AACH,iBAAc,GAAG,QAAQ;AACzB,iBAAc,GAAG,IAAI;AACrB;EACF,KAAK;AACH,iBAAc,GAAG,UAAU;AAC3B;EACF,KAAK;AACH,iBAAc,GAAG,SAAS;AAC1B;EACF,KAAK;AACH,iBAAc,GAAG,SAAS;AAC1B;EACF,KAAK;AACH,OAAI,OAAO,EAAE,aAAa,UACxB,OAAM,IAAI,MAAM,qCAAqC;AACvD,iBAAc,GAAG,OAAO;AACxB;EACF,QACE,OAAM,IAAI,MAAM,uBAAuB,EAAE,OAAO;;AAGpD,KAAI,EAAE,OAAO,KAAA,KAAa,OAAO,EAAE,OAAO,SACxC,OAAM,IAAI,MAAM,GAAG,EAAE,KAAK,mCAAmC;AAE/D,QAAO;;AAGT,SAAS,cAAc,GAA4B,KAAmB;AACpE,KAAI,OAAO,EAAE,SAAS,YAAY,EAAE,SAAS,GAC3C,OAAM,IAAI,MAAM,GAAG,OAAO,EAAE,KAAK,CAAC,GAAG,IAAI,6BAA6B;;AAI1E,SAAS,cAAc,GAA4B,KAAmB;AACpE,KAAI,OAAO,EAAE,SAAS,YAAY,CAAC,OAAO,SAAS,EAAE,KAAK,CACxD,OAAM,IAAI,MAAM,GAAG,OAAO,EAAE,KAAK,CAAC,GAAG,IAAI,0BAA0B;;AAMvE,SAAgB,WACd,GACkD;AAClD,QAAO,EAAE,SAAS,cAAc,EAAE,SAAS,aAAa,EAAE,SAAS;;AAGrE,SAAgB,aACd,QACoD;AACpD,MAAK,IAAI,IAAI,OAAO,SAAS,GAAG,KAAK,GAAG,IACtC,KAAI,WAAW,OAAO,GAAG,CACvB,QAAO,OAAO;AAElB,QAAO;;;AAIT,IAAa,eAAb,MAA0B;CACxB,SAAiB;CACjB,YACE,SACA,cACA;AAFQ,OAAA,UAAA;AACA,OAAA,eAAA;;CAGV,KAAY,OAA8B;AACxC,OAAK,UAAU,OAAO,UAAU,WAAW,QAAQ,MAAM,SAAS,QAAQ;EAC1E,IAAI;AACJ,UAAQ,KAAK,KAAK,OAAO,QAAQ,KAAK,MAAM,IAAI;GAC9C,MAAM,OAAO,KAAK,OAAO,MAAM,GAAG,GAAG;AACrC,QAAK,SAAS,KAAK,OAAO,MAAM,KAAK,EAAE;AACvC,OAAI,CAAC,KAAK,MAAM,CAAE;AAClB,OAAI;AACF,SAAK,QAAQ,cAAc,KAAK,MAAM,KAAK,CAAC,CAAC;YACtC,KAAK;AACZ,SAAK,eAAe,MAAM,IAAI;;;;;CAMpC,MAAmB;EACjB,MAAM,OAAO,KAAK,OAAO,MAAM;AAC/B,OAAK,SAAS;AACd,MAAI,CAAC,KAAM;AACX,MAAI;AACF,QAAK,QAAQ,cAAc,KAAK,MAAM,KAAK,CAAC,CAAC;WACtC,KAAK;AACZ,QAAK,eAAe,MAAM,IAAI"}
1
+ {"version":3,"file":"events.js","names":[],"sources":["../../src/lib/events.ts"],"sourcesContent":["// Event schema for the NDJSON contract between eval scripts and the runner.\n\ninterface BaseEvent {\n type: string;\n ts?: number;\n}\n\ninterface StartEvent extends BaseEvent {\n type: \"start\";\n n: number;\n eval_version: number;\n dataset_version: string;\n variant?: string;\n}\n\nexport interface ItemEvent extends BaseEvent {\n type: \"item\";\n id: string;\n score: number;\n breakdown?: Record<string, number>;\n trace_id?: string | null;\n why?: string;\n subgroup?: Record<string, string>;\n}\n\ninterface ProgressEvent extends BaseEvent {\n type: \"progress\";\n done: number;\n total: number;\n}\n\nexport interface CompleteEvent extends BaseEvent {\n type: \"complete\";\n score: number;\n breakdown?: Record<string, number>;\n n: number;\n worst_traces?: Array<{ id: string; score: number; trace_id?: string | null }>;\n}\n\ninterface ErrorEvent extends BaseEvent {\n type: \"error\";\n message: string;\n}\n\n// Synthesized by the runner, not emitted by the eval.\nexport interface CrashedEvent extends BaseEvent {\n type: \"crashed\";\n exit_code: number | null;\n signal?: string | null;\n stderr_tail?: string;\n reason: string;\n}\n\ninterface AbortedEvent extends BaseEvent {\n type: \"aborted\";\n reason: string;\n}\n\nexport interface PromotionEvent extends BaseEvent {\n type: \"promotion\";\n promoted: boolean;\n rule:\n | \"auto\"\n | \"force\"\n | \"no_baseline\"\n | \"version_mismatch\"\n | \"stat_insufficient\"\n | \"subgroup_regression\";\n ci_low?: number;\n ci_high?: number;\n mean_delta?: number;\n n_compared?: number;\n parent_run_id?: string;\n details?: string;\n}\n\nexport type Event =\n | StartEvent\n | ItemEvent\n | ProgressEvent\n | CompleteEvent\n | ErrorEvent\n | CrashedEvent\n | AbortedEvent\n | PromotionEvent;\n\nexport function validateEvent(value: unknown): Event {\n if (!value || typeof value !== \"object\") {\n throw new Error(\"event must be an object\");\n }\n const e = value as Record<string, unknown>;\n if (typeof e.type !== \"string\") {\n throw new Error(\"event.type must be a string\");\n }\n\n switch (e.type) {\n case \"start\":\n requireNumber(e, \"n\");\n requireNumber(e, \"eval_version\");\n requireString(e, \"dataset_version\");\n break;\n case \"item\":\n requireString(e, \"id\");\n requireNumber(e, \"score\");\n break;\n case \"progress\":\n requireNumber(e, \"done\");\n requireNumber(e, \"total\");\n break;\n case \"complete\":\n requireNumber(e, \"score\");\n requireNumber(e, \"n\");\n break;\n case \"error\":\n requireString(e, \"message\");\n break;\n case \"crashed\":\n requireString(e, \"reason\");\n break;\n case \"aborted\":\n requireString(e, \"reason\");\n break;\n case \"promotion\":\n if (typeof e.promoted !== \"boolean\")\n throw new Error(\"promotion.promoted must be boolean\");\n requireString(e, \"rule\");\n break;\n default:\n throw new Error(`unknown event type: ${e.type}`);\n }\n\n if (e.ts !== undefined && typeof e.ts !== \"number\") {\n throw new Error(`${e.type}.ts must be a number when present`);\n }\n return e as unknown as Event;\n}\n\nfunction requireString(e: Record<string, unknown>, key: string): void {\n if (typeof e[key] !== \"string\" || e[key] === \"\") {\n throw new Error(`${String(e.type)}.${key} must be a non-empty string`);\n }\n}\n\nfunction requireNumber(e: Record<string, unknown>, key: string): void {\n if (typeof e[key] !== \"number\" || !Number.isFinite(e[key])) {\n throw new Error(`${String(e.type)}.${key} must be a finite number`);\n }\n}\n\nfunction isTerminal(\n e: Event,\n): e is CompleteEvent | CrashedEvent | AbortedEvent {\n return e.type === \"complete\" || e.type === \"crashed\" || e.type === \"aborted\";\n}\n\nexport function findTerminal(\n events: Event[],\n): CompleteEvent | CrashedEvent | AbortedEvent | null {\n for (let i = events.length - 1; i >= 0; i--) {\n if (isTerminal(events[i]))\n return events[i] as CompleteEvent | CrashedEvent | AbortedEvent;\n }\n return null;\n}\n\n/** Stream-mode NDJSON parser. Feed `push(chunk)` from a Readable; receive parsed events via `onEvent`. */\nexport class NdjsonReader {\n private buffer = \"\";\n public constructor(\n private onEvent: (e: Event) => void,\n private onParseError?: (line: string, err: unknown) => void,\n ) {}\n\n public push(chunk: string | Buffer): void {\n this.buffer += typeof chunk === \"string\" ? chunk : chunk.toString(\"utf-8\");\n let nl: number;\n while ((nl = this.buffer.indexOf(\"\\n\")) !== -1) {\n const line = this.buffer.slice(0, nl);\n this.buffer = this.buffer.slice(nl + 1);\n if (!line.trim()) continue;\n try {\n this.onEvent(validateEvent(JSON.parse(line)));\n } catch (err) {\n this.onParseError?.(line, err);\n }\n }\n }\n\n /** Flush any remaining unterminated content as a best-effort parse. */\n public end(): void {\n const tail = this.buffer.trim();\n this.buffer = \"\";\n if (!tail) return;\n try {\n this.onEvent(validateEvent(JSON.parse(tail)));\n } catch (err) {\n this.onParseError?.(tail, err);\n }\n }\n}\n"],"mappings":";AAsFA,SAAgB,cAAc,OAAuB;AACnD,KAAI,CAAC,SAAS,OAAO,UAAU,SAC7B,OAAM,IAAI,MAAM,0BAA0B;CAE5C,MAAM,IAAI;AACV,KAAI,OAAO,EAAE,SAAS,SACpB,OAAM,IAAI,MAAM,8BAA8B;AAGhD,SAAQ,EAAE,MAAV;EACE,KAAK;AACH,iBAAc,GAAG,IAAI;AACrB,iBAAc,GAAG,eAAe;AAChC,iBAAc,GAAG,kBAAkB;AACnC;EACF,KAAK;AACH,iBAAc,GAAG,KAAK;AACtB,iBAAc,GAAG,QAAQ;AACzB;EACF,KAAK;AACH,iBAAc,GAAG,OAAO;AACxB,iBAAc,GAAG,QAAQ;AACzB;EACF,KAAK;AACH,iBAAc,GAAG,QAAQ;AACzB,iBAAc,GAAG,IAAI;AACrB;EACF,KAAK;AACH,iBAAc,GAAG,UAAU;AAC3B;EACF,KAAK;AACH,iBAAc,GAAG,SAAS;AAC1B;EACF,KAAK;AACH,iBAAc,GAAG,SAAS;AAC1B;EACF,KAAK;AACH,OAAI,OAAO,EAAE,aAAa,UACxB,OAAM,IAAI,MAAM,qCAAqC;AACvD,iBAAc,GAAG,OAAO;AACxB;EACF,QACE,OAAM,IAAI,MAAM,uBAAuB,EAAE,OAAO;;AAGpD,KAAI,EAAE,OAAO,KAAA,KAAa,OAAO,EAAE,OAAO,SACxC,OAAM,IAAI,MAAM,GAAG,EAAE,KAAK,mCAAmC;AAE/D,QAAO;;AAGT,SAAS,cAAc,GAA4B,KAAmB;AACpE,KAAI,OAAO,EAAE,SAAS,YAAY,EAAE,SAAS,GAC3C,OAAM,IAAI,MAAM,GAAG,OAAO,EAAE,KAAK,CAAC,GAAG,IAAI,6BAA6B;;AAI1E,SAAS,cAAc,GAA4B,KAAmB;AACpE,KAAI,OAAO,EAAE,SAAS,YAAY,CAAC,OAAO,SAAS,EAAE,KAAK,CACxD,OAAM,IAAI,MAAM,GAAG,OAAO,EAAE,KAAK,CAAC,GAAG,IAAI,0BAA0B;;AAIvE,SAAS,WACP,GACkD;AAClD,QAAO,EAAE,SAAS,cAAc,EAAE,SAAS,aAAa,EAAE,SAAS;;AAGrE,SAAgB,aACd,QACoD;AACpD,MAAK,IAAI,IAAI,OAAO,SAAS,GAAG,KAAK,GAAG,IACtC,KAAI,WAAW,OAAO,GAAG,CACvB,QAAO,OAAO;AAElB,QAAO;;;AAIT,IAAa,eAAb,MAA0B;CACxB,SAAiB;CACjB,YACE,SACA,cACA;AAFQ,OAAA,UAAA;AACA,OAAA,eAAA;;CAGV,KAAY,OAA8B;AACxC,OAAK,UAAU,OAAO,UAAU,WAAW,QAAQ,MAAM,SAAS,QAAQ;EAC1E,IAAI;AACJ,UAAQ,KAAK,KAAK,OAAO,QAAQ,KAAK,MAAM,IAAI;GAC9C,MAAM,OAAO,KAAK,OAAO,MAAM,GAAG,GAAG;AACrC,QAAK,SAAS,KAAK,OAAO,MAAM,KAAK,EAAE;AACvC,OAAI,CAAC,KAAK,MAAM,CAAE;AAClB,OAAI;AACF,SAAK,QAAQ,cAAc,KAAK,MAAM,KAAK,CAAC,CAAC;YACtC,KAAK;AACZ,SAAK,eAAe,MAAM,IAAI;;;;;CAMpC,MAAmB;EACjB,MAAM,OAAO,KAAK,OAAO,MAAM;AAC/B,OAAK,SAAS;AACd,MAAI,CAAC,KAAM;AACX,MAAI;AACF,QAAK,QAAQ,cAAc,KAAK,MAAM,KAAK,CAAC,CAAC;WACtC,KAAK;AACZ,QAAK,eAAe,MAAM,IAAI"}