@percepta/kaizen 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. package/README.md +54 -126
  2. package/agent/claude-command.md +23 -0
  3. package/agent/evals.md +41 -0
  4. package/agent/overview.md +53 -0
  5. package/agent/variant-builder.md +22 -0
  6. package/agent/views.md +51 -0
  7. package/dashboard/.next/standalone/package.json +1 -1
  8. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/BUILD_ID +1 -1
  9. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/build-manifest.json +22 -22
  10. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/prerender-manifest.json +3 -3
  11. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/routes-manifest.json +36 -10
  12. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/169.js +1 -0
  13. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/588.js +8 -0
  14. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/middleware-build-manifest.js +1 -1
  15. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/404.html +1 -1
  16. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/500.html +1 -1
  17. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/benchmarks.html +1 -1
  18. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/benchmarks.js.nft.json +1 -1
  19. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data/[[...path]].html +1 -0
  20. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data/[[...path]].js.nft.json +1 -0
  21. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/eval.html +1 -1
  22. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/eval.js.nft.json +1 -1
  23. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments/[[...path]].html +1 -0
  24. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments/[[...path]].js.nft.json +1 -0
  25. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/ideas.html +1 -1
  26. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/ideas.js.nft.json +1 -1
  27. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-action.js +1 -0
  28. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-action.js.nft.json +1 -0
  29. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-item.js +1 -1
  30. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-item.js.nft.json +1 -1
  31. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-mutation.js +1 -0
  32. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-mutation.js.nft.json +1 -0
  33. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset.js +1 -1
  34. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset.js.nft.json +1 -1
  35. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-datasets.js +1 -1
  36. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-datasets.js.nft.json +1 -1
  37. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-trace-memberships.js +1 -0
  38. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-trace-memberships.js.nft.json +1 -0
  39. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-trace.js +1 -1
  40. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-trace.js.nft.json +1 -1
  41. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-traces.js +1 -0
  42. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-traces.js.nft.json +1 -0
  43. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/linear-ideas.js +2 -2
  44. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/linear-ideas.js.nft.json +1 -1
  45. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-events.js +1 -1
  46. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-events.js.nft.json +1 -1
  47. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-failures.js +1 -1
  48. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-failures.js.nft.json +1 -1
  49. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-traces.js +1 -1
  50. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-traces.js.nft.json +1 -1
  51. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/runs.js +2 -2
  52. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/runs.js.nft.json +1 -1
  53. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/systems.js +2 -2
  54. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/systems.js.nft.json +1 -1
  55. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/trace-renderer.js +1 -1
  56. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/trace-renderer.js.nft.json +1 -1
  57. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/index.html +1 -1
  58. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/index.js.nft.json +1 -1
  59. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages-manifest.json +10 -6
  60. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/9JQIPpJv6qWldYoYMHZAl/_buildManifest.js +1 -0
  61. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/53-795fe9d662eaacfe.js +8 -0
  62. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/{benchmarks-559dc9df52db3af4.js → benchmarks-bc38d751890170d0.js} +1 -1
  63. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/data/[[...path]]-8afe5a733bdde0f4.js +1 -0
  64. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/{eval-3c911ea8744631fd.js → eval-ab900515b5b18b4d.js} +1 -1
  65. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/experiments/[[...path]]-7198800378ce98dc.js +1 -0
  66. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/{ideas-6829a271003150a9.js → ideas-d8fd592d7cd21bb9.js} +1 -1
  67. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/{index-1d8b6719f49e4ae0.js → index-842f5332939fc510.js} +1 -1
  68. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/css/d97fcd1d34ebab98.css +1 -0
  69. package/dashboard/.next/standalone/packages/kaizen/package.json +8 -3
  70. package/dashboard/.next/standalone/packages/kaizen/shared/workspace-paths.js +84 -0
  71. package/dist/commands/create-view.js +58 -0
  72. package/dist/commands/create-view.js.map +1 -0
  73. package/dist/commands/guide.js +66 -0
  74. package/dist/commands/guide.js.map +1 -0
  75. package/dist/commands/ideas.js +4 -8
  76. package/dist/commands/ideas.js.map +1 -1
  77. package/dist/commands/init-system.js +22 -20
  78. package/dist/commands/init-system.js.map +1 -1
  79. package/dist/commands/init.js +28 -64
  80. package/dist/commands/init.js.map +1 -1
  81. package/dist/commands/log.js +5 -11
  82. package/dist/commands/log.js.map +1 -1
  83. package/dist/commands/rebuild.js +7 -9
  84. package/dist/commands/rebuild.js.map +1 -1
  85. package/dist/commands/run.js +5 -9
  86. package/dist/commands/run.js.map +1 -1
  87. package/dist/commands/studio.js +3 -3
  88. package/dist/commands/studio.js.map +1 -1
  89. package/dist/index.js +17 -21
  90. package/dist/index.js.map +1 -1
  91. package/dist/lib/cli.js +20 -0
  92. package/dist/lib/cli.js.map +1 -0
  93. package/dist/lib/events.js.map +1 -1
  94. package/dist/lib/fs-utils.js +3 -27
  95. package/dist/lib/fs-utils.js.map +1 -1
  96. package/dist/lib/leaderboard.js +1 -1
  97. package/dist/lib/leaderboard.js.map +1 -1
  98. package/dist/lib/paths.js +3 -3
  99. package/dist/lib/paths.js.map +1 -1
  100. package/dist/lib/promotion.js.map +1 -1
  101. package/dist/lib/run-dir.js +1 -1
  102. package/dist/lib/run-dir.js.map +1 -1
  103. package/dist/lib/runner.js +6 -5
  104. package/dist/lib/runner.js.map +1 -1
  105. package/dist/lib/system.js +4 -2
  106. package/dist/lib/system.js.map +1 -1
  107. package/dist/package.js +6 -3
  108. package/dist/shared/view-types.d.ts +67 -0
  109. package/dist/shared/view-types.d.ts.map +1 -0
  110. package/dist/shared/workspace-paths.js +84 -0
  111. package/dist/shared/workspace-paths.js.map +1 -0
  112. package/dist/types.d.ts +3 -30
  113. package/dist/types.d.ts.map +1 -1
  114. package/package.json +8 -3
  115. package/shared/view-types.d.ts +69 -0
  116. package/shared/view-types.js +1 -0
  117. package/shared/workspace-paths.d.ts +19 -0
  118. package/shared/workspace-paths.js +84 -0
  119. package/templates/system/eval.py +13 -6
  120. package/templates/system/eval.ts +11 -5
  121. package/templates/system/rubric.md +1 -1
  122. package/templates/system/system.md +6 -5
  123. package/templates/view/dataset-item.tsx +63 -0
  124. package/templates/view/trace.tsx +10 -0
  125. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/715.js +0 -6
  126. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data.html +0 -1
  127. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data.js.nft.json +0 -1
  128. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments.html +0 -1
  129. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments.js.nft.json +0 -1
  130. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/YpQ-I4VL-aEdQrM5uN7_3/_buildManifest.js +0 -1
  131. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/673-ed4be46027ae7a37.js +0 -6
  132. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/data-644e4280b4c86fe0.js +0 -1
  133. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/experiments-42f31600c2bb47ad.js +0 -1
  134. package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/css/b18a6732b96168e1.css +0 -1
  135. package/dist/lib/env.js +0 -2
  136. package/dist/shared/env.js +0 -4
  137. package/templates/workspace/.claude/agents/variant-builder.md +0 -51
  138. package/templates/workspace/.claude/commands/kaizen.md +0 -65
  139. /package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/{YpQ-I4VL-aEdQrM5uN7_3 → 9JQIPpJv6qWldYoYMHZAl}/_ssgManifest.js +0 -0
@@ -1 +1 @@
1
- {"version":3,"file":"runner.js","names":[],"sources":["../../src/lib/runner.ts"],"sourcesContent":["import { type ChildProcess, execSync, spawn } from \"node:child_process\";\nimport { createWriteStream, existsSync, writeFileSync } from \"node:fs\";\nimport { createRequire } from \"node:module\";\nimport { arch, hostname, platform } from \"node:os\";\nimport { extname, join } from \"node:path\";\nimport type { Readable } from \"node:stream\";\nimport type { LinearIssueLink } from \"../../shared/linear-issue.js\";\nimport {\n type CompleteEvent,\n type CrashedEvent,\n type Event,\n type ItemEvent,\n NdjsonReader,\n type PromotionEvent,\n} from \"./events.js\";\nimport {\n type ManifestFile,\n type StateFile,\n currentBaseline,\n listRuns,\n} from \"./leaderboard.js\";\nimport { decidePromotion } from \"./promotion.js\";\nimport {\n appendNdjsonLine,\n clearLock,\n ensureDir,\n generateRunId,\n hypothesesPath,\n isPidAlive,\n systemRunsDir,\n readJsonIfExists,\n readLock,\n runDir,\n writeJsonAtomic,\n writeLock,\n} from \"./run-dir.js\";\nimport { type SystemDef, loadSystem, resolveEvalPath } from \"./system.js\";\n\nexport interface RunOptions {\n workspace: string;\n stateDir?: string;\n systemId: string;\n variant: string;\n parent: string | null;\n hypothesis: string;\n diagnostic: boolean;\n noAutoPromote: boolean;\n maxItems: number | null;\n kaizenVersion: string;\n linearIssue: LinearIssueLink | null;\n}\n\nexport interface RunResult {\n runId: string;\n status: \"complete\" | \"crashed\" | \"aborted\";\n score: number | null;\n promoted: boolean | null;\n exitCode: number;\n}\n\nconst FAILURE_K = 10;\nconst require = createRequire(import.meta.url);\n\nexport async function runExperiment(opts: RunOptions): Promise<RunResult> {\n const stateDir = opts.stateDir ?? join(opts.workspace, \".kaizen\");\n\n // 0. Reap stale runs across this system (lock present, PID dead, no terminal event).\n reapStaleRuns(stateDir, opts.systemId);\n\n // 1. 
Load system + resolve eval interpreter.\n const system = loadSystem(opts.workspace, opts.systemId);\n const evalAbs = resolveEvalPath(opts.workspace, system);\n if (!existsSync(evalAbs)) {\n throw new Error(\n `eval script not found at ${evalAbs} (declared as run_eval: ${system.frontmatter.run_eval} in systems/${opts.systemId}.md)`,\n );\n }\n const interp = detectInterpreter(evalAbs);\n\n // 2. Generate run_id, create dir, write manifest, take lock.\n const runId = generateRunId();\n const dir = runDir(stateDir, opts.systemId, runId);\n ensureDir(dir);\n const startedAt = new Date().toISOString();\n const manifest: ManifestFile = {\n run_id: runId,\n system: opts.systemId,\n variant: opts.variant,\n parent_id: opts.parent,\n hypothesis: opts.hypothesis,\n git_sha: gitSha(opts.workspace),\n git_branch: gitBranch(opts.workspace),\n worktree_root: gitTopLevel(opts.workspace),\n git_common_dir: gitCommonDir(opts.workspace),\n eval_version: system.frontmatter.eval_version,\n dataset_version: system.frontmatter.dataset_version,\n started_at: startedAt,\n host: `${platform()}-${arch()} (${hostname()})`,\n kaizen_version: opts.kaizenVersion,\n state_dir: stateDir,\n diagnostic: opts.diagnostic,\n linear_issue_id: opts.linearIssue?.id ?? null,\n linear_issue_url: opts.linearIssue?.url ?? null,\n };\n writeJsonAtomic(join(dir, \"manifest.json\"), manifest);\n writeLock(dir);\n\n const eventsPath = join(dir, \"events.jsonl\");\n const stdoutPath = join(dir, \"stdout.log\");\n const stderrPath = join(dir, \"stderr.log\");\n\n // 3. 
Build initial state and write it.\n const state: StateFile & {\n system: string;\n variant: string;\n eval_version: number;\n dataset_version: string;\n started_at: string;\n updated_at: string;\n n_done: number;\n } = {\n run_id: runId,\n system: opts.systemId,\n variant: opts.variant,\n status: \"running\",\n score: null,\n n_total: null,\n n_done: 0,\n promoted: null,\n started_at: startedAt,\n updated_at: startedAt,\n ended_at: null,\n eval_version: system.frontmatter.eval_version,\n dataset_version: system.frontmatter.dataset_version,\n };\n writeJsonAtomic(join(dir, \"state.json\"), state);\n\n // 4. Spawn the eval with fd 3 piped for events.\n // For shebang scripts (cmd === evalAbs) we omit evalAbs from args, since\n // spawn already passes cmd as argv[0]. Including it would duplicate the\n // path as a positional and break argparse-based eval scripts.\n const evalArgs = [\n ...interp.preArgs,\n ...(interp.cmd === evalAbs ? [] : [evalAbs]),\n \"--variant\",\n opts.variant,\n \"--dataset\",\n system.frontmatter.dataset_version,\n \"--out-fd\",\n \"3\",\n ];\n const effectiveMax = opts.diagnostic ? 5 : opts.maxItems;\n if (effectiveMax !== null && effectiveMax !== undefined) {\n evalArgs.push(\"--max-items\", String(effectiveMax));\n }\n\n const stdoutLog = createWriteStream(stdoutPath);\n const stderrLog = createWriteStream(stderrPath);\n let stderrTail = \"\";\n\n const child: ChildProcess = spawn(interp.cmd, evalArgs, {\n cwd: opts.workspace,\n stdio: [\"ignore\", \"pipe\", \"pipe\", \"pipe\"],\n env: process.env,\n });\n\n // Tee stdout/stderr to log files. Capture last ~4KB of stderr for crash diagnostics.\n child.stdout!.on(\"data\", (chunk: Buffer) => stdoutLog.write(chunk));\n child.stderr!.on(\"data\", (chunk: Buffer) => {\n stderrLog.write(chunk);\n stderrTail = (stderrTail + chunk.toString(\"utf-8\")).slice(-4096);\n });\n\n // Event stream on fd 3. 
spawn(['ignore','pipe','pipe','pipe']) makes child.stdio[3] a Readable.\n const eventChan = child.stdio[3] as Readable;\n const itemEvents: ItemEvent[] = [];\n let sawComplete: CompleteEvent | null = null;\n let parseErrors = 0;\n let contractErrors = 0;\n\n const reader = new NdjsonReader(\n (e) => onEvent(e),\n (line, err) => {\n parseErrors++;\n stderrLog.write(\n `[runner] could not parse event: ${line.slice(0, 200)}\\n`,\n );\n stderrLog.write(\n `[runner] ${err instanceof Error ? err.message : String(err)}\\n`,\n );\n },\n );\n eventChan.setEncoding?.(\"utf-8\");\n eventChan.on(\"data\", (chunk) => reader.push(chunk as string | Buffer));\n // The event channel may close after the child's `exit` fires (Node docs\n // explicitly note stdio streams can outlive `exit`). We must drain it\n // before deciding terminal state — otherwise a successful run that\n // emitted a `complete` event still in the pipe buffer gets misclassified\n // as \"crashed\" because `sawComplete` is observed null.\n const eventChanEnded = new Promise<void>((resolve) => {\n eventChan.on(\"end\", () => {\n reader.end();\n resolve();\n });\n eventChan.on(\"close\", () => {\n reader.end();\n resolve();\n });\n });\n\n function onEvent(e: Event): void {\n appendNdjsonLine(eventsPath, e);\n if (e.type === \"start\") {\n const s = e;\n if (s.eval_version !== system.frontmatter.eval_version) {\n contractErrors++;\n stderrLog.write(\n `[runner] start.eval_version ${s.eval_version} does not match system eval_version ${system.frontmatter.eval_version}\\n`,\n );\n }\n if (s.dataset_version !== system.frontmatter.dataset_version) {\n contractErrors++;\n stderrLog.write(\n `[runner] start.dataset_version ${s.dataset_version} does not match system dataset_version ${system.frontmatter.dataset_version}\\n`,\n );\n }\n state.n_total = s.n;\n state.updated_at = new Date().toISOString();\n writeJsonAtomic(join(dir, \"state.json\"), state);\n } else if (e.type === \"item\") {\n const it = e;\n 
itemEvents.push(it);\n state.n_done = (state.n_done ?? 0) + 1;\n state.updated_at = new Date().toISOString();\n writeJsonAtomic(join(dir, \"state.json\"), state);\n } else if (e.type === \"progress\") {\n // n_done is authoritative from item events; progress is informational.\n state.updated_at = new Date().toISOString();\n writeJsonAtomic(join(dir, \"state.json\"), state);\n } else if (e.type === \"complete\") {\n sawComplete = e;\n if (state.n_total !== null && sawComplete.n !== state.n_total) {\n contractErrors++;\n stderrLog.write(\n `[runner] complete.n ${sawComplete.n} does not match start.n ${state.n_total}\\n`,\n );\n }\n state.score = sawComplete.score;\n state.updated_at = new Date().toISOString();\n writeJsonAtomic(join(dir, \"state.json\"), state);\n }\n // error/promotion/etc. — no state mutation here\n }\n\n // 5. Wait for child to exit AND for the event channel to be fully drained.\n // Both must complete before we decide terminal state — the child's exit\n // doesn't guarantee its stdio buffers have been consumed by us yet.\n const { code, signal } = await waitForExit(child);\n await eventChanEnded;\n\n // Flush logs.\n stdoutLog.end();\n stderrLog.end();\n await Promise.all([\n new Promise<void>((r) => stdoutLog.on(\"close\", () => r())),\n new Promise<void>((r) => stderrLog.on(\"close\", () => r())),\n ]);\n\n // 6. Decide terminal state.\n const endedAt = new Date().toISOString();\n if (\n code === 0 &&\n sawComplete !== null &&\n parseErrors === 0 &&\n contractErrors === 0\n ) {\n state.status = \"complete\";\n state.score = (sawComplete as CompleteEvent).score;\n // The complete event is already in events.jsonl from onEvent.\n } else {\n const crashed: CrashedEvent = {\n type: \"crashed\",\n exit_code: code,\n signal: signal ?? null,\n stderr_tail: stderrTail.slice(-2000),\n reason:\n code !== 0 && code !== null\n ? `eval exited with code ${code}`\n : signal\n ? `eval terminated by signal ${signal}`\n : parseErrors > 0\n ? 
`eval emitted ${parseErrors} malformed event(s)`\n : contractErrors > 0\n ? `eval emitted ${contractErrors} contract-violating event(s)`\n : sawComplete !== null\n ? \"eval emitted complete but exited non-zero\"\n : \"eval exited without emitting complete event\",\n ts: Date.now() / 1000,\n };\n appendNdjsonLine(eventsPath, crashed);\n state.status = \"crashed\";\n }\n state.ended_at = endedAt;\n state.updated_at = endedAt;\n\n // 7. Write failures.jsonl (worst-K).\n if (itemEvents.length > 0) {\n const sorted = [...itemEvents].sort((a, b) => a.score - b.score);\n const worst = sorted.slice(0, Math.min(FAILURE_K, sorted.length));\n const text =\n worst.map((it) => JSON.stringify(it)).join(\"\\n\") +\n (worst.length > 0 ? \"\\n\" : \"\");\n writeFileSync(join(dir, \"failures.jsonl\"), text);\n }\n\n // 8. Promotion (only if completed cleanly).\n let promotionEvent: PromotionEvent | null = null;\n if (state.status === \"complete\" && !opts.noAutoPromote) {\n const allRuns = listRuns(stateDir, opts.systemId).filter(\n (r) => r.run_id !== runId,\n );\n const baselineRun = currentBaseline(\n allRuns,\n system.frontmatter.eval_version,\n system.frontmatter.dataset_version,\n );\n const baseline = baselineRun\n ? {\n run_id: baselineRun.run_id,\n variant: baselineRun.variant,\n score: baselineRun.score!,\n events_path: baselineRun.events_path,\n }\n : null;\n const decision = decidePromotion(\n mapFromItems(itemEvents),\n itemEvents,\n baseline,\n { subgroupKeys: subgroupKeys(system) },\n );\n promotionEvent = { type: \"promotion\", ts: Date.now() / 1000, ...decision };\n appendNdjsonLine(eventsPath, promotionEvent);\n state.promoted = decision.promoted;\n } else if (opts.noAutoPromote) {\n state.promoted = false;\n }\n writeJsonAtomic(join(dir, \"state.json\"), state);\n\n // 9. 
Append hypothesis log line (every run, success or failure).\n const hypoLine = {\n run_id: runId,\n system: opts.systemId,\n variant: opts.variant,\n parent_id: opts.parent,\n hypothesis: opts.hypothesis,\n status: state.status,\n score: state.score,\n promoted: state.promoted,\n started_at: startedAt,\n ended_at: endedAt,\n eval_version: system.frontmatter.eval_version,\n dataset_version: system.frontmatter.dataset_version,\n };\n appendNdjsonLine(hypothesesPath(stateDir, opts.systemId), hypoLine);\n\n // 10. Clear lock.\n clearLock(dir);\n\n void parseErrors; // not surfaced yet; logged to stderr.log\n\n return {\n runId,\n status: state.status as \"complete\" | \"crashed\" | \"aborted\",\n score: state.score,\n promoted: state.promoted ?? null,\n exitCode: state.status === \"complete\" ? 0 : 1,\n };\n}\n\n// --- helpers ---\n\ninterface InterpreterChoice {\n cmd: string;\n preArgs: string[];\n}\n\nfunction detectInterpreter(evalPath: string): InterpreterChoice {\n const ext = extname(evalPath).toLowerCase();\n if (ext === \".py\") return { cmd: \"python3\", preArgs: [] };\n if (ext === \".js\" || ext === \".mjs\" || ext === \".cjs\")\n return { cmd: \"node\", preArgs: [] };\n if (ext === \".ts\" || ext === \".tsx\" || ext === \".mts\" || ext === \".cts\") {\n return {\n cmd: process.execPath,\n preArgs: [\"--import\", require.resolve(\"tsx\")],\n };\n }\n // Default: assume executable with shebang.\n return { cmd: evalPath, preArgs: [] };\n}\n\nfunction waitForExit(\n child: ChildProcess,\n): Promise<{ code: number | null; signal: NodeJS.Signals | null }> {\n return new Promise((resolve) => {\n child.on(\"exit\", (code: number | null, signal: NodeJS.Signals | null) =>\n resolve({ code, signal }),\n );\n });\n}\n\nfunction gitSha(cwd: string): string | null {\n try {\n return (\n execSync(\"git rev-parse HEAD\", {\n cwd,\n stdio: [\"ignore\", \"pipe\", \"ignore\"],\n })\n .toString()\n .trim() || null\n );\n } catch {\n return null;\n }\n}\n\nfunction 
gitBranch(cwd: string): string | null {\n try {\n return (\n execSync(\"git rev-parse --abbrev-ref HEAD\", {\n cwd,\n stdio: [\"ignore\", \"pipe\", \"ignore\"],\n })\n .toString()\n .trim() || null\n );\n } catch {\n return null;\n }\n}\n\nfunction gitTopLevel(cwd: string): string | null {\n try {\n return (\n execSync(\"git rev-parse --show-toplevel\", {\n cwd,\n stdio: [\"ignore\", \"pipe\", \"ignore\"],\n })\n .toString()\n .trim() || null\n );\n } catch {\n return null;\n }\n}\n\nfunction gitCommonDir(cwd: string): string | null {\n try {\n return (\n execSync(\"git rev-parse --git-common-dir\", {\n cwd,\n stdio: [\"ignore\", \"pipe\", \"ignore\"],\n })\n .toString()\n .trim() || null\n );\n } catch {\n return null;\n }\n}\n\nfunction mapFromItems(items: ItemEvent[]): Map<string, number> {\n const m = new Map<string, number>();\n for (const it of items) m.set(it.id, it.score);\n return m;\n}\n\nfunction subgroupKeys(system: SystemDef): string[] {\n const sg = (\n system.frontmatter as {\n subgroups?: Array<{ field?: string; name?: string }>;\n }\n ).subgroups;\n if (!Array.isArray(sg)) return [];\n return sg.map((s) => s.field ?? s.name ?? 
\"\").filter(Boolean);\n}\n\n/**\n * Sweep stale runs whose lock points to a dead PID and which never recorded a terminal event.\n * Mark them `crashed` with reason \"supervisor died\" so the dashboard / log don't show ghosts.\n */\nexport function reapStaleRuns(stateDir: string, systemId: string): void {\n const dir = systemRunsDir(stateDir, systemId);\n if (!existsSync(dir)) return;\n for (const run of listRuns(stateDir, systemId)) {\n if (run.status !== \"running\") continue;\n const lock = readLock(join(dir, run.run_id));\n if (!lock) continue;\n if (isPidAlive(lock.pid)) continue;\n const reapedEvent: CrashedEvent = {\n type: \"crashed\",\n exit_code: null,\n signal: null,\n reason: `runner pid ${lock.pid} died without writing a terminal event; reaped`,\n ts: Date.now() / 1000,\n };\n appendNdjsonLine(run.events_path, reapedEvent);\n\n const state = readJsonIfExists<StateFile>(run.state_path);\n if (state) {\n state.status = \"crashed\";\n state.ended_at = new Date().toISOString();\n writeJsonAtomic(run.state_path, state);\n }\n clearLock(join(dir, run.run_id));\n 
}\n}\n"],"mappings":";;;;;;;;;;;AA4DA,MAAM,YAAY;AAClB,MAAM,UAAU,cAAc,OAAO,KAAK,IAAI;AAE9C,eAAsB,cAAc,MAAsC;CACxE,MAAM,WAAW,KAAK,YAAY,KAAK,KAAK,WAAW,UAAU;AAGjE,eAAc,UAAU,KAAK,SAAS;CAGtC,MAAM,SAAS,WAAW,KAAK,WAAW,KAAK,SAAS;CACxD,MAAM,UAAU,gBAAgB,KAAK,WAAW,OAAO;AACvD,KAAI,CAAC,WAAW,QAAQ,CACtB,OAAM,IAAI,MACR,4BAA4B,QAAQ,0BAA0B,OAAO,YAAY,SAAS,cAAc,KAAK,SAAS,MACvH;CAEH,MAAM,SAAS,kBAAkB,QAAQ;CAGzC,MAAM,QAAQ,eAAe;CAC7B,MAAM,MAAM,OAAO,UAAU,KAAK,UAAU,MAAM;AAClD,WAAU,IAAI;CACd,MAAM,6BAAY,IAAI,MAAM,EAAC,aAAa;CAC1C,MAAM,WAAyB;EAC7B,QAAQ;EACR,QAAQ,KAAK;EACb,SAAS,KAAK;EACd,WAAW,KAAK;EAChB,YAAY,KAAK;EACjB,SAAS,OAAO,KAAK,UAAU;EAC/B,YAAY,UAAU,KAAK,UAAU;EACrC,eAAe,YAAY,KAAK,UAAU;EAC1C,gBAAgB,aAAa,KAAK,UAAU;EAC5C,cAAc,OAAO,YAAY;EACjC,iBAAiB,OAAO,YAAY;EACpC,YAAY;EACZ,MAAM,GAAG,UAAU,CAAC,GAAG,MAAM,CAAC,IAAI,UAAU,CAAC;EAC7C,gBAAgB,KAAK;EACrB,WAAW;EACX,YAAY,KAAK;EACjB,iBAAiB,KAAK,aAAa,MAAM;EACzC,kBAAkB,KAAK,aAAa,OAAO;EAC5C;AACD,iBAAgB,KAAK,KAAK,gBAAgB,EAAE,SAAS;AACrD,WAAU,IAAI;CAEd,MAAM,aAAa,KAAK,KAAK,eAAe;CAC5C,MAAM,aAAa,KAAK,KAAK,aAAa;CAC1C,MAAM,aAAa,KAAK,KAAK,aAAa;CAG1C,MAAM,QAQF;EACF,QAAQ;EACR,QAAQ,KAAK;EACb,SAAS,KAAK;EACd,QAAQ;EACR,OAAO;EACP,SAAS;EACT,QAAQ;EACR,UAAU;EACV,YAAY;EACZ,YAAY;EACZ,UAAU;EACV,cAAc,OAAO,YAAY;EACjC,iBAAiB,OAAO,YAAY;EACrC;AACD,iBAAgB,KAAK,KAAK,aAAa,EAAE,MAAM;CAM/C,MAAM,WAAW;EACf,GAAG,OAAO;EACV,GAAI,OAAO,QAAQ,UAAU,EAAE,GAAG,CAAC,QAAQ;EAC3C;EACA,KAAK;EACL;EACA,OAAO,YAAY;EACnB;EACA;EACD;CACD,MAAM,eAAe,KAAK,aAAa,IAAI,KAAK;AAChD,KAAI,iBAAiB,QAAQ,iBAAiB,KAAA,EAC5C,UAAS,KAAK,eAAe,OAAO,aAAa,CAAC;CAGpD,MAAM,YAAY,kBAAkB,WAAW;CAC/C,MAAM,YAAY,kBAAkB,WAAW;CAC/C,IAAI,aAAa;CAEjB,MAAM,QAAsB,MAAM,OAAO,KAAK,UAAU;EACtD,KAAK,KAAK;EACV,OAAO;GAAC;GAAU;GAAQ;GAAQ;GAAO;EACzC,KAAK,QAAQ;EACd,CAAC;AAGF,OAAM,OAAQ,GAAG,SAAS,UAAkB,UAAU,MAAM,MAAM,CAAC;AACnE,OAAM,OAAQ,GAAG,SAAS,UAAkB;AAC1C,YAAU,MAAM,MAAM;AACtB,gBAAc,aAAa,MAAM,SAAS,QAAQ,EAAE,MAAM,MAAM;GAChE;CAGF,MAAM,YAAY,MAAM,MAAM;CAC9B,MAAM,aAA0B,EAAE;CAClC,IAAI,cAAoC;CACxC,IAAI,cAAc;CAClB,IAAI,iBAAiB;CAErB,MAAM,SAAS,IAAI,cAChB,MAAM,QAAQ,EAAE,GA
ChB,MAAM,QAAQ;AACb;AACA,YAAU,MACR,mCAAmC,KAAK,MAAM,GAAG,IAAI,CAAC,IACvD;AACD,YAAU,MACR,YAAY,eAAe,QAAQ,IAAI,UAAU,OAAO,IAAI,CAAC,IAC9D;GAEJ;AACD,WAAU,cAAc,QAAQ;AAChC,WAAU,GAAG,SAAS,UAAU,OAAO,KAAK,MAAyB,CAAC;CAMtE,MAAM,iBAAiB,IAAI,SAAe,YAAY;AACpD,YAAU,GAAG,aAAa;AACxB,UAAO,KAAK;AACZ,YAAS;IACT;AACF,YAAU,GAAG,eAAe;AAC1B,UAAO,KAAK;AACZ,YAAS;IACT;GACF;CAEF,SAAS,QAAQ,GAAgB;AAC/B,mBAAiB,YAAY,EAAE;AAC/B,MAAI,EAAE,SAAS,SAAS;GACtB,MAAM,IAAI;AACV,OAAI,EAAE,iBAAiB,OAAO,YAAY,cAAc;AACtD;AACA,cAAU,MACR,+BAA+B,EAAE,aAAa,sCAAsC,OAAO,YAAY,aAAa,IACrH;;AAEH,OAAI,EAAE,oBAAoB,OAAO,YAAY,iBAAiB;AAC5D;AACA,cAAU,MACR,kCAAkC,EAAE,gBAAgB,yCAAyC,OAAO,YAAY,gBAAgB,IACjI;;AAEH,SAAM,UAAU,EAAE;AAClB,SAAM,8BAAa,IAAI,MAAM,EAAC,aAAa;AAC3C,mBAAgB,KAAK,KAAK,aAAa,EAAE,MAAM;aACtC,EAAE,SAAS,QAAQ;GAC5B,MAAM,KAAK;AACX,cAAW,KAAK,GAAG;AACnB,SAAM,UAAU,MAAM,UAAU,KAAK;AACrC,SAAM,8BAAa,IAAI,MAAM,EAAC,aAAa;AAC3C,mBAAgB,KAAK,KAAK,aAAa,EAAE,MAAM;aACtC,EAAE,SAAS,YAAY;AAEhC,SAAM,8BAAa,IAAI,MAAM,EAAC,aAAa;AAC3C,mBAAgB,KAAK,KAAK,aAAa,EAAE,MAAM;aACtC,EAAE,SAAS,YAAY;AAChC,iBAAc;AACd,OAAI,MAAM,YAAY,QAAQ,YAAY,MAAM,MAAM,SAAS;AAC7D;AACA,cAAU,MACR,uBAAuB,YAAY,EAAE,0BAA0B,MAAM,QAAQ,IAC9E;;AAEH,SAAM,QAAQ,YAAY;AAC1B,SAAM,8BAAa,IAAI,MAAM,EAAC,aAAa;AAC3C,mBAAgB,KAAK,KAAK,aAAa,EAAE,MAAM;;;CAQnD,MAAM,EAAE,MAAM,WAAW,MAAM,YAAY,MAAM;AACjD,OAAM;AAGN,WAAU,KAAK;AACf,WAAU,KAAK;AACf,OAAM,QAAQ,IAAI,CAChB,IAAI,SAAe,MAAM,UAAU,GAAG,eAAe,GAAG,CAAC,CAAC,EAC1D,IAAI,SAAe,MAAM,UAAU,GAAG,eAAe,GAAG,CAAC,CAAC,CAC3D,CAAC;CAGF,MAAM,2BAAU,IAAI,MAAM,EAAC,aAAa;AACxC,KACE,SAAS,KACT,gBAAgB,QAChB,gBAAgB,KAChB,mBAAmB,GACnB;AACA,QAAM,SAAS;AACf,QAAM,QAAS,YAA8B;QAExC;AAoBL,mBAAiB,YAAY;GAlB3B,MAAM;GACN,WAAW;GACX,QAAQ,UAAU;GAClB,aAAa,WAAW,MAAM,KAAM;GACpC,QACE,SAAS,KAAK,SAAS,OACnB,yBAAyB,SACzB,SACE,6BAA6B,WAC7B,cAAc,IACZ,gBAAgB,YAAY,uBAC5B,iBAAiB,IACf,gBAAgB,eAAe,gCAC/B,gBAAgB,OACd,8CACA;GACd,IAAI,KAAK,KAAK,GAAG;GAEiB,CAAC;AACrC,QAAM,SAAS;;AAEjB,OAAM,WAAW;AACjB,OAAM,aAAa;AAGnB,KAAI,WAAW,SAAS,GAAG;EACzB,MAAM,SAAS,CAAC,GAAG,WAAW,CAAC,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,MAAM;EAChE,
MAAM,QAAQ,OAAO,MAAM,GAAG,KAAK,IAAI,WAAW,OAAO,OAAO,CAAC;EACjE,MAAM,OACJ,MAAM,KAAK,OAAO,KAAK,UAAU,GAAG,CAAC,CAAC,KAAK,KAAK,IAC/C,MAAM,SAAS,IAAI,OAAO;AAC7B,gBAAc,KAAK,KAAK,iBAAiB,EAAE,KAAK;;CAIlD,IAAI,iBAAwC;AAC5C,KAAI,MAAM,WAAW,cAAc,CAAC,KAAK,eAAe;EAItD,MAAM,cAAc,gBAHJ,SAAS,UAAU,KAAK,SAAS,CAAC,QAC/C,MAAM,EAAE,WAAW,MAGb,EACP,OAAO,YAAY,cACnB,OAAO,YAAY,gBACpB;EACD,MAAM,WAAW,cACb;GACE,QAAQ,YAAY;GACpB,SAAS,YAAY;GACrB,OAAO,YAAY;GACnB,aAAa,YAAY;GAC1B,GACD;EACJ,MAAM,WAAW,gBACf,aAAa,WAAW,EACxB,YACA,UACA,EAAE,cAAc,aAAa,OAAO,EAAE,CACvC;AACD,mBAAiB;GAAE,MAAM;GAAa,IAAI,KAAK,KAAK,GAAG;GAAM,GAAG;GAAU;AAC1E,mBAAiB,YAAY,eAAe;AAC5C,QAAM,WAAW,SAAS;YACjB,KAAK,cACd,OAAM,WAAW;AAEnB,iBAAgB,KAAK,KAAK,aAAa,EAAE,MAAM;CAG/C,MAAM,WAAW;EACf,QAAQ;EACR,QAAQ,KAAK;EACb,SAAS,KAAK;EACd,WAAW,KAAK;EAChB,YAAY,KAAK;EACjB,QAAQ,MAAM;EACd,OAAO,MAAM;EACb,UAAU,MAAM;EAChB,YAAY;EACZ,UAAU;EACV,cAAc,OAAO,YAAY;EACjC,iBAAiB,OAAO,YAAY;EACrC;AACD,kBAAiB,eAAe,UAAU,KAAK,SAAS,EAAE,SAAS;AAGnE,WAAU,IAAI;AAId,QAAO;EACL;EACA,QAAQ,MAAM;EACd,OAAO,MAAM;EACb,UAAU,MAAM,YAAY;EAC5B,UAAU,MAAM,WAAW,aAAa,IAAI;EAC7C;;AAUH,SAAS,kBAAkB,UAAqC;CAC9D,MAAM,MAAM,QAAQ,SAAS,CAAC,aAAa;AAC3C,KAAI,QAAQ,MAAO,QAAO;EAAE,KAAK;EAAW,SAAS,EAAE;EAAE;AACzD,KAAI,QAAQ,SAAS,QAAQ,UAAU,QAAQ,OAC7C,QAAO;EAAE,KAAK;EAAQ,SAAS,EAAE;EAAE;AACrC,KAAI,QAAQ,SAAS,QAAQ,UAAU,QAAQ,UAAU,QAAQ,OAC/D,QAAO;EACL,KAAK,QAAQ;EACb,SAAS,CAAC,YAAY,QAAQ,QAAQ,MAAM,CAAC;EAC9C;AAGH,QAAO;EAAE,KAAK;EAAU,SAAS,EAAE;EAAE;;AAGvC,SAAS,YACP,OACiE;AACjE,QAAO,IAAI,SAAS,YAAY;AAC9B,QAAM,GAAG,SAAS,MAAqB,WACrC,QAAQ;GAAE;GAAM;GAAQ,CAAC,CAC1B;GACD;;AAGJ,SAAS,OAAO,KAA4B;AAC1C,KAAI;AACF,SACE,SAAS,sBAAsB;GAC7B;GACA,OAAO;IAAC;IAAU;IAAQ;IAAS;GACpC,CAAC,CACC,UAAU,CACV,MAAM,IAAI;SAET;AACN,SAAO;;;AAIX,SAAS,UAAU,KAA4B;AAC7C,KAAI;AACF,SACE,SAAS,mCAAmC;GAC1C;GACA,OAAO;IAAC;IAAU;IAAQ;IAAS;GACpC,CAAC,CACC,UAAU,CACV,MAAM,IAAI;SAET;AACN,SAAO;;;AAIX,SAAS,YAAY,KAA4B;AAC/C,KAAI;AACF,SACE,SAAS,iCAAiC;GACxC;GACA,OAAO;IAAC;IAAU;IAAQ;IAAS;GACpC,CAAC,CACC,UAAU,CACV,MAAM,IAAI;SAET;AACN,SAAO;;;AAIX,SAAS,aAAa,KAA4B;AAChD,KAAI;AACF,SA
CE,SAAS,kCAAkC;GACzC;GACA,OAAO;IAAC;IAAU;IAAQ;IAAS;GACpC,CAAC,CACC,UAAU,CACV,MAAM,IAAI;SAET;AACN,SAAO;;;AAIX,SAAS,aAAa,OAAyC;CAC7D,MAAM,oBAAI,IAAI,KAAqB;AACnC,MAAK,MAAM,MAAM,MAAO,GAAE,IAAI,GAAG,IAAI,GAAG,MAAM;AAC9C,QAAO;;AAGT,SAAS,aAAa,QAA6B;CACjD,MAAM,KACJ,OAAO,YAGP;AACF,KAAI,CAAC,MAAM,QAAQ,GAAG,CAAE,QAAO,EAAE;AACjC,QAAO,GAAG,KAAK,MAAM,EAAE,SAAS,EAAE,QAAQ,GAAG,CAAC,OAAO,QAAQ;;;;;;AAO/D,SAAgB,cAAc,UAAkB,UAAwB;CACtE,MAAM,MAAM,cAAc,UAAU,SAAS;AAC7C,KAAI,CAAC,WAAW,IAAI,CAAE;AACtB,MAAK,MAAM,OAAO,SAAS,UAAU,SAAS,EAAE;AAC9C,MAAI,IAAI,WAAW,UAAW;EAC9B,MAAM,OAAO,SAAS,KAAK,KAAK,IAAI,OAAO,CAAC;AAC5C,MAAI,CAAC,KAAM;AACX,MAAI,WAAW,KAAK,IAAI,CAAE;EAC1B,MAAM,cAA4B;GAChC,MAAM;GACN,WAAW;GACX,QAAQ;GACR,QAAQ,cAAc,KAAK,IAAI;GAC/B,IAAI,KAAK,KAAK,GAAG;GAClB;AACD,mBAAiB,IAAI,aAAa,YAAY;EAE9C,MAAM,QAAQ,iBAA4B,IAAI,WAAW;AACzD,MAAI,OAAO;AACT,SAAM,SAAS;AACf,SAAM,4BAAW,IAAI,MAAM,EAAC,aAAa;AACzC,mBAAgB,IAAI,YAAY,MAAM;;AAExC,YAAU,KAAK,KAAK,IAAI,OAAO,CAAC"}
1
+ {"version":3,"file":"runner.js","names":[],"sources":["../../src/lib/runner.ts"],"sourcesContent":["import { type ChildProcess, execSync, spawn } from \"node:child_process\";\nimport { createWriteStream, existsSync, writeFileSync } from \"node:fs\";\nimport { createRequire } from \"node:module\";\nimport { arch, hostname, platform } from \"node:os\";\nimport { extname, join } from \"node:path\";\nimport type { Readable } from \"node:stream\";\nimport type { LinearIssueLink } from \"../../shared/linear-issue.js\";\nimport {\n type CompleteEvent,\n type CrashedEvent,\n type Event,\n type ItemEvent,\n NdjsonReader,\n type PromotionEvent,\n} from \"./events.js\";\nimport {\n type ManifestFile,\n type StateFile,\n currentBaseline,\n listRuns,\n} from \"./leaderboard.js\";\nimport { resolveStateDir } from \"./paths.js\";\nimport { decidePromotion } from \"./promotion.js\";\nimport {\n appendNdjsonLine,\n clearLock,\n ensureDir,\n generateRunId,\n hypothesesPath,\n isPidAlive,\n systemRunsDir,\n readJsonIfExists,\n readLock,\n runDir,\n writeJsonAtomic,\n writeLock,\n} from \"./run-dir.js\";\nimport { type SystemDef, loadSystem, resolveEvalPath } from \"./system.js\";\n\nexport interface RunOptions {\n workspace: string;\n systemId: string;\n variant: string;\n parent: string | null;\n hypothesis: string;\n diagnostic: boolean;\n noAutoPromote: boolean;\n maxItems: number | null;\n kaizenVersion: string;\n linearIssue: LinearIssueLink | null;\n}\n\nexport interface RunResult {\n runId: string;\n status: \"complete\" | \"crashed\" | \"aborted\";\n score: number | null;\n promoted: boolean | null;\n exitCode: number;\n}\n\nconst FAILURE_K = 10;\nconst require = createRequire(import.meta.url);\n\nexport async function runExperiment(opts: RunOptions): Promise<RunResult> {\n const stateDir = resolveStateDir(opts.workspace);\n\n // 0. Reap stale runs across this system (lock present, PID dead, no terminal event).\n reapStaleRuns(stateDir, opts.systemId);\n\n // 1. 
Load system + resolve eval interpreter.\n const system = loadSystem(opts.workspace, opts.systemId);\n const evalAbs = resolveEvalPath(opts.workspace, system);\n if (!existsSync(evalAbs)) {\n throw new Error(\n `eval script not found at ${evalAbs} (declared as run_eval: ${system.frontmatter.run_eval} in kaizen/systems/${opts.systemId}/system.md)`,\n );\n }\n const interp = detectInterpreter(evalAbs);\n\n // 2. Generate run_id, create dir, write manifest, take lock.\n const runId = generateRunId();\n const dir = runDir(stateDir, opts.systemId, runId);\n ensureDir(dir);\n const startedAt = new Date().toISOString();\n const manifest: ManifestFile = {\n run_id: runId,\n system: opts.systemId,\n variant: opts.variant,\n parent_id: opts.parent,\n hypothesis: opts.hypothesis,\n git_sha: gitSha(opts.workspace),\n git_branch: gitBranch(opts.workspace),\n worktree_root: gitTopLevel(opts.workspace),\n git_common_dir: gitCommonDir(opts.workspace),\n eval_version: system.frontmatter.eval_version,\n dataset_version: system.frontmatter.dataset_version,\n started_at: startedAt,\n host: `${platform()}-${arch()} (${hostname()})`,\n kaizen_version: opts.kaizenVersion,\n state_dir: stateDir,\n diagnostic: opts.diagnostic,\n linear_issue_id: opts.linearIssue?.id ?? null,\n linear_issue_url: opts.linearIssue?.url ?? null,\n };\n writeJsonAtomic(join(dir, \"manifest.json\"), manifest);\n writeLock(dir);\n\n const eventsPath = join(dir, \"events.jsonl\");\n const stdoutPath = join(dir, \"stdout.log\");\n const stderrPath = join(dir, \"stderr.log\");\n\n // 3. 
Build initial state and write it.\n const state: StateFile & {\n system: string;\n variant: string;\n eval_version: number;\n dataset_version: string;\n started_at: string;\n updated_at: string;\n n_done: number;\n } = {\n run_id: runId,\n system: opts.systemId,\n variant: opts.variant,\n status: \"running\",\n score: null,\n n_total: null,\n n_done: 0,\n promoted: null,\n started_at: startedAt,\n updated_at: startedAt,\n ended_at: null,\n eval_version: system.frontmatter.eval_version,\n dataset_version: system.frontmatter.dataset_version,\n };\n writeJsonAtomic(join(dir, \"state.json\"), state);\n\n // 4. Spawn the eval with fd 3 piped for events.\n // For shebang scripts (cmd === evalAbs) we omit evalAbs from args, since\n // spawn already passes cmd as argv[0]. Including it would duplicate the\n // path as a positional and break argparse-based eval scripts.\n const evalArgs = [\n ...interp.preArgs,\n ...(interp.cmd === evalAbs ? [] : [evalAbs]),\n \"--variant\",\n opts.variant,\n \"--dataset\",\n system.frontmatter.dataset_version,\n \"--out-fd\",\n \"3\",\n ];\n const effectiveMax = opts.diagnostic ? 5 : opts.maxItems;\n if (effectiveMax !== null && effectiveMax !== undefined) {\n evalArgs.push(\"--max-items\", String(effectiveMax));\n }\n\n const stdoutLog = createWriteStream(stdoutPath);\n const stderrLog = createWriteStream(stderrPath);\n let stderrTail = \"\";\n\n const child: ChildProcess = spawn(interp.cmd, evalArgs, {\n cwd: opts.workspace,\n stdio: [\"ignore\", \"pipe\", \"pipe\", \"pipe\"],\n env: process.env,\n });\n\n // Tee stdout/stderr to log files. Capture last ~4KB of stderr for crash diagnostics.\n child.stdout!.on(\"data\", (chunk: Buffer) => stdoutLog.write(chunk));\n child.stderr!.on(\"data\", (chunk: Buffer) => {\n stderrLog.write(chunk);\n stderrTail = (stderrTail + chunk.toString(\"utf-8\")).slice(-4096);\n });\n\n // Event stream on fd 3. 
spawn(['ignore','pipe','pipe','pipe']) makes child.stdio[3] a Readable.\n const eventChan = child.stdio[3] as Readable;\n const itemEvents: ItemEvent[] = [];\n let sawComplete: CompleteEvent | null = null;\n let parseErrors = 0;\n let contractErrors = 0;\n\n const reader = new NdjsonReader(\n (e) => onEvent(e),\n (line, err) => {\n parseErrors++;\n stderrLog.write(\n `[runner] could not parse event: ${line.slice(0, 200)}\\n`,\n );\n stderrLog.write(\n `[runner] ${err instanceof Error ? err.message : String(err)}\\n`,\n );\n },\n );\n eventChan.setEncoding?.(\"utf-8\");\n eventChan.on(\"data\", (chunk) => reader.push(chunk as string | Buffer));\n // The event channel may close after the child's `exit` fires (Node docs\n // explicitly note stdio streams can outlive `exit`). We must drain it\n // before deciding terminal state — otherwise a successful run that\n // emitted a `complete` event still in the pipe buffer gets misclassified\n // as \"crashed\" because `sawComplete` is observed null.\n const eventChanEnded = new Promise<void>((resolve) => {\n eventChan.on(\"end\", () => {\n reader.end();\n resolve();\n });\n eventChan.on(\"close\", () => {\n reader.end();\n resolve();\n });\n });\n\n function onEvent(e: Event): void {\n appendNdjsonLine(eventsPath, e);\n if (e.type === \"start\") {\n const s = e;\n if (s.eval_version !== system.frontmatter.eval_version) {\n contractErrors++;\n stderrLog.write(\n `[runner] start.eval_version ${s.eval_version} does not match system eval_version ${system.frontmatter.eval_version}\\n`,\n );\n }\n if (s.dataset_version !== system.frontmatter.dataset_version) {\n contractErrors++;\n stderrLog.write(\n `[runner] start.dataset_version ${s.dataset_version} does not match system dataset_version ${system.frontmatter.dataset_version}\\n`,\n );\n }\n state.n_total = s.n;\n state.updated_at = new Date().toISOString();\n writeJsonAtomic(join(dir, \"state.json\"), state);\n } else if (e.type === \"item\") {\n const it = e;\n 
itemEvents.push(it);\n state.n_done = (state.n_done ?? 0) + 1;\n state.updated_at = new Date().toISOString();\n writeJsonAtomic(join(dir, \"state.json\"), state);\n } else if (e.type === \"progress\") {\n // n_done is authoritative from item events; progress is informational.\n state.updated_at = new Date().toISOString();\n writeJsonAtomic(join(dir, \"state.json\"), state);\n } else if (e.type === \"complete\") {\n sawComplete = e;\n if (state.n_total !== null && sawComplete.n !== state.n_total) {\n contractErrors++;\n stderrLog.write(\n `[runner] complete.n ${sawComplete.n} does not match start.n ${state.n_total}\\n`,\n );\n }\n state.score = sawComplete.score;\n state.updated_at = new Date().toISOString();\n writeJsonAtomic(join(dir, \"state.json\"), state);\n }\n // error/promotion/etc. — no state mutation here\n }\n\n // 5. Wait for child to exit AND for the event channel to be fully drained.\n // Both must complete before we decide terminal state — the child's exit\n // doesn't guarantee its stdio buffers have been consumed by us yet.\n const { code, signal } = await waitForExit(child);\n await eventChanEnded;\n\n // Flush logs.\n stdoutLog.end();\n stderrLog.end();\n await Promise.all([\n new Promise<void>((r) => stdoutLog.on(\"close\", () => r())),\n new Promise<void>((r) => stderrLog.on(\"close\", () => r())),\n ]);\n\n // 6. Decide terminal state.\n const endedAt = new Date().toISOString();\n if (\n code === 0 &&\n sawComplete !== null &&\n parseErrors === 0 &&\n contractErrors === 0\n ) {\n state.status = \"complete\";\n state.score = (sawComplete as CompleteEvent).score;\n // The complete event is already in events.jsonl from onEvent.\n } else {\n const crashed: CrashedEvent = {\n type: \"crashed\",\n exit_code: code,\n signal: signal ?? null,\n stderr_tail: stderrTail.slice(-2000),\n reason:\n code !== 0 && code !== null\n ? `eval exited with code ${code}`\n : signal\n ? `eval terminated by signal ${signal}`\n : parseErrors > 0\n ? 
`eval emitted ${parseErrors} malformed event(s)`\n : contractErrors > 0\n ? `eval emitted ${contractErrors} contract-violating event(s)`\n : sawComplete !== null\n ? \"eval emitted complete but exited non-zero\"\n : \"eval exited without emitting complete event\",\n ts: Date.now() / 1000,\n };\n appendNdjsonLine(eventsPath, crashed);\n state.status = \"crashed\";\n }\n state.ended_at = endedAt;\n state.updated_at = endedAt;\n\n // 7. Write failures.jsonl (worst-K).\n if (itemEvents.length > 0) {\n const sorted = [...itemEvents].sort((a, b) => a.score - b.score);\n const worst = sorted.slice(0, Math.min(FAILURE_K, sorted.length));\n const text =\n worst.map((it) => JSON.stringify(it)).join(\"\\n\") +\n (worst.length > 0 ? \"\\n\" : \"\");\n writeFileSync(join(dir, \"failures.jsonl\"), text);\n }\n\n // 8. Promotion (only if completed cleanly).\n let promotionEvent: PromotionEvent | null = null;\n if (state.status === \"complete\" && !opts.noAutoPromote) {\n const allRuns = listRuns(stateDir, opts.systemId).filter(\n (r) => r.run_id !== runId,\n );\n const baselineRun = currentBaseline(\n allRuns,\n system.frontmatter.eval_version,\n system.frontmatter.dataset_version,\n );\n const baseline = baselineRun\n ? {\n run_id: baselineRun.run_id,\n variant: baselineRun.variant,\n score: baselineRun.score!,\n events_path: baselineRun.events_path,\n }\n : null;\n const decision = decidePromotion(\n mapFromItems(itemEvents),\n itemEvents,\n baseline,\n { subgroupKeys: subgroupKeys(system) },\n );\n promotionEvent = { type: \"promotion\", ts: Date.now() / 1000, ...decision };\n appendNdjsonLine(eventsPath, promotionEvent);\n state.promoted = decision.promoted;\n } else if (opts.noAutoPromote) {\n state.promoted = false;\n }\n writeJsonAtomic(join(dir, \"state.json\"), state);\n\n // 9. 
Append hypothesis log line (every run, success or failure).\n const hypoLine = {\n run_id: runId,\n system: opts.systemId,\n variant: opts.variant,\n parent_id: opts.parent,\n hypothesis: opts.hypothesis,\n status: state.status,\n score: state.score,\n promoted: state.promoted,\n started_at: startedAt,\n ended_at: endedAt,\n eval_version: system.frontmatter.eval_version,\n dataset_version: system.frontmatter.dataset_version,\n };\n appendNdjsonLine(hypothesesPath(stateDir, opts.systemId), hypoLine);\n\n // 10. Clear lock.\n clearLock(dir);\n\n void parseErrors; // not surfaced yet; logged to stderr.log\n\n return {\n runId,\n status: state.status as \"complete\" | \"crashed\" | \"aborted\",\n score: state.score,\n promoted: state.promoted ?? null,\n exitCode: state.status === \"complete\" ? 0 : 1,\n };\n}\n\n// --- helpers ---\n\ninterface InterpreterChoice {\n cmd: string;\n preArgs: string[];\n}\n\nfunction detectInterpreter(evalPath: string): InterpreterChoice {\n const ext = extname(evalPath).toLowerCase();\n if (ext === \".py\") return { cmd: \"python3\", preArgs: [] };\n if (ext === \".js\" || ext === \".mjs\" || ext === \".cjs\")\n return { cmd: \"node\", preArgs: [] };\n if (ext === \".ts\" || ext === \".tsx\" || ext === \".mts\" || ext === \".cts\") {\n return {\n cmd: process.execPath,\n preArgs: [\"--import\", require.resolve(\"tsx\")],\n };\n }\n // Default: assume executable with shebang.\n return { cmd: evalPath, preArgs: [] };\n}\n\nfunction waitForExit(\n child: ChildProcess,\n): Promise<{ code: number | null; signal: NodeJS.Signals | null }> {\n return new Promise((resolve) => {\n child.on(\"exit\", (code: number | null, signal: NodeJS.Signals | null) =>\n resolve({ code, signal }),\n );\n });\n}\n\nfunction gitSha(cwd: string): string | null {\n try {\n return (\n execSync(\"git rev-parse HEAD\", {\n cwd,\n stdio: [\"ignore\", \"pipe\", \"ignore\"],\n })\n .toString()\n .trim() || null\n );\n } catch {\n return null;\n }\n}\n\nfunction 
gitBranch(cwd: string): string | null {\n try {\n return (\n execSync(\"git rev-parse --abbrev-ref HEAD\", {\n cwd,\n stdio: [\"ignore\", \"pipe\", \"ignore\"],\n })\n .toString()\n .trim() || null\n );\n } catch {\n return null;\n }\n}\n\nfunction gitTopLevel(cwd: string): string | null {\n try {\n return (\n execSync(\"git rev-parse --show-toplevel\", {\n cwd,\n stdio: [\"ignore\", \"pipe\", \"ignore\"],\n })\n .toString()\n .trim() || null\n );\n } catch {\n return null;\n }\n}\n\nfunction gitCommonDir(cwd: string): string | null {\n try {\n return (\n execSync(\"git rev-parse --git-common-dir\", {\n cwd,\n stdio: [\"ignore\", \"pipe\", \"ignore\"],\n })\n .toString()\n .trim() || null\n );\n } catch {\n return null;\n }\n}\n\nfunction mapFromItems(items: ItemEvent[]): Map<string, number> {\n const m = new Map<string, number>();\n for (const it of items) m.set(it.id, it.score);\n return m;\n}\n\nfunction subgroupKeys(system: SystemDef): string[] {\n const sg = (\n system.frontmatter as {\n subgroups?: Array<{ field?: string; name?: string }>;\n }\n ).subgroups;\n if (!Array.isArray(sg)) return [];\n return sg.map((s) => s.field ?? s.name ?? 
\"\").filter(Boolean);\n}\n\n/**\n * Sweep stale runs whose lock points to a dead PID and which never recorded a terminal event.\n * Mark them `crashed` with reason \"supervisor died\" so the dashboard / log don't show ghosts.\n */\nexport function reapStaleRuns(stateDir: string, systemId: string): void {\n const dir = systemRunsDir(stateDir, systemId);\n if (!existsSync(dir)) return;\n for (const run of listRuns(stateDir, systemId)) {\n if (run.status !== \"running\") continue;\n const lock = readLock(join(dir, run.run_id));\n if (!lock) continue;\n if (isPidAlive(lock.pid)) continue;\n const reapedEvent: CrashedEvent = {\n type: \"crashed\",\n exit_code: null,\n signal: null,\n reason: `runner pid ${lock.pid} died without writing a terminal event; reaped`,\n ts: Date.now() / 1000,\n };\n appendNdjsonLine(run.events_path, reapedEvent);\n\n const state = readJsonIfExists<StateFile>(run.state_path);\n if (state) {\n state.status = \"crashed\";\n state.ended_at = new Date().toISOString();\n writeJsonAtomic(run.state_path, state);\n }\n clearLock(join(dir, run.run_id));\n 
}\n}\n"],"mappings":";;;;;;;;;;;;AA4DA,MAAM,YAAY;AAClB,MAAM,UAAU,cAAc,OAAO,KAAK,IAAI;AAE9C,eAAsB,cAAc,MAAsC;CACxE,MAAM,WAAW,gBAAgB,KAAK,UAAU;AAGhD,eAAc,UAAU,KAAK,SAAS;CAGtC,MAAM,SAAS,WAAW,KAAK,WAAW,KAAK,SAAS;CACxD,MAAM,UAAU,gBAAgB,KAAK,WAAW,OAAO;AACvD,KAAI,CAAC,WAAW,QAAQ,CACtB,OAAM,IAAI,MACR,4BAA4B,QAAQ,0BAA0B,OAAO,YAAY,SAAS,qBAAqB,KAAK,SAAS,aAC9H;CAEH,MAAM,SAAS,kBAAkB,QAAQ;CAGzC,MAAM,QAAQ,eAAe;CAC7B,MAAM,MAAM,OAAO,UAAU,KAAK,UAAU,MAAM;AAClD,WAAU,IAAI;CACd,MAAM,6BAAY,IAAI,MAAM,EAAC,aAAa;CAC1C,MAAM,WAAyB;EAC7B,QAAQ;EACR,QAAQ,KAAK;EACb,SAAS,KAAK;EACd,WAAW,KAAK;EAChB,YAAY,KAAK;EACjB,SAAS,OAAO,KAAK,UAAU;EAC/B,YAAY,UAAU,KAAK,UAAU;EACrC,eAAe,YAAY,KAAK,UAAU;EAC1C,gBAAgB,aAAa,KAAK,UAAU;EAC5C,cAAc,OAAO,YAAY;EACjC,iBAAiB,OAAO,YAAY;EACpC,YAAY;EACZ,MAAM,GAAG,UAAU,CAAC,GAAG,MAAM,CAAC,IAAI,UAAU,CAAC;EAC7C,gBAAgB,KAAK;EACrB,WAAW;EACX,YAAY,KAAK;EACjB,iBAAiB,KAAK,aAAa,MAAM;EACzC,kBAAkB,KAAK,aAAa,OAAO;EAC5C;AACD,iBAAgB,KAAK,KAAK,gBAAgB,EAAE,SAAS;AACrD,WAAU,IAAI;CAEd,MAAM,aAAa,KAAK,KAAK,eAAe;CAC5C,MAAM,aAAa,KAAK,KAAK,aAAa;CAC1C,MAAM,aAAa,KAAK,KAAK,aAAa;CAG1C,MAAM,QAQF;EACF,QAAQ;EACR,QAAQ,KAAK;EACb,SAAS,KAAK;EACd,QAAQ;EACR,OAAO;EACP,SAAS;EACT,QAAQ;EACR,UAAU;EACV,YAAY;EACZ,YAAY;EACZ,UAAU;EACV,cAAc,OAAO,YAAY;EACjC,iBAAiB,OAAO,YAAY;EACrC;AACD,iBAAgB,KAAK,KAAK,aAAa,EAAE,MAAM;CAM/C,MAAM,WAAW;EACf,GAAG,OAAO;EACV,GAAI,OAAO,QAAQ,UAAU,EAAE,GAAG,CAAC,QAAQ;EAC3C;EACA,KAAK;EACL;EACA,OAAO,YAAY;EACnB;EACA;EACD;CACD,MAAM,eAAe,KAAK,aAAa,IAAI,KAAK;AAChD,KAAI,iBAAiB,QAAQ,iBAAiB,KAAA,EAC5C,UAAS,KAAK,eAAe,OAAO,aAAa,CAAC;CAGpD,MAAM,YAAY,kBAAkB,WAAW;CAC/C,MAAM,YAAY,kBAAkB,WAAW;CAC/C,IAAI,aAAa;CAEjB,MAAM,QAAsB,MAAM,OAAO,KAAK,UAAU;EACtD,KAAK,KAAK;EACV,OAAO;GAAC;GAAU;GAAQ;GAAQ;GAAO;EACzC,KAAK,QAAQ;EACd,CAAC;AAGF,OAAM,OAAQ,GAAG,SAAS,UAAkB,UAAU,MAAM,MAAM,CAAC;AACnE,OAAM,OAAQ,GAAG,SAAS,UAAkB;AAC1C,YAAU,MAAM,MAAM;AACtB,gBAAc,aAAa,MAAM,SAAS,QAAQ,EAAE,MAAM,MAAM;GAChE;CAGF,MAAM,YAAY,MAAM,MAAM;CAC9B,MAAM,aAA0B,EAAE;CAClC,IAAI,cAAoC;CACxC,IAAI,cAAc;CAClB,IAAI,iBAAiB;CAErB,MAAM,SAAS,IAAI,cAChB,MAAM,QAAQ,EAAE,GAChB,MAAM,Q
AAQ;AACb;AACA,YAAU,MACR,mCAAmC,KAAK,MAAM,GAAG,IAAI,CAAC,IACvD;AACD,YAAU,MACR,YAAY,eAAe,QAAQ,IAAI,UAAU,OAAO,IAAI,CAAC,IAC9D;GAEJ;AACD,WAAU,cAAc,QAAQ;AAChC,WAAU,GAAG,SAAS,UAAU,OAAO,KAAK,MAAyB,CAAC;CAMtE,MAAM,iBAAiB,IAAI,SAAe,YAAY;AACpD,YAAU,GAAG,aAAa;AACxB,UAAO,KAAK;AACZ,YAAS;IACT;AACF,YAAU,GAAG,eAAe;AAC1B,UAAO,KAAK;AACZ,YAAS;IACT;GACF;CAEF,SAAS,QAAQ,GAAgB;AAC/B,mBAAiB,YAAY,EAAE;AAC/B,MAAI,EAAE,SAAS,SAAS;GACtB,MAAM,IAAI;AACV,OAAI,EAAE,iBAAiB,OAAO,YAAY,cAAc;AACtD;AACA,cAAU,MACR,+BAA+B,EAAE,aAAa,sCAAsC,OAAO,YAAY,aAAa,IACrH;;AAEH,OAAI,EAAE,oBAAoB,OAAO,YAAY,iBAAiB;AAC5D;AACA,cAAU,MACR,kCAAkC,EAAE,gBAAgB,yCAAyC,OAAO,YAAY,gBAAgB,IACjI;;AAEH,SAAM,UAAU,EAAE;AAClB,SAAM,8BAAa,IAAI,MAAM,EAAC,aAAa;AAC3C,mBAAgB,KAAK,KAAK,aAAa,EAAE,MAAM;aACtC,EAAE,SAAS,QAAQ;GAC5B,MAAM,KAAK;AACX,cAAW,KAAK,GAAG;AACnB,SAAM,UAAU,MAAM,UAAU,KAAK;AACrC,SAAM,8BAAa,IAAI,MAAM,EAAC,aAAa;AAC3C,mBAAgB,KAAK,KAAK,aAAa,EAAE,MAAM;aACtC,EAAE,SAAS,YAAY;AAEhC,SAAM,8BAAa,IAAI,MAAM,EAAC,aAAa;AAC3C,mBAAgB,KAAK,KAAK,aAAa,EAAE,MAAM;aACtC,EAAE,SAAS,YAAY;AAChC,iBAAc;AACd,OAAI,MAAM,YAAY,QAAQ,YAAY,MAAM,MAAM,SAAS;AAC7D;AACA,cAAU,MACR,uBAAuB,YAAY,EAAE,0BAA0B,MAAM,QAAQ,IAC9E;;AAEH,SAAM,QAAQ,YAAY;AAC1B,SAAM,8BAAa,IAAI,MAAM,EAAC,aAAa;AAC3C,mBAAgB,KAAK,KAAK,aAAa,EAAE,MAAM;;;CAQnD,MAAM,EAAE,MAAM,WAAW,MAAM,YAAY,MAAM;AACjD,OAAM;AAGN,WAAU,KAAK;AACf,WAAU,KAAK;AACf,OAAM,QAAQ,IAAI,CAChB,IAAI,SAAe,MAAM,UAAU,GAAG,eAAe,GAAG,CAAC,CAAC,EAC1D,IAAI,SAAe,MAAM,UAAU,GAAG,eAAe,GAAG,CAAC,CAAC,CAC3D,CAAC;CAGF,MAAM,2BAAU,IAAI,MAAM,EAAC,aAAa;AACxC,KACE,SAAS,KACT,gBAAgB,QAChB,gBAAgB,KAChB,mBAAmB,GACnB;AACA,QAAM,SAAS;AACf,QAAM,QAAS,YAA8B;QAExC;AAoBL,mBAAiB,YAAY;GAlB3B,MAAM;GACN,WAAW;GACX,QAAQ,UAAU;GAClB,aAAa,WAAW,MAAM,KAAM;GACpC,QACE,SAAS,KAAK,SAAS,OACnB,yBAAyB,SACzB,SACE,6BAA6B,WAC7B,cAAc,IACZ,gBAAgB,YAAY,uBAC5B,iBAAiB,IACf,gBAAgB,eAAe,gCAC/B,gBAAgB,OACd,8CACA;GACd,IAAI,KAAK,KAAK,GAAG;GAEiB,CAAC;AACrC,QAAM,SAAS;;AAEjB,OAAM,WAAW;AACjB,OAAM,aAAa;AAGnB,KAAI,WAAW,SAAS,GAAG;EACzB,MAAM,SAAS,CAAC,GAAG,WAAW,CAAC,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,MAAM;EAChE,MAAM,QAAQ,
OAAO,MAAM,GAAG,KAAK,IAAI,WAAW,OAAO,OAAO,CAAC;EACjE,MAAM,OACJ,MAAM,KAAK,OAAO,KAAK,UAAU,GAAG,CAAC,CAAC,KAAK,KAAK,IAC/C,MAAM,SAAS,IAAI,OAAO;AAC7B,gBAAc,KAAK,KAAK,iBAAiB,EAAE,KAAK;;CAIlD,IAAI,iBAAwC;AAC5C,KAAI,MAAM,WAAW,cAAc,CAAC,KAAK,eAAe;EAItD,MAAM,cAAc,gBAHJ,SAAS,UAAU,KAAK,SAAS,CAAC,QAC/C,MAAM,EAAE,WAAW,MAGb,EACP,OAAO,YAAY,cACnB,OAAO,YAAY,gBACpB;EACD,MAAM,WAAW,cACb;GACE,QAAQ,YAAY;GACpB,SAAS,YAAY;GACrB,OAAO,YAAY;GACnB,aAAa,YAAY;GAC1B,GACD;EACJ,MAAM,WAAW,gBACf,aAAa,WAAW,EACxB,YACA,UACA,EAAE,cAAc,aAAa,OAAO,EAAE,CACvC;AACD,mBAAiB;GAAE,MAAM;GAAa,IAAI,KAAK,KAAK,GAAG;GAAM,GAAG;GAAU;AAC1E,mBAAiB,YAAY,eAAe;AAC5C,QAAM,WAAW,SAAS;YACjB,KAAK,cACd,OAAM,WAAW;AAEnB,iBAAgB,KAAK,KAAK,aAAa,EAAE,MAAM;CAG/C,MAAM,WAAW;EACf,QAAQ;EACR,QAAQ,KAAK;EACb,SAAS,KAAK;EACd,WAAW,KAAK;EAChB,YAAY,KAAK;EACjB,QAAQ,MAAM;EACd,OAAO,MAAM;EACb,UAAU,MAAM;EAChB,YAAY;EACZ,UAAU;EACV,cAAc,OAAO,YAAY;EACjC,iBAAiB,OAAO,YAAY;EACrC;AACD,kBAAiB,eAAe,UAAU,KAAK,SAAS,EAAE,SAAS;AAGnE,WAAU,IAAI;AAId,QAAO;EACL;EACA,QAAQ,MAAM;EACd,OAAO,MAAM;EACb,UAAU,MAAM,YAAY;EAC5B,UAAU,MAAM,WAAW,aAAa,IAAI;EAC7C;;AAUH,SAAS,kBAAkB,UAAqC;CAC9D,MAAM,MAAM,QAAQ,SAAS,CAAC,aAAa;AAC3C,KAAI,QAAQ,MAAO,QAAO;EAAE,KAAK;EAAW,SAAS,EAAE;EAAE;AACzD,KAAI,QAAQ,SAAS,QAAQ,UAAU,QAAQ,OAC7C,QAAO;EAAE,KAAK;EAAQ,SAAS,EAAE;EAAE;AACrC,KAAI,QAAQ,SAAS,QAAQ,UAAU,QAAQ,UAAU,QAAQ,OAC/D,QAAO;EACL,KAAK,QAAQ;EACb,SAAS,CAAC,YAAY,QAAQ,QAAQ,MAAM,CAAC;EAC9C;AAGH,QAAO;EAAE,KAAK;EAAU,SAAS,EAAE;EAAE;;AAGvC,SAAS,YACP,OACiE;AACjE,QAAO,IAAI,SAAS,YAAY;AAC9B,QAAM,GAAG,SAAS,MAAqB,WACrC,QAAQ;GAAE;GAAM;GAAQ,CAAC,CAC1B;GACD;;AAGJ,SAAS,OAAO,KAA4B;AAC1C,KAAI;AACF,SACE,SAAS,sBAAsB;GAC7B;GACA,OAAO;IAAC;IAAU;IAAQ;IAAS;GACpC,CAAC,CACC,UAAU,CACV,MAAM,IAAI;SAET;AACN,SAAO;;;AAIX,SAAS,UAAU,KAA4B;AAC7C,KAAI;AACF,SACE,SAAS,mCAAmC;GAC1C;GACA,OAAO;IAAC;IAAU;IAAQ;IAAS;GACpC,CAAC,CACC,UAAU,CACV,MAAM,IAAI;SAET;AACN,SAAO;;;AAIX,SAAS,YAAY,KAA4B;AAC/C,KAAI;AACF,SACE,SAAS,iCAAiC;GACxC;GACA,OAAO;IAAC;IAAU;IAAQ;IAAS;GACpC,CAAC,CACC,UAAU,CACV,MAAM,IAAI;SAET;AACN,SAAO;;;AAIX,SAAS,aAAa,KAA4B;AAChD,KAAI;AACF,SACE,SAAS,kC
AAkC;GACzC;GACA,OAAO;IAAC;IAAU;IAAQ;IAAS;GACpC,CAAC,CACC,UAAU,CACV,MAAM,IAAI;SAET;AACN,SAAO;;;AAIX,SAAS,aAAa,OAAyC;CAC7D,MAAM,oBAAI,IAAI,KAAqB;AACnC,MAAK,MAAM,MAAM,MAAO,GAAE,IAAI,GAAG,IAAI,GAAG,MAAM;AAC9C,QAAO;;AAGT,SAAS,aAAa,QAA6B;CACjD,MAAM,KACJ,OAAO,YAGP;AACF,KAAI,CAAC,MAAM,QAAQ,GAAG,CAAE,QAAO,EAAE;AACjC,QAAO,GAAG,KAAK,MAAM,EAAE,SAAS,EAAE,QAAQ,GAAG,CAAC,OAAO,QAAQ;;;;;;AAO/D,SAAgB,cAAc,UAAkB,UAAwB;CACtE,MAAM,MAAM,cAAc,UAAU,SAAS;AAC7C,KAAI,CAAC,WAAW,IAAI,CAAE;AACtB,MAAK,MAAM,OAAO,SAAS,UAAU,SAAS,EAAE;AAC9C,MAAI,IAAI,WAAW,UAAW;EAC9B,MAAM,OAAO,SAAS,KAAK,KAAK,IAAI,OAAO,CAAC;AAC5C,MAAI,CAAC,KAAM;AACX,MAAI,WAAW,KAAK,IAAI,CAAE;EAC1B,MAAM,cAA4B;GAChC,MAAM;GACN,WAAW;GACX,QAAQ;GACR,QAAQ,cAAc,KAAK,IAAI;GAC/B,IAAI,KAAK,KAAK,GAAG;GAClB;AACD,mBAAiB,IAAI,aAAa,YAAY;EAE9C,MAAM,QAAQ,iBAA4B,IAAI,WAAW;AACzD,MAAI,OAAO;AACT,SAAM,SAAS;AACf,SAAM,4BAAW,IAAI,MAAM,EAAC,aAAa;AACzC,mBAAgB,IAAI,YAAY,MAAM;;AAExC,YAAU,KAAK,KAAK,IAAI,OAAO,CAAC"}
@@ -1,9 +1,11 @@
1
- import { join, resolve } from "node:path";
1
+ import { kaizenSystemPath } from "../shared/workspace-paths.js";
2
+ import "./paths.js";
2
3
  import { existsSync, readFileSync } from "node:fs";
4
+ import { resolve } from "node:path";
3
5
  import { parse } from "yaml";
4
6
  //#region src/lib/system.ts
5
7
  function systemPath(workspaceRoot, id) {
6
- return join(workspaceRoot, "systems", `${id}.md`);
8
+ return kaizenSystemPath(workspaceRoot, id);
7
9
  }
8
10
  function loadSystem(workspaceRoot, id) {
9
11
  const path = systemPath(workspaceRoot, id);
@@ -1 +1 @@
1
- {"version":3,"file":"system.js","names":["parseYaml"],"sources":["../../src/lib/system.ts"],"sourcesContent":["import { existsSync, readFileSync } from \"node:fs\";\nimport { join, resolve } from \"node:path\";\nimport { parse as parseYaml } from \"yaml\";\n\nexport type EvalStyle = \"ground-truth\" | \"llm-as-judge\" | \"hybrid\";\nexport type ExecutionMode = \"in_process\" | \"server\";\n\nexport interface SubgroupDef {\n name: string;\n field: string;\n}\n\nexport interface SystemFrontmatter {\n name: string;\n description?: string;\n run_eval: string;\n eval_version: number;\n dataset_version: string;\n eval_style?: EvalStyle;\n primary_metric: string;\n target?: number;\n execution_mode?: ExecutionMode;\n kaizen_version?: string;\n subgroups?: SubgroupDef[];\n trace_renderer?: string;\n [key: string]: unknown;\n}\n\nexport interface SystemDef {\n id: string;\n path: string;\n frontmatter: SystemFrontmatter;\n body: string;\n}\n\nexport function systemPath(workspaceRoot: string, id: string): string {\n return join(workspaceRoot, \"systems\", `${id}.md`);\n}\n\nexport function loadSystem(workspaceRoot: string, id: string): SystemDef {\n const path = systemPath(workspaceRoot, id);\n if (!existsSync(path)) {\n throw new Error(\n `system \"${id}\" not found at ${path}. 
run \\`kaizen create system ${id}\\` to scaffold one.`,\n );\n }\n const text = readFileSync(path, \"utf-8\");\n const { frontmatter, body } = splitFrontmatter(text);\n if (!frontmatter) {\n throw new Error(`system ${id}: no YAML frontmatter found in ${path}`);\n }\n const fm = parseYaml(frontmatter) as Partial<SystemFrontmatter> | null;\n if (!fm || typeof fm !== \"object\") {\n throw new Error(`system ${id}: frontmatter is not a YAML mapping`);\n }\n const required = [\n \"name\",\n \"run_eval\",\n \"eval_version\",\n \"dataset_version\",\n \"primary_metric\",\n ] as const;\n for (const k of required) {\n if (fm[k] === undefined || fm[k] === null || fm[k] === \"\") {\n throw new Error(\n `system ${id}: frontmatter missing required field \"${k}\"`,\n );\n }\n }\n return {\n id,\n path,\n frontmatter: fm as SystemFrontmatter,\n body,\n };\n}\n\nexport function resolveEvalPath(\n workspaceRoot: string,\n system: SystemDef,\n): string {\n return resolve(workspaceRoot, system.frontmatter.run_eval);\n}\n\nfunction splitFrontmatter(text: string): {\n frontmatter: string | null;\n body: string;\n} {\n // Frontmatter starts on line 1 with ---, ends with --- on its own line.\n if (!text.startsWith(\"---\")) return { frontmatter: null, body: text };\n const lines = text.split(\"\\n\");\n // First line is \"---\". 
Find the next \"---\".\n let close = -1;\n for (let i = 1; i < lines.length; i++) {\n if (lines[i].trim() === \"---\") {\n close = i;\n break;\n }\n }\n if (close === -1) return { frontmatter: null, body: text };\n return {\n frontmatter: lines.slice(1, close).join(\"\\n\"),\n body: lines.slice(close + 1).join(\"\\n\"),\n };\n}\n"],"mappings":";;;;AAmCA,SAAgB,WAAW,eAAuB,IAAoB;AACpE,QAAO,KAAK,eAAe,WAAW,GAAG,GAAG,KAAK;;AAGnD,SAAgB,WAAW,eAAuB,IAAuB;CACvE,MAAM,OAAO,WAAW,eAAe,GAAG;AAC1C,KAAI,CAAC,WAAW,KAAK,CACnB,OAAM,IAAI,MACR,WAAW,GAAG,iBAAiB,KAAK,+BAA+B,GAAG,qBACvE;CAGH,MAAM,EAAE,aAAa,SAAS,iBADjB,aAAa,MAAM,QACmB,CAAC;AACpD,KAAI,CAAC,YACH,OAAM,IAAI,MAAM,UAAU,GAAG,iCAAiC,OAAO;CAEvE,MAAM,KAAKA,MAAU,YAAY;AACjC,KAAI,CAAC,MAAM,OAAO,OAAO,SACvB,OAAM,IAAI,MAAM,UAAU,GAAG,qCAAqC;AASpE,MAAK,MAAM,KAAK;EANd;EACA;EACA;EACA;EACA;EAEsB,CACtB,KAAI,GAAG,OAAO,KAAA,KAAa,GAAG,OAAO,QAAQ,GAAG,OAAO,GACrD,OAAM,IAAI,MACR,UAAU,GAAG,wCAAwC,EAAE,GACxD;AAGL,QAAO;EACL;EACA;EACA,aAAa;EACb;EACD;;AAGH,SAAgB,gBACd,eACA,QACQ;AACR,QAAO,QAAQ,eAAe,OAAO,YAAY,SAAS;;AAG5D,SAAS,iBAAiB,MAGxB;AAEA,KAAI,CAAC,KAAK,WAAW,MAAM,CAAE,QAAO;EAAE,aAAa;EAAM,MAAM;EAAM;CACrE,MAAM,QAAQ,KAAK,MAAM,KAAK;CAE9B,IAAI,QAAQ;AACZ,MAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,IAChC,KAAI,MAAM,GAAG,MAAM,KAAK,OAAO;AAC7B,UAAQ;AACR;;AAGJ,KAAI,UAAU,GAAI,QAAO;EAAE,aAAa;EAAM,MAAM;EAAM;AAC1D,QAAO;EACL,aAAa,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,KAAK;EAC7C,MAAM,MAAM,MAAM,QAAQ,EAAE,CAAC,KAAK,KAAK;EACxC"}
1
+ {"version":3,"file":"system.js","names":["parseYaml"],"sources":["../../src/lib/system.ts"],"sourcesContent":["import { existsSync, readFileSync } from \"node:fs\";\nimport { resolve } from \"node:path\";\nimport { parse as parseYaml } from \"yaml\";\nimport { kaizenSystemPath } from \"./paths.js\";\n\nexport type EvalStyle = \"ground-truth\" | \"llm-as-judge\" | \"hybrid\";\n\nexport interface SubgroupDef {\n name: string;\n field: string;\n}\n\nexport interface SystemFrontmatter {\n name: string;\n description?: string;\n run_eval: string;\n eval_version: number;\n dataset_version: string;\n eval_style?: EvalStyle;\n primary_metric: string;\n target?: number;\n kaizen_version?: string;\n subgroups?: SubgroupDef[];\n [key: string]: unknown;\n}\n\nexport interface SystemDef {\n id: string;\n path: string;\n frontmatter: SystemFrontmatter;\n body: string;\n}\n\nfunction systemPath(workspaceRoot: string, id: string): string {\n return kaizenSystemPath(workspaceRoot, id);\n}\n\nexport function loadSystem(workspaceRoot: string, id: string): SystemDef {\n const path = systemPath(workspaceRoot, id);\n if (!existsSync(path)) {\n throw new Error(\n `system \"${id}\" not found at ${path}. 
run \\`kaizen create system ${id}\\` to scaffold one.`,\n );\n }\n const text = readFileSync(path, \"utf-8\");\n const { frontmatter, body } = splitFrontmatter(text);\n if (!frontmatter) {\n throw new Error(`system ${id}: no YAML frontmatter found in ${path}`);\n }\n const fm = parseYaml(frontmatter) as Partial<SystemFrontmatter> | null;\n if (!fm || typeof fm !== \"object\") {\n throw new Error(`system ${id}: frontmatter is not a YAML mapping`);\n }\n const required = [\n \"name\",\n \"run_eval\",\n \"eval_version\",\n \"dataset_version\",\n \"primary_metric\",\n ] as const;\n for (const k of required) {\n if (fm[k] === undefined || fm[k] === null || fm[k] === \"\") {\n throw new Error(\n `system ${id}: frontmatter missing required field \"${k}\"`,\n );\n }\n }\n return {\n id,\n path,\n frontmatter: fm as SystemFrontmatter,\n body,\n };\n}\n\nexport function resolveEvalPath(\n workspaceRoot: string,\n system: SystemDef,\n): string {\n return resolve(workspaceRoot, system.frontmatter.run_eval);\n}\n\nfunction splitFrontmatter(text: string): {\n frontmatter: string | null;\n body: string;\n} {\n // Frontmatter starts on line 1 with ---, ends with --- on its own line.\n if (!text.startsWith(\"---\")) return { frontmatter: null, body: text };\n const lines = text.split(\"\\n\");\n // First line is \"---\". 
Find the next \"---\".\n let close = -1;\n for (let i = 1; i < lines.length; i++) {\n if (lines[i].trim() === \"---\") {\n close = i;\n break;\n }\n }\n if (close === -1) return { frontmatter: null, body: text };\n return {\n frontmatter: lines.slice(1, close).join(\"\\n\"),\n body: lines.slice(close + 1).join(\"\\n\"),\n };\n}\n"],"mappings":";;;;;;AAiCA,SAAS,WAAW,eAAuB,IAAoB;AAC7D,QAAO,iBAAiB,eAAe,GAAG;;AAG5C,SAAgB,WAAW,eAAuB,IAAuB;CACvE,MAAM,OAAO,WAAW,eAAe,GAAG;AAC1C,KAAI,CAAC,WAAW,KAAK,CACnB,OAAM,IAAI,MACR,WAAW,GAAG,iBAAiB,KAAK,+BAA+B,GAAG,qBACvE;CAGH,MAAM,EAAE,aAAa,SAAS,iBADjB,aAAa,MAAM,QACmB,CAAC;AACpD,KAAI,CAAC,YACH,OAAM,IAAI,MAAM,UAAU,GAAG,iCAAiC,OAAO;CAEvE,MAAM,KAAKA,MAAU,YAAY;AACjC,KAAI,CAAC,MAAM,OAAO,OAAO,SACvB,OAAM,IAAI,MAAM,UAAU,GAAG,qCAAqC;AASpE,MAAK,MAAM,KAAK;EANd;EACA;EACA;EACA;EACA;EAEsB,CACtB,KAAI,GAAG,OAAO,KAAA,KAAa,GAAG,OAAO,QAAQ,GAAG,OAAO,GACrD,OAAM,IAAI,MACR,UAAU,GAAG,wCAAwC,EAAE,GACxD;AAGL,QAAO;EACL;EACA;EACA,aAAa;EACb;EACD;;AAGH,SAAgB,gBACd,eACA,QACQ;AACR,QAAO,QAAQ,eAAe,OAAO,YAAY,SAAS;;AAG5D,SAAS,iBAAiB,MAGxB;AAEA,KAAI,CAAC,KAAK,WAAW,MAAM,CAAE,QAAO;EAAE,aAAa;EAAM,MAAM;EAAM;CACrE,MAAM,QAAQ,KAAK,MAAM,KAAK;CAE9B,IAAI,QAAQ;AACZ,MAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,IAChC,KAAI,MAAM,GAAG,MAAM,KAAK,OAAO;AAC7B,UAAQ;AACR;;AAGJ,KAAI,UAAU,GAAI,QAAO;EAAE,aAAa;EAAM,MAAM;EAAM;AAC1D,QAAO;EACL,aAAa,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,KAAK;EAC7C,MAAM,MAAM,MAAM,QAAQ,EAAE,CAAC,KAAK,KAAK;EACxC"}
package/dist/package.js CHANGED
@@ -1,7 +1,7 @@
1
1
  //#region package.json
2
2
  var package_default = {
3
3
  name: "@percepta/kaizen",
4
- version: "0.6.0",
4
+ version: "0.8.0",
5
5
  description: "Automated AI researcher that improves AI systems",
6
6
  keywords: [
7
7
  "ai",
@@ -13,6 +13,7 @@ var package_default = {
13
13
  license: "MIT",
14
14
  bin: { "kaizen": "./bin/kaizen.js" },
15
15
  files: [
16
+ "agent",
16
17
  "bin",
17
18
  "dashboard/.next/standalone",
18
19
  "dist",
@@ -43,8 +44,9 @@ var package_default = {
43
44
  "typecheck:dashboard": "tsc -p dashboard --noEmit",
44
45
  "test": "vitest run",
45
46
  "dev": "tsx src/index.ts",
46
- "dev:studio": "KAIZEN_WORKSPACE=examples/legacy-workspace next dev dashboard --webpack --port 6789",
47
- "dev:next": "KAIZEN_WORKSPACE=examples/legacy-workspace next dev dashboard --webpack --port 6789",
47
+ "dev:studio": "KAIZEN_DEMO_MODE=1 KAIZEN_WORKSPACE=$PWD/examples/demo-workspace next dev dashboard --webpack --port 6789",
48
+ "dev:next": "KAIZEN_DEMO_MODE=1 KAIZEN_WORKSPACE=$PWD/examples/demo-workspace next dev dashboard --webpack --port 6789",
49
+ "seed:demo-data": "node scripts/seed-demo-traces.mjs --workspace $PWD/examples/demo-workspace",
48
50
  "kaizen": "tsx src/index.ts"
49
51
  },
50
52
  dependencies: {
@@ -69,6 +71,7 @@ var package_default = {
69
71
  "vitest": "^4.0.17",
70
72
  "zod": "4.1.4"
71
73
  },
74
+ peerDependencies: { "@types/react": "^19.0.0" },
72
75
  engines: { "node": ">=20" }
73
76
  };
74
77
  //#endregion
@@ -0,0 +1,67 @@
1
+ //#region shared/view-types.d.ts
2
+ /** Data shape passed to custom trace renderers. */
3
+ interface TraceData {
4
+ id?: string;
5
+ name?: string;
6
+ tags?: string[];
7
+ timestamp?: string;
8
+ metadata?: unknown;
9
+ input?: unknown;
10
+ output?: unknown;
11
+ [key: string]: unknown;
12
+ }
13
+ /** Props contract for custom trace renderer components. */
14
+ interface TraceRendererProps {
15
+ trace: TraceData;
16
+ context: TraceRendererContext;
17
+ actions: TraceRendererActions;
18
+ }
19
+ interface DatasetItemRendererProps {
20
+ datasetItem: DatasetItemData;
21
+ trace?: TraceData | null;
22
+ context: TraceRendererContext;
23
+ actions: DatasetItemRendererActions;
24
+ }
25
+ interface DatasetItemData {
26
+ id: string;
27
+ input?: unknown;
28
+ expectedOutput?: unknown;
29
+ metadata?: Record<string, unknown> | null;
30
+ [key: string]: unknown;
31
+ }
32
+ interface TraceRendererContext {
33
+ systemId: string;
34
+ surface: "trace" | "dataset-item" | "run-trace";
35
+ datasetName?: string | null;
36
+ runId?: string | null;
37
+ }
38
+ interface TraceRendererActions {
39
+ createScore(input: {
40
+ traceId?: string;
41
+ name: string;
42
+ value: number | string | boolean;
43
+ comment?: string;
44
+ metadata?: Record<string, unknown>;
45
+ }): Promise<unknown>;
46
+ }
47
+ interface DatasetItemRendererActions extends TraceRendererActions {
48
+ updateDatasetItem(input: {
49
+ datasetName?: string;
50
+ itemId?: string;
51
+ expectedOutput?: unknown;
52
+ metadata?: Record<string, unknown> | null;
53
+ input?: unknown;
54
+ sourceTraceId?: string | null;
55
+ status?: string | null;
56
+ }): Promise<unknown>;
57
+ createDatasetRunItem(input: {
58
+ datasetItemId?: string;
59
+ traceId?: string;
60
+ runName: string;
61
+ runDescription?: string;
62
+ metadata?: Record<string, unknown>;
63
+ }): Promise<unknown>;
64
+ }
65
+ //#endregion
66
+ export { DatasetItemData, DatasetItemRendererActions, DatasetItemRendererProps, TraceData, TraceRendererActions, TraceRendererContext, TraceRendererProps };
67
+ //# sourceMappingURL=view-types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"view-types.d.ts","names":["TraceData","id","name","tags","timestamp","metadata","input","output","key","TraceRendererProps","TraceRendererContext","TraceRendererActions","trace","context","actions","DatasetItemRendererProps","DatasetItemData","DatasetItemRendererActions","datasetItem","Record","expectedOutput","systemId","surface","datasetName","runId","Promise","createScore","traceId","value","comment","updateDatasetItem","itemId","sourceTraceId","status","createDatasetRunItem","datasetItemId","runName","runDescription"],"sources":["../../shared/view-types.d.ts"],"mappings":";;UACiBA,SAAAA;EACfC,EAAAA;EACAC,IAAAA;EACAC,IAAAA;EACAC,SAAAA;EACAC,QAAAA;EACAC,KAAAA;EACAC,MAAAA;EAAAA,CACCC,GAAAA;AAAAA;;UAIcC,kBAAAA;EACfG,KAAAA,EAAOZ,SAAAA;EACPa,OAAAA,EAASH,oBAAAA;EACTI,OAAAA,EAASH,oBAAAA;AAAAA;AAAAA,UAGMI,wBAAAA;EACfG,WAAAA,EAAaF,eAAAA;EACbJ,KAAAA,GAAQZ,SAAAA;EACRa,OAAAA,EAASH,oBAAAA;EACTI,OAAAA,EAASG,0BAAAA;AAAAA;AAAAA,UAGMD,eAAAA;EACff,EAAAA;EACAK,KAAAA;EACAc,cAAAA;EACAf,QAAAA,GAAWc,MAAAA;EAAAA,CACVX,GAAAA;AAAAA;AAAAA,UAGcE,oBAAAA;EACfW,QAAAA;EACAC,OAAAA;EACAC,WAAAA;EACAC,KAAAA;AAAAA;AAAAA,UAGeb,oBAAAA;EACfe,WAAAA,CAAYpB,KAAAA;IACVqB,OAAAA;IACAzB,IAAAA;IACA0B,KAAAA;IACAC,OAAAA;IACAxB,QAAAA,GAAWc,MAAAA;EAAAA,IACTM,OAAAA;AAAAA;AAAAA,UAGWR,0BAAAA,SAAmCN,oBAAAA;EAClDmB,iBAAAA,CAAkBxB,KAAAA;IAChBiB,WAAAA;IACAQ,MAAAA;IACAX,cAAAA;IACAf,QAAAA,GAAWc,MAAAA;IACXb,KAAAA;IACA0B,aAAAA;IACAC,MAAAA;EAAAA,IACER,OAAAA;EACJS,oBAAAA,CAAqB5B,KAAAA;IACnB6B,aAAAA;IACAR,OAAAA;IACAS,OAAAA;IACAC,cAAAA;IACAhC,QAAAA,GAAWc,MAAAA;EAAAA,IACTM,OAAAA;AAAAA"}
@@ -0,0 +1,84 @@
1
+ import { realpathSync } from "node:fs";
2
+ import { join, resolve } from "node:path";
3
+ import { execFileSync } from "node:child_process";
4
+ //#region shared/workspace-paths.js
5
+ const KAIZEN_DIR = "kaizen";
6
+ const KAIZEN_CONFIG = "config.ts";
7
+ const KAIZEN_STATE_DIR = ".kaizen";
8
+ const KAIZEN_SYSTEMS_DIR = "systems";
9
+ function kaizenDir(workspaceRoot) {
10
+ return join(workspaceRoot, KAIZEN_DIR);
11
+ }
12
+ function kaizenConfigPath(workspaceRoot) {
13
+ return join(kaizenDir(workspaceRoot), KAIZEN_CONFIG);
14
+ }
15
+ function kaizenSystemsDir(workspaceRoot) {
16
+ return join(kaizenDir(workspaceRoot), KAIZEN_SYSTEMS_DIR);
17
+ }
18
+ function kaizenSystemDir(workspaceRoot, systemId) {
19
+ return join(kaizenSystemsDir(workspaceRoot), systemId);
20
+ }
21
+ function kaizenSystemPath(workspaceRoot, systemId) {
22
+ return join(kaizenSystemDir(workspaceRoot, systemId), "system.md");
23
+ }
24
+ function defaultKaizenStateDir(workspaceRoot) {
25
+ return join(primaryWorktreeRoot(workspaceRoot), KAIZEN_DIR, KAIZEN_STATE_DIR);
26
+ }
27
+ function resolveKaizenStateDir(workspaceRoot) {
28
+ const raw = process.env.KAIZEN_STATE_DIR;
29
+ return raw ? resolve(workspaceRoot, raw) : defaultKaizenStateDir(workspaceRoot);
30
+ }
31
+ function primaryWorktreeRoot(workspaceRoot) {
32
+ const normalizedWorkspace = canonicalPath(workspaceRoot);
33
+ const gitRoot = gitTopLevel(normalizedWorkspace);
34
+ if (!gitRoot || canonicalPath(gitRoot) !== normalizedWorkspace) return normalizedWorkspace;
35
+ try {
36
+ const first = execFileSync("git", [
37
+ "-C",
38
+ normalizedWorkspace,
39
+ "worktree",
40
+ "list",
41
+ "--porcelain"
42
+ ], {
43
+ encoding: "utf-8",
44
+ stdio: [
45
+ "ignore",
46
+ "pipe",
47
+ "ignore"
48
+ ]
49
+ }).split("\n").find((line) => line.startsWith("worktree "));
50
+ return first ? canonicalPath(first.slice(9)) : normalizedWorkspace;
51
+ } catch {
52
+ return normalizedWorkspace;
53
+ }
54
+ }
55
+ function gitTopLevel(workspaceRoot) {
56
+ try {
57
+ return execFileSync("git", [
58
+ "-C",
59
+ workspaceRoot,
60
+ "rev-parse",
61
+ "--show-toplevel"
62
+ ], {
63
+ encoding: "utf-8",
64
+ stdio: [
65
+ "ignore",
66
+ "pipe",
67
+ "ignore"
68
+ ]
69
+ }).trim();
70
+ } catch {
71
+ return null;
72
+ }
73
+ }
74
+ function canonicalPath(path) {
75
+ try {
76
+ return realpathSync(path);
77
+ } catch {
78
+ return resolve(path);
79
+ }
80
+ }
81
+ //#endregion
82
+ export { defaultKaizenStateDir, kaizenConfigPath, kaizenDir, kaizenSystemDir, kaizenSystemPath, kaizenSystemsDir, primaryWorktreeRoot, resolveKaizenStateDir };
83
+
84
+ //# sourceMappingURL=workspace-paths.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"workspace-paths.js","names":[],"sources":["../../shared/workspace-paths.js"],"sourcesContent":["import { execFileSync } from \"node:child_process\";\nimport { realpathSync } from \"node:fs\";\nimport { join, resolve } from \"node:path\";\n\nexport const KAIZEN_DIR = \"kaizen\";\nexport const KAIZEN_CONFIG = \"config.ts\";\nexport const KAIZEN_STATE_DIR = \".kaizen\";\nexport const KAIZEN_SYSTEMS_DIR = \"systems\";\n\nexport function kaizenDir(workspaceRoot) {\n return join(workspaceRoot, KAIZEN_DIR);\n}\n\nexport function kaizenConfigPath(workspaceRoot) {\n return join(kaizenDir(workspaceRoot), KAIZEN_CONFIG);\n}\n\nexport function kaizenSystemsDir(workspaceRoot) {\n return join(kaizenDir(workspaceRoot), KAIZEN_SYSTEMS_DIR);\n}\n\nexport function kaizenSystemDir(workspaceRoot, systemId) {\n return join(kaizenSystemsDir(workspaceRoot), systemId);\n}\n\nexport function kaizenSystemPath(workspaceRoot, systemId) {\n return join(kaizenSystemDir(workspaceRoot, systemId), \"system.md\");\n}\n\nexport function defaultKaizenStateDir(workspaceRoot) {\n return join(primaryWorktreeRoot(workspaceRoot), KAIZEN_DIR, KAIZEN_STATE_DIR);\n}\n\nexport function resolveKaizenStateDir(workspaceRoot) {\n const raw = process.env.KAIZEN_STATE_DIR;\n return raw\n ? resolve(workspaceRoot, raw)\n : defaultKaizenStateDir(workspaceRoot);\n}\n\nexport function primaryWorktreeRoot(workspaceRoot) {\n const normalizedWorkspace = canonicalPath(workspaceRoot);\n const gitRoot = gitTopLevel(normalizedWorkspace);\n if (!gitRoot || canonicalPath(gitRoot) !== normalizedWorkspace) {\n return normalizedWorkspace;\n }\n\n try {\n const out = execFileSync(\n \"git\",\n [\"-C\", normalizedWorkspace, \"worktree\", \"list\", \"--porcelain\"],\n { encoding: \"utf-8\", stdio: [\"ignore\", \"pipe\", \"ignore\"] },\n );\n const first = out.split(\"\\n\").find((line) => line.startsWith(\"worktree \"));\n return first\n ? 
canonicalPath(first.slice(\"worktree \".length))\n : normalizedWorkspace;\n } catch {\n return normalizedWorkspace;\n }\n}\n\nfunction gitTopLevel(workspaceRoot) {\n try {\n return execFileSync(\n \"git\",\n [\"-C\", workspaceRoot, \"rev-parse\", \"--show-toplevel\"],\n {\n encoding: \"utf-8\",\n stdio: [\"ignore\", \"pipe\", \"ignore\"],\n },\n ).trim();\n } catch {\n return null;\n }\n}\n\nfunction canonicalPath(path) {\n try {\n return realpathSync(path);\n } catch {\n return resolve(path);\n }\n}\n"],"mappings":";;;;AAIA,MAAa,aAAa;AAC1B,MAAa,gBAAgB;AAC7B,MAAa,mBAAmB;AAChC,MAAa,qBAAqB;AAElC,SAAgB,UAAU,eAAe;AACvC,QAAO,KAAK,eAAe,WAAW;;AAGxC,SAAgB,iBAAiB,eAAe;AAC9C,QAAO,KAAK,UAAU,cAAc,EAAE,cAAc;;AAGtD,SAAgB,iBAAiB,eAAe;AAC9C,QAAO,KAAK,UAAU,cAAc,EAAE,mBAAmB;;AAG3D,SAAgB,gBAAgB,eAAe,UAAU;AACvD,QAAO,KAAK,iBAAiB,cAAc,EAAE,SAAS;;AAGxD,SAAgB,iBAAiB,eAAe,UAAU;AACxD,QAAO,KAAK,gBAAgB,eAAe,SAAS,EAAE,YAAY;;AAGpE,SAAgB,sBAAsB,eAAe;AACnD,QAAO,KAAK,oBAAoB,cAAc,EAAE,YAAY,iBAAiB;;AAG/E,SAAgB,sBAAsB,eAAe;CACnD,MAAM,MAAM,QAAQ,IAAI;AACxB,QAAO,MACH,QAAQ,eAAe,IAAI,GAC3B,sBAAsB,cAAc;;AAG1C,SAAgB,oBAAoB,eAAe;CACjD,MAAM,sBAAsB,cAAc,cAAc;CACxD,MAAM,UAAU,YAAY,oBAAoB;AAChD,KAAI,CAAC,WAAW,cAAc,QAAQ,KAAK,oBACzC,QAAO;AAGT,KAAI;EAMF,MAAM,QALM,aACV,OACA;GAAC;GAAM;GAAqB;GAAY;GAAQ;GAAc,EAC9D;GAAE,UAAU;GAAS,OAAO;IAAC;IAAU;IAAQ;IAAS;GAAE,CAE3C,CAAC,MAAM,KAAK,CAAC,MAAM,SAAS,KAAK,WAAW,YAAY,CAAC;AAC1E,SAAO,QACH,cAAc,MAAM,MAAM,EAAmB,CAAC,GAC9C;SACE;AACN,SAAO;;;AAIX,SAAS,YAAY,eAAe;AAClC,KAAI;AACF,SAAO,aACL,OACA;GAAC;GAAM;GAAe;GAAa;GAAkB,EACrD;GACE,UAAU;GACV,OAAO;IAAC;IAAU;IAAQ;IAAS;GACpC,CACF,CAAC,MAAM;SACF;AACN,SAAO;;;AAIX,SAAS,cAAc,MAAM;AAC3B,KAAI;AACF,SAAO,aAAa,KAAK;SACnB;AACN,SAAO,QAAQ,KAAK"}
package/dist/types.d.ts CHANGED
@@ -1,38 +1,11 @@
1
+ import { DatasetItemData, DatasetItemRendererActions, DatasetItemRendererProps, TraceData, TraceRendererActions, TraceRendererContext, TraceRendererProps } from "./shared/view-types.js";
2
+
1
3
  //#region src/types.d.ts
2
4
  interface KaizenConfig {
3
5
  customer: {
4
- slug: string;
5
6
  name: string;
6
7
  };
7
- langfuse?: {
8
- host?: string;
9
- publicKeyEnv?: string;
10
- secretKeyEnv?: string;
11
- };
12
- studio?: {
13
- port?: number;
14
- };
15
- }
16
- /** Data shape passed to custom trace renderers. */
17
- interface TraceData {
18
- id?: string;
19
- name?: string;
20
- tags?: string[];
21
- timestamp?: string;
22
- metadata?: unknown;
23
- input?: unknown;
24
- output?: unknown;
25
- }
26
- /** Props contract for custom trace renderer components. */
27
- interface TraceRendererProps {
28
- trace: TraceData;
29
- datasetItem?: {
30
- id: string;
31
- input?: unknown;
32
- expectedOutput?: unknown;
33
- metadata?: Record<string, unknown> | null;
34
- } | null;
35
8
  }
36
9
  //#endregion
37
- export { KaizenConfig, TraceData, TraceRendererProps };
10
+ export { type DatasetItemData, type DatasetItemRendererActions, type DatasetItemRendererProps, KaizenConfig, type TraceData, type TraceRendererActions, type TraceRendererContext, type TraceRendererProps };
38
11
  //# sourceMappingURL=types.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","names":[],"sources":["../src/types.ts"],"mappings":";UAAiB,YAAA;EACf,QAAA;IACE,IAAA;IACA,IAAA;EAAA;EAEF,QAAA;IACE,IAAA;IACA,YAAA;IACA,YAAA;EAAA;EAEF,MAAA;IACE,IAAA;EAAA;AAAA;;UAKa,SAAA;EACf,EAAA;EACA,IAAA;EACA,IAAA;EACA,SAAA;EACA,QAAA;EACA,KAAA;EACA,MAAA;AAAA;;UAIe,kBAAA;EACf,KAAA,EAAO,SAAA;EACP,WAAA;IACE,EAAA;IACA,KAAA;IACA,cAAA;IACA,QAAA,GAAW,MAAA;EAAA;AAAA"}
1
+ {"version":3,"file":"types.d.ts","names":[],"sources":["../src/types.ts"],"mappings":";;;UAAiB,YAAA;EACf,QAAA;IACE,IAAA;EAAA;AAAA"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@percepta/kaizen",
3
- "version": "0.6.0",
3
+ "version": "0.8.0",
4
4
  "description": "Automated AI researcher that improves AI systems",
5
5
  "keywords": [
6
6
  "ai",
@@ -14,6 +14,7 @@
14
14
  "kaizen": "./bin/kaizen.js"
15
15
  },
16
16
  "files": [
17
+ "agent",
17
18
  "bin",
18
19
  "dashboard/.next/standalone",
19
20
  "dist",
@@ -58,6 +59,9 @@
58
59
  "zod": "4.1.4",
59
60
  "@percepta/build": "1.1.0"
60
61
  },
62
+ "peerDependencies": {
63
+ "@types/react": "^19.0.0"
64
+ },
61
65
  "engines": {
62
66
  "node": ">=20"
63
67
  },
@@ -71,8 +75,9 @@
71
75
  "typecheck:dashboard": "tsc -p dashboard --noEmit",
72
76
  "test": "vitest run",
73
77
  "dev": "tsx src/index.ts",
74
- "dev:studio": "KAIZEN_WORKSPACE=examples/legacy-workspace next dev dashboard --webpack --port 6789",
75
- "dev:next": "KAIZEN_WORKSPACE=examples/legacy-workspace next dev dashboard --webpack --port 6789",
78
+ "dev:studio": "KAIZEN_DEMO_MODE=1 KAIZEN_WORKSPACE=$PWD/examples/demo-workspace next dev dashboard --webpack --port 6789",
79
+ "dev:next": "KAIZEN_DEMO_MODE=1 KAIZEN_WORKSPACE=$PWD/examples/demo-workspace next dev dashboard --webpack --port 6789",
80
+ "seed:demo-data": "node scripts/seed-demo-traces.mjs --workspace $PWD/examples/demo-workspace",
76
81
  "kaizen": "tsx src/index.ts"
77
82
  }
78
83
  }
@@ -0,0 +1,69 @@
1
/**
 * Data shape passed to custom trace renderers.
 *
 * Every named field is optional. The index signature additionally admits
 * arbitrary string keys, typed as `unknown`, so renderers must narrow any
 * extra property before using it.
 */
export interface TraceData {
  /** Any key beyond the named fields below; value type is not constrained. */
  [key: string]: unknown;
  id?: string;
  name?: string;
  tags?: Array<string>;
  timestamp?: string;
  /** Untyped payloads: narrow before use. */
  metadata?: unknown;
  input?: unknown;
  output?: unknown;
}
12
+
13
/**
 * Props contract for custom trace renderer components.
 *
 * All three props are required: the trace being rendered, a context object
 * describing where it is rendered, and the actions the renderer may invoke.
 */
export interface TraceRendererProps {
  /** The trace to render. */
  trace: TraceData;
  /** Where the renderer is being shown (see `TraceRendererContext`). */
  context: TraceRendererContext;
  /** Callbacks available to the renderer (see `TraceRendererActions`). */
  actions: TraceRendererActions;
}
19
+
20
/**
 * Props contract for custom dataset-item renderer components.
 *
 * Unlike `TraceRendererProps`, the dataset item is the required subject and
 * the trace is optional (it may be omitted or `null`).
 */
export interface DatasetItemRendererProps {
  /** The dataset item to render. */
  datasetItem: DatasetItemData;
  /** An accompanying trace, when one is supplied. */
  trace?: TraceData | null;
  /** Where the renderer is being shown (see `TraceRendererContext`). */
  context: TraceRendererContext;
  /** Callbacks available to the renderer, including dataset-item actions. */
  actions: DatasetItemRendererActions;
}
26
+
27
/**
 * Data shape of a dataset item handed to dataset-item renderers.
 *
 * Only `id` is required. The index signature admits arbitrary additional
 * string keys, typed as `unknown`.
 */
export interface DatasetItemData {
  /** Any key beyond the named fields below; value type is not constrained. */
  [key: string]: unknown;
  id: string;
  /** Untyped payloads: narrow before use. */
  input?: unknown;
  expectedOutput?: unknown;
  /** Key/value metadata; may be absent or explicitly `null`. */
  metadata?: Record<string, unknown> | null;
}
34
+
35
/**
 * Context handed to trace and dataset-item renderers alongside their data.
 *
 * `systemId` and `surface` are always present; `datasetName` and `runId`
 * may be omitted or `null`.
 */
export interface TraceRendererContext {
  systemId: string;
  /** Which surface the renderer is shown on; one of three literal values. */
  surface: "trace" | "dataset-item" | "run-trace";
  datasetName?: string | null;
  runId?: string | null;
}
41
+
42
/**
 * Actions a trace renderer can invoke.
 */
export interface TraceRendererActions {
  /**
   * Create a score.
   *
   * `name` and `value` are required; `traceId`, `comment` and `metadata`
   * are optional. The resolved value is typed `unknown`, so callers must
   * not rely on its shape without narrowing.
   */
  createScore(input: {
    traceId?: string;
    name: string;
    value: number | string | boolean;
    comment?: string;
    metadata?: Record<string, unknown>;
  }): Promise<unknown>;
}
51
+
52
/**
 * Actions a dataset-item renderer can invoke.
 *
 * Extends `TraceRendererActions`, so `createScore` is available as well.
 * Both methods resolve to `unknown`; narrow the result before use.
 */
export interface DatasetItemRendererActions extends TraceRendererActions {
  /**
   * Update a dataset item. Every field of the input is optional;
   * `metadata`, `sourceTraceId` and `status` additionally accept `null`.
   */
  updateDatasetItem(input: {
    datasetName?: string;
    itemId?: string;
    expectedOutput?: unknown;
    metadata?: Record<string, unknown> | null;
    input?: unknown;
    sourceTraceId?: string | null;
    status?: string | null;
  }): Promise<unknown>;

  /**
   * Create a dataset run item. Only `runName` is required.
   */
  createDatasetRunItem(input: {
    datasetItemId?: string;
    traceId?: string;
    runName: string;
    runDescription?: string;
    metadata?: Record<string, unknown>;
  }): Promise<unknown>;
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,19 @@
1
/** Name of the kaizen directory inside a workspace. */
export declare const KAIZEN_DIR: "kaizen";
/** File name of the kaizen config inside the kaizen directory. */
export declare const KAIZEN_CONFIG: "config.ts";
/** Name of the kaizen state directory. */
export declare const KAIZEN_STATE_DIR: ".kaizen";
/** Name of the systems directory inside the kaizen directory. */
export declare const KAIZEN_SYSTEMS_DIR: "systems";
5
+
6
/** Path of the kaizen directory under `workspaceRoot`. */
export declare function kaizenDir(workspaceRoot: string): string;

/** Path of the kaizen config file under `workspaceRoot`. */
export declare function kaizenConfigPath(workspaceRoot: string): string;

/** Path of the systems directory under `workspaceRoot`. */
export declare function kaizenSystemsDir(workspaceRoot: string): string;

/** Path of the directory for the system `systemId`. */
export declare function kaizenSystemDir(workspaceRoot: string, systemId: string): string;

/** Path of the definition file for the system `systemId`. */
export declare function kaizenSystemPath(workspaceRoot: string, systemId: string): string;

/** State directory location used when no override is configured. */
export declare function defaultKaizenStateDir(workspaceRoot: string): string;

/** State directory location, honouring any configured override. */
export declare function resolveKaizenStateDir(workspaceRoot: string): string;

/** Root of the primary git worktree for `workspaceRoot`, or the workspace itself. */
export declare function primaryWorktreeRoot(workspaceRoot: string): string;
@@ -0,0 +1,84 @@
1
+ import { execFileSync } from "node:child_process";
2
+ import { realpathSync } from "node:fs";
3
+ import { join, resolve } from "node:path";
4
+
5
/** Name of the kaizen directory inside a workspace. */
export const KAIZEN_DIR = "kaizen";

/** File name of the kaizen config inside the kaizen directory. */
export const KAIZEN_CONFIG = "config.ts";

/** Name of the kaizen state directory. */
export const KAIZEN_STATE_DIR = ".kaizen";

/** Name of the systems directory inside the kaizen directory. */
export const KAIZEN_SYSTEMS_DIR = "systems";
9
+
10
/**
 * Build the path of the kaizen directory for a workspace.
 *
 * @param {string} workspaceRoot - Workspace root the directory lives under.
 * @returns {string} `workspaceRoot` joined with `KAIZEN_DIR`.
 */
export function kaizenDir(workspaceRoot) {
  const dir = join(workspaceRoot, KAIZEN_DIR);
  return dir;
}
13
+
14
/**
 * Build the path of the kaizen config file for a workspace.
 *
 * @param {string} workspaceRoot - Workspace root.
 * @returns {string} `KAIZEN_CONFIG` inside the workspace's kaizen directory.
 */
export function kaizenConfigPath(workspaceRoot) {
  const baseDir = kaizenDir(workspaceRoot);
  return join(baseDir, KAIZEN_CONFIG);
}
17
+
18
/**
 * Build the path of the systems directory for a workspace.
 *
 * @param {string} workspaceRoot - Workspace root.
 * @returns {string} `KAIZEN_SYSTEMS_DIR` inside the workspace's kaizen directory.
 */
export function kaizenSystemsDir(workspaceRoot) {
  const baseDir = kaizenDir(workspaceRoot);
  return join(baseDir, KAIZEN_SYSTEMS_DIR);
}
21
+
22
/**
 * Build the path of the directory that holds a single system.
 *
 * @param {string} workspaceRoot - Workspace root.
 * @param {string} systemId - Identifier used verbatim as the directory name.
 * @returns {string} `systemId` inside the workspace's systems directory.
 */
export function kaizenSystemDir(workspaceRoot, systemId) {
  const systemsRoot = kaizenSystemsDir(workspaceRoot);
  return join(systemsRoot, systemId);
}
25
+
26
/**
 * Build the path of a system's `system.md` file.
 *
 * @param {string} workspaceRoot - Workspace root.
 * @param {string} systemId - Identifier of the system.
 * @returns {string} `system.md` inside the system's directory.
 */
export function kaizenSystemPath(workspaceRoot, systemId) {
  const systemDir = kaizenSystemDir(workspaceRoot, systemId);
  return join(systemDir, "system.md");
}
29
+
30
/**
 * Build the state directory path used when no override is configured.
 *
 * The path is anchored at the primary worktree root rather than at
 * `workspaceRoot` itself.
 *
 * @param {string} workspaceRoot - Workspace root.
 * @returns {string} `KAIZEN_DIR/KAIZEN_STATE_DIR` under the primary worktree root.
 */
export function defaultKaizenStateDir(workspaceRoot) {
  const anchor = primaryWorktreeRoot(workspaceRoot);
  return join(anchor, KAIZEN_DIR, KAIZEN_STATE_DIR);
}
33
+
34
/**
 * Resolve the state directory, honouring the `KAIZEN_STATE_DIR` environment
 * variable.
 *
 * @param {string} workspaceRoot - Workspace root; also the base against which
 *   a relative override is resolved.
 * @returns {string} The override resolved against `workspaceRoot` when the
 *   variable is set to a non-empty value, otherwise the default state directory.
 */
export function resolveKaizenStateDir(workspaceRoot) {
  const override = process.env.KAIZEN_STATE_DIR;
  // An unset or empty variable falls through to the default location.
  if (override) {
    return resolve(workspaceRoot, override);
  }
  return defaultKaizenStateDir(workspaceRoot);
}
40
+
41
/**
 * Find the root of the primary git worktree for a workspace.
 *
 * The canonicalized workspace path is returned unchanged when it is not the
 * top level of a git checkout, when git fails, or when the worktree listing
 * contains no `worktree` entry. Otherwise the first entry reported by
 * `git worktree list --porcelain` is returned, canonicalized.
 *
 * @param {string} workspaceRoot - Workspace root to inspect.
 * @returns {string} Canonical path of the primary worktree or of the workspace.
 */
export function primaryWorktreeRoot(workspaceRoot) {
  const workspace = canonicalPath(workspaceRoot);
  const topLevel = gitTopLevel(workspace);

  // Only a workspace that is itself the checkout's top level is remapped.
  const isCheckoutRoot =
    Boolean(topLevel) && canonicalPath(topLevel) === workspace;
  if (!isCheckoutRoot) {
    return workspace;
  }

  const marker = "worktree ";
  try {
    const porcelain = execFileSync(
      "git",
      ["-C", workspace, "worktree", "list", "--porcelain"],
      { encoding: "utf-8", stdio: ["ignore", "pipe", "ignore"] },
    );
    // The first `worktree <path>` line wins; later entries are ignored.
    for (const entry of porcelain.split("\n")) {
      if (entry.startsWith(marker)) {
        return canonicalPath(entry.slice(marker.length));
      }
    }
    return workspace;
  } catch {
    // Best effort: any git failure falls back to the workspace itself.
    return workspace;
  }
}
62
+
63
/**
 * Ask git for the top-level directory of the checkout containing a path.
 *
 * @param {string} workspaceRoot - Directory passed to `git -C`.
 * @returns {string | null} Trimmed output of `git rev-parse --show-toplevel`,
 *   or `null` when running git throws for any reason.
 */
function gitTopLevel(workspaceRoot) {
  const args = ["-C", workspaceRoot, "rev-parse", "--show-toplevel"];
  // stdin and stderr are ignored; only stdout is captured.
  const options = {
    encoding: "utf-8",
    stdio: ["ignore", "pipe", "ignore"],
  };

  try {
    const stdout = execFileSync("git", args, options);
    return stdout.trim();
  } catch {
    return null;
  }
}
77
+
78
/**
 * Canonicalize a path, tolerating paths that cannot be resolved on disk.
 *
 * @param {string} path - Path to canonicalize.
 * @returns {string} The result of `realpathSync(path)`, or `resolve(path)`
 *   when `realpathSync` throws.
 */
function canonicalPath(path) {
  let canonical;
  try {
    canonical = realpathSync(path);
  } catch {
    // Fall back to a purely lexical absolute path.
    canonical = resolve(path);
  }
  return canonical;
}