evalution 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -1,4 +1,4 @@
1
- import { a as MemoryTraceProvider, i as setupStepCommand, r as CONFIG_FILE_RELATIVE_PATH, t as VercelAISDK } from "../vercel-ai-sdk-B5BwUGKp.js";
1
+ import { a as MemoryTraceProvider, i as setupStepCommand, r as CONFIG_FILE_RELATIVE_PATH, t as VercelAISDK } from "../vercel-ai-sdk-B8ivuOzP.js";
2
2
  import module from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import path from "node:path";
@@ -99,7 +99,10 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
99
99
  rootPath,
100
100
  configured: hasConfig
101
101
  }));
102
- app.get("/api/setup-tasks", (c) => c.json(setupTasks ? setupTasks.resolve(rootPath) : []));
102
+ app.get("/api/setup-tasks", (c) => c.json(setupTasks ? setupTasks.resolve(rootPath) : {
103
+ agent: [],
104
+ sdk: []
105
+ }));
103
106
  app.post("/api/setup-tasks/:taskId/steps/:stepId/execute", async (c) => {
104
107
  if (!setupTasks) return c.json({ error: "Setup tasks are not available" }, 404);
105
108
  const { taskId, stepId } = c.req.param();
@@ -314,6 +317,53 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
314
317
  }));
315
318
  }
316
319
  //#endregion
320
+ //#region src/agent/registry.ts
/** Domain that hosts the setup instructions; also the only domain the launched agents are allowed to fetch. */
const AGENT_SETUP_DOMAIN = "evalut.io";
/** The prompt handed to a coding agent. */
const AGENT_SETUP_PROMPT = `Follow manual setup steps from https://${AGENT_SETUP_DOMAIN}/n/docs/setup.md`;
/**
 * The coding agents offered a one-click launcher in onboarding, in display
 * order. Each entry is a setup task with a single `run_command` step whose
 * command starts the agent's CLI with {@link AGENT_SETUP_PROMPT} as its
 * initial prompt and network access limited to {@link AGENT_SETUP_DOMAIN}.
 *
 * Mirrors {@link AI_SDK_REGISTRY}, but agents have no adapter class, so they
 * are plain task objects. `icon` keys into the client's `ProviderIcon`.
 */
const AGENT_REGISTRY = [
	{
		id: "claude-code",
		label: "Claude Code",
		icon: "Anthropic",
		steps: [{
			kind: "run_command",
			id: "launch",
			command: [
				"claude",
				`"${AGENT_SETUP_PROMPT}"`,
				"--allowedTools",
				`"WebFetch(domain:${AGENT_SETUP_DOMAIN})"`
			].join(" "),
			label: "Claude Code"
		}]
	},
	{
		id: "codex",
		label: "Codex",
		icon: "OpenAI",
		steps: [{
			kind: "run_command",
			id: "launch",
			command: [
				"codex",
				"-c 'features.network_proxy.enabled=true'",
				`-c 'features.network_proxy.domains={ "${AGENT_SETUP_DOMAIN}" = "allow" }'`,
				"-c 'sandbox_workspace_write.network_access=true'",
				`"${AGENT_SETUP_PROMPT}"`
			].join(" "),
			label: "Codex"
		}]
	}
];
/**
 * Finds the agent setup task whose `id` equals `taskId`.
 *
 * @param taskId - Task id to look for in {@link AGENT_REGISTRY}.
 * @returns The first matching task, or `undefined` if none matches.
 */
function findSetupTask$1(taskId) {
	for (const candidate of AGENT_REGISTRY) {
		if (candidate.id === taskId) return candidate;
	}
	return undefined;
}
/**
 * Finds a step inside an agent task, addressed by task id and step id.
 *
 * @param taskId - Id of the agent task to search.
 * @param stepId - Id of the step within that task.
 * @returns The matching step, or `undefined` when the task or the step is unknown.
 */
function findSetupStep$2(taskId, stepId) {
	const task = findSetupTask$1(taskId);
	if (task == null) return undefined;
	return task.steps.find((step) => step.id === stepId);
}
366
+ //#endregion
317
367
  //#region src/sdk/registry.ts
318
368
  /**
319
369
  * Every AI SDK offered in manual onboarding, in display order. This is the
@@ -330,12 +380,19 @@ function findSetupTask(taskId) {
330
380
  * Look up a step within a task by both ids, or `undefined` if either is
331
381
  * unknown.
332
382
  */
function findSetupStep$1(taskId, stepId) {
	const task = findSetupTask(taskId);
	if (task == null) return undefined;
	return task.steps.find((step) => step.id === stepId);
}
336
386
  //#endregion
337
387
  //#region src/server/setup-tasks.ts
/**
 * Looks a setup step up by `taskId`/`stepId` across both registries. The SDK
 * registry is consulted first; the agent registry is only consulted when the
 * SDK lookup yields nothing.
 *
 * @returns The matching step, or `undefined` if neither registry knows it.
 */
function findSetupStep(taskId, stepId) {
	const sdkStep = findSetupStep$1(taskId, stepId);
	if (sdkStep != null) return sdkStep;
	return findSetupStep$2(taskId, stepId);
}
395
+ /**
339
396
  * Thrown when a requested task or step id does not exist in the registry. The
340
397
  * route layer maps this to a 404, distinguishing it from execution failures.
341
398
  */
@@ -367,36 +424,61 @@ async function executeSetupStep(rootPath, taskId, stepId) {
367
424
  case "create_config": return { path: await writeConfigFile(rootPath, step) };
368
425
  case "run_command":
369
426
  case "install_package": throw new Error(`${step.kind} steps are not yet supported`);
427
+ default: throw new Error();
370
428
  }
371
429
  }
/**
 * Builds the onboarding task lists — coding agents and AI SDKs — with every
 * step passed through {@link resolveStepStatus} so it carries its runtime
 * status for the project at `rootPath`.
 *
 * @param rootPath - Absolute path to the project root.
 * @returns `{ agent, sdk }`, each an array of tasks in registry order.
 */
function resolveSetupTasks(rootPath) {
	function withResolvedSteps(task) {
		const steps = task.steps.map((step) => resolveStepStatus(rootPath, step));
		return {
			...task,
			steps
		};
	}
	const agent = AGENT_REGISTRY.map((task) => withResolvedSteps(task));
	const sdk = AI_SDK_REGISTRY.map(({ setupTask }) => withResolvedSteps(setupTask));
	return {
		agent,
		sdk
	};
}
/**
 * Adds runtime status to a single setup step where it can be determined.
 *
 * - `create_config`: `completed` is whether `step.path` exists under `rootPath`.
 * - `install_package`: `completed` is whether `step.package` is installed.
 * - `install_package` and `run_command`: `disabledReason` is set when the
 *   first word of the step's command is not an executable found on `PATH`.
 *
 * @param rootPath - Absolute path to the project root.
 * @param step - The step to annotate; it is copied, never mutated.
 * @returns A copy of `step` with the status fields above.
 * @throws {Error} If `step.kind` is not one of the kinds handled here.
 */
function resolveStepStatus(rootPath, step) {
	switch (step.kind) {
		case "create_config": return {
			...step,
			completed: fs$1.existsSync(path.join(rootPath, step.path))
		};
		case "install_package":
		case "run_command": {
			const result = { ...step };
			if (step.kind === "install_package") result.completed = isPackageInstalled(rootPath, step.package);
			// Trim first: leading whitespace would otherwise make the first token
			// empty and silently skip the PATH check.
			const bin = setupStepCommand(step).trim().split(/\s+/)[0];
			if (bin && !isBinaryOnPath(bin)) result.disabledReason = `${bin} not found in PATH`;
			return result;
		}
		// Name the offending kind so a registry typo is diagnosable from the error.
		default: throw new Error(`Unknown setup step kind: ${String(step.kind)}`);
	}
}
/**
 * Whether an executable named `bin` is resolvable on the current `PATH`. Used
 * to disable launchers whose CLI isn't installed.
 *
 * On Windows each `PATHEXT` extension is tried (falling back to a default list
 * when `PATHEXT` is unset or empty), and a `bin` that already ends in one of
 * those extensions is also tried as-is. Elsewhere the file must carry the
 * executable permission. Only regular files count: a directory that happens
 * to share the name is not a binary even though directories usually pass an
 * `X_OK` check.
 *
 * @param bin - The bare executable name to look for, e.g. `claude`.
 * @returns `true` if a matching executable file exists in some `PATH` entry.
 */
function isBinaryOnPath(bin) {
	if (!bin) return false;
	const dirs = (process.env.PATH ?? "").split(path.delimiter).filter(Boolean);
	let exts = [""];
	if (process.platform === "win32") {
		const pathExts = (process.env.PATHEXT || ".EXE;.CMD;.BAT;.COM").split(";").filter(Boolean);
		const ownExt = path.extname(bin).toLowerCase();
		const hasKnownExt = ownExt !== "" && pathExts.some((ext) => ext.toLowerCase() === ownExt);
		exts = hasKnownExt ? ["", ...pathExts] : pathExts;
	}
	for (const dir of dirs) for (const ext of exts) {
		const candidate = path.join(dir, bin + ext);
		try {
			if (!fs$1.statSync(candidate).isFile()) continue;
			fs$1.accessSync(candidate, fs$1.constants.X_OK);
			return true;
		} catch {
			// Missing or not executable here — keep probing the remaining candidates.
		}
	}
	return false;
}
481
+ /**
400
482
  * Whether `pkg` is installed for the project at `rootPath`, walking up the
401
483
  * directory tree to honour hoisted/workspace `node_modules`.
402
484
  *
@@ -433,6 +515,12 @@ async function writeConfigFile(rootPath, step) {
433
515
  /** Shell used to run a resolved step command, so shell syntax in it works. */
434
516
  const SHELL = process.env.SHELL || (process.platform === "win32" ? "powershell.exe" : "bash");
435
517
  /**
518
+ * How long a session's PTY is kept alive after its WebSocket drops, waiting for
519
+ * the client to reconnect. Covers the brief gap while the server restarts
520
+ * itself once a config file appears, so the coding agent isn't killed mid-task.
521
+ */
522
+ const GRACE_PERIOD_MS = 1e4;
523
+ /**
436
524
  * Arguments to run `command` in {@link SHELL}, skipping the user's startup files
437
525
  * where the shell allows it. Those rc files (e.g. nvm in `~/.zshrc`) can add
438
526
  * seconds of latency before the command even begins, and they are unnecessary
@@ -465,56 +553,184 @@ function resolveTerminalCommand(taskId, stepId) {
465
553
  if (!step || step.kind === "create_config") return null;
466
554
  return setupStepCommand(step);
467
555
  }
468
- function send(ws, message) {
469
- ws.send(JSON.stringify(message));
/**
 * Default PTY spawner: runs `options.command` through {@link SHELL} via
 * `pty.spawn`, as an `xterm-color` terminal.
 *
 * @param options - `command` to run, terminal size (`cols`/`rows`, falling
 * back to 80×24 when falsy), working directory `cwd` and environment `env`.
 * @returns Whatever `pty.spawn` returns.
 */
function defaultSpawn(options) {
	const { command, cols, rows, cwd, env } = options;
	const ptyOptions = {
		name: "xterm-color",
		cols: cols || 80,
		rows: rows || 24,
		cwd,
		env
	};
	return pty.spawn(SHELL, shellCommandArgs(command), ptyOptions);
}
/**
 * A single onboarding terminal: one PTY plus the WebSocket currently attached to
 * it. The PTY outlives any one socket so it can survive a server restart: while
 * a client is attached, output is streamed live; while it is detached, output
 * is buffered and a grace timer reaps the PTY if no client reconnects in time.
 *
 * If the PTY exits while detached, the buffered output and the exit code are
 * kept until a client reattaches (and receives both) or the grace timer fires,
 * so a command that finishes during the reconnect gap is not lost. `onReap`
 * runs at most once per session.
 */
var TerminalSession = class {
	/** While detached: PTY output accumulated to replay on reconnect; `null` otherwise. */
	buffer = null;
	/** Pending grace-period timer; set only while detached. */
	graceTimer;
	/** The attached WebSocket, or `null` when none is attached. */
	socket = null;
	/** Whether the PTY has reported its exit. */
	exited = false;
	/** Exit code reported by the PTY; kept so it can be delivered after a reconnect. */
	exitCode;
	/** Guards `onReap` so it runs at most once. */
	reaped = false;
	child;
	onReap;
	gracePeriodMs;
	/**
	 * @param child - The PTY this session owns.
	 * @param onReap - Called once the session is done (exit, grace expiry, or
	 * intentional kill) so the registry can drop it.
	 * @param gracePeriodMs - How long to keep the PTY alive after the socket drops.
	 */
	constructor(child, onReap, gracePeriodMs = GRACE_PERIOD_MS) {
		this.child = child;
		this.onReap = onReap;
		this.gracePeriodMs = gracePeriodMs;
		child.onData((data) => this.handleData(data));
		child.onExit(({ exitCode }) => this.handleExit(exitCode));
	}
	/** JSON-encode `message` and send it to the attached socket, if any. */
	send(message) {
		this.socket?.send(JSON.stringify(message));
	}
	/** Invoke `onReap` exactly once, however many teardown paths run. */
	reap() {
		if (this.reaped) return;
		this.reaped = true;
		this.onReap();
	}
	/** Stream PTY output to the attached socket, or buffer it while detached. */
	handleData(data) {
		if (this.socket) this.send({
			type: "data",
			data
		});
		else this.buffer?.push(data);
	}
	/** Record the PTY's exit; announce it now unless the client is mid-reconnect. */
	handleExit(code) {
		this.exited = true;
		this.exitCode = code;
		// Exited during the reconnect gap: keep the buffer and the grace timer so
		// a reattaching client still gets the tail output and the exit code.
		if (!this.socket && this.buffer) return;
		this.finish();
	}
	/** Tell the attached client (if any) about the exit, close it, and reap. */
	finish() {
		this.send({
			type: "exit",
			code: this.exitCode
		});
		this.socket?.close();
		this.clearGrace();
		this.reap();
	}
	/**
	 * Attach a (re)connected socket. Replays any output buffered while detached,
	 * then resumes live streaming. Cancels a pending grace-period reap. If the
	 * PTY already exited, the exit is delivered and the socket closed right away.
	 */
	attach(ws) {
		this.clearGrace();
		this.socket = ws;
		if (this.buffer) {
			for (const data of this.buffer) this.send({
				type: "data",
				data
			});
			this.buffer = null;
		}
		if (this.exited) this.finish();
	}
	/**
	 * The attached socket dropped. Start buffering output and a grace timer; if
	 * no client reattaches in time, reap the PTY.
	 *
	 * @param ws - Optional: the socket that dropped. When given and it is not
	 * the currently attached socket (a newer one already took over), the call
	 * is ignored. Omit it to detach whatever socket is attached.
	 */
	detach(ws) {
		if (ws && ws !== this.socket) return;
		if (this.exited || !this.socket) return;
		this.clearGrace();
		this.socket = null;
		this.buffer = [];
		this.graceTimer = setTimeout(() => {
			this.graceTimer = void 0;
			this.buffer = null;
			if (!this.exited) this.child.kill();
			this.reap();
		}, this.gracePeriodMs);
	}
	/** The client intentionally left: kill the PTY now, skipping the grace wait. */
	kill() {
		this.clearGrace();
		this.socket = null;
		this.buffer = null;
		if (!this.exited) this.child.kill();
		this.reap();
	}
	/** Forward the user's keystrokes to the PTY; ignored once it has exited. */
	write(data) {
		if (this.exited) return;
		this.child.write(data);
	}
	/** Resize the PTY to match the client's terminal; ignored once it has exited. */
	resize(cols, rows) {
		if (this.exited) return;
		this.child.resize(cols || 80, rows || 24);
	}
	/** Cancel the pending grace timer, if any. */
	clearGrace() {
		if (this.graceTimer) {
			clearTimeout(this.graceTimer);
			this.graceTimer = void 0;
		}
	}
};
/**
 * Holds the live {@link TerminalSession | terminal sessions} keyed by a
 * client-supplied session id. Meant to be created once and handed to each
 * server instance, so sessions — and the PTYs they wrap — outlive any one
 * server.
 */
var TerminalSessionRegistry = class {
	/** Live sessions by id. */
	sessions = new Map();
	spawn;
	gracePeriodMs;
	/**
	 * @param spawn - PTY spawner; overridable in tests.
	 * @param gracePeriodMs - Reconnect grace window passed to each session.
	 */
	constructor(spawn = defaultSpawn, gracePeriodMs = GRACE_PERIOD_MS) {
		this.spawn = spawn;
		this.gracePeriodMs = gracePeriodMs;
	}
	/** The session for `id`, if one is still live. */
	get(id) {
		return this.sessions.get(id);
	}
	/**
	 * Spawn a PTY and register a new session under `id`, replacing any session
	 * already registered there.
	 *
	 * @param id - Session id to register under.
	 * @param options - Passed to the spawner unchanged.
	 * @returns The new session.
	 */
	create(id, options) {
		let session;
		session = new TerminalSession(this.spawn(options), () => {
			// Only drop the entry if it is still this session: a displaced
			// session reaping late must not evict its replacement.
			if (this.sessions.get(id) === session) this.sessions.delete(id);
		}, this.gracePeriodMs);
		this.sessions.set(id, session);
		return session;
	}
};
/**
 * Sends a JSON-encoded `{ type: "error", message }` frame over `ws`.
 *
 * @param ws - Socket-like object exposing `send(string)`.
 * @param message - Human-readable error text for the client.
 */
function sendError(ws, message) {
	const payload = {
		type: "error",
		message
	};
	ws.send(JSON.stringify(payload));
}
471
702
  /**
472
703
  * Registers the interactive-terminal WebSocket route at `/api/terminal`.
473
704
  *
474
- * The client connects with `taskId` and `stepId` query params identifying a
475
- * setup step; the server resolves the actual command from its own registry
476
- * (never from the request body) and, once the client signals `start`, spawns it
477
- * in a PTY rooted at the project. Output is streamed to the client and the
478
- * client's keystrokes are written to the process, so prompts (e.g. npm's
479
- * "Ok to proceed?") work. The trust boundary mirrors the step-execute route:
480
- * the client can only ask to run a step that already exists server-side.
705
+ * The client connects with `taskId`, `stepId`, and a client-generated
706
+ * `sessionId` query param. The server resolves the actual command from its own
707
+ * registry (never from the request body). On the first connection for a session
708
+ * the client signals `start` and the server spawns the command in a PTY rooted
709
+ * at the project; output is streamed to the client and the client's keystrokes
710
+ * are written to the process, so prompts (e.g. npm's "Ok to proceed?") work.
711
+ *
712
+ * Sessions live in `sessions`, which outlives this server instance, so when the
713
+ * server restarts itself after a config file appears the PTY keeps running and a
714
+ * reconnecting client (same `sessionId`) re-attaches and replays the gap.
715
+ *
716
+ * The trust boundary mirrors the step-execute route: the client can only ask to
717
+ * run a step that already exists server-side.
481
718
  */
482
- function registerTerminalRoute(app, upgradeWebSocket, rootPath) {
719
+ function registerTerminalRoute(app, upgradeWebSocket, rootPath, sessions) {
483
720
  app.get("/api/terminal", upgradeWebSocket((c) => {
484
721
  const taskId = c.req.query("taskId");
485
722
  const stepId = c.req.query("stepId");
486
- let child;
487
- const start = (ws, cols, rows) => {
488
- if (child) return;
489
- const command = taskId && stepId ? resolveTerminalCommand(taskId, stepId) : null;
490
- if (!command) {
491
- send(ws, {
492
- type: "error",
493
- message: "Unknown or non-runnable setup step."
494
- });
495
- ws.close();
496
- return;
497
- }
498
- child = pty.spawn(SHELL, shellCommandArgs(command), {
499
- name: "xterm-color",
500
- cols: cols || 80,
501
- rows: rows || 24,
502
- cwd: rootPath,
503
- env: process.env
504
- });
505
- child.onData((data) => send(ws, {
506
- type: "data",
507
- data
508
- }));
509
- child.onExit(({ exitCode }) => {
510
- send(ws, {
511
- type: "exit",
512
- code: exitCode
513
- });
514
- ws.close();
515
- });
516
- };
723
+ const sessionId = c.req.query("sessionId") ?? `${taskId}:${stepId}`;
724
+ let session;
725
+ let leaving = false;
517
726
  return {
727
+ onOpen(_evt, ws) {
728
+ const existing = sessions.get(sessionId);
729
+ if (existing) {
730
+ session = existing;
731
+ existing.attach(ws);
732
+ }
733
+ },
518
734
  onMessage(evt, ws) {
519
735
  let msg;
520
736
  try {
@@ -523,20 +739,38 @@ function registerTerminalRoute(app, upgradeWebSocket, rootPath) {
523
739
  return;
524
740
  }
525
741
  switch (msg.type) {
526
- case "start":
527
- start(ws, msg.cols, msg.rows);
742
+ case "start": {
743
+ if (session) return;
744
+ const command = taskId && stepId ? resolveTerminalCommand(taskId, stepId) : null;
745
+ if (!command) {
746
+ sendError(ws, "Unknown or non-runnable setup step.");
747
+ ws.close();
748
+ return;
749
+ }
750
+ session = sessions.create(sessionId, {
751
+ command,
752
+ cols: msg.cols,
753
+ rows: msg.rows,
754
+ cwd: rootPath,
755
+ env: process.env
756
+ });
757
+ session.attach(ws);
528
758
  break;
759
+ }
529
760
  case "input":
530
- child?.write(msg.data);
761
+ session?.write(msg.data);
531
762
  break;
532
763
  case "resize":
533
- child?.resize(msg.cols || 80, msg.rows || 24);
764
+ session?.resize(msg.cols || 80, msg.rows || 24);
765
+ break;
766
+ case "detach":
767
+ leaving = true;
768
+ session?.kill();
534
769
  break;
535
770
  }
536
771
  },
537
772
  onClose() {
538
- child?.kill();
539
- child = void 0;
773
+ if (!leaving) session?.detach();
540
774
  }
541
775
  };
542
776
  }));
@@ -544,7 +778,7 @@ function registerTerminalRoute(app, upgradeWebSocket, rootPath) {
544
778
  //#endregion
545
779
  //#region src/server/index.ts
546
780
  async function startServer(options) {
547
- const { promptProviders, traceProviders, port, rootPath, hasConfig } = options;
781
+ const { promptProviders, traceProviders, port, rootPath, hasConfig, terminalSessions } = options;
548
782
  const promptProviderMap = new Map(promptProviders.map((p) => [p.id, p]));
549
783
  const traceProviderMap = new Map(traceProviders.map((p) => [p.id, p]));
550
784
  const promptRegistry = new PromptRegistry();
@@ -577,7 +811,7 @@ async function startServer(options) {
577
811
  executeStep: executeSetupStep
578
812
  }
579
813
  });
580
- registerTerminalRoute(app, upgradeWebSocket, rootPath);
814
+ registerTerminalRoute(app, upgradeWebSocket, rootPath, terminalSessions);
581
815
  const clientRoot = fileURLToPath(new URL("../client/", import.meta.url));
582
816
  app.get("*", serveStatic({ root: clientRoot }));
583
817
  for (const [providerId, provider] of promptProviderMap) if (provider.watch) provider.watch(async (event) => {
@@ -613,6 +847,7 @@ async function startServer(options) {
613
847
  });
614
848
  });
615
849
  const close = () => new Promise((resolve, reject) => {
850
+ for (const ws of wss.clients) ws.close();
616
851
  if ("closeAllConnections" in server) server.closeAllConnections();
617
852
  server.close((err) => err ? reject(err) : resolve());
618
853
  });
@@ -836,14 +1071,15 @@ function applyDotenv(rootDir) {
836
1071
  if (err?.code !== "ENOENT") console.warn(`Warning: failed to load .env from ${envPath}:`, err.message);
837
1072
  }
838
1073
  }
/**
 * Starts the server for a loaded (or empty) config.
 *
 * Loads `.env` from `rootDir` first unless `config.useDotenv` is exactly
 * `false`, then calls `startServer` with the config's providers, defaulting to
 * no prompt providers and a single {@link MemoryTraceProvider}.
 *
 * @param rootDir - Project root; passed on as `rootPath`.
 * @param config - Loaded config object (may be `{}`).
 * @param hasConfig - Whether a config file exists.
 * @param port - Port to listen on.
 * @param terminalSessions - Terminal session registry handed to the server.
 * @returns Whatever `startServer` returns.
 */
function startConfiguredServer(rootDir, config, hasConfig, port, terminalSessions) {
	const dotenvDisabled = config.useDotenv === false;
	if (!dotenvDisabled) applyDotenv(rootDir);
	const serverOptions = {
		promptProviders: config.promptProviders ?? [],
		traceProviders: config.traceProviders ?? [new MemoryTraceProvider()],
		port,
		rootPath: rootDir,
		hasConfig,
		terminalSessions
	};
	return startServer(serverOptions);
}
849
1085
  async function main() {
@@ -861,11 +1097,12 @@ async function main() {
861
1097
  const maybeOpen = (url) => {
862
1098
  if (!process.env.EVALUTION_NO_OPEN) openBrowser(url);
863
1099
  };
1100
+ const terminalSessions = new TerminalSessionRegistry();
864
1101
  if (hasConfig) {
865
- maybeOpen((await startConfiguredServer(rootDir, await loadConfig(rootDir), true, port)).url);
1102
+ maybeOpen((await startConfiguredServer(rootDir, await loadConfig(rootDir), true, port, terminalSessions)).url);
866
1103
  return;
867
1104
  }
868
- let server = await startConfiguredServer(rootDir, {}, false, port);
1105
+ let server = await startConfiguredServer(rootDir, {}, false, port, terminalSessions);
869
1106
  maybeOpen(server.url);
870
1107
  console.log(`👀 No config found; watching ${path.join(rootDir, ".evalution", "config.ts")} for creation...`);
871
1108
  const stopWatching = watchForConfigCreation(rootDir, async () => {
@@ -873,7 +1110,7 @@ async function main() {
873
1110
  console.log("⚙️ Config loaded; restarting server...");
874
1111
  stopWatching();
875
1112
  await server.close();
876
- server = await startConfiguredServer(rootDir, config, true, port);
1113
+ server = await startConfiguredServer(rootDir, config, true, port, terminalSessions);
877
1114
  });
878
1115
  }
879
1116
  main().catch((error) => {