evalution 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -1,4 +1,4 @@
1
- import { a as MemoryTraceProvider, i as setupStepCommand, r as CONFIG_FILE_RELATIVE_PATH, t as VercelAISDK } from "../vercel-ai-sdk-CareWPDM.js";
1
+ import { a as MemoryTraceProvider, i as setupStepCommand, r as CONFIG_FILE_RELATIVE_PATH, t as VercelAISDK } from "../vercel-ai-sdk-B8ivuOzP.js";
2
2
  import module from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import path from "node:path";
@@ -74,123 +74,15 @@ var PromptRegistry = class {
74
74
  }
75
75
  };
76
76
  //#endregion
77
- //#region src/sdk/registry.ts
78
- /**
79
- * Every AI SDK offered in manual onboarding, in display order. This is the
80
- * single source of truth for which SDKs exist and their task ids — adding one
81
- * to onboarding means giving its adapter a static `setupTask` and listing it
82
- * here.
83
- */
84
- const AI_SDK_REGISTRY = [VercelAISDK];
85
- /** Look up a {@link SetupTask} by its id, or `undefined` if none matches. */
86
- function findSetupTask(taskId) {
87
- for (const cls of AI_SDK_REGISTRY) if (cls.setupTask.id === taskId) return cls.setupTask;
88
- }
89
- /**
90
- * Look up a step within a task by both ids, or `undefined` if either is
91
- * unknown.
92
- */
93
- function findSetupStep(taskId, stepId) {
94
- return findSetupTask(taskId)?.steps.find((s) => s.id === stepId);
95
- }
96
- //#endregion
97
- //#region src/server/setup-tasks.ts
98
- /**
99
- * Thrown when a requested task or step id does not exist in the registry. The
100
- * route layer maps this to a 404, distinguishing it from execution failures.
101
- */
102
- var SetupStepNotFoundError = class extends Error {
103
- constructor(message) {
104
- super(message);
105
- this.name = "SetupStepNotFoundError";
106
- }
107
- };
108
- /**
109
- * Executes a single onboarding step, resolved from the server-side registry by
110
- * `taskId`/`stepId`.
111
- *
112
- * The client only sends ids; the step definition (file contents, command, ...)
113
- * comes entirely from {@link AI_SDK_REGISTRY}, so a request can never write
114
- * arbitrary files or run arbitrary commands.
115
- *
116
- * @param rootPath - Absolute path to the project root.
117
- * @param taskId - Id of the {@link SetupTask} to run a step from.
118
- * @param stepId - Id of the step within that task.
119
- * @throws {SetupStepNotFoundError} if the task or step id is unknown.
120
- * @throws if the step kind is unsupported or execution fails (e.g. the config
121
- * file already exists).
122
- */
123
- async function executeSetupStep(rootPath, taskId, stepId) {
124
- const step = findSetupStep(taskId, stepId);
125
- if (!step) throw new SetupStepNotFoundError(`Unknown step '${stepId}' for task '${taskId}'`);
126
- switch (step.kind) {
127
- case "create_config": return { path: await writeConfigFile(rootPath, step) };
128
- case "run_command":
129
- case "install_package": throw new Error(`${step.kind} steps are not yet supported`);
130
- }
131
- }
132
- /**
133
- * Returns the onboarding tasks with each step's runtime
134
- * {@link SetupStepBase.completed | completion status} resolved against the
135
- * project at `rootPath` (config file present, package installed).
136
- *
137
- * @param rootPath - Absolute path to the project root.
138
- */
139
- function resolveSetupTasks(rootPath) {
140
- return AI_SDK_REGISTRY.map((cls) => ({
141
- ...cls.setupTask,
142
- steps: cls.setupTask.steps.map((step) => resolveStepStatus(rootPath, step))
143
- }));
144
- }
145
- /** Adds the runtime `completed` flag to a single step where determinable. */
146
- function resolveStepStatus(rootPath, step) {
147
- switch (step.kind) {
148
- case "install_package": return {
149
- ...step,
150
- completed: isPackageInstalled(rootPath, step.package)
151
- };
152
- case "create_config": return {
153
- ...step,
154
- completed: fs$1.existsSync(path.join(rootPath, step.path))
155
- };
156
- case "run_command": return step;
157
- }
158
- }
159
- /**
160
- * Whether `pkg` is installed for the project at `rootPath`, walking up the
161
- * directory tree to honour hoisted/workspace `node_modules`.
162
- *
163
- * @param rootPath - Absolute path to start the search from.
164
- * @param pkg - The npm package name to look for.
165
- */
166
- function isPackageInstalled(rootPath, pkg) {
167
- let dir = rootPath;
168
- while (true) {
169
- if (fs$1.existsSync(path.join(dir, "node_modules", pkg, "package.json"))) return true;
170
- const parent = path.dirname(dir);
171
- if (parent === dir) return false;
172
- dir = parent;
173
- }
174
- }
175
- /**
176
- * Writes the config file for a `create_config` step, creating parent
177
- * directories as needed. Refuses to clobber an existing file.
178
- */
179
- async function writeConfigFile(rootPath, step) {
180
- const filePath = path.join(rootPath, step.path);
181
- try {
182
- await fs.access(filePath);
183
- throw new Error(`${step.path} already exists`);
184
- } catch (err) {
185
- if (err?.code !== "ENOENT") throw err;
186
- }
187
- await fs.mkdir(path.dirname(filePath), { recursive: true });
188
- await fs.writeFile(filePath, step.contents, "utf8");
189
- return step.path;
190
- }
191
- //#endregion
192
77
  //#region src/server/api-routes.ts
193
- function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hotReloadSubscribers, rootPath, hasConfig, tracer, defaultTraceProviderId }) {
78
/**
 * Decodes a URL-safe base64 prompt id produced by `encodePromptId`.
 *
 * Uses the Web `atob`/`TextDecoder` APIs (rather than Node's `Buffer`) so it
 * works in browser/worker bundles too. `atob` on its own yields one character
 * per *byte*, which mangles any non-ASCII id, so the bytes are run through a
 * UTF-8 decoder afterwards.
 *
 * @param encoded - The base64url-encoded id (`-`/`_` alphabet).
 * @returns The decoded id as a UTF-8 string.
 */
function decodePromptId(encoded) {
	const b64 = encoded.replace(/-/g, "+").replace(/_/g, "/");
	const bytes = Uint8Array.from(atob(b64), (ch) => ch.charCodeAt(0));
	// ignoreBOM keeps a leading U+FEFF in the result instead of stripping it.
	return new TextDecoder("utf-8", { ignoreBOM: true }).decode(bytes);
}
85
+ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hotReloadSubscribers, rootPath, hasConfig, tracer, defaultTraceProviderId, setupTasks, executeDisabledMessage }) {
194
86
  const resolveSpanPrompt = (span) => {
195
87
  if (!span.prompt) return span;
196
88
  const resolved = promptRegistry.resolve(span.prompt.id, span.prompt.providerId);
@@ -207,13 +99,17 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
207
99
  rootPath,
208
100
  configured: hasConfig
209
101
  }));
210
- app.get("/api/setup-tasks", (c) => c.json(resolveSetupTasks(rootPath)));
102
+ app.get("/api/setup-tasks", (c) => c.json(setupTasks ? setupTasks.resolve(rootPath) : {
103
+ agent: [],
104
+ sdk: []
105
+ }));
211
106
  app.post("/api/setup-tasks/:taskId/steps/:stepId/execute", async (c) => {
107
+ if (!setupTasks) return c.json({ error: "Setup tasks are not available" }, 404);
212
108
  const { taskId, stepId } = c.req.param();
213
109
  try {
214
- return c.json(await executeSetupStep(rootPath, taskId, stepId));
110
+ return c.json(await setupTasks.executeStep(rootPath, taskId, stepId));
215
111
  } catch (error) {
216
- const status = error instanceof SetupStepNotFoundError ? 404 : 400;
112
+ const status = error?.name === "SetupStepNotFoundError" ? 404 : 400;
217
113
  return c.json({ error: error.message }, status);
218
114
  }
219
115
  });
@@ -270,7 +166,7 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
270
166
  const { providerId, id } = c.req.param();
271
167
  const provider = promptProviders.get(providerId);
272
168
  if (!provider) return c.json({ error: "Provider not found" }, 404);
273
- const decodedId = Buffer.from(id, "base64url").toString("utf8");
169
+ const decodedId = decodePromptId(id);
274
170
  const prompt = await provider.getPrompt(decodedId);
275
171
  if (!prompt) return c.json({ error: "Prompt not found" }, 404);
276
172
  return c.json({
@@ -288,7 +184,7 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
288
184
  const provider = promptProviders.get(providerId);
289
185
  if (!provider) return c.json({ error: "Provider not found" }, 404);
290
186
  if (!provider.renamePrompt) return c.json({ error: "This provider does not support renaming" }, 405);
291
- const decodedId = Buffer.from(id, "base64url").toString("utf8");
187
+ const decodedId = decodePromptId(id);
292
188
  const updatedPrompt = await provider.renamePrompt(decodedId, newName);
293
189
  return c.json({
294
190
  ...updatedPrompt,
@@ -304,7 +200,7 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
304
200
  const provider = promptProviders.get(providerId);
305
201
  if (!provider) return c.json({ error: "Provider not found" }, 404);
306
202
  if (!provider.updatePromptProperties) return c.json({ error: "This provider does not support editing" }, 405);
307
- const decodedId = Buffer.from(id, "base64url").toString("utf8");
203
+ const decodedId = decodePromptId(id);
308
204
  const updatedPrompt = await provider.updatePromptProperties(decodedId, await c.req.json());
309
205
  return c.json({
310
206
  ...updatedPrompt,
@@ -316,16 +212,17 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
316
212
  });
317
213
  app.post("/api/prompts/:providerId/:id/execute", async (c) => {
318
214
  try {
215
+ if (executeDisabledMessage) return c.json({ error: executeDisabledMessage }, 400);
319
216
  const { providerId, id } = c.req.param();
320
217
  const provider = promptProviders.get(providerId);
321
218
  if (!provider) return c.json({ error: "Provider not found" }, 404);
322
- const decodedId = Buffer.from(id, "base64url").toString("utf8");
219
+ const decodedId = decodePromptId(id);
323
220
  const { functionParams = [] } = await c.req.json().catch(() => ({}));
324
221
  const prompt = await provider.getPrompt(decodedId);
325
222
  if (!prompt) return c.json({ error: "Prompt not found" }, 404);
326
223
  const response = tracer.startActiveSpan(prompt.name, (span) => {
327
224
  const { traceId } = span.spanContext();
328
- provider.execute(decodedId, functionParams, false).then(() => {
225
+ provider.execute(decodedId, functionParams).then(() => {
329
226
  span.setStatus({ code: SpanStatusCode.OK });
330
227
  }, (err) => {
331
228
  console.error("prompt execution failed:", err);
@@ -420,10 +317,210 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
420
317
  }));
421
318
  }
422
319
  //#endregion
320
+ //#region src/agent/registry.ts
321
+ const AGENT_SETUP_DOMAIN = "evalut.io";
322
+ /** The prompt handed to a coding agent */
323
+ const AGENT_SETUP_PROMPT = `Follow manual setup steps from ${`https://${AGENT_SETUP_DOMAIN}/n/docs/setup.md`}`;
324
+ /**
325
+ * Every coding agent offered a one-click launcher in onboarding, in display
326
+ * order. This is the single source of truth for which agents exist and their
327
+ * task ids — each is a {@link SetupTask} whose lone {@link SetupStep} runs the
328
+ * agent's CLI with the setup prompt queued up in an interactive terminal.
329
+ *
330
+ * Mirrors {@link AI_SDK_REGISTRY} in `../sdk/registry.ts`, but agents have no
331
+ * adapter class, so they live here as plain tasks. `icon` keys into the
332
+ * client's `ProviderIcon`.
333
+ */
334
+ const AGENT_REGISTRY = [{
335
+ id: "claude-code",
336
+ label: "Claude Code",
337
+ icon: "Anthropic",
338
+ steps: [{
339
+ kind: "run_command",
340
+ id: "launch",
341
+ command: `claude "${AGENT_SETUP_PROMPT}" --allowedTools "WebFetch(domain:${AGENT_SETUP_DOMAIN})"`,
342
+ label: "Claude Code"
343
+ }]
344
+ }, {
345
+ id: "codex",
346
+ label: "Codex",
347
+ icon: "OpenAI",
348
+ steps: [{
349
+ kind: "run_command",
350
+ id: "launch",
351
+ command: `codex -c 'features.network_proxy.enabled=true' -c 'features.network_proxy.domains={ "${AGENT_SETUP_DOMAIN}" = "allow" }' -c 'sandbox_workspace_write.network_access=true' "${AGENT_SETUP_PROMPT}"`,
352
+ label: "Codex"
353
+ }]
354
+ }];
355
/**
 * Finds the agent {@link SetupTask} whose `id` equals `taskId`.
 *
 * @param taskId - Id of the agent task to look for.
 * @returns The first matching task, or `undefined` when none matches.
 */
function findSetupTask$1(taskId) {
	for (const task of AGENT_REGISTRY) {
		if (task.id === taskId) return task;
	}
	return undefined;
}
359
/**
 * Finds a step of an agent task by task id and step id.
 *
 * @param taskId - Id of the agent task.
 * @param stepId - Id of the step within that task.
 * @returns The matching step, or `undefined` if the task or the step is unknown.
 */
function findSetupStep$2(taskId, stepId) {
	const task = findSetupTask$1(taskId);
	if (task == null) return undefined;
	return task.steps.find((step) => step.id === stepId);
}
366
+ //#endregion
367
+ //#region src/sdk/registry.ts
368
+ /**
369
+ * Every AI SDK offered in manual onboarding, in display order. This is the
370
+ * single source of truth for which SDKs exist and their task ids — adding one
371
+ * to onboarding means giving its adapter a static `setupTask` and listing it
372
+ * here.
373
+ */
374
+ const AI_SDK_REGISTRY = [VercelAISDK];
375
/**
 * Finds the SDK {@link SetupTask} whose `id` equals `taskId`.
 *
 * @param taskId - Id of the SDK task to look for.
 * @returns The `setupTask` of the first matching registry entry, or `undefined`.
 */
function findSetupTask(taskId) {
	const match = AI_SDK_REGISTRY.find((cls) => cls.setupTask.id === taskId);
	return match?.setupTask;
}
379
/**
 * Finds a step of an SDK task by task id and step id.
 *
 * @param taskId - Id of the SDK task.
 * @param stepId - Id of the step within that task.
 * @returns The matching step, or `undefined` if the task or the step is unknown.
 */
function findSetupStep$1(taskId, stepId) {
	const task = findSetupTask(taskId);
	if (task == null) return undefined;
	return task.steps.find((step) => step.id === stepId);
}
386
+ //#endregion
387
+ //#region src/server/setup-tasks.ts
388
/**
 * Looks a setup step up by `taskId`/`stepId`, consulting the SDK registry
 * first and falling back to the agent registry.
 *
 * @returns The step, or `undefined` when neither registry knows it.
 */
function findSetupStep(taskId, stepId) {
	const sdkStep = findSetupStep$1(taskId, stepId);
	if (sdkStep != null) return sdkStep;
	return findSetupStep$2(taskId, stepId);
}
395
/**
 * Error raised when a requested task id or step id is not present in the
 * registry. The route layer answers it with a 404, which keeps it apart from
 * failures that happen while a step executes.
 */
var SetupStepNotFoundError = class extends Error {
	/** Own `name` property, used by callers to recognise this error type. */
	name = "SetupStepNotFoundError";
	/** @param message - Human-readable description of what was not found. */
	constructor(message) {
		super(message);
	}
};
405
/**
 * Executes a single onboarding step, resolved server-side by
 * `taskId`/`stepId`.
 *
 * The client only sends ids; the step definition (file contents, command, ...)
 * comes entirely from the registries consulted by `findSetupStep`, so a
 * request can never write arbitrary files or run arbitrary commands.
 *
 * Only `create_config` steps are executed here; `run_command` and
 * `install_package` steps are rejected as not yet supported.
 *
 * @param rootPath - Absolute path to the project root.
 * @param taskId - Id of the {@link SetupTask} to run a step from.
 * @param stepId - Id of the step within that task.
 * @returns For `create_config`, `{ path }` with the path of the written file.
 * @throws {SetupStepNotFoundError} if the task or step id is unknown.
 * @throws if the step kind is unsupported or execution fails (e.g. the config
 * file already exists).
 */
async function executeSetupStep(rootPath, taskId, stepId) {
	const step = findSetupStep(taskId, stepId);
	if (!step) throw new SetupStepNotFoundError(`Unknown step '${stepId}' for task '${taskId}'`);
	switch (step.kind) {
		case "create_config": return { path: await writeConfigFile(rootPath, step) };
		case "run_command":
		case "install_package": throw new Error(`${step.kind} steps are not yet supported`);
		// The route layer forwards `error.message` to the client, so never throw
		// an empty message for an unrecognised kind.
		default: throw new Error(`Unknown step kind '${step.kind}' for step '${stepId}' of task '${taskId}'`);
	}
}
430
/**
 * Builds the onboarding task lists — coding agents and AI SDKs — with every
 * step passed through `resolveStepStatus` for the project at `rootPath`.
 *
 * @param rootPath - Absolute path to the project root.
 * @returns `{ agent, sdk }`, each an array of tasks with resolved steps.
 */
function resolveSetupTasks(rootPath) {
	const withStatus = (task) => {
		const steps = task.steps.map((step) => resolveStepStatus(rootPath, step));
		return { ...task, steps };
	};
	const agent = AGENT_REGISTRY.map((task) => withStatus(task));
	const sdk = AI_SDK_REGISTRY.map((cls) => withStatus(cls.setupTask));
	return { agent, sdk };
}
447
/**
 * Adds runtime status to a single step where it can be determined.
 *
 * - `create_config`: `completed` is whether `step.path` exists under `rootPath`.
 * - `install_package`: `completed` is whether `step.package` is installed.
 * - `install_package` and `run_command`: `disabledReason` is set when the
 *   first word of the step's command is not an executable on `PATH`.
 *
 * @param rootPath - Absolute path to the project root.
 * @param step - The step definition to annotate; it is copied, not mutated.
 * @returns A copy of `step` with the status fields added.
 * @throws if `step.kind` is not one of the kinds above.
 */
function resolveStepStatus(rootPath, step) {
	switch (step.kind) {
		case "create_config": return {
			...step,
			completed: fs$1.existsSync(path.join(rootPath, step.path))
		};
		case "install_package":
		case "run_command": {
			const result = { ...step };
			if (step.kind === "install_package") result.completed = isPackageInstalled(rootPath, step.package);
			// Trim first: a leading space would otherwise produce an empty first
			// token and silently skip the PATH check.
			const bin = setupStepCommand(step).trim().split(/\s+/)[0];
			if (bin && !isBinaryOnPath(bin)) result.disabledReason = `${bin} not found in PATH`;
			return result;
		}
		default: throw new Error(`Unknown setup step kind '${step.kind}'`);
	}
}
465
/**
 * Whether an executable named `bin` is resolvable on the current `PATH`. Used
 * to disable coding-agent launchers whose CLI isn't installed. Honours
 * `PATHEXT` on Windows; elsewhere it requires the file to be executable.
 *
 * Only regular files count: a directory also passes an `X_OK` access check,
 * so a folder that happens to share the binary's name must not be reported as
 * an installed CLI.
 *
 * @param bin - The bare executable name to look for, e.g. `claude`.
 * @returns `true` if some `PATH` entry contains a matching executable file.
 */
function isBinaryOnPath(bin) {
	const dirs = (process.env.PATH ?? "").split(path.delimiter).filter(Boolean);
	const exts = process.platform === "win32" ? (process.env.PATHEXT ?? ".EXE;.CMD;.BAT;.COM").split(";") : [""];
	for (const dir of dirs) for (const ext of exts) {
		const candidate = path.join(dir, bin + ext);
		try {
			fs$1.accessSync(candidate, fs$1.constants.X_OK);
			if (fs$1.statSync(candidate).isFile()) return true;
		} catch {
			// Missing or non-executable candidate: keep scanning the other entries.
		}
	}
	return false;
}
481
/**
 * Reports whether `pkg` can be found in a `node_modules` folder at `rootPath`
 * or any of its ancestor directories, so hoisted/workspace installs count.
 *
 * @param rootPath - Absolute path to start the search from.
 * @param pkg - The npm package name to look for.
 * @returns `true` as soon as a `node_modules/<pkg>/package.json` is found,
 * `false` once the filesystem root has been checked without a hit.
 */
function isPackageInstalled(rootPath, pkg) {
	let current = rootPath;
	for (;;) {
		const manifest = path.join(current, "node_modules", pkg, "package.json");
		if (fs$1.existsSync(manifest)) return true;
		const parent = path.dirname(current);
		// dirname() of the root is the root itself: nothing left to climb.
		if (parent === current) return false;
		current = parent;
	}
}
497
/**
 * Writes the config file for a `create_config` step, creating parent
 * directories as needed. Refuses to clobber an existing file.
 *
 * The file is opened with the exclusive-create flag (`wx`), so the existence
 * check and the write are a single atomic operation: two concurrent requests
 * cannot both pass a "does it exist?" check and overwrite each other.
 *
 * @param rootPath - Absolute path to the project root.
 * @param step - The step; `step.path` is the target relative to `rootPath`
 * and `step.contents` is the text to write.
 * @returns `step.path`, unchanged.
 * @throws {Error} `"<step.path> already exists"` if the target is present.
 * @throws any other filesystem error raised by `mkdir`/`writeFile`.
 */
async function writeConfigFile(rootPath, step) {
	const filePath = path.join(rootPath, step.path);
	await fs.mkdir(path.dirname(filePath), { recursive: true });
	try {
		await fs.writeFile(filePath, step.contents, {
			encoding: "utf8",
			flag: "wx"
		});
	} catch (err) {
		if (err?.code === "EEXIST") throw new Error(`${step.path} already exists`, { cause: err });
		throw err;
	}
	return step.path;
}
513
+ //#endregion
423
514
  //#region src/server/terminal.ts
424
515
  /** Shell used to run a resolved step command, so shell syntax in it works. */
425
516
  const SHELL = process.env.SHELL || (process.platform === "win32" ? "powershell.exe" : "bash");
426
517
  /**
518
+ * How long a session's PTY is kept alive after its WebSocket drops, waiting for
519
+ * the client to reconnect. Covers the brief gap while the server restarts
520
+ * itself once a config file appears, so the coding agent isn't killed mid-task.
521
+ */
522
+ const GRACE_PERIOD_MS = 1e4;
523
+ /**
427
524
  * Arguments to run `command` in {@link SHELL}, skipping the user's startup files
428
525
  * where the shell allows it. Those rc files (e.g. nvm in `~/.zshrc`) can add
429
526
  * seconds of latency before the command even begins, and they are unnecessary
@@ -456,56 +553,184 @@ function resolveTerminalCommand(taskId, stepId) {
456
553
  if (!step || step.kind === "create_config") return null;
457
554
  return setupStepCommand(step);
458
555
  }
459
- function send(ws, message) {
460
- ws.send(JSON.stringify(message));
556
/**
 * Default PTY spawner: runs `options.command` through {@link SHELL} using
 * `node-pty`.
 *
 * @param options - `command`, `cols`, `rows`, `cwd` and `env` for the PTY;
 * `cols`/`rows` fall back to 80x24 when falsy.
 * @returns The spawned PTY.
 */
function defaultSpawn(options) {
	const shellArgs = shellCommandArgs(options.command);
	const { cols, rows, cwd, env } = options;
	const ptyOptions = {
		name: "xterm-color",
		cols: cols || 80,
		rows: rows || 24,
		cwd,
		env
	};
	return pty.spawn(SHELL, shellArgs, ptyOptions);
}
566
/**
 * A single onboarding terminal: one PTY plus the WebSocket currently attached to
 * it. The PTY outlives any one socket so it can survive the server restart that
 * happens when a config file appears — while a client is attached, output is
 * streamed live; while it is detached, output is buffered and a grace timer
 * reaps the PTY if no client reconnects in time.
 *
 * `onReap` is invoked at most once per session. Several paths end a session
 * (PTY exit, grace expiry, intentional kill) and killing the PTY later
 * triggers its exit event as well; without the guard the owner would be told
 * twice that the session is gone.
 */
var TerminalSession = class {
	/** While detached: PTY output accumulated to replay on reconnect. */
	buffer = null;
	/** Pending grace-period timer handle, if the session is detached. */
	graceTimer;
	/** The currently attached socket, or `null` while detached. */
	socket = null;
	/** Set once the PTY has reported its exit. */
	exited = false;
	/** Set once `onReap` has been called, so it is never called again. */
	reaped = false;
	child;
	onReap;
	gracePeriodMs;
	/**
	 * @param child - The PTY this session owns.
	 * @param onReap - Called once the session is done (exit, grace expiry, or
	 * intentional kill) so the registry can drop it.
	 * @param gracePeriodMs - How long to keep the PTY alive after the socket drops.
	 */
	constructor(child, onReap, gracePeriodMs = GRACE_PERIOD_MS) {
		this.child = child;
		this.onReap = onReap;
		this.gracePeriodMs = gracePeriodMs;
		child.onData((data) => this.handleData(data));
		child.onExit(({ exitCode }) => this.handleExit(exitCode));
	}
	/** JSON-encode `message` and send it to the attached socket, if any. */
	send(message) {
		this.socket?.send(JSON.stringify(message));
	}
	/** PTY output: stream it when attached, otherwise buffer it (if buffering). */
	handleData(data) {
		if (this.socket) this.send({
			type: "data",
			data
		});
		else this.buffer?.push(data);
	}
	/** PTY exited: report the code, close the socket and reap the session. */
	handleExit(code) {
		this.exited = true;
		this.send({
			type: "exit",
			code
		});
		this.socket?.close();
		this.clearGrace();
		this.reap();
	}
	/**
	 * Attach a (re)connected socket. Replays any output buffered while detached,
	 * then resumes live streaming. Cancels a pending grace-period reap.
	 */
	attach(ws) {
		this.clearGrace();
		this.socket = ws;
		if (this.buffer) {
			for (const data of this.buffer) this.send({
				type: "data",
				data
			});
			this.buffer = null;
		}
	}
	/**
	 * The attached socket dropped (e.g. the server is restarting). Start buffering
	 * output and a grace timer; if no client reattaches in time, reap the PTY.
	 */
	detach() {
		if (this.exited || !this.socket) return;
		this.socket = null;
		this.buffer = [];
		this.graceTimer = setTimeout(() => {
			this.buffer = null;
			if (!this.exited) this.child.kill();
			this.reap();
		}, this.gracePeriodMs);
	}
	/** The client intentionally left: kill the PTY now, skipping the grace wait. */
	kill() {
		this.clearGrace();
		this.socket = null;
		this.buffer = null;
		if (!this.exited) this.child.kill();
		this.reap();
	}
	/** Forward the user's keystrokes to the PTY. */
	write(data) {
		this.child.write(data);
	}
	/** Resize the PTY to match the client's terminal. */
	resize(cols, rows) {
		this.child.resize(cols || 80, rows || 24);
	}
	/** Cancel the pending grace-period timer, if there is one. */
	clearGrace() {
		if (this.graceTimer) {
			clearTimeout(this.graceTimer);
			this.graceTimer = void 0;
		}
	}
	/** Notify the owner that the session is finished — once only. */
	reap() {
		if (this.reaped) return;
		this.reaped = true;
		this.onReap();
	}
};
667
/**
 * Holds the live {@link TerminalSession | terminal sessions} keyed by a
 * client-supplied session id. Owned by the CLI process (not by any one server
 * instance) so sessions — and the PTYs they wrap — survive the server restart
 * that happens when a config file appears.
 */
var TerminalSessionRegistry = class {
	sessions = /* @__PURE__ */ new Map();
	spawn;
	gracePeriodMs;
	/**
	 * @param spawn - PTY spawner; overridable in tests.
	 * @param gracePeriodMs - Reconnect grace window passed to each session.
	 */
	constructor(spawn = defaultSpawn, gracePeriodMs = GRACE_PERIOD_MS) {
		this.spawn = spawn;
		this.gracePeriodMs = gracePeriodMs;
	}
	/** The session for `id`, if one is still live. */
	get(id) {
		return this.sessions.get(id);
	}
	/**
	 * Spawn a PTY and register a new session under `id`.
	 *
	 * The reap callback only removes the map entry while it still points at the
	 * session being reaped. Ids come from the client and can be reused, so an
	 * older session finishing late must not evict a newer one registered under
	 * the same id.
	 */
	create(id, options) {
		let session;
		session = new TerminalSession(this.spawn(options), () => {
			if (this.sessions.get(id) === session) this.sessions.delete(id);
		}, this.gracePeriodMs);
		this.sessions.set(id, session);
		return session;
	}
};
696
/**
 * Sends an `error` message to a terminal WebSocket.
 *
 * @param ws - Socket to notify; must expose `send(string)`.
 * @param message - Human-readable error text.
 */
function sendError(ws, message) {
	const payload = {
		type: "error",
		message
	};
	ws.send(JSON.stringify(payload));
}
462
702
  /**
463
703
  * Registers the interactive-terminal WebSocket route at `/api/terminal`.
464
704
  *
465
- * The client connects with `taskId` and `stepId` query params identifying a
466
- * setup step; the server resolves the actual command from its own registry
467
- * (never from the request body) and, once the client signals `start`, spawns it
468
- * in a PTY rooted at the project. Output is streamed to the client and the
469
- * client's keystrokes are written to the process, so prompts (e.g. npm's
470
- * "Ok to proceed?") work. The trust boundary mirrors the step-execute route:
471
- * the client can only ask to run a step that already exists server-side.
705
+ * The client connects with `taskId`, `stepId`, and a client-generated
706
+ * `sessionId` query param. The server resolves the actual command from its own
707
+ * registry (never from the request body). On the first connection for a session
708
+ * the client signals `start` and the server spawns the command in a PTY rooted
709
+ * at the project; output is streamed to the client and the client's keystrokes
710
+ * are written to the process, so prompts (e.g. npm's "Ok to proceed?") work.
711
+ *
712
+ * Sessions live in `sessions`, which outlives this server instance, so when the
713
+ * server restarts itself after a config file appears the PTY keeps running and a
714
+ * reconnecting client (same `sessionId`) re-attaches and replays the gap.
715
+ *
716
+ * The trust boundary mirrors the step-execute route: the client can only ask to
717
+ * run a step that already exists server-side.
472
718
  */
473
- function registerTerminalRoute(app, upgradeWebSocket, rootPath) {
719
+ function registerTerminalRoute(app, upgradeWebSocket, rootPath, sessions) {
474
720
  app.get("/api/terminal", upgradeWebSocket((c) => {
475
721
  const taskId = c.req.query("taskId");
476
722
  const stepId = c.req.query("stepId");
477
- let child;
478
- const start = (ws, cols, rows) => {
479
- if (child) return;
480
- const command = taskId && stepId ? resolveTerminalCommand(taskId, stepId) : null;
481
- if (!command) {
482
- send(ws, {
483
- type: "error",
484
- message: "Unknown or non-runnable setup step."
485
- });
486
- ws.close();
487
- return;
488
- }
489
- child = pty.spawn(SHELL, shellCommandArgs(command), {
490
- name: "xterm-color",
491
- cols: cols || 80,
492
- rows: rows || 24,
493
- cwd: rootPath,
494
- env: process.env
495
- });
496
- child.onData((data) => send(ws, {
497
- type: "data",
498
- data
499
- }));
500
- child.onExit(({ exitCode }) => {
501
- send(ws, {
502
- type: "exit",
503
- code: exitCode
504
- });
505
- ws.close();
506
- });
507
- };
723
+ const sessionId = c.req.query("sessionId") ?? `${taskId}:${stepId}`;
724
+ let session;
725
+ let leaving = false;
508
726
  return {
727
+ onOpen(_evt, ws) {
728
+ const existing = sessions.get(sessionId);
729
+ if (existing) {
730
+ session = existing;
731
+ existing.attach(ws);
732
+ }
733
+ },
509
734
  onMessage(evt, ws) {
510
735
  let msg;
511
736
  try {
@@ -514,20 +739,38 @@ function registerTerminalRoute(app, upgradeWebSocket, rootPath) {
514
739
  return;
515
740
  }
516
741
  switch (msg.type) {
517
- case "start":
518
- start(ws, msg.cols, msg.rows);
742
+ case "start": {
743
+ if (session) return;
744
+ const command = taskId && stepId ? resolveTerminalCommand(taskId, stepId) : null;
745
+ if (!command) {
746
+ sendError(ws, "Unknown or non-runnable setup step.");
747
+ ws.close();
748
+ return;
749
+ }
750
+ session = sessions.create(sessionId, {
751
+ command,
752
+ cols: msg.cols,
753
+ rows: msg.rows,
754
+ cwd: rootPath,
755
+ env: process.env
756
+ });
757
+ session.attach(ws);
519
758
  break;
759
+ }
520
760
  case "input":
521
- child?.write(msg.data);
761
+ session?.write(msg.data);
522
762
  break;
523
763
  case "resize":
524
- child?.resize(msg.cols || 80, msg.rows || 24);
764
+ session?.resize(msg.cols || 80, msg.rows || 24);
765
+ break;
766
+ case "detach":
767
+ leaving = true;
768
+ session?.kill();
525
769
  break;
526
770
  }
527
771
  },
528
772
  onClose() {
529
- child?.kill();
530
- child = void 0;
773
+ if (!leaving) session?.detach();
531
774
  }
532
775
  };
533
776
  }));
@@ -535,7 +778,7 @@ function registerTerminalRoute(app, upgradeWebSocket, rootPath) {
535
778
  //#endregion
536
779
  //#region src/server/index.ts
537
780
  async function startServer(options) {
538
- const { promptProviders, traceProviders, port, rootPath, hasConfig } = options;
781
+ const { promptProviders, traceProviders, port, rootPath, hasConfig, terminalSessions } = options;
539
782
  const promptProviderMap = new Map(promptProviders.map((p) => [p.id, p]));
540
783
  const traceProviderMap = new Map(traceProviders.map((p) => [p.id, p]));
541
784
  const promptRegistry = new PromptRegistry();
@@ -562,9 +805,13 @@ async function startServer(options) {
562
805
  rootPath,
563
806
  hasConfig,
564
807
  tracer,
565
- defaultTraceProviderId
808
+ defaultTraceProviderId,
809
+ setupTasks: {
810
+ resolve: resolveSetupTasks,
811
+ executeStep: executeSetupStep
812
+ }
566
813
  });
567
- registerTerminalRoute(app, upgradeWebSocket, rootPath);
814
+ registerTerminalRoute(app, upgradeWebSocket, rootPath, terminalSessions);
568
815
  const clientRoot = fileURLToPath(new URL("../client/", import.meta.url));
569
816
  app.get("*", serveStatic({ root: clientRoot }));
570
817
  for (const [providerId, provider] of promptProviderMap) if (provider.watch) provider.watch(async (event) => {
@@ -600,6 +847,7 @@ async function startServer(options) {
600
847
  });
601
848
  });
602
849
  const close = () => new Promise((resolve, reject) => {
850
+ for (const ws of wss.clients) ws.close();
603
851
  if ("closeAllConnections" in server) server.closeAllConnections();
604
852
  server.close((err) => err ? reject(err) : resolve());
605
853
  });
@@ -823,14 +1071,15 @@ function applyDotenv(rootDir) {
823
1071
  if (err?.code !== "ENOENT") console.warn(`Warning: failed to load .env from ${envPath}:`, err.message);
824
1072
  }
825
1073
  }
826
- function startConfiguredServer(rootDir, config, hasConfig, port) {
1074
+ function startConfiguredServer(rootDir, config, hasConfig, port, terminalSessions) {
827
1075
  if (config.useDotenv !== false) applyDotenv(rootDir);
828
1076
  return startServer({
829
1077
  promptProviders: config.promptProviders ?? [],
830
1078
  traceProviders: config.traceProviders ?? [new MemoryTraceProvider()],
831
1079
  port,
832
1080
  rootPath: rootDir,
833
- hasConfig
1081
+ hasConfig,
1082
+ terminalSessions
834
1083
  });
835
1084
  }
836
1085
  async function main() {
@@ -848,11 +1097,12 @@ async function main() {
848
1097
  const maybeOpen = (url) => {
849
1098
  if (!process.env.EVALUTION_NO_OPEN) openBrowser(url);
850
1099
  };
1100
+ const terminalSessions = new TerminalSessionRegistry();
851
1101
  if (hasConfig) {
852
- maybeOpen((await startConfiguredServer(rootDir, await loadConfig(rootDir), true, port)).url);
1102
+ maybeOpen((await startConfiguredServer(rootDir, await loadConfig(rootDir), true, port, terminalSessions)).url);
853
1103
  return;
854
1104
  }
855
- let server = await startConfiguredServer(rootDir, {}, false, port);
1105
+ let server = await startConfiguredServer(rootDir, {}, false, port, terminalSessions);
856
1106
  maybeOpen(server.url);
857
1107
  console.log(`👀 No config found; watching ${path.join(rootDir, ".evalution", "config.ts")} for creation...`);
858
1108
  const stopWatching = watchForConfigCreation(rootDir, async () => {
@@ -860,7 +1110,7 @@ async function main() {
860
1110
  console.log("⚙️ Config loaded; restarting server...");
861
1111
  stopWatching();
862
1112
  await server.close();
863
- server = await startConfiguredServer(rootDir, config, true, port);
1113
+ server = await startConfiguredServer(rootDir, config, true, port, terminalSessions);
864
1114
  });
865
1115
  }
866
1116
  main().catch((error) => {