evalution 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +430 -180
- package/dist/client/assets/{index-CORbBplP.js → index-C4XXh0na.js} +38 -38
- package/dist/client/assets/{index-CgcFVsRZ.css → index-CHPZsnKo.css} +1 -1
- package/dist/client/index.html +2 -2
- package/dist/index.d.ts +86 -56
- package/dist/index.js +92 -126
- package/dist/{vercel-ai-sdk-CareWPDM.js → vercel-ai-sdk-B8ivuOzP.js} +129 -35
- package/package.json +3 -3
package/dist/cli/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { a as MemoryTraceProvider, i as setupStepCommand, r as CONFIG_FILE_RELATIVE_PATH, t as VercelAISDK } from "../vercel-ai-sdk-
|
|
1
|
+
import { a as MemoryTraceProvider, i as setupStepCommand, r as CONFIG_FILE_RELATIVE_PATH, t as VercelAISDK } from "../vercel-ai-sdk-B8ivuOzP.js";
|
|
2
2
|
import module from "node:module";
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
4
|
import path from "node:path";
|
|
@@ -74,123 +74,15 @@ var PromptRegistry = class {
|
|
|
74
74
|
}
|
|
75
75
|
};
|
|
76
76
|
//#endregion
|
|
77
|
-
//#region src/sdk/registry.ts
|
|
78
|
-
/**
|
|
79
|
-
* Every AI SDK offered in manual onboarding, in display order. This is the
|
|
80
|
-
* single source of truth for which SDKs exist and their task ids — adding one
|
|
81
|
-
* to onboarding means giving its adapter a static `setupTask` and listing it
|
|
82
|
-
* here.
|
|
83
|
-
*/
|
|
84
|
-
const AI_SDK_REGISTRY = [VercelAISDK];
|
|
85
|
-
/** Look up a {@link SetupTask} by its id, or `undefined` if none matches. */
|
|
86
|
-
function findSetupTask(taskId) {
|
|
87
|
-
for (const cls of AI_SDK_REGISTRY) if (cls.setupTask.id === taskId) return cls.setupTask;
|
|
88
|
-
}
|
|
89
|
-
/**
|
|
90
|
-
* Look up a step within a task by both ids, or `undefined` if either is
|
|
91
|
-
* unknown.
|
|
92
|
-
*/
|
|
93
|
-
function findSetupStep(taskId, stepId) {
|
|
94
|
-
return findSetupTask(taskId)?.steps.find((s) => s.id === stepId);
|
|
95
|
-
}
|
|
96
|
-
//#endregion
|
|
97
|
-
//#region src/server/setup-tasks.ts
|
|
98
|
-
/**
|
|
99
|
-
* Thrown when a requested task or step id does not exist in the registry. The
|
|
100
|
-
* route layer maps this to a 404, distinguishing it from execution failures.
|
|
101
|
-
*/
|
|
102
|
-
var SetupStepNotFoundError = class extends Error {
|
|
103
|
-
constructor(message) {
|
|
104
|
-
super(message);
|
|
105
|
-
this.name = "SetupStepNotFoundError";
|
|
106
|
-
}
|
|
107
|
-
};
|
|
108
|
-
/**
|
|
109
|
-
* Executes a single onboarding step, resolved from the server-side registry by
|
|
110
|
-
* `taskId`/`stepId`.
|
|
111
|
-
*
|
|
112
|
-
* The client only sends ids; the step definition (file contents, command, ...)
|
|
113
|
-
* comes entirely from {@link AI_SDK_REGISTRY}, so a request can never write
|
|
114
|
-
* arbitrary files or run arbitrary commands.
|
|
115
|
-
*
|
|
116
|
-
* @param rootPath - Absolute path to the project root.
|
|
117
|
-
* @param taskId - Id of the {@link SetupTask} to run a step from.
|
|
118
|
-
* @param stepId - Id of the step within that task.
|
|
119
|
-
* @throws {SetupStepNotFoundError} if the task or step id is unknown.
|
|
120
|
-
* @throws if the step kind is unsupported or execution fails (e.g. the config
|
|
121
|
-
* file already exists).
|
|
122
|
-
*/
|
|
123
|
-
async function executeSetupStep(rootPath, taskId, stepId) {
|
|
124
|
-
const step = findSetupStep(taskId, stepId);
|
|
125
|
-
if (!step) throw new SetupStepNotFoundError(`Unknown step '${stepId}' for task '${taskId}'`);
|
|
126
|
-
switch (step.kind) {
|
|
127
|
-
case "create_config": return { path: await writeConfigFile(rootPath, step) };
|
|
128
|
-
case "run_command":
|
|
129
|
-
case "install_package": throw new Error(`${step.kind} steps are not yet supported`);
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
|
-
/**
|
|
133
|
-
* Returns the onboarding tasks with each step's runtime
|
|
134
|
-
* {@link SetupStepBase.completed | completion status} resolved against the
|
|
135
|
-
* project at `rootPath` (config file present, package installed).
|
|
136
|
-
*
|
|
137
|
-
* @param rootPath - Absolute path to the project root.
|
|
138
|
-
*/
|
|
139
|
-
function resolveSetupTasks(rootPath) {
|
|
140
|
-
return AI_SDK_REGISTRY.map((cls) => ({
|
|
141
|
-
...cls.setupTask,
|
|
142
|
-
steps: cls.setupTask.steps.map((step) => resolveStepStatus(rootPath, step))
|
|
143
|
-
}));
|
|
144
|
-
}
|
|
145
|
-
/** Adds the runtime `completed` flag to a single step where determinable. */
|
|
146
|
-
function resolveStepStatus(rootPath, step) {
|
|
147
|
-
switch (step.kind) {
|
|
148
|
-
case "install_package": return {
|
|
149
|
-
...step,
|
|
150
|
-
completed: isPackageInstalled(rootPath, step.package)
|
|
151
|
-
};
|
|
152
|
-
case "create_config": return {
|
|
153
|
-
...step,
|
|
154
|
-
completed: fs$1.existsSync(path.join(rootPath, step.path))
|
|
155
|
-
};
|
|
156
|
-
case "run_command": return step;
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
/**
|
|
160
|
-
* Whether `pkg` is installed for the project at `rootPath`, walking up the
|
|
161
|
-
* directory tree to honour hoisted/workspace `node_modules`.
|
|
162
|
-
*
|
|
163
|
-
* @param rootPath - Absolute path to start the search from.
|
|
164
|
-
* @param pkg - The npm package name to look for.
|
|
165
|
-
*/
|
|
166
|
-
function isPackageInstalled(rootPath, pkg) {
|
|
167
|
-
let dir = rootPath;
|
|
168
|
-
while (true) {
|
|
169
|
-
if (fs$1.existsSync(path.join(dir, "node_modules", pkg, "package.json"))) return true;
|
|
170
|
-
const parent = path.dirname(dir);
|
|
171
|
-
if (parent === dir) return false;
|
|
172
|
-
dir = parent;
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
/**
|
|
176
|
-
* Writes the config file for a `create_config` step, creating parent
|
|
177
|
-
* directories as needed. Refuses to clobber an existing file.
|
|
178
|
-
*/
|
|
179
|
-
async function writeConfigFile(rootPath, step) {
|
|
180
|
-
const filePath = path.join(rootPath, step.path);
|
|
181
|
-
try {
|
|
182
|
-
await fs.access(filePath);
|
|
183
|
-
throw new Error(`${step.path} already exists`);
|
|
184
|
-
} catch (err) {
|
|
185
|
-
if (err?.code !== "ENOENT") throw err;
|
|
186
|
-
}
|
|
187
|
-
await fs.mkdir(path.dirname(filePath), { recursive: true });
|
|
188
|
-
await fs.writeFile(filePath, step.contents, "utf8");
|
|
189
|
-
return step.path;
|
|
190
|
-
}
|
|
191
|
-
//#endregion
|
|
192
77
|
//#region src/server/api-routes.ts
|
|
193
|
-
|
|
78
|
+
/**
 * Decodes a URL-safe base64 prompt id produced by `encodePromptId`.
 *
 * Uses the Web `atob`/`TextDecoder` globals (rather than Node's `Buffer`) so it
 * works in browser/worker bundles too. `atob` alone yields one character per
 * byte, which garbles any id whose encoded bytes are multi-byte UTF-8, so the
 * bytes are decoded as UTF-8 first. If they are not valid UTF-8 the raw
 * byte-per-character string is returned, i.e. exactly what plain `atob` gives.
 *
 * NOTE(review): `encodePromptId` is not visible in this chunk — confirm it
 * emits UTF-8 bytes for non-ASCII ids.
 *
 * @param encoded - Base64 text using the URL-safe alphabet (`-` and `_`).
 * @returns The decoded prompt id.
 * @throws if `encoded` is not valid base64 (propagated from `atob`).
 */
function decodePromptId(encoded) {
  // Map the URL-safe alphabet back to the standard one that `atob` expects.
  const standardB64 = encoded.replace(/-/g, "+").replace(/_/g, "/");
  const binary = atob(standardB64);
  const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
  try {
    // `fatal` makes invalid UTF-8 throw instead of yielding U+FFFD;
    // `ignoreBOM` keeps a leading BOM so the id round-trips byte-for-byte.
    return new TextDecoder("utf-8", { fatal: true, ignoreBOM: true }).decode(bytes);
  } catch {
    return binary;
  }
}
|
|
85
|
+
function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hotReloadSubscribers, rootPath, hasConfig, tracer, defaultTraceProviderId, setupTasks, executeDisabledMessage }) {
|
|
194
86
|
const resolveSpanPrompt = (span) => {
|
|
195
87
|
if (!span.prompt) return span;
|
|
196
88
|
const resolved = promptRegistry.resolve(span.prompt.id, span.prompt.providerId);
|
|
@@ -207,13 +99,17 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
|
|
|
207
99
|
rootPath,
|
|
208
100
|
configured: hasConfig
|
|
209
101
|
}));
|
|
210
|
-
app.get("/api/setup-tasks", (c) => c.json(
|
|
102
|
+
app.get("/api/setup-tasks", (c) => c.json(setupTasks ? setupTasks.resolve(rootPath) : {
|
|
103
|
+
agent: [],
|
|
104
|
+
sdk: []
|
|
105
|
+
}));
|
|
211
106
|
app.post("/api/setup-tasks/:taskId/steps/:stepId/execute", async (c) => {
|
|
107
|
+
if (!setupTasks) return c.json({ error: "Setup tasks are not available" }, 404);
|
|
212
108
|
const { taskId, stepId } = c.req.param();
|
|
213
109
|
try {
|
|
214
|
-
return c.json(await
|
|
110
|
+
return c.json(await setupTasks.executeStep(rootPath, taskId, stepId));
|
|
215
111
|
} catch (error) {
|
|
216
|
-
const status = error
|
|
112
|
+
const status = error?.name === "SetupStepNotFoundError" ? 404 : 400;
|
|
217
113
|
return c.json({ error: error.message }, status);
|
|
218
114
|
}
|
|
219
115
|
});
|
|
@@ -270,7 +166,7 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
|
|
|
270
166
|
const { providerId, id } = c.req.param();
|
|
271
167
|
const provider = promptProviders.get(providerId);
|
|
272
168
|
if (!provider) return c.json({ error: "Provider not found" }, 404);
|
|
273
|
-
const decodedId =
|
|
169
|
+
const decodedId = decodePromptId(id);
|
|
274
170
|
const prompt = await provider.getPrompt(decodedId);
|
|
275
171
|
if (!prompt) return c.json({ error: "Prompt not found" }, 404);
|
|
276
172
|
return c.json({
|
|
@@ -288,7 +184,7 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
|
|
|
288
184
|
const provider = promptProviders.get(providerId);
|
|
289
185
|
if (!provider) return c.json({ error: "Provider not found" }, 404);
|
|
290
186
|
if (!provider.renamePrompt) return c.json({ error: "This provider does not support renaming" }, 405);
|
|
291
|
-
const decodedId =
|
|
187
|
+
const decodedId = decodePromptId(id);
|
|
292
188
|
const updatedPrompt = await provider.renamePrompt(decodedId, newName);
|
|
293
189
|
return c.json({
|
|
294
190
|
...updatedPrompt,
|
|
@@ -304,7 +200,7 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
|
|
|
304
200
|
const provider = promptProviders.get(providerId);
|
|
305
201
|
if (!provider) return c.json({ error: "Provider not found" }, 404);
|
|
306
202
|
if (!provider.updatePromptProperties) return c.json({ error: "This provider does not support editing" }, 405);
|
|
307
|
-
const decodedId =
|
|
203
|
+
const decodedId = decodePromptId(id);
|
|
308
204
|
const updatedPrompt = await provider.updatePromptProperties(decodedId, await c.req.json());
|
|
309
205
|
return c.json({
|
|
310
206
|
...updatedPrompt,
|
|
@@ -316,16 +212,17 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
|
|
|
316
212
|
});
|
|
317
213
|
app.post("/api/prompts/:providerId/:id/execute", async (c) => {
|
|
318
214
|
try {
|
|
215
|
+
if (executeDisabledMessage) return c.json({ error: executeDisabledMessage }, 400);
|
|
319
216
|
const { providerId, id } = c.req.param();
|
|
320
217
|
const provider = promptProviders.get(providerId);
|
|
321
218
|
if (!provider) return c.json({ error: "Provider not found" }, 404);
|
|
322
|
-
const decodedId =
|
|
219
|
+
const decodedId = decodePromptId(id);
|
|
323
220
|
const { functionParams = [] } = await c.req.json().catch(() => ({}));
|
|
324
221
|
const prompt = await provider.getPrompt(decodedId);
|
|
325
222
|
if (!prompt) return c.json({ error: "Prompt not found" }, 404);
|
|
326
223
|
const response = tracer.startActiveSpan(prompt.name, (span) => {
|
|
327
224
|
const { traceId } = span.spanContext();
|
|
328
|
-
provider.execute(decodedId, functionParams
|
|
225
|
+
provider.execute(decodedId, functionParams).then(() => {
|
|
329
226
|
span.setStatus({ code: SpanStatusCode.OK });
|
|
330
227
|
}, (err) => {
|
|
331
228
|
console.error("prompt execution failed:", err);
|
|
@@ -420,10 +317,210 @@ function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hot
|
|
|
420
317
|
}));
|
|
421
318
|
}
|
|
422
319
|
//#endregion
|
|
320
|
+
//#region src/agent/registry.ts
|
|
321
|
+
/** Host the setup instructions are served from; also the only domain the agents are allowed to fetch. */
const AGENT_SETUP_DOMAIN = "evalut.io";
/** The prompt handed to a coding agent. */
const AGENT_SETUP_PROMPT = `Follow manual setup steps from https://${AGENT_SETUP_DOMAIN}/n/docs/setup.md`;
/**
 * Coding agents that get a one-click launcher in onboarding, in display order.
 * Each entry is a {@link SetupTask} with a single `run_command`
 * {@link SetupStep} (id `launch`) whose command starts the agent's CLI with
 * the setup prompt as its argument.
 *
 * Mirrors {@link AI_SDK_REGISTRY} in `../sdk/registry.ts`, but agents have no
 * adapter class, so they live here as plain tasks. `icon` keys into the
 * client's `ProviderIcon`.
 */
const AGENT_REGISTRY = [
  {
    id: "claude-code",
    label: "Claude Code",
    icon: "Anthropic",
    command: `claude "${AGENT_SETUP_PROMPT}" --allowedTools "WebFetch(domain:${AGENT_SETUP_DOMAIN})"`
  },
  {
    id: "codex",
    label: "Codex",
    icon: "OpenAI",
    command: `codex -c 'features.network_proxy.enabled=true' -c 'features.network_proxy.domains={ "${AGENT_SETUP_DOMAIN}" = "allow" }' -c 'sandbox_workspace_write.network_access=true' "${AGENT_SETUP_PROMPT}"`
  }
].map(({ command, ...task }) => ({
  ...task,
  // The lone step reuses the task's label and is always called "launch".
  steps: [{
    kind: "run_command",
    id: "launch",
    command,
    label: task.label
  }]
}));
|
|
355
|
+
/**
 * Finds the agent {@link SetupTask} whose `id` equals `taskId`.
 *
 * @param taskId - Task id to search {@link AGENT_REGISTRY} for.
 * @returns The first matching task, or `undefined` when none matches.
 */
function findSetupTask$1(taskId) {
  for (const task of AGENT_REGISTRY) {
    if (task.id === taskId) return task;
  }
  return undefined;
}
|
|
359
|
+
/**
 * Finds a step inside an agent task.
 *
 * @param taskId - Id of the agent task to look in.
 * @param stepId - Id of the step within that task.
 * @returns The matching step, or `undefined` if the task or the step is unknown.
 */
function findSetupStep$2(taskId, stepId) {
  const task = findSetupTask$1(taskId);
  if (task == null) return undefined;
  return task.steps.find((step) => step.id === stepId);
}
|
|
366
|
+
//#endregion
|
|
367
|
+
//#region src/sdk/registry.ts
|
|
368
|
+
/**
 * AI SDK adapters offered in manual onboarding, in display order. Each listed
 * class exposes a static `setupTask`; adding an SDK to onboarding means giving
 * its adapter that static and listing the adapter here.
 */
const AI_SDK_REGISTRY = [VercelAISDK];
/**
 * Finds the SDK {@link SetupTask} whose `id` equals `taskId`.
 *
 * @param taskId - Task id to search {@link AI_SDK_REGISTRY} for.
 * @returns The matching adapter's `setupTask`, or `undefined` when none matches.
 */
function findSetupTask(taskId) {
  const owner = AI_SDK_REGISTRY.find((cls) => cls.setupTask.id === taskId);
  return owner?.setupTask;
}
|
|
379
|
+
/**
 * Finds a step inside an SDK task.
 *
 * @param taskId - Id of the SDK task to look in.
 * @param stepId - Id of the step within that task.
 * @returns The matching step, or `undefined` if the task or the step is unknown.
 */
function findSetupStep$1(taskId, stepId) {
  const task = findSetupTask(taskId);
  if (task == null) return undefined;
  return task.steps.find((step) => step.id === stepId);
}
|
|
386
|
+
//#endregion
|
|
387
|
+
//#region src/server/setup-tasks.ts
|
|
388
|
+
/**
 * Resolves a setup step by `taskId`/`stepId`, consulting the SDK registry
 * first and the agent registry second.
 *
 * @param taskId - Id of the task to look in.
 * @param stepId - Id of the step within that task.
 * @returns The matching step, or `undefined` if neither registry knows it.
 */
function findSetupStep(taskId, stepId) {
  const sdkStep = findSetupStep$1(taskId, stepId);
  if (sdkStep !== undefined && sdkStep !== null) return sdkStep;
  return findSetupStep$2(taskId, stepId);
}
|
|
395
|
+
/**
 * Raised when a requested task or step id is not present in any registry.
 * Carries a distinct `name` so the route layer can tell it apart from
 * execution failures and answer with a 404.
 */
var SetupStepNotFoundError = class extends Error {
  /** Discriminator checked by callers (set as an own property on each instance). */
  name = "SetupStepNotFoundError";
  /** @param message - Human-readable description of what was not found. */
  constructor(message) {
    super(message);
  }
};
|
|
405
|
+
/**
 * Executes a single onboarding step, resolved from the server-side registries
 * by `taskId`/`stepId`.
 *
 * The client only sends ids; the step definition (file contents, command, ...)
 * is looked up through {@link findSetupStep}, which consults the SDK and agent
 * registries, so a request can never supply its own file contents or command.
 *
 * Only `create_config` steps are executed here; `run_command` and
 * `install_package` steps are rejected as not yet supported.
 *
 * @param rootPath - Absolute path to the project root.
 * @param taskId - Id of the {@link SetupTask} to run a step from.
 * @param stepId - Id of the step within that task.
 * @returns For `create_config`, `{ path }` holding the step's relative path.
 * @throws {SetupStepNotFoundError} if the task or step id is unknown.
 * @throws if the step kind is unsupported or execution fails (e.g. the config
 * file already exists).
 */
async function executeSetupStep(rootPath, taskId, stepId) {
  const step = findSetupStep(taskId, stepId);
  if (!step) throw new SetupStepNotFoundError(`Unknown step '${stepId}' for task '${taskId}'`);
  switch (step.kind) {
    case "create_config": return { path: await writeConfigFile(rootPath, step) };
    case "run_command":
    case "install_package": throw new Error(`${step.kind} steps are not yet supported`);
    // A message-less Error would reach the client as an empty `error` string.
    default: throw new Error(`Unsupported setup step kind '${step.kind}'`);
  }
}
|
|
430
|
+
/**
 * Builds the onboarding task lists — coding agents and AI SDKs — with every
 * step passed through {@link resolveStepStatus} so it carries its runtime
 * status for the project at `rootPath`.
 *
 * @param rootPath - Absolute path to the project root.
 * @returns `{ agent, sdk }`, each an array of tasks with resolved steps.
 */
function resolveSetupTasks(rootPath) {
  const withStatus = (task) => {
    const steps = task.steps.map((step) => resolveStepStatus(rootPath, step));
    return { ...task, steps };
  };
  const agent = AGENT_REGISTRY.map((task) => withStatus(task));
  const sdk = AI_SDK_REGISTRY.map((cls) => withStatus(cls.setupTask));
  return { agent, sdk };
}
|
|
447
|
+
/**
 * Returns a copy of `step` annotated with whatever runtime status can be
 * determined for the project at `rootPath`:
 *
 * - `create_config`: `completed` is whether the config file already exists.
 * - `install_package`: `completed` is whether the package is installed.
 * - `install_package` / `run_command`: `disabledReason` is set when the first
 *   whitespace-separated token of the step's command is not found on `PATH`.
 *
 * @param rootPath - Absolute path to the project root.
 * @param step - The step definition to annotate; it is not mutated.
 * @throws if `step.kind` is not one of the kinds above.
 */
function resolveStepStatus(rootPath, step) {
  switch (step.kind) {
    case "create_config": return {
      ...step,
      completed: fs$1.existsSync(path.join(rootPath, step.path))
    };
    case "install_package":
    case "run_command": {
      const result = { ...step };
      if (step.kind === "install_package") result.completed = isPackageInstalled(rootPath, step.package);
      // Only the executable (first token) is probed; arguments are ignored.
      const bin = setupStepCommand(step).split(/\s+/)[0];
      if (bin && !isBinaryOnPath(bin)) result.disabledReason = `${bin} not found in PATH`;
      return result;
    }
    // Name the offending kind so a bad registry entry is diagnosable.
    default: throw new Error(`Unsupported setup step kind '${step.kind}'`);
  }
}
|
|
465
|
+
/**
 * Whether an executable named `bin` is resolvable on the current `PATH`. Used
 * to disable launchers whose CLI isn't installed. On Windows each `PATHEXT`
 * extension is tried; elsewhere the bare name is used. A candidate must pass
 * the `X_OK` access check and be a regular file — a directory that happens to
 * share the name (directories usually pass `X_OK`) does not count.
 *
 * @param bin - The bare executable name to look for, e.g. `claude`.
 * @returns `true` as soon as one matching file is found, otherwise `false`.
 */
function isBinaryOnPath(bin) {
  const dirs = (process.env.PATH ?? "").split(path.delimiter).filter(Boolean);
  const exts = process.platform === "win32" ? (process.env.PATHEXT ?? ".EXE;.CMD;.BAT;.COM").split(";") : [""];
  for (const dir of dirs) {
    for (const ext of exts) {
      const candidate = path.join(dir, bin + ext);
      try {
        fs$1.accessSync(candidate, fs$1.constants.X_OK);
        if (fs$1.statSync(candidate).isFile()) return true;
      } catch {
        // Missing, not executable, or unreadable: not a match — keep searching.
      }
    }
  }
  return false;
}
|
|
481
|
+
/**
 * Reports whether `pkg` can be found in a `node_modules` directory at
 * `rootPath` or any of its ancestors, so hoisted/workspace installs count.
 * A package counts as present when its `package.json` exists.
 *
 * @param rootPath - Absolute path to start the search from.
 * @param pkg - The npm package name to look for.
 * @returns `true` on the first hit; `false` once the filesystem root is passed.
 */
function isPackageInstalled(rootPath, pkg) {
  for (let current = rootPath;;) {
    const manifest = path.join(current, "node_modules", pkg, "package.json");
    if (fs$1.existsSync(manifest)) return true;
    const next = path.dirname(current);
    // `dirname` of the root is the root itself: nothing left to climb.
    if (next === current) return false;
    current = next;
  }
}
|
|
497
|
+
/**
 * Writes the config file for a `create_config` step, creating parent
 * directories as needed. Refuses to clobber an existing file: the file is
 * opened with the exclusive-create flag (`wx`), so the existence check and the
 * write are one atomic operation rather than a racy check-then-write.
 *
 * @param rootPath - Absolute path to the project root.
 * @param step - The step; `step.path` is relative to `rootPath` and
 * `step.contents` is the text to write.
 * @returns `step.path`, unchanged.
 * @throws Error `"<step.path> already exists"` if something is already at the
 * target path; other filesystem errors propagate as-is.
 */
async function writeConfigFile(rootPath, step) {
  const filePath = path.join(rootPath, step.path);
  await fs.mkdir(path.dirname(filePath), { recursive: true });
  try {
    await fs.writeFile(filePath, step.contents, {
      encoding: "utf8",
      flag: "wx"
    });
  } catch (err) {
    if (err?.code === "EEXIST") throw new Error(`${step.path} already exists`, { cause: err });
    throw err;
  }
  return step.path;
}
|
|
513
|
+
//#endregion
|
|
423
514
|
//#region src/server/terminal.ts
|
|
424
515
|
/**
 * Shell a resolved step command is run in, so shell syntax inside it works.
 * Taken from `$SHELL` when that is set and non-empty; otherwise PowerShell on
 * Windows and bash elsewhere.
 */
const SHELL = process.env.SHELL
  ? process.env.SHELL
  : process.platform === "win32" ? "powershell.exe" : "bash";
/**
 * Milliseconds a session's PTY is kept alive after its WebSocket drops while
 * waiting for the client to reconnect. Covers the brief gap while the server
 * restarts itself once a config file appears, so the coding agent isn't killed
 * mid-task.
 */
const GRACE_PERIOD_MS = 10_000;
|
|
523
|
+
/**
|
|
427
524
|
* Arguments to run `command` in {@link SHELL}, skipping the user's startup files
|
|
428
525
|
* where the shell allows it. Those rc files (e.g. nvm in `~/.zshrc`) can add
|
|
429
526
|
* seconds of latency before the command even begins, and they are unnecessary
|
|
@@ -456,56 +553,184 @@ function resolveTerminalCommand(taskId, stepId) {
|
|
|
456
553
|
if (!step || step.kind === "create_config") return null;
|
|
457
554
|
return setupStepCommand(step);
|
|
458
555
|
}
|
|
459
|
-
|
|
460
|
-
|
|
556
|
+
/**
 * Default PTY spawner: runs `options.command` in {@link SHELL} through
 * `pty.spawn`, falling back to an 80x24 terminal when no size is given.
 *
 * @param options - `command` to run, optional `cols`/`rows`, plus the `cwd`
 * and `env` handed to the PTY.
 * @returns Whatever `pty.spawn` returns.
 */
function defaultSpawn(options) {
  const { command, cols, rows, cwd, env } = options;
  const ptyOptions = {
    name: "xterm-color",
    cols: cols || 80,
    rows: rows || 24,
    cwd,
    env
  };
  return pty.spawn(SHELL, shellCommandArgs(command), ptyOptions);
}
|
|
566
|
+
/**
 * A single onboarding terminal: one PTY plus the WebSocket currently attached to
 * it. The PTY outlives any one socket so it can survive the server restart that
 * happens when a config file appears — while a client is attached, output is
 * streamed live; while it is detached, output is buffered and a grace timer
 * reaps the PTY if no client reconnects in time.
 */
var TerminalSession = class {
  /** While detached: PTY output accumulated to replay on reconnect. */
  buffer = null;
  /** Pending grace-period timeout, if the session is currently detached. */
  graceTimer;
  /** The attached WebSocket, or `null` while detached. */
  socket = null;
  /** Set once the PTY has reported its exit. */
  exited = false;
  child;
  onReap;
  gracePeriodMs;
  /** Guards {@link onReap} so it is invoked at most once per session. */
  #reaped = false;
  /**
   * @param child - The PTY this session owns.
   * @param onReap - Called once the session is done (exit, grace expiry, or
   * intentional kill) so the registry can drop it.
   * @param gracePeriodMs - How long to keep the PTY alive after the socket drops.
   */
  constructor(child, onReap, gracePeriodMs = GRACE_PERIOD_MS) {
    this.child = child;
    this.onReap = onReap;
    this.gracePeriodMs = gracePeriodMs;
    child.onData((data) => this.handleData(data));
    child.onExit(({ exitCode }) => this.handleExit(exitCode));
  }
  /** Serialise `message` as JSON to the attached socket; a no-op while detached. */
  send(message) {
    this.socket?.send(JSON.stringify(message));
  }
  /** PTY output: stream it when attached, otherwise buffer it if buffering is on. */
  handleData(data) {
    if (this.socket) this.send({
      type: "data",
      data
    });
    else this.buffer?.push(data);
  }
  /** The PTY exited: tell the attached client, close its socket and reap. */
  handleExit(code) {
    this.exited = true;
    this.send({
      type: "exit",
      code
    });
    this.socket?.close();
    this.clearGrace();
    this.#reap();
  }
  /**
   * Attach a (re)connected socket. Replays any output buffered while detached,
   * then resumes live streaming. Cancels a pending grace-period reap.
   */
  attach(ws) {
    this.clearGrace();
    this.socket = ws;
    if (this.buffer) {
      for (const data of this.buffer) this.send({
        type: "data",
        data
      });
      this.buffer = null;
    }
  }
  /**
   * The attached socket dropped (e.g. the server is restarting). Start buffering
   * output and a grace timer; if no client reattaches in time, reap the PTY.
   */
  detach() {
    if (this.exited || !this.socket) return;
    this.socket = null;
    this.buffer = [];
    this.graceTimer = setTimeout(() => {
      this.buffer = null;
      if (!this.exited) this.child.kill();
      this.#reap();
    }, this.gracePeriodMs);
  }
  /** The client intentionally left: kill the PTY now, skipping the grace wait. */
  kill() {
    this.clearGrace();
    this.socket = null;
    this.buffer = null;
    if (!this.exited) this.child.kill();
    this.#reap();
  }
  /** Forward the user's keystrokes to the PTY. */
  write(data) {
    this.child.write(data);
  }
  /** Resize the PTY to match the client's terminal. */
  resize(cols, rows) {
    this.child.resize(cols || 80, rows || 24);
  }
  /** Cancel the pending grace-period timer, if any. */
  clearGrace() {
    if (this.graceTimer) {
      clearTimeout(this.graceTimer);
      this.graceTimer = void 0;
    }
  }
  /**
   * Notify the owner exactly once. Killing the PTY makes its exit handler run
   * afterwards; without this guard that late call would reap a second time and
   * could drop a newer session registered under the same id.
   */
  #reap() {
    if (this.#reaped) return;
    this.#reaped = true;
    this.onReap();
  }
};
|
|
667
|
+
/**
 * Holds the live {@link TerminalSession | terminal sessions} keyed by a
 * client-supplied session id. Owned by the CLI process (not by any one server
 * instance) so sessions — and the PTYs they wrap — survive the server restart
 * that happens when a config file appears.
 */
var TerminalSessionRegistry = class {
  sessions = /* @__PURE__ */ new Map();
  spawn;
  gracePeriodMs;
  /**
   * @param spawn - PTY spawner; overridable in tests.
   * @param gracePeriodMs - Reconnect grace window passed to each session.
   */
  constructor(spawn = defaultSpawn, gracePeriodMs = GRACE_PERIOD_MS) {
    this.spawn = spawn;
    this.gracePeriodMs = gracePeriodMs;
  }
  /** The session for `id`, if one is still live. */
  get(id) {
    return this.sessions.get(id);
  }
  /**
   * Spawn a PTY and register a new session under `id`.
   *
   * The reap callback removes the entry only while it still points at this
   * very session: a session that is reaped late (its PTY exits after a newer
   * session was created under the same id) must not evict its replacement.
   *
   * @param id - Key to register the session under.
   * @param options - Passed through to the spawner.
   * @returns The newly created session.
   */
  create(id, options) {
    let session;
    session = new TerminalSession(this.spawn(options), () => {
      if (this.sessions.get(id) === session) this.sessions.delete(id);
    }, this.gracePeriodMs);
    this.sessions.set(id, session);
    return session;
  }
};
|
|
696
|
+
/**
 * Sends an `error` frame to the client as JSON.
 *
 * @param ws - Socket to send on.
 * @param message - Human-readable error text.
 */
function sendError(ws, message) {
  const payload = { type: "error", message };
  ws.send(JSON.stringify(payload));
}
|
|
462
702
|
/**
|
|
463
703
|
* Registers the interactive-terminal WebSocket route at `/api/terminal`.
|
|
464
704
|
*
|
|
465
|
-
* The client connects with `taskId
|
|
466
|
-
*
|
|
467
|
-
* (never from the request body)
|
|
468
|
-
*
|
|
469
|
-
*
|
|
470
|
-
* "Ok to proceed?") work.
|
|
471
|
-
*
|
|
705
|
+
* The client connects with `taskId`, `stepId`, and a client-generated
|
|
706
|
+
* `sessionId` query param. The server resolves the actual command from its own
|
|
707
|
+
* registry (never from the request body). On the first connection for a session
|
|
708
|
+
* the client signals `start` and the server spawns the command in a PTY rooted
|
|
709
|
+
* at the project; output is streamed to the client and the client's keystrokes
|
|
710
|
+
* are written to the process, so prompts (e.g. npm's "Ok to proceed?") work.
|
|
711
|
+
*
|
|
712
|
+
* Sessions live in `sessions`, which outlives this server instance, so when the
|
|
713
|
+
* server restarts itself after a config file appears the PTY keeps running and a
|
|
714
|
+
* reconnecting client (same `sessionId`) re-attaches and replays the gap.
|
|
715
|
+
*
|
|
716
|
+
* The trust boundary mirrors the step-execute route: the client can only ask to
|
|
717
|
+
* run a step that already exists server-side.
|
|
472
718
|
*/
|
|
473
|
-
function registerTerminalRoute(app, upgradeWebSocket, rootPath) {
|
|
719
|
+
function registerTerminalRoute(app, upgradeWebSocket, rootPath, sessions) {
|
|
474
720
|
app.get("/api/terminal", upgradeWebSocket((c) => {
|
|
475
721
|
const taskId = c.req.query("taskId");
|
|
476
722
|
const stepId = c.req.query("stepId");
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
const command = taskId && stepId ? resolveTerminalCommand(taskId, stepId) : null;
|
|
481
|
-
if (!command) {
|
|
482
|
-
send(ws, {
|
|
483
|
-
type: "error",
|
|
484
|
-
message: "Unknown or non-runnable setup step."
|
|
485
|
-
});
|
|
486
|
-
ws.close();
|
|
487
|
-
return;
|
|
488
|
-
}
|
|
489
|
-
child = pty.spawn(SHELL, shellCommandArgs(command), {
|
|
490
|
-
name: "xterm-color",
|
|
491
|
-
cols: cols || 80,
|
|
492
|
-
rows: rows || 24,
|
|
493
|
-
cwd: rootPath,
|
|
494
|
-
env: process.env
|
|
495
|
-
});
|
|
496
|
-
child.onData((data) => send(ws, {
|
|
497
|
-
type: "data",
|
|
498
|
-
data
|
|
499
|
-
}));
|
|
500
|
-
child.onExit(({ exitCode }) => {
|
|
501
|
-
send(ws, {
|
|
502
|
-
type: "exit",
|
|
503
|
-
code: exitCode
|
|
504
|
-
});
|
|
505
|
-
ws.close();
|
|
506
|
-
});
|
|
507
|
-
};
|
|
723
|
+
const sessionId = c.req.query("sessionId") ?? `${taskId}:${stepId}`;
|
|
724
|
+
let session;
|
|
725
|
+
let leaving = false;
|
|
508
726
|
return {
|
|
727
|
+
onOpen(_evt, ws) {
|
|
728
|
+
const existing = sessions.get(sessionId);
|
|
729
|
+
if (existing) {
|
|
730
|
+
session = existing;
|
|
731
|
+
existing.attach(ws);
|
|
732
|
+
}
|
|
733
|
+
},
|
|
509
734
|
onMessage(evt, ws) {
|
|
510
735
|
let msg;
|
|
511
736
|
try {
|
|
@@ -514,20 +739,38 @@ function registerTerminalRoute(app, upgradeWebSocket, rootPath) {
|
|
|
514
739
|
return;
|
|
515
740
|
}
|
|
516
741
|
switch (msg.type) {
|
|
517
|
-
case "start":
|
|
518
|
-
|
|
742
|
+
case "start": {
|
|
743
|
+
if (session) return;
|
|
744
|
+
const command = taskId && stepId ? resolveTerminalCommand(taskId, stepId) : null;
|
|
745
|
+
if (!command) {
|
|
746
|
+
sendError(ws, "Unknown or non-runnable setup step.");
|
|
747
|
+
ws.close();
|
|
748
|
+
return;
|
|
749
|
+
}
|
|
750
|
+
session = sessions.create(sessionId, {
|
|
751
|
+
command,
|
|
752
|
+
cols: msg.cols,
|
|
753
|
+
rows: msg.rows,
|
|
754
|
+
cwd: rootPath,
|
|
755
|
+
env: process.env
|
|
756
|
+
});
|
|
757
|
+
session.attach(ws);
|
|
519
758
|
break;
|
|
759
|
+
}
|
|
520
760
|
case "input":
|
|
521
|
-
|
|
761
|
+
session?.write(msg.data);
|
|
522
762
|
break;
|
|
523
763
|
case "resize":
|
|
524
|
-
|
|
764
|
+
session?.resize(msg.cols || 80, msg.rows || 24);
|
|
765
|
+
break;
|
|
766
|
+
case "detach":
|
|
767
|
+
leaving = true;
|
|
768
|
+
session?.kill();
|
|
525
769
|
break;
|
|
526
770
|
}
|
|
527
771
|
},
|
|
528
772
|
onClose() {
|
|
529
|
-
|
|
530
|
-
child = void 0;
|
|
773
|
+
if (!leaving) session?.detach();
|
|
531
774
|
}
|
|
532
775
|
};
|
|
533
776
|
}));
|
|
@@ -535,7 +778,7 @@ function registerTerminalRoute(app, upgradeWebSocket, rootPath) {
|
|
|
535
778
|
//#endregion
|
|
536
779
|
//#region src/server/index.ts
|
|
537
780
|
async function startServer(options) {
|
|
538
|
-
const { promptProviders, traceProviders, port, rootPath, hasConfig } = options;
|
|
781
|
+
const { promptProviders, traceProviders, port, rootPath, hasConfig, terminalSessions } = options;
|
|
539
782
|
const promptProviderMap = new Map(promptProviders.map((p) => [p.id, p]));
|
|
540
783
|
const traceProviderMap = new Map(traceProviders.map((p) => [p.id, p]));
|
|
541
784
|
const promptRegistry = new PromptRegistry();
|
|
@@ -562,9 +805,13 @@ async function startServer(options) {
|
|
|
562
805
|
rootPath,
|
|
563
806
|
hasConfig,
|
|
564
807
|
tracer,
|
|
565
|
-
defaultTraceProviderId
|
|
808
|
+
defaultTraceProviderId,
|
|
809
|
+
setupTasks: {
|
|
810
|
+
resolve: resolveSetupTasks,
|
|
811
|
+
executeStep: executeSetupStep
|
|
812
|
+
}
|
|
566
813
|
});
|
|
567
|
-
registerTerminalRoute(app, upgradeWebSocket, rootPath);
|
|
814
|
+
registerTerminalRoute(app, upgradeWebSocket, rootPath, terminalSessions);
|
|
568
815
|
const clientRoot = fileURLToPath(new URL("../client/", import.meta.url));
|
|
569
816
|
app.get("*", serveStatic({ root: clientRoot }));
|
|
570
817
|
for (const [providerId, provider] of promptProviderMap) if (provider.watch) provider.watch(async (event) => {
|
|
@@ -600,6 +847,7 @@ async function startServer(options) {
|
|
|
600
847
|
});
|
|
601
848
|
});
|
|
602
849
|
const close = () => new Promise((resolve, reject) => {
|
|
850
|
+
for (const ws of wss.clients) ws.close();
|
|
603
851
|
if ("closeAllConnections" in server) server.closeAllConnections();
|
|
604
852
|
server.close((err) => err ? reject(err) : resolve());
|
|
605
853
|
});
|
|
@@ -823,14 +1071,15 @@ function applyDotenv(rootDir) {
|
|
|
823
1071
|
if (err?.code !== "ENOENT") console.warn(`Warning: failed to load .env from ${envPath}:`, err.message);
|
|
824
1072
|
}
|
|
825
1073
|
}
|
|
826
|
-
function startConfiguredServer(rootDir, config, hasConfig, port) {
|
|
1074
|
+
function startConfiguredServer(rootDir, config, hasConfig, port, terminalSessions) {
|
|
827
1075
|
if (config.useDotenv !== false) applyDotenv(rootDir);
|
|
828
1076
|
return startServer({
|
|
829
1077
|
promptProviders: config.promptProviders ?? [],
|
|
830
1078
|
traceProviders: config.traceProviders ?? [new MemoryTraceProvider()],
|
|
831
1079
|
port,
|
|
832
1080
|
rootPath: rootDir,
|
|
833
|
-
hasConfig
|
|
1081
|
+
hasConfig,
|
|
1082
|
+
terminalSessions
|
|
834
1083
|
});
|
|
835
1084
|
}
|
|
836
1085
|
async function main() {
|
|
@@ -848,11 +1097,12 @@ async function main() {
|
|
|
848
1097
|
const maybeOpen = (url) => {
|
|
849
1098
|
if (!process.env.EVALUTION_NO_OPEN) openBrowser(url);
|
|
850
1099
|
};
|
|
1100
|
+
const terminalSessions = new TerminalSessionRegistry();
|
|
851
1101
|
if (hasConfig) {
|
|
852
|
-
maybeOpen((await startConfiguredServer(rootDir, await loadConfig(rootDir), true, port)).url);
|
|
1102
|
+
maybeOpen((await startConfiguredServer(rootDir, await loadConfig(rootDir), true, port, terminalSessions)).url);
|
|
853
1103
|
return;
|
|
854
1104
|
}
|
|
855
|
-
let server = await startConfiguredServer(rootDir, {}, false, port);
|
|
1105
|
+
let server = await startConfiguredServer(rootDir, {}, false, port, terminalSessions);
|
|
856
1106
|
maybeOpen(server.url);
|
|
857
1107
|
console.log(`👀 No config found; watching ${path.join(rootDir, ".evalution", "config.ts")} for creation...`);
|
|
858
1108
|
const stopWatching = watchForConfigCreation(rootDir, async () => {
|
|
@@ -860,7 +1110,7 @@ async function main() {
|
|
|
860
1110
|
console.log("⚙️ Config loaded; restarting server...");
|
|
861
1111
|
stopWatching();
|
|
862
1112
|
await server.close();
|
|
863
|
-
server = await startConfiguredServer(rootDir, config, true, port);
|
|
1113
|
+
server = await startConfiguredServer(rootDir, config, true, port, terminalSessions);
|
|
864
1114
|
});
|
|
865
1115
|
}
|
|
866
1116
|
main().catch((error) => {
|