evalution 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,871 @@
1
+ import { a as MemoryTraceProvider, i as setupStepCommand, r as CONFIG_FILE_RELATIVE_PATH, t as VercelAISDK } from "../vercel-ai-sdk-CareWPDM.js";
2
+ import module from "node:module";
3
+ import fs from "node:fs/promises";
4
+ import path from "node:path";
5
+ import { fileURLToPath, pathToFileURL } from "node:url";
6
+ import { serve, upgradeWebSocket } from "@hono/node-server";
7
+ import { serveStatic } from "@hono/node-server/serve-static";
8
+ import { SpanStatusCode, context, trace } from "@opentelemetry/api";
9
+ import { AsyncLocalStorageContextManager } from "@opentelemetry/context-async-hooks";
10
+ import { BasicTracerProvider } from "@opentelemetry/sdk-trace-base";
11
+ import { Hono } from "hono";
12
+ import { WebSocketServer } from "ws";
13
+ import { streamSSE } from "hono/streaming";
14
+ import fs$1 from "node:fs";
15
+ import * as pty from "@lydell/node-pty";
16
+ import chokidar from "chokidar";
17
+ import net from "node:net";
18
+ import { spawn } from "node:child_process";
19
+ //#region src/prompt/prompt-registry.ts
20
/**
 * Maps globally-usable prompt IDs to the provider-scoped prompt they refer to,
 * so runtime trace spans (which carry only a {@link PromptID}) can be linked
 * back to a concrete prompt.
 *
 * The map is built by scanning every provider's prompts: each prompt registers
 * both its provider-scoped `id` and its author-supplied `globalId` (when set) as
 * keys. A key seen from more than one *distinct* prompt is **ambiguous** — it is
 * nulled out so it can never resolve to the wrong prompt. The same prompt
 * registering the same key twice (e.g. `globalId === id`) is not ambiguous.
 */
var PromptRegistry = class {
  /** `null` marks an ambiguous key (seen from more than one prompt). */
  map = /* @__PURE__ */ new Map();
  /**
   * Rebuilds the map from scratch by scanning every provider's prompts. Call
   * after the initial load and whenever a provider's prompts change.
   *
   * The new map is swapped in only after every provider has been scanned, so
   * `resolve` never observes a half-built map.
   *
   * @param providers - Iterable of `[providerId, provider]` pairs (e.g. a Map);
   *   each provider must expose an async `getAllPrompts()`.
   */
  async rebuild(providers) {
    const next = /* @__PURE__ */ new Map();
    for (const [providerId, provider] of providers) {
      const prompts = await provider.getAllPrompts();
      for (const prompt of prompts) {
        this.register(next, prompt.id, {
          providerId,
          promptId: prompt.id
        });
        if (prompt.globalId) this.register(next, prompt.globalId, {
          providerId,
          promptId: prompt.id
        });
      }
    }
    this.map = next;
  }
  /**
   * Records `key -> value` in `map`, nulling the key when it is already owned
   * by a different prompt.
   *
   * @param map - The map being built.
   * @param key - A provider-scoped id or a global id.
   * @param value - `{ providerId, promptId }` reference the key should resolve to.
   */
  register(map, key, value) {
    if (!map.has(key)) {
      map.set(key, value);
      return;
    }
    const existing = map.get(key);
    // A prompt whose globalId equals its own id registers the same key twice;
    // that is the same target, not a conflict, so keep the existing entry.
    const sameTarget = existing !== null && existing.providerId === value.providerId && existing.promptId === value.promptId;
    if (!sameTarget) map.set(key, null);
  }
  /**
   * Resolves a {@link PromptID} to a provider-scoped reference, or `undefined`
   * when it can't be resolved.
   *
   * The map is always consulted first: it translates a (possibly global) `id`
   * into the real provider-scoped prompt ID the provider can actually open.
   * Only when the map has nothing (absent, or ambiguous) do we fall back to
   * trusting `providerId` together with `id` as already provider-scoped.
   *
   * @param id - The prompt id carried by the span.
   * @param providerId - Optional provider id carried alongside `id`.
   * @returns `{ providerId, promptId }`, or `undefined` when unresolvable.
   */
  resolve(id, providerId) {
    const entry = this.map.get(id);
    if (entry) return entry;
    return providerId ? {
      providerId,
      promptId: id
    } : void 0;
  }
};
76
+ //#endregion
77
+ //#region src/sdk/registry.ts
78
/**
 * The ordered list of AI SDK adapters shown during manual onboarding. It is
 * the one place that decides which SDKs exist and what their task ids are: to
 * offer a new SDK, give its adapter a static `setupTask` and append it here.
 */
const AI_SDK_REGISTRY = [VercelAISDK];
/**
 * Finds the {@link SetupTask} whose id equals `taskId`.
 *
 * @param taskId - The task id to search for.
 * @returns The matching task, or `undefined` when no registered SDK declares it.
 */
function findSetupTask(taskId) {
  const owner = AI_SDK_REGISTRY.find((sdk) => sdk.setupTask.id === taskId);
  return owner?.setupTask;
}
89
/**
 * Finds a single step by task id and step id.
 *
 * @param taskId - Id of the task that should contain the step.
 * @param stepId - Id of the step within that task.
 * @returns The step, or `undefined` when the task or the step is unknown.
 */
function findSetupStep(taskId, stepId) {
  const task = findSetupTask(taskId);
  if (task == null) return void 0;
  return task.steps.find((candidate) => candidate.id === stepId);
}
96
+ //#endregion
97
+ //#region src/server/setup-tasks.ts
98
/**
 * Signals that a task id or step id is not present in the registry. The route
 * layer checks for this class to answer 404 rather than treating the problem
 * as an execution failure.
 */
var SetupStepNotFoundError = class extends Error {
  /** Own `name` property so logs and `instanceof`-free checks can identify it. */
  name = "SetupStepNotFoundError";
  /** @param message - Human-readable description of what was not found. */
  constructor(message) {
    super(message);
  }
};
108
/**
 * Executes a single onboarding step, resolved from the server-side registry by
 * `taskId`/`stepId`.
 *
 * The client only sends ids; the step definition (file contents, command, ...)
 * comes entirely from {@link AI_SDK_REGISTRY}, so a request can never write
 * arbitrary files or run arbitrary commands.
 *
 * @param rootPath - Absolute path to the project root.
 * @param taskId - Id of the {@link SetupTask} to run a step from.
 * @param stepId - Id of the step within that task.
 * @returns For a `create_config` step, `{ path }` with the written file's
 *   project-relative path.
 * @throws {SetupStepNotFoundError} if the task or step id is unknown.
 * @throws if the step kind is unsupported or execution fails (e.g. the config
 * file already exists).
 */
async function executeSetupStep(rootPath, taskId, stepId) {
  const step = findSetupStep(taskId, stepId);
  if (!step) throw new SetupStepNotFoundError(`Unknown step '${stepId}' for task '${taskId}'`);
  switch (step.kind) {
    case "create_config": return { path: await writeConfigFile(rootPath, step) };
    case "run_command":
    case "install_package": throw new Error(`${step.kind} steps are not yet supported`);
    // Without this, a step kind added to the registry later would silently
    // resolve to `undefined` and the route would report success.
    default: throw new Error(`Unsupported step kind '${step.kind}'`);
  }
}
132
/**
 * Builds the onboarding task list for the project at `rootPath`, with every
 * step passed through {@link resolveStepStatus} so its runtime
 * {@link SetupStepBase.completed | completion status} is filled in where it
 * can be determined (config file present, package installed).
 *
 * @param rootPath - Absolute path to the project root.
 * @returns One entry per registered SDK, in registry order.
 */
function resolveSetupTasks(rootPath) {
  const tasks = [];
  for (const sdk of AI_SDK_REGISTRY) {
    const { setupTask } = sdk;
    const steps = setupTask.steps.map((step) => resolveStepStatus(rootPath, step));
    tasks.push({
      ...setupTask,
      steps
    });
  }
  return tasks;
}
145
/**
 * Adds the runtime `completed` flag to a single step where determinable.
 *
 * - `install_package`: completed when the package is found by
 *   {@link isPackageInstalled}.
 * - `create_config`: completed when the step's file exists under `rootPath`.
 * - any other kind (including `run_command`): returned unchanged, because
 *   completion cannot be determined from the file system.
 *
 * @param rootPath - Absolute path to the project root.
 * @param step - The step definition from the registry.
 * @returns The step, with `completed` added for the kinds listed above.
 */
function resolveStepStatus(rootPath, step) {
  switch (step.kind) {
    case "install_package": return {
      ...step,
      completed: isPackageInstalled(rootPath, step.package)
    };
    case "create_config": return {
      ...step,
      completed: fs$1.existsSync(path.join(rootPath, step.path))
    };
    // Pass unknown kinds through untouched instead of returning `undefined`,
    // which would turn the step into `null` in the JSON response.
    default: return step;
  }
}
159
/**
 * Reports whether `pkg` can be found in a `node_modules` directory at
 * `rootPath` or in any ancestor directory, so hoisted and workspace installs
 * are detected too. A package counts as installed when its `package.json`
 * exists.
 *
 * @param rootPath - Absolute path to start the search from.
 * @param pkg - The npm package name to look for.
 * @returns `true` as soon as a manifest is found; `false` once the file-system
 *   root has been checked without a match.
 */
function isPackageInstalled(rootPath, pkg) {
  for (let current = rootPath;;) {
    const manifest = path.join(current, "node_modules", pkg, "package.json");
    if (fs$1.existsSync(manifest)) return true;
    const above = path.dirname(current);
    // `dirname` is a fixed point at the root: nothing left to climb.
    if (above === current) return false;
    current = above;
  }
}
175
/**
 * Writes the config file for a `create_config` step, creating parent
 * directories as needed. Refuses to clobber an existing file.
 *
 * The file is opened with the exclusive-create flag (`wx`), so the existence
 * check and the write are a single atomic operation: a file that appears
 * between "check" and "write" can never be overwritten.
 *
 * @param rootPath - Absolute path to the project root.
 * @param step - The `create_config` step; `step.path` is relative to
 *   `rootPath` and `step.contents` is the text to write.
 * @returns `step.path`, the project-relative path that was written.
 * @throws {Error} `"<step.path> already exists"` when the target exists.
 * @throws any other file-system error (permissions, invalid path, ...).
 */
async function writeConfigFile(rootPath, step) {
  const filePath = path.join(rootPath, step.path);
  await fs.mkdir(path.dirname(filePath), { recursive: true });
  try {
    await fs.writeFile(filePath, step.contents, {
      encoding: "utf8",
      flag: "wx"
    });
  } catch (err) {
    if (err?.code === "EEXIST") throw new Error(`${step.path} already exists`, { cause: err });
    throw err;
  }
  return step.path;
}
191
+ //#endregion
192
+ //#region src/server/api-routes.ts
193
/**
 * Registers every JSON and server-sent-events API route on `app`.
 *
 * Prompt ids travel in URLs base64url-encoded and are decoded before they
 * reach a provider. Handler failures are reported as JSON `{ error }` bodies.
 *
 * @param deps - Route dependencies.
 * @param deps.app - Router exposing `get(path, handler)` / `post(path, handler)`.
 * @param deps.promptProviders - Map of provider id to prompt provider.
 * @param deps.traceProviders - Map of provider id to trace provider.
 * @param deps.promptRegistry - Resolves span prompt ids to provider-scoped references.
 * @param deps.hotReloadSubscribers - Set of callbacks fed by `/api/events`.
 * @param deps.rootPath - Absolute path to the project root.
 * @param deps.hasConfig - Whether a project config was found at startup.
 * @param deps.tracer - Tracer used to wrap prompt executions in a root span.
 * @param deps.defaultTraceProviderId - Trace provider id reported for executions.
 */
function setupRoutes({ app, promptProviders, traceProviders, promptRegistry, hotReloadSubscribers, rootPath, hasConfig, tracer, defaultTraceProviderId }) {
  /** Decodes a base64url-encoded prompt id taken from a route parameter. */
  const decodePromptId = (encodedId) => Buffer.from(encodedId, "base64url").toString("utf8");
  /** Best-effort message for anything that can be thrown. */
  const errorMessage = (error) => error?.message ?? String(error);
  /**
   * Fire-and-forget SSE write for events pushed from subscriptions. A write can
   * reject once the client has disconnected; the abort handler performs the
   * cleanup, so the rejection is deliberately ignored rather than left
   * unhandled.
   */
  const pushEvent = (stream, payload) => {
    stream.writeSSE({ data: JSON.stringify(payload) }).catch(() => {});
  };
  /** Keeps an SSE handler open until the client aborts, then runs `cleanup`. */
  const untilAborted = (stream, cleanup) => new Promise((resolve) => {
    stream.onAbort(() => {
      cleanup();
      resolve();
    });
  });
  /** Rewrites a span's prompt reference to the provider-scoped one, when resolvable. */
  const resolveSpanPrompt = (span) => {
    if (!span.prompt) return span;
    const resolved = promptRegistry.resolve(span.prompt.id, span.prompt.providerId);
    if (!resolved) return span;
    return {
      ...span,
      prompt: {
        id: resolved.promptId,
        providerId: resolved.providerId
      }
    };
  };
  app.get("/api/config", (c) => c.json({
    rootPath,
    configured: hasConfig
  }));
  app.get("/api/setup-tasks", (c) => c.json(resolveSetupTasks(rootPath)));
  app.post("/api/setup-tasks/:taskId/steps/:stepId/execute", async (c) => {
    const { taskId, stepId } = c.req.param();
    try {
      return c.json(await executeSetupStep(rootPath, taskId, stepId));
    } catch (error) {
      const status = error instanceof SetupStepNotFoundError ? 404 : 400;
      return c.json({ error: errorMessage(error) }, status);
    }
  });
  app.get("/api/providers", (c) => c.json(Array.from(promptProviders.entries()).map(([id, provider]) => ({
    id,
    displayName: provider.displayName,
    description: provider.description,
    icon: provider.icon,
    hasAddPrompt: !!provider.addPrompt
  }))));
  app.post("/api/providers/:providerId/add-prompt", async (c) => {
    try {
      const { providerId } = c.req.param();
      const provider = promptProviders.get(providerId);
      if (!provider) return c.json({ error: "Provider not found" }, 404);
      if (!provider.addPrompt) return c.json({ error: "This provider does not support adding prompts" }, 405);
      const result = await provider.addPrompt(await c.req.json());
      if ("fields" in result) return c.json(result);
      return c.json({
        ...result,
        providerId
      });
    } catch (error) {
      return c.json({ error: errorMessage(error) }, 400);
    }
  });
  app.get("/api/providers/:providerId/models", async (c) => {
    try {
      const { providerId } = c.req.param();
      const provider = promptProviders.get(providerId);
      if (!provider) return c.json({ error: "Provider not found" }, 404);
      return c.json(await provider.getModelCatalog?.() ?? { providers: {} });
    } catch (error) {
      return c.json({ error: errorMessage(error) }, 500);
    }
  });
  app.get("/api/providers/:providerId/model-parameters", async (c) => {
    try {
      const { providerId } = c.req.param();
      const provider = promptProviders.get(providerId);
      if (!provider) return c.json({ error: "Provider not found" }, 404);
      return c.json(provider.getModelParameters?.() ?? []);
    } catch (error) {
      return c.json({ error: errorMessage(error) }, 500);
    }
  });
  app.get("/api/prompts", async (c) => {
    try {
      const results = await Promise.all(Array.from(promptProviders.entries()).map(async ([providerId, provider]) => {
        return (await provider.getAllPrompts()).map((prompt) => ({
          ...prompt,
          providerId
        }));
      }));
      return c.json(results.flat());
    } catch (error) {
      return c.json({ error: errorMessage(error) }, 500);
    }
  });
  app.get("/api/prompts/:providerId/:id", async (c) => {
    try {
      const { providerId, id } = c.req.param();
      const provider = promptProviders.get(providerId);
      if (!provider) return c.json({ error: "Provider not found" }, 404);
      const prompt = await provider.getPrompt(decodePromptId(id));
      if (!prompt) return c.json({ error: "Prompt not found" }, 404);
      return c.json({
        ...prompt,
        providerId
      });
    } catch (error) {
      return c.json({ error: errorMessage(error) }, 500);
    }
  });
  app.post("/api/prompts/:providerId/:id/rename", async (c) => {
    try {
      const { providerId, id } = c.req.param();
      const { newName } = await c.req.json() ?? {};
      const provider = promptProviders.get(providerId);
      if (!provider) return c.json({ error: "Provider not found" }, 404);
      if (!provider.renamePrompt) return c.json({ error: "This provider does not support renaming" }, 405);
      // Reject a missing or blank name here so it never reaches the provider.
      if (typeof newName !== "string" || newName.trim() === "") return c.json({ error: "newName must be a non-empty string" }, 400);
      const updatedPrompt = await provider.renamePrompt(decodePromptId(id), newName);
      return c.json({
        ...updatedPrompt,
        providerId
      });
    } catch (error) {
      return c.json({ error: errorMessage(error) }, 400);
    }
  });
  app.post("/api/prompts/:providerId/:id/update", async (c) => {
    try {
      const { providerId, id } = c.req.param();
      const provider = promptProviders.get(providerId);
      if (!provider) return c.json({ error: "Provider not found" }, 404);
      if (!provider.updatePromptProperties) return c.json({ error: "This provider does not support editing" }, 405);
      const updatedPrompt = await provider.updatePromptProperties(decodePromptId(id), await c.req.json());
      return c.json({
        ...updatedPrompt,
        providerId
      });
    } catch (error) {
      return c.json({ error: errorMessage(error) }, 400);
    }
  });
  app.post("/api/prompts/:providerId/:id/execute", async (c) => {
    try {
      const { providerId, id } = c.req.param();
      const provider = promptProviders.get(providerId);
      if (!provider) return c.json({ error: "Provider not found" }, 404);
      const decodedId = decodePromptId(id);
      // The body is optional; a missing, invalid or `null` body means "no params".
      const { functionParams = [] } = await c.req.json().catch(() => ({})) ?? {};
      const prompt = await provider.getPrompt(decodedId);
      if (!prompt) return c.json({ error: "Prompt not found" }, 404);
      const response = tracer.startActiveSpan(prompt.name, (span) => {
        const { traceId, spanId } = span.spanContext();
        // The execution is intentionally not awaited: the client gets the trace
        // ids immediately and follows progress through the trace event stream.
        let execution;
        try {
          execution = Promise.resolve(provider.execute(decodedId, functionParams, false));
        } catch (err) {
          // A synchronous throw must take the same path as a rejection so the
          // span is still marked as failed and ended.
          execution = Promise.reject(err);
        }
        execution.then(() => {
          span.setStatus({ code: SpanStatusCode.OK });
        }, (err) => {
          console.error("prompt execution failed:", err);
          span.recordException(err);
          span.setStatus({
            code: SpanStatusCode.ERROR,
            message: err?.error ? JSON.stringify(err.error, null, 2) : err?.message ?? String(err)
          });
        }).finally(() => {
          span.end();
        });
        return {
          traceId,
          tracerProviderId: defaultTraceProviderId,
          rootSpanId: spanId
        };
      });
      return c.json(response);
    } catch (error) {
      return c.json({ error: errorMessage(error) }, 500);
    }
  });
  app.get("/api/trace-providers", (c) => c.json(Array.from(traceProviders.entries()).map(([id, provider]) => ({
    id,
    displayName: provider.displayName,
    description: provider.description
  }))));
  app.get("/api/traces", async (c) => {
    try {
      const results = await Promise.all(Array.from(traceProviders.values()).map((p) => p.getAllTraces()));
      return c.json(results.flat());
    } catch (error) {
      return c.json({ error: errorMessage(error) }, 500);
    }
  });
  app.get("/api/traces/:providerId/:id", async (c) => {
    try {
      const { providerId, id } = c.req.param();
      const provider = traceProviders.get(providerId);
      if (!provider) return c.json({ error: "Trace provider not found" }, 404);
      const trace = await provider.getTrace(id);
      if (!trace) return c.json({ error: "Trace not found" }, 404);
      return c.json({
        ...trace,
        spans: trace.spans.map(resolveSpanPrompt)
      });
    } catch (error) {
      return c.json({ error: errorMessage(error) }, 500);
    }
  });
  app.get("/api/traces/:providerId/:id/events", (c) => {
    const { providerId, id } = c.req.param();
    const provider = traceProviders.get(providerId);
    if (!provider) return c.json({ error: "Trace provider not found" }, 404);
    const resolveEvent = (event) => "span" in event ? {
      ...event,
      span: resolveSpanPrompt(event.span)
    } : event;
    return streamSSE(c, async (stream) => {
      await stream.writeSSE({ data: JSON.stringify({ type: "connected" }) });
      // Replay spans that already exist so a late subscriber sees the full trace.
      const existing = await provider.getTrace(id);
      if (existing) for (const span of existing.spans) {
        const resolved = resolveSpanPrompt(span);
        const initial = resolved.endTime === void 0 ? {
          type: "span-start",
          span: resolved
        } : {
          type: "span-end",
          span: resolved
        };
        await stream.writeSSE({ data: JSON.stringify(initial) });
      }
      const unsubscribe = provider.subscribeTrace(id, (event) => {
        pushEvent(stream, resolveEvent(event));
      });
      await untilAborted(stream, unsubscribe);
    });
  });
  app.get("/api/events", (c) => streamSSE(c, async (stream) => {
    await stream.writeSSE({ data: JSON.stringify({ type: "connected" }) });
    const send = (data) => {
      pushEvent(stream, data);
    };
    hotReloadSubscribers.add(send);
    await untilAborted(stream, () => {
      hotReloadSubscribers.delete(send);
    });
  }));
}
422
+ //#endregion
423
+ //#region src/server/terminal.ts
424
/** Shell used to run a resolved step command, so shell syntax in it works. */
const SHELL = process.env.SHELL || (process.platform === "win32" ? "powershell.exe" : "bash");
/**
 * Arguments to run `command` in a shell, skipping the user's startup files
 * where the shell allows it. Those rc files (e.g. nvm in `~/.zshrc`) can add
 * seconds of latency before the command even begins, and they are unnecessary
 * here: the PTY inherits the server process's `PATH`, so tools resolve without
 * them.
 *
 * The shell is identified by the file name of its path only, so a directory
 * that merely contains "bash" or "zsh" in its name cannot cause a
 * misclassification.
 *
 * @param command - The command line to run.
 * @param shellPath - Path or name of the shell executable; defaults to {@link SHELL}.
 * @returns The argument vector to pass to the shell.
 */
function shellCommandArgs(command, shellPath = SHELL) {
  // Split on both separators: SHELL may be a Windows path even when this code
  // is evaluated with POSIX path rules, and vice versa.
  const shellName = (shellPath.split(/[\\/]/).pop() ?? "").toLowerCase();
  if (shellName.includes("zsh")) return [
    "-f",
    "-c",
    command
  ];
  if (shellName.includes("bash")) return [
    "--norc",
    "--noprofile",
    "-c",
    command
  ];
  if (shellName.startsWith("powershell") || shellName.startsWith("pwsh")) return [
    "-NoProfile",
    "-Command",
    command
  ];
  return ["-c", command];
}
448
/**
 * Looks up, in the server's own registry, the shell command a terminal should
 * run for a setup step. Nothing supplied by the client other than the two ids
 * takes part in the lookup.
 *
 * @param taskId - Id of the setup task.
 * @param stepId - Id of the step within the task.
 * @returns The command from {@link setupStepCommand}, or `null` when the step
 *   is unknown or is a `create_config` step (which writes a file instead of
 *   running in a terminal).
 */
function resolveTerminalCommand(taskId, stepId) {
  const step = findSetupStep(taskId, stepId);
  const runnable = Boolean(step) && step.kind !== "create_config";
  return runnable ? setupStepCommand(step) : null;
}
459
/**
 * Serialises `message` as JSON and writes it to the WebSocket.
 *
 * @param ws - Socket-like object exposing `send(string)`.
 * @param message - JSON-serialisable payload.
 */
function send(ws, message) {
  const payload = JSON.stringify(message);
  ws.send(payload);
}
462
/**
 * Registers the interactive-terminal WebSocket route at `/api/terminal`.
 *
 * The client connects with `taskId` and `stepId` query params identifying a
 * setup step; the server resolves the actual command from its own registry
 * (never from the request body) and, once the client signals `start`, spawns it
 * in a PTY rooted at the project. Output is streamed to the client and the
 * client's keystrokes are written to the process, so prompts (e.g. npm's
 * "Ok to proceed?") work. The trust boundary mirrors the step-execute route:
 * the client can only ask to run a step that already exists server-side.
 *
 * Messages from the client are untrusted: anything that is not a JSON object
 * with the expected field types is ignored rather than allowed to throw inside
 * the socket's message handler.
 *
 * @param app - Router exposing `get(path, handler)`.
 * @param upgradeWebSocket - Adapter that turns an events factory into a route handler.
 * @param rootPath - Absolute project root used as the PTY's working directory.
 */
function registerTerminalRoute(app, upgradeWebSocket, rootPath) {
  /** Accepts only positive integers as terminal dimensions. */
  const dimension = (value, fallback) => Number.isInteger(value) && value > 0 ? value : fallback;
  app.get("/api/terminal", upgradeWebSocket((c) => {
    const taskId = c.req.query("taskId");
    const stepId = c.req.query("stepId");
    let child;
    // Set once a spawn has been attempted, so a connection runs at most one command.
    let started = false;
    // Set on socket close, so late PTY output is not written to a dead socket.
    let closed = false;
    const notify = (ws, message) => {
      if (!closed) send(ws, message);
    };
    const start = (ws, cols, rows) => {
      if (started) return;
      const command = taskId && stepId ? resolveTerminalCommand(taskId, stepId) : null;
      if (!command) {
        notify(ws, {
          type: "error",
          message: "Unknown or non-runnable setup step."
        });
        ws.close();
        return;
      }
      started = true;
      try {
        child = pty.spawn(SHELL, shellCommandArgs(command), {
          name: "xterm-color",
          cols: dimension(cols, 80),
          rows: dimension(rows, 24),
          cwd: rootPath,
          env: process.env
        });
      } catch (err) {
        // e.g. the shell binary is missing; report it instead of throwing
        // from inside the socket's message handler.
        notify(ws, {
          type: "error",
          message: `Failed to start command: ${err?.message ?? String(err)}`
        });
        ws.close();
        return;
      }
      child.onData((data) => notify(ws, {
        type: "data",
        data
      }));
      child.onExit(({ exitCode }) => {
        // Drop the handle so later input/resize messages are ignored.
        child = void 0;
        notify(ws, {
          type: "exit",
          code: exitCode
        });
        if (!closed) ws.close();
      });
    };
    return {
      onMessage(evt, ws) {
        let msg;
        try {
          msg = JSON.parse(String(evt.data));
        } catch {
          return;
        }
        // JSON such as `null`, `42` or `"x"` parses fine but is not a message.
        if (msg === null || typeof msg !== "object") return;
        switch (msg.type) {
          case "start":
            start(ws, msg.cols, msg.rows);
            break;
          case "input":
            if (typeof msg.data === "string") child?.write(msg.data);
            break;
          case "resize":
            child?.resize(dimension(msg.cols, 80), dimension(msg.rows, 24));
            break;
        }
      },
      onClose() {
        closed = true;
        child?.kill();
        child = void 0;
      }
    };
  }));
}
535
+ //#endregion
536
+ //#region src/server/index.ts
537
/**
 * Starts the HTTP/WebSocket server: builds the prompt registry, installs the
 * tracer provider and context manager as OpenTelemetry globals, registers the
 * API, terminal and static-client routes, wires provider watchers to the
 * hot-reload broadcast, and begins listening.
 *
 * @param options - Server options.
 * @param options.promptProviders - Prompt providers (each with an `id`).
 * @param options.traceProviders - Trace providers (each with an `id`); at least one is required.
 * @param options.port - TCP port to listen on.
 * @param options.rootPath - Absolute path to the project root.
 * @param options.hasConfig - Whether a project config was found.
 * @returns `{ url, close }`; `close()` is idempotent, removes the signal
 *   handlers installed here, unregisters the OpenTelemetry globals and resolves
 *   once the server has stopped.
 * @throws if no trace provider is configured, or if the server fails to listen.
 */
async function startServer(options) {
  const { promptProviders, traceProviders, port, rootPath, hasConfig } = options;
  const promptProviderMap = new Map(promptProviders.map((p) => [p.id, p]));
  const traceProviderMap = new Map(traceProviders.map((p) => [p.id, p]));
  const promptRegistry = new PromptRegistry();
  await promptRegistry.rebuild(promptProviderMap);
  const tracerProvider = new BasicTracerProvider({ spanProcessors: traceProviders.filter((p) => !!p.getSpanProcessor).map((p) => p.getSpanProcessor()) });
  trace.setGlobalTracerProvider(tracerProvider);
  const contextManager = new AsyncLocalStorageContextManager();
  contextManager.enable();
  context.setGlobalContextManager(contextManager);
  const tracer = tracerProvider.getTracer("evalution");
  // Prefer the in-memory provider as the default target for executions.
  const defaultTraceProviderId = traceProviders.find((p) => p instanceof MemoryTraceProvider)?.id ?? traceProviders[0]?.id;
  if (!defaultTraceProviderId) throw new Error("At least one trace provider must be configured");
  const app = new Hono();
  const hotReloadSubscribers = /* @__PURE__ */ new Set();
  const broadcast = (data) => {
    for (const send of hotReloadSubscribers) send(data);
  };
  setupRoutes({
    app,
    promptProviders: promptProviderMap,
    traceProviders: traceProviderMap,
    promptRegistry,
    hotReloadSubscribers,
    rootPath,
    hasConfig,
    tracer,
    defaultTraceProviderId
  });
  registerTerminalRoute(app, upgradeWebSocket, rootPath);
  const clientRoot = fileURLToPath(new URL("../client/", import.meta.url));
  app.get("*", serveStatic({ root: clientRoot }));
  for (const [providerId, provider] of promptProviderMap) if (provider.watch) provider.watch(async (event) => {
    await promptRegistry.rebuild(promptProviderMap);
    broadcast({
      type: "prompt-changed",
      providerId,
      event
    });
  });
  for (const [providerId, provider] of traceProviderMap) if (provider.watch) provider.watch((event) => {
    broadcast({
      type: "trace-changed",
      providerId,
      event
    });
  });
  const wss = new WebSocketServer({ noServer: true });
  const url = `http://localhost:${port}`;
  const isDevServer = import.meta.url.includes("/src/");
  const server = await new Promise((resolve, reject) => {
    const s = serve({
      fetch: app.fetch,
      port,
      // NOTE(review): this binds every network interface while the API is
      // unauthenticated and can write files and run setup commands; consider
      // defaulting to a loopback address.
      hostname: "0.0.0.0",
      websocket: { server: wss }
    }, () => {
      s.off("error", reject);
      if (isDevServer) {
        console.log(`\n✨ Evalution API server running on ${url}`);
        console.log(` Frontend dev server: http://localhost:5173\n`);
      } else console.log(`\n✨ Evalution is running at ${url}\n`);
      resolve(s);
    });
    // Surface listen failures (e.g. EADDRINUSE) as a rejection instead of an
    // unhandled 'error' event.
    s.once("error", reject);
  });
  let closing;
  const close = () => {
    // Memoised: a second call returns the same promise instead of calling
    // `server.close` on a server that is no longer running.
    closing ??= new Promise((resolve, reject) => {
      // Signal handlers are per server instance; leaving them installed would
      // make a later Ctrl+C try to close this already-closed server.
      process.off("SIGINT", shutdown);
      process.off("SIGTERM", shutdown);
      // Release the OpenTelemetry globals so a server started afterwards can
      // register its own tracer provider and context manager.
      trace.disable();
      context.disable();
      // Upgraded sockets would otherwise keep `server.close` pending.
      for (const client of wss.clients) client.terminate();
      if ("closeAllConnections" in server) server.closeAllConnections();
      server.close((err) => err ? reject(err) : resolve());
    });
    return closing;
  };
  const shutdown = async () => {
    console.log("\n\nShutting down gracefully...");
    try {
      await close();
      process.exit(0);
    } catch (err) {
      console.error("Error during shutdown:", err);
      process.exit(1);
    }
  };
  process.on("SIGINT", shutdown);
  process.on("SIGTERM", shutdown);
  return {
    url,
    close
  };
}
618
+ //#endregion
619
+ //#region src/cli/config-loader-hooks.ts
620
/**
 * Registers an in-thread module-resolution hook so a project's
 * `.evalution/config.ts` can import the framework by bare specifier
 * (`import { FilePromptProvider } from 'evalution'`) regardless of where it
 * lives or whether evalution is installed in the project's `node_modules`.
 *
 * By default Node resolves `evalution` against the config file's directory,
 * which fails when evalution is run via `npx` (or pointed at another directory)
 * and isn't installed locally. This hook redirects the `evalution` specifier
 * (and its subpaths) to resolve from the running CLI instead, so the config
 * always binds to the same evalution the CLI is executing — no local install
 * required.
 *
 * Uses {@link https://nodejs.org/api/module.html#moduleregisterhooksoptions | `module.registerHooks`}
 * (synchronous, same-thread) rather than `module.register`, so it needs no
 * separate loader file — it survives bundling and behaves identically whether
 * the CLI runs from source (dev) or the compiled bundle (published).
 *
 * On a Node.js version without `module.registerHooks` the hook cannot be
 * installed; a warning is printed and the CLI continues, which still works for
 * projects that have evalution installed locally.
 *
 * @param parentURL - Module URL the `evalution` specifier is resolved against;
 * pass the CLI's own `import.meta.url`.
 */
function registerEvalutionResolver(parentURL) {
  if (typeof module.registerHooks !== "function") {
    console.warn(`Warning: Node.js ${process.version} does not provide module.registerHooks; a project config can import 'evalution' only if it is installed in the project.`);
    return;
  }
  module.registerHooks({ resolve(specifier, context, nextResolve) {
    const isEvalution = specifier === "evalution" || specifier.startsWith("evalution/");
    if (!isEvalution) return nextResolve(specifier, context);
    // Same specifier, but resolved as if it had been imported by the CLI.
    return nextResolve(specifier, {
      ...context,
      parentURL
    });
  } });
}
650
+ //#endregion
651
+ //#region src/cli/config-watcher.ts
652
/**
 * Watches `rootDir` for `.evalution/config.ts` appearing or changing and runs
 * `onConfig` each time, awaiting it so failures surface rather than vanishing.
 *
 * Used when the server starts without a config file: the onboarding UI writes
 * the config by executing a `create_config` setup step (via
 * `POST /api/setup-tasks/:taskId/steps/:stepId/execute`), and this watcher lets
 * the CLI pick it up and restart with the real config.
 *
 * Rather than watch the not-yet-existing nested config path directly, this
 * watches `rootDir` and ignores everything except the `.evalution` directory
 * and the config file itself, so the watcher reliably fires when the directory
 * and file are created together while never descending into `node_modules` and
 * friends.
 *
 * `onConfig` may be async; it is awaited and any rejection is logged instead of
 * becoming an unhandled rejection. The watcher keeps firing on subsequent
 * changes (overlapping runs are skipped), so if a load fails — e.g. the config
 * has a bad import — the user can fix the file and have it retried. The caller
 * is expected to stop the watcher (via the returned function) once the config
 * has loaded successfully.
 *
 * @param rootDir - The project root that will contain `.evalution/config.ts`.
 * @param onConfig - Run when the config file is created or changed.
 * @returns A function that stops watching.
 */
function watchForConfigCreation(rootDir, onConfig) {
  const configPath = path.join(rootDir, CONFIG_FILE_RELATIVE_PATH);
  const configDir = path.dirname(configPath);
  // Everything outside these three paths is ignored by the watcher.
  const allowed = new Set([
    rootDir,
    configDir,
    configPath
  ]);
  const watcher = chokidar.watch(rootDir, {
    ignoreInitial: true,
    persistent: true,
    depth: 2,
    ignored: (p) => !allowed.has(p)
  });
  let running = false;
  const handle = async (p) => {
    // Skip unrelated paths, and events that arrive while a run is in flight.
    if (p !== configPath || running) return;
    running = true;
    try {
      await onConfig();
    } catch (err) {
      console.error(`Failed to load config from ${configPath}:`, err);
    } finally {
      running = false;
    }
  };
  watcher.on("add", handle);
  watcher.on("change", handle);
  return () => {
    // `close()` is asynchronous; log a failure rather than leaving the
    // promise floating.
    watcher.close().catch((err) => {
      console.error(`Failed to stop watching ${rootDir}:`, err);
    });
  };
}
709
+ //#endregion
710
+ //#region src/cli/find-port.ts
711
/**
 * Checks whether `port` can currently be bound on `host` by briefly listening
 * on it with a throwaway server.
 *
 * @param port - The port to probe.
 * @param host - The host/interface to bind.
 * @returns A promise for `true` when the bind succeeded (the probe server is
 *   closed again before resolving), or `false` when listening failed.
 */
function isPortFree(port, host) {
  return new Promise((resolve) => {
    const probe = net.createServer();
    probe.once("error", () => {
      resolve(false);
      probe.close();
    });
    probe.once("listening", () => {
      probe.close(() => resolve(true));
    });
    probe.listen(port, host);
  });
}
722
/**
 * Returns the first free port at or after `preferred`, scanning upward. Used by
 * the CLI so `npx evalution` still starts when the default port is already in
 * use instead of crashing with `EADDRINUSE`.
 *
 * The scan never goes past 65535, the highest valid TCP port.
 *
 * @param preferred - The port to try first; an integer in `0..65535`.
 * @param host - The host to bind against; defaults to `0.0.0.0`.
 * @param maxAttempts - How many sequential ports to try before giving up.
 * @returns The first port in the scanned range that could be bound.
 * @throws {RangeError} If `preferred` is not a valid port number.
 * @throws If no free port is found within `maxAttempts`.
 */
async function findAvailablePort(preferred, host = "0.0.0.0", maxAttempts = 20) {
  const MAX_PORT = 65535;
  if (!Number.isInteger(preferred) || preferred < 0 || preferred > MAX_PORT) throw new RangeError(`Invalid port: ${preferred}`);
  // Clamp so the probe is never asked to listen on an out-of-range port.
  const last = Math.min(preferred + maxAttempts - 1, MAX_PORT);
  for (let port = preferred; port <= last; port++) if (await isPortFree(port, host)) return port;
  throw new Error(`No free port found in range ${preferred}-${last}`);
}
736
+ //#endregion
737
+ //#region src/cli/open-browser.ts
738
/**
 * Maps a platform to the executable and arguments that open `url` in the
 * default browser. Kept apart from {@link openBrowser} so the mapping can be
 * unit-tested without spawning anything.
 *
 * @param url - The URL to open.
 * @param platform - A `process.platform` value; defaults to the current platform.
 * @returns `{ command, args }`: `open` on macOS, `cmd /c start "" <url>` on
 *   Windows, and `xdg-open` everywhere else.
 */
function browserOpenCommand(url, platform = process.platform) {
  if (platform === "darwin") return {
    command: "open",
    args: [url]
  };
  if (platform === "win32") return {
    command: "cmd",
    args: [
      "/c",
      "start",
      "\"\"",
      url
    ]
  };
  return {
    command: "xdg-open",
    args: [url]
  };
}
767
/**
 * Launches the default browser on `url` as a detached, un-referenced child
 * process, so it neither blocks the CLI nor keeps it alive. This is a
 * convenience only: any failure (no browser, headless host, ...) is ignored on
 * purpose, because the URL is always printed to the console as well.
 *
 * @param url - The URL to open.
 */
function openBrowser(url) {
  try {
    const launch = browserOpenCommand(url);
    const opener = spawn(launch.command, launch.args, {
      detached: true,
      stdio: "ignore"
    });
    // Without a listener, a failed spawn would raise an unhandled 'error' event.
    opener.on("error", () => {});
    opener.unref();
  } catch {}
}
786
+ //#endregion
787
+ //#region src/cli/index.ts
788
+ registerEvalutionResolver(import.meta.url);
789
/**
 * Locates the project root by walking up from `startDir` until a directory
 * containing `.evalution/config.ts` is found.
 *
 * @param startDir - Directory to start searching from.
 * @returns `{ rootDir, hasConfig: true }` for the first directory that has the
 *   config file; otherwise `{ rootDir: startDir, hasConfig: false }` once the
 *   file-system root has been checked.
 */
async function findRootDir(startDir) {
  // Any failure to access the file is treated as "no config here".
  const hasConfigAt = (candidate) => fs.access(path.join(candidate, ".evalution", "config.ts")).then(() => true, () => false);
  for (let current = startDir;;) {
    if (await hasConfigAt(current)) return {
      rootDir: current,
      hasConfig: true
    };
    const above = path.dirname(current);
    if (above === current) return {
      rootDir: startDir,
      hasConfig: false
    };
    current = above;
  }
}
810
/**
 * Imports the project's `.evalution/config.ts` and returns its default export.
 *
 * Side effect: the process working directory is changed to `rootDir` before
 * the import.
 *
 * Every call imports a unique URL (the file URL plus a cache-busting query).
 * ES modules are cached per URL, including modules that failed to load, so
 * without this a config that was fixed after a failed load would keep
 * replaying the original failure.
 *
 * @param rootDir - Project root containing `.evalution/config.ts`.
 * @returns The config module's default export, or `{}` when it has none.
 * @throws Whatever the import throws (missing file, syntax error, bad import, ...).
 */
async function loadConfig(rootDir) {
  const configPath = path.join(rootDir, ".evalution", "config.ts");
  process.chdir(rootDir);
  const configUrl = pathToFileURL(configPath);
  configUrl.searchParams.set("t", String(process.hrtime.bigint()));
  const mod = await import(configUrl.href);
  console.log(`⚙️ Loaded config from ${configPath}`);
  return mod.default ?? {};
}
817
+ function applyDotenv(rootDir) {
818
+ const envPath = path.join(rootDir, ".env");
819
+ try {
820
+ process.loadEnvFile(envPath);
821
+ console.log(`📄 Loaded environment variables from ${envPath}`);
822
+ } catch (err) {
823
+ if (err?.code !== "ENOENT") console.warn(`Warning: failed to load .env from ${envPath}:`, err.message);
824
+ }
825
+ }
826
/**
 * Starts the server for a loaded (or empty) config.
 *
 * Unless the config sets `useDotenv` to `false`, the project's `.env` is
 * applied first. Missing provider lists fall back to no prompt providers and a
 * single in-memory trace provider.
 *
 * @param rootDir - Absolute path to the project root.
 * @param config - The loaded config object (may be `{}`).
 * @param hasConfig - Whether a config file exists for the project.
 * @param port - Port to listen on.
 * @returns The promise returned by {@link startServer}.
 */
function startConfiguredServer(rootDir, config, hasConfig, port) {
  const dotenvEnabled = config.useDotenv !== false;
  if (dotenvEnabled) applyDotenv(rootDir);
  const serverOptions = {
    promptProviders: config.promptProviders ?? [],
    traceProviders: config.traceProviders ?? [new MemoryTraceProvider()],
    port,
    rootPath: rootDir,
    hasConfig
  };
  return startServer(serverOptions);
}
836
/**
 * CLI entry point: `evalution [ui [path]]`.
 *
 * Finds the project root, picks a port (`PORT` when set, otherwise the first
 * free port from 3000), starts the server and opens the browser unless
 * `EVALUTION_NO_OPEN` is set. When no config exists yet, the server starts
 * unconfigured and is restarted with the real config once the config file is
 * created.
 *
 * @throws If `PORT` is set but is not a valid port number, or if startup fails.
 */
async function main() {
  const args = process.argv.slice(2);
  if (args.length > 0 && args[0] !== "ui") {
    console.error(`Unknown command: ${args[0]}`);
    console.error("Usage: evalution [ui [path]]");
    process.exit(1);
  }
  const pathArg = args[1];
  const { rootDir, hasConfig } = await findRootDir(pathArg ? path.resolve(pathArg) : process.cwd());
  let port;
  if (process.env.PORT) {
    port = Number.parseInt(process.env.PORT, 10);
    // Fail early with a clear message instead of handing NaN or an
    // out-of-range value to the server's listen call.
    if (!Number.isInteger(port) || port < 0 || port > 65535) throw new Error(`Invalid PORT environment variable: '${process.env.PORT}'`);
  } else port = await findAvailablePort(3e3);
  const maybeOpen = (url) => {
    if (!process.env.EVALUTION_NO_OPEN) openBrowser(url);
  };
  if (hasConfig) {
    maybeOpen((await startConfiguredServer(rootDir, await loadConfig(rootDir), true, port)).url);
    return;
  }
  let server = await startConfiguredServer(rootDir, {}, false, port);
  maybeOpen(server.url);
  console.log(`👀 No config found; watching ${path.join(rootDir, ".evalution", "config.ts")} for creation...`);
  const stopWatching = watchForConfigCreation(rootDir, async () => {
    const config = await loadConfig(rootDir);
    console.log("⚙️ Config loaded; restarting server...");
    if (server) {
      await server.close();
      // Remember that the old server is gone, so a retry after a failed
      // restart does not try to close it a second time.
      server = void 0;
    }
    server = await startConfiguredServer(rootDir, config, true, port);
    // Stop watching only once the restart has succeeded; if it failed, the
    // next edit of the config file triggers another attempt.
    stopWatching();
  });
}
main().catch((error) => {
  console.error("Fatal error:", error);
  process.exit(1);
});
870
+ //#endregion
871
+ export {};