libretto 0.6.11 → 0.6.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -8
- package/README.template.md +7 -8
- package/dist/cli/cli.js +0 -22
- package/dist/cli/commands/browser.js +18 -24
- package/dist/cli/commands/execution.js +254 -234
- package/dist/cli/commands/experiments.js +100 -0
- package/dist/cli/commands/setup.js +3 -310
- package/dist/cli/commands/shared.js +10 -0
- package/dist/cli/commands/snapshot.js +46 -64
- package/dist/cli/commands/status.js +1 -40
- package/dist/cli/core/browser.js +303 -124
- package/dist/cli/core/config.js +5 -6
- package/dist/cli/core/context.js +4 -0
- package/dist/cli/core/daemon/config.js +0 -6
- package/dist/cli/core/daemon/daemon.js +497 -90
- package/dist/cli/core/daemon/ipc.js +170 -129
- package/dist/cli/core/daemon/snapshot.js +48 -9
- package/dist/cli/core/experiments.js +39 -0
- package/dist/cli/core/session.js +5 -4
- package/dist/cli/core/skill-version.js +2 -1
- package/dist/cli/core/workflow-runner/runner.js +147 -0
- package/dist/cli/core/workflow-runtime.js +60 -0
- package/dist/cli/index.js +0 -2
- package/dist/cli/router.js +4 -3
- package/dist/shared/debug/pause-handler.d.ts +9 -0
- package/dist/shared/debug/pause-handler.js +15 -0
- package/dist/shared/debug/pause.d.ts +1 -2
- package/dist/shared/debug/pause.js +13 -36
- package/dist/shared/instrumentation/instrument.js +4 -4
- package/dist/shared/ipc/child-process-transport.d.ts +7 -0
- package/dist/shared/ipc/child-process-transport.js +60 -0
- package/dist/shared/ipc/child-process-transport.spec.d.ts +2 -0
- package/dist/shared/ipc/child-process-transport.spec.js +68 -0
- package/dist/shared/ipc/ipc.d.ts +46 -0
- package/dist/shared/ipc/ipc.js +165 -0
- package/dist/shared/ipc/ipc.spec.d.ts +2 -0
- package/dist/shared/ipc/ipc.spec.js +114 -0
- package/dist/shared/ipc/socket-transport.d.ts +9 -0
- package/dist/shared/ipc/socket-transport.js +143 -0
- package/dist/shared/ipc/socket-transport.spec.d.ts +2 -0
- package/dist/shared/ipc/socket-transport.spec.js +117 -0
- package/dist/shared/package-manager.d.ts +7 -0
- package/dist/shared/package-manager.js +60 -0
- package/dist/shared/paths/paths.d.ts +1 -8
- package/dist/shared/paths/paths.js +1 -49
- package/dist/shared/snapshot/capture-snapshot.d.ts +9 -0
- package/dist/shared/snapshot/capture-snapshot.js +463 -0
- package/dist/shared/snapshot/diff-snapshots.d.ts +72 -0
- package/dist/shared/snapshot/diff-snapshots.js +358 -0
- package/dist/shared/snapshot/render-snapshot.d.ts +39 -0
- package/dist/shared/snapshot/render-snapshot.js +651 -0
- package/dist/shared/snapshot/snapshot.spec.d.ts +2 -0
- package/dist/shared/snapshot/snapshot.spec.js +333 -0
- package/dist/shared/snapshot/types.d.ts +40 -0
- package/dist/shared/snapshot/types.js +0 -0
- package/dist/shared/snapshot/wait-for-page-stable.d.ts +17 -0
- package/dist/shared/snapshot/wait-for-page-stable.js +281 -0
- package/dist/shared/state/session-state.d.ts +1 -0
- package/dist/shared/state/session-state.js +1 -0
- package/docs/experiments.md +67 -0
- package/docs/releasing.md +8 -6
- package/package.json +5 -2
- package/skills/libretto/SKILL.md +19 -19
- package/skills/libretto/references/configuration-file-reference.md +6 -12
- package/skills/libretto/references/pages-and-page-targeting.md +1 -1
- package/skills/libretto-readonly/SKILL.md +2 -9
- package/src/cli/AGENTS.md +7 -0
- package/src/cli/cli.ts +0 -23
- package/src/cli/commands/browser.ts +14 -18
- package/src/cli/commands/execution.ts +303 -271
- package/src/cli/commands/experiments.ts +120 -0
- package/src/cli/commands/setup.ts +3 -400
- package/src/cli/commands/shared.ts +20 -0
- package/src/cli/commands/snapshot.ts +54 -94
- package/src/cli/commands/status.ts +1 -48
- package/src/cli/core/browser.ts +372 -150
- package/src/cli/core/config.ts +4 -5
- package/src/cli/core/context.ts +4 -0
- package/src/cli/core/daemon/config.ts +35 -19
- package/src/cli/core/daemon/daemon.ts +645 -107
- package/src/cli/core/daemon/ipc.ts +319 -214
- package/src/cli/core/daemon/snapshot.ts +71 -15
- package/src/cli/core/experiments.ts +56 -0
- package/src/cli/core/resolve-model.ts +5 -0
- package/src/cli/core/session.ts +5 -4
- package/src/cli/core/skill-version.ts +2 -1
- package/src/cli/core/workflow-runner/runner.ts +237 -0
- package/src/cli/core/workflow-runtime.ts +86 -0
- package/src/cli/index.ts +0 -1
- package/src/cli/router.ts +4 -3
- package/src/shared/debug/pause-handler.ts +20 -0
- package/src/shared/debug/pause.ts +14 -48
- package/src/shared/instrumentation/instrument.ts +4 -4
- package/src/shared/ipc/AGENTS.md +24 -0
- package/src/shared/ipc/child-process-transport.spec.ts +86 -0
- package/src/shared/ipc/child-process-transport.ts +96 -0
- package/src/shared/ipc/ipc.spec.ts +161 -0
- package/src/shared/ipc/ipc.ts +288 -0
- package/src/shared/ipc/socket-transport.spec.ts +141 -0
- package/src/shared/ipc/socket-transport.ts +189 -0
- package/src/shared/package-manager.ts +76 -0
- package/src/shared/paths/paths.ts +0 -72
- package/src/shared/snapshot/capture-snapshot.ts +615 -0
- package/src/shared/snapshot/diff-snapshots.ts +579 -0
- package/src/shared/snapshot/render-snapshot.ts +962 -0
- package/src/shared/snapshot/snapshot.spec.ts +388 -0
- package/src/shared/snapshot/types.ts +43 -0
- package/src/shared/snapshot/wait-for-page-stable.ts +425 -0
- package/src/shared/state/session-state.ts +1 -0
- package/dist/cli/commands/ai.js +0 -109
- package/dist/cli/core/ai-model.js +0 -192
- package/dist/cli/core/api-snapshot-analyzer.js +0 -86
- package/dist/cli/core/daemon/index.js +0 -16
- package/dist/cli/core/daemon/spawn.js +0 -90
- package/dist/cli/core/pause-signals.js +0 -29
- package/dist/cli/core/snapshot-analyzer.js +0 -666
- package/dist/cli/workers/run-integration-runtime.js +0 -235
- package/dist/cli/workers/run-integration-worker-protocol.js +0 -17
- package/dist/cli/workers/run-integration-worker.js +0 -64
- package/scripts/summarize-evals.mjs +0 -135
- package/src/cli/commands/ai.ts +0 -143
- package/src/cli/core/ai-model.ts +0 -298
- package/src/cli/core/api-snapshot-analyzer.ts +0 -110
- package/src/cli/core/daemon/index.ts +0 -24
- package/src/cli/core/daemon/spawn.ts +0 -171
- package/src/cli/core/pause-signals.ts +0 -35
- package/src/cli/core/snapshot-analyzer.ts +0 -855
- package/src/cli/workers/run-integration-runtime.ts +0 -326
- package/src/cli/workers/run-integration-worker-protocol.ts +0 -19
- package/src/cli/workers/run-integration-worker.ts +0 -72
|
@@ -1,235 +0,0 @@
|
|
|
1
|
-
import { appendFileSync, existsSync, readFileSync } from "node:fs";
|
|
2
|
-
import { writeFile } from "node:fs/promises";
|
|
3
|
-
import { cwd } from "node:process";
|
|
4
|
-
import { isAbsolute, resolve } from "node:path";
|
|
5
|
-
import { pathToFileURL } from "node:url";
|
|
6
|
-
import {
|
|
7
|
-
getDefaultWorkflowFromModuleExports,
|
|
8
|
-
getWorkflowsFromModuleExports,
|
|
9
|
-
instrumentContext,
|
|
10
|
-
launchBrowser
|
|
11
|
-
} from "../../index.js";
|
|
12
|
-
import { parseSessionStateContent } from "../../shared/state/index.js";
|
|
13
|
-
import {
|
|
14
|
-
getProfilePath,
|
|
15
|
-
normalizeDomain,
|
|
16
|
-
normalizeUrl
|
|
17
|
-
} from "../core/browser.js";
|
|
18
|
-
import {
|
|
19
|
-
getSessionActionsLogPath,
|
|
20
|
-
getSessionNetworkLogPath,
|
|
21
|
-
getSessionStatePath
|
|
22
|
-
} from "../core/context.js";
|
|
23
|
-
import {
|
|
24
|
-
getPauseSignalPaths,
|
|
25
|
-
removeSignalIfExists
|
|
26
|
-
} from "../core/pause-signals.js";
|
|
27
|
-
import { installSessionTelemetry } from "../core/session-telemetry.js";
|
|
28
|
-
// Delay, in milliseconds, between successive reads of the session state file while a failed run is being held open.
const FAILURE_HOLD_POLL_INTERVAL_MS = 250;
|
|
29
|
-
// Hint appended to the import-failure message when the failure looks like a TypeScript compile/tsconfig problem.
const TSCONFIG_HINT = "TypeScript compilation failed. Pass --tsconfig <path> to run against a specific tsconfig.";
|
|
30
|
-
/**
 * Decide whether a thrown value looks like a TypeScript compile failure.
 *
 * @param {unknown} error - Value caught while importing a module.
 * @returns {boolean} True for an Error named "TransformError" or one whose
 *   message starts with the tsconfig-resolution prefix; false otherwise.
 */
function isTsxCompileError(error) {
  if (!(error instanceof Error)) {
    return false;
  }
  if (error.name === "TransformError") {
    return true;
  }
  return error.message.startsWith("Cannot resolve tsconfig at path:");
}
|
|
33
|
-
/**
 * Tee everything written to `process.stdout` into a file.
 *
 * Replaces `process.stdout.write` with a wrapper that synchronously appends
 * each chunk to `filePath` and then forwards it to the real stream.
 *
 * @param {string} filePath - File that receives a copy of every chunk.
 * @returns {() => void} Function that removes the wrapper again.
 */
function mirrorStdoutToFile(filePath) {
  const stream = process.stdout;
  const forwardWrite = stream.write.bind(stream);

  const teeWrite = (chunk, ...rest) => {
    try {
      appendFileSync(
        filePath,
        Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk), "utf8"),
      );
    } catch {
      // Best effort: a failing mirror must never break normal stdout output.
    }
    return forwardWrite(chunk, ...rest);
  };

  stream.write = teeWrite;

  return function restoreStdoutWrite() {
    stream.write = forwardWrite;
  };
}
|
|
48
|
-
/**
 * Read the `pid` recorded in a session's state file.
 *
 * @param {string} session - Session name used to locate the state file.
 * @returns {*} The parsed `pid`, or null when the file is missing, cannot be
 *   read or parsed, or carries no pid.
 */
function readSessionStatePid(session) {
  const sessionStateFile = getSessionStatePath(session);
  if (existsSync(sessionStateFile)) {
    try {
      const rawState = readFileSync(sessionStateFile, "utf8");
      const { pid } = parseSessionStateContent(rawState, sessionStateFile);
      return pid ?? null;
    } catch {
      // Unreadable or invalid state is reported the same way as "no pid".
      return null;
    }
  }
  return null;
}
|
|
57
|
-
/**
 * Keep the process alive after a failed run until the session state file no
 * longer records `expectedPid`.
 *
 * The state file is re-read every FAILURE_HOLD_POLL_INTERVAL_MS; there is no
 * timeout, so this resolves only once the recorded pid differs (or cannot be
 * read, which is reported as null).
 *
 * @param {{ session: string, expectedPid: number, logger: { info: Function } }} args
 * @returns {Promise<void>}
 */
async function waitForFailureSessionRelease(args) {
  const { session, expectedPid, logger } = args;
  logger.info("run-failure-session-hold", { session, expectedPid });

  let currentPid = readSessionStatePid(session);
  while (currentPid === expectedPid) {
    await new Promise((wake) => setTimeout(wake, FAILURE_HOLD_POLL_INTERVAL_MS));
    currentPid = readSessionStatePid(session);
  }

  logger.info("run-failure-session-released", {
    session,
    expectedPid,
    currentPid,
  });
}
|
|
75
|
-
/**
 * Build the multi-line message shown when a local auth profile file is missing.
 *
 * @param {{ normalizedDomain: string, profilePath: string, session: string }} args
 * @returns {string} Newline-joined explanation followed by the three steps
 *   to create the profile.
 */
function getMissingLocalAuthProfileError(args) {
  const { normalizedDomain, profilePath, session } = args;
  const creationSteps = [
    ` 1. libretto open https://${normalizedDomain} --headed --session ${session}`,
    " 2. Log in manually in the browser window.",
    ` 3. libretto save ${normalizedDomain} --session ${session}`,
  ];
  const summary = [
    `Local auth profile not found for domain "${normalizedDomain}".`,
    `Expected profile file: ${profilePath}`,
    "To create it:",
  ];
  return [...summary, ...creationSteps].join("\n");
}
|
|
85
|
-
/**
 * Turn an integration file path into an absolute path and verify it exists.
 *
 * @param {string} integrationPath - Absolute path, or path relative to the
 *   current working directory.
 * @returns {string} The absolute path (absolute input is returned unchanged).
 * @throws {Error} When nothing exists at the resulting path.
 */
function getAbsoluteIntegrationPath(integrationPath) {
  let candidate = integrationPath;
  if (!isAbsolute(candidate)) {
    candidate = resolve(cwd(), candidate);
  }
  if (existsSync(candidate)) {
    return candidate;
  }
  throw new Error(`Integration file does not exist: ${candidate}`);
}
|
|
92
|
-
/**
 * Import an integration module and return its default-exported workflow.
 *
 * @param {string} absolutePath - Absolute path of the integration file.
 * @returns {Promise<object>} The workflow found on the module's default export.
 * @throws {Error} When the module cannot be imported (the original error is
 *   attached as `cause`), or when the module has no default-exported workflow.
 */
async function loadDefaultWorkflow(absolutePath) {
  let loadedModule;
  try {
    loadedModule = await import(pathToFileURL(absolutePath).href);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    const compileHint = isTsxCompileError(error) ? `\n${TSCONFIG_HINT}` : "";
    // Keep the underlying error (and its stack) reachable through `cause`
    // instead of discarding it when wrapping.
    throw new Error(
      `Failed to import integration module at ${absolutePath}: ${message}${compileHint}`,
      { cause: error },
    );
  }

  const defaultWorkflow = getDefaultWorkflowFromModuleExports(loadedModule);
  if (defaultWorkflow) {
    return defaultWorkflow;
  }

  // No default export: tailor the message to whether named workflows exist.
  const availableWorkflowNames = getWorkflowsFromModuleExports(loadedModule).map(
    (candidate) => candidate.name,
  );
  if (availableWorkflowNames.length === 0) {
    throw new Error(
      `No default-exported workflow found in ${absolutePath}. Export the workflow with \`export default workflow("name", handler)\`.`,
    );
  }
  throw new Error(
    `No default-exported workflow found in ${absolutePath}. libretto run only uses the file's default export. Available named workflows: ${availableWorkflowNames.join(", ")}`,
  );
}
|
|
120
|
-
/**
 * Instrument a browser context with visualization enabled.
 *
 * @param {{ context: unknown, logger: unknown, instrument?: Function }} args -
 *   `instrument` overrides the default `instrumentContext` implementation.
 * @returns {Promise<void>} Resolves once the instrumentation call settles.
 */
async function installHeadedWorkflowVisualization(args) {
  const instrument = args.instrument ?? instrumentContext;
  const instrumentOptions = { visualize: true, logger: args.logger };
  await instrument(args.context, instrumentOptions);
}
|
|
126
|
-
/**
 * Load an integration file's default workflow, launch a browser session for
 * it, run the workflow, and record the outcome in the session's signal files.
 *
 * On workflow failure a "failed" signal file is written and the function keeps
 * the browser open until the session state no longer records this process's
 * pid. On success a "completed" signal file is written.
 *
 * Cleanup (restoring stdout and closing the browser, if it was launched) runs
 * on every exit path once stdout mirroring has been installed, including
 * failures during browser launch or telemetry setup.
 *
 * @param {object} args - Run request: integrationPath, session, params,
 *   headless, visualize, authProfileDomain, viewport, accessMode, cdpEndpoint,
 *   provider.
 * @param {{ logger: object }} options - Session logger.
 * @returns {Promise<{ status: "completed" | "failed-held" }>}
 * @throws {Error} When the integration cannot be loaded, the auth profile is
 *   missing, or browser/telemetry setup fails.
 */
async function runIntegrationInternal(args, options) {
  const { logger } = options;
  const absolutePath = getAbsoluteIntegrationPath(args.integrationPath);
  const workflow = await loadDefaultWorkflow(absolutePath);

  // Clear signal files left over from a previous run of this session.
  const signalPaths = getPauseSignalPaths(args.session);
  await removeSignalIfExists(signalPaths.pausedSignalPath);
  await removeSignalIfExists(signalPaths.resumeSignalPath);
  await removeSignalIfExists(signalPaths.completedSignalPath);
  await removeSignalIfExists(signalPaths.failedSignalPath);

  const restoreStdout = mirrorStdoutToFile(signalPaths.outputSignalPath);
  // Assigned once the browser has launched; stays undefined if setup fails earlier.
  let browserSession;
  try {
    console.log(
      `Running workflow "${workflow.name}" from ${absolutePath} (${args.headless ? "headless" : "headed"})...`,
    );
    const integrationLogger = logger.withScope("integration-run", {
      integrationPath: absolutePath,
      workflowName: workflow.name,
      session: args.session,
    });

    // Resolve the optional local auth profile and fail early if it is missing.
    const authProfileDomain = args.authProfileDomain;
    const normalizedAuthProfileDomain = authProfileDomain
      ? normalizeDomain(normalizeUrl(authProfileDomain))
      : void 0;
    const storageStatePath = normalizedAuthProfileDomain
      ? getProfilePath(normalizedAuthProfileDomain)
      : void 0;
    if (normalizedAuthProfileDomain && storageStatePath && !existsSync(storageStatePath)) {
      throw new Error(
        getMissingLocalAuthProfileError({
          normalizedDomain: normalizedAuthProfileDomain,
          profilePath: storageStatePath,
          session: args.session,
        }),
      );
    }

    browserSession = await launchBrowser({
      sessionName: args.session,
      headless: args.headless,
      storageStatePath,
      viewport: args.viewport,
      accessMode: args.accessMode,
      cdpEndpoint: args.cdpEndpoint,
      provider: args.provider,
    });

    if (!args.headless && args.visualize !== false) {
      await installHeadedWorkflowVisualization({
        context: browserSession.context,
        logger: integrationLogger,
      });
    }

    // Append each action / network entry as one JSON line to the session logs.
    const actionsLogPath = getSessionActionsLogPath(args.session);
    const networkLogPath = getSessionNetworkLogPath(args.session);
    await installSessionTelemetry({
      context: browserSession.context,
      initialPage: browserSession.page,
      includeUserDomActions: true,
      logAction: (entry) => {
        appendFileSync(actionsLogPath, JSON.stringify(entry) + "\n");
      },
      logNetwork: (entry) => {
        appendFileSync(networkLogPath, JSON.stringify(entry) + "\n");
      },
    });

    // Defines a global `__name` helper in every page.
    // NOTE(review): presumably a shim for a bundler-injected `__name` call in
    // functions serialized into the page — confirm.
    await browserSession.context.addInitScript(() => {
      globalThis.__name = (target, value) => Object.defineProperty(target, "name", { value, configurable: true });
    });

    const workflowContext = {
      session: args.session,
      page: browserSession.page,
    };

    try {
      await workflow.run(workflowContext, args.params ?? {});
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      await writeFile(
        signalPaths.failedSignalPath,
        JSON.stringify(
          {
            failedAt: (/* @__PURE__ */ new Date()).toISOString(),
            message: errorMessage,
            phase: "workflow",
          },
          null,
          2,
        ),
        "utf8",
      );
      // Hold the browser open until the session state stops pointing at this process.
      await waitForFailureSessionRelease({
        session: args.session,
        expectedPid: process.pid,
        logger,
      });
      return { status: "failed-held" };
    }

    await writeFile(
      signalPaths.completedSignalPath,
      JSON.stringify({ completedAt: (/* @__PURE__ */ new Date()).toISOString() }, null, 2),
      "utf8",
    );
    return { status: "completed" };
  } finally {
    restoreStdout();
    if (browserSession !== undefined) {
      await browserSession.close();
    }
  }
}
|
|
227
|
-
/**
 * Worker-facing entry point: run an integration file with the given logger.
 *
 * @param {object} args - Run request forwarded unchanged to runIntegrationInternal.
 * @param {object} logger - Session logger, passed on as `options.logger`.
 * @returns {Promise<{ status: string }>} Result of the run.
 */
async function runIntegrationFromFileInWorker(args, logger) {
  const runOptions = { logger };
  const outcome = await runIntegrationInternal(args, runOptions);
  return outcome;
}
|
|
232
|
-
export {
|
|
233
|
-
installHeadedWorkflowVisualization,
|
|
234
|
-
runIntegrationFromFileInWorker
|
|
235
|
-
};
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
import { z } from "zod";
|
|
2
|
-
import { SessionAccessModeSchema } from "../../shared/state/index.js";
|
|
3
|
-
// Optional viewport size requested for the browser.
const WorkerViewportSchema = z.object({
  width: z.number(),
  height: z.number(),
});

// Optional reference to a browser provider and its session id.
const WorkerProviderSchema = z.object({
  name: z.string(),
  sessionId: z.string(),
});

/**
 * Shape of the JSON payload passed to the run-integration worker.
 * `visualize` defaults to true and `accessMode` to "write-access" when omitted.
 */
const RunIntegrationWorkerRequestSchema = z.object({
  integrationPath: z.string().min(1),
  session: z.string().min(1),
  params: z.unknown(),
  headless: z.boolean(),
  visualize: z.boolean().default(true),
  authProfileDomain: z.string().optional(),
  viewport: WorkerViewportSchema.optional(),
  accessMode: SessionAccessModeSchema.default("write-access"),
  cdpEndpoint: z.string().optional(),
  provider: WorkerProviderSchema.optional(),
});
|
|
15
|
-
export {
|
|
16
|
-
RunIntegrationWorkerRequestSchema
|
|
17
|
-
};
|
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
import { writeFile } from "node:fs/promises";
|
|
2
|
-
import { ZodError } from "zod";
|
|
3
|
-
import {
|
|
4
|
-
RunIntegrationWorkerRequestSchema
|
|
5
|
-
} from "./run-integration-worker-protocol.js";
|
|
6
|
-
import { runIntegrationFromFileInWorker } from "./run-integration-runtime.js";
|
|
7
|
-
import { ensureLibrettoSetup, withSessionLogger } from "../core/context.js";
|
|
8
|
-
import { getPauseSignalPaths } from "../core/pause-signals.js";
|
|
9
|
-
/**
 * Extract and validate the worker request from the process arguments.
 *
 * @param {string[]} argv - Process argument vector; the JSON payload is at index 2.
 * @returns {object} The payload parsed by RunIntegrationWorkerRequestSchema.
 * @throws {Error} When the payload is missing, is not valid JSON, or fails
 *   schema validation. Non-Zod errors raised by the schema are rethrown as-is.
 */
function parseWorkerRequest(argv) {
  const rawPayload = argv[2];
  if (!rawPayload) {
    throw new Error("Missing worker payload argument.");
  }

  const decodePayload = () => {
    try {
      return JSON.parse(rawPayload);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Invalid worker payload JSON: ${reason}`);
    }
  };
  const payload = decodePayload();

  try {
    return RunIntegrationWorkerRequestSchema.parse(payload);
  } catch (error) {
    if (!(error instanceof ZodError)) {
      throw error;
    }
    // Flatten every schema issue into "path: message", using "root" for an empty path.
    const describeIssue = (issue) => `${issue.path.join(".") || "root"}: ${issue.message}`;
    const details = error.issues.map(describeIssue).join("; ");
    throw new Error(`Worker payload is invalid: ${details}`);
  }
}
|
|
32
|
-
/**
 * Worker entry point: parse the request from `process.argv`, run the
 * integration inside a session logger, and exit.
 *
 * Exit code is 0 when the run call resolves and 1 when anything throws. On
 * failure with a known session, a "failed" signal file (phase "setup") is
 * written. When the session is unknown, or the signal file cannot be written,
 * the error is printed to stderr so that it is never lost.
 *
 * @returns {Promise<never>} Always ends by calling `process.exit`.
 */
async function main() {
  let request = null;
  let exitCode = 0;
  try {
    request = parseWorkerRequest(process.argv);
    const workerRequest = request;
    ensureLibrettoSetup();
    await withSessionLogger(workerRequest.session, async (logger) => {
      await runIntegrationFromFileInWorker(workerRequest, logger);
    });
  } catch (error) {
    // Set first so that a failure while reporting still exits non-zero.
    exitCode = 1;
    const message = error instanceof Error ? error.message : String(error);
    if (request) {
      const { failedSignalPath } = getPauseSignalPaths(request.session);
      try {
        await writeFile(
          failedSignalPath,
          JSON.stringify(
            {
              failedAt: (/* @__PURE__ */ new Date()).toISOString(),
              message,
              phase: "setup",
            },
            null,
            2,
          ),
          "utf8",
        );
      } catch (signalError) {
        // The signal file is the normal reporting channel; fall back to stderr.
        const signalMessage = signalError instanceof Error ? signalError.message : String(signalError);
        console.error(`Failed to write failure signal file ${failedSignalPath}: ${signalMessage}`);
        console.error(message);
      }
    } else {
      // No session is known (the payload never parsed), so there is no signal
      // file to write; surface the error instead of dropping it.
      console.error(message);
    }
  }
  process.exit(exitCode);
}
|
|
64
|
-
void main();
|
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
import { readdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
4
|
-
import { basename, join, resolve } from "node:path";
|
|
5
|
-
import { fileURLToPath } from "node:url";
|
|
6
|
-
|
|
7
|
-
/** Print the command-line usage line for this script to stderr. */
function usage() {
  const usageLine = "Usage: node scripts/summarize-evals.mjs <score-dir> <summary-json-path>";
  console.error(usageLine);
}
|
|
12
|
-
|
|
13
|
-
/**
 * Coerce a raw failure entry into `{ criterion, reason }` with trimmed strings.
 *
 * @param {*} failure - Raw entry; may be null/undefined or lack either field.
 * @returns {{ criterion: string, reason: string }} Missing fields become "".
 */
function normalizeFailureRecord(failure) {
  const toTrimmedText = (value) => String(value ?? "").trim();
  return {
    criterion: toTrimmedText(failure?.criterion),
    reason: toTrimmedText(failure?.reason),
  };
}
|
|
19
|
-
|
|
20
|
-
/**
 * Coerce a raw score record into a uniform shape.
 *
 * @param {*} record - Parsed score JSON; may be null/undefined or partial.
 * @returns {{ name: string, passed: number, total: number, percent: number,
 *   failures: Array<{ criterion: string, reason: string }> }} Missing numeric
 *   fields become 0; failures lacking a criterion or a reason are dropped, and
 *   a non-array `failures` becomes an empty list.
 */
function normalizeRecord(record) {
  let failures = [];
  if (Array.isArray(record?.failures)) {
    failures = record.failures
      .map(normalizeFailureRecord)
      .filter(({ criterion, reason }) => criterion.length > 0 && reason.length > 0);
  }

  const toNumber = (value) => Number(value ?? 0);
  return {
    name: String(record?.name ?? "").trim(),
    passed: toNumber(record?.passed),
    total: toNumber(record?.total),
    percent: toNumber(record?.percent),
    failures,
  };
}
|
|
38
|
-
|
|
39
|
-
/**
 * Load and normalize every `*.json` score file directly inside a directory.
 *
 * @param {string} scoreDirArg - Directory path (resolved against the cwd).
 * @returns {Array<object>} Normalized records sorted by name via `localeCompare`.
 * @throws When the directory cannot be read or a file is not valid JSON.
 */
export function loadScoreRecords(scoreDirArg) {
  const scoreDir = resolve(scoreDirArg);
  const records = [];
  for (const entry of readdirSync(scoreDir, { withFileTypes: true })) {
    // Only regular files with a .json suffix are score files.
    if (!entry.isFile() || !entry.name.endsWith(".json")) {
      continue;
    }
    const rawJson = readFileSync(join(scoreDir, entry.name), "utf8");
    records.push(normalizeRecord(JSON.parse(rawJson)));
  }
  records.sort((left, right) => String(left.name).localeCompare(String(right.name)));
  return records;
}
|
|
49
|
-
|
|
50
|
-
/**
 * Aggregate normalized score records into a summary object.
 *
 * @param {Array<{ passed: number, total: number, failures: Array }>} records
 * @returns {{ generatedAt: string, recordCount: number, passed: number,
 *   total: number, percent: number, failingRecordCount: number,
 *   records: Array }} `percent` is passed/total as a percentage rounded to two
 *   decimals (0 when total is 0); `records` is the input array itself.
 */
export function buildSummary(records) {
  let passed = 0;
  let total = 0;
  let failingRecordCount = 0;
  for (const record of records) {
    // `|| 0` treats NaN and other falsy values as zero before summing.
    passed += Number(record.passed || 0);
    total += Number(record.total || 0);
    if (record.failures.length > 0) {
      failingRecordCount += 1;
    }
  }

  let percent = 0;
  if (total > 0) {
    percent = Number(((passed / total) * 100).toFixed(2));
  }

  return {
    generatedAt: new Date().toISOString(),
    recordCount: records.length,
    passed,
    total,
    percent,
    failingRecordCount,
    records,
  };
}
|
|
72
|
-
|
|
73
|
-
/**
 * Render a summary object as a Markdown report.
 *
 * The report has an overview list, a per-record "Breakdown" section when there
 * are records, and a "Failed Evals" section when `failingRecordCount` is
 * positive. Only the base name of the summary path is shown.
 *
 * @param {object} summary - Object shaped like the result of buildSummary.
 * @param {string} summaryPathArg - Path of the written summary JSON file.
 * @returns {string} Markdown text ending in exactly one newline.
 */
export function buildMarkdown(summary, summaryPathArg) {
  const formatScore = (record) => `\`${record.percent}%\` (${record.passed}/${record.total})`;

  const sections = [
    [
      "# Eval Summary",
      "",
      `- Overall score: \`${summary.percent}%\``,
      `- Passed criteria: \`${summary.passed}/${summary.total}\``,
      `- Recorded score entries: \`${summary.recordCount}\``,
      `- Failed evals: \`${summary.failingRecordCount}\``,
      `- Summary file: \`${basename(summaryPathArg)}\``,
    ],
  ];

  if (summary.records.length > 0) {
    const breakdown = summary.records.map((record) => {
      const status = record.failures.length > 0 ? "fail" : "pass";
      return `- ${status} \`${record.name}\`: ${formatScore(record)}`;
    });
    sections.push(["", "## Breakdown", "", ...breakdown]);
  }

  if (summary.failingRecordCount > 0) {
    const failedEvals = summary.records
      .filter((candidate) => candidate.failures.length > 0)
      .flatMap((record) => [
        `### \`${record.name}\``,
        "",
        `- Score: ${formatScore(record)}`,
        ...record.failures.map((failure) => `- ${failure.criterion}: ${failure.reason}`),
        "",
      ]);
    sections.push(["", "## Failed Evals", "", ...failedEvals]);
  }

  // Trailing blank lines are trimmed so the output ends with a single newline.
  return `${sections.flat().join("\n").trimEnd()}\n`;
}
|
|
113
|
-
|
|
114
|
-
/**
 * CLI driver: read score files, write the JSON summary, print the Markdown report.
 *
 * @param {string[]} argv - Process argument vector; index 2 is the score
 *   directory and index 3 the summary JSON output path.
 * Prints usage and exits with code 1 when either argument is missing.
 */
function main(argv) {
  const [scoreDirArg, summaryPathArg] = argv.slice(2);
  if (!(scoreDirArg && summaryPathArg)) {
    usage();
    process.exit(1);
  }

  const summaryPath = resolve(summaryPathArg);
  const summary = buildSummary(loadScoreRecords(scoreDirArg));
  const summaryJson = `${JSON.stringify(summary, null, 2)}\n`;

  writeFileSync(summaryPath, summaryJson, "utf8");
  process.stdout.write(buildMarkdown(summary, summaryPath));
}
|
|
129
|
-
|
|
130
|
-
// Run the CLI only when this file is the script Node was started with, so
// that importing the module for its exports has no side effects.
const entryScriptPath = process.argv[1];
const isDirectInvocation =
  Boolean(entryScriptPath) && resolve(entryScriptPath) === fileURLToPath(import.meta.url);
if (isDirectInvocation) {
  main(process.argv);
}
|
package/src/cli/commands/ai.ts
DELETED
|
@@ -1,143 +0,0 @@
|
|
|
1
|
-
import { z } from "zod";
|
|
2
|
-
import {
|
|
3
|
-
CURRENT_CONFIG_VERSION,
|
|
4
|
-
readSnapshotModel,
|
|
5
|
-
writeSnapshotModel,
|
|
6
|
-
clearSnapshotModel,
|
|
7
|
-
} from "../core/config.js";
|
|
8
|
-
import { LIBRETTO_CONFIG_PATH } from "../core/context.js";
|
|
9
|
-
import { DEFAULT_SNAPSHOT_MODELS } from "../core/ai-model.js";
|
|
10
|
-
import { SimpleCLI } from "../framework/simple-cli.js";
|
|
11
|
-
|
|
12
|
-
/**
 * Extra shorthand names accepted by `ai configure`, each mapped to one of the
 * default snapshot models.
 */
const PROVIDER_ALIASES: Record<string, string> = {
  // Two spellings that both select the Google default model.
  gemini: DEFAULT_SNAPSHOT_MODELS.google,
  google: DEFAULT_SNAPSHOT_MODELS.google,
  // Selects the Anthropic default model.
  claude: DEFAULT_SNAPSHOT_MODELS.anthropic,
};
|
|
17
|
-
|
|
18
|
-
/** Provider shorthands advertised in `ai configure` help and error text. */
const CONFIGURE_PROVIDERS = ["openai", "anthropic", "gemini", "vertex"] as const;

/**
 * Join the advertised provider shorthands into one display string.
 *
 * @param separator - Text placed between provider names.
 * @returns The provider names joined by `separator`.
 */
function formatConfigureProviders(separator: string = " | "): string {
  const providerNames: readonly string[] = CONFIGURE_PROVIDERS;
  return providerNames.join(separator);
}
|
|
28
|
-
|
|
29
|
-
/**
 * Print the configured snapshot model and the config file that holds it.
 *
 * @param model - Model string to display.
 * @param configPath - Path of the config file to display.
 */
function printSnapshotModelConfig(model: string, configPath: string): void {
  const report: Array<[string, string]> = [
    ["Snapshot model", model],
    ["Config file", configPath],
  ];
  for (const [label, value] of report) {
    console.log(`${label}: ${value}`);
  }
}
|
|
33
|
-
|
|
34
|
-
/**
 * Resolve the model string from an `ai configure` argument.
 * Accepts a provider shorthand ("openai", "anthropic", "gemini", "vertex")
 * or a full provider/model-id string ("openai/gpt-4o", "anthropic/claude-sonnet-4-6").
 *
 * @param input - Raw argument text; surrounding whitespace is ignored.
 * @returns The full model string, or null when the input is blank or is not a
 *   known provider shorthand. Shorthands are matched case-insensitively.
 */
function resolveModelFromInput(input: string): string | null {
  const trimmed = input.trim();
  if (!trimmed) return null;

  // Full model string (contains a slash)
  if (trimmed.includes("/")) return trimmed;

  // Provider shorthand. Only own keys count: a plain `table[key]` lookup would
  // also reach inherited members, so input such as "constructor" or "toString"
  // would yield a function instead of a model string.
  const normalized = trimmed.toLowerCase();
  const lookupOwn = (table: Record<string, string>): string | undefined =>
    Object.prototype.hasOwnProperty.call(table, normalized)
      ? table[normalized]
      : undefined;

  return (
    lookupOwn(DEFAULT_SNAPSHOT_MODELS as Record<string, string>) ??
    lookupOwn(PROVIDER_ALIASES) ??
    null
  );
}
|
|
54
|
-
|
|
55
|
-
/**
 * Show, set, or clear the configured snapshot model.
 *
 * - `clear` set: remove the stored model and report whether one existed.
 * - no preset and no `clear`: print the current model, or a hint when none is set.
 * - preset given: resolve it to a model string, store it, and print it.
 *
 * @param input - `preset` is a provider shorthand or provider/model-id;
 *   `clear` requests removal and takes precedence over `preset`.
 * @param options - `configureCommandName` is the command text used in hints
 *   (default "npx libretto ai configure"); `configPath` overrides the config
 *   file location (default LIBRETTO_CONFIG_PATH).
 * @throws {Error} When the preset is not a known provider or model string;
 *   usage text is printed before throwing.
 */
export function runAiConfigure(
  input: {
    preset?: string;
    clear?: boolean;
  },
  options: {
    configureCommandName?: string;
    configPath?: string;
  } = {},
): void {
  const configureCommandName =
    options.configureCommandName ?? "npx libretto ai configure";
  const configPath = options.configPath ?? LIBRETTO_CONFIG_PATH;
  const presetArg = input.preset?.trim();

  // Clearing wins over any preset that was also supplied.
  if (input.clear) {
    const removed = clearSnapshotModel(configPath);
    console.log(
      removed
        ? `Cleared snapshot model config: ${configPath}`
        : "No snapshot model was set.",
    );
    return;
  }

  // No preset: report the current configuration.
  if (!presetArg) {
    const currentModel = readSnapshotModel(configPath);
    if (currentModel) {
      printSnapshotModelConfig(currentModel, configPath);
      return;
    }
    console.log(
      `No snapshot model set. Choose a default model: ${configureCommandName} ${formatConfigureProviders()}`,
    );
    console.log("Provider credentials still come from your shell or .env file.");
    return;
  }

  const model = resolveModelFromInput(presetArg);
  if (model) {
    writeSnapshotModel(model, configPath);
    console.log("Snapshot model saved.");
    printSnapshotModelConfig(model, configPath);
    return;
  }

  const usageLines = [
    `Usage: ${configureCommandName} <${CONFIGURE_PROVIDERS.join("|")}|provider/model-id>`,
    ` ${configureCommandName}`,
    ` ${configureCommandName} --clear`,
  ];
  console.log(usageLines.join("\n"));
  throw new Error(
    `Invalid provider or model. Use one of: ${formatConfigureProviders()}, or a full model string like "openai/gpt-4o".`,
  );
}
|
|
112
|
-
|
|
113
|
-
// Optional positional argument: which provider or model to configure.
const aiConfigurePresetPositional = SimpleCLI.positional(
  "preset",
  z.string().optional(),
  { help: "Provider shorthand or provider/model-id" },
);

// `--clear` flag: remove the stored AI configuration.
const aiConfigureClearFlag = SimpleCLI.flag({ help: "Clear existing AI config" });

/** Input definition for `ai configure`: one optional positional plus `--clear`. */
export const aiConfigureInput = SimpleCLI.input({
  positionals: [aiConfigurePresetPositional],
  named: {
    clear: aiConfigureClearFlag,
  },
});
|
|
123
|
-
|
|
124
|
-
// `ai configure`: forwards the parsed input to runAiConfigure, using the bare
// "libretto ai configure" command name in any hints it prints.
const aiConfigureCommand = SimpleCLI.command({
  description: "Configure AI runtime",
})
  .input(aiConfigureInput)
  .handle(async ({ input }) => {
    const { clear, preset } = input;
    runAiConfigure(
      { clear, preset },
      { configureCommandName: "libretto ai configure" },
    );
  });

/** Command group registered for the `ai` subcommands. */
export const aiCommands = SimpleCLI.group({
  description: "AI commands",
  routes: {
    configure: aiConfigureCommand,
  },
});
|