usertester 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +219 -0
- package/dist/browser/agent.d.ts +33 -0
- package/dist/browser/agent.js +393 -0
- package/dist/browser/agent.js.map +1 -0
- package/dist/cli/cleanup.d.ts +5 -0
- package/dist/cli/cleanup.js +75 -0
- package/dist/cli/cleanup.js.map +1 -0
- package/dist/cli/harness.d.ts +10 -0
- package/dist/cli/harness.js +108 -0
- package/dist/cli/harness.js.map +1 -0
- package/dist/cli/index.d.ts +5 -0
- package/dist/cli/index.js +31 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/kill.d.ts +5 -0
- package/dist/cli/kill.js +46 -0
- package/dist/cli/kill.js.map +1 -0
- package/dist/cli/logs.d.ts +5 -0
- package/dist/cli/logs.js +64 -0
- package/dist/cli/logs.js.map +1 -0
- package/dist/cli/profiles.d.ts +5 -0
- package/dist/cli/profiles.js +67 -0
- package/dist/cli/profiles.js.map +1 -0
- package/dist/cli/send.d.ts +5 -0
- package/dist/cli/send.js +46 -0
- package/dist/cli/send.js.map +1 -0
- package/dist/cli/setup.d.ts +6 -0
- package/dist/cli/setup.js +168 -0
- package/dist/cli/setup.js.map +1 -0
- package/dist/cli/spawn.d.ts +5 -0
- package/dist/cli/spawn.js +52 -0
- package/dist/cli/spawn.js.map +1 -0
- package/dist/cli/status.d.ts +5 -0
- package/dist/cli/status.js +85 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/harness/applier.d.ts +38 -0
- package/dist/harness/applier.js +152 -0
- package/dist/harness/applier.js.map +1 -0
- package/dist/harness/index.d.ts +14 -0
- package/dist/harness/index.js +110 -0
- package/dist/harness/index.js.map +1 -0
- package/dist/harness/patterns.d.ts +14 -0
- package/dist/harness/patterns.js +96 -0
- package/dist/harness/patterns.js.map +1 -0
- package/dist/harness/proposer.d.ts +26 -0
- package/dist/harness/proposer.js +181 -0
- package/dist/harness/proposer.js.map +1 -0
- package/dist/harness/traces.d.ts +29 -0
- package/dist/harness/traces.js +65 -0
- package/dist/harness/traces.js.map +1 -0
- package/dist/harness/validator.d.ts +6 -0
- package/dist/harness/validator.js +112 -0
- package/dist/harness/validator.js.map +1 -0
- package/dist/inbox/agentmail.d.ts +11 -0
- package/dist/inbox/agentmail.js +36 -0
- package/dist/inbox/agentmail.js.map +1 -0
- package/dist/llm/provider.d.ts +15 -0
- package/dist/llm/provider.js +65 -0
- package/dist/llm/provider.js.map +1 -0
- package/dist/orchestrator/agent.d.ts +17 -0
- package/dist/orchestrator/agent.js +195 -0
- package/dist/orchestrator/agent.js.map +1 -0
- package/dist/orchestrator/index.d.ts +7 -0
- package/dist/orchestrator/index.js +92 -0
- package/dist/orchestrator/index.js.map +1 -0
- package/dist/orchestrator/retry.d.ts +27 -0
- package/dist/orchestrator/retry.js +145 -0
- package/dist/orchestrator/retry.js.map +1 -0
- package/dist/orchestrator/session.d.ts +13 -0
- package/dist/orchestrator/session.js +55 -0
- package/dist/orchestrator/session.js.map +1 -0
- package/dist/output/events.d.ts +12 -0
- package/dist/output/events.js +81 -0
- package/dist/output/events.js.map +1 -0
- package/dist/profiles/learner.d.ts +4 -0
- package/dist/profiles/learner.js +168 -0
- package/dist/profiles/learner.js.map +1 -0
- package/dist/tools/captcha.d.ts +19 -0
- package/dist/tools/captcha.js +76 -0
- package/dist/tools/captcha.js.map +1 -0
- package/dist/tools/inbox.d.ts +30 -0
- package/dist/tools/inbox.js +65 -0
- package/dist/tools/inbox.js.map +1 -0
- package/dist/types.d.ts +121 -0
- package/dist/types.js +30 -0
- package/dist/types.js.map +1 -0
- package/package.json +60 -0
- package/tasks.example.json +5 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validates a proposed code patch by:
|
|
3
|
+
* 1. Verifying oldCode appears exactly once in the source file
|
|
4
|
+
* 2. Applying the patch to a temp copy of src/
|
|
5
|
+
* 3. Running tsc --noEmit on the temp copy with a 15-second timeout
|
|
6
|
+
*/
|
|
7
|
+
import { execFile } from 'node:child_process';
|
|
8
|
+
import os from 'node:os';
|
|
9
|
+
import fs from 'node:fs';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
/**
 * Validate a proposed code patch without modifying the real source tree.
 *
 * Steps:
 *   1. Read `patch.file` (relative to `projectRoot`) and require that
 *      `patch.oldCode` occurs exactly once in it.
 *   2. Copy `<projectRoot>/src` into a fresh temp directory and write the
 *      patched file into that copy.
 *   3. Write a tsconfig derived from the project's own (forced to `noEmit`)
 *      into the temp directory and run tsc against it.
 *
 * @param patch       Object with `file` (path relative to the project root),
 *                    `oldCode` (text to replace) and `newCode` (replacement).
 * @param projectRoot Directory containing `src/` and, optionally, `tsconfig.json`.
 * @returns `{ valid: true }` when tsc succeeds, otherwise
 *          `{ valid: false, error }` describing the first problem found.
 */
export async function validatePatch(patch, projectRoot) {
    // Step 1: Verify oldCode appears exactly once
    const absoluteFilePath = path.join(projectRoot, patch.file);
    // Reject paths that climb out of the project: the same relative path is
    // later joined onto the temp dir, so `..` segments would also make the
    // patched file land outside of it.
    const relativeToRoot = path.relative(projectRoot, absoluteFilePath);
    if (relativeToRoot === '..' || relativeToRoot.startsWith(`..${path.sep}`) || path.isAbsolute(relativeToRoot)) {
        return { valid: false, error: `Patch file ${patch.file} resolves outside the project root` };
    }
    let originalContents;
    try {
        originalContents = fs.readFileSync(absoluteFilePath, 'utf-8');
    }
    catch (err) {
        return { valid: false, error: `Cannot read source file ${patch.file}: ${err}` };
    }
    // An empty needle makes split() cut between every character, so the
    // occurrence count below would be meaningless (and could even equal 1).
    if (patch.oldCode === '') {
        return { valid: false, error: `oldCode is empty for ${patch.file} (expected non-empty text)` };
    }
    const occurrences = originalContents.split(patch.oldCode).length - 1;
    if (occurrences !== 1) {
        return {
            valid: false,
            error: `oldCode appears ${occurrences} times in ${patch.file} (expected exactly 1)`,
        };
    }
    // Step 2: Create temp dir and copy src/ tree into it
    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'usertester-validate-'));
    try {
        const srcDir = path.join(projectRoot, 'src');
        const tempSrcDir = path.join(tempDir, 'src');
        copyDirRecursive(srcDir, tempSrcDir);
        // Step 3: Apply the patch to the temp copy.
        // A replacer function is used so that `$&`, `$'`, `$$`, ... inside
        // newCode are inserted literally; with a plain string replacement
        // String.prototype.replace would expand them as special patterns.
        const patchedContents = originalContents.replace(patch.oldCode, () => patch.newCode);
        const tempFilePath = path.join(tempDir, patch.file);
        fs.writeFileSync(tempFilePath, patchedContents, 'utf-8');
        // Step 4: Create a tsconfig for the temp copy, inheriting the
        // project's settings when its tsconfig.json can be read and parsed.
        const originalTsConfig = path.join(projectRoot, 'tsconfig.json');
        let tsConfigContent;
        try {
            tsConfigContent = JSON.parse(fs.readFileSync(originalTsConfig, 'utf-8'));
        }
        catch {
            // Missing or unparsable tsconfig: fall back to compiler defaults.
            tsConfigContent = {};
        }
        const tempTsConfig = {
            ...tsConfigContent,
            compilerOptions: {
                ...(tsConfigContent.compilerOptions ?? {}),
                rootDir: './src',
                outDir: './dist',
                noEmit: true,
            },
            include: ['src/**/*'],
        };
        fs.writeFileSync(path.join(tempDir, 'tsconfig.json'), JSON.stringify(tempTsConfig, null, 2));
        // tsc is launched with the project root as cwd but pointed at the
        // temp tsconfig via --project.
        const tscResult = await runTsc(tempDir, projectRoot);
        return tscResult;
    }
    finally {
        // Clean up temp dir (best effort; a failed cleanup must not mask the result)
        try {
            fs.rmSync(tempDir, { recursive: true, force: true });
        }
        catch { }
    }
}
|
|
72
|
+
/**
 * Mirror the directory tree at `src` into `dest`.
 *
 * `dest` (and any missing parents) is created first; sub-directories are
 * descended into recursively and every other entry is copied with
 * fs.copyFileSync.
 *
 * @param src  Existing directory to read from.
 * @param dest Directory to create and populate.
 */
function copyDirRecursive(src, dest) {
    fs.mkdirSync(dest, { recursive: true });
    fs.readdirSync(src, { withFileTypes: true }).forEach((child) => {
        const from = path.join(src, child.name);
        const to = path.join(dest, child.name);
        if (!child.isDirectory()) {
            fs.copyFileSync(from, to);
            return;
        }
        copyDirRecursive(from, to);
    });
}
|
|
86
|
+
/**
 * Run `npx tsc --noEmit --project <tempDir>/tsconfig.json` with
 * `projectRoot` as the working directory.
 *
 * The returned promise never rejects.
 *
 * @param tempDir     Directory holding the generated tsconfig.json.
 * @param projectRoot Working directory for the npx invocation.
 * @returns Promise of `{ valid: true }` on a zero exit, otherwise
 *          `{ valid: false, error }` where `error` is at most 1000 characters.
 */
function runTsc(tempDir, projectRoot) {
    return new Promise((resolve) => {
        const tempTsConfig = path.join(tempDir, 'tsconfig.json');
        let safetyTimer;
        // Use npx from project root so node_modules is found
        const child = execFile('npx', ['tsc', '--noEmit', '--project', tempTsConfig], {
            cwd: projectRoot,
            timeout: 15_000,
        }, (error, stdout, stderr) => {
            // The child is done: cancel the fallback timer so it neither keeps
            // the event loop alive for the rest of the 15s nor signals a
            // process that has already exited.
            clearTimeout(safetyTimer);
            if (error) {
                // tsc prints its diagnostics on stdout, so include both streams;
                // stderr alone usually carries nothing useful for a type error.
                const captured = [stdout, stderr]
                    .map((chunk) => (chunk ?? '').toString().trim())
                    .filter(Boolean)
                    .join('\n');
                const output = captured || error.message || 'tsc failed';
                resolve({ valid: false, error: output.slice(0, 1000) });
            }
            else {
                resolve({ valid: true });
            }
        });
        // Safety: kill after 15s if execFile's own timeout has not already
        // ended the child. A second resolve() on a settled promise is a no-op.
        safetyTimer = setTimeout(() => {
            try {
                child.kill();
            }
            catch { }
            resolve({ valid: false, error: 'tsc validation timed out after 15s' });
        }, 15_000);
    });
}
|
|
112
|
+
//# sourceMappingURL=validator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/harness/validator.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAA;AAC7C,OAAO,EAAE,MAAM,SAAS,CAAA;AACxB,OAAO,EAAE,MAAM,SAAS,CAAA;AACxB,OAAO,IAAI,MAAM,WAAW,CAAA;AAQ5B,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,KAAgB,EAChB,WAAmB;IAEnB,8CAA8C;IAC9C,MAAM,gBAAgB,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,CAAA;IAC3D,IAAI,gBAAwB,CAAA;IAC5B,IAAI,CAAC;QACH,gBAAgB,GAAG,EAAE,CAAC,YAAY,CAAC,gBAAgB,EAAE,OAAO,CAAC,CAAA;IAC/D,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,2BAA2B,KAAK,CAAC,IAAI,KAAK,GAAG,EAAE,EAAE,CAAA;IACjF,CAAC;IAED,MAAM,WAAW,GAAG,gBAAgB,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAA;IACpE,IAAI,WAAW,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO;YACL,KAAK,EAAE,KAAK;YACZ,KAAK,EAAE,mBAAmB,WAAW,aAAa,KAAK,CAAC,IAAI,uBAAuB;SACpF,CAAA;IACH,CAAC;IAED,qDAAqD;IACrD,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,sBAAsB,CAAC,CAAC,CAAA;IAE9E,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,CAAC,CAAA;QAC5C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,CAAA;QAC5C,gBAAgB,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;QAEpC,2CAA2C;QAC3C,MAAM,eAAe,GAAG,gBAAgB,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;QAC9E,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,CAAA;QACnD,EAAE,CAAC,aAAa,CAAC,YAAY,EAAE,eAAe,EAAE,OAAO,CAAC,CAAA;QAExD,6DAA6D;QAC7D,MAAM,gBAAgB,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,eAAe,CAAC,CAAA;QAChE,IAAI,eAAwC,CAAA;QAC5C,IAAI,CAAC;YACH,eAAe,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,gBAAgB,EAAE,OAAO,CAAC,CAA4B,CAAA;QACrG,CAAC;QAAC,MAAM,CAAC;YACP,eAAe,GAAG,EAAE,CAAA;QACtB,CAAC;QAED,MAAM,YAAY,GAAG;YACnB,GAAG,eAAe;YAClB,eAAe,EAAE;gBACf,GAAG,CAAE,eAAe,CAAC,eAA2C,IAAI,EAAE,CAAC;gBACvE,OAAO,EAAE,OAAO;gBAChB,MAAM,EAAE,QAAQ;gBAChB,MAAM,EAAE,IAAI;aACb;YACD,OAAO,EAAE,CAAC,UAAU,CAAC;SACtB,CAAA;QAED,EAAE,CAAC,aAAa,CACd,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,eAAe,CAAC,EACnC,IAAI,CAAC,SAAS,C
AAC,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC,CACtC,CAAA;QAED,oFAAoF;QACpF,kEAAkE;QAClE,yEAAyE;QACzE,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,OAAO,EAAE,WAAW,CAAC,CAAA;QACpD,OAAO,SAAS,CAAA;IAClB,CAAC;YAAS,CAAC;QACT,oBAAoB;QACpB,IAAI,CAAC;YACH,EAAE,CAAC,MAAM,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAA;QACtD,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;IACZ,CAAC;AACH,CAAC;AAED,SAAS,gBAAgB,CAAC,GAAW,EAAE,IAAY;IACjD,EAAE,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IACvC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAA;IAC5D,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAA;QAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,gBAAgB,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAA;QACrC,CAAC;aAAM,CAAC;YACN,EAAE,CAAC,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAA;QACpC,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,MAAM,CAAC,OAAe,EAAE,WAAmB;IAClD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,eAAe,CAAC,CAAA;QAExD,qDAAqD;QACrD,MAAM,KAAK,GAAG,QAAQ,CACpB,KAAK,EACL,CAAC,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,YAAY,CAAC,EAC9C;YACE,GAAG,EAAE,WAAW;YAChB,OAAO,EAAE,MAAM;SAChB,EACD,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE;YACzB,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,MAAM,GAAG,MAAM,IAAI,KAAK,CAAC,OAAO,IAAI,YAAY,CAAA;gBACtD,OAAO,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAA;YACzD,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAA;YAC1B,CAAC;QACH,CAAC,CACF,CAAA;QAED,kDAAkD;QAClD,UAAU,CAAC,GAAG,EAAE;YACd,IAAI,CAAC;gBAAC,KAAK,CAAC,IAAI,EAAE,CAAA;YAAC,CAAC;YAAC,MAAM,CAAC,CAAA,CAAC;YAC7B,OAAO,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,oCAAoC,EAAE,CAAC,CAAA;QACxE,CAAC,EAAE,MAAM,CAAC,CAAA;IACZ,CAAC,CAAC,CAAA;AACJ,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** Handle to a provisioned mailbox; it carries nothing but the identifier. */
export interface Inbox {
    /** Identifier used to address the inbox in later InboxManager calls. */
    inboxId: string;
}
|
|
4
|
+
/** Thin wrapper around the AgentMail client for inbox lifecycle and polling. */
export declare class InboxManager {
    private client;
    /** @param apiKey API key handed to the underlying AgentMail client. */
    constructor(apiKey: string);
    /** Create an inbox for `username` and resolve with its identifier. */
    provision(username: string): Promise<Inbox>;
    /** Delete the inbox identified by `inboxId`. */
    delete(inboxId: string): Promise<void>;
    /** Resolve with the threads of the inbox (an empty array when none are reported). */
    listThreads(inboxId: string): Promise<unknown[]>;
    /**
     * Poll the inbox until a thread whose subject contains `subject`
     * (case-insensitive) shows up.
     *
     * @param timeoutMs      Overall polling budget in milliseconds.
     * @param pollIntervalMs Pause between polls in milliseconds.
     * @returns Text taken from the matching thread, or `null` on timeout.
     */
    waitForEmail(
        inboxId: string,
        subject: string,
        timeoutMs?: number,
        pollIntervalMs?: number,
    ): Promise<string | null>;
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AgentMail inbox management
|
|
3
|
+
* Uses the agentmail TypeScript SDK directly (no Composio needed for REST ops)
|
|
4
|
+
*/
|
|
5
|
+
import { AgentMailClient } from 'agentmail';
|
|
6
|
+
export class InboxManager {
|
|
7
|
+
client;
|
|
8
|
+
constructor(apiKey) {
|
|
9
|
+
this.client = new AgentMailClient({ apiKey });
|
|
10
|
+
}
|
|
11
|
+
async provision(username) {
|
|
12
|
+
const inbox = await this.client.inboxes.create({ username });
|
|
13
|
+
return { inboxId: inbox.inboxId };
|
|
14
|
+
}
|
|
15
|
+
async delete(inboxId) {
|
|
16
|
+
await this.client.inboxes.delete(inboxId);
|
|
17
|
+
}
|
|
18
|
+
async listThreads(inboxId) {
|
|
19
|
+
const result = await this.client.inboxes.threads.list(inboxId);
|
|
20
|
+
return result.items ?? [];
|
|
21
|
+
}
|
|
22
|
+
async waitForEmail(inboxId, subject, timeoutMs = 60_000, pollIntervalMs = 3_000) {
|
|
23
|
+
const deadline = Date.now() + timeoutMs;
|
|
24
|
+
while (Date.now() < deadline) {
|
|
25
|
+
const threads = await this.listThreads(inboxId);
|
|
26
|
+
for (const thread of threads) {
|
|
27
|
+
if (thread.subject?.toLowerCase().includes(subject.toLowerCase())) {
|
|
28
|
+
return thread.snippet ?? thread.subject ?? null;
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
await new Promise(r => setTimeout(r, pollIntervalMs));
|
|
32
|
+
}
|
|
33
|
+
return null;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
//# sourceMappingURL=agentmail.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agentmail.js","sourceRoot":"","sources":["../../src/inbox/agentmail.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,EAAE,eAAe,EAAE,MAAM,WAAW,CAAA;AAM3C,MAAM,OAAO,YAAY;IACf,MAAM,CAAiB;IAE/B,YAAY,MAAc;QACxB,IAAI,CAAC,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC,CAAA;IAC/C,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,QAAgB;QAC9B,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAA;QAC5D,OAAO,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,CAAA;IACnC,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAAe;QAC1B,MAAM,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;IAC3C,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,OAAe;QAC/B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC9D,OAAQ,MAAgC,CAAC,KAAK,IAAI,EAAE,CAAA;IACtD,CAAC;IAED,KAAK,CAAC,YAAY,CAChB,OAAe,EACf,OAAe,EACf,SAAS,GAAG,MAAM,EAClB,cAAc,GAAG,KAAK;QAEtB,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;QACvC,OAAO,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAC;YAC7B,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAA;YAC/C,KAAK,MAAM,MAAM,IAAI,OAAwD,EAAE,CAAC;gBAC9E,IAAI,MAAM,CAAC,OAAO,EAAE,WAAW,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;oBAClE,OAAO,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO,IAAI,IAAI,CAAA;gBACjD,CAAC;YACH,CAAC;YACD,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC,CAAA;QACvD,CAAC;QACD,OAAO,IAAI,CAAA;IACb,CAAC;CACF"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { LanguageModel } from 'ai';
|
|
2
|
+
import type { UsertesterConfig } from '../types.js';
|
|
3
|
+
/**
 * Map a model string to a LanguageModel instance.
 *
 * The prefix selects the route: `openrouter/…`, `anthropic/…` or `openai/…`;
 * any other string makes the function throw.
 *
 * @param modelString Prefixed model identifier.
 * @param config      Optional config whose API keys take precedence over
 *                    the corresponding environment variables.
 * @returns LanguageModel (LanguageModelV2) for use with generateText/AISdkClient.
 */
export declare function resolveModel(
    modelString: string,
    config?: Partial<UsertesterConfig>,
): LanguageModel;
|
|
8
|
+
/**
 * Send one prompt as a single user message and resolve with the reply text.
 *
 * @param prompt    Prompt text.
 * @param config    Optional config; `orchestrator_model` selects the model.
 * @param maxTokens Cap on generated tokens.
 * @returns The response text; an empty string when generation fails.
 */
export declare function cheapCall(
    prompt: string,
    config?: Partial<UsertesterConfig>,
    maxTokens?: number,
): Promise<string>;
|
|
12
|
+
/**
 * Run cheapCall for every prompt in parallel.
 *
 * @returns Response texts in the same order as `prompts`.
 */
export declare function cheapBatch(
    prompts: string[],
    config?: Partial<UsertesterConfig>,
    maxTokens?: number,
): Promise<string[]>;
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider-agnostic LLM abstraction layer
|
|
3
|
+
*
|
|
4
|
+
* Model string format:
|
|
5
|
+
* openrouter/openai/gpt-5.4-mini → OpenRouter with OPENROUTER_API_KEY
|
|
6
|
+
* openrouter/anthropic/claude-opus-4-6 → OpenRouter with OPENROUTER_API_KEY
|
|
7
|
+
* anthropic/claude-opus-4-6 → direct Anthropic with ANTHROPIC_API_KEY
|
|
8
|
+
* openai/gpt-5.4-mini → direct OpenAI with OPENAI_API_KEY
|
|
9
|
+
*/
|
|
10
|
+
import { createAnthropic } from '@ai-sdk/anthropic';
|
|
11
|
+
import { createOpenAI } from '@ai-sdk/openai';
|
|
12
|
+
import { createOpenRouter } from '@openrouter/ai-sdk-provider';
|
|
13
|
+
import { generateText } from 'ai';
|
|
14
|
+
/**
 * Turn a prefixed model string into a provider model instance.
 *
 * The text after the matched prefix is passed to the provider as the model
 * id. For each route an API key from `config` wins over the matching
 * environment variable.
 *
 * @param modelString e.g. "openrouter/anthropic/claude-opus-4-6",
 *                    "anthropic/claude-opus-4-6" or "openai/gpt-5.4-mini".
 * @param config      Optional object with `openrouter_api_key`,
 *                    `anthropic_api_key` and/or `openai_api_key`.
 * @returns LanguageModel (LanguageModelV2) for use with generateText/AISdkClient.
 * @throws Error when the string starts with none of the known prefixes.
 */
export function resolveModel(modelString, config) {
    // Routes are tried in order and the first matching prefix wins. Each
    // factory is lazy so only the selected provider is ever constructed.
    const routes = [
        ['openrouter/', () => createOpenRouter({ apiKey: config?.openrouter_api_key ?? process.env.OPENROUTER_API_KEY })],
        ['anthropic/', () => createAnthropic({ apiKey: config?.anthropic_api_key ?? process.env.ANTHROPIC_API_KEY })],
        ['openai/', () => createOpenAI({ apiKey: config?.openai_api_key ?? process.env.OPENAI_API_KEY })],
    ];
    for (const [prefix, makeProvider] of routes) {
        if (!modelString.startsWith(prefix)) {
            continue;
        }
        // e.g. "openrouter/anthropic/claude-opus-4-6" → "anthropic/claude-opus-4-6"
        const provider = makeProvider();
        return provider(modelString.slice(prefix.length));
    }
    throw new Error(`Unknown model string format: "${modelString}". ` +
        'Expected: openrouter/<provider>/<model>, anthropic/<model>, or openai/<model>');
}
|
|
41
|
+
/**
 * Send `prompt` as a single user message and return the generated text.
 *
 * The model comes from `config.orchestrator_model`, defaulting to
 * 'openrouter/openai/gpt-5.4-mini'. Model resolution happens before the
 * guarded section, so an unrecognised model string still rejects; any
 * failure of the generation call itself yields an empty string.
 *
 * @param prompt    Prompt text.
 * @param config    Optional config (model choice and API keys).
 * @param maxTokens Output token cap passed as `maxOutputTokens` (default 300).
 * @returns The response text, or '' when generation throws.
 */
export async function cheapCall(prompt, config, maxTokens = 300) {
    const model = resolveModel(config?.orchestrator_model ?? 'openrouter/openai/gpt-5.4-mini', config);
    const request = {
        model,
        messages: [{ role: 'user', content: prompt }],
        maxOutputTokens: maxTokens,
    };
    try {
        const { text } = await generateText(request);
        return text;
    }
    catch {
        // Best effort by design: callers treat '' as "no answer".
        return '';
    }
}
|
|
59
|
+
/**
 * Fan out one cheapCall per prompt and wait for all of them.
 *
 * @param prompts   Prompts to send.
 * @param config    Optional config forwarded to every call.
 * @param maxTokens Output token cap forwarded to every call (default 300).
 * @returns Response texts, index-aligned with `prompts`.
 */
export async function cheapBatch(prompts, config, maxTokens = 300) {
    // All calls are started before any is awaited, so they run concurrently.
    const inFlight = prompts.map((prompt) => cheapCall(prompt, config, maxTokens));
    return Promise.all(inFlight);
}
|
|
65
|
+
//# sourceMappingURL=provider.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"provider.js","sourceRoot":"","sources":["../../src/llm/provider.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAA;AACnD,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAA;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAA;AAIjC;;;GAGG;AACH,MAAM,UAAU,YAAY,CAC1B,WAAmB,EACnB,MAAkC;IAElC,IAAI,WAAW,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1C,4EAA4E;QAC5E,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,CAAC,aAAa,CAAC,MAAM,CAAC,CAAA;QACvD,MAAM,MAAM,GACV,MAAM,EAAE,kBAAkB,IAAI,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAA;QAC9D,MAAM,EAAE,GAAG,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC,CAAA;QACvC,OAAO,EAAE,CAAC,OAAO,CAA6B,CAAA;IAChD,CAAC;IAED,IAAI,WAAW,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,CAAC,YAAY,CAAC,MAAM,CAAC,CAAA;QACtD,MAAM,MAAM,GACV,MAAM,EAAE,iBAAiB,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAA;QAC5D,MAAM,QAAQ,GAAG,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC,CAAA;QAC5C,OAAO,QAAQ,CAAC,OAAO,CAAC,CAAA;IAC1B,CAAC;IAED,IAAI,WAAW,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QACtC,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;QACnD,MAAM,MAAM,GACV,MAAM,EAAE,cAAc,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,CAAA;QACtD,MAAM,QAAQ,GAAG,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC,CAAA;QACzC,OAAO,QAAQ,CAAC,OAAO,CAAC,CAAA;IAC1B,CAAC;IAED,MAAM,IAAI,KAAK,CACb,iCAAiC,WAAW,KAAK;QAC/C,+EAA+E,CAClF,CAAA;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,MAAc,EACd,MAAkC,EAClC,SAAS,GAAG,GAAG;IAEf,MAAM,WAAW,GACf,MAAM,EAAE,kBAAkB,IAAI,gCAAgC,CAAA;IAChE,MAAM,KAAK,GAAG,YAAY,CAAC,WAAW,EAAE,MAAM,CAAC,CAAA;IAE/C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC;YAChC,KAAK;YACL,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;YAC7C,eAAe,EAAE,SAAS;SAC3B,CAAC,CAAA;QACF,OAAO,MAAM,CAAC,IAAI,CAAA;IACpB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAA;IACX,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,OAAiB,EACjB,MAAkC,EAClC,SAAS,GAAG,GAAG;IAEf,OAAO,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,CAAA;A
ACvE,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { SessionState, UsertesterConfig } from '../types.js';
|
|
2
|
+
import type { RetryAttempt } from '../orchestrator/retry.js';
|
|
3
|
+
/** Outcome data returned by runAgent once the agent has stopped. */
export interface AgentResult {
    /** Retry attempts exported from the browser agent. */
    retryHistory: RetryAttempt[];
    /** Tool names gathered from the retry attempts. */
    toolsUsed: string[];
    /** Whether a stored profile was found for the target before the run. */
    profileHit: boolean;
}
|
|
8
|
+
/**
 * Run a single agent through its lifecycle and resolve when it has stopped.
 *
 * @param opts.agentId        Identifier of the agent inside the session state.
 * @param opts.sessionId      Session the agent belongs to.
 * @param opts.url            Site the browser agent is started on.
 * @param opts.initialMessage First task given to the agent.
 * @param opts.config         Resolved usertester configuration.
 * @param opts.state          Session state at spawn time.
 * @param opts.onStateChange  Receives every new session state the agent produces.
 * @param opts.getState       Returns the current session state.
 */
export declare function runAgent(opts: {
    agentId: string;
    sessionId: string;
    url: string;
    initialMessage: string;
    config: UsertesterConfig;
    state: SessionState;
    onStateChange: (newState: SessionState) => void;
    getState: () => SessionState;
}): Promise<AgentResult>;
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent runner: manages a single agent's lifecycle through the state machine.
|
|
3
|
+
* Runs in its own async context (spawned by orchestrator).
|
|
4
|
+
*
|
|
5
|
+
* State machine:
|
|
6
|
+
* QUEUED → SPAWNING → INBOX_READY → SIGNING_UP → RUNNING → WAITING
|
|
7
|
+
* | |
|
|
8
|
+
* └──────────► FAILED
|
|
9
|
+
* WAITING → (send received) → RUNNING
|
|
10
|
+
* WAITING → (kill received) → CANCELLED
|
|
11
|
+
* RUNNING → (kill received) → CANCELLED
|
|
12
|
+
*/
|
|
13
|
+
import path from 'node:path';
|
|
14
|
+
import fs from 'node:fs';
|
|
15
|
+
import { BrowserAgent } from '../browser/agent.js';
|
|
16
|
+
import { cheapCall } from '../llm/provider.js';
|
|
17
|
+
import { InboxManager } from '../inbox/agentmail.js';
|
|
18
|
+
import { emitEvent, ts, appendAgentLog, appendAgentEvent, getAgentDir, readPendingCommand, } from '../output/events.js';
|
|
19
|
+
import { saveSession, transitionAgent } from './session.js';
|
|
20
|
+
import { loadProfile, updateProfile, updateProfileWithSuccess } from '../profiles/learner.js';
|
|
21
|
+
// Pause, in milliseconds, between checks for a pending command while the agent idles.
const COMMAND_POLL_MS = 500;
|
|
22
|
+
// Ceiling for an agent's retryCount: the initial task is re-run after a failure only while retryCount is below this.
const MAX_RETRIES = 3;
|
|
23
|
+
/**
 * Drive one agent through its lifecycle:
 * SPAWNING → INBOX_READY → SIGNING_UP → RUNNING → WAITING, then loop on
 * commands until a kill (CANCELLED), a failed resume (FAILED) or the agent
 * timeout (DONE).
 *
 * Every state change is logged to the agent directory, pushed through
 * `opts.onStateChange`, persisted with saveSession and emitted as an event.
 *
 * @param opts Object with `agentId`, `sessionId`, `url`, `initialMessage`,
 *             `config`, `onStateChange(newState)` and `getState()`.
 * @returns `{ retryHistory, toolsUsed, profileHit }`; `toolsUsed` is empty on
 *          the early-failure paths.
 */
export async function runAgent(opts) {
    const { agentId, sessionId, url, initialMessage, config } = opts;
    const sessionDir = path.join(config.results_dir, sessionId);
    const agentDir = getAgentDir(sessionDir, agentId);
    const transition = (to, extras) => {
        appendAgentLog(agentDir, `State → ${to}`);
        // Read the previous status exactly once, before the state is replaced.
        // Looking it up again after onStateChange() may already return the new
        // status, which made the emitted event report `from === to` and
        // disagree with the per-agent event log.
        const from = opts.getState().agents.find(a => a.id === agentId)?.status;
        appendAgentEvent(agentDir, { event: 'state', from, to, ...extras });
        const newState = transitionAgent(opts.getState(), agentId, to, extras);
        opts.onStateChange(newState);
        saveSession(config.results_dir, newState);
        emitEvent({
            event: 'state',
            agent: agentId,
            from,
            to,
            ts: ts(),
        });
    };
    const fail = (error) => {
        appendAgentLog(agentDir, `FAILED: ${error}`);
        transition('FAILED', { error });
        emitEvent({ event: 'failed', agent: agentId, error, ts: ts() });
    };
    // --- SPAWNING: provision inbox ---
    transition('SPAWNING');
    let inboxId;
    try {
        const inboxMgr = new InboxManager(config.agentmail_api_key);
        // Username is built from the session id's last 6 characters and the agent id without its "agent-" prefix.
        const username = `ut-${sessionId.slice(-6)}-${agentId.replace('agent-', '')}`;
        const inbox = await inboxMgr.provision(username);
        inboxId = inbox.inboxId;
        appendAgentLog(agentDir, `Inbox provisioned: ${inboxId}`);
    }
    catch (err) {
        fail(`Inbox provisioning failed: ${err}`);
        return { retryHistory: [], toolsUsed: [], profileHit: false };
    }
    // --- INBOX_READY ---
    transition('INBOX_READY', { inboxId });
    emitEvent({ event: 'spawned', agent: agentId, inbox: inboxId, ts: ts() });
    // Load profile hints for this url/scenario
    const profile = await loadProfile(config.results_dir, url, 'signup');
    const profileHit = profile !== null && profile !== undefined;
    // --- SIGNING_UP: launch browser and execute initial task ---
    transition('SIGNING_UP', { currentMessage: initialMessage, startedAt: Date.now() });
    const browserAgent = new BrowserAgent({
        config,
        agentDir,
        rlmRecentActions: config.rlm_recent_actions,
        rlmMaxFailedActions: config.rlm_max_failed_actions,
    });
    try {
        // RUNNING during initial task execution
        transition('RUNNING');
        await browserAgent.start(url, inboxId, initialMessage, profile ?? undefined);
        appendAgentLog(agentDir, 'Initial task complete, taking screenshot...');
    }
    catch (err) {
        const agentState = opts.getState().agents.find(a => a.id === agentId);
        if ((agentState?.retryCount ?? 0) < MAX_RETRIES) {
            appendAgentLog(agentDir, `Error during SIGNING_UP (retry ${(agentState?.retryCount ?? 0) + 1}): ${err}`);
            // Record the retry in the in-memory state only; this step goes
            // through transitionAgent directly, not the transition() helper.
            const newState = transitionAgent(opts.getState(), agentId, 'SIGNING_UP', {
                retryCount: (agentState?.retryCount ?? 0) + 1,
            });
            opts.onStateChange(newState);
            // Re-run start (same inbox, fresh attempt); only one retry is made here.
            try {
                await browserAgent.start(url, inboxId, initialMessage, profile ?? undefined);
            }
            catch (err2) {
                fail(`Browser agent failed after retry: ${err2}`);
                await browserAgent.destroy();
                return { retryHistory: browserAgent.exportRetryHistory(), toolsUsed: [], profileHit };
            }
        }
        else {
            fail(`Browser agent failed: ${err}`);
            await browserAgent.destroy();
            return { retryHistory: browserAgent.exportRetryHistory(), toolsUsed: [], profileHit };
        }
    }
    // Initial task done — emit ready + go to WAITING
    const summary = await generateSummary(browserAgent, initialMessage, config);
    const screenshotPath = path.join(agentDir, 'screenshots', '001.png');
    emitEvent({
        event: 'ready',
        agent: agentId,
        message_completed: initialMessage,
        summary,
        // Only advertise the screenshot when the file is actually on disk.
        screenshot: fs.existsSync(screenshotPath) ? screenshotPath : undefined,
        ts: ts(),
    });
    transition('WAITING');
    // Update profile — recovery tip takes priority over LLM-based failure extraction.
    // Both updates are fire-and-forget; their failures are deliberately ignored.
    const memory = browserAgent.exportMemory();
    if (memory.recoveryTips.length > 0) {
        // Recovery-tip path: store the most recent tip
        const latestTip = memory.recoveryTips[memory.recoveryTips.length - 1];
        updateProfileWithSuccess(config.results_dir, latestTip).catch(() => { });
    }
    else {
        // No recovery tip: update the profile from the exported memory
        updateProfile(config.results_dir, url, 'signup', memory).catch(() => { });
    }
    // --- WAITING: poll for commands ---
    const commandsPath = path.join(agentDir, 'commands.ndjson');
    const timeoutAt = Date.now() + config.agent_timeout_ms;
    while (Date.now() < timeoutAt) {
        const agentState = opts.getState().agents.find(a => a.id === agentId);
        // Stop when the agent vanished from the state or was finished elsewhere.
        if (!agentState || agentState.status === 'CANCELLED' || agentState.status === 'DONE')
            break;
        const cmd = readPendingCommand(commandsPath);
        if (cmd?.type === 'kill') {
            appendAgentLog(agentDir, 'Received kill command');
            transition('CANCELLED');
            break;
        }
        if (cmd?.type === 'send' && cmd.message) {
            const message = cmd.message;
            appendAgentLog(agentDir, `Received send command: ${message}`);
            transition('RUNNING', { currentMessage: message });
            try {
                const result = await browserAgent.resume(message);
                emitEvent({
                    event: 'ready',
                    agent: agentId,
                    message_completed: message,
                    summary: result.summary,
                    screenshot: result.screenshotPath,
                    ts: ts(),
                });
                transition('WAITING');
                updateProfile(config.results_dir, url, 'signup', browserAgent.exportMemory()).catch(() => { });
            }
            catch (err) {
                fail(`Resume failed: ${err}`);
                break;
            }
            // Check for the next command right away, without sleeping.
            continue;
        }
        // No command — wait
        await new Promise(r => setTimeout(r, COMMAND_POLL_MS));
    }
    // Timeout → DONE (only when the loop ended while still WAITING)
    const finalState = opts.getState().agents.find(a => a.id === agentId);
    if (finalState?.status === 'WAITING') {
        appendAgentLog(agentDir, 'Session timeout — transitioning to DONE');
        transition('DONE');
    }
    const retryHistory = browserAgent.exportRetryHistory();
    // Distinct tool names across all retry attempts.
    const toolsUsed = [...new Set(retryHistory.flatMap(a => a.toolsInjected))];
    await browserAgent.destroy();
    appendAgentLog(agentDir, 'Agent finished');
    return { retryHistory, toolsUsed, profileHit };
}
|
|
179
|
+
/**
 * Produce a short human-readable summary of the agent's latest actions.
 *
 * At most the last 10 recorded actions are considered. They are rendered
 * one per line and sent to cheapCall together with the task.
 *
 * @param agent  Object whose `exportMemory()` returns `{ actions }`.
 * @param task   Task text quoted in the prompt.
 * @param config Config forwarded to cheapCall.
 * @returns 'No actions recorded.' when there is nothing to summarise; the
 *          model's text (or 'Task execution complete.' when it is empty);
 *          or a "Completed x/y steps." count when the call throws.
 */
async function generateSummary(agent, task, config) {
    const { actions } = agent.exportMemory();
    const tail = actions.slice(-10);
    if (tail.length === 0) {
        return 'No actions recorded.';
    }
    const rendered = [];
    for (const step of tail) {
        const note = step.observation ? ` (${step.observation})` : '';
        rendered.push(`${step.action} → ${step.result}${note}`);
    }
    const actionsStr = rendered.join('\n');
    try {
        const prompt = `Task: "${task}"\n\nActions:\n${actionsStr}\n\nSummarize in 1-2 sentences: what happened, did the task complete, anything confusing or broken?`;
        const text = await cheapCall(prompt, config, 200);
        if (text) {
            return text;
        }
        return 'Task execution complete.';
    }
    catch {
        // Fallback that needs no model: count the successful steps.
        const succeeded = tail.filter(step => step.result === 'success').length;
        return `Completed ${succeeded}/${tail.length} steps.`;
    }
}
|
|
195
|
+
//# sourceMappingURL=agent.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent.js","sourceRoot":"","sources":["../../src/orchestrator/agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,OAAO,IAAI,MAAM,WAAW,CAAA;AAC5B,OAAO,EAAE,MAAM,SAAS,CAAA;AAExB,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAA;AAElD,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAA;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAA;AACpD,OAAO,EACL,SAAS,EACT,EAAE,EACF,cAAc,EACd,gBAAgB,EAChB,WAAW,EACX,kBAAkB,GACnB,MAAM,qBAAqB,CAAA;AAC5B,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC3D,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,wBAAwB,EAAE,MAAM,wBAAwB,CAAA;AAE7F,MAAM,eAAe,GAAG,GAAG,CAAA;AAC3B,MAAM,WAAW,GAAG,CAAC,CAAA;AAQrB,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,IAS9B;IACC,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,cAAc,EAAE,MAAM,EAAE,GAAG,IAAI,CAAA;IAChE,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,SAAS,CAAC,CAAA;IAC3D,MAAM,QAAQ,GAAG,WAAW,CAAC,UAAU,EAAE,OAAO,CAAC,CAAA;IAEjD,MAAM,UAAU,GAAG,CAAC,EAAe,EAAE,MAAgC,EAAE,EAAE;QACvE,cAAc,CAAC,QAAQ,EAAE,WAAW,EAAE,EAAE,CAAC,CAAA;QACzC,gBAAgB,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,GAAG,MAAM,EAAE,CAAC,CAAA;QAC/H,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,MAAa,CAAC,CAAA;QAC7E,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAA;QAC5B,WAAW,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAA;QACzC,SAAS,CAAC;YACR,KAAK,EAAE,OAAO;YACd,KAAK,EAAE,OAAO;YACd,IAAI,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,MAAqB;YAC/E,EAAE;YACF,EAAE,EAAE,EAAE,EAAE;SACT,CAAC,CAAA;IACJ,CAAC,CAAA;IAED,MAAM,IAAI,GAAG,CAAC,KAAa,EAAE,EAAE;QAC7B,cAAc,CAAC,QAAQ,EAAE,WAAW,KAAK,EAAE,CAAC,CAAA;QAC5C,UAAU,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,CAAC,CAAA;QAC/B,SAAS,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAA;IACjE,CAAC,CAAA;IAED,oCAAoC;IACpC,UAAU,CAAC,UAAU,CAAC,CAAA;IAEtB,IAAI,OAAe,CAAA;IACnB,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,MAAM,CAAC,iBAAkB,CAAC,CAAA;
QAC5D,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,OAAO,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAA;QAC7E,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAA;QAChD,OAAO,GAAG,KAAK,CAAC,OAAO,CAAA;QACvB,cAAc,CAAC,QAAQ,EAAE,sBAAsB,OAAO,EAAE,CAAC,CAAA;IAC3D,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAA;QACzC,OAAO,EAAE,YAAY,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,UAAU,EAAE,KAAK,EAAE,CAAA;IAC/D,CAAC;IAED,sBAAsB;IACtB,UAAU,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,CAAC,CAAA;IACtC,SAAS,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAA;IAEzE,2CAA2C;IAC3C,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,WAAW,EAAE,GAAG,EAAE,QAAQ,CAAC,CAAA;IACpE,MAAM,UAAU,GAAG,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,SAAS,CAAA;IAE5D,8DAA8D;IAC9D,UAAU,CAAC,YAAY,EAAE,EAAE,cAAc,EAAE,cAAc,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAA;IAEnF,MAAM,YAAY,GAAG,IAAI,YAAY,CAAC;QACpC,MAAM;QACN,QAAQ;QACR,gBAAgB,EAAE,MAAM,CAAC,kBAAkB;QAC3C,mBAAmB,EAAE,MAAM,CAAC,sBAAsB;KACnD,CAAC,CAAA;IAEF,IAAI,CAAC;QACH,wCAAwC;QACxC,UAAU,CAAC,SAAS,CAAC,CAAA;QACrB,MAAM,YAAY,CAAC,KAAK,CAAC,GAAG,EAAE,OAAO,EAAE,cAAc,EAAE,OAAO,IAAI,SAAS,CAAC,CAAA;QAE5E,6BAA6B;QAC7B,cAAc,CAAC,QAAQ,EAAE,6CAA6C,CAAC,CAAA;IACzE,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,CAAA;QACrE,IAAI,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC,CAAC,GAAG,WAAW,EAAE,CAAC;YAChD,cAAc,CAAC,QAAQ,EAAE,kCAAkC,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,EAAE,CAAC,CAAA;YACxG,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,OAAO,EAAE,YAAY,EAAE;gBACvE,UAAU,EAAE,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC,CAAC,GAAG,CAAC;aAC9C,CAAC,CAAA;YACF,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAA;YAC5B,2CAA2C;YAC3C,IAAI,CAAC;gBACH,MAAM,YAAY,CAAC,KAAK,CAAC,GAAG,EAAE,OAAO,EAAE,cAAc,EAAE,OAAO,IAAI,SAAS,CAAC,CAAA;YAC9E,CAAC;YAAC,OAAO,IAAI,EAAE,CAAC;gBACd,IAAI,CAAC,qCAAqC,IAAI,EAAE,CAAC,CAAA;gBACjD,MAAM,YAAY,CAAC,OAAO,EAAE,CAAA;gBAC5B,OAAO,EAAE,YAAY,EAAE,YA
AY,CAAC,kBAAkB,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,UAAU,EAAE,CAAA;YACvF,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,yBAAyB,GAAG,EAAE,CAAC,CAAA;YACpC,MAAM,YAAY,CAAC,OAAO,EAAE,CAAA;YAC5B,OAAO,EAAE,YAAY,EAAE,YAAY,CAAC,kBAAkB,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,UAAU,EAAE,CAAA;QACvF,CAAC;IACH,CAAC;IAED,iDAAiD;IACjD,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,YAAY,EAAE,cAAc,EAAE,MAAM,CAAC,CAAA;IAC3E,MAAM,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,EAAE,SAAS,CAAC,CAAA;IAEpE,SAAS,CAAC;QACR,KAAK,EAAE,OAAO;QACd,KAAK,EAAE,OAAO;QACd,iBAAiB,EAAE,cAAc;QACjC,OAAO;QACP,UAAU,EAAE,EAAE,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,SAAS;QACtE,EAAE,EAAE,EAAE,EAAE;KACT,CAAC,CAAA;IAEF,UAAU,CAAC,SAAS,CAAC,CAAA;IAErB,iFAAiF;IACjF,MAAM,MAAM,GAAG,YAAY,CAAC,YAAY,EAAE,CAAA;IAC1C,IAAI,MAAM,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACnC,gFAAgF;QAChF,MAAM,SAAS,GAAG,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;QACrE,wBAAwB,CAAC,MAAM,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;IACzE,CAAC;SAAM,CAAC;QACN,4DAA4D;QAC5D,aAAa,CAAC,MAAM,CAAC,WAAW,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;IAC1E,CAAC;IAED,qCAAqC;IACrC,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,iBAAiB,CAAC,CAAA;IAC3D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,MAAM,CAAC,gBAAgB,CAAA;IAEtD,OAAO,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,CAAA;QACrE,IAAI,CAAC,UAAU,IAAI,UAAU,CAAC,MAAM,KAAK,WAAW,IAAI,UAAU,CAAC,MAAM,KAAK,MAAM;YAAE,MAAK;QAE3F,MAAM,GAAG,GAAG,kBAAkB,CAAC,YAAY,CAAC,CAAA;QAE5C,IAAI,GAAG,EAAE,IAAI,KAAK,MAAM,EAAE,CAAC;YACzB,cAAc,CAAC,QAAQ,EAAE,uBAAuB,CAAC,CAAA;YACjD,UAAU,CAAC,WAAW,CAAC,CAAA;YACvB,MAAK;QACP,CAAC;QAED,IAAI,GAAG,EAAE,IAAI,KAAK,MAAM,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC;YACxC,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAA;YAC3B,cAAc,CAAC,QAAQ,EAAE,0BAA0B,OAAO,EAAE,CAAC,CAAA;YAC7D,UAAU,CAAC,SAAS,EAAE,EAAE,cAAc,EAAE,OAAO,EAAE,CAAC,CAAA;YAElD,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG
,MAAM,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;gBACjD,SAAS,CAAC;oBACR,KAAK,EAAE,OAAO;oBACd,KAAK,EAAE,OAAO;oBACd,iBAAiB,EAAE,OAAO;oBAC1B,OAAO,EAAE,MAAM,CAAC,OAAO;oBACvB,UAAU,EAAE,MAAM,CAAC,cAAc;oBACjC,EAAE,EAAE,EAAE,EAAE;iBACT,CAAC,CAAA;gBACF,UAAU,CAAC,SAAS,CAAC,CAAA;gBACrB,aAAa,CAAC,MAAM,CAAC,WAAW,EAAE,GAAG,EAAE,QAAQ,EAAE,YAAY,CAAC,YAAY,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;YAC/F,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,IAAI,CAAC,kBAAkB,GAAG,EAAE,CAAC,CAAA;gBAC7B,MAAK;YACP,CAAC;YAED,SAAQ;QACV,CAAC;QAED,oBAAoB;QACpB,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,eAAe,CAAC,CAAC,CAAA;IACxD,CAAC;IAED,iBAAiB;IACjB,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,CAAA;IACrE,IAAI,UAAU,EAAE,MAAM,KAAK,SAAS,EAAE,CAAC;QACrC,cAAc,CAAC,QAAQ,EAAE,yCAAyC,CAAC,CAAA;QACnE,UAAU,CAAC,MAAM,CAAC,CAAA;IACpB,CAAC;IAED,MAAM,YAAY,GAAG,YAAY,CAAC,kBAAkB,EAAE,CAAA;IACtD,MAAM,SAAS,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAA;IAC1E,MAAM,YAAY,CAAC,OAAO,EAAE,CAAA;IAC5B,cAAc,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAA;IAC1C,OAAO,EAAE,YAAY,EAAE,SAAS,EAAE,UAAU,EAAE,CAAA;AAChD,CAAC;AAED,KAAK,UAAU,eAAe,CAC5B,KAAmB,EACnB,IAAY,EACZ,MAAwB;IAExB,MAAM,MAAM,GAAG,KAAK,CAAC,YAAY,EAAE,CAAA;IACnC,MAAM,aAAa,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAA;IAE/C,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,sBAAsB,CAAA;IAE7D,MAAM,UAAU,GAAG,aAAa;SAC7B,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;SAClF,IAAI,CAAC,IAAI,CAAC,CAAA;IAEb,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,SAAS,CAC1B,UAAU,IAAI,kBAAkB,UAAU,qGAAqG,EAC/I,MAAM,EACN,GAAG,CACJ,CAAA;QACD,OAAO,IAAI,IAAI,0BAA0B,CAAA;IAC3C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,aAAa,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,MAAM,IAAI,aAAa,CAAC,MAAM,SAAS,CAAA;IAC/G,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Orchestrator: manages N agents, concurrency queue, session lifecycle
|
|
3
|
+
*/
|
|
4
|
+
import crypto from 'node:crypto';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { emitEvent, ts, initSessionDirs } from '../output/events.js';
import { createSession, saveSession, transitionAgent } from './session.js';
import { runAgent } from './agent.js';
import { runHarnessLoop } from '../harness/index.js';
|
|
10
|
+
/**
 * Run one complete session: create `n` agents for `url`, drive each through
 * `runAgent` with bounded concurrency, then start the harness improvement
 * loop and emit `session_complete`.
 *
 * @param opts `{ url, messages, n, config }`.
 *   - `messages`: initial instructions; agent i receives `messages[i % messages.length]`.
 *   - `n`: number of agents; ids are `agent-01`, `agent-02`, ...
 *   - `config`: supplies `results_dir` and `cua_concurrency_limit`, and is
 *     forwarded to `runAgent` and `runHarnessLoop`.
 * @returns A promise that resolves (without a value) after every agent has
 *   settled. An agent whose `runAgent` promise rejects is recorded as FAILED
 *   instead of rejecting the session. The harness loop is not awaited.
 */
export async function orchestrate(opts) {
    const { url, messages, n, config } = opts;
    const sessionId = crypto.randomBytes(4).toString('hex');
    const agentIds = Array.from({ length: n }, (_, i) => `agent-${String(i + 1).padStart(2, '0')}`);
    // Init session dirs
    initSessionDirs(config.results_dir, sessionId, agentIds);
    // Create initial session state
    let state = createSession({ resultsDir: config.results_dir, sessionId, url, agentIds });
    saveSession(config.results_dir, state);
    emitEvent({ event: 'session_start', sessionId, url, n, ts: ts() });
    // State mutation: agents call this to update shared state
    const onStateChange = (newState) => {
        state = newState;
    };
    const getState = () => state;
    // Concurrency queue
    const agentPromises = [];
    // Collect per-agent results for the harness loop (null = agent produced no result)
    const agentResults = new Array(agentIds.length).fill(null);
    // A missing, non-numeric, zero or negative limit used to make the launch
    // condition below permanently false, so no agent ever started and the
    // session promise never resolved. Fall back to one agent at a time.
    const requestedLimit = Number(config.cua_concurrency_limit);
    const concurrencyLimit = requestedLimit >= 1 ? Math.min(requestedLimit, n) : 1;
    let activeCount = 0;
    let agentIndex = 0;
    await new Promise((resolve) => {
        function launchNext() {
            while (activeCount < concurrencyLimit && agentIndex < agentIds.length) {
                const currentIndex = agentIndex++;
                const agentId = agentIds[currentIndex];
                // Messages are reused round-robin when there are fewer messages than agents.
                const message = messages[currentIndex % messages.length];
                activeCount++;
                // Mark as QUEUED → will be updated by runAgent
                const p = runAgent({
                    agentId,
                    sessionId,
                    url,
                    initialMessage: message,
                    config,
                    state,
                    onStateChange,
                    getState,
                })
                    .then(result => {
                        agentResults[currentIndex] = result;
                    })
                    .catch(err => {
                        // Record the failure against the latest shared state, persist it, and report it.
                        const newState = transitionAgent(getState(), agentId, 'FAILED', { error: String(err) });
                        onStateChange(newState);
                        saveSession(config.results_dir, newState);
                        emitEvent({ event: 'failed', agent: agentId, error: String(err), ts: ts() });
                    })
                    .finally(() => {
                        // Free the slot, refill it, and finish once nothing is running or queued.
                        activeCount--;
                        launchNext();
                        if (activeCount === 0 && agentIndex >= agentIds.length) {
                            resolve();
                        }
                    });
                agentPromises.push(p);
            }
        }
        launchNext();
        // Edge case: n=0
        if (agentIds.length === 0)
            resolve();
    });
    await Promise.allSettled(agentPromises);
    // Outer loop: fire-and-forget harness improvement
    const harnessDir = path.join(config.results_dir, 'harness');
    const agentSucceeded = state.agents.map(a => ['DONE', 'WAITING'].includes(a.status));
    runHarnessLoop({
        sessionId,
        agentRetryHistories: agentResults.map(r => r?.retryHistory ?? []),
        agentToolsUsed: agentResults.map(r => r?.toolsUsed ?? []),
        agentProfileHits: agentResults.map(r => r?.profileHit ?? false),
        agentSucceeded,
        url,
        nAgents: n,
        config,
        harnessDir,
        // fileURLToPath decodes percent-escapes (e.g. spaces) and yields a
        // native path on Windows; URL#pathname does neither.
        projectRoot: fileURLToPath(new URL('../..', import.meta.url)),
    }).catch(() => { }); // never throw — outer loop is non-fatal
    emitEvent({ event: 'session_complete', sessionId, ts: ts() });
}
|
|
92
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/orchestrator/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,MAAM,MAAM,aAAa,CAAA;AAChC,OAAO,IAAI,MAAM,WAAW,CAAA;AAE5B,OAAO,EAAE,SAAS,EAAE,EAAE,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACpE,OAAO,EAAE,aAAa,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC1E,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAA;AAErC,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAA;AAEpD,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,IAKjC;IACC,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAA;IAEzC,MAAM,SAAS,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAA;IACvD,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,SAAS,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;IAE/F,oBAAoB;IACpB,eAAe,CAAC,MAAM,CAAC,WAAW,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAA;IAExD,+BAA+B;IAC/B,IAAI,KAAK,GAAG,aAAa,CAAC,EAAE,UAAU,EAAE,MAAM,CAAC,WAAW,EAAE,SAAS,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,CAAA;IACvF,WAAW,CAAC,MAAM,CAAC,WAAW,EAAE,KAAK,CAAC,CAAA;IAEtC,SAAS,CAAC,EAAE,KAAK,EAAE,eAAe,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAA;IAElE,0DAA0D;IAC1D,MAAM,aAAa,GAAG,CAAC,QAAsB,EAAE,EAAE;QAC/C,KAAK,GAAG,QAAQ,CAAA;IAClB,CAAC,CAAA;IACD,MAAM,QAAQ,GAAG,GAAG,EAAE,CAAC,KAAK,CAAA;IAE5B,oBAAoB;IACpB,MAAM,aAAa,GAAoB,EAAE,CAAA;IACzC,iDAAiD;IACjD,MAAM,YAAY,GAChB,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACvC,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,qBAAqB,EAAE,CAAC,CAAC,CAAA;IAClE,IAAI,WAAW,GAAG,CAAC,CAAA;IACnB,IAAI,UAAU,GAAG,CAAC,CAAA;IAElB,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;QAClC,SAAS,UAAU;YACjB,OAAO,WAAW,GAAG,gBAAgB,IAAI,UAAU,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;gBACtE,MAAM,YAAY,GAAG,UAAU,EAAE,CAAA;gBACjC,MAAM,OAAO,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAA;gBACtC,MAAM,OAAO,GAAG,QAAQ,CAAC,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAA;gBAExD,WAAW,EAAE,CAAA;gBAEb,+CAA+C;gBAC/C,MAAM,CAAC,GAAG,QAAQ,CAAC;oBACjB,OAAO;oBACP,SAAS;oBACT,GAAG;oBACH,cAAc,EAAE,OAAO;oBACvB,MAAM;oBACN,KAAK;oBACL,aAAa;oBAC
b,QAAQ;iBACT,CAAC;qBACC,IAAI,CAAC,MAAM,CAAC,EAAE;oBACb,YAAY,CAAC,YAAY,CAAC,GAAG,MAAM,CAAA;gBACrC,CAAC,CAAC;qBACD,KAAK,CAAC,GAAG,CAAC,EAAE;oBACX,MAAM,QAAQ,GAAG,eAAe,CAAC,QAAQ,EAAE,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;oBACvF,aAAa,CAAC,QAAQ,CAAC,CAAA;oBACvB,WAAW,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAA;oBACzC,SAAS,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAA;gBAC9E,CAAC,CAAC;qBACD,OAAO,CAAC,GAAG,EAAE;oBACZ,WAAW,EAAE,CAAA;oBACb,UAAU,EAAE,CAAA;oBACZ,IAAI,WAAW,KAAK,CAAC,IAAI,UAAU,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;wBACvD,OAAO,EAAE,CAAA;oBACX,CAAC;gBACH,CAAC,CAAC,CAAA;gBAEJ,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YACvB,CAAC;QACH,CAAC;QAED,UAAU,EAAE,CAAA;QAEZ,iBAAiB;QACjB,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAA;IACtC,CAAC,CAAC,CAAA;IAEF,MAAM,OAAO,CAAC,UAAU,CAAC,aAAa,CAAC,CAAA;IAEvC,kDAAkD;IAClD,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,SAAS,CAAC,CAAA;IAC3D,MAAM,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAA;IACpF,cAAc,CAAC;QACb,SAAS;QACT,mBAAmB,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,YAAY,IAAI,EAAE,CAAC;QACjE,cAAc,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,SAAS,IAAI,EAAE,CAAC;QACzD,gBAAgB,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,UAAU,IAAI,KAAK,CAAC;QAC/D,cAAc;QACd,GAAG;QACH,OAAO,EAAE,CAAC;QACV,MAAM;QACN,UAAU;QACV,WAAW,EAAE,IAAI,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ;KACxD,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA,CAAE,wCAAwC;IAE5D,SAAS,CAAC,EAAE,KAAK,EAAE,kBAAkB,EAAE,SAAS,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAA;AAC/D,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { UsertesterConfig, RecoveryTip } from '../types.js';
|
|
2
|
+
/**
 * Closed set of string labels for classifying an agent attempt.
 *
 * Used as `FailureClassification.type`, as the optional
 * `RetryAttempt.failureType`, and as the `type` of each `FAILURE_SIGNALS` entry.
 * NOTE(review): the exact meaning of each label is defined by the
 * implementation, which is not visible in this declaration file.
 */
export type FailureType = 'COMPLETE' | 'TRANSIENT' | 'RATE_LIMITED' | 'WRONG_APPROACH' | 'CAPABILITY_GAP' | 'ENVIRONMENT_BLOCK' | 'ESCALATE';
|
|
3
|
+
/**
 * Result shape resolved by `classifyFailure` and accepted by
 * `selectToolsForRecovery`.
 *
 * - `type`: one of the `FailureType` labels.
 * - `evidence`: free-form string; presumably the text that justified the
 *   label (confirm against the implementation).
 * - `recoveryHint`: free-form string; presumably guidance for the next
 *   attempt (confirm against the implementation).
 */
export interface FailureClassification {
|
|
4
|
+
type: FailureType;
|
|
5
|
+
evidence: string;
|
|
6
|
+
recoveryHint: string;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Record describing a single attempt; `buildRetryInstruction` takes an array
 * of these as its `history` argument.
 *
 * - `attempt`: numeric attempt counter (TODO confirm whether it is 0- or 1-based).
 * - `instruction`: the instruction string used for this attempt.
 * - `toolsInjected`: string identifiers of tools supplied for this attempt.
 * - `result`: `'complete'` or `'failed'`.
 * - `failureType`: optional `FailureType`; presumably only set when
 *   `result` is `'failed'` (confirm against the implementation).
 * - `agentMessage`: message text associated with the attempt.
 * - `finalUrl`: URL string recorded for the attempt.
 */
export interface RetryAttempt {
|
|
9
|
+
attempt: number;
|
|
10
|
+
instruction: string;
|
|
11
|
+
toolsInjected: string[];
|
|
12
|
+
result: 'complete' | 'failed';
|
|
13
|
+
failureType?: FailureType;
|
|
14
|
+
agentMessage: string;
|
|
15
|
+
finalUrl: string;
|
|
16
|
+
}
|
|
17
|
+
/**
 * Exported constant with the literal value 5.
 * NOTE(review): presumably the upper bound on retry attempts per
 * instruction — confirm against the implementation.
 */
export declare const MAX_ATTEMPTS = 5;
|
|
18
|
+
/**
 * Table of entries pairing a regular expression (`pattern`) with a
 * `FailureType` label (`type`) and a `hint` string.
 * NOTE(review): presumably the patterns are matched against agent messages
 * to pick a classification — confirm against the implementation.
 */
export declare const FAILURE_SIGNALS: Array<{
|
|
19
|
+
pattern: RegExp;
|
|
20
|
+
type: FailureType;
|
|
21
|
+
hint: string;
|
|
22
|
+
}>;
|
|
23
|
+
/**
 * Asynchronously classify an agent message.
 *
 * @param agentMessage - The message text to classify.
 * @param config - Partial `UsertesterConfig`; any subset of its fields may be supplied.
 * @returns A promise resolving to a `FailureClassification`.
 */
export declare function classifyFailure(agentMessage: string, config: Partial<UsertesterConfig>): Promise<FailureClassification>;
|
|
24
|
+
/**
 * Choose tools for a given classification.
 *
 * @param classification - A `FailureClassification`, e.g. the value resolved by `classifyFailure`.
 * @returns A string-keyed record of values typed `unknown`; presumably tool
 *   name to tool definition (confirm against the implementation).
 */
export declare function selectToolsForRecovery(classification: FailureClassification): Record<string, unknown>;
|
|
25
|
+
/**
 * Build the instruction string for a retry.
 *
 * @param originalInstruction - The instruction as first given.
 * @param history - Earlier attempts as `RetryAttempt` records.
 * @param memory - Optional object that may carry `recoveryTips`.
 * @param currentUrl - Optional URL string.
 * @returns The instruction text as a string.
 */
export declare function buildRetryInstruction(originalInstruction: string, history: RetryAttempt[], memory?: {
|
|
26
|
+
recoveryTips?: RecoveryTip[];
|
|
27
|
+
}, currentUrl?: string): string;
|