usertester 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +219 -0
- package/dist/browser/agent.d.ts +33 -0
- package/dist/browser/agent.js +393 -0
- package/dist/browser/agent.js.map +1 -0
- package/dist/cli/cleanup.d.ts +5 -0
- package/dist/cli/cleanup.js +75 -0
- package/dist/cli/cleanup.js.map +1 -0
- package/dist/cli/harness.d.ts +10 -0
- package/dist/cli/harness.js +108 -0
- package/dist/cli/harness.js.map +1 -0
- package/dist/cli/index.d.ts +5 -0
- package/dist/cli/index.js +31 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/kill.d.ts +5 -0
- package/dist/cli/kill.js +46 -0
- package/dist/cli/kill.js.map +1 -0
- package/dist/cli/logs.d.ts +5 -0
- package/dist/cli/logs.js +64 -0
- package/dist/cli/logs.js.map +1 -0
- package/dist/cli/profiles.d.ts +5 -0
- package/dist/cli/profiles.js +67 -0
- package/dist/cli/profiles.js.map +1 -0
- package/dist/cli/send.d.ts +5 -0
- package/dist/cli/send.js +46 -0
- package/dist/cli/send.js.map +1 -0
- package/dist/cli/setup.d.ts +6 -0
- package/dist/cli/setup.js +168 -0
- package/dist/cli/setup.js.map +1 -0
- package/dist/cli/spawn.d.ts +5 -0
- package/dist/cli/spawn.js +52 -0
- package/dist/cli/spawn.js.map +1 -0
- package/dist/cli/status.d.ts +5 -0
- package/dist/cli/status.js +85 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/harness/applier.d.ts +38 -0
- package/dist/harness/applier.js +152 -0
- package/dist/harness/applier.js.map +1 -0
- package/dist/harness/index.d.ts +14 -0
- package/dist/harness/index.js +110 -0
- package/dist/harness/index.js.map +1 -0
- package/dist/harness/patterns.d.ts +14 -0
- package/dist/harness/patterns.js +96 -0
- package/dist/harness/patterns.js.map +1 -0
- package/dist/harness/proposer.d.ts +26 -0
- package/dist/harness/proposer.js +181 -0
- package/dist/harness/proposer.js.map +1 -0
- package/dist/harness/traces.d.ts +29 -0
- package/dist/harness/traces.js +65 -0
- package/dist/harness/traces.js.map +1 -0
- package/dist/harness/validator.d.ts +6 -0
- package/dist/harness/validator.js +112 -0
- package/dist/harness/validator.js.map +1 -0
- package/dist/inbox/agentmail.d.ts +11 -0
- package/dist/inbox/agentmail.js +36 -0
- package/dist/inbox/agentmail.js.map +1 -0
- package/dist/llm/provider.d.ts +15 -0
- package/dist/llm/provider.js +65 -0
- package/dist/llm/provider.js.map +1 -0
- package/dist/orchestrator/agent.d.ts +17 -0
- package/dist/orchestrator/agent.js +195 -0
- package/dist/orchestrator/agent.js.map +1 -0
- package/dist/orchestrator/index.d.ts +7 -0
- package/dist/orchestrator/index.js +92 -0
- package/dist/orchestrator/index.js.map +1 -0
- package/dist/orchestrator/retry.d.ts +27 -0
- package/dist/orchestrator/retry.js +145 -0
- package/dist/orchestrator/retry.js.map +1 -0
- package/dist/orchestrator/session.d.ts +13 -0
- package/dist/orchestrator/session.js +55 -0
- package/dist/orchestrator/session.js.map +1 -0
- package/dist/output/events.d.ts +12 -0
- package/dist/output/events.js +81 -0
- package/dist/output/events.js.map +1 -0
- package/dist/profiles/learner.d.ts +4 -0
- package/dist/profiles/learner.js +168 -0
- package/dist/profiles/learner.js.map +1 -0
- package/dist/tools/captcha.d.ts +19 -0
- package/dist/tools/captcha.js +76 -0
- package/dist/tools/captcha.js.map +1 -0
- package/dist/tools/inbox.d.ts +30 -0
- package/dist/tools/inbox.js +65 -0
- package/dist/tools/inbox.js.map +1 -0
- package/dist/types.d.ts +121 -0
- package/dist/types.js +30 -0
- package/dist/types.js.map +1 -0
- package/package.json +60 -0
- package/tasks.example.json +5 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Outer loop meta-harness entry point.
|
|
3
|
+
* Wires traces → patterns → proposer → validator → applier.
|
|
4
|
+
* Called fire-and-forget from the orchestrator after each session.
|
|
5
|
+
*/
|
|
6
|
+
import fs from 'node:fs';
|
|
7
|
+
import path from 'node:path';
|
|
8
|
+
import { buildTrace, writeTrace } from './traces.js';
|
|
9
|
+
import { analyzePatterns } from './patterns.js';
|
|
10
|
+
import { runProposer, loadConvergenceState, saveConvergenceState, updateConvergenceState } from './proposer.js';
|
|
11
|
+
import { validatePatch } from './validator.js';
|
|
12
|
+
import { applyPatch } from './applier.js';
|
|
13
|
+
/**
 * Run one pass of the outer-loop meta-harness for a finished session.
 *
 * Pipeline: build + persist the session trace, update the convergence state,
 * analyze recent traces for a failure pattern, ask the proposer for a patch,
 * validate it, apply it, and persist the resulting convergence state.
 * Every early exit persists the convergence state before returning.
 *
 * @param {object} opts
 * @param {string} opts.sessionId - Identifier of the session that just ended.
 * @param {Array} opts.agentRetryHistories - Per-agent retry histories (passed to buildTrace).
 * @param {Array} opts.agentToolsUsed - Per-agent tool lists (passed to buildTrace).
 * @param {Array} opts.agentProfileHits - Per-agent profile-hit flags (passed to buildTrace).
 * @param {Array} opts.agentSucceeded - Per-agent success flags (passed to buildTrace).
 * @param {string} opts.url - URL the session ran against.
 * @param {number} opts.nAgents - Number of agents in the session.
 * @param {object} opts.config - Configuration forwarded to the proposer.
 * @param {string} opts.harnessDir - Directory holding harness.log, traces and state.
 * @param {string} opts.projectRoot - Project root forwarded to proposer/validator/applier.
 * @returns {Promise<void>}
 */
export async function runHarnessLoop(opts) {
    const { sessionId, agentRetryHistories, agentToolsUsed, agentProfileHits, agentSucceeded, url, nAgents, config, harnessDir, projectRoot, } = opts;
    fs.mkdirSync(harnessDir, { recursive: true });
    const harnessLog = path.join(harnessDir, 'harness.log');
    // Logging is deliberately best-effort: a failure to append to the log
    // must never abort the harness loop itself.
    const log = (msg) => {
        try {
            fs.appendFileSync(harnessLog, `[${new Date().toISOString()}] ${msg}\n`);
        }
        catch { }
    };
    log(`Session ${sessionId}: harness loop started (${nAgents} agents, url=${url})`);
    // Step 1: Build and write SessionTrace
    const trace = buildTrace({
        sessionId,
        url,
        agentRetryHistories,
        agentToolsUsed,
        agentProfileHits,
        agentSucceeded,
        nAgents,
    });
    writeTrace(harnessDir, trace);
    log(`Trace written: ${trace.n_succeeded}/${nAgents} succeeded, failure_types=[${trace.failure_types.join(',')}]`);
    // Update convergence state with current session success rate.
    // Guard against division by zero when a session had no agents.
    const sessionSuccessRate = nAgents > 0 ? trace.n_succeeded / nAgents : 0;
    // Keep the state exactly as loaded: the "patch applied" update in step 6
    // must be derived from it, not from the already-updated state, otherwise
    // this session's success rate is appended to the history twice.
    const loadedState = loadConvergenceState(harnessDir);
    let convergenceState = updateConvergenceState(loadedState, sessionSuccessRate, false);
    // Check if converged — if so, skip analysis
    if (convergenceState.converged) {
        log(`Converged: ${convergenceState.convergenceReason ?? 'unknown reason'}. Skipping.`);
        saveConvergenceState(harnessDir, convergenceState);
        return;
    }
    // Step 2: Analyze patterns
    const report = analyzePatterns(harnessDir);
    log(`Pattern analysis: ${report.tracesAnalyzed} traces, hasPattern=${report.hasPattern}, top=${report.topPattern?.type ?? 'none'}`);
    if (!report.hasPattern || !report.topPattern) {
        saveConvergenceState(harnessDir, convergenceState);
        return;
    }
    // Step 3: Run proposer
    let patch;
    try {
        patch = await runProposer({
            pattern: report.topPattern,
            convergenceState,
            config,
            projectRoot,
        });
    }
    catch (err) {
        log(`Proposer error: ${err}`);
        saveConvergenceState(harnessDir, convergenceState);
        return;
    }
    if (!patch) {
        log('Proposer returned no patch (converged or skipped)');
        saveConvergenceState(harnessDir, convergenceState);
        return;
    }
    log(`Proposer patch: ${patch.file} — ${patch.description}`);
    // Step 4: Validate patch
    let validation;
    try {
        validation = await validatePatch(patch, projectRoot);
    }
    catch (err) {
        log(`Validation error: ${err}`);
        saveConvergenceState(harnessDir, convergenceState);
        return;
    }
    if (!validation.valid) {
        log(`Patch validation FAILED: ${validation.error}`);
        saveConvergenceState(harnessDir, convergenceState);
        return;
    }
    log('Patch validated OK (tsc clean)');
    // Step 5: Apply patch
    let applyResult;
    try {
        applyResult = await applyPatch(patch, sessionId, harnessDir, projectRoot);
    }
    catch (err) {
        log(`Apply error: ${err}`);
        saveConvergenceState(harnessDir, convergenceState);
        return;
    }
    if (!applyResult.applied) {
        log(`Patch apply FAILED: ${applyResult.error}`);
        saveConvergenceState(harnessDir, convergenceState);
        return;
    }
    log(`Patch applied successfully: ${applyResult.patchId} (${patch.patternType}: ${patch.description})`);
    // Step 6: Record the applied patch. Recompute from the state as loaded so
    // the session's success rate appears exactly once in the persisted history.
    convergenceState = updateConvergenceState(loadedState, sessionSuccessRate, true);
    saveConvergenceState(harnessDir, convergenceState);
}
|
|
110
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/harness/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,EAAE,MAAM,SAAS,CAAA;AACxB,OAAO,IAAI,MAAM,WAAW,CAAA;AAG5B,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACpD,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AAC/C,OAAO,EAAE,WAAW,EAAE,oBAAoB,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAA;AAC/G,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAA;AAEzC,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,IAWpC;IACC,MAAM,EACJ,SAAS,EACT,mBAAmB,EACnB,cAAc,EACd,gBAAgB,EAChB,cAAc,EACd,GAAG,EACH,OAAO,EACP,MAAM,EACN,UAAU,EACV,WAAW,GACZ,GAAG,IAAI,CAAA;IAER,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAE7C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,aAAa,CAAC,CAAA;IACvD,MAAM,GAAG,GAAG,CAAC,GAAW,EAAE,EAAE;QAC1B,IAAI,CAAC;YACH,EAAE,CAAC,cAAc,CAAC,UAAU,EAAE,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,KAAK,GAAG,IAAI,CAAC,CAAA;QACzE,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;IACZ,CAAC,CAAA;IAED,GAAG,CAAC,WAAW,SAAS,2BAA2B,OAAO,gBAAgB,GAAG,GAAG,CAAC,CAAA;IAEjF,uCAAuC;IACvC,MAAM,KAAK,GAAG,UAAU,CAAC;QACvB,SAAS;QACT,GAAG;QACH,mBAAmB;QACnB,cAAc;QACd,gBAAgB;QAChB,cAAc;QACd,OAAO;KACR,CAAC,CAAA;IACF,UAAU,CAAC,UAAU,EAAE,KAAK,CAAC,CAAA;IAC7B,GAAG,CAAC,kBAAkB,KAAK,CAAC,WAAW,IAAI,OAAO,8BAA8B,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAEjH,6DAA6D;IAC7D,MAAM,kBAAkB,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAA;IACxE,IAAI,gBAAgB,GAAG,oBAAoB,CAAC,UAAU,CAAC,CAAA;IACvD,gBAAgB,GAAG,sBAAsB,CAAC,gBAAgB,EAAE,kBAAkB,EAAE,KAAK,CAAC,CAAA;IAEtF,4CAA4C;IAC5C,IAAI,gBAAgB,CAAC,SAAS,EAAE,CAAC;QAC/B,GAAG,CAAC,cAAc,gBAAgB,CAAC,iBAAiB,IAAI,gBAAgB,aAAa,CAAC,CAAA;QACtF,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,2BAA2B;IAC3B,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAA;IAC1C,GAAG,CAAC,qBAAqB,MAAM,CAAC,cAAc,uBAAuB,MAAM,CAAC,UAAU,SAAS,MAAM,CAAC,UAAU,EAAE,IAAI,IAAI,MAAM,EAAE,CAAC,CAAA;IAEnI,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;QAC7C,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CA
AC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,uBAAuB;IACvB,IAAI,KAAK,CAAA;IACT,IAAI,CAAC;QACH,KAAK,GAAG,MAAM,WAAW,CAAC;YACxB,OAAO,EAAE,MAAM,CAAC,UAAU;YAC1B,gBAAgB;YAChB,MAAM;YACN,WAAW;SACZ,CAAC,CAAA;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAA;QAC7B,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,GAAG,CAAC,mDAAmD,CAAC,CAAA;QACxD,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,GAAG,CAAC,mBAAmB,KAAK,CAAC,IAAI,MAAM,KAAK,CAAC,WAAW,EAAE,CAAC,CAAA;IAE3D,yBAAyB;IACzB,IAAI,UAAU,CAAA;IACd,IAAI,CAAC;QACH,UAAU,GAAG,MAAM,aAAa,CAAC,KAAK,EAAE,WAAW,CAAC,CAAA;IACtD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,qBAAqB,GAAG,EAAE,CAAC,CAAA;QAC/B,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;QACtB,GAAG,CAAC,4BAA4B,UAAU,CAAC,KAAK,EAAE,CAAC,CAAA;QACnD,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,GAAG,CAAC,gCAAgC,CAAC,CAAA;IAErC,sBAAsB;IACtB,IAAI,WAAW,CAAA;IACf,IAAI,CAAC;QACH,WAAW,GAAG,MAAM,UAAU,CAAC,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,WAAW,CAAC,CAAA;IAC3E,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,gBAAgB,GAAG,EAAE,CAAC,CAAA;QAC1B,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;QACzB,GAAG,CAAC,uBAAuB,WAAW,CAAC,KAAK,EAAE,CAAC,CAAA;QAC/C,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,GAAG,CAAC,+BAA+B,WAAW,CAAC,OAAO,KAAK,KAAK,CAAC,WAAW,KAAK,KAAK,CAAC,WAAW,GAAG,CAAC,CAAA;IAEtG,sDAAsD;IACtD,gBAAgB,GAAG,sBAAsB,CAAC,gBAAgB,EAAE,kBAAkB,EAAE,IAAI,CAAC,CAAA;IACrF,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;AACpD,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** Names of the failure patterns the rule-based analyzer can report. */
export type PatternType = 'UnhandledSignal' | 'MissingWait' | 'CapabilityGapNoTool' | 'HighAttempt';
/** A single failure pattern detected across recent session traces. */
export interface DetectedPattern {
    /** Which analyzer rule produced this pattern. */
    type: PatternType;
    /** Sample error strings supporting the pattern; may be empty. */
    errorEvidence: string[];
    /** How often the pattern was observed; the unit is set by the rule that produced it. */
    occurrences: number;
    /** Ranking key: patterns are sorted ascending, so lower values rank first. */
    priority: number;
}
/** Result of one analyzer run over the most recent traces. */
export interface PatternReport {
    /** True when at least one pattern was detected. */
    hasPattern: boolean;
    /** The highest-ranked detected pattern; absent when none was detected. */
    topPattern?: DetectedPattern;
    /** Every detected pattern, sorted by priority. */
    allPatterns: DetectedPattern[];
    /** Number of traces that were read and analyzed. */
    tracesAnalyzed: number;
}
/**
 * Analyze the last `k` session traces stored under `harnessDir`.
 *
 * @param harnessDir Directory containing the trace file.
 * @param k Maximum number of most recent traces to read (the implementation defaults to 20).
 */
export declare function analyzePatterns(harnessDir: string, k?: number): PatternReport;
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rule-based pattern analyzer. Reads last K session traces and identifies
|
|
3
|
+
* patterns the current harness doesn't handle. No LLM — pure regex matching.
|
|
4
|
+
*/
|
|
5
|
+
import { readLastTraces } from './traces.js';
|
|
6
|
+
// Import FAILURE_SIGNALS from retry.ts to check if patterns are already handled
|
|
7
|
+
import { FAILURE_SIGNALS } from '../orchestrator/retry.js';
|
|
8
|
+
/**
 * Rule-based analysis of the last `k` session traces.
 *
 * Detects four patterns (UnhandledSignal, MissingWait, CapabilityGapNoTool,
 * HighAttempt) and returns them sorted by priority (lower value first).
 *
 * @param {string} harnessDir - Directory the traces are read from.
 * @param {number} [k=20] - Maximum number of most recent traces to analyze.
 * @returns {{hasPattern: boolean, topPattern?: object, allPatterns: object[], tracesAnalyzed: number}}
 */
export function analyzePatterns(harnessDir, k = 20) {
    const traces = readLastTraces(harnessDir, k);
    if (traces.length === 0) {
        return { hasPattern: false, allPatterns: [], tracesAnalyzed: 0 };
    }
    const patterns = [];
    // 1. UnhandledSignal: error substring appears in 3+ sessions' recurring_errors
    //    but matches no existing FAILURE_SIGNALS regex
    const errorCounts = new Map();
    // First full-length, original-case error text seen for each normalized key.
    const errorSamples = new Map();
    for (const trace of traces) {
        // Use a set per session to avoid counting the same error twice in one session
        const seenInSession = new Set();
        for (const err of trace.recurring_errors) {
            // Normalize: take first 100 chars as the "key"
            const key = err.slice(0, 100).toLowerCase();
            if (!errorSamples.has(key)) {
                errorSamples.set(key, err);
            }
            if (!seenInSession.has(key)) {
                seenInSession.add(key);
                errorCounts.set(key, (errorCounts.get(key) ?? 0) + 1);
            }
        }
    }
    const matchesKnownSignal = (text) => FAILURE_SIGNALS.some(sig => sig.pattern.test(text));
    const unhandledErrors = [];
    for (const [errKey, count] of errorCounts.entries()) {
        if (count >= 3) {
            // The key is lowercased and truncated to 100 chars, so a signal whose
            // regex is case-sensitive or matches text beyond the first 100
            // characters would not match it. Test the full original text as well,
            // so an error an existing signal does match is not reported as unhandled.
            const isHandled = matchesKnownSignal(errKey) || matchesKnownSignal(errorSamples.get(errKey));
            if (!isHandled) {
                unhandledErrors.push(errKey);
            }
        }
    }
    if (unhandledErrors.length > 0) {
        patterns.push({
            type: 'UnhandledSignal',
            errorEvidence: unhandledErrors.slice(0, 5),
            // Number of distinct unhandled error keys (not a session count).
            occurrences: unhandledErrors.length,
            priority: 1,
        });
    }
    // 2. MissingWait: RATE_LIMITED in failure_types but session still failed
    const missingWaitSessions = traces.filter(t => t.failure_types.includes('RATE_LIMITED') && t.n_failed > 0);
    if (missingWaitSessions.length >= 2) {
        patterns.push({
            type: 'MissingWait',
            errorEvidence: missingWaitSessions
                .flatMap(t => t.recurring_errors.filter(e => /rate.?limit|429|too many/i.test(e)))
                .slice(0, 3),
            occurrences: missingWaitSessions.length,
            priority: 3,
        });
    }
    // 3. CapabilityGapNoTool: CAPABILITY_GAP in failure_types AND tools_used is empty AND session failed
    const capGapSessions = traces.filter(t => t.failure_types.includes('CAPABILITY_GAP') &&
        t.tools_used.length === 0 &&
        t.n_failed > 0);
    if (capGapSessions.length >= 2) {
        patterns.push({
            type: 'CapabilityGapNoTool',
            errorEvidence: capGapSessions
                .flatMap(t => t.recurring_errors)
                .slice(0, 3),
            occurrences: capGapSessions.length,
            priority: 2,
        });
    }
    // 4. HighAttempt: average max attempts >= 3.5 across recent sessions
    if (traces.length >= 3) {
        // A trace with no per-agent attempt data counts as a single attempt.
        const avgMaxAttempts = traces
            .map(t => (t.attempts_per_agent.length > 0 ? Math.max(...t.attempts_per_agent) : 1))
            .reduce((a, b) => a + b, 0) / traces.length;
        if (avgMaxAttempts >= 3.5) {
            patterns.push({
                type: 'HighAttempt',
                errorEvidence: [],
                occurrences: traces.length,
                priority: 4,
            });
        }
    }
    // Sort by priority (lower = higher)
    patterns.sort((a, b) => a.priority - b.priority);
    return {
        hasPattern: patterns.length > 0,
        topPattern: patterns[0],
        allPatterns: patterns,
        tracesAnalyzed: traces.length,
    };
}
|
|
96
|
+
//# sourceMappingURL=patterns.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"patterns.js","sourceRoot":"","sources":["../../src/harness/patterns.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAE5C,gFAAgF;AAChF,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAA;AAsB1D,MAAM,UAAU,eAAe,CAAC,UAAkB,EAAE,CAAC,GAAG,EAAE;IACxD,MAAM,MAAM,GAAG,cAAc,CAAC,UAAU,EAAE,CAAC,CAAC,CAAA;IAE5C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,WAAW,EAAE,EAAE,EAAE,cAAc,EAAE,CAAC,EAAE,CAAA;IAClE,CAAC;IAED,MAAM,QAAQ,GAAsB,EAAE,CAAA;IAEtC,+EAA+E;IAC/E,mDAAmD;IACnD,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAA;IAC7C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,8EAA8E;QAC9E,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAA;QACvC,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,gBAAgB,EAAE,CAAC;YACzC,+CAA+C;YAC/C,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,WAAW,EAAE,CAAA;YAC3C,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5B,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;gBACtB,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;YACvD,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,eAAe,GAAa,EAAE,CAAA;IACpC,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,WAAW,CAAC,OAAO,EAAE,EAAE,CAAC;QACpD,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;YACf,yEAAyE;YACzE,MAAM,SAAS,GAAG,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAA;YACvE,IAAI,CAAC,SAAS,EAAE,CAAC;gBACf,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YAC9B,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC/B,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,iBAAiB;YACvB,aAAa,EAAE,eAAe,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAC1C,WAAW,EAAE,eAAe,CAAC,MAAM;YACnC,QAAQ,EAAE,CAAC;SACZ,CAAC,CAAA;IACJ,CAAC;IAED,yEAAyE;IACzE,MAAM,mBAAmB,GAAG,MAAM,CAAC,MAAM,CACvC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,QAAQ,GAAG,CAAC,CAChE,CAAA;IACD,IAAI,mBAAmB,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACpC,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,aAAa;YACnB,aAAa,EAAE,mBAAmB;iBAC/B,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,
CAAC,2BAA2B,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;iBACjF,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YACd,WAAW,EAAE,mBAAmB,CAAC,MAAM;YACvC,QAAQ,EAAE,CAAC;SACZ,CAAC,CAAA;IACJ,CAAC;IAED,qGAAqG;IACrG,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,CAClC,CAAC,CAAC,EAAE,CACF,CAAC,CAAC,aAAa,CAAC,QAAQ,CAAC,gBAAgB,CAAC;QAC1C,CAAC,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC;QACzB,CAAC,CAAC,QAAQ,GAAG,CAAC,CACjB,CAAA;IACD,IAAI,cAAc,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC/B,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,qBAAqB;YAC3B,aAAa,EAAE,cAAc;iBAC1B,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC;iBAChC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YACd,WAAW,EAAE,cAAc,CAAC,MAAM;YAClC,QAAQ,EAAE,CAAC;SACZ,CAAC,CAAA;IACJ,CAAC;IAED,qEAAqE;IACrE,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACvB,MAAM,cAAc,GAClB,MAAM;aACH,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aACnF,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;QAE/C,IAAI,cAAc,IAAI,GAAG,EAAE,CAAC;YAC1B,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,aAAa;gBACnB,aAAa,EAAE,EAAE;gBACjB,WAAW,EAAE,MAAM,CAAC,MAAM;gBAC1B,QAAQ,EAAE,CAAC;aACZ,CAAC,CAAA;QACJ,CAAC;IACH,CAAC;IAED,oCAAoC;IACpC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAA;IAEhD,OAAO;QACL,UAAU,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC;QAC/B,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;QACvB,WAAW,EAAE,QAAQ;QACrB,cAAc,EAAE,MAAM,CAAC,MAAM;KAC9B,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { UsertesterConfig } from '../types.js';
|
|
2
|
+
import type { DetectedPattern } from './patterns.js';
|
|
3
|
+
/** A single search-and-replace code change proposed by the LLM proposer. */
export interface CodePatch {
    /** Project-relative path of the file to patch; only these two files are allowed. */
    file: 'src/orchestrator/retry.ts' | 'src/browser/agent.ts';
    /** Text to be replaced; the proposer requires it to occur exactly once in the file it read. */
    oldCode: string;
    /** Replacement text. */
    newCode: string;
    /** Short description of the change. */
    description: string;
    /** Pattern type the patch addresses; falls back to the detected pattern's type when the model omits it. */
    patternType: string;
}
/** Persisted bookkeeping used to decide when the outer loop should stop proposing patches. */
export interface ConvergenceState {
    /** Number of patches applied so far. */
    patchesApplied: number;
    /** ISO timestamp of the most recent applied patch, or null if none. */
    lastPatchAt: string | null;
    /** Sessions recorded since the last applied patch (reset to 0 when a patch is applied). */
    sessionsSinceLastPatch: number;
    /** Most recent session success rates; at most the last 5 are kept. */
    successRateHistory: number[];
    /** True once a convergence criterion has been met. */
    converged: boolean;
    /** Human-readable reason recorded when `converged` was set. */
    convergenceReason?: string;
}
/** Read `harness_state.json` from `harnessDir`; a fresh zeroed state is returned when it cannot be read or parsed. */
export declare function loadConvergenceState(harnessDir: string): ConvergenceState;
/** Write `state` as pretty-printed JSON to `harness_state.json` in `harnessDir`, creating the directory if needed. */
export declare function saveConvergenceState(harnessDir: string, state: ConvergenceState): void;
/**
 * Return a new state with `sessionSuccessRate` appended to the history and the
 * convergence criteria re-evaluated; `patchApplied` bumps the patch counters.
 */
export declare function updateConvergenceState(state: ConvergenceState, sessionSuccessRate: number, patchApplied: boolean): ConvergenceState;
/**
 * Ask the proposer model for a patch addressing `pattern`.
 * Resolves to null when the state is already converged; rejects when the
 * target file cannot be read, the LLM call fails, or the response is not a
 * usable patch.
 */
export declare function runProposer(opts: {
    pattern: DetectedPattern;
    convergenceState: ConvergenceState;
    config: Partial<UsertesterConfig>;
    projectRoot: string;
}): Promise<CodePatch | null>;
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM code proposer for the outer loop meta-harness.
|
|
3
|
+
* Uses the proposer_model (defaults to anthropic/claude-opus-4-6) to generate
|
|
4
|
+
* code patches that address detected failure patterns.
|
|
5
|
+
*/
|
|
6
|
+
import fs from 'node:fs';
|
|
7
|
+
import path from 'node:path';
|
|
8
|
+
import { generateText } from 'ai';
|
|
9
|
+
import { resolveModel } from '../llm/provider.js';
|
|
10
|
+
/**
 * Load the persisted convergence state from `<harnessDir>/harness_state.json`.
 *
 * A missing, unreadable or unparsable file yields a fresh zeroed state. A file
 * that parses but does not hold a well-formed state (e.g. `null`, an array, or
 * an object with missing/mistyped fields) is normalised field by field, so
 * callers can always rely on `successRateHistory` being an array and the
 * counters being numbers. Unknown extra keys are preserved.
 *
 * @param {string} harnessDir - Directory containing harness_state.json.
 * @returns {{patchesApplied: number, lastPatchAt: (string|null), sessionsSinceLastPatch: number, successRateHistory: number[], converged: boolean, convergenceReason?: string}}
 */
export function loadConvergenceState(harnessDir) {
    const statePath = path.join(harnessDir, 'harness_state.json');
    const freshState = () => ({
        patchesApplied: 0,
        lastPatchAt: null,
        sessionsSinceLastPatch: 0,
        successRateHistory: [],
        converged: false,
    });
    let parsed;
    try {
        parsed = JSON.parse(fs.readFileSync(statePath, 'utf-8'));
    }
    catch {
        // Best-effort load: any read/parse failure starts from a clean state.
        return freshState();
    }
    // JSON.parse happily returns null, numbers, strings or arrays.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        return freshState();
    }
    return {
        ...parsed,
        patchesApplied: Number.isFinite(parsed.patchesApplied) ? parsed.patchesApplied : 0,
        lastPatchAt: typeof parsed.lastPatchAt === 'string' ? parsed.lastPatchAt : null,
        sessionsSinceLastPatch: Number.isFinite(parsed.sessionsSinceLastPatch) ? parsed.sessionsSinceLastPatch : 0,
        successRateHistory: Array.isArray(parsed.successRateHistory)
            ? parsed.successRateHistory.filter((rate) => Number.isFinite(rate))
            : [],
        converged: parsed.converged === true,
    };
}
|
|
26
|
+
/**
 * Persist the convergence state as pretty-printed JSON in
 * `<harnessDir>/harness_state.json`, creating the directory when absent.
 *
 * @param {string} harnessDir - Directory that holds the state file.
 * @param {object} state - Convergence state to serialise.
 * @returns {void}
 */
export function saveConvergenceState(harnessDir, state) {
    fs.mkdirSync(harnessDir, { recursive: true });
    const destination = path.join(harnessDir, 'harness_state.json');
    const serialized = JSON.stringify(state, null, 2);
    fs.writeFileSync(destination, serialized);
}
|
|
31
|
+
/**
 * Produce the next convergence state after a session (the input is not mutated).
 *
 * The success-rate history keeps only the latest 5 entries. Convergence is
 * flagged when 20 patches were already applied, or when at least 5 sessions
 * have passed since the last patch and the last 5 success rates differ by
 * less than 0.005. Once set, `converged` is carried forward.
 *
 * @param {object} state - Previous convergence state.
 * @param {number} sessionSuccessRate - Success rate of the session just finished.
 * @param {boolean} patchApplied - Whether a patch was applied in this session.
 * @returns {object} The updated convergence state.
 */
export function updateConvergenceState(state, sessionSuccessRate, patchApplied) {
    const recentRates = [...state.successRateHistory, sessionSuccessRate].slice(-5);
    const sessionsSince = patchApplied ? 0 : state.sessionsSinceLastPatch + 1;
    // Start from the previous verdict; the criteria below can only set it.
    let { converged, convergenceReason } = state;
    if (state.patchesApplied >= 20) {
        converged = true;
        convergenceReason = 'Max patches applied (20)';
    }
    else if (sessionsSince >= 5 && recentRates.length >= 5) {
        const spread = Math.max(...recentRates) - Math.min(...recentRates);
        if (spread < 0.005) {
            converged = true;
            convergenceReason = 'Success rate stable for 5 sessions (improvement < 0.5%)';
        }
    }
    const next = {
        patchesApplied: state.patchesApplied,
        lastPatchAt: state.lastPatchAt,
        sessionsSinceLastPatch: sessionsSince,
        successRateHistory: recentRates,
        converged,
        convergenceReason,
    };
    if (patchApplied) {
        next.patchesApplied = state.patchesApplied + 1;
        next.lastPatchAt = new Date().toISOString();
    }
    return next;
}
|
|
57
|
+
/**
 * Build the single user prompt sent to the proposer model.
 *
 * The prompt contains the detected pattern type, its error evidence, the full
 * current contents of the target file, a pattern-specific task description,
 * the hard constraints, and the JSON shape the model must answer with.
 *
 * NOTE(review): errorEvidence strings and fileContents are interpolated
 * verbatim with no escaping. If the evidence can contain text controlled by
 * the site under test, it reaches the code-proposing model unfiltered —
 * confirm where recurring_errors originate.
 *
 * @param {object} pattern - Detected pattern (type, errorEvidence, occurrences).
 * @param {string} targetFile - Project-relative path of the file to patch.
 * @param {string} fileContents - Current contents of the target file.
 * @returns {string} The prompt text.
 */
function buildProposerPrompt(pattern, targetFile, fileContents) {
    const taskDescription = getTaskDescription(pattern, targetFile);
    // Everything inside the template literal is runtime prompt text.
    return `You are an expert TypeScript engineer improving an AI browser automation harness.

DETECTED PATTERN: ${pattern.type}
Error evidence (${pattern.occurrences} sessions):
${pattern.errorEvidence.map(e => ` - "${e}"`).join('\n')}

TARGET FILE: ${targetFile}
CURRENT FILE CONTENTS:
\`\`\`typescript
${fileContents}
\`\`\`

TASK:
${taskDescription}

HARD CONSTRAINTS:
- Never remove or modify existing FAILURE_SIGNALS entries
- The oldCode field must be VERBATIM text that appears EXACTLY ONCE in the file
- Change fewer than 50 lines total
- The newCode must be valid TypeScript
- Do not change imports unless strictly necessary

Respond with a single JSON object (no markdown fences, no extra text):
{
"file": "${targetFile}",
"oldCode": "<verbatim substring from the file to replace>",
"newCode": "<replacement code>",
"description": "<one sentence describing the change>",
"patternType": "${pattern.type}"
}`;
}
|
|
90
|
+
/**
 * Return the task instruction for the proposer prompt, chosen by pattern type.
 * Unknown pattern types get a generic "improve error handling" instruction.
 *
 * @param {object} pattern - Detected pattern; only `type` is read.
 * @param {string} targetFile - Project-relative path named in the instruction.
 * @returns {string} One-paragraph task description.
 */
function getTaskDescription(pattern, targetFile) {
    const kind = pattern.type;
    if (kind === 'UnhandledSignal') {
        return `Add a new entry to the FAILURE_SIGNALS array in ${targetFile} that matches the unhandled error patterns. The new entry should have an appropriate pattern regex, FailureType, and recovery hint.`;
    }
    if (kind === 'CapabilityGapNoTool') {
        return `Update selectToolsForRecovery() in ${targetFile} to inject the appropriate tool(s) for the capability gap being detected. Look at the error evidence to determine which tool is missing.`;
    }
    if (kind === 'MissingWait') {
        return `Improve the RATE_LIMITED handling in ${targetFile} to better extract and apply wait times from rate limit responses. Ensure the wait logic covers the error patterns shown.`;
    }
    if (kind === 'HighAttempt') {
        return `Review the retry strategy in ${targetFile} and add a more intelligent backoff or early-exit condition to reduce unnecessary retries when the agent is clearly stuck.`;
    }
    // Fallback for pattern types without a dedicated instruction.
    return `Improve error handling in ${targetFile} to address the detected pattern: ${kind}.`;
}
|
|
104
|
+
/**
 * Choose which source file a patch for the given pattern should target.
 * Only 'HighAttempt' targets the browser agent; every other pattern type
 * (including unknown ones) targets the retry module.
 *
 * @param {object} pattern - Detected pattern; only `type` is read.
 * @returns {string} Project-relative path of the file to patch.
 */
function selectTargetFile(pattern) {
    const targetsBrowserAgent = pattern.type === 'HighAttempt';
    return targetsBrowserAgent ? 'src/browser/agent.ts' : 'src/orchestrator/retry.ts';
}
|
|
116
|
+
/**
 * Ask the proposer model for a code patch that addresses `pattern`.
 *
 * Reads the target file chosen for the pattern, sends it with the pattern
 * evidence to the model, parses the JSON answer and checks that it is a
 * complete patch for that same file whose `oldCode` occurs exactly once.
 *
 * @param {object} opts
 * @param {object} opts.pattern - Detected pattern to address.
 * @param {object} opts.convergenceState - Current convergence state; when
 *   `converged` is set no proposal is made.
 * @param {object} opts.config - Configuration; `proposer_model` overrides the
 *   default model string.
 * @param {string} opts.projectRoot - Root the target file path is resolved against.
 * @returns {Promise<object|null>} The validated patch, or null when converged.
 * @throws {Error} When the target file cannot be read, the LLM call fails, or
 *   the response is missing, unparsable, incomplete, for the wrong file, or
 *   its `oldCode` does not occur exactly once.
 */
export async function runProposer(opts) {
    const { pattern, convergenceState, config, projectRoot } = opts;
    // Check convergence — don't propose if converged
    if (convergenceState.converged) {
        return null;
    }
    const targetFile = selectTargetFile(pattern);
    const absoluteFilePath = path.join(projectRoot, targetFile);
    let fileContents;
    try {
        fileContents = fs.readFileSync(absoluteFilePath, 'utf-8');
    }
    catch (err) {
        throw new Error(`Cannot read ${absoluteFilePath}: ${err}`, { cause: err });
    }
    const prompt = buildProposerPrompt(pattern, targetFile, fileContents);
    const modelString = config.proposer_model ?? 'anthropic/claude-opus-4-6';
    const model = resolveModel(modelString, config);
    let text;
    try {
        const result = await generateText({
            model,
            messages: [{ role: 'user', content: prompt }],
            maxOutputTokens: 2000,
        });
        text = result.text;
    }
    catch (err) {
        throw new Error(`Proposer LLM call failed: ${err}`, { cause: err });
    }
    // Parse JSON from response — same pattern as classifyFailure.
    // Greedy match from the first "{" to the last "}" tolerates prose or
    // fences around the JSON object.
    const match = text.match(/\{[\s\S]*\}/);
    if (!match) {
        throw new Error(`Proposer returned no JSON. Response: ${text.slice(0, 200)}`);
    }
    let parsed;
    try {
        parsed = JSON.parse(match[0]);
    }
    catch (err) {
        throw new Error(`Proposer JSON parse failed: ${err}. Raw: ${match[0].slice(0, 200)}`, { cause: err });
    }
    // Every required field must be a non-empty string; a non-string oldCode
    // would otherwise be coerced by split() in the uniqueness check below.
    const requiredFields = ['file', 'oldCode', 'newCode', 'description'];
    const isComplete = requiredFields.every((field) => typeof parsed[field] === 'string' && parsed[field].length > 0);
    if (!isComplete) {
        throw new Error(`Proposer returned incomplete patch: ${JSON.stringify(parsed)}`);
    }
    // Validate file field
    if (parsed.file !== 'src/orchestrator/retry.ts' &&
        parsed.file !== 'src/browser/agent.ts') {
        throw new Error(`Proposer returned invalid file: ${parsed.file}`);
    }
    // The uniqueness check below runs against the contents of targetFile, so
    // a patch naming the other allowed file would be verified against the
    // wrong text. Reject it instead.
    if (parsed.file !== targetFile) {
        throw new Error(`Proposer returned patch for ${parsed.file} but was asked to patch ${targetFile}`);
    }
    const patch = {
        file: parsed.file,
        oldCode: parsed.oldCode,
        newCode: parsed.newCode,
        description: parsed.description,
        patternType: parsed.patternType ?? pattern.type,
    };
    // Verify oldCode appears exactly once in the file
    const occurrences = fileContents.split(patch.oldCode).length - 1;
    if (occurrences !== 1) {
        throw new Error(`Proposed oldCode appears ${occurrences} times in ${patch.file} (expected exactly 1). ` +
            `oldCode: "${patch.oldCode.slice(0, 100)}"`);
    }
    return patch;
}
|
|
181
|
+
//# sourceMappingURL=proposer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"proposer.js","sourceRoot":"","sources":["../../src/harness/proposer.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,EAAE,MAAM,SAAS,CAAA;AACxB,OAAO,IAAI,MAAM,WAAW,CAAA;AAC5B,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAA;AACjC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAqBjD,MAAM,UAAU,oBAAoB,CAAC,UAAkB;IACrD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,oBAAoB,CAAC,CAAA;IAC7D,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;QACnD,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAqB,CAAA;IAChD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,cAAc,EAAE,CAAC;YACjB,WAAW,EAAE,IAAI;YACjB,sBAAsB,EAAE,CAAC;YACzB,kBAAkB,EAAE,EAAE;YACtB,SAAS,EAAE,KAAK;SACjB,CAAA;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,UAAkB,EAAE,KAAuB;IAC9E,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,oBAAoB,CAAC,CAAA;IAC7D,EAAE,CAAC,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAA;AAC7D,CAAC;AAED,MAAM,UAAU,sBAAsB,CACpC,KAAuB,EACvB,kBAA0B,EAC1B,YAAqB;IAErB,MAAM,UAAU,GAAG,CAAC,GAAG,KAAK,CAAC,kBAAkB,EAAE,kBAAkB,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;IAC9E,MAAM,gBAAgB,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,sBAAsB,GAAG,CAAC,CAAA;IAE5E,6BAA6B;IAC7B,IAAI,SAAS,GAAG,KAAK,CAAC,SAAS,CAAA;IAC/B,IAAI,iBAAiB,GAAG,KAAK,CAAC,iBAAiB,CAAA;IAE/C,IAAI,KAAK,CAAC,cAAc,IAAI,EAAE,EAAE,CAAC;QAC/B,SAAS,GAAG,IAAI,CAAA;QAChB,iBAAiB,GAAG,0BAA0B,CAAA;IAChD,CAAC;SAAM,IAAI,gBAAgB,IAAI,CAAC,IAAI,UAAU,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC3D,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC,CAAA;QACrE,IAAI,WAAW,GAAG,KAAK,EAAE,CAAC;YACxB,SAAS,GAAG,IAAI,CAAA;YAChB,iBAAiB,GAAG,yDAAyD,CAAA;QAC/E,CAAC;IACH,CAAC;IAED,OAAO;QACL,cAAc,EAAE,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,cAAc;QAC9E,WAAW,EAAE,YAAY,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW;QACxE,sBAAsB,EAAE,gBAAgB;QACxC,kBAAkB,EAAE,UAAU;QAC9B,SAAS;QACT,iBAAiB;KAClB,CAAA;AACH
,CAAC;AAED,SAAS,mBAAmB,CAC1B,OAAwB,EACxB,UAAgE,EAChE,YAAoB;IAEpB,MAAM,eAAe,GAAG,kBAAkB,CAAC,OAAO,EAAE,UAAU,CAAC,CAAA;IAE/D,OAAO;;oBAEW,OAAO,CAAC,IAAI;kBACd,OAAO,CAAC,WAAW;EACnC,OAAO,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;;eAE1C,UAAU;;;EAGvB,YAAY;;;;EAIZ,eAAe;;;;;;;;;;;aAWJ,UAAU;;;;oBAIH,OAAO,CAAC,IAAI;EAC9B,CAAA;AACF,CAAC;AAED,SAAS,kBAAkB,CACzB,OAAwB,EACxB,UAAgE;IAEhE,QAAQ,OAAO,CAAC,IAAI,EAAE,CAAC;QACrB,KAAK,iBAAiB;YACpB,OAAO,mDAAmD,UAAU,qIAAqI,CAAA;QAE3M,KAAK,qBAAqB;YACxB,OAAO,sCAAsC,UAAU,0IAA0I,CAAA;QAEnM,KAAK,aAAa;YAChB,OAAO,wCAAwC,UAAU,2HAA2H,CAAA;QAEtL,KAAK,aAAa;YAChB,OAAO,gCAAgC,UAAU,4HAA4H,CAAA;QAE/K;YACE,OAAO,6BAA6B,UAAU,qCAAqC,OAAO,CAAC,IAAI,GAAG,CAAA;IACtG,CAAC;AACH,CAAC;AAED,SAAS,gBAAgB,CACvB,OAAwB;IAExB,QAAQ,OAAO,CAAC,IAAI,EAAE,CAAC;QACrB,KAAK,iBAAiB,CAAC;QACvB,KAAK,aAAa,CAAC;QACnB,KAAK,qBAAqB;YACxB,OAAO,2BAA2B,CAAA;QACpC,KAAK,aAAa;YAChB,OAAO,sBAAsB,CAAA;QAC/B;YACE,OAAO,2BAA2B,CAAA;IACtC,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,IAKjC;IACC,MAAM,EAAE,OAAO,EAAE,gBAAgB,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,IAAI,CAAA;IAE/D,iDAAiD;IACjD,IAAI,gBAAgB,CAAC,SAAS,EAAE,CAAC;QAC/B,OAAO,IAAI,CAAA;IACb,CAAC;IAED,MAAM,UAAU,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAA;IAC5C,MAAM,gBAAgB,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,UAAU,CAAC,CAAA;IAE3D,IAAI,YAAoB,CAAA;IACxB,IAAI,CAAC;QACH,YAAY,GAAG,EAAE,CAAC,YAAY,CAAC,gBAAgB,EAAE,OAAO,CAAC,CAAA;IAC3D,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,eAAe,gBAAgB,KAAK,GAAG,EAAE,CAAC,CAAA;IAC5D,CAAC;IAED,MAAM,MAAM,GAAG,mBAAmB,CAAC,OAAO,EAAE,UAAU,EAAE,YAAY,CAAC,CAAA;IAErE,MAAM,WAAW,GAAG,MAAM,CAAC,cAAc,IAAI,2BAA2B,CAAA;IACxE,MAAM,KAAK,GAAG,YAAY,CAAC,WAAW,EAAE,MAAM,CAAC,CAAA;IAE/C,IAAI,IAAY,CAAA;IAChB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC;YAChC,KAAK;YACL,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;YAC7C,eAAe,EAAE,IAAI;SACtB,CAAC,CAAA;QACF,IAAI,GAAG,MAAM,CAAC,IAAI,CAAA;IACpB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,EAAE,CAAC,CAAA;IACrD,CAAC;IA
ED,6DAA6D;IAC7D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,CAAA;IACvC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,wCAAwC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;IAC/E,CAAC;IAED,IAAI,MAA0B,CAAA;IAC9B,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAuB,CAAA;IACrD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,+BAA+B,GAAG,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;IACvF,CAAC;IAED,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;QAC9E,MAAM,IAAI,KAAK,CAAC,uCAAuC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,CAAA;IAClF,CAAC;IAED,sBAAsB;IACtB,IACE,MAAM,CAAC,IAAI,KAAK,2BAA2B;QAC3C,MAAM,CAAC,IAAI,KAAK,sBAAsB,EACtC,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,mCAAmC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAA;IACnE,CAAC;IAED,MAAM,KAAK,GAAc;QACvB,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,OAAO,EAAE,MAAM,CAAC,OAAO;QACvB,OAAO,EAAE,MAAM,CAAC,OAAO;QACvB,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,OAAO,CAAC,IAAI;KAChD,CAAA;IAED,kDAAkD;IAClD,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAA;IAChE,IAAI,WAAW,KAAK,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CACb,4BAA4B,WAAW,aAAa,KAAK,CAAC,IAAI,yBAAyB;YACrF,aAAa,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAC9C,CAAA;IACH,CAAC;IAED,OAAO,KAAK,CAAA;AACd,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { FailureType } from '../orchestrator/retry.js';
|
|
2
|
+
import type { RetryAttempt } from '../orchestrator/retry.js';
|
|
3
|
+
/** Summary record for one session; writeTrace stores each record as a single JSON line in traces.ndjson. Field notes describe how buildTrace populates them. */
export interface SessionTrace {
|
|
4
|
+
session_id: string; // the sessionId option passed to buildTrace
|
|
5
|
+
url: string; // copied unchanged from the buildTrace options
|
|
6
|
+
ts: string; // ISO-8601 timestamp (Date#toISOString) taken when the trace object is built
|
|
7
|
+
n_agents: number; // the nAgents option passed to buildTrace
|
|
8
|
+
n_succeeded: number; // number of truthy entries in agentSucceeded
|
|
9
|
+
n_failed: number; // n_agents minus n_succeeded
|
|
10
|
+
failure_types: FailureType[]; // distinct failureType values taken from attempts whose result is 'failed'
|
|
11
|
+
recurring_errors: string[]; // distinct agentMessage prefixes (first 200 characters) of failed attempts; de-duplicated only, no recurrence count is kept
|
|
12
|
+
tools_used: string[]; // distinct entries of agentToolsUsed across all agents
|
|
13
|
+
attempts_per_agent: number[]; // per agent: length of its retry history plus one
|
|
14
|
+
profile_hit: boolean; // true when at least one agentProfileHits entry is truthy
|
|
15
|
+
}
|
|
16
|
+
/**
 * Append `trace` as one JSON line to `traces.ndjson` inside `harnessDir`.
 * The directory (including missing parents) is created first if needed.
 */
export declare function writeTrace(harnessDir: string, trace: SessionTrace): void;
|
|
17
|
+
/**
 * Read `traces.ndjson` from `harnessDir` and return the most recent traces in
 * file order (oldest first). `k` selects how many trailing entries to take and
 * defaults to 20 in the implementation. Returns an empty array when the file
 * cannot be read.
 */
export declare function readLastTraces(harnessDir: string, k?: number): SessionTrace[];
|
|
18
|
+
/**
|
|
19
|
+
* Build a SessionTrace from per-agent retry histories.
*
* The per-agent arrays are aggregated into session-wide counts and
* de-duplicated lists; the returned trace's `ts` is the current time at the
* moment of the call. Nothing is written to disk by this function.
|
|
20
|
+
*/
|
|
21
|
+
export declare function buildTrace(opts: {
|
|
22
|
+
sessionId: string; // stored as SessionTrace.session_id
|
|
23
|
+
url: string; // copied through unchanged
|
|
24
|
+
agentRetryHistories: RetryAttempt[][]; // one retry history per agent; entries with result 'failed' feed failure_types and recurring_errors
|
|
25
|
+
agentToolsUsed: string[][]; // tools per agent; flattened and de-duplicated into tools_used
|
|
26
|
+
agentProfileHits: boolean[]; // any truthy entry sets profile_hit
|
|
27
|
+
agentSucceeded: boolean[]; // truthy entries are counted as n_succeeded
|
|
28
|
+
nAgents: number; // stored as n_agents; n_failed is nAgents minus n_succeeded
|
|
29
|
+
}): SessionTrace;
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session trace writer — appends one structured line per session to
|
|
3
|
+
* ~/.usertester/harness/traces.ndjson for the outer loop pattern analyzer.
|
|
4
|
+
*/
|
|
5
|
+
import fs from 'node:fs';
|
|
6
|
+
import path from 'node:path';
|
|
7
|
+
/**
 * Persist one session trace as a single NDJSON record.
 *
 * Makes sure `harnessDir` exists (creating missing parent directories), then
 * appends the JSON-serialized trace followed by a newline to
 * `<harnessDir>/traces.ndjson`.
 *
 * @param {string} harnessDir - Directory that holds the trace log.
 * @param {object} trace - Session trace to serialize.
 * @returns {void}
 */
export function writeTrace(harnessDir, trace) {
    // Create the directory before anything else so a fresh install can log.
    fs.mkdirSync(harnessDir, { recursive: true });
    const logFile = path.join(harnessDir, 'traces.ndjson');
    const record = `${JSON.stringify(trace)}\n`;
    fs.appendFileSync(logFile, record);
}
|
|
12
|
+
/**
 * Load the most recent session traces from `<harnessDir>/traces.ndjson`.
 *
 * The log is read best-effort: a missing or unreadable file yields `[]`, and
 * individual lines that are not valid JSON (for example a record truncated by
 * an interrupted append) are skipped instead of discarding the whole history.
 *
 * @param {string} harnessDir - Directory that holds the trace log.
 * @param {number} [k=20] - Maximum number of traces to return. Fractions are
 *   floored; values below 1 (or non-numeric values) return `[]`.
 * @returns {object[]} Up to `k` parsed traces in file order (oldest first).
 */
export function readLastTraces(harnessDir, k = 20) {
    const tracePath = path.join(harnessDir, 'traces.ndjson');

    // `slice(-0)` equals `slice(0)`, so a zero/negative limit must be handled
    // explicitly or it would return every (or an arbitrary subset of) trace.
    const limit = Math.floor(k);
    if (!(limit >= 1)) {
        return [];
    }

    let content;
    try {
        content = fs.readFileSync(tracePath, 'utf-8');
    }
    catch {
        // No log yet (or it is unreadable): treat as "no history".
        return [];
    }

    // Walk backwards so only as many lines as needed are parsed.
    const lines = content.split('\n');
    const newestFirst = [];
    for (let i = lines.length - 1; i >= 0 && newestFirst.length < limit; i -= 1) {
        const line = lines[i].trim();
        if (line === '') {
            continue;
        }
        try {
            newestFirst.push(JSON.parse(line));
        }
        catch {
            // Corrupt or partially written record: skip it, keep the rest.
        }
    }
    return newestFirst.reverse();
}
|
|
26
|
+
/**
 * Summarize one multi-agent session into a SessionTrace record.
 *
 * @param {object} opts
 * @param {string} opts.sessionId - Stored as `session_id`.
 * @param {string} opts.url - Copied through unchanged.
 * @param {object[][]} opts.agentRetryHistories - One retry history per agent.
 * @param {string[][]} opts.agentToolsUsed - Tools used, per agent.
 * @param {boolean[]} opts.agentProfileHits - Per-agent profile-hit flags.
 * @param {boolean[]} opts.agentSucceeded - Per-agent success flags.
 * @param {number} opts.nAgents - Stored as `n_agents`.
 * @returns {object} Trace with counts, de-duplicated failure types, error
 *   snippets and tools, per-agent attempt counts, and an ISO timestamp.
 */
export function buildTrace(opts) {
    const {
        sessionId,
        url,
        agentRetryHistories: histories,
        agentToolsUsed: toolsByAgent,
        agentProfileHits: profileHits,
        agentSucceeded: successFlags,
        nAgents,
    } = opts;

    const succeededCount = successFlags.reduce((count, ok) => (ok ? count + 1 : count), 0);

    // Single pass over every failed attempt; Sets keep first-seen order.
    const failureTypeSet = new Set();
    const errorSnippetSet = new Set();
    const failedAttempts = histories.flat().filter((attempt) => attempt.result === 'failed');
    for (const attempt of failedAttempts) {
        if (attempt.failureType) {
            failureTypeSet.add(attempt.failureType);
        }
        // Only the first 200 characters are kept so near-identical long
        // messages collapse into one entry.
        errorSnippetSet.add(attempt.agentMessage.slice(0, 200));
    }

    const distinctTools = Array.from(new Set(toolsByAgent.flat()));

    // Each history holds the retries; the initial attempt adds one more.
    const attemptCounts = histories.map((history) => history.length + 1);

    const anyProfileHit = profileHits.some((hit) => Boolean(hit));

    return {
        session_id: sessionId,
        url,
        ts: new Date().toISOString(),
        n_agents: nAgents,
        n_succeeded: succeededCount,
        n_failed: nAgents - succeededCount,
        failure_types: Array.from(failureTypeSet),
        recurring_errors: Array.from(errorSnippetSet),
        tools_used: distinctTools,
        attempts_per_agent: attemptCounts,
        profile_hit: anyProfileHit,
    };
}
|
|
65
|
+
//# sourceMappingURL=traces.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"traces.js","sourceRoot":"","sources":["../../src/harness/traces.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,EAAE,MAAM,SAAS,CAAA;AACxB,OAAO,IAAI,MAAM,WAAW,CAAA;AAkB5B,MAAM,UAAU,UAAU,CAAC,UAAkB,EAAE,KAAmB;IAChE,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,eAAe,CAAC,CAAA;IACxD,EAAE,CAAC,cAAc,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAA;AAC5D,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,UAAkB,EAAE,CAAC,GAAG,EAAE;IACvD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,eAAe,CAAC,CAAA;IACxD,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;QACnD,OAAO,OAAO;aACX,KAAK,CAAC,IAAI,CAAC;aACX,MAAM,CAAC,OAAO,CAAC;aACf,KAAK,CAAC,CAAC,CAAC,CAAC;aACT,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAiB,CAAC,CAAA;IAClD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAA;IACX,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,IAQ1B;IACC,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,mBAAmB,EAAE,cAAc,EAAE,gBAAgB,EAAE,cAAc,EAAE,OAAO,EAAE,GAAG,IAAI,CAAA;IAE/G,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAA;IACzD,MAAM,QAAQ,GAAG,OAAO,GAAG,WAAW,CAAA;IAEtC,8CAA8C;IAC9C,MAAM,aAAa,GAAkB;QACnC,GAAG,IAAI,GAAG,CACR,mBAAmB;aAChB,IAAI,EAAE;aACN,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ,IAAI,CAAC,CAAC,WAAW,CAAC;aACnD,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAA0B,CAAC,CAC1C;KACF,CAAA;IAED,0DAA0D;IAC1D,MAAM,WAAW,GAAG,mBAAmB;SACpC,IAAI,EAAE;SACN,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ,CAAC;SAClC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAA;IAEzC,MAAM,gBAAgB,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC,CAAA;IAElD,yBAAyB;IACzB,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC,CAAA;IAEtD,6EAA6E;IAC7E,MAAM,kBAAkB,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAErE,MAAM,WAAW,GAAG,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAElD,OAAO;QACL,UAAU,EAAE,SAAS;QACrB,GAAG;QACH,EAAE,EAA
E,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QAC5B,QAAQ,EAAE,OAAO;QACjB,WAAW;QACX,QAAQ;QACR,aAAa;QACb,gBAAgB;QAChB,UAAU;QACV,kBAAkB;QAClB,WAAW;KACZ,CAAA;AACH,CAAC"}
|