usertester 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +219 -0
  3. package/dist/browser/agent.d.ts +33 -0
  4. package/dist/browser/agent.js +393 -0
  5. package/dist/browser/agent.js.map +1 -0
  6. package/dist/cli/cleanup.d.ts +5 -0
  7. package/dist/cli/cleanup.js +75 -0
  8. package/dist/cli/cleanup.js.map +1 -0
  9. package/dist/cli/harness.d.ts +10 -0
  10. package/dist/cli/harness.js +108 -0
  11. package/dist/cli/harness.js.map +1 -0
  12. package/dist/cli/index.d.ts +5 -0
  13. package/dist/cli/index.js +31 -0
  14. package/dist/cli/index.js.map +1 -0
  15. package/dist/cli/kill.d.ts +5 -0
  16. package/dist/cli/kill.js +46 -0
  17. package/dist/cli/kill.js.map +1 -0
  18. package/dist/cli/logs.d.ts +5 -0
  19. package/dist/cli/logs.js +64 -0
  20. package/dist/cli/logs.js.map +1 -0
  21. package/dist/cli/profiles.d.ts +5 -0
  22. package/dist/cli/profiles.js +67 -0
  23. package/dist/cli/profiles.js.map +1 -0
  24. package/dist/cli/send.d.ts +5 -0
  25. package/dist/cli/send.js +46 -0
  26. package/dist/cli/send.js.map +1 -0
  27. package/dist/cli/setup.d.ts +6 -0
  28. package/dist/cli/setup.js +168 -0
  29. package/dist/cli/setup.js.map +1 -0
  30. package/dist/cli/spawn.d.ts +5 -0
  31. package/dist/cli/spawn.js +52 -0
  32. package/dist/cli/spawn.js.map +1 -0
  33. package/dist/cli/status.d.ts +5 -0
  34. package/dist/cli/status.js +85 -0
  35. package/dist/cli/status.js.map +1 -0
  36. package/dist/harness/applier.d.ts +38 -0
  37. package/dist/harness/applier.js +152 -0
  38. package/dist/harness/applier.js.map +1 -0
  39. package/dist/harness/index.d.ts +14 -0
  40. package/dist/harness/index.js +110 -0
  41. package/dist/harness/index.js.map +1 -0
  42. package/dist/harness/patterns.d.ts +14 -0
  43. package/dist/harness/patterns.js +96 -0
  44. package/dist/harness/patterns.js.map +1 -0
  45. package/dist/harness/proposer.d.ts +26 -0
  46. package/dist/harness/proposer.js +181 -0
  47. package/dist/harness/proposer.js.map +1 -0
  48. package/dist/harness/traces.d.ts +29 -0
  49. package/dist/harness/traces.js +65 -0
  50. package/dist/harness/traces.js.map +1 -0
  51. package/dist/harness/validator.d.ts +6 -0
  52. package/dist/harness/validator.js +112 -0
  53. package/dist/harness/validator.js.map +1 -0
  54. package/dist/inbox/agentmail.d.ts +11 -0
  55. package/dist/inbox/agentmail.js +36 -0
  56. package/dist/inbox/agentmail.js.map +1 -0
  57. package/dist/llm/provider.d.ts +15 -0
  58. package/dist/llm/provider.js +65 -0
  59. package/dist/llm/provider.js.map +1 -0
  60. package/dist/orchestrator/agent.d.ts +17 -0
  61. package/dist/orchestrator/agent.js +195 -0
  62. package/dist/orchestrator/agent.js.map +1 -0
  63. package/dist/orchestrator/index.d.ts +7 -0
  64. package/dist/orchestrator/index.js +92 -0
  65. package/dist/orchestrator/index.js.map +1 -0
  66. package/dist/orchestrator/retry.d.ts +27 -0
  67. package/dist/orchestrator/retry.js +145 -0
  68. package/dist/orchestrator/retry.js.map +1 -0
  69. package/dist/orchestrator/session.d.ts +13 -0
  70. package/dist/orchestrator/session.js +55 -0
  71. package/dist/orchestrator/session.js.map +1 -0
  72. package/dist/output/events.d.ts +12 -0
  73. package/dist/output/events.js +81 -0
  74. package/dist/output/events.js.map +1 -0
  75. package/dist/profiles/learner.d.ts +4 -0
  76. package/dist/profiles/learner.js +168 -0
  77. package/dist/profiles/learner.js.map +1 -0
  78. package/dist/tools/captcha.d.ts +19 -0
  79. package/dist/tools/captcha.js +76 -0
  80. package/dist/tools/captcha.js.map +1 -0
  81. package/dist/tools/inbox.d.ts +30 -0
  82. package/dist/tools/inbox.js +65 -0
  83. package/dist/tools/inbox.js.map +1 -0
  84. package/dist/types.d.ts +121 -0
  85. package/dist/types.js +30 -0
  86. package/dist/types.js.map +1 -0
  87. package/package.json +60 -0
  88. package/tasks.example.json +5 -0
@@ -0,0 +1,110 @@
1
+ /**
2
+ * Outer loop meta-harness entry point.
3
+ * Wires traces → patterns → proposer → validator → applier.
4
+ * Called fire-and-forget from the orchestrator after each session.
5
+ */
6
+ import fs from 'node:fs';
7
+ import path from 'node:path';
8
+ import { buildTrace, writeTrace } from './traces.js';
9
+ import { analyzePatterns } from './patterns.js';
10
+ import { runProposer, loadConvergenceState, saveConvergenceState, updateConvergenceState } from './proposer.js';
11
+ import { validatePatch } from './validator.js';
12
+ import { applyPatch } from './applier.js';
13
+ export async function runHarnessLoop(opts) {
14
+ const { sessionId, agentRetryHistories, agentToolsUsed, agentProfileHits, agentSucceeded, url, nAgents, config, harnessDir, projectRoot, } = opts;
15
+ fs.mkdirSync(harnessDir, { recursive: true });
16
+ const harnessLog = path.join(harnessDir, 'harness.log');
17
+ const log = (msg) => {
18
+ try {
19
+ fs.appendFileSync(harnessLog, `[${new Date().toISOString()}] ${msg}\n`);
20
+ }
21
+ catch { }
22
+ };
23
+ log(`Session ${sessionId}: harness loop started (${nAgents} agents, url=${url})`);
24
+ // Step 1: Build and write SessionTrace
25
+ const trace = buildTrace({
26
+ sessionId,
27
+ url,
28
+ agentRetryHistories,
29
+ agentToolsUsed,
30
+ agentProfileHits,
31
+ agentSucceeded,
32
+ nAgents,
33
+ });
34
+ writeTrace(harnessDir, trace);
35
+ log(`Trace written: ${trace.n_succeeded}/${nAgents} succeeded, failure_types=[${trace.failure_types.join(',')}]`);
36
+ // Update convergence state with current session success rate
37
+ const sessionSuccessRate = nAgents > 0 ? trace.n_succeeded / nAgents : 0;
38
+ let convergenceState = loadConvergenceState(harnessDir);
39
+ convergenceState = updateConvergenceState(convergenceState, sessionSuccessRate, false);
40
+ // Check if converged — if so, skip analysis
41
+ if (convergenceState.converged) {
42
+ log(`Converged: ${convergenceState.convergenceReason ?? 'unknown reason'}. Skipping.`);
43
+ saveConvergenceState(harnessDir, convergenceState);
44
+ return;
45
+ }
46
+ // Step 2: Analyze patterns
47
+ const report = analyzePatterns(harnessDir);
48
+ log(`Pattern analysis: ${report.tracesAnalyzed} traces, hasPattern=${report.hasPattern}, top=${report.topPattern?.type ?? 'none'}`);
49
+ if (!report.hasPattern || !report.topPattern) {
50
+ saveConvergenceState(harnessDir, convergenceState);
51
+ return;
52
+ }
53
+ // Step 3: Run proposer
54
+ let patch;
55
+ try {
56
+ patch = await runProposer({
57
+ pattern: report.topPattern,
58
+ convergenceState,
59
+ config,
60
+ projectRoot,
61
+ });
62
+ }
63
+ catch (err) {
64
+ log(`Proposer error: ${err}`);
65
+ saveConvergenceState(harnessDir, convergenceState);
66
+ return;
67
+ }
68
+ if (!patch) {
69
+ log('Proposer returned no patch (converged or skipped)');
70
+ saveConvergenceState(harnessDir, convergenceState);
71
+ return;
72
+ }
73
+ log(`Proposer patch: ${patch.file} — ${patch.description}`);
74
+ // Step 4: Validate patch
75
+ let validation;
76
+ try {
77
+ validation = await validatePatch(patch, projectRoot);
78
+ }
79
+ catch (err) {
80
+ log(`Validation error: ${err}`);
81
+ saveConvergenceState(harnessDir, convergenceState);
82
+ return;
83
+ }
84
+ if (!validation.valid) {
85
+ log(`Patch validation FAILED: ${validation.error}`);
86
+ saveConvergenceState(harnessDir, convergenceState);
87
+ return;
88
+ }
89
+ log('Patch validated OK (tsc clean)');
90
+ // Step 5: Apply patch
91
+ let applyResult;
92
+ try {
93
+ applyResult = await applyPatch(patch, sessionId, harnessDir, projectRoot);
94
+ }
95
+ catch (err) {
96
+ log(`Apply error: ${err}`);
97
+ saveConvergenceState(harnessDir, convergenceState);
98
+ return;
99
+ }
100
+ if (!applyResult.applied) {
101
+ log(`Patch apply FAILED: ${applyResult.error}`);
102
+ saveConvergenceState(harnessDir, convergenceState);
103
+ return;
104
+ }
105
+ log(`Patch applied successfully: ${applyResult.patchId} (${patch.patternType}: ${patch.description})`);
106
+ // Step 6: Update convergence state with patch applied
107
+ convergenceState = updateConvergenceState(convergenceState, sessionSuccessRate, true);
108
+ saveConvergenceState(harnessDir, convergenceState);
109
+ }
110
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/harness/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,EAAE,MAAM,SAAS,CAAA;AACxB,OAAO,IAAI,MAAM,WAAW,CAAA;AAG5B,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACpD,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AAC/C,OAAO,EAAE,WAAW,EAAE,oBAAoB,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAA;AAC/G,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAA;AAEzC,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,IAWpC;IACC,MAAM,EACJ,SAAS,EACT,mBAAmB,EACnB,cAAc,EACd,gBAAgB,EAChB,cAAc,EACd,GAAG,EACH,OAAO,EACP,MAAM,EACN,UAAU,EACV,WAAW,GACZ,GAAG,IAAI,CAAA;IAER,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAE7C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,aAAa,CAAC,CAAA;IACvD,MAAM,GAAG,GAAG,CAAC,GAAW,EAAE,EAAE;QAC1B,IAAI,CAAC;YACH,EAAE,CAAC,cAAc,CAAC,UAAU,EAAE,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,KAAK,GAAG,IAAI,CAAC,CAAA;QACzE,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;IACZ,CAAC,CAAA;IAED,GAAG,CAAC,WAAW,SAAS,2BAA2B,OAAO,gBAAgB,GAAG,GAAG,CAAC,CAAA;IAEjF,uCAAuC;IACvC,MAAM,KAAK,GAAG,UAAU,CAAC;QACvB,SAAS;QACT,GAAG;QACH,mBAAmB;QACnB,cAAc;QACd,gBAAgB;QAChB,cAAc;QACd,OAAO;KACR,CAAC,CAAA;IACF,UAAU,CAAC,UAAU,EAAE,KAAK,CAAC,CAAA;IAC7B,GAAG,CAAC,kBAAkB,KAAK,CAAC,WAAW,IAAI,OAAO,8BAA8B,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAEjH,6DAA6D;IAC7D,MAAM,kBAAkB,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAA;IACxE,IAAI,gBAAgB,GAAG,oBAAoB,CAAC,UAAU,CAAC,CAAA;IACvD,gBAAgB,GAAG,sBAAsB,CAAC,gBAAgB,EAAE,kBAAkB,EAAE,KAAK,CAAC,CAAA;IAEtF,4CAA4C;IAC5C,IAAI,gBAAgB,CAAC,SAAS,EAAE,CAAC;QAC/B,GAAG,CAAC,cAAc,gBAAgB,CAAC,iBAAiB,IAAI,gBAAgB,aAAa,CAAC,CAAA;QACtF,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,2BAA2B;IAC3B,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAA;IAC1C,GAAG,CAAC,qBAAqB,MAAM,CAAC,cAAc,uBAAuB,MAAM,CAAC,UAAU,SAAS,MAAM,CAAC,UAAU,EAAE,IAAI,IAAI,MAAM,EAAE,CAAC,CAAA;IAEnI,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;QAC7C,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,
CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,uBAAuB;IACvB,IAAI,KAAK,CAAA;IACT,IAAI,CAAC;QACH,KAAK,GAAG,MAAM,WAAW,CAAC;YACxB,OAAO,EAAE,MAAM,CAAC,UAAU;YAC1B,gBAAgB;YAChB,MAAM;YACN,WAAW;SACZ,CAAC,CAAA;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAA;QAC7B,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,GAAG,CAAC,mDAAmD,CAAC,CAAA;QACxD,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,GAAG,CAAC,mBAAmB,KAAK,CAAC,IAAI,MAAM,KAAK,CAAC,WAAW,EAAE,CAAC,CAAA;IAE3D,yBAAyB;IACzB,IAAI,UAAU,CAAA;IACd,IAAI,CAAC;QACH,UAAU,GAAG,MAAM,aAAa,CAAC,KAAK,EAAE,WAAW,CAAC,CAAA;IACtD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,qBAAqB,GAAG,EAAE,CAAC,CAAA;QAC/B,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;QACtB,GAAG,CAAC,4BAA4B,UAAU,CAAC,KAAK,EAAE,CAAC,CAAA;QACnD,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,GAAG,CAAC,gCAAgC,CAAC,CAAA;IAErC,sBAAsB;IACtB,IAAI,WAAW,CAAA;IACf,IAAI,CAAC;QACH,WAAW,GAAG,MAAM,UAAU,CAAC,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,WAAW,CAAC,CAAA;IAC3E,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,gBAAgB,GAAG,EAAE,CAAC,CAAA;QAC1B,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;QACzB,GAAG,CAAC,uBAAuB,WAAW,CAAC,KAAK,EAAE,CAAC,CAAA;QAC/C,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QAClD,OAAM;IACR,CAAC;IAED,GAAG,CAAC,+BAA+B,WAAW,CAAC,OAAO,KAAK,KAAK,CAAC,WAAW,KAAK,KAAK,CAAC,WAAW,GAAG,CAAC,CAAA;IAEtG,sDAAsD;IACtD,gBAAgB,GAAG,sBAAsB,CAAC,gBAAgB,EAAE,kBAAkB,EAAE,IAAI,CAAC,CAAA;IACrF,oBAAoB,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;AACpD,CAAC"}
@@ -0,0 +1,14 @@
1
/** Kinds of pattern the rule-based analyzer can report. */
export type PatternType = 'UnhandledSignal' | 'MissingWait' | 'CapabilityGapNoTool' | 'HighAttempt';

/** One pattern detected across the analyzed session traces. */
export interface DetectedPattern {
    /** Which rule produced this pattern. */
    type: PatternType;
    /** Sample error strings backing the detection; empty for `HighAttempt`. */
    errorEvidence: string[];
    /**
     * How often the pattern was seen. For `UnhandledSignal` this is the number
     * of distinct unhandled error keys; for `MissingWait` and
     * `CapabilityGapNoTool` the number of matching sessions; for `HighAttempt`
     * the number of traces analyzed.
     */
    occurrences: number;
    /** Sort rank used to choose the top pattern; a lower number ranks higher. */
    priority: number;
}

/** Result of one analysis pass over the most recent traces. */
export interface PatternReport {
    /** True when at least one pattern was detected. */
    hasPattern: boolean;
    /** The highest-ranked pattern; absent when nothing was detected. */
    topPattern?: DetectedPattern;
    /** Every detected pattern, ordered by ascending `priority`. */
    allPatterns: DetectedPattern[];
    /** Number of traces that were read and analyzed. */
    tracesAnalyzed: number;
}

/**
 * Reads the last `k` session traces from `harnessDir` and reports the patterns
 * found in them.
 *
 * @param harnessDir Directory holding the trace file.
 * @param k Number of most recent traces to read; the implementation defaults to 20.
 */
export declare function analyzePatterns(harnessDir: string, k?: number): PatternReport;
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Rule-based pattern analyzer. Reads last K session traces and identifies
3
+ * patterns the current harness doesn't handle. No LLM — pure regex matching.
4
+ */
5
+ import { readLastTraces } from './traces.js';
6
+ // Import FAILURE_SIGNALS from retry.ts to check if patterns are already handled
7
+ import { FAILURE_SIGNALS } from '../orchestrator/retry.js';
8
+ export function analyzePatterns(harnessDir, k = 20) {
9
+ const traces = readLastTraces(harnessDir, k);
10
+ if (traces.length === 0) {
11
+ return { hasPattern: false, allPatterns: [], tracesAnalyzed: 0 };
12
+ }
13
+ const patterns = [];
14
+ // 1. UnhandledSignal: error substring appears in 3+ sessions' recurring_errors
15
+ // but matches no existing FAILURE_SIGNALS regex
16
+ const errorCounts = new Map();
17
+ for (const trace of traces) {
18
+ // Use a set per session to avoid counting the same error twice in one session
19
+ const seenInSession = new Set();
20
+ for (const err of trace.recurring_errors) {
21
+ // Normalize: take first 100 chars as the "key"
22
+ const key = err.slice(0, 100).toLowerCase();
23
+ if (!seenInSession.has(key)) {
24
+ seenInSession.add(key);
25
+ errorCounts.set(key, (errorCounts.get(key) ?? 0) + 1);
26
+ }
27
+ }
28
+ }
29
+ const unhandledErrors = [];
30
+ for (const [errKey, count] of errorCounts.entries()) {
31
+ if (count >= 3) {
32
+ // Check if any existing FAILURE_SIGNALS regex matches this error snippet
33
+ const isHandled = FAILURE_SIGNALS.some(sig => sig.pattern.test(errKey));
34
+ if (!isHandled) {
35
+ unhandledErrors.push(errKey);
36
+ }
37
+ }
38
+ }
39
+ if (unhandledErrors.length > 0) {
40
+ patterns.push({
41
+ type: 'UnhandledSignal',
42
+ errorEvidence: unhandledErrors.slice(0, 5),
43
+ occurrences: unhandledErrors.length,
44
+ priority: 1,
45
+ });
46
+ }
47
+ // 2. MissingWait: RATE_LIMITED in failure_types but session still failed
48
+ const missingWaitSessions = traces.filter(t => t.failure_types.includes('RATE_LIMITED') && t.n_failed > 0);
49
+ if (missingWaitSessions.length >= 2) {
50
+ patterns.push({
51
+ type: 'MissingWait',
52
+ errorEvidence: missingWaitSessions
53
+ .flatMap(t => t.recurring_errors.filter(e => /rate.?limit|429|too many/i.test(e)))
54
+ .slice(0, 3),
55
+ occurrences: missingWaitSessions.length,
56
+ priority: 3,
57
+ });
58
+ }
59
+ // 3. CapabilityGapNoTool: CAPABILITY_GAP in failure_types AND tools_used is empty AND session failed
60
+ const capGapSessions = traces.filter(t => t.failure_types.includes('CAPABILITY_GAP') &&
61
+ t.tools_used.length === 0 &&
62
+ t.n_failed > 0);
63
+ if (capGapSessions.length >= 2) {
64
+ patterns.push({
65
+ type: 'CapabilityGapNoTool',
66
+ errorEvidence: capGapSessions
67
+ .flatMap(t => t.recurring_errors)
68
+ .slice(0, 3),
69
+ occurrences: capGapSessions.length,
70
+ priority: 2,
71
+ });
72
+ }
73
+ // 4. HighAttempt: average max attempts >= 3.5 across recent sessions
74
+ if (traces.length >= 3) {
75
+ const avgMaxAttempts = traces
76
+ .map(t => (t.attempts_per_agent.length > 0 ? Math.max(...t.attempts_per_agent) : 1))
77
+ .reduce((a, b) => a + b, 0) / traces.length;
78
+ if (avgMaxAttempts >= 3.5) {
79
+ patterns.push({
80
+ type: 'HighAttempt',
81
+ errorEvidence: [],
82
+ occurrences: traces.length,
83
+ priority: 4,
84
+ });
85
+ }
86
+ }
87
+ // Sort by priority (lower = higher)
88
+ patterns.sort((a, b) => a.priority - b.priority);
89
+ return {
90
+ hasPattern: patterns.length > 0,
91
+ topPattern: patterns[0],
92
+ allPatterns: patterns,
93
+ tracesAnalyzed: traces.length,
94
+ };
95
+ }
96
+ //# sourceMappingURL=patterns.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"patterns.js","sourceRoot":"","sources":["../../src/harness/patterns.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAE5C,gFAAgF;AAChF,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAA;AAsB1D,MAAM,UAAU,eAAe,CAAC,UAAkB,EAAE,CAAC,GAAG,EAAE;IACxD,MAAM,MAAM,GAAG,cAAc,CAAC,UAAU,EAAE,CAAC,CAAC,CAAA;IAE5C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,WAAW,EAAE,EAAE,EAAE,cAAc,EAAE,CAAC,EAAE,CAAA;IAClE,CAAC;IAED,MAAM,QAAQ,GAAsB,EAAE,CAAA;IAEtC,+EAA+E;IAC/E,mDAAmD;IACnD,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAA;IAC7C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,8EAA8E;QAC9E,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAA;QACvC,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,gBAAgB,EAAE,CAAC;YACzC,+CAA+C;YAC/C,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,WAAW,EAAE,CAAA;YAC3C,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5B,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;gBACtB,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;YACvD,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,eAAe,GAAa,EAAE,CAAA;IACpC,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,WAAW,CAAC,OAAO,EAAE,EAAE,CAAC;QACpD,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;YACf,yEAAyE;YACzE,MAAM,SAAS,GAAG,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAA;YACvE,IAAI,CAAC,SAAS,EAAE,CAAC;gBACf,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YAC9B,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC/B,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,iBAAiB;YACvB,aAAa,EAAE,eAAe,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAC1C,WAAW,EAAE,eAAe,CAAC,MAAM;YACnC,QAAQ,EAAE,CAAC;SACZ,CAAC,CAAA;IACJ,CAAC;IAED,yEAAyE;IACzE,MAAM,mBAAmB,GAAG,MAAM,CAAC,MAAM,CACvC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,QAAQ,GAAG,CAAC,CAChE,CAAA;IACD,IAAI,mBAAmB,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACpC,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,aAAa;YACnB,aAAa,EAAE,mBAAmB;iBAC/B,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAA
E,CAAC,2BAA2B,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;iBACjF,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YACd,WAAW,EAAE,mBAAmB,CAAC,MAAM;YACvC,QAAQ,EAAE,CAAC;SACZ,CAAC,CAAA;IACJ,CAAC;IAED,qGAAqG;IACrG,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,CAClC,CAAC,CAAC,EAAE,CACF,CAAC,CAAC,aAAa,CAAC,QAAQ,CAAC,gBAAgB,CAAC;QAC1C,CAAC,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC;QACzB,CAAC,CAAC,QAAQ,GAAG,CAAC,CACjB,CAAA;IACD,IAAI,cAAc,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC/B,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,qBAAqB;YAC3B,aAAa,EAAE,cAAc;iBAC1B,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC;iBAChC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YACd,WAAW,EAAE,cAAc,CAAC,MAAM;YAClC,QAAQ,EAAE,CAAC;SACZ,CAAC,CAAA;IACJ,CAAC;IAED,qEAAqE;IACrE,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACvB,MAAM,cAAc,GAClB,MAAM;aACH,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aACnF,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;QAE/C,IAAI,cAAc,IAAI,GAAG,EAAE,CAAC;YAC1B,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,aAAa;gBACnB,aAAa,EAAE,EAAE;gBACjB,WAAW,EAAE,MAAM,CAAC,MAAM;gBAC1B,QAAQ,EAAE,CAAC;aACZ,CAAC,CAAA;QACJ,CAAC;IACH,CAAC;IAED,oCAAoC;IACpC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAA;IAEhD,OAAO;QACL,UAAU,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC;QAC/B,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;QACvB,WAAW,EAAE,QAAQ;QACrB,cAAc,EAAE,MAAM,CAAC,MAAM;KAC9B,CAAA;AACH,CAAC"}
@@ -0,0 +1,26 @@
1
+ import type { UsertesterConfig } from '../types.js';
2
+ import type { DetectedPattern } from './patterns.js';
3
/** A single search-and-replace edit proposed by the LLM proposer. */
export interface CodePatch {
    /** Project-relative path of the file to patch; only these two files are allowed. */
    file: 'src/orchestrator/retry.ts' | 'src/browser/agent.ts';
    /** Verbatim text to replace; the proposer rejects it unless it occurs exactly once in the file. */
    oldCode: string;
    /** Replacement text for `oldCode`. */
    newCode: string;
    /** Description of the change, as returned by the proposer model. */
    description: string;
    /** Pattern type the patch addresses; falls back to the detected pattern's type. */
    patternType: string;
}

/** Persisted bookkeeping used to decide when the outer loop should stop proposing patches. */
export interface ConvergenceState {
    /** Total number of patches applied so far. */
    patchesApplied: number;
    /** ISO timestamp of the most recent applied patch, or `null` if none. */
    lastPatchAt: string | null;
    /** Number of state updates since the last applied patch; reset to 0 when a patch is applied. */
    sessionsSinceLastPatch: number;
    /** Most recent session success rates; at most the last 5 are kept. */
    successRateHistory: number[];
    /** True once a convergence criterion has been met. */
    converged: boolean;
    /** Human-readable reason recorded when `converged` was set. */
    convergenceReason?: string;
}

/** Loads the state from `harness_state.json` in `harnessDir`, or returns a fresh state if it cannot be read. */
export declare function loadConvergenceState(harnessDir: string): ConvergenceState;

/** Writes the state to `harness_state.json` in `harnessDir`, creating the directory if needed. */
export declare function saveConvergenceState(harnessDir: string, state: ConvergenceState): void;

/**
 * Returns a new state with `sessionSuccessRate` appended to the history and
 * the convergence criteria re-evaluated.
 *
 * @param patchApplied When true, the patch counter is incremented and the
 *   sessions-since-patch counter is reset.
 */
export declare function updateConvergenceState(state: ConvergenceState, sessionSuccessRate: number, patchApplied: boolean): ConvergenceState;

/**
 * Asks the proposer model for a patch addressing `pattern`.
 *
 * @returns The proposed patch, or `null` when the state is already converged.
 *   The returned promise rejects when the target file cannot be read, the model
 *   call fails, or the response is not a usable patch.
 */
export declare function runProposer(opts: {
    pattern: DetectedPattern;
    convergenceState: ConvergenceState;
    config: Partial<UsertesterConfig>;
    projectRoot: string;
}): Promise<CodePatch | null>;
@@ -0,0 +1,181 @@
1
+ /**
2
+ * LLM code proposer for the outer loop meta-harness.
3
+ * Uses the proposer_model (defaults to anthropic/claude-opus-4-6) to generate
4
+ * code patches that address detected failure patterns.
5
+ */
6
+ import fs from 'node:fs';
7
+ import path from 'node:path';
8
+ import { generateText } from 'ai';
9
+ import { resolveModel } from '../llm/provider.js';
10
+ export function loadConvergenceState(harnessDir) {
11
+ const statePath = path.join(harnessDir, 'harness_state.json');
12
+ try {
13
+ const content = fs.readFileSync(statePath, 'utf-8');
14
+ return JSON.parse(content);
15
+ }
16
+ catch {
17
+ return {
18
+ patchesApplied: 0,
19
+ lastPatchAt: null,
20
+ sessionsSinceLastPatch: 0,
21
+ successRateHistory: [],
22
+ converged: false,
23
+ };
24
+ }
25
+ }
26
+ export function saveConvergenceState(harnessDir, state) {
27
+ fs.mkdirSync(harnessDir, { recursive: true });
28
+ const statePath = path.join(harnessDir, 'harness_state.json');
29
+ fs.writeFileSync(statePath, JSON.stringify(state, null, 2));
30
+ }
31
+ export function updateConvergenceState(state, sessionSuccessRate, patchApplied) {
32
+ const newHistory = [...state.successRateHistory, sessionSuccessRate].slice(-5);
33
+ const newSessionsSince = patchApplied ? 0 : state.sessionsSinceLastPatch + 1;
34
+ // Check convergence criteria
35
+ let converged = state.converged;
36
+ let convergenceReason = state.convergenceReason;
37
+ if (state.patchesApplied >= 20) {
38
+ converged = true;
39
+ convergenceReason = 'Max patches applied (20)';
40
+ }
41
+ else if (newSessionsSince >= 5 && newHistory.length >= 5) {
42
+ const improvement = Math.max(...newHistory) - Math.min(...newHistory);
43
+ if (improvement < 0.005) {
44
+ converged = true;
45
+ convergenceReason = 'Success rate stable for 5 sessions (improvement < 0.5%)';
46
+ }
47
+ }
48
+ return {
49
+ patchesApplied: patchApplied ? state.patchesApplied + 1 : state.patchesApplied,
50
+ lastPatchAt: patchApplied ? new Date().toISOString() : state.lastPatchAt,
51
+ sessionsSinceLastPatch: newSessionsSince,
52
+ successRateHistory: newHistory,
53
+ converged,
54
+ convergenceReason,
55
+ };
56
+ }
57
+ function buildProposerPrompt(pattern, targetFile, fileContents) {
58
+ const taskDescription = getTaskDescription(pattern, targetFile);
59
+ return `You are an expert TypeScript engineer improving an AI browser automation harness.
60
+
61
+ DETECTED PATTERN: ${pattern.type}
62
+ Error evidence (${pattern.occurrences} sessions):
63
+ ${pattern.errorEvidence.map(e => ` - "${e}"`).join('\n')}
64
+
65
+ TARGET FILE: ${targetFile}
66
+ CURRENT FILE CONTENTS:
67
+ \`\`\`typescript
68
+ ${fileContents}
69
+ \`\`\`
70
+
71
+ TASK:
72
+ ${taskDescription}
73
+
74
+ HARD CONSTRAINTS:
75
+ - Never remove or modify existing FAILURE_SIGNALS entries
76
+ - The oldCode field must be VERBATIM text that appears EXACTLY ONCE in the file
77
+ - Change fewer than 50 lines total
78
+ - The newCode must be valid TypeScript
79
+ - Do not change imports unless strictly necessary
80
+
81
+ Respond with a single JSON object (no markdown fences, no extra text):
82
+ {
83
+ "file": "${targetFile}",
84
+ "oldCode": "<verbatim substring from the file to replace>",
85
+ "newCode": "<replacement code>",
86
+ "description": "<one sentence describing the change>",
87
+ "patternType": "${pattern.type}"
88
+ }`;
89
+ }
90
+ function getTaskDescription(pattern, targetFile) {
91
+ switch (pattern.type) {
92
+ case 'UnhandledSignal':
93
+ return `Add a new entry to the FAILURE_SIGNALS array in ${targetFile} that matches the unhandled error patterns. The new entry should have an appropriate pattern regex, FailureType, and recovery hint.`;
94
+ case 'CapabilityGapNoTool':
95
+ return `Update selectToolsForRecovery() in ${targetFile} to inject the appropriate tool(s) for the capability gap being detected. Look at the error evidence to determine which tool is missing.`;
96
+ case 'MissingWait':
97
+ return `Improve the RATE_LIMITED handling in ${targetFile} to better extract and apply wait times from rate limit responses. Ensure the wait logic covers the error patterns shown.`;
98
+ case 'HighAttempt':
99
+ return `Review the retry strategy in ${targetFile} and add a more intelligent backoff or early-exit condition to reduce unnecessary retries when the agent is clearly stuck.`;
100
+ default:
101
+ return `Improve error handling in ${targetFile} to address the detected pattern: ${pattern.type}.`;
102
+ }
103
+ }
104
+ function selectTargetFile(pattern) {
105
+ switch (pattern.type) {
106
+ case 'UnhandledSignal':
107
+ case 'MissingWait':
108
+ case 'CapabilityGapNoTool':
109
+ return 'src/orchestrator/retry.ts';
110
+ case 'HighAttempt':
111
+ return 'src/browser/agent.ts';
112
+ default:
113
+ return 'src/orchestrator/retry.ts';
114
+ }
115
+ }
116
+ export async function runProposer(opts) {
117
+ const { pattern, convergenceState, config, projectRoot } = opts;
118
+ // Check convergence — don't propose if converged
119
+ if (convergenceState.converged) {
120
+ return null;
121
+ }
122
+ const targetFile = selectTargetFile(pattern);
123
+ const absoluteFilePath = path.join(projectRoot, targetFile);
124
+ let fileContents;
125
+ try {
126
+ fileContents = fs.readFileSync(absoluteFilePath, 'utf-8');
127
+ }
128
+ catch (err) {
129
+ throw new Error(`Cannot read ${absoluteFilePath}: ${err}`);
130
+ }
131
+ const prompt = buildProposerPrompt(pattern, targetFile, fileContents);
132
+ const modelString = config.proposer_model ?? 'anthropic/claude-opus-4-6';
133
+ const model = resolveModel(modelString, config);
134
+ let text;
135
+ try {
136
+ const result = await generateText({
137
+ model,
138
+ messages: [{ role: 'user', content: prompt }],
139
+ maxOutputTokens: 2000,
140
+ });
141
+ text = result.text;
142
+ }
143
+ catch (err) {
144
+ throw new Error(`Proposer LLM call failed: ${err}`);
145
+ }
146
+ // Parse JSON from response — same pattern as classifyFailure
147
+ const match = text.match(/\{[\s\S]*\}/);
148
+ if (!match) {
149
+ throw new Error(`Proposer returned no JSON. Response: ${text.slice(0, 200)}`);
150
+ }
151
+ let parsed;
152
+ try {
153
+ parsed = JSON.parse(match[0]);
154
+ }
155
+ catch (err) {
156
+ throw new Error(`Proposer JSON parse failed: ${err}. Raw: ${match[0].slice(0, 200)}`);
157
+ }
158
+ if (!parsed.file || !parsed.oldCode || !parsed.newCode || !parsed.description) {
159
+ throw new Error(`Proposer returned incomplete patch: ${JSON.stringify(parsed)}`);
160
+ }
161
+ // Validate file field
162
+ if (parsed.file !== 'src/orchestrator/retry.ts' &&
163
+ parsed.file !== 'src/browser/agent.ts') {
164
+ throw new Error(`Proposer returned invalid file: ${parsed.file}`);
165
+ }
166
+ const patch = {
167
+ file: parsed.file,
168
+ oldCode: parsed.oldCode,
169
+ newCode: parsed.newCode,
170
+ description: parsed.description,
171
+ patternType: parsed.patternType ?? pattern.type,
172
+ };
173
+ // Verify oldCode appears exactly once in the file
174
+ const occurrences = fileContents.split(patch.oldCode).length - 1;
175
+ if (occurrences !== 1) {
176
+ throw new Error(`Proposed oldCode appears ${occurrences} times in ${patch.file} (expected exactly 1). ` +
177
+ `oldCode: "${patch.oldCode.slice(0, 100)}"`);
178
+ }
179
+ return patch;
180
+ }
181
+ //# sourceMappingURL=proposer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"proposer.js","sourceRoot":"","sources":["../../src/harness/proposer.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,EAAE,MAAM,SAAS,CAAA;AACxB,OAAO,IAAI,MAAM,WAAW,CAAA;AAC5B,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAA;AACjC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAqBjD,MAAM,UAAU,oBAAoB,CAAC,UAAkB;IACrD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,oBAAoB,CAAC,CAAA;IAC7D,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;QACnD,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAqB,CAAA;IAChD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,cAAc,EAAE,CAAC;YACjB,WAAW,EAAE,IAAI;YACjB,sBAAsB,EAAE,CAAC;YACzB,kBAAkB,EAAE,EAAE;YACtB,SAAS,EAAE,KAAK;SACjB,CAAA;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,UAAkB,EAAE,KAAuB;IAC9E,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,oBAAoB,CAAC,CAAA;IAC7D,EAAE,CAAC,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAA;AAC7D,CAAC;AAED,MAAM,UAAU,sBAAsB,CACpC,KAAuB,EACvB,kBAA0B,EAC1B,YAAqB;IAErB,MAAM,UAAU,GAAG,CAAC,GAAG,KAAK,CAAC,kBAAkB,EAAE,kBAAkB,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;IAC9E,MAAM,gBAAgB,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,sBAAsB,GAAG,CAAC,CAAA;IAE5E,6BAA6B;IAC7B,IAAI,SAAS,GAAG,KAAK,CAAC,SAAS,CAAA;IAC/B,IAAI,iBAAiB,GAAG,KAAK,CAAC,iBAAiB,CAAA;IAE/C,IAAI,KAAK,CAAC,cAAc,IAAI,EAAE,EAAE,CAAC;QAC/B,SAAS,GAAG,IAAI,CAAA;QAChB,iBAAiB,GAAG,0BAA0B,CAAA;IAChD,CAAC;SAAM,IAAI,gBAAgB,IAAI,CAAC,IAAI,UAAU,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC3D,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC,CAAA;QACrE,IAAI,WAAW,GAAG,KAAK,EAAE,CAAC;YACxB,SAAS,GAAG,IAAI,CAAA;YAChB,iBAAiB,GAAG,yDAAyD,CAAA;QAC/E,CAAC;IACH,CAAC;IAED,OAAO;QACL,cAAc,EAAE,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,cAAc;QAC9E,WAAW,EAAE,YAAY,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW;QACxE,sBAAsB,EAAE,gBAAgB;QACxC,kBAAkB,EAAE,UAAU;QAC9B,SAAS;QACT,iBAAiB;KAClB,CAAA;AA
CH,CAAC;AAED,SAAS,mBAAmB,CAC1B,OAAwB,EACxB,UAAgE,EAChE,YAAoB;IAEpB,MAAM,eAAe,GAAG,kBAAkB,CAAC,OAAO,EAAE,UAAU,CAAC,CAAA;IAE/D,OAAO;;oBAEW,OAAO,CAAC,IAAI;kBACd,OAAO,CAAC,WAAW;EACnC,OAAO,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;;eAE1C,UAAU;;;EAGvB,YAAY;;;;EAIZ,eAAe;;;;;;;;;;;aAWJ,UAAU;;;;oBAIH,OAAO,CAAC,IAAI;EAC9B,CAAA;AACF,CAAC;AAED,SAAS,kBAAkB,CACzB,OAAwB,EACxB,UAAgE;IAEhE,QAAQ,OAAO,CAAC,IAAI,EAAE,CAAC;QACrB,KAAK,iBAAiB;YACpB,OAAO,mDAAmD,UAAU,qIAAqI,CAAA;QAE3M,KAAK,qBAAqB;YACxB,OAAO,sCAAsC,UAAU,0IAA0I,CAAA;QAEnM,KAAK,aAAa;YAChB,OAAO,wCAAwC,UAAU,2HAA2H,CAAA;QAEtL,KAAK,aAAa;YAChB,OAAO,gCAAgC,UAAU,4HAA4H,CAAA;QAE/K;YACE,OAAO,6BAA6B,UAAU,qCAAqC,OAAO,CAAC,IAAI,GAAG,CAAA;IACtG,CAAC;AACH,CAAC;AAED,SAAS,gBAAgB,CACvB,OAAwB;IAExB,QAAQ,OAAO,CAAC,IAAI,EAAE,CAAC;QACrB,KAAK,iBAAiB,CAAC;QACvB,KAAK,aAAa,CAAC;QACnB,KAAK,qBAAqB;YACxB,OAAO,2BAA2B,CAAA;QACpC,KAAK,aAAa;YAChB,OAAO,sBAAsB,CAAA;QAC/B;YACE,OAAO,2BAA2B,CAAA;IACtC,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,IAKjC;IACC,MAAM,EAAE,OAAO,EAAE,gBAAgB,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,IAAI,CAAA;IAE/D,iDAAiD;IACjD,IAAI,gBAAgB,CAAC,SAAS,EAAE,CAAC;QAC/B,OAAO,IAAI,CAAA;IACb,CAAC;IAED,MAAM,UAAU,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAA;IAC5C,MAAM,gBAAgB,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,UAAU,CAAC,CAAA;IAE3D,IAAI,YAAoB,CAAA;IACxB,IAAI,CAAC;QACH,YAAY,GAAG,EAAE,CAAC,YAAY,CAAC,gBAAgB,EAAE,OAAO,CAAC,CAAA;IAC3D,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,eAAe,gBAAgB,KAAK,GAAG,EAAE,CAAC,CAAA;IAC5D,CAAC;IAED,MAAM,MAAM,GAAG,mBAAmB,CAAC,OAAO,EAAE,UAAU,EAAE,YAAY,CAAC,CAAA;IAErE,MAAM,WAAW,GAAG,MAAM,CAAC,cAAc,IAAI,2BAA2B,CAAA;IACxE,MAAM,KAAK,GAAG,YAAY,CAAC,WAAW,EAAE,MAAM,CAAC,CAAA;IAE/C,IAAI,IAAY,CAAA;IAChB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC;YAChC,KAAK;YACL,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;YAC7C,eAAe,EAAE,IAAI;SACtB,CAAC,CAAA;QACF,IAAI,GAAG,MAAM,CAAC,IAAI,CAAA;IACpB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,EAAE,CAAC,CAAA;IACrD,CAAC;
IAED,6DAA6D;IAC7D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,CAAA;IACvC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,wCAAwC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;IAC/E,CAAC;IAED,IAAI,MAA0B,CAAA;IAC9B,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAuB,CAAA;IACrD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,+BAA+B,GAAG,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;IACvF,CAAC;IAED,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;QAC9E,MAAM,IAAI,KAAK,CAAC,uCAAuC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,CAAA;IAClF,CAAC;IAED,sBAAsB;IACtB,IACE,MAAM,CAAC,IAAI,KAAK,2BAA2B;QAC3C,MAAM,CAAC,IAAI,KAAK,sBAAsB,EACtC,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,mCAAmC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAA;IACnE,CAAC;IAED,MAAM,KAAK,GAAc;QACvB,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,OAAO,EAAE,MAAM,CAAC,OAAO;QACvB,OAAO,EAAE,MAAM,CAAC,OAAO;QACvB,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,OAAO,CAAC,IAAI;KAChD,CAAA;IAED,kDAAkD;IAClD,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAA;IAChE,IAAI,WAAW,KAAK,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CACb,4BAA4B,WAAW,aAAa,KAAK,CAAC,IAAI,yBAAyB;YACrF,aAAa,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAC9C,CAAA;IACH,CAAC;IAED,OAAO,KAAK,CAAA;AACd,CAAC"}
@@ -0,0 +1,29 @@
1
+ import type { FailureType } from '../orchestrator/retry.js';
2
+ import type { RetryAttempt } from '../orchestrator/retry.js';
3
+ export interface SessionTrace { // One aggregated record per session, as assembled by buildTrace().
4
+ session_id: string; // Session identifier (buildTrace copies opts.sessionId).
5
+ url: string; // URL passed to buildTrace for this session.
6
+ ts: string; // ISO-8601 timestamp taken when the trace is built (Date#toISOString).
7
+ n_agents: number; // Agent count as supplied by the caller (opts.nAgents).
8
+ n_succeeded: number; // Number of truthy entries in agentSucceeded.
9
+ n_failed: number; // n_agents minus n_succeeded.
10
+ failure_types: FailureType[]; // Distinct failureType values from attempts whose result is 'failed'.
11
+ recurring_errors: string[]; // Distinct agentMessage prefixes (first 200 chars) from failed attempts.
12
+ tools_used: string[]; // Distinct entries of agentToolsUsed across all agents.
13
+ attempts_per_agent: number[]; // Per agent: retry-history length + 1 (the initial attempt).
14
+ profile_hit: boolean; // True when any entry of agentProfileHits is truthy.
15
+ }
16
+ export declare function writeTrace(harnessDir: string, trace: SessionTrace): void; // Appends `trace` as one JSON line to <harnessDir>/traces.ndjson, creating harnessDir if needed.
17
+ export declare function readLastTraces(harnessDir: string, k?: number): SessionTrace[]; // Returns up to the last k traces (implementation default: 20) from <harnessDir>/traces.ndjson; [] when the file cannot be read.
18
+ /**
19
+ * Build a SessionTrace from per-agent retry histories.
20
+ */
21
+ export declare function buildTrace(opts: {
22
+ sessionId: string; // Stored as SessionTrace.session_id.
23
+ url: string; // Stored unchanged as SessionTrace.url.
24
+ agentRetryHistories: RetryAttempt[][]; // One list of retry attempts per agent; failed attempts feed failure_types and recurring_errors.
25
+ agentToolsUsed: string[][]; // One list per agent; flattened and de-duplicated into tools_used.
26
+ agentProfileHits: boolean[]; // profile_hit is true when any entry is truthy.
27
+ agentSucceeded: boolean[]; // Truthy entries are counted as n_succeeded.
28
+ nAgents: number; // Stored as n_agents; n_failed is nAgents minus n_succeeded.
29
+ }): SessionTrace;
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Session trace writer — appends one structured line per session to
3
+ * ~/.usertester/harness/traces.ndjson for the outer loop pattern analyzer.
4
+ */
5
+ import fs from 'node:fs';
6
+ import path from 'node:path';
7
/**
 * Persist one session trace as a single NDJSON record.
 *
 * The harness directory is created (recursively) when it is missing, then the
 * trace is serialized with JSON.stringify and appended, newline-terminated, to
 * `<harnessDir>/traces.ndjson`.
 *
 * @param harnessDir Directory that holds the traces file.
 * @param trace Session trace to append.
 */
export function writeTrace(harnessDir, trace) {
    // Create the directory first so the append never hits a missing parent.
    fs.mkdirSync(harnessDir, { recursive: true });
    const destination = path.join(harnessDir, 'traces.ndjson');
    const record = `${JSON.stringify(trace)}\n`;
    fs.appendFileSync(destination, record);
}
12
/**
 * Read the most recent session traces from `<harnessDir>/traces.ndjson`.
 *
 * The file is scanned from its end. Blank lines and lines that are not valid
 * JSON (for example a record truncated by an interrupted write) are skipped,
 * so one bad line no longer discards every other trace.
 *
 * @param harnessDir Directory that holds the traces file.
 * @param k Maximum number of traces to return (default 20). Fractions are
 *   rounded down; zero, negative or NaN values yield an empty array.
 * @returns Up to `k` parsed traces in file order (oldest first), or an empty
 *   array when the file is missing or cannot be read.
 */
export function readLastTraces(harnessDir, k = 20) {
    const limit = Math.floor(k);
    // `slice(-0)` is `slice(0)` and would return every trace, so reject
    // non-positive (and NaN) limits explicitly.
    if (!(limit > 0)) {
        return [];
    }
    const tracePath = path.join(harnessDir, 'traces.ndjson');
    let content;
    try {
        content = fs.readFileSync(tracePath, 'utf-8');
    }
    catch {
        // Best-effort read: a missing or unreadable file means "no traces yet".
        return [];
    }
    const lines = content.split('\n');
    const newestFirst = [];
    for (let i = lines.length - 1; i >= 0 && newestFirst.length < limit; i--) {
        // trim() also drops a trailing '\r' left by CRLF line endings.
        const line = lines[i].trim();
        if (!line) {
            continue;
        }
        try {
            newestFirst.push(JSON.parse(line));
        }
        catch {
            // Malformed record: skip it and keep the remaining traces.
        }
    }
    return newestFirst.reverse();
}
26
/**
 * Aggregate per-agent results into a single SessionTrace record.
 *
 * Only attempts whose `result` is 'failed' contribute to `failure_types`
 * (when they carry a failureType) and to `recurring_errors` (first 200
 * characters of agentMessage). Both lists, and `tools_used`, are de-duplicated
 * in first-seen order. The timestamp is taken when this function runs.
 *
 * @param opts Session id, url, and the per-agent arrays to aggregate.
 * @returns The assembled SessionTrace.
 */
export function buildTrace(opts) {
    const { sessionId, url, agentRetryHistories, agentToolsUsed, agentProfileHits, agentSucceeded, nAgents } = opts;
    const succeededCount = agentSucceeded.reduce((count, ok) => (ok ? count + 1 : count), 0);
    const failedCount = nAgents - succeededCount;
    // Walk every attempt once, collecting both failure summaries as we go.
    const seenFailureTypes = new Set();
    const seenErrors = new Set();
    for (const attempt of agentRetryHistories.flat()) {
        if (attempt.result !== 'failed') {
            continue;
        }
        if (attempt.failureType) {
            seenFailureTypes.add(attempt.failureType);
        }
        seenErrors.add(attempt.agentMessage.slice(0, 200));
    }
    const toolsUsed = Array.from(new Set(agentToolsUsed.flat()));
    // Each history lists retries only, so add one for the initial attempt.
    const attemptsPerAgent = agentRetryHistories.map(history => history.length + 1);
    const anyProfileHit = agentProfileHits.some(hit => Boolean(hit));
    return {
        session_id: sessionId,
        url,
        ts: new Date().toISOString(),
        n_agents: nAgents,
        n_succeeded: succeededCount,
        n_failed: failedCount,
        failure_types: Array.from(seenFailureTypes),
        recurring_errors: Array.from(seenErrors),
        tools_used: toolsUsed,
        attempts_per_agent: attemptsPerAgent,
        profile_hit: anyProfileHit,
    };
}
65
+ //# sourceMappingURL=traces.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"traces.js","sourceRoot":"","sources":["../../src/harness/traces.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,EAAE,MAAM,SAAS,CAAA;AACxB,OAAO,IAAI,MAAM,WAAW,CAAA;AAkB5B,MAAM,UAAU,UAAU,CAAC,UAAkB,EAAE,KAAmB;IAChE,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,eAAe,CAAC,CAAA;IACxD,EAAE,CAAC,cAAc,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAA;AAC5D,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,UAAkB,EAAE,CAAC,GAAG,EAAE;IACvD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,eAAe,CAAC,CAAA;IACxD,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;QACnD,OAAO,OAAO;aACX,KAAK,CAAC,IAAI,CAAC;aACX,MAAM,CAAC,OAAO,CAAC;aACf,KAAK,CAAC,CAAC,CAAC,CAAC;aACT,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAiB,CAAC,CAAA;IAClD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAA;IACX,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,IAQ1B;IACC,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,mBAAmB,EAAE,cAAc,EAAE,gBAAgB,EAAE,cAAc,EAAE,OAAO,EAAE,GAAG,IAAI,CAAA;IAE/G,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAA;IACzD,MAAM,QAAQ,GAAG,OAAO,GAAG,WAAW,CAAA;IAEtC,8CAA8C;IAC9C,MAAM,aAAa,GAAkB;QACnC,GAAG,IAAI,GAAG,CACR,mBAAmB;aAChB,IAAI,EAAE;aACN,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ,IAAI,CAAC,CAAC,WAAW,CAAC;aACnD,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAA0B,CAAC,CAC1C;KACF,CAAA;IAED,0DAA0D;IAC1D,MAAM,WAAW,GAAG,mBAAmB;SACpC,IAAI,EAAE;SACN,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ,CAAC;SAClC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAA;IAEzC,MAAM,gBAAgB,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC,CAAA;IAElD,yBAAyB;IACzB,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC,CAAA;IAEtD,6EAA6E;IAC7E,MAAM,kBAAkB,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAErE,MAAM,WAAW,GAAG,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAElD,OAAO;QACL,UAAU,EAAE,SAAS;QACrB,GAAG;QACH,EAAE,E
AAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QAC5B,QAAQ,EAAE,OAAO;QACjB,WAAW;QACX,QAAQ;QACR,aAAa;QACb,gBAAgB;QAChB,UAAU;QACV,kBAAkB;QAClB,WAAW;KACZ,CAAA;AACH,CAAC"}
@@ -0,0 +1,6 @@
1
+ import type { CodePatch } from './proposer.js';
2
+ export interface ValidationResult { // Outcome resolved by validatePatch().
3
+ valid: boolean; // NOTE(review): presumably true when the patch passed validation — confirm against the implementation.
4
+ error?: string; // Optional; NOTE(review): presumably set only when valid is false — confirm against the implementation.
5
+ }
6
+ export declare function validatePatch(patch: CodePatch, projectRoot: string): Promise<ValidationResult>; // Async; resolves to a ValidationResult for `patch`. NOTE(review): implementation not visible here — confirm which checks run against projectRoot.