coterie 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +137 -0
- package/dist/adapters/base.d.ts +56 -0
- package/dist/adapters/base.d.ts.map +1 -0
- package/dist/adapters/base.js +176 -0
- package/dist/adapters/base.js.map +1 -0
- package/dist/adapters/claudeCode.d.ts +11 -0
- package/dist/adapters/claudeCode.d.ts.map +1 -0
- package/dist/adapters/claudeCode.js +65 -0
- package/dist/adapters/claudeCode.js.map +1 -0
- package/dist/adapters/codex.d.ts +10 -0
- package/dist/adapters/codex.d.ts.map +1 -0
- package/dist/adapters/codex.js +51 -0
- package/dist/adapters/codex.js.map +1 -0
- package/dist/adapters/cursor.d.ts +16 -0
- package/dist/adapters/cursor.d.ts.map +1 -0
- package/dist/adapters/cursor.js +47 -0
- package/dist/adapters/cursor.js.map +1 -0
- package/dist/adapters/fake.d.ts +19 -0
- package/dist/adapters/fake.d.ts.map +1 -0
- package/dist/adapters/fake.js +41 -0
- package/dist/adapters/fake.js.map +1 -0
- package/dist/adapters/index.d.ts +11 -0
- package/dist/adapters/index.d.ts.map +1 -0
- package/dist/adapters/index.js +11 -0
- package/dist/adapters/index.js.map +1 -0
- package/dist/adapters/stream.d.ts +6 -0
- package/dist/adapters/stream.d.ts.map +1 -0
- package/dist/adapters/stream.js +20 -0
- package/dist/adapters/stream.js.map +1 -0
- package/dist/chat/configs.d.ts +18 -0
- package/dist/chat/configs.d.ts.map +1 -0
- package/dist/chat/configs.js +71 -0
- package/dist/chat/configs.js.map +1 -0
- package/dist/chat/doctor.d.ts +16 -0
- package/dist/chat/doctor.d.ts.map +1 -0
- package/dist/chat/doctor.js +43 -0
- package/dist/chat/doctor.js.map +1 -0
- package/dist/chat/finalizer.d.ts +25 -0
- package/dist/chat/finalizer.d.ts.map +1 -0
- package/dist/chat/finalizer.js +52 -0
- package/dist/chat/finalizer.js.map +1 -0
- package/dist/chat/preflight.d.ts +37 -0
- package/dist/chat/preflight.d.ts.map +1 -0
- package/dist/chat/preflight.js +115 -0
- package/dist/chat/preflight.js.map +1 -0
- package/dist/chat/render.d.ts +21 -0
- package/dist/chat/render.d.ts.map +1 -0
- package/dist/chat/render.js +113 -0
- package/dist/chat/render.js.map +1 -0
- package/dist/chat/repl.d.ts +9 -0
- package/dist/chat/repl.d.ts.map +1 -0
- package/dist/chat/repl.js +275 -0
- package/dist/chat/repl.js.map +1 -0
- package/dist/chat/trace.d.ts +29 -0
- package/dist/chat/trace.d.ts.map +1 -0
- package/dist/chat/trace.js +132 -0
- package/dist/chat/trace.js.map +1 -0
- package/dist/chat/transcript.d.ts +15 -0
- package/dist/chat/transcript.d.ts.map +1 -0
- package/dist/chat/transcript.js +39 -0
- package/dist/chat/transcript.js.map +1 -0
- package/dist/cli.d.ts +5 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +134 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +2 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +52 -0
- package/dist/config.js.map +1 -0
- package/dist/core/annotation.d.ts +22 -0
- package/dist/core/annotation.d.ts.map +1 -0
- package/dist/core/annotation.js +45 -0
- package/dist/core/annotation.js.map +1 -0
- package/dist/core/compile.d.ts +3 -0
- package/dist/core/compile.d.ts.map +1 -0
- package/dist/core/compile.js +9 -0
- package/dist/core/compile.js.map +1 -0
- package/dist/core/executor.d.ts +32 -0
- package/dist/core/executor.d.ts.map +1 -0
- package/dist/core/executor.js +73 -0
- package/dist/core/executor.js.map +1 -0
- package/dist/core/json.d.ts +9 -0
- package/dist/core/json.d.ts.map +1 -0
- package/dist/core/json.js +49 -0
- package/dist/core/json.js.map +1 -0
- package/dist/core/llm/base.d.ts +9 -0
- package/dist/core/llm/base.d.ts.map +1 -0
- package/dist/core/llm/base.js +3 -0
- package/dist/core/llm/base.js.map +1 -0
- package/dist/core/llm/build.d.ts +16 -0
- package/dist/core/llm/build.d.ts.map +1 -0
- package/dist/core/llm/build.js +34 -0
- package/dist/core/llm/build.js.map +1 -0
- package/dist/core/llm/claudeCli.d.ts +20 -0
- package/dist/core/llm/claudeCli.d.ts.map +1 -0
- package/dist/core/llm/claudeCli.js +57 -0
- package/dist/core/llm/claudeCli.js.map +1 -0
- package/dist/core/llm/codexCli.d.ts +13 -0
- package/dist/core/llm/codexCli.d.ts.map +1 -0
- package/dist/core/llm/codexCli.js +30 -0
- package/dist/core/llm/codexCli.js.map +1 -0
- package/dist/core/llm/cursorCli.d.ts +13 -0
- package/dist/core/llm/cursorCli.d.ts.map +1 -0
- package/dist/core/llm/cursorCli.js +27 -0
- package/dist/core/llm/cursorCli.js.map +1 -0
- package/dist/core/llm/index.d.ts +5 -0
- package/dist/core/llm/index.d.ts.map +1 -0
- package/dist/core/llm/index.js +4 -0
- package/dist/core/llm/index.js.map +1 -0
- package/dist/core/llm/scripted.d.ts +16 -0
- package/dist/core/llm/scripted.d.ts.map +1 -0
- package/dist/core/llm/scripted.js +29 -0
- package/dist/core/llm/scripted.js.map +1 -0
- package/dist/core/progress.d.ts +29 -0
- package/dist/core/progress.d.ts.map +1 -0
- package/dist/core/progress.js +22 -0
- package/dist/core/progress.js.map +1 -0
- package/dist/core/registry.d.ts +29 -0
- package/dist/core/registry.d.ts.map +1 -0
- package/dist/core/registry.js +63 -0
- package/dist/core/registry.js.map +1 -0
- package/dist/core/spawn.d.ts +19 -0
- package/dist/core/spawn.d.ts.map +1 -0
- package/dist/core/spawn.js +58 -0
- package/dist/core/spawn.js.map +1 -0
- package/dist/core/state.d.ts +58 -0
- package/dist/core/state.d.ts.map +1 -0
- package/dist/core/state.js +3 -0
- package/dist/core/state.js.map +1 -0
- package/dist/core/timeout.d.ts +13 -0
- package/dist/core/timeout.d.ts.map +1 -0
- package/dist/core/timeout.js +33 -0
- package/dist/core/timeout.js.map +1 -0
- package/dist/core/types.d.ts +15 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +3 -0
- package/dist/core/types.js.map +1 -0
- package/dist/core/validate.d.ts +7 -0
- package/dist/core/validate.d.ts.map +1 -0
- package/dist/core/validate.js +47 -0
- package/dist/core/validate.js.map +1 -0
- package/dist/graph.d.ts +4 -0
- package/dist/graph.d.ts.map +1 -0
- package/dist/graph.js +14 -0
- package/dist/graph.js.map +1 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -0
- package/dist/index.js.map +1 -0
- package/dist/modes/adversarial.d.ts +3 -0
- package/dist/modes/adversarial.d.ts.map +1 -0
- package/dist/modes/adversarial.js +42 -0
- package/dist/modes/adversarial.js.map +1 -0
- package/dist/modes/consensus.d.ts +3 -0
- package/dist/modes/consensus.d.ts.map +1 -0
- package/dist/modes/consensus.js +47 -0
- package/dist/modes/consensus.js.map +1 -0
- package/dist/modes/debate.d.ts +3 -0
- package/dist/modes/debate.d.ts.map +1 -0
- package/dist/modes/debate.js +76 -0
- package/dist/modes/debate.js.map +1 -0
- package/dist/modes/index.d.ts +7 -0
- package/dist/modes/index.d.ts.map +1 -0
- package/dist/modes/index.js +7 -0
- package/dist/modes/index.js.map +1 -0
- package/dist/modes/single.d.ts +3 -0
- package/dist/modes/single.d.ts.map +1 -0
- package/dist/modes/single.js +42 -0
- package/dist/modes/single.js.map +1 -0
- package/dist/modes/tournament.d.ts +3 -0
- package/dist/modes/tournament.d.ts.map +1 -0
- package/dist/modes/tournament.js +48 -0
- package/dist/modes/tournament.js.map +1 -0
- package/dist/nodes/agentRunner.d.ts +15 -0
- package/dist/nodes/agentRunner.d.ts.map +1 -0
- package/dist/nodes/agentRunner.js +106 -0
- package/dist/nodes/agentRunner.js.map +1 -0
- package/dist/nodes/auditor.d.ts +38 -0
- package/dist/nodes/auditor.d.ts.map +1 -0
- package/dist/nodes/auditor.js +179 -0
- package/dist/nodes/auditor.js.map +1 -0
- package/dist/nodes/bracket.d.ts +19 -0
- package/dist/nodes/bracket.d.ts.map +1 -0
- package/dist/nodes/bracket.js +90 -0
- package/dist/nodes/bracket.js.map +1 -0
- package/dist/nodes/consensusEngine.d.ts +18 -0
- package/dist/nodes/consensusEngine.d.ts.map +1 -0
- package/dist/nodes/consensusEngine.js +94 -0
- package/dist/nodes/consensusEngine.js.map +1 -0
- package/dist/nodes/moderator.d.ts +20 -0
- package/dist/nodes/moderator.d.ts.map +1 -0
- package/dist/nodes/moderator.js +83 -0
- package/dist/nodes/moderator.js.map +1 -0
- package/dist/nodes/planner.d.ts +17 -0
- package/dist/nodes/planner.d.ts.map +1 -0
- package/dist/nodes/planner.js +40 -0
- package/dist/nodes/planner.js.map +1 -0
- package/dist/nodes/supervisor.d.ts +28 -0
- package/dist/nodes/supervisor.d.ts.map +1 -0
- package/dist/nodes/supervisor.js +75 -0
- package/dist/nodes/supervisor.js.map +1 -0
- package/package.json +66 -0
- package/schemas/coterie.config.schema.json +204 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agentRunner.d.ts","sourceRoot":"","sources":["../../src/nodes/agentRunner.ts"],"names":[],"mappings":"AAAA,uFAAuF;AAGvF,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAG3D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AA2CrD,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,eAAe,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,CAAC,KAAK,EAAE,YAAY,KAAK,MAAM,CAAC;IAC9C,SAAS,CAAC,EAAE,CAAC,KAAK,EAAE,YAAY,KAAK,MAAM,CAAC;CAC7C;AAED,wBAAgB,eAAe,CAAC,IAAI,EAAE,eAAe,IAIrC,OAAO,YAAY,EAAE,SAAS;IAAE,MAAM,CAAC,EAAE,WAAW,CAAA;CAAE,kBA8DrE"}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/** Generic CLI-invoking node + budget gate. Mirrors Python `nodes/agent_runner.py`. */
|
|
2
|
+
import { progress } from "../core/progress.js";
|
|
3
|
+
import { ADAPTER_REGISTRY } from "../core/registry.js";
|
|
4
|
+
/**
 * Construct the adapter instance for a single agent config entry.
 *
 * The constructor is looked up with `ADAPTER_REGISTRY.require`, keyed by
 * `agentCfg.adapter`, and is called with the agent's id and a `{ model }`
 * options object.
 *
 * @param agentCfg Agent config entry; `adapter`, `id` and `model` are read.
 * @returns The newly constructed adapter.
 */
function instantiate(agentCfg) {
    const { adapter, id, model } = agentCfg;
    const AdapterCtor = ADAPTER_REGISTRY.require(adapter);
    return new AdapterCtor(id, { model });
}
|
|
8
|
+
// NOTE on budget scope (audit #9/#10): per-task USD budgets only bind *metered*
// usage. On the default subscription setup, codex/cursor report null cost and
// claude reports an estimate, so `spend_usd` is advisory, not a real bill. The
// check is also pre-run, so a parallel fan-out round (consensus/tournament) can
// complete before a cap applies. This is acceptable while the product is
// $0-metered; a true budget gate belongs with the (roadmap) pay-as-you-go path.
/**
 * Pre-run budget gate.
 *
 * Compares `state.spend_usd` (default 0) against the thresholds in
 * `state.config.budget` and reports what, if anything, the caller should merge
 * into the graph state.
 *
 * @param state Current state; reads `config.budget`, `spend_usd`, `mode_state`.
 * @returns
 *   - `null` when nothing changed;
 *   - `{ mode_state }` when only the one-shot `budget_warned` flag was set;
 *   - `{ mode_state, status }` with `budget_blocked: true` when the cap was
 *     hit: `status` is "failed" for `on_exceed: "halt"`, otherwise
 *     "awaiting_human" (the default `on_exceed` is "checkpoint").
 *   With `on_exceed: "warn"` the overrun is only logged and the run continues.
 */
function checkBudget(state) {
    const budget = state.config?.budget ?? {};
    const spent = state.spend_usd ?? 0;
    // Work on a copy so the incoming state object is never mutated.
    const modeState = { ...(state.mode_state ?? {}) };
    let changed = false;

    // `!= null` rather than `!== undefined`: a threshold explicitly set to
    // null means "not configured". With a strict undefined check, null would
    // coerce to 0 in the comparison and fire the warning on every run.
    const warnAt = budget.warn_at_usd;
    if (warnAt != null && spent >= warnAt && !modeState.budget_warned) {
        console.warn(`coterie budget warning: spent ~$${spent.toFixed(4)} (warn_at=$${warnAt})`);
        modeState.budget_warned = true;
        changed = true;
    }

    // Same reasoning for the hard cap: `spent < null` is never true for a
    // non-negative spend, so a null cap used to block every run.
    const maxUsd = budget.max_usd_per_task;
    if (maxUsd == null || spent < maxUsd) {
        return changed ? { mode_state: modeState } : null;
    }

    const onExceed = budget.on_exceed ?? "checkpoint";
    if (onExceed === "warn") {
        console.warn(`coterie budget exceeded: spent ~$${spent.toFixed(4)} (max=$${maxUsd}); continuing`);
        return changed ? { mode_state: modeState } : null;
    }

    modeState.budget_blocked = true;
    if (onExceed === "halt") {
        return { mode_state: modeState, status: "failed" };
    }
    return { mode_state: modeState, status: "awaiting_human" };
}
|
|
40
|
+
/**
 * Create a graph node that runs one agent through `opts.executor`.
 *
 * Exactly one of `opts.agent_id` (fixed id) or `opts.agent_id_fn`
 * (id derived from state) must be supplied; otherwise this throws.
 *
 * The returned async node:
 *   1. returns `{}` when the state is already "failed" or "awaiting_human";
 *   2. returns the budget update as-is when `checkBudget` produced a status;
 *   3. resolves the agent config, builds the adapter and the prompt
 *      (`opts.prompt_fn(state)` if given, else the current plan step, else
 *      the task);
 *   4. executes the agent, converting any non-cancellation error into a
 *      recorded failed run (exit code 1, error message in `stderr`);
 *   5. returns `{ runs: [run], spend_usd }`, plus `mode_state` when the
 *      budget check produced one.
 *
 * @param opts Runner options: `agent_id` | `agent_id_fn`, `role`, `executor`,
 *   `workdir`, and optional `prompt_fn`.
 * @throws Error when neither or both of the id options are given; the node
 *   itself throws when the resolved id is not in `config.agents`, and rethrows
 *   cancellation errors.
 */
export function makeAgentRunner(opts) {
    const hasFixedId = opts.agent_id != null;
    const hasIdFn = opts.agent_id_fn != null;
    if (hasFixedId === hasIdFn) {
        throw new Error("exactly one of agent_id or agent_id_fn must be provided");
    }

    return async (state, config) => {
        // Already-terminal states are passed through untouched.
        if (state.status === "failed" || state.status === "awaiting_human") {
            return {};
        }

        // A budget update carrying a status means the gate stopped the run.
        const budgetUpdate = checkBudget(state);
        if (budgetUpdate?.status) {
            return budgetUpdate;
        }

        const resolvedId = opts.agent_id ?? opts.agent_id_fn(state);
        const agentCfg = state.config.agents.find((candidate) => candidate.id === resolvedId);
        if (!agentCfg) {
            throw new Error(`agent id ${resolvedId} not in config.agents`);
        }
        const adapter = instantiate(agentCfg);

        let prompt;
        if (opts.prompt_fn) {
            prompt = opts.prompt_fn(state);
        }
        else {
            const steps = state.plan ?? [state.task];
            prompt = steps[state.current_step_idx ?? 0] ?? state.task;
        }

        progress.start({ agent_id: resolvedId, role: opts.role });

        let result;
        try {
            const execOptions = {
                timeoutMs: (agentCfg.timeout_s ?? 600) * 1000,
                signal: config?.signal,
                onStream: (text) => progress.step({ agent_id: resolvedId, role: opts.role, text }),
            };
            result = await opts.executor.execute(adapter, prompt, opts.workdir, execOptions);
        }
        catch (err) {
            // Cancellation propagates (the REPL handles it); any other execution error
            // (missing binary, spawn failure, timeout kill) becomes a *recorded* failed
            // run so the round degrades around it instead of crashing the whole turn.
            const cancelled = err?.name === "AbortError" || config?.signal?.aborted;
            if (cancelled) {
                throw err;
            }
            result = {
                stdout: "",
                stderr: err instanceof Error ? err.message : String(err),
                exit_code: 1,
                files_changed: [],
                duration_s: 0,
                cost_estimate_usd: null,
            };
        }

        const { stdout, stderr, exit_code, files_changed, duration_s, cost_estimate_usd } = result;
        const run = {
            agent_id: resolvedId,
            role: opts.role,
            prompt,
            stdout,
            stderr,
            exit_code,
            files_changed,
            duration_s,
            cost_estimate_usd,
        };
        progress.done({ run });

        // Forward a warn-only budget update (mode_state without status).
        const carriesModeState = budgetUpdate && "mode_state" in budgetUpdate;
        return {
            runs: [run],
            spend_usd: cost_estimate_usd ?? 0,
            ...(carriesModeState ? { mode_state: budgetUpdate.mode_state } : {}),
        };
    };
}
|
|
106
|
+
//# sourceMappingURL=agentRunner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agentRunner.js","sourceRoot":"","sources":["../../src/nodes/agentRunner.ts"],"names":[],"mappings":"AAAA,uFAAuF;AAIvF,OAAO,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAGvD,SAAS,WAAW,CAAC,QAAa;IAChC,MAAM,IAAI,GAAG,gBAAgB,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IACxD,OAAO,IAAI,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;AAC1D,CAAC;AAED,gFAAgF;AAChF,8EAA8E;AAC9E,+EAA+E;AAC/E,gFAAgF;AAChF,yEAAyE;AACzE,gFAAgF;AAChF,SAAS,WAAW,CAAC,KAAmB;IACtC,MAAM,MAAM,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,IAAI,EAAE,CAAwB,CAAC;IACnE,MAAM,KAAK,GAAG,KAAK,CAAC,SAAS,IAAI,CAAC,CAAC;IACnC,MAAM,SAAS,GAAG,EAAE,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,CAAC;IAClD,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,MAAM,MAAM,GAAG,MAAM,CAAC,WAAW,CAAC;IAClC,IAAI,MAAM,KAAK,SAAS,IAAI,KAAK,IAAI,MAAM,IAAI,CAAC,SAAS,CAAC,aAAa,EAAE,CAAC;QACxE,OAAO,CAAC,IAAI,CAAC,mCAAmC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,cAAc,MAAM,GAAG,CAAC,CAAC;QACzF,SAAS,CAAC,aAAa,GAAG,IAAI,CAAC;QAC/B,OAAO,GAAG,IAAI,CAAC;IACjB,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,CAAC,gBAAgB,CAAC;IACvC,IAAI,MAAM,KAAK,SAAS,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;QAC3C,OAAO,OAAO,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IACpD,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,CAAC,SAAS,IAAI,YAAY,CAAC;IAClD,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QACxB,OAAO,CAAC,IAAI,CAAC,oCAAoC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,MAAM,eAAe,CAAC,CAAC;QAClG,OAAO,OAAO,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IACpD,CAAC;IACD,SAAS,CAAC,cAAc,GAAG,IAAI,CAAC;IAChC,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QACxB,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;IACrD,CAAC;IACD,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAAC;AAC7D,CAAC;AAWD,MAAM,UAAU,eAAe,CAAC,IAAqB;IACnD,IAAI,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,EAAE,CAAC;QAC3D,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;IAC7E,CAAC;IACD,OAAO,KAAK,EAAE,KAAmB,EAAE,MAAiC,EAAE,EAAE;QACtE,IAAI,KAAK,CAAC,MAAM,KAAK,QAAQ,IAAI,KAAK,CAAC,MAAM,KAAK,gBAAg
B,EAAE,CAAC;YACnE,OAAO,EAAE,CAAC;QACZ,CAAC;QACD,MAAM,YAAY,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;QACxC,IAAI,YAAY,IAAI,YAAY,CAAC,MAAM,EAAE,CAAC;YACxC,OAAO,YAAY,CAAC;QACtB,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,WAAY,CAAC,KAAK,CAAC,CAAC;QAC7D,MAAM,QAAQ,GAAI,KAAK,CAAC,MAAM,CAAC,MAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,UAAU,CAAC,CAAC;QACjF,IAAI,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,YAAY,UAAU,uBAAuB,CAAC,CAAC;QAC9E,MAAM,OAAO,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;QAEtC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS;YAC3B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC;YACvB,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,gBAAgB,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC;QAE5E,QAAQ,CAAC,KAAK,CAAC,EAAE,QAAQ,EAAE,UAAU,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAC1D,IAAI,MAAM,CAAC;QACX,IAAI,CAAC;YACH,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,EAAE;gBAClE,SAAS,EAAE,CAAC,QAAQ,CAAC,SAAS,IAAI,GAAG,CAAC,GAAG,IAAI;gBAC7C,MAAM,EAAE,MAAM,EAAE,MAAM;gBACtB,QAAQ,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,UAAU,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;aACnF,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,CAAM,EAAE,CAAC;YAChB,2EAA2E;YAC3E,4EAA4E;YAC5E,0EAA0E;YAC1E,IAAI,CAAC,EAAE,IAAI,KAAK,YAAY,IAAI,MAAM,EAAE,MAAM,EAAE,OAAO;gBAAE,MAAM,CAAC,CAAC;YACjE,MAAM,GAAG;gBACP,MAAM,EAAE,EAAE;gBACV,MAAM,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;gBAClD,SAAS,EAAE,CAAC;gBACZ,aAAa,EAAE,EAAE;gBACjB,UAAU,EAAE,CAAC;gBACb,iBAAiB,EAAE,IAAI;aACxB,CAAC;QACJ,CAAC;QAED,MAAM,GAAG,GAAG;YACV,QAAQ,EAAE,UAAU;YACpB,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,aAAa,EAAE,MAAM,CAAC,aAAa;YACnC,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;SAC5C,CAAC;QACF,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC;QACvB,MAAM,MAAM,GAAQ;YAClB,IAAI,EAAE,CAAC,GAAG,CAAC;YACX,SAAS,EAAE,MAAM,CAAC,iBAAiB,IAAI,CAAC;SACzC,CAAC;QACF,
IAAI,YAAY,IAAI,YAAY,IAAI,YAAY,EAAE,CAAC;YACjD,MAAM,CAAC,UAAU,GAAG,YAAY,CAAC,UAAU,CAAC;QAC9C,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import type { LLMClient } from "../core/llm/base.js";
|
|
2
|
+
import type { CoterieState } from "../core/state.js";
|
|
3
|
+
export declare const AUDITOR_PROMPT_TEMPLATE = "You are an adversarial code auditor. The implementer below produced this work for the subtask:\n\n# Subtask\n{subtask}\n\n# Implementer's output\n{implementer_output}\n\nFind every plausible defect: bugs, edge cases missed, perf problems, security issues,\nmissed requirements, unclear code, undocumented invariants. Be aggressive but precise.\n\nReturn ONLY a JSON array. No prose, no markdown. Each finding:\n{\n \"category\": \"bug|perf|security|clarity|missed-requirement|edge-case\",\n \"severity\": \"low|medium|high|critical\",\n \"description\": \"<one sentence>\",\n \"line_ranges\": [\"path:start-end\", ...]\n}\n\nIf you find no defects, return [].\n";
|
|
4
|
+
/**
 * Parse raw auditor output into a list of findings.
 * Yields an empty array when the output does not parse to a JSON array.
 */
export declare function parseFindings(stdout: string): any[];
/**
 * Reports whether the auditor output really produced a JSON array of
 * findings, rather than prose, a crash, or malformed JSON. This lets the judge
 * tell a genuine "no defects" (`[]`) apart from an unreadable audit, which
 * must not be silently treated as a clean bill of health.
 */
export declare function findingsAreParseable(stdout: string): boolean;
/**
 * Builds the judge node for the adversarial mode.
 *
 * @param llm Judge model client, or `null` to run without a judge model.
 * @returns An async node producing the updated `mode_state`, a one-entry
 *   `judge_history`, and the next `status`.
 */
export declare function makeAdversarialJudgeNode(llm: LLMClient | null): (state: CoterieState) => Promise<{
    mode_state: Record<string, any>;
    judge_history: Array<{
        step: number;
        winner: string;
        reason: string;
        scores: {
            sustained_count: number;
        };
    }>;
    status: "executing" | "done";
}>;
/** Prompt handed to the implementer agent for the current round. */
export declare function adversarialImplementerPrompt(state: CoterieState): string;
/** Prompt handed to the auditor agent for the current round. */
export declare function adversarialAuditorPrompt(state: CoterieState): string;
/**
 * Builds the node that records the implementer's output in `mode_state` and
 * moves the status to "auditing".
 */
export declare function makeRecordImplementerOutputNode(): (state: CoterieState) => Promise<{
    mode_state: Record<string, any>;
    status: "auditing";
}>;
/**
 * Builds the node that records the auditor's findings in `mode_state` and
 * moves the status to "judging".
 */
export declare function makeRecordAuditorFindingsNode(): (state: CoterieState) => Promise<{
    mode_state: Record<string, any>;
    status: "judging";
}>;
|
|
38
|
+
//# sourceMappingURL=auditor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../../src/nodes/auditor.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAErD,eAAO,MAAM,uBAAuB,8pBAoBnC,CAAC;AAoBF,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,GAAG,EAAE,CAGnD;AAED;;;kDAGkD;AAClD,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAE5D;AAED,wBAAgB,wBAAwB,CAAC,GAAG,EAAE,SAAS,GAAG,IAAI,IAC9C,OAAO,YAAY;;;;;;;;;;;;;GAyFlC;AAED,wBAAgB,4BAA4B,CAAC,KAAK,EAAE,YAAY,GAAG,MAAM,CAOxE;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,YAAY,GAAG,MAAM,CAUpE;AAED,wBAAgB,+BAA+B,KAC/B,OAAO,YAAY;;;;;GAOlC;AAED,wBAAgB,6BAA6B,KAC7B,OAAO,YAAY;;;;;GAalC"}
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import { parseJsonLoose } from "../core/json.js";
|
|
2
|
+
export const AUDITOR_PROMPT_TEMPLATE = `You are an adversarial code auditor. The implementer below produced this work for the subtask:
|
|
3
|
+
|
|
4
|
+
# Subtask
|
|
5
|
+
{subtask}
|
|
6
|
+
|
|
7
|
+
# Implementer's output
|
|
8
|
+
{implementer_output}
|
|
9
|
+
|
|
10
|
+
Find every plausible defect: bugs, edge cases missed, perf problems, security issues,
|
|
11
|
+
missed requirements, unclear code, undocumented invariants. Be aggressive but precise.
|
|
12
|
+
|
|
13
|
+
Return ONLY a JSON array. No prose, no markdown. Each finding:
|
|
14
|
+
{
|
|
15
|
+
"category": "bug|perf|security|clarity|missed-requirement|edge-case",
|
|
16
|
+
"severity": "low|medium|high|critical",
|
|
17
|
+
"description": "<one sentence>",
|
|
18
|
+
"line_ranges": ["path:start-end", ...]
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
If you find no defects, return [].
|
|
22
|
+
`;
|
|
23
|
+
const JUDGE_SYSTEM = `You are an impartial judge in an adversarial code review.
|
|
24
|
+
You see an implementation and an auditor's findings. Decide which findings are
|
|
25
|
+
sustained (real defects the implementer should fix) versus rejected (nitpicks,
|
|
26
|
+
false positives, or out of scope).
|
|
27
|
+
|
|
28
|
+
Sustain a finding when: (1) it identifies a real defect, (2) its severity meets or
|
|
29
|
+
exceeds the configured sustain_threshold, AND (3) it is in scope for the subtask.
|
|
30
|
+
|
|
31
|
+
Return strict JSON only — no prose, no markdown:
|
|
32
|
+
{
|
|
33
|
+
"sustained": [<finding_indices>],
|
|
34
|
+
"rejected": [<finding_indices>],
|
|
35
|
+
"verdict": "accept" | "revise",
|
|
36
|
+
"reason": "<2-3 sentences>"
|
|
37
|
+
}`;
|
|
38
|
+
/**
 * Ordinal rank of each recognised severity label (higher is more severe).
 *
 * Built on a prototype-less object so that looking up an unrecognised label
 * (for example "constructor" or "toString" arriving in model output) gives
 * `undefined` and a `?? default` fallback applies, instead of resolving to a
 * member inherited from Object.prototype.
 */
const SEVERITY_RANK = Object.assign(Object.create(null), { low: 1, medium: 2, high: 3, critical: 4 });
|
|
39
|
+
/**
 * Parse raw auditor output into a findings list.
 *
 * @param stdout Raw text emitted by the auditor.
 * @returns The value from `parseJsonLoose` when it is an array, otherwise `[]`.
 */
export function parseFindings(stdout) {
    const parsed = parseJsonLoose(stdout);
    if (!Array.isArray(parsed)) {
        return [];
    }
    return parsed;
}
|
|
43
|
+
/**
 * Tells whether the auditor output really produced a JSON array of findings,
 * rather than prose, a crash, or malformed JSON. The judge uses this to
 * separate a genuine "no defects" (`[]`) from "couldn't read the auditor",
 * which must not be silently treated as a clean bill of health.
 *
 * @param stdout Raw text emitted by the auditor.
 * @returns `true` only when `parseJsonLoose(stdout)` is an array.
 */
export function findingsAreParseable(stdout) {
    const parsed = parseJsonLoose(stdout);
    return Array.isArray(parsed);
}
|
|
50
|
+
/**
 * Build the judge node for the adversarial mode.
 *
 * The node reads `auditor_findings`, `implementer_output` and `round_idx`
 * from `state.mode_state`, keeps the findings whose severity rank meets
 * `config.adversarial.judge.sustain_threshold` (default "medium"), and then:
 *   - with no eligible findings: accepts immediately (status "done"), noting
 *     in the reason when `auditor_unparsed` says the audit was unreadable;
 *   - with `llm === null`: sustains every eligible finding, verdict "revise";
 *   - otherwise asks the judge model and uses its `sustained` indices,
 *     `verdict` and `reason`; an unparseable reply falls back to sustaining
 *     every eligible finding.
 * The round counter is advanced and the node finishes (status "done") on
 * "accept", when `max_rounds` (default 2) is reached, or when nothing was
 * sustained; otherwise status is "executing" for another round.
 *
 * Findings and the judge reply both originate from model output, so malformed
 * shapes are tolerated rather than allowed to throw.
 *
 * @param llm Judge client exposing `chat(system, messages)`, or `null`.
 * @returns Async node yielding `{ mode_state, judge_history, status }`.
 */
export function makeAdversarialJudgeNode(llm) {
    return async (state) => {
        // Every adversarial setting has a default, so a missing section is
        // treated as empty instead of throwing on property access.
        const adv = state.config?.adversarial ?? {};
        const judgeCfg = adv.judge ?? {};
        const threshold = judgeCfg.sustain_threshold ?? "medium";
        const minRank = SEVERITY_RANK[threshold] ?? 2;
        const maxRounds = adv.max_rounds ?? 2;

        const modeState = { ...(state.mode_state ?? {}) };
        const findings = Array.isArray(modeState.auditor_findings) ? modeState.auditor_findings : [];
        const implOutput = modeState.implementer_output ?? "";
        const roundIdx = modeState.round_idx ?? 0;

        // Indices (into `findings`) of entries at or above the threshold.
        // Entries that are not objects (null, strings, numbers) cannot carry a
        // severity and are skipped rather than dereferenced.
        const eligibleIndices = [];
        findings.forEach((f, i) => {
            if (f === null || typeof f !== "object") {
                return;
            }
            if ((SEVERITY_RANK[f.severity ?? "low"] ?? 1) >= minRank) {
                eligibleIndices.push(i);
            }
        });

        if (eligibleIndices.length === 0) {
            modeState.sustained_findings = [];
            modeState.verdict = "accept";
            // If the auditor's output couldn't be parsed (crash / prose / bad JSON),
            // "no eligible findings" doesn't mean "clean" — say so rather than implying
            // a pass. (We still terminate; the failure is surfaced in the trace.)
            const unparsed = modeState.auditor_unparsed === true;
            return {
                mode_state: modeState,
                judge_history: [{
                        step: state.current_step_idx ?? 0,
                        winner: "implementer",
                        reason: unparsed
                            ? "auditor produced no parseable findings — accepting, but the audit may be incomplete"
                            : "no findings met severity threshold",
                        scores: { sustained_count: 0 },
                    }],
                status: "done",
            };
        }

        let sustained;
        let verdict;
        let reason;
        if (!llm) {
            sustained = eligibleIndices.map((i) => findings[i]);
            verdict = "revise";
            reason = "no judge LLM configured; conservatively sustaining all eligible findings";
        }
        else {
            const findingsBlock = eligibleIndices.map((i) => `[${i}] severity=${findings[i].severity} category=${findings[i].category}: ${findings[i].description}`).join("\n");
            const prompt = `Subtask: ${state.task}\n\n` +
                `Implementation output:\n${implOutput.slice(0, 2000)}\n\n` +
                `Eligible findings (severity >= ${threshold}):\n${findingsBlock}\n\n` +
                `Round: ${roundIdx + 1} of ${maxRounds}`;
            const raw = await llm.chat(JUDGE_SYSTEM, [{ role: "user", content: prompt }]);
            const decision = parseJsonLoose(raw);
            if (decision && typeof decision === "object") {
                // A `sustained` value that is not an array is handled like a
                // missing one; calling `.map` on it would throw.
                const picked = Array.isArray(decision.sustained) ? decision.sustained : [];
                sustained = picked.map((i) => findings[i]).filter(Boolean);
                verdict = decision.verdict ?? "revise";
                reason = decision.reason ?? "";
            }
            else {
                sustained = eligibleIndices.map((i) => findings[i]);
                verdict = "revise";
                // String() keeps the excerpt safe if the client returned a non-string.
                reason = `judge LLM output unparseable: ${String(raw).slice(0, 120)}`;
            }
        }

        modeState.sustained_findings = sustained;
        modeState.verdict = verdict;
        modeState.round_idx = roundIdx + 1;
        const outOfRounds = modeState.round_idx >= maxRounds;
        const finished = verdict === "accept" || outOfRounds || sustained.length === 0;
        // Surface the honest outcome: hitting the round cap with sustained defects
        // still outstanding is "done with unresolved issues", not a clean accept.
        const unresolved = finished && verdict !== "accept" && sustained.length > 0;
        modeState.unresolved_findings = unresolved ? sustained.length : 0;
        return {
            mode_state: modeState,
            judge_history: [{
                    step: state.current_step_idx ?? 0,
                    winner: verdict === "accept" ? "implementer" : "auditor",
                    reason: unresolved ? `${reason} (round cap reached with ${sustained.length} unresolved)` : reason,
                    scores: { sustained_count: sustained.length },
                }],
            status: finished ? "done" : "executing",
        };
    };
}
|
|
134
|
+
/**
 * Build the implementer's prompt for the current adversarial round.
 *
 * Round 0 (or a missing `round_idx`) gets the bare task. Later rounds get the
 * task followed by one bullet per entry in `mode_state.sustained_findings`.
 *
 * @param state Current state; reads `task` and `mode_state`.
 * @returns The prompt text.
 */
export function adversarialImplementerPrompt(state) {
    const modeState = state.mode_state ?? {};
    const isFirstRound = (modeState.round_idx ?? 0) === 0;
    if (isFirstRound) {
        return state.task;
    }
    const toBullet = (finding) => `- [${finding.severity}] ${finding.description}`;
    const bullets = (modeState.sustained_findings ?? []).map(toBullet);
    const critiques = bullets.join("\n");
    return `${state.task}\n\nPrevious round had sustained critiques. Please address each:\n${critiques}`;
}
|
|
143
|
+
/**
 * Build the auditor's prompt by filling `AUDITOR_PROMPT_TEMPLATE`.
 *
 * `{subtask}` becomes `state.task` and `{implementer_output}` becomes the
 * first 4000 characters of `mode_state.implementer_output` (default "").
 *
 * @param state Current state; reads `task` and `mode_state`.
 * @returns The filled-in prompt text.
 */
export function adversarialAuditorPrompt(state) {
    const recorded = (state.mode_state ?? {}).implementer_output ?? "";
    const clippedOutput = recorded.slice(0, 4000);
    const subtask = state.task;
    // Both placeholders are replaced in one pass, so a user task that itself
    // contains the literal "{implementer_output}" is never re-expanded.
    return AUDITOR_PROMPT_TEMPLATE.replace(/\{subtask\}|\{implementer_output\}/g, (placeholder) => {
        const value = placeholder === "{subtask}" ? subtask : clippedOutput;
        return value ?? placeholder;
    });
}
|
|
154
|
+
/**
 * Build the node that captures the implementer's latest output.
 *
 * The node copies `mode_state`, stores the `stdout` of the most recent run
 * whose role is "implementer" as `implementer_output` (left untouched when
 * there is no such run), and sets the status to "auditing".
 *
 * @returns Async node yielding `{ mode_state, status: "auditing" }`.
 */
export function makeRecordImplementerOutputNode() {
    return async (state) => {
        const history = [...(state.runs ?? [])];
        const modeState = { ...(state.mode_state ?? {}) };
        // Scan newest-to-oldest and stop at the first implementer run.
        for (let i = history.length - 1; i >= 0; i -= 1) {
            const candidate = history[i];
            if (candidate.role === "implementer") {
                modeState.implementer_output = candidate.stdout;
                break;
            }
        }
        return { mode_state: modeState, status: "auditing" };
    };
}
|
|
164
|
+
/**
 * Build the node that captures the auditor's latest findings.
 *
 * The node copies `mode_state` and, for the most recent run whose role is
 * "auditor", stores `auditor_findings` (via `parseFindings`) and
 * `auditor_unparsed`, which is true when that run exited non-zero or its
 * output is not a parseable findings array. The status becomes "judging".
 *
 * @returns Async node yielding `{ mode_state, status: "judging" }`.
 */
export function makeRecordAuditorFindingsNode() {
    return async (state) => {
        const history = [...(state.runs ?? [])];
        // Scan newest-to-oldest and stop at the first auditor run.
        let lastAuditor;
        for (let i = history.length - 1; i >= 0; i -= 1) {
            if (history[i].role === "auditor") {
                lastAuditor = history[i];
                break;
            }
        }
        const modeState = { ...(state.mode_state ?? {}) };
        if (lastAuditor) {
            const { stdout, exit_code } = lastAuditor;
            modeState.auditor_findings = parseFindings(stdout);
            // Distinguish a real "[]" (clean) from a crash / prose / bad JSON, so the
            // judge doesn't read a broken audit as a pass.
            const exitedWithError = exit_code !== 0;
            modeState.auditor_unparsed = exitedWithError || !findingsAreParseable(stdout);
        }
        return { mode_state: modeState, status: "judging" };
    };
}
|
|
179
|
+
//# sourceMappingURL=auditor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"auditor.js","sourceRoot":"","sources":["../../src/nodes/auditor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAIjD,MAAM,CAAC,MAAM,uBAAuB,GAAG;;;;;;;;;;;;;;;;;;;;CAoBtC,CAAC;AAEF,MAAM,YAAY,GAAG;;;;;;;;;;;;;;EAcnB,CAAC;AAEH,MAAM,aAAa,GAA2B,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;AAE1F,MAAM,UAAU,aAAa,CAAC,MAAc;IAC1C,MAAM,IAAI,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;IACpC,OAAO,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;AACzC,CAAC;AAED;;;kDAGkD;AAClD,MAAM,UAAU,oBAAoB,CAAC,MAAc;IACjD,OAAO,KAAK,CAAC,OAAO,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC;AAC/C,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,GAAqB;IAC5D,OAAO,KAAK,EAAE,KAAmB,EAAE,EAAE;QACnC,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;QACzB,MAAM,GAAG,GAAG,GAAG,CAAC,WAAW,CAAC;QAC5B,MAAM,QAAQ,GAAG,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,MAAM,SAAS,GAAG,QAAQ,CAAC,iBAAiB,IAAI,QAAQ,CAAC;QACzD,MAAM,OAAO,GAAG,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC9C,MAAM,SAAS,GAAG,GAAG,CAAC,UAAU,IAAI,CAAC,CAAC;QAEtC,MAAM,SAAS,GAAG,EAAE,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,CAAC;QAClD,MAAM,QAAQ,GAAG,CAAC,SAAS,CAAC,gBAAgB,IAAI,EAAE,CAAU,CAAC;QAC7D,MAAM,UAAU,GAAG,CAAC,SAAS,CAAC,kBAAkB,IAAI,EAAE,CAAW,CAAC;QAClE,MAAM,QAAQ,GAAG,CAAC,SAAS,CAAC,SAAS,IAAI,CAAC,CAAW,CAAC;QAEtD,MAAM,eAAe,GAAG,QAAQ;aAC7B,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAU,CAAC;aAC9B,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,OAAO,CAAC;aACvE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;QAEnB,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACjC,SAAS,CAAC,kBAAkB,GAAG,EAAE,CAAC;YAClC,SAAS,CAAC,OAAO,GAAG,QAAQ,CAAC;YAC7B,yEAAyE;YACzE,4EAA4E;YAC5E,sEAAsE;YACtE,MAAM,QAAQ,GAAG,SAAS,CAAC,gBAAgB,KAAK,IAAI,CAAC;YACrD,OAAO;gBACL,UAAU,EAAE,SAAS;gBACrB,aAAa,EAAE,CAAC;wBACd,IAAI,EAAE,KAAK,CAAC,gBAAgB,IAAI,CAAC;wBACjC,MAAM,EAAE,aAAa;wBACrB,MAAM,EAAE,QAAQ;4BACd,CAAC,CAAC,qFAAqF;4BACvF,CAAC,CAAC,oCAAoC;wBA
CxC,MAAM,EAAE,EAAE,eAAe,EAAE,CAAC,EAAE;qBAC/B,CAAC;gBACF,MAAM,EAAE,MAAe;aACxB,CAAC;QACJ,CAAC;QAED,IAAI,SAAgB,CAAC;QACrB,IAAI,OAAe,CAAC;QACpB,IAAI,MAAc,CAAC;QACnB,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,SAAS,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;YACpD,OAAO,GAAG,QAAQ,CAAC;YACnB,MAAM,GAAG,0EAA0E,CAAC;QACtF,CAAC;aAAM,CAAC;YACN,MAAM,aAAa,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAC9C,IAAI,CAAC,cAAc,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,aAAa,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CACvG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACb,MAAM,MAAM,GACV,YAAY,KAAK,CAAC,IAAI,MAAM;gBAC5B,2BAA2B,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM;gBAC1D,kCAAkC,SAAS,OAAO,aAAa,MAAM;gBACrE,UAAU,QAAQ,GAAG,CAAC,OAAO,SAAS,EAAE,CAAC;YAC3C,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;YAC9E,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;YACrC,IAAI,QAAQ,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;gBAC7C,SAAS,GAAG,CAAC,QAAQ,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBACvF,OAAO,GAAG,QAAQ,CAAC,OAAO,IAAI,QAAQ,CAAC;gBACvC,MAAM,GAAG,QAAQ,CAAC,MAAM,IAAI,EAAE,CAAC;YACjC,CAAC;iBAAM,CAAC;gBACN,SAAS,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;gBACpD,OAAO,GAAG,QAAQ,CAAC;gBACnB,MAAM,GAAG,iCAAiC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;YAChE,CAAC;QACH,CAAC;QAED,SAAS,CAAC,kBAAkB,GAAG,SAAS,CAAC;QACzC,SAAS,CAAC,OAAO,GAAG,OAAO,CAAC;QAC5B,SAAS,CAAC,SAAS,GAAG,QAAQ,GAAG,CAAC,CAAC;QACnC,MAAM,WAAW,GAAG,SAAS,CAAC,SAAS,IAAI,SAAS,CAAC;QACrD,MAAM,QAAQ,GAAG,OAAO,KAAK,QAAQ,IAAI,WAAW,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,CAAC;QAC/E,2EAA2E;QAC3E,0EAA0E;QAC1E,MAAM,UAAU,GAAG,QAAQ,IAAI,OAAO,KAAK,QAAQ,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC;QAC5E,SAAS,CAAC,mBAAmB,GAAG,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAElE,OAAO;YACL,UAAU,EAAE,SAAS;YACrB,aAAa,EAAE,CAAC;oBACd,IAAI,EAAE,KAAK,CAAC,gBAAgB,IAAI,CAAC;oBACjC
,MAAM,EAAE,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS;oBACxD,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,GAAG,MAAM,4BAA4B,SAAS,CAAC,MAAM,cAAc,CAAC,CAAC,CAAC,MAAM;oBACjG,MAAM,EAAE,EAAE,eAAe,EAAE,SAAS,CAAC,MAAM,EAAE;iBAC9C,CAAC;YACF,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAE,MAAgB,CAAC,CAAC,CAAE,WAAqB;SAC9D,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,4BAA4B,CAAC,KAAmB;IAC9D,MAAM,EAAE,GAAG,KAAK,CAAC,UAAU,IAAI,EAAE,CAAC;IAClC,MAAM,KAAK,GAAG,EAAE,CAAC,SAAS,IAAI,CAAC,CAAC;IAChC,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,IAAI,CAAC;IACnC,MAAM,SAAS,GAAG,CAAC,EAAE,CAAC,kBAAkB,IAAI,EAAE,CAAU,CAAC;IACzD,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxF,OAAO,GAAG,KAAK,CAAC,IAAI,qEAAqE,SAAS,EAAE,CAAC;AACvG,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,KAAmB;IAC1D,MAAM,EAAE,GAAG,KAAK,CAAC,UAAU,IAAI,EAAE,CAAC;IAClC,MAAM,UAAU,GAAG,CAAC,EAAE,CAAC,kBAAkB,IAAI,EAAE,CAAW,CAAC;IAC3D,iEAAiE;IACjE,oEAAoE;IACpE,MAAM,IAAI,GAA2B;QACnC,WAAW,EAAE,KAAK,CAAC,IAAI;QACvB,sBAAsB,EAAE,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;KAClD,CAAC;IACF,OAAO,uBAAuB,CAAC,OAAO,CAAC,qCAAqC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;AACrG,CAAC;AAED,MAAM,UAAU,+BAA+B;IAC7C,OAAO,KAAK,EAAE,KAAmB,EAAE,EAAE;QACnC,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,IAAI,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,aAAa,CAAC,CAAC;QAC3E,MAAM,SAAS,GAAG,EAAE,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,CAAC;QAClD,IAAI,QAAQ;YAAE,SAAS,CAAC,kBAAkB,GAAG,QAAQ,CAAC,MAAM,CAAC;QAC7D,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,EAAE,UAAmB,EAAE,CAAC;IAChE,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,6BAA6B;IAC3C,OAAO,KAAK,EAAE,KAAmB,EAAE,EAAE;QACnC,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,IAAI,EAAE,CAAC;QAC9B,MAAM,WAAW,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC;QAC1E,MAAM,SAAS,GAAG,EAAE,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,CAAC;QAClD,IAAI,WAA
W,EAAE,CAAC;YAChB,SAAS,CAAC,gBAAgB,GAAG,aAAa,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;YAC/D,0EAA0E;YAC1E,+CAA+C;YAC/C,SAAS,CAAC,gBAAgB;gBACxB,WAAW,CAAC,SAAS,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC7E,CAAC;QACD,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,EAAE,SAAkB,EAAE,CAAC;IAC/D,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { LLMClient } from "../core/llm/base.js";
|
|
2
|
+
import type { CoterieState } from "../core/state.js";
|
|
3
|
+
/**
 * Creates the bracket-judge node used for tournament rounds.
 *
 * @param llm - Judge model client; the signature also accepts `null`.
 * @returns An async node that takes the current `CoterieState` and resolves to
 *   one of two shapes: `{ status: "failed" }` with no `mode_state` or
 *   `judge_history`, or an update carrying the new `mode_state`, a
 *   `judge_history` array of `{ step, winner, reason, scores }` entries, and a
 *   `status` of `"tournament_round"` or `"done"`.
 */
export declare function makeBracketJudgeNode(llm: LLMClient | null): (state: CoterieState) => Promise<{
|
|
4
|
+
status: "failed";
|
|
5
|
+
mode_state?: undefined;
|
|
6
|
+
judge_history?: undefined;
|
|
7
|
+
} | {
|
|
8
|
+
mode_state: {
|
|
9
|
+
[x: string]: any;
|
|
10
|
+
};
|
|
11
|
+
judge_history: {
|
|
12
|
+
step: number;
|
|
13
|
+
winner: string;
|
|
14
|
+
reason: string;
|
|
15
|
+
scores: any;
|
|
16
|
+
}[];
|
|
17
|
+
status: "tournament_round" | "done";
|
|
18
|
+
}>;
|
|
19
|
+
//# sourceMappingURL=bracket.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bracket.d.ts","sourceRoot":"","sources":["../../src/nodes/bracket.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAiDrD,wBAAgB,oBAAoB,CAAC,GAAG,EAAE,SAAS,GAAG,IAAI,IAC1C,OAAO,YAAY;;;;;;;;;;;;;;;GAyDlC"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
const BRACKET_JUDGE_SYSTEM = `You are an impartial bracket judge for an N-way coding tournament.
|
|
2
|
+
You see N attempts at the same task, one per agent. Score each on the configured criteria,
|
|
3
|
+
then rank from best to worst.
|
|
4
|
+
|
|
5
|
+
Return strict JSON only — no prose, no markdown:
|
|
6
|
+
{
|
|
7
|
+
"ranking": [{"agent_id": "...", "score": <int 1-100>, "reason": "<one sentence>"}, ...],
|
|
8
|
+
"winner": "<agent_id>",
|
|
9
|
+
"summary": "<2-3 sentences>"
|
|
10
|
+
}`;
|
|
11
|
+
async function rankCurrentRound(llm, task, attempts, criteria) {
|
|
12
|
+
if (!llm) {
|
|
13
|
+
const successful = attempts.filter((a) => a.exit_code === 0);
|
|
14
|
+
const pool = successful.length ? successful : attempts;
|
|
15
|
+
const chosen = pool.reduce((a, b) => (a.cost_estimate_usd ?? Infinity) <= (b.cost_estimate_usd ?? Infinity) ? a : b);
|
|
16
|
+
return { ranking: [], winner: chosen.agent_id, summary: "no judge LLM; chose cheapest" };
|
|
17
|
+
}
|
|
18
|
+
const block = attempts
|
|
19
|
+
.map((a) => `--- ${a.agent_id} ---\nexit_code: ${a.exit_code}, duration_s: ${a.duration_s.toFixed(2)}, cost: $${a.cost_estimate_usd}\n` +
|
|
20
|
+
`stdout (first 1500 chars):\n${a.stdout.slice(0, 1500)}`)
|
|
21
|
+
.join("\n\n");
|
|
22
|
+
const prompt = `Task: ${task}\n\nCriteria (priority order): ${JSON.stringify(criteria)}\n\nAttempts:\n${block}`;
|
|
23
|
+
const raw = await llm.chat(BRACKET_JUDGE_SYSTEM, [{ role: "user", content: prompt }]);
|
|
24
|
+
try {
|
|
25
|
+
const d = JSON.parse(raw);
|
|
26
|
+
return { ranking: d.ranking ?? [], winner: d.winner, summary: d.summary ?? "" };
|
|
27
|
+
}
|
|
28
|
+
catch {
|
|
29
|
+
const successful = attempts.filter((a) => a.exit_code === 0);
|
|
30
|
+
const pool = successful.length ? successful : attempts;
|
|
31
|
+
const chosen = pool.reduce((a, b) => (a.cost_estimate_usd ?? Infinity) <= (b.cost_estimate_usd ?? Infinity) ? a : b);
|
|
32
|
+
return { ranking: [], winner: chosen.agent_id, summary: `bracket judge unparseable: ${raw.slice(0, 120)}` };
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
/**
 * Build the bracket-judge node for tournament mode.
 *
 * The returned node judges the runs added since the previous judgement
 * (`mode_state.last_judged_run_count`), restricted to
 * `role === "tournament-participant"` runs of configured, not yet eliminated
 * participants. It then decides who survives the round.
 *
 * @param llm Judge client forwarded to `rankCurrentRound`; may be null.
 * @returns An async `(state) => update` function. The update is
 *          `{ status: "failed" }` when there is nothing to judge, otherwise
 *          `{ mode_state, judge_history, status }` with `status` being `"done"`
 *          once at most one survivor remains or the configured number of
 *          rounds has been played, and `"tournament_round"` otherwise.
 */
export function makeBracketJudgeNode(llm) {
    return async (state) => {
        const cfg = state.config ?? {};
        const tournament = cfg.tournament ?? {};
        const judgeCfg = tournament.judge ?? {};
        const criteria = judgeCfg.criteria ?? ["correctness", "minimal-diff", "tests-pass", "clarity"];
        const participantsAll = (tournament.participants ?? []);
        const totalRounds = tournament.rounds ?? 1;
        const modeState = { ...(state.mode_state ?? {}) };
        const eliminated = [...(modeState.eliminated_participants ?? [])];
        const lastCount = (modeState.last_judged_run_count ?? 0);
        const allRuns = state.runs ?? [];
        const thisRound = allRuns
            .slice(lastCount)
            .filter((r) => r.role === "tournament-participant" && participantsAll.includes(r.agent_id) && !eliminated.includes(r.agent_id));
        if (thisRound.length === 0)
            return { status: "failed" };
        // Distinct agents competing this round, in run order.
        const roundIds = [...new Set(thisRound.map((a) => a.agent_id))];
        const judged = await rankCurrentRound(llm, state.task, thisRound, criteria);
        const summary = judged.summary;
        // The judge's output is model-generated: tolerate a non-array ranking and
        // null/non-object entries instead of crashing on `.map` below.
        const ranking = (Array.isArray(judged.ranking) ? judged.ranking : [])
            .filter((r) => r && typeof r === "object");
        const rankedIds = ranking.map((r) => r.agent_id).filter((id) => roundIds.includes(id));
        // A winner that did not compete this round (missing or invented id) is
        // replaced by the best ranked competitor, then by the first competitor.
        const winner = roundIds.includes(judged.winner) ? judged.winner : (rankedIds[0] ?? roundIds[0]);
        const roundIdx = (modeState.tournament_round_idx ?? 0) + 1;
        let survivors;
        let newEliminated;
        if (totalRounds === 1 || roundIds.length <= 2) {
            // Final decision: only the winner survives.
            survivors = [winner];
            newEliminated = [...eliminated, ...roundIds.filter((id) => id !== winner)];
        }
        else {
            // Halve the field. Order is: declared winner, then the judge's ranking,
            // then any competitor the judge forgot to rank. This keeps
            // `survivors[0]` equal to the winner recorded in `judge_history`, and
            // unranked agents are eliminated rather than silently carried over.
            const survivorsN = Math.max(1, Math.floor(roundIds.length / 2));
            const ordered = [...new Set([winner, ...rankedIds, ...roundIds])];
            survivors = ordered.slice(0, survivorsN);
            newEliminated = [...eliminated, ...ordered.filter((id) => !survivors.includes(id))];
        }
        modeState.tournament_round_idx = roundIdx;
        modeState.eliminated_participants = newEliminated;
        modeState.survivors = survivors;
        modeState.last_judged_run_count = allRuns.length;
        modeState.bracket_ranking = ranking;
        modeState.bracket_summary = summary;
        const done = survivors.length <= 1 || roundIdx >= totalRounds;
        if (done)
            modeState.winner = survivors[0] ?? winner;
        return {
            mode_state: modeState,
            judge_history: [{
                    step: state.current_step_idx ?? 0,
                    winner,
                    reason: summary,
                    scores: Object.fromEntries(ranking.map((r) => [r.agent_id, r.score ?? 0])),
                }],
            status: done ? "done" : "tournament_round",
        };
    };
}
|
|
90
|
+
//# sourceMappingURL=bracket.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bracket.js","sourceRoot":"","sources":["../../src/nodes/bracket.ts"],"names":[],"mappings":"AAGA,MAAM,oBAAoB,GAAG;;;;;;;;;EAS3B,CAAC;AAEH,KAAK,UAAU,gBAAgB,CAC7B,GAAqB,EACrB,IAAY,EACZ,QAAe,EACf,QAAkB;IAElB,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,CAAC;QAC7D,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC;QACvD,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAClC,CAAC,CAAC,CAAC,iBAAiB,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,iBAAiB,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAC/E,CAAC;QACF,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,QAAQ,EAAE,OAAO,EAAE,8BAA8B,EAAE,CAAC;IAC3F,CAAC;IAED,MAAM,KAAK,GAAG,QAAQ;SACnB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACT,OAAO,CAAC,CAAC,QAAQ,oBAAoB,CAAC,CAAC,SAAS,iBAAiB,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,iBAAiB,IAAI;QAC3H,+BAA+B,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CACzD;SACA,IAAI,CAAC,MAAM,CAAC,CAAC;IAChB,MAAM,MAAM,GAAG,SAAS,IAAI,kCAAkC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,kBAAkB,KAAK,EAAE,CAAC;IAChH,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;IACtF,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC1B,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC;IAClF,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,CAAC;QAC7D,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC;QACvD,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAClC,CAAC,CAAC,CAAC,iBAAiB,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,iBAAiB,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAC/E,CAAC;QACF,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,QAAQ,EAAE,OAAO,EAAE,8BAA8B,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC;IAC9G,
CAAC;AACH,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,GAAqB;IACxD,OAAO,KAAK,EAAE,KAAmB,EAAE,EAAE;QACnC,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;QACzB,MAAM,UAAU,GAAG,GAAG,CAAC,UAAU,IAAI,EAAE,CAAC;QACxC,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,IAAI,EAAE,CAAC;QACxC,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,IAAI,CAAC,aAAa,EAAE,cAAc,EAAE,YAAY,EAAE,SAAS,CAAC,CAAC;QAC/F,MAAM,eAAe,GAAG,CAAC,UAAU,CAAC,YAAY,IAAI,EAAE,CAAa,CAAC;QACpE,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC;QAE3C,MAAM,SAAS,GAAG,EAAE,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,CAAC;QAClD,IAAI,QAAQ,GAAG,CAAC,SAAS,CAAC,oBAAoB,IAAI,CAAC,CAAW,CAAC;QAC/D,MAAM,UAAU,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC,uBAAuB,IAAI,EAAE,CAAC,CAAa,CAAC;QAC9E,MAAM,SAAS,GAAG,CAAC,SAAS,CAAC,qBAAqB,IAAI,CAAC,CAAW,CAAC;QAEnE,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,IAAI,EAAE,CAAC;QACjC,MAAM,SAAS,GAAG,OAAO;aACtB,KAAK,CAAC,SAAS,CAAC;aAChB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,wBAAwB,IAAI,eAAe,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;QAElI,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,MAAM,EAAE,QAAiB,EAAE,CAAC;QAEjE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,gBAAgB,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;QAClG,QAAQ,IAAI,CAAC,CAAC;QAEd,IAAI,SAAmB,CAAC;QACxB,IAAI,aAAuB,CAAC;QAC5B,IAAI,WAAW,KAAK,CAAC,IAAI,SAAS,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YAC/C,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;YACrB,aAAa,GAAG,CAAC,GAAG,UAAU,EAAE,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;QAC5G,CAAC;aAAM,CAAC;YACN,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;YACjE,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM;gBAC5B,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC;gBAChD,CAAC,CAAC,CAAC,MAAM,EAAE,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;
YACvF,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YACzC,aAAa,GAAG,CAAC,GAAG,UAAU,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpF,CAAC;QAED,SAAS,CAAC,oBAAoB,GAAG,QAAQ,CAAC;QAC1C,SAAS,CAAC,uBAAuB,GAAG,aAAa,CAAC;QAClD,SAAS,CAAC,SAAS,GAAG,SAAS,CAAC;QAChC,SAAS,CAAC,qBAAqB,GAAG,OAAO,CAAC,MAAM,CAAC;QACjD,SAAS,CAAC,eAAe,GAAG,OAAO,CAAC;QACpC,SAAS,CAAC,eAAe,GAAG,OAAO,CAAC;QACpC,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,IAAI,CAAC,IAAI,QAAQ,IAAI,WAAW,CAAC;QAC9D,IAAI,IAAI;YAAE,SAAS,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC;QAEpD,OAAO;YACL,UAAU,EAAE,SAAS;YACrB,aAAa,EAAE,CAAC;oBACd,IAAI,EAAE,KAAK,CAAC,gBAAgB,IAAI,CAAC;oBACjC,MAAM;oBACN,MAAM,EAAE,OAAO;oBACf,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC;iBAC3E,CAAC;YACF,MAAM,EAAE,IAAI,CAAC,CAAC,CAAE,MAAgB,CAAC,CAAC,CAAE,kBAA4B;SACjE,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { LLMClient } from "../core/llm/base.js";
|
|
2
|
+
import type { CoterieState } from "../core/state.js";
|
|
3
|
+
/**
 * Creates the consensus-engine node.
 *
 * @param llm - Model client; the signature also accepts `null`.
 * @returns An async node that takes the current `CoterieState` and resolves to
 *   `{ mode_state, status: "done" }`, where `mode_state.consensus_findings` is
 *   an array of findings, each with `description`, `category`, `severity`,
 *   `agreement_count`, `agreement_ratio`, `participant_count`, `label` and
 *   `supporting_agents`.
 */
export declare function makeConsensusEngineNode(llm: LLMClient | null): (state: CoterieState) => Promise<{
|
|
4
|
+
mode_state: {
|
|
5
|
+
consensus_findings: {
|
|
6
|
+
description: any;
|
|
7
|
+
category: any;
|
|
8
|
+
severity: any;
|
|
9
|
+
agreement_count: number;
|
|
10
|
+
agreement_ratio: number;
|
|
11
|
+
participant_count: number;
|
|
12
|
+
label: string;
|
|
13
|
+
supporting_agents: string[];
|
|
14
|
+
}[];
|
|
15
|
+
};
|
|
16
|
+
status: "done";
|
|
17
|
+
}>;
|
|
18
|
+
//# sourceMappingURL=consensusEngine.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"consensusEngine.d.ts","sourceRoot":"","sources":["../../src/nodes/consensusEngine.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAkBrD,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,SAAS,GAAG,IAAI,IAC7C,OAAO,YAAY;;;;;;;;;;;;;;GAgFlC"}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { parseJsonLoose } from "../core/json.js";
|
|
2
|
+
import { parseFindings } from "./auditor.js";
|
|
3
|
+
/**
 * System prompt for the clustering model: asks for a JSON array of clusters,
 * each naming the agents that support it and the indices of its member findings.
 */
const ENGINE_SYSTEM = `You cluster code-review findings into groups of semantically equivalent items.
Input: a JSON array of findings, each with an \`agent_id\` field identifying which agent produced it.
Output: a JSON array of clusters. Each cluster:
{
"description": "<canonical phrasing>",
"category": "<most common category>",
"severity": "<highest severity in cluster>",
"supporting_agents": ["<agent_id>", ...],
"member_indices": [<int>, ...]
}

Two findings belong in the same cluster when they identify the same defect, even if
worded differently. Be strict — do not merge unrelated findings just because they share a category.
Return only the JSON array, no prose, no markdown.`;
/**
 * Fallback clustering: every finding becomes its own single-member cluster,
 * supported only by the agent that reported it.
 */
function singletonClusters(findings) {
    return findings.map((f, i) => ({
        description: f.description,
        category: f.category ?? "unknown",
        severity: f.severity ?? "low",
        supporting_agents: [f.agent_id],
        member_indices: [i],
    }));
}
/**
 * Build the consensus-engine node.
 *
 * The returned node collects findings from `consensus-participant` runs of
 * the configured participants, clusters them (through `llm` when one is given,
 * otherwise one cluster per finding), and labels each cluster by how many
 * distinct participants support it.
 *
 * @param llm Client exposing `chat(system, messages)`, or a falsy value to
 *            skip model-based clustering.
 * @returns An async `(state) => update` function resolving to
 *          `{ mode_state: { ...previous, consensus_findings }, status: "done" }`.
 */
export function makeConsensusEngineNode(llm) {
    return async (state) => {
        const cfg = state.config ?? {};
        const consCfg = cfg.consensus ?? {};
        const engineCfg = consCfg.engine ?? {};
        // `consensus.threshold` is the documented key; `engine.confirm_threshold` is
        // an older alias. Honor either (threshold wins) before falling back.
        const threshold = consCfg.threshold ?? engineCfg.confirm_threshold ?? 0.5;
        // Default to every configured agent; tolerate a config without `agents`.
        const participants = (consCfg.participants ?? (cfg.agents ?? []).map((a) => a.id));
        const n = participants.length;
        // Flatten all well-formed findings, tagging each with its reporting agent.
        const flat = [];
        for (const run of state.runs ?? []) {
            if (run.role !== "consensus-participant" || !participants.includes(run.agent_id))
                continue;
            for (const f of parseFindings(run.stdout)) {
                if (f && typeof f === "object" && typeof f.description === "string") {
                    flat.push({ ...f, agent_id: run.agent_id });
                }
            }
        }
        if (flat.length === 0) {
            return {
                mode_state: { ...(state.mode_state ?? {}), consensus_findings: [] },
                status: "done",
            };
        }
        let clusters;
        if (llm) {
            const raw = await llm.chat(ENGINE_SYSTEM, [
                { role: "user", content: `Cluster these findings:\n${JSON.stringify(flat)}` },
            ]);
            const parsed = parseJsonLoose(raw);
            if (Array.isArray(parsed)) {
                // Model output is untrusted: keep only object entries.
                const usable = parsed.filter((c) => c && typeof c === "object");
                if (usable.length > 0)
                    clusters = usable;
            }
        }
        // No model, unparseable reply, or a reply with no usable cluster: keep
        // every finding rather than silently dropping them all.
        if (!clusters)
            clusters = singletonClusters(flat);
        const consensus = clusters.map((c) => {
            // Prefer the model's `supporting_agents`; when it is not an array,
            // derive supporters from `member_indices` instead.
            const claimed = Array.isArray(c.supporting_agents)
                ? c.supporting_agents
                : Array.isArray(c.member_indices)
                    ? c.member_indices.map((i) => flat[i]?.agent_id)
                    : [];
            // Intersect with the real participant set: a hallucinated/duplicate agent id
            // must not inflate agreement (ratio could otherwise exceed 1.0).
            const supporters = [...new Set(claimed)].filter((s) => participants.includes(s));
            const ratio = supporters.length / Math.max(n, 1);
            // A single supporter is never "confirmed", whatever the threshold.
            const label = supporters.length <= 1 ? "unverified" : ratio >= threshold ? "confirmed" : "needs-verification";
            return {
                description: c.description ?? "",
                category: c.category ?? "unknown",
                severity: c.severity ?? "low",
                agreement_count: supporters.length,
                agreement_ratio: ratio,
                participant_count: n,
                label,
                supporting_agents: supporters,
            };
        });
        return {
            mode_state: { ...(state.mode_state ?? {}), consensus_findings: consensus },
            status: "done",
        };
    };
}
|
|
94
|
+
//# sourceMappingURL=consensusEngine.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"consensusEngine.js","sourceRoot":"","sources":["../../src/nodes/consensusEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAGjD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,MAAM,aAAa,GAAG;;;;;;;;;;;;;mDAa6B,CAAC;AAEpD,MAAM,UAAU,uBAAuB,CAAC,GAAqB;IAC3D,OAAO,KAAK,EAAE,KAAmB,EAAE,EAAE;QACnC,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;QACzB,MAAM,OAAO,GAAG,GAAG,CAAC,SAAS,IAAI,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,IAAI,EAAE,CAAC;QACvC,6EAA6E;QAC7E,qEAAqE;QACrE,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,SAAS,CAAC,iBAAiB,IAAI,GAAG,CAAC;QAC1E,MAAM,YAAY,GAAG,CAAC,OAAO,CAAC,YAAY,IAAK,GAAG,CAAC,MAAgB,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAa,CAAC;QACvG,MAAM,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC;QAE9B,MAAM,IAAI,GAAU,EAAE,CAAC;QACvB,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC;YACnC,IAAI,GAAG,CAAC,IAAI,KAAK,uBAAuB,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC;gBAAE,SAAS;YAC3F,KAAK,MAAM,CAAC,IAAI,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC1C,IAAI,CAAC,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;oBACpE,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;gBAC9C,CAAC;YACH,CAAC;QACH,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO;gBACL,UAAU,EAAE,EAAE,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,kBAAkB,EAAE,EAAE,EAAE;gBACnE,MAAM,EAAE,MAAe;aACxB,CAAC;QACJ,CAAC;QAED,IAAI,QAAe,CAAC;QACpB,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC7B,WAAW,EAAE,CAAC,CAAC,WAAW;gBAC1B,QAAQ,EAAE,CAAC,CAAC,QAAQ,IAAI,SAAS;gBACjC,QAAQ,EAAE,CAAC,CAAC,QAAQ,IAAI,KAAK;gBAC7B,iBAAiB,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;gBAC/B,cAAc,EAAE,CAAC,CAAC,CAAC;aACpB,CAAC,CAAC,CAAC;QACN,CAAC;aAAM,CAAC;YACN,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,aAAa,EAAE;gBACxC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,4BAA4B,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,EAAE;aAC9E,CAAC,CAAC;YACH,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;YACnC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EA
AE,CAAC;gBAC1B,QAAQ,GAAG,MAAM,CAAC;YACpB,CAAC;iBAAM,CAAC;gBACN,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC7B,WAAW,EAAE,CAAC,CAAC,WAAW;oBAC1B,QAAQ,EAAE,CAAC,CAAC,QAAQ,IAAI,SAAS;oBACjC,QAAQ,EAAE,CAAC,CAAC,QAAQ,IAAI,KAAK;oBAC7B,iBAAiB,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;oBAC/B,cAAc,EAAE,CAAC,CAAC,CAAC;iBACpB,CAAC,CAAC,CAAC;YACN,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YACnC,6EAA6E;YAC7E,iEAAiE;YACjE,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,GAAG,CAAS,CAAC,CAAC,iBAAiB,IAAI,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAC9E,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,CACzB,CAAC;YACF,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACjD,MAAM,KAAK,GACT,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,IAAI,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,oBAAoB,CAAC;YAClG,OAAO;gBACL,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,EAAE;gBAChC,QAAQ,EAAE,CAAC,CAAC,QAAQ,IAAI,SAAS;gBACjC,QAAQ,EAAE,CAAC,CAAC,QAAQ,IAAI,KAAK;gBAC7B,eAAe,EAAE,UAAU,CAAC,MAAM;gBAClC,eAAe,EAAE,KAAK;gBACtB,iBAAiB,EAAE,CAAC;gBACpB,KAAK;gBACL,iBAAiB,EAAE,UAAU;aAC9B,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,OAAO;YACL,UAAU,EAAE,EAAE,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,kBAAkB,EAAE,SAAS,EAAE;YAC1E,MAAM,EAAE,MAAe;SACxB,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { LLMClient } from "../core/llm/base.js";
|
|
2
|
+
import type { CoterieState } from "../core/state.js";
|
|
3
|
+
/**
 * Creates the moderator node.
 *
 * @param llm - Model client; the signature also accepts `null`.
 * @returns An async node that takes the current `CoterieState` and resolves to
 *   an update containing only `mode_state` (an open string-keyed record).
 */
export declare function makeModeratorNode(llm: LLMClient | null): (state: CoterieState) => Promise<{
|
|
4
|
+
mode_state: {
|
|
5
|
+
[x: string]: any;
|
|
6
|
+
};
|
|
7
|
+
}>;
|
|
8
|
+
/**
 * Creates the debate-judge node.
 *
 * @param llm - Model client; the signature also accepts `null`.
 * @returns An async node that takes the current `CoterieState` and resolves to
 *   `{ mode_state, judge_history, status: "done" }`, where `mode_state` carries
 *   `debate_verdict` and `judge_history` is an array of
 *   `{ step, winner, reason, scores }` entries.
 */
export declare function makeDebateJudgeNode(llm: LLMClient | null): (state: CoterieState) => Promise<{
|
|
9
|
+
mode_state: {
|
|
10
|
+
debate_verdict: any;
|
|
11
|
+
};
|
|
12
|
+
judge_history: {
|
|
13
|
+
step: number;
|
|
14
|
+
winner: string;
|
|
15
|
+
reason: any;
|
|
16
|
+
scores: any;
|
|
17
|
+
}[];
|
|
18
|
+
status: "done";
|
|
19
|
+
}>;
|
|
20
|
+
//# sourceMappingURL=moderator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"moderator.d.ts","sourceRoot":"","sources":["../../src/nodes/moderator.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAuBrD,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,SAAS,GAAG,IAAI,IACvC,OAAO,YAAY;;;;GAsBlC;AAED,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,SAAS,GAAG,IAAI,IACzC,OAAO,YAAY;;;;;;;;;;;GAsClC"}
|