@llm-jury/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +378 -0
- package/dist/calibration/index.d.ts +1 -0
- package/dist/calibration/index.js +1 -0
- package/dist/calibration/optimizer.d.ts +26 -0
- package/dist/calibration/optimizer.js +61 -0
- package/dist/classifiers/base.d.ts +11 -0
- package/dist/classifiers/base.js +7 -0
- package/dist/classifiers/functionAdapter.d.ts +8 -0
- package/dist/classifiers/functionAdapter.js +20 -0
- package/dist/classifiers/huggingFaceAdapter.d.ts +20 -0
- package/dist/classifiers/huggingFaceAdapter.js +52 -0
- package/dist/classifiers/index.d.ts +5 -0
- package/dist/classifiers/index.js +5 -0
- package/dist/classifiers/llmClassifier.d.ts +19 -0
- package/dist/classifiers/llmClassifier.js +47 -0
- package/dist/classifiers/sklearnAdapter.d.ts +14 -0
- package/dist/classifiers/sklearnAdapter.js +29 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +1 -0
- package/dist/cli/main.d.ts +4 -0
- package/dist/cli/main.js +261 -0
- package/dist/debate/engine.d.ts +48 -0
- package/dist/debate/engine.js +309 -0
- package/dist/debate/index.d.ts +1 -0
- package/dist/debate/index.js +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +7 -0
- package/dist/judges/base.d.ts +16 -0
- package/dist/judges/base.js +1 -0
- package/dist/judges/bayesian.d.ts +8 -0
- package/dist/judges/bayesian.js +52 -0
- package/dist/judges/index.d.ts +5 -0
- package/dist/judges/index.js +5 -0
- package/dist/judges/llmJudge.d.ts +19 -0
- package/dist/judges/llmJudge.js +86 -0
- package/dist/judges/majorityVote.d.ts +5 -0
- package/dist/judges/majorityVote.js +45 -0
- package/dist/judges/weightedVote.d.ts +5 -0
- package/dist/judges/weightedVote.js +42 -0
- package/dist/jury/core.d.ts +43 -0
- package/dist/jury/core.js +113 -0
- package/dist/jury/index.d.ts +1 -0
- package/dist/jury/index.js +1 -0
- package/dist/llm/client.d.ts +23 -0
- package/dist/llm/client.js +85 -0
- package/dist/llm/index.d.ts +1 -0
- package/dist/llm/index.js +1 -0
- package/dist/personas/base.d.ts +19 -0
- package/dist/personas/base.js +1 -0
- package/dist/personas/index.d.ts +2 -0
- package/dist/personas/index.js +2 -0
- package/dist/personas/registry.d.ts +8 -0
- package/dist/personas/registry.js +83 -0
- package/dist/utils.d.ts +2 -0
- package/dist/utils.js +23 -0
- package/package.json +43 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { ClassificationResult, Classifier } from "./base.ts";
/**
 * Minimal structural contract for a scikit-learn-style model:
 * anything exposing `predictProba` (sync or async) can be adapted.
 */
export type SklearnLikeModel = {
    predictProba(features: unknown): number[][] | Promise<number[][]>;
};
/**
 * Optional text vectorizer applied before `predictProba`.
 * Output shape is opaque here — it is passed straight to the model.
 */
export type VectorizerLike = {
    transform(texts: string[]): unknown;
};
/**
 * Adapter that exposes an sklearn-like probabilistic model as a Classifier.
 * The predicted label is the argmax of the first probability row, mapped
 * onto `labels` by index.
 */
export declare class SklearnClassifier implements Classifier {
    /** Label names, index-aligned with the model's probability columns. */
    labels: string[];
    private model;
    private vectorizer?;
    constructor(model: SklearnLikeModel, labels: string[], vectorizer?: VectorizerLike);
    /** Classify one text; throws if the model returns no probabilities. */
    classify(text: string): Promise<ClassificationResult>;
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
 * Adapts a scikit-learn style model (anything with `predictProba`) to the
 * Classifier interface. The winning label is the argmax of the first
 * probability row, mapped onto `labels` by column index.
 */
export class SklearnClassifier {
    labels;
    model;
    vectorizer;
    constructor(model, labels, vectorizer) {
        this.model = model;
        this.labels = labels;
        this.vectorizer = vectorizer;
    }
    /**
     * Classify one text: vectorize it when a vectorizer is configured
     * (otherwise the raw text is wrapped in a singleton array), ask the
     * model for probabilities, and pick the highest-scoring column.
     * Throws when the model yields an empty probability row.
     */
    async classify(text) {
        const input = this.vectorizer ? this.vectorizer.transform([text]) : [text];
        const matrix = await this.model.predictProba(input);
        const scores = matrix[0] ?? [];
        if (scores.length === 0) {
            throw new Error("predictProba returned no probabilities for input");
        }
        // Argmax with strict ">" so earlier columns win ties, matching a manual scan.
        const winner = scores.reduce((best, score, index) => (score > scores[best] ? index : best), 0);
        return {
            // Fall back to the stringified index when labels are shorter than the row.
            label: this.labels[winner] ?? String(winner),
            confidence: Number(scores[winner] ?? 0),
            rawOutput: scores,
        };
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "./main.ts";
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "./main.js";
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
#!/usr/bin/env node
/** Parse a comma-separated label list; falls back when null/empty. */
export declare function parseLabels(raw: string | null, fallback?: string[]): string[];
/** Resolve labels for calibration: explicit --labels wins, else deduped ground truth. */
export declare function resolveCalibrationLabels(rawLabels: string | null, expectedLabels: string[]): string[];
/** CLI entry point; resolves to the intended process exit code. */
export declare function main(argv?: string[]): Promise<number>;
|
package/dist/cli/main.js
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { readFileSync, writeFileSync } from "node:fs";
import { pathToFileURL } from "node:url";
import { ThresholdCalibrator } from "../calibration/optimizer.js";
import { FunctionClassifier } from "../classifiers/functionAdapter.js";
import { HuggingFaceClassifier } from "../classifiers/huggingFaceAdapter.js";
import { LLMClassifier } from "../classifiers/llmClassifier.js";
import { DebateConfig, DebateMode } from "../debate/engine.js";
import { BayesianJudge } from "../judges/bayesian.js";
import { LLMJudge } from "../judges/llmJudge.js";
import { MajorityVoteJudge } from "../judges/majorityVote.js";
import { WeightedVoteJudge } from "../judges/weightedVote.js";
import { Jury } from "../jury/core.js";
import { PersonaRegistry } from "../personas/registry.js";
|
|
14
|
+
/** Return the value following flag `name` in argv, or null when the flag is absent or has no value. */
function parseArg(argv, name) {
    const flagPosition = argv.indexOf(name);
    const valuePosition = flagPosition + 1;
    if (flagPosition === -1 || valuePosition >= argv.length) {
        return null;
    }
    return argv[valuePosition] ?? null;
}
|
|
21
|
+
/** True when the bare flag `name` appears anywhere in argv. */
function parseBoolFlag(argv, name) {
    return argv.indexOf(name) !== -1;
}
|
|
24
|
+
/** Load a JSONL file: one JSON document per non-blank line; blank lines are skipped. */
function readJsonl(path) {
    const raw = readFileSync(path, "utf8");
    const rows = [];
    for (const candidate of raw.split("\n")) {
        const trimmed = candidate.trim();
        if (trimmed) {
            rows.push(JSON.parse(trimmed));
        }
    }
    return rows;
}
|
|
31
|
+
/** Serialize rows as JSONL: one JSON document per line, with a trailing newline. */
function writeJsonl(path, rows) {
    const serialized = [];
    for (const row of rows) {
        serialized.push(JSON.stringify(row));
    }
    writeFileSync(path, `${serialized.join("\n")}\n`, "utf8");
}
|
|
34
|
+
/**
 * Recursively rewrite every object key from camelCase to snake_case.
 * Arrays are walked element by element; primitives (and null) pass through.
 */
function toSnakeCaseObject(value) {
    if (Array.isArray(value)) {
        const converted = [];
        for (const item of value) {
            converted.push(toSnakeCaseObject(item));
        }
        return converted;
    }
    if (!value || typeof value !== "object") {
        return value;
    }
    const result = {};
    for (const key of Object.keys(value)) {
        // Each uppercase letter becomes "_<lowercase>".
        const snakeKey = key.replace(/[A-Z]/g, (upper) => `_${upper.toLowerCase()}`);
        result[snakeKey] = toSnakeCaseObject(value[key]);
    }
    return result;
}
|
|
48
|
+
/**
 * Parse a comma-separated label list; blank entries are dropped.
 * Returns `fallback` when the input is null/empty or yields no labels.
 */
export function parseLabels(raw, fallback = ["safe", "unsafe"]) {
    if (!raw) {
        return fallback;
    }
    const parsed = [];
    for (const piece of raw.split(",")) {
        const label = piece.trim();
        if (label) {
            parsed.push(label);
        }
    }
    return parsed.length === 0 ? fallback : parsed;
}
|
|
58
|
+
/**
 * Resolve the inference label set for calibration: an explicit --labels
 * value wins; otherwise fall back to the deduplicated ground-truth labels.
 */
export function resolveCalibrationLabels(rawLabels, expectedLabels) {
    const dedupedFallback = [...new Set(expectedLabels)];
    return parseLabels(rawLabels, dedupedFallback);
}
|
|
61
|
+
/** Map a (case-insensitive) CLI personas key onto its PersonaRegistry preset. */
function selectPersonas(name) {
    const presets = new Map([
        ["content_moderation", () => PersonaRegistry.contentModeration()],
        ["legal_compliance", () => PersonaRegistry.legalCompliance()],
        ["medical_triage", () => PersonaRegistry.medicalTriage()],
        ["financial_compliance", () => PersonaRegistry.financialCompliance()],
    ]);
    const build = presets.get(name.toLowerCase());
    if (!build) {
        throw new Error(`Unsupported personas set: ${name}`);
    }
    return build();
}
|
|
75
|
+
/**
 * Override each persona's model when a CLI --persona-model was supplied.
 * Returns the input array unchanged (same reference) when no override is given.
 */
function applyPersonaModel(personas, model) {
    if (!model) {
        return personas;
    }
    const overridden = [];
    for (const persona of personas) {
        overridden.push({ ...persona, model });
    }
    return overridden;
}
|
|
81
|
+
/**
 * Instantiate the verdict-aggregation strategy selected on the CLI.
 * Only the "llm" strategy consumes the model argument; the vote-based
 * judges take no configuration here.
 */
function selectJudge(name, model) {
    const strategies = new Map([
        ["llm", () => new LLMJudge({ model: model ?? "gpt-5-mini" })],
        ["majority", () => new MajorityVoteJudge()],
        ["weighted", () => new WeightedVoteJudge()],
        ["bayesian", () => new BayesianJudge()],
    ]);
    const build = strategies.get(name.toLowerCase());
    if (!build) {
        throw new Error(`Unsupported judge strategy: ${name}`);
    }
    return build();
}
|
|
95
|
+
/**
 * Assemble a DebateConfig from CLI flags, validating the requested mode
 * against the DebateMode enumeration. Defaults: independent mode, 1 round,
 * primary result and confidence both shown to personas.
 */
function buildDebateConfig(argv) {
    const requestedMode = parseArg(argv, "--debate-mode") ?? DebateMode.INDEPENDENT;
    const validModes = Object.values(DebateMode);
    if (!validModes.includes(requestedMode)) {
        throw new Error(`Unsupported debate mode: ${requestedMode}`);
    }
    const roundsRaw = parseArg(argv, "--max-rounds") ?? "1";
    return new DebateConfig({
        mode: requestedMode,
        maxRounds: Number(roundsRaw),
        includePrimaryResult: !parseBoolFlag(argv, "--hide-primary-result"),
        includeConfidence: !parseBoolFlag(argv, "--hide-confidence"),
    });
}
|
|
109
|
+
/**
 * Build the primary classifier from its CLI spec.
 *
 * "function"              — deterministic mock that replays each row's own
 *                           predicted_label/label and predicted_confidence.
 * "llm:<model>"           — LLMClassifier over the given model.
 * "huggingface:<model>"   — HuggingFaceClassifier over the given model.
 *
 * Returns the classifier plus an isMockClassifier marker so the caller can
 * throttle batch concurrency for the mock path.
 */
function buildClassifier(classifierSpec, labels, rows) {
    const defaultLabel = labels[0] ?? "unknown";
    if (classifierSpec === "function") {
        const lookup = new Map();
        rows.forEach((row, idx) => {
            const key = String(row.text ?? `row-${idx}`);
            const label = String(row.predicted_label ?? row.label ?? defaultLabel);
            const confidence = Number(row.predicted_confidence ?? 0.95);
            lookup.set(key, [label, confidence]);
        });
        const mock = new FunctionClassifier((text) => lookup.get(text) ?? [defaultLabel, 0.95], labels);
        return { classifier: mock, isMockClassifier: true };
    }
    const llmPrefix = "llm:";
    if (classifierSpec.startsWith(llmPrefix)) {
        const model = classifierSpec.slice(llmPrefix.length).trim();
        if (!model) {
            throw new Error("classifier spec 'llm:' requires a model name");
        }
        return { classifier: new LLMClassifier({ model, labels }), isMockClassifier: false };
    }
    const hfPrefix = "huggingface:";
    if (classifierSpec.startsWith(hfPrefix)) {
        const modelName = classifierSpec.slice(hfPrefix.length).trim();
        if (!modelName) {
            throw new Error("classifier spec 'huggingface:' requires a model name");
        }
        return { classifier: new HuggingFaceClassifier({ modelName }), isMockClassifier: false };
    }
    throw new Error("Unsupported classifier spec. Use: function, llm:<model>, huggingface:<model>");
}
|
|
145
|
+
/** Human-readable CLI help text (no trailing newline — the caller appends one). */
function usageText() {
    const helpLines = [
        "Usage: llm-jury <command> [options]",
        "",
        "Commands:",
        " classify Classify JSONL inputs and write verdicts JSONL",
        " calibrate Calibrate threshold from labeled JSONL",
        "",
        "Common options:",
        " --classifier function|llm:<model>|huggingface:<model>",
        " --personas content_moderation|legal_compliance|medical_triage|financial_compliance",
        " --judge llm|majority|weighted|bayesian",
        " --labels safe,unsafe",
        " --debate-mode independent|sequential|deliberation|adversarial",
        "",
        "Examples:",
        " llm-jury classify --input input.jsonl --output verdicts.jsonl --classifier function --judge majority",
        " llm-jury calibrate --input calibration.jsonl --classifier function --judge majority",
    ];
    return helpLines.join("\n");
}
|
|
165
|
+
/**
 * CLI entry point. Parses argv, wires up the classifier/personas/judge into
 * a Jury, and dispatches to the "classify" or "calibrate" command.
 * Returns the intended process exit code; throws on invalid usage.
 */
export async function main(argv = process.argv.slice(2)) {
    // No args, --help, or -h: print usage and exit successfully.
    if (argv.length === 0 || argv.includes("--help") || argv.includes("-h")) {
        process.stdout.write(`${usageText()}\n`);
        return 0;
    }
    if (argv.includes("--version") || argv.includes("-v")) {
        process.stdout.write("0.1.0\n");
        return 0;
    }
    const command = argv[0];
    // Shared options with their defaults.
    const classifierSpec = parseArg(argv, "--classifier") ?? "function";
    const personasKey = parseArg(argv, "--personas") ?? "content_moderation";
    const judgeKey = parseArg(argv, "--judge") ?? "llm";
    const judgeModel = parseArg(argv, "--judge-model") ?? "gpt-5-mini";
    const personaModel = parseArg(argv, "--persona-model") ?? "gpt-5-mini";
    const rawLabels = parseArg(argv, "--labels");
    const labels = parseLabels(rawLabels, ["safe", "unsafe"]);
    const debateConfig = buildDebateConfig(argv);
    // NOTE(review): numeric flags are not validated; a non-numeric value yields NaN.
    const debateConcurrency = Number(parseArg(argv, "--debate-concurrency") ?? "5");
    const maxDebateCostRaw = parseArg(argv, "--max-debate-cost");
    // undefined means "no cost cap" downstream.
    const maxDebateCostUsd = maxDebateCostRaw == null ? undefined : Number(maxDebateCostRaw);
    if (command === "classify") {
        const input = parseArg(argv, "--input");
        const output = parseArg(argv, "--output");
        const thresholdValue = parseArg(argv, "--threshold");
        const concurrency = Number(parseArg(argv, "--concurrency") ?? "10");
        if (!input || !output) {
            throw new Error("--input and --output are required");
        }
        const threshold = thresholdValue ? Number(thresholdValue) : 0.7;
        const rows = readJsonl(input);
        // Rows without a 'text' field get a synthetic positional key.
        const texts = rows.map((row, idx) => String(row.text ?? `row-${idx}`));
        const { classifier, isMockClassifier } = buildClassifier(classifierSpec, labels, rows);
        const jury = new Jury({
            classifier,
            personas: applyPersonaModel(selectPersonas(personasKey), personaModel),
            confidenceThreshold: threshold,
            judge: selectJudge(judgeKey, judgeModel),
            debateConfig,
            debateConcurrency,
            maxDebateCostUsd,
        });
        // The mock classifier runs serially (concurrency 1) for determinism.
        const verdicts = await jury.classifyBatch(texts, isMockClassifier ? 1 : concurrency);
        writeJsonl(output, verdicts.map((verdict) => toSnakeCaseObject(verdict)));
        return 0;
    }
    if (command === "calibrate") {
        const input = parseArg(argv, "--input");
        const errorCost = Number(parseArg(argv, "--error-cost") ?? "10");
        const escalationCost = Number(parseArg(argv, "--escalation-cost") ?? "0.05");
        const initialThreshold = Number(parseArg(argv, "--initial-threshold") ?? "0.7");
        if (!input) {
            throw new Error("--input is required");
        }
        const rows = readJsonl(input);
        if (rows.length === 0) {
            throw new Error("input jsonl is empty");
        }
        // Calibration needs ground truth on every row; fail fast with a count.
        const missingLabels = rows.filter((row) => row.label == null).length;
        if (missingLabels > 0) {
            throw new Error(`Calibration input requires a ground-truth 'label' field on every row. Missing labels in ${missingLabels} row(s).`);
        }
        const texts = rows.map((row, idx) => String(row.text ?? `row-${idx}`));
        const expectedLabels = rows.map((row) => String(row.label));
        // Inference labels: explicit --labels, otherwise deduped ground truth.
        const inferenceLabels = resolveCalibrationLabels(rawLabels, expectedLabels);
        const { classifier } = buildClassifier(classifierSpec, inferenceLabels, rows);
        const jury = new Jury({
            classifier,
            personas: applyPersonaModel(selectPersonas(personasKey), personaModel),
            confidenceThreshold: initialThreshold,
            judge: selectJudge(judgeKey, judgeModel),
            debateConfig,
            debateConcurrency,
            maxDebateCostUsd,
        });
        const calibrator = new ThresholdCalibrator(jury);
        const bestThreshold = await calibrator.calibrate({
            texts,
            labels: expectedLabels,
            errorCost,
            escalationCost,
        });
        const report = calibrator.calibrationReport();
        report.bestThreshold = bestThreshold;
        // Report is emitted as a single snake_cased JSON line on stdout.
        process.stdout.write(`${JSON.stringify(toSnakeCaseObject(report))}\n`);
        return 0;
    }
    throw new Error("Supported commands: classify, calibrate");
}
|
|
254
|
+
// Run the CLI only when this module is the direct entry point.
// pathToFileURL handles Windows drive letters and percent-encoded
// characters (e.g. spaces), which the previous naive
// `file://${process.argv[1]}` template comparison did not — on those
// platforms the guard silently never matched and the CLI did nothing.
const entryPath = process.argv[1];
if (entryPath && import.meta.url === pathToFileURL(entryPath).href) {
    main().then((code) => {
        process.exitCode = code;
    }, (err) => {
        // Set exitCode instead of process.exit() so pending stdout writes flush.
        console.error(err);
        process.exitCode = 1;
    });
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import type { ClassificationResult } from "../classifiers/base.ts";
import type { LLMClient } from "../llm/client.ts";
import type { Persona, PersonaResponse } from "../personas/base.ts";
/** Debate orchestration strategies supported by the engine. */
export declare const DebateMode: {
    readonly INDEPENDENT: "independent";
    readonly SEQUENTIAL: "sequential";
    readonly DELIBERATION: "deliberation";
    readonly ADVERSARIAL: "adversarial";
};
export type DebateMode = (typeof DebateMode)[keyof typeof DebateMode];
/** Full record of one debate: every round's responses plus usage totals. */
export type DebateTranscript = {
    inputText: string;
    /** The low-confidence classifier result that triggered the debate. */
    primaryResult: ClassificationResult;
    rounds: PersonaResponse[][];
    /** Only present for deliberation debates that completed within budget. */
    summary?: string;
    durationMs: number;
    totalTokens: number;
    // NOTE(review): the implementation always produces a number here; the
    // `| null` admits callers that cannot compute cost — confirm intent.
    totalCostUsd: number | null;
};
/** Tunable debate parameters; unset options take the engine's defaults. */
export declare class DebateConfig {
    mode: DebateMode;
    maxRounds: number;
    /** Whether personas see the primary classifier's label. */
    includePrimaryResult: boolean;
    /** Whether personas see the primary classifier's confidence. */
    includeConfidence: boolean;
    constructor(options?: Partial<DebateConfig>);
}
/** Orchestrates multi-persona debates over a single classification input. */
export declare class DebateEngine {
    private personas;
    private config;
    private llmClient;
    private concurrency;
    constructor(personas: Persona[], config?: DebateConfig, llmClient?: LLMClient, concurrency?: number);
    /** The JSON response-format instructions appended to every persona prompt. */
    static jsonResponseBlock(): string;
    /** Run a full debate; maxCostUsd (when set) soft-caps LLM spend. */
    debate(text: string, primaryResult: ClassificationResult, labels: string[], maxCostUsd?: number | null): Promise<DebateTranscript>;
    /** One parallel round where each persona answers independently. */
    runRound(text: string, primaryResult: ClassificationResult, labels: string[], priorRounds: PersonaResponse[][]): Promise<PersonaResponse[]>;
    /** One parallel round where personas revise in light of prior rounds. */
    runDeliberationRound(text: string, primaryResult: ClassificationResult, labels: string[], priorRounds: PersonaResponse[][]): Promise<PersonaResponse[]>;
    queryPersona(persona: Persona, text: string, primaryResult: ClassificationResult, labels: string[], priorRounds: PersonaResponse[][]): Promise<PersonaResponse>;
    queryPersonaDeliberation(persona: Persona, text: string, primaryResult: ClassificationResult, labels: string[], priorRounds: PersonaResponse[][]): Promise<PersonaResponse>;
    buildPersonaPrompt(persona: Persona, text: string, primary: ClassificationResult, labels: string[], priorRounds: PersonaResponse[][]): string;
    buildDeliberationPrompt(persona: Persona, text: string, primary: ClassificationResult, labels: string[], priorRounds: PersonaResponse[][]): string;
    /** Ask an LLM for a neutral synthesis of all rounds. */
    summarise(text: string, labels: string[], rounds: PersonaResponse[][]): Promise<{
        summary: string;
        tokens: number;
        cost: number;
    }>;
    /** Parse a persona's raw LLM output; unparseable output degrades to label "unknown". */
    parsePersonaResponse(raw: string, personaName: string): PersonaResponse;
    /** True when every response in the round carries the same label. */
    consensusReached(roundResponses: PersonaResponse[]): boolean;
}
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
import { LiteLLMClient } from "../llm/client.js";
|
|
2
|
+
import { stripMarkdown } from "../utils.js";
|
|
3
|
+
// System prompt for the neutral post-debate summariser (see summarise()).
const SUMMARISATION_PROMPT = "You are a neutral summarisation agent. You have observed a structured debate " +
    "among domain experts about classifying a piece of text.\n\n" +
    "Produce a concise synthesis that covers:\n" +
    "1. The main arguments from each side\n" +
    "2. Points of consensus among the experts\n" +
    "3. Unresolved disagreements\n\n" +
    "Be factual and impartial. Do not add your own classification.";
// Instructions appended to deliberation-round prompts, forcing each persona
// to engage with its peers before restating or revising its verdict.
const DELIBERATION_INSTRUCTIONS = "You have seen the initial assessments from all experts on this input. " +
    "You MUST:\n" +
    "(i) Engage with at least one other expert's reasoning — agree or disagree " +
    "with supporting rationale.\n" +
    "(ii) Revise your own classification if you find their counter-arguments compelling.\n" +
    "(iii) Re-evaluate the input through the interpretive lens of at least one other expert's " +
    "perspective, considering both intent and impact.\n\n" +
    "Then provide your revised assessment.";
|
|
18
|
+
// Debate strategies. Per debate():
//  - independent/adversarial: one parallel round;
//  - sequential: personas answer one after another, each seeing prior answers;
//  - deliberation: multiple rounds with consensus early-exit plus a summary.
export const DebateMode = {
    INDEPENDENT: "independent",
    SEQUENTIAL: "sequential",
    DELIBERATION: "deliberation",
    ADVERSARIAL: "adversarial",
};
|
|
24
|
+
/**
 * Debate parameters with defaults: deliberation mode, 2 rounds, and the
 * primary classifier's label and confidence both shown to personas.
 */
export class DebateConfig {
    mode;
    maxRounds;
    includePrimaryResult;
    includeConfidence;
    constructor(options = {}) {
        const { mode, maxRounds, includePrimaryResult, includeConfidence } = options;
        this.mode = mode ?? DebateMode.DELIBERATION;
        this.maxRounds = maxRounds ?? 2;
        this.includePrimaryResult = includePrimaryResult ?? true;
        this.includeConfidence = includeConfidence ?? true;
    }
}
|
|
36
|
+
/**
 * Orchestrates a multi-persona debate over one classification input.
 * Supports four modes (see DebateMode) and an optional soft cost cap:
 * spend is checked after each unit of work, so the cap may be exceeded
 * by at most one round/response before the debate stops.
 */
export class DebateEngine {
    personas;
    config;
    llmClient;
    concurrency;
    constructor(personas, config = new DebateConfig(), llmClient = new LiteLLMClient(), concurrency = 5) {
        this.personas = personas;
        this.config = config;
        this.llmClient = llmClient;
        // Guard against zero/negative concurrency, which would stall batching.
        this.concurrency = Math.max(1, concurrency);
    }
    /** JSON response-format instructions appended to every persona prompt. */
    static jsonResponseBlock() {
        return ("\n## Your Assessment\n\n" +
            "Provide your classification. Respond ONLY with valid JSON:\n" +
            "```json\n" +
            "{\n" +
            ' "label": "<your classification>",\n' +
            ' "confidence": <0.0-1.0>,\n' +
            ' "reasoning": "<your full reasoning>",\n' +
            ' "key_factors": ["<factor 1>", "<factor 2>"],\n' +
            ' "dissent_notes": "<optional rebuttal against opposing side>"\n' +
            "}\n" +
            "```");
    }
    /**
     * Run a full debate and return its transcript.
     * With no personas configured, returns an empty transcript immediately.
     * maxCostUsd (when non-null) is a soft budget checked after each step.
     */
    async debate(text, primaryResult, labels, maxCostUsd = null) {
        const start = Date.now();
        const rounds = [];
        let totalTokens = 0;
        let totalCostUsd = 0;
        if (this.personas.length === 0) {
            return {
                inputText: text,
                primaryResult,
                rounds: [],
                durationMs: Date.now() - start,
                totalTokens: 0,
                totalCostUsd: 0,
            };
        }
        // Independent and adversarial: a single parallel round. (Adversarial
        // stance assignment happens inside buildPersonaPrompt.)
        if (this.config.mode === DebateMode.INDEPENDENT || this.config.mode === DebateMode.ADVERSARIAL) {
            const responses = await this.runRound(text, primaryResult, labels, []);
            rounds.push(responses);
            responses.forEach((response) => {
                totalTokens += Number(response.tokensUsed ?? 0);
                totalCostUsd += Number(response.costUsd ?? 0);
            });
        }
        // Sequential: each persona sees all earlier responses in this round.
        else if (this.config.mode === DebateMode.SEQUENTIAL) {
            const responses = [];
            for (const persona of this.personas) {
                const response = await this.queryPersona(persona, text, primaryResult, labels, responses.length > 0 ? [responses] : []);
                responses.push(response);
                totalTokens += Number(response.tokensUsed ?? 0);
                totalCostUsd += Number(response.costUsd ?? 0);
                // Soft budget: stop querying further personas once exceeded.
                if (maxCostUsd != null && totalCostUsd > maxCostUsd) {
                    break;
                }
            }
            rounds.push(responses);
        }
        // Deliberation: initial round, then revision rounds until maxRounds,
        // consensus, or budget exhaustion; finishes with a neutral summary.
        else if (this.config.mode === DebateMode.DELIBERATION) {
            const firstRound = await this.runRound(text, primaryResult, labels, []);
            rounds.push(firstRound);
            firstRound.forEach((response) => {
                totalTokens += Number(response.tokensUsed ?? 0);
                totalCostUsd += Number(response.costUsd ?? 0);
            });
            // Budget already blown after round one: return without a summary.
            if (maxCostUsd != null && totalCostUsd > maxCostUsd) {
                return {
                    inputText: text,
                    primaryResult,
                    rounds,
                    durationMs: Date.now() - start,
                    totalTokens,
                    totalCostUsd,
                };
            }
            // Rounds 2..maxRounds (i starts at 1 because round one is done).
            for (let i = 1; i < Math.max(1, this.config.maxRounds); i += 1) {
                const current = await this.runDeliberationRound(text, primaryResult, labels, rounds);
                rounds.push(current);
                current.forEach((response) => {
                    totalTokens += Number(response.tokensUsed ?? 0);
                    totalCostUsd += Number(response.costUsd ?? 0);
                });
                if (maxCostUsd != null && totalCostUsd > maxCostUsd) {
                    break;
                }
                // Early exit once every persona agrees on one label.
                if (this.consensusReached(current)) {
                    break;
                }
            }
            // Summarise only when still within budget.
            let summary;
            if (maxCostUsd == null || totalCostUsd <= maxCostUsd) {
                const summaryResult = await this.summarise(text, labels, rounds);
                totalTokens += summaryResult.tokens;
                totalCostUsd += summaryResult.cost;
                summary = summaryResult.summary;
            }
            return {
                inputText: text,
                primaryResult,
                rounds,
                summary,
                durationMs: Date.now() - start,
                totalTokens,
                totalCostUsd,
            };
        }
        // Independent/sequential/adversarial (and any unknown mode, which
        // yields an empty-rounds transcript) return without a summary.
        return {
            inputText: text,
            primaryResult,
            rounds,
            durationMs: Date.now() - start,
            totalTokens,
            totalCostUsd,
        };
    }
    /** Query every persona once, in batches of `concurrency`, preserving order. */
    async runRound(text, primaryResult, labels, priorRounds) {
        const out = [];
        for (let i = 0; i < this.personas.length; i += this.concurrency) {
            const batch = this.personas.slice(i, i + this.concurrency);
            const responses = await Promise.all(batch.map((persona) => this.queryPersona(persona, text, primaryResult, labels, priorRounds)));
            out.push(...responses);
        }
        return out;
    }
    /** Same batching as runRound, but with deliberation (revision) prompts. */
    async runDeliberationRound(text, primaryResult, labels, priorRounds) {
        const out = [];
        for (let i = 0; i < this.personas.length; i += this.concurrency) {
            const batch = this.personas.slice(i, i + this.concurrency);
            const responses = await Promise.all(batch.map((persona) => this.queryPersonaDeliberation(persona, text, primaryResult, labels, priorRounds)));
            out.push(...responses);
        }
        return out;
    }
    /** Build an initial-round prompt, call the persona's model, parse and annotate the reply. */
    async queryPersona(persona, text, primaryResult, labels, priorRounds) {
        const prompt = this.buildPersonaPrompt(persona, text, primaryResult, labels, priorRounds);
        const payload = await this.llmClient.complete(persona.model, persona.systemPrompt, prompt, persona.temperature);
        const parsed = this.parsePersonaResponse(payload.content, persona.name);
        // Attach raw output and usage accounting onto the parsed response.
        parsed.rawResponse = payload.content;
        parsed.tokensUsed = Number(payload.tokens ?? 0);
        parsed.costUsd = Number(payload.costUsd ?? 0);
        return parsed;
    }
    /** Deliberation-round variant of queryPersona (uses the revision prompt). */
    async queryPersonaDeliberation(persona, text, primaryResult, labels, priorRounds) {
        const prompt = this.buildDeliberationPrompt(persona, text, primaryResult, labels, priorRounds);
        const payload = await this.llmClient.complete(persona.model, persona.systemPrompt, prompt, persona.temperature);
        const parsed = this.parsePersonaResponse(payload.content, persona.name);
        parsed.rawResponse = payload.content;
        parsed.tokensUsed = Number(payload.tokens ?? 0);
        parsed.costUsd = Number(payload.costUsd ?? 0);
        return parsed;
    }
    /**
     * Markdown prompt for an initial assessment: persona identity, the input,
     * labels, optional adversarial stance (even index = Prosecution, odd =
     * Defense), optional primary result, prior rounds, and the JSON schema.
     */
    buildPersonaPrompt(persona, text, primary, labels, priorRounds) {
        const parts = [];
        parts.push(`## Persona\n\n${persona.name}: ${persona.role}\n`);
        parts.push(`## Input to Classify\n\n${text}\n`);
        parts.push(`## Available Labels\n\n${labels.join(", ")}\n`);
        if (this.config.mode === DebateMode.ADVERSARIAL) {
            const personaIndex = this.personas.indexOf(persona);
            const stance = personaIndex % 2 === 0 ? "Prosecution" : "Defense";
            parts.push("## Adversarial Role\n\n" +
                `You are assigned the **${stance}** side. Argue this stance rigorously while staying truthful to the evidence.\n`);
        }
        if (this.config.includePrimaryResult) {
            const confidence = this.config.includeConfidence ? ` (confidence: ${primary.confidence.toFixed(2)})` : "";
            parts.push("## Primary Classifier Result\n\n" +
                `Label: ${primary.label}${confidence}\n` +
                "Note: This was flagged as low-confidence and escalated to you.\n");
        }
        if (priorRounds.length > 0) {
            parts.push("## Previous Assessments\n");
            priorRounds.forEach((roundResponses, idx) => {
                parts.push(`\n### Round ${idx + 1}\n`);
                for (const response of roundResponses) {
                    parts.push(`**${response.personaName}**: ${response.label} (confidence: ${response.confidence.toFixed(2)})\n` +
                        `Reasoning: ${response.reasoning}\n`);
                }
            });
        }
        parts.push(DebateEngine.jsonResponseBlock());
        return parts.join("\n");
    }
    /**
     * Markdown prompt for a revision round: like buildPersonaPrompt but shows
     * all prior opinions and appends DELIBERATION_INSTRUCTIONS so the persona
     * must engage with its peers before restating or revising.
     */
    buildDeliberationPrompt(persona, text, primary, labels, priorRounds) {
        const parts = [];
        parts.push(`## Persona\n\n${persona.name}: ${persona.role}\n`);
        parts.push(`## Input to Classify\n\n${text}\n`);
        parts.push(`## Available Labels\n\n${labels.join(", ")}\n`);
        if (this.config.includePrimaryResult) {
            const confidence = this.config.includeConfidence ? ` (confidence: ${primary.confidence.toFixed(2)})` : "";
            parts.push("## Primary Classifier Result\n\n" +
                `Label: ${primary.label}${confidence}\n` +
                "Note: This was flagged as low-confidence and escalated to you.\n");
        }
        if (priorRounds.length > 0) {
            priorRounds.forEach((roundResponses, idx) => {
                if (idx === 0) {
                    parts.push("## Initial Expert Opinions\n");
                }
                else {
                    parts.push(`## Revised Opinions (Round ${idx + 1})\n`);
                }
                for (const response of roundResponses) {
                    parts.push(`**${response.personaName}**: ${response.label} (confidence: ${response.confidence.toFixed(2)})\n` +
                        `Reasoning: ${response.reasoning}\n`);
                }
            });
        }
        parts.push(`\n## Deliberation Instructions\n\n${DELIBERATION_INSTRUCTIONS}\n`);
        parts.push(DebateEngine.jsonResponseBlock());
        return parts.join("\n");
    }
    /**
     * Ask an LLM (the first persona's model, or "gpt-5-mini" when there is
     * none) for a neutral synthesis of the whole debate, at temperature 0.
     */
    async summarise(text, labels, rounds) {
        const parts = [];
        parts.push(`## Input\n\n${text}\n`);
        parts.push(`## Available Labels\n\n${labels.join(", ")}\n`);
        parts.push("## Expert Debate\n");
        rounds.forEach((roundResponses, idx) => {
            if (idx === 0) {
                parts.push("\n### Initial Expert Opinions\n");
            }
            else {
                parts.push(`\n### Revised Opinions (Round ${idx + 1})\n`);
            }
            for (const response of roundResponses) {
                parts.push(`**${response.personaName}**: ${response.label} (confidence: ${response.confidence.toFixed(2)})\n` +
                    `Reasoning: ${response.reasoning}\n`);
            }
        });
        const model = this.personas[0]?.model ?? "gpt-5-mini";
        const payload = await this.llmClient.complete(model, SUMMARISATION_PROMPT, parts.join("\n"), 0);
        return {
            summary: payload.content,
            tokens: Number(payload.tokens ?? 0),
            cost: Number(payload.costUsd ?? 0),
        };
    }
    /**
     * Parse a persona's raw LLM reply (markdown fences stripped first).
     * Anything that is not a JSON object degrades to a zero-confidence
     * "unknown" response carrying a truncated copy of the raw text.
     */
    parsePersonaResponse(raw, personaName) {
        let parsed;
        try {
            const candidate = JSON.parse(stripMarkdown(raw));
            if (!candidate || typeof candidate !== "object" || Array.isArray(candidate)) {
                throw new Error("Persona response must be a JSON object.");
            }
            parsed = candidate;
        }
        catch {
            return {
                personaName,
                label: "unknown",
                confidence: 0,
                reasoning: `Failed to parse persona response: ${raw.slice(0, 200)}`,
                keyFactors: [],
            };
        }
        return {
            personaName,
            label: String(parsed.label ?? "unknown"),
            confidence: Number(parsed.confidence ?? 0),
            reasoning: String(parsed.reasoning ?? ""),
            keyFactors: Array.isArray(parsed.key_factors) ? parsed.key_factors.map(String) : [],
            dissentNotes: parsed.dissent_notes,
            // Usage fields are filled in by the caller (queryPersona*).
            tokensUsed: 0,
            costUsd: 0,
        };
    }
    /** True when the round is non-empty and every response shares one label. */
    consensusReached(roundResponses) {
        if (roundResponses.length === 0) {
            return false;
        }
        const labels = new Set(roundResponses.map((r) => r.label));
        return labels.size === 1;
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "./engine.ts";
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "./engine.js";
|