@llm-jury/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +378 -0
- package/dist/calibration/index.d.ts +1 -0
- package/dist/calibration/index.js +1 -0
- package/dist/calibration/optimizer.d.ts +26 -0
- package/dist/calibration/optimizer.js +61 -0
- package/dist/classifiers/base.d.ts +11 -0
- package/dist/classifiers/base.js +7 -0
- package/dist/classifiers/functionAdapter.d.ts +8 -0
- package/dist/classifiers/functionAdapter.js +20 -0
- package/dist/classifiers/huggingFaceAdapter.d.ts +20 -0
- package/dist/classifiers/huggingFaceAdapter.js +52 -0
- package/dist/classifiers/index.d.ts +5 -0
- package/dist/classifiers/index.js +5 -0
- package/dist/classifiers/llmClassifier.d.ts +19 -0
- package/dist/classifiers/llmClassifier.js +47 -0
- package/dist/classifiers/sklearnAdapter.d.ts +14 -0
- package/dist/classifiers/sklearnAdapter.js +29 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +1 -0
- package/dist/cli/main.d.ts +4 -0
- package/dist/cli/main.js +261 -0
- package/dist/debate/engine.d.ts +48 -0
- package/dist/debate/engine.js +309 -0
- package/dist/debate/index.d.ts +1 -0
- package/dist/debate/index.js +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +7 -0
- package/dist/judges/base.d.ts +16 -0
- package/dist/judges/base.js +1 -0
- package/dist/judges/bayesian.d.ts +8 -0
- package/dist/judges/bayesian.js +52 -0
- package/dist/judges/index.d.ts +5 -0
- package/dist/judges/index.js +5 -0
- package/dist/judges/llmJudge.d.ts +19 -0
- package/dist/judges/llmJudge.js +86 -0
- package/dist/judges/majorityVote.d.ts +5 -0
- package/dist/judges/majorityVote.js +45 -0
- package/dist/judges/weightedVote.d.ts +5 -0
- package/dist/judges/weightedVote.js +42 -0
- package/dist/jury/core.d.ts +43 -0
- package/dist/jury/core.js +113 -0
- package/dist/jury/index.d.ts +1 -0
- package/dist/jury/index.js +1 -0
- package/dist/llm/client.d.ts +23 -0
- package/dist/llm/client.js +85 -0
- package/dist/llm/index.d.ts +1 -0
- package/dist/llm/index.js +1 -0
- package/dist/personas/base.d.ts +19 -0
- package/dist/personas/base.js +1 -0
- package/dist/personas/index.d.ts +2 -0
- package/dist/personas/index.js +2 -0
- package/dist/personas/registry.d.ts +8 -0
- package/dist/personas/registry.js +83 -0
- package/dist/utils.d.ts +2 -0
- package/dist/utils.js +23 -0
- package/package.json +43 -0
package/README.md
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
# @llm-jury/core
|
|
2
|
+
|
|
3
|
+
**When your classifier is uncertain, let a configurable jury of LLM personas debate and return an auditable verdict.**
|
|
4
|
+
|
|
5
|
+
[](https://www.npmjs.com/package/@llm-jury/core)
|
|
6
|
+
[](https://nodejs.org/)
|
|
7
|
+
[](../../LICENSE)
|
|
8
|
+
|
|
9
|
+
## Overview
|
|
10
|
+
|
|
11
|
+
`@llm-jury/core` is an SDK, not a hosted API. Your app imports it directly:
|
|
12
|
+
|
|
13
|
+
```ts
|
|
14
|
+
import { Jury, PersonaRegistry } from "@llm-jury/core";
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
It wraps a classifier returning `[label, confidence]` and adds confidence-based escalation:
|
|
18
|
+
|
|
19
|
+
1. Run primary classifier (fast path)
|
|
20
|
+
2. Return directly when confidence is high
|
|
21
|
+
3. Escalate low-confidence cases to persona debate
|
|
22
|
+
4. Consolidate with a judge strategy
|
|
23
|
+
5. Return verdict + audit trail
|
|
24
|
+
|
|
25
|
+
### Research Inspiration
|
|
26
|
+
|
|
27
|
+
`llm-jury` is inspired by the CEJ (Collaborative Expert Judgment) module described in [arXiv:2512.23732](https://arxiv.org/abs/2512.23732). This package generalizes that pattern into a domain-agnostic SDK with pluggable classifiers, multiple debate modes, multiple judge strategies, threshold calibration, and Python + TypeScript distributions.
|
|
28
|
+
|
|
29
|
+
## Install
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
npm install @llm-jury/core
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Prerequisites
|
|
36
|
+
|
|
37
|
+
- Node.js `>=22.6`
|
|
38
|
+
- For real LLM calls: `OPENAI_API_KEY` (or provider key through your LiteLLM/OpenAI setup)
|
|
39
|
+
|
|
40
|
+
## Quick Start
|
|
41
|
+
|
|
42
|
+
```ts
|
|
43
|
+
import {
|
|
44
|
+
FunctionClassifier,
|
|
45
|
+
Jury,
|
|
46
|
+
MajorityVoteJudge,
|
|
47
|
+
PersonaRegistry,
|
|
48
|
+
} from "@llm-jury/core";
|
|
49
|
+
|
|
50
|
+
const classifier = new FunctionClassifier(
|
|
51
|
+
() => ["safe", 0.62],
|
|
52
|
+
["safe", "unsafe"],
|
|
53
|
+
);
|
|
54
|
+
|
|
55
|
+
const jury = new Jury({
|
|
56
|
+
classifier,
|
|
57
|
+
personas: PersonaRegistry.contentModeration(),
|
|
58
|
+
confidenceThreshold: 0.7,
|
|
59
|
+
judge: new MajorityVoteJudge(),
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
const verdict = await jury.classify("borderline message");
|
|
63
|
+
console.log(verdict.label, verdict.confidence, verdict.wasEscalated);
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
The default LLM client sends requests to `POST /chat/completions` on `OPENAI_BASE_URL` / `LITELLM_BASE_URL` / `https://api.openai.com/v1`.
|
|
67
|
+
|
|
68
|
+
## SDK Response
|
|
69
|
+
|
|
70
|
+
`jury.classify(text)` returns a `Verdict`. There are two shapes depending on whether the input was escalated.
|
|
71
|
+
|
|
72
|
+
### Fast path (confidence above threshold)
|
|
73
|
+
|
|
74
|
+
When the primary classifier is confident enough, the verdict is returned directly with no debate.
|
|
75
|
+
|
|
76
|
+
```json
|
|
77
|
+
{
|
|
78
|
+
"label": "safe",
|
|
79
|
+
"confidence": 0.95,
|
|
80
|
+
"reasoning": "Classified by primary classifier with sufficient confidence.",
|
|
81
|
+
"wasEscalated": false,
|
|
82
|
+
"primaryResult": {
|
|
83
|
+
"label": "safe",
|
|
84
|
+
"confidence": 0.95,
|
|
85
|
+
"rawOutput": { "label": "safe", "confidence": 0.95 }
|
|
86
|
+
},
|
|
87
|
+
"debateTranscript": null,
|
|
88
|
+
"judgeStrategy": "primary_classifier",
|
|
89
|
+
"totalDurationMs": 312,
|
|
90
|
+
"totalCostUsd": 0.0001
|
|
91
|
+
}
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Escalated (confidence below threshold)
|
|
95
|
+
|
|
96
|
+
When confidence is too low, the input goes through persona debate and a judge produces the final verdict.
|
|
97
|
+
|
|
98
|
+
```json
|
|
99
|
+
{
|
|
100
|
+
"label": "unsafe",
|
|
101
|
+
"confidence": 1.0,
|
|
102
|
+
"reasoning": "The statement is a sweeping negative generalization about an entire group of people.",
|
|
103
|
+
"wasEscalated": true,
|
|
104
|
+
"primaryResult": {
|
|
105
|
+
"label": "unsafe",
|
|
106
|
+
"confidence": 0.62,
|
|
107
|
+
"rawOutput": { "label": "unsafe", "confidence": 0.62 }
|
|
108
|
+
},
|
|
109
|
+
"debateTranscript": {
|
|
110
|
+
"inputText": "Those people always cause problems wherever they go",
|
|
111
|
+
"primaryResult": { "label": "unsafe", "confidence": 0.62 },
|
|
112
|
+
"rounds": [
|
|
113
|
+
[
|
|
114
|
+
{
|
|
115
|
+
"personaName": "Policy Analyst",
|
|
116
|
+
"label": "unsafe",
|
|
117
|
+
"confidence": 0.90,
|
|
118
|
+
"reasoning": "The statement is a blanket negative generalization targeting a group.",
|
|
119
|
+
"keyFactors": ["group-targeting language", "sweeping generalization"],
|
|
120
|
+
"dissentNotes": null,
|
|
121
|
+
"tokensUsed": 185,
|
|
122
|
+
"costUsd": 0.0003
|
|
123
|
+
},
|
|
124
|
+
{
|
|
125
|
+
"personaName": "Cultural Context Expert",
|
|
126
|
+
"label": "unsafe",
|
|
127
|
+
"confidence": 0.85,
|
|
128
|
+
"reasoning": "While context could soften interpretation, the phrasing is unambiguously negative.",
|
|
129
|
+
"keyFactors": ["no mitigating context", "derogatory framing"],
|
|
130
|
+
"dissentNotes": null,
|
|
131
|
+
"tokensUsed": 192,
|
|
132
|
+
"costUsd": 0.0003
|
|
133
|
+
},
|
|
134
|
+
{
|
|
135
|
+
"personaName": "Harm Assessment Specialist",
|
|
136
|
+
"label": "unsafe",
|
|
137
|
+
"confidence": 0.92,
|
|
138
|
+
"reasoning": "Broad negative generalization risks normalizing prejudice against the targeted group.",
|
|
139
|
+
"keyFactors": ["potential for real-world harm", "targets unspecified group"],
|
|
140
|
+
"dissentNotes": null,
|
|
141
|
+
"tokensUsed": 178,
|
|
142
|
+
"costUsd": 0.0003
|
|
143
|
+
}
|
|
144
|
+
]
|
|
145
|
+
],
|
|
146
|
+
"summary": "The experts unanimously agreed the statement constitutes an unsafe sweeping generalization targeting a group.",
|
|
147
|
+
"durationMs": 2450,
|
|
148
|
+
"totalTokens": 555,
|
|
149
|
+
"totalCostUsd": 0.0009
|
|
150
|
+
},
|
|
151
|
+
"judgeStrategy": "majority_vote",
|
|
152
|
+
"totalDurationMs": 2780,
|
|
153
|
+
"totalCostUsd": 0.001
|
|
154
|
+
}
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Verdict field reference
|
|
158
|
+
|
|
159
|
+
| Field | Type | Description |
|
|
160
|
+
|---|---|---|
|
|
161
|
+
| `label` | `string` | Final classification |
|
|
162
|
+
| `confidence` | `number` | Final confidence (0.0-1.0) |
|
|
163
|
+
| `reasoning` | `string` | Human-readable explanation |
|
|
164
|
+
| `wasEscalated` | `boolean` | Whether debate was triggered |
|
|
165
|
+
| `primaryResult` | `ClassificationResult` | Fast-path classifier output |
|
|
166
|
+
| `debateTranscript` | `DebateTranscript \| null` | Full debate audit trail incl. `rounds`, `summary`, token/cost totals (null if not escalated) |
|
|
167
|
+
| `judgeStrategy` | `string` | Strategy that produced the verdict |
|
|
168
|
+
| `totalDurationMs` | `number` | Wall-clock time (ms) |
|
|
169
|
+
| `totalCostUsd` | `number \| null` | API cost in USD |
|
|
170
|
+
|
|
171
|
+
### Persona response fields
|
|
172
|
+
|
|
173
|
+
| Field | Type | Description |
|
|
174
|
+
|---|---|---|
|
|
175
|
+
| `personaName` | `string` | Which persona |
|
|
176
|
+
| `label` | `string` | This persona's classification |
|
|
177
|
+
| `confidence` | `number` | This persona's confidence |
|
|
178
|
+
| `reasoning` | `string` | Full reasoning chain |
|
|
179
|
+
| `keyFactors` | `string[]` | Key decision factors |
|
|
180
|
+
| `dissentNotes` | `string \| null` | Rebuttal in deliberation/adversarial modes |
|
|
181
|
+
| `tokensUsed` | `number` | Tokens consumed |
|
|
182
|
+
| `costUsd` | `number \| null` | API cost for this call |
|
|
183
|
+
|
|
184
|
+
`DebateTranscript` also includes `summary` (`string?`) — a structured summary produced during the Summarisation stage of the deliberation pipeline (undefined in non-deliberation modes).
|
|
185
|
+
|
|
186
|
+
## Choosing What To Use
|
|
187
|
+
|
|
188
|
+
### Classifiers
|
|
189
|
+
|
|
190
|
+
| Classifier | When to use | Example |
|
|
191
|
+
|---|---|---|
|
|
192
|
+
| `FunctionClassifier` | Wrap an existing model or function | `new FunctionClassifier(fn, labels)` |
|
|
193
|
+
| `LLMClassifier` | Primary classifier is an LLM | `new LLMClassifier({ labels: ["safe","unsafe"] })` |
|
|
194
|
+
| `HuggingFaceClassifier` | Local HuggingFace model | `new HuggingFaceClassifier({ modelName: "..." })` |
|
|
195
|
+
| `SklearnClassifier` | Wrap an sklearn-like model | `new SklearnClassifier(model, labels, vectorizer)` |
|
|
196
|
+
|
|
197
|
+
### Built-in Persona Sets
|
|
198
|
+
|
|
199
|
+
| Method | Domain | Personas |
|
|
200
|
+
|---|---|---|
|
|
201
|
+
| `PersonaRegistry.contentModeration()` | Trust & Safety | Policy Analyst, Cultural Context Expert, Harm Assessment Specialist |
|
|
202
|
+
| `PersonaRegistry.legalCompliance()` | Legal/Regulatory | Regulatory Attorney, Business Risk Analyst, Industry Standards Expert |
|
|
203
|
+
| `PersonaRegistry.medicalTriage()` | Healthcare | Clinical Safety Reviewer, Contextual Historian, Resource Allocation Analyst |
|
|
204
|
+
| `PersonaRegistry.financialCompliance()` | AML/KYC | AML Investigator, Risk Quant, Business Controls Reviewer |
|
|
205
|
+
| `PersonaRegistry.custom([...])` | Any domain | Provide your own persona objects |
|
|
206
|
+
|
|
207
|
+
### Judge Strategies
|
|
208
|
+
|
|
209
|
+
| Strategy | How it decides | Best for |
|
|
210
|
+
|---|---|---|
|
|
211
|
+
| `new MajorityVoteJudge()` | Counts persona votes. Confidence = fraction agreeing. | Fast, no extra LLM call |
|
|
212
|
+
| `new WeightedVoteJudge()` | Weights votes by persona confidence. | When confidence scores vary significantly |
|
|
213
|
+
| `new LLMJudge()` | LLM reads full transcript and synthesises verdict. | Maximum quality, auditable reasoning |
|
|
214
|
+
| `new BayesianJudge()` | Bayesian aggregation with optional persona priors. | When you have reliability data on personas |
|
|
215
|
+
|
|
216
|
+
### Debate Modes
|
|
217
|
+
|
|
218
|
+
| Mode | Behaviour | Best for |
|
|
219
|
+
|---|---|---|
|
|
220
|
+
| `independent` | All personas assess in parallel | Fast, low cost |
|
|
221
|
+
| `sequential` | Each persona sees previous responses | Building on earlier assessments |
|
|
222
|
+
| `deliberation` (default) | Full 4-stage CEJ pipeline: Initial Opinions, Structured Debate, Summarisation, Final Judgment | Maximum value; complex edge cases |
|
|
223
|
+
| `adversarial` | Assigns prosecution/defense stances | Stress-testing a classification |
|
|
224
|
+
|
|
225
|
+
## Important Notes
|
|
226
|
+
|
|
227
|
+
- **Temperature is handled automatically.** The SDK omits the temperature parameter for reasoning models (`gpt-5*`, `o1*`, `o3*`). No configuration needed.
|
|
228
|
+
- **Escalation is strictly `< threshold`** — confidence exactly equal to the threshold does NOT escalate.
|
|
229
|
+
- **Default debate mode is deliberation** for maximum value — it runs the full 4-stage CEJ pipeline. For cheaper/faster operation, use `{ mode: DebateMode.Independent }`.
|
|
230
|
+
- **Cost tracking** — `totalCostUsd` is always `undefined` unless a custom `llmClient` provides cost data (no viable npm cost-estimation library exists). The non-null cost figures shown in the JSON examples above assume such a custom client.
|
|
231
|
+
- **Empty personas disables escalation**: If you pass `personas: []`, the jury always returns the primary classifier result.
|
|
232
|
+
|
|
233
|
+
## API Reference
|
|
234
|
+
|
|
235
|
+
### Public Exports
|
|
236
|
+
|
|
237
|
+
```ts
|
|
238
|
+
import {
|
|
239
|
+
Jury,
|
|
240
|
+
JuryStats,
|
|
241
|
+
DebateConfig,
|
|
242
|
+
DebateMode,
|
|
243
|
+
PersonaRegistry,
|
|
244
|
+
FunctionClassifier,
|
|
245
|
+
LLMClassifier,
|
|
246
|
+
HuggingFaceClassifier,
|
|
247
|
+
SklearnClassifier,
|
|
248
|
+
MajorityVoteJudge,
|
|
249
|
+
WeightedVoteJudge,
|
|
250
|
+
LLMJudge,
|
|
251
|
+
BayesianJudge,
|
|
252
|
+
ThresholdCalibrator,
|
|
253
|
+
LiteLLMClient,
|
|
254
|
+
} from "@llm-jury/core";
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### `Jury` Options
|
|
258
|
+
|
|
259
|
+
| Option | Default | Description |
|
|
260
|
+
|---|---|---|
|
|
261
|
+
| `classifier` | (required) | Primary classifier |
|
|
262
|
+
| `personas` | (required) | List of personas |
|
|
263
|
+
| `confidenceThreshold` | `0.7` | Escalation threshold |
|
|
264
|
+
| `judge` | defaults to `LLMJudge` | Judge strategy |
|
|
265
|
+
| `debateConfig` | `undefined` | Debate configuration |
|
|
266
|
+
| `escalationOverride` | `undefined` | Force escalation |
|
|
267
|
+
| `maxDebateCostUsd` | `undefined` | Cost cap for debate |
|
|
268
|
+
| `debateConcurrency` | `5` | Max concurrent persona calls |
|
|
269
|
+
| `onEscalation` | `undefined` | Escalation callback |
|
|
270
|
+
| `onVerdict` | `undefined` | Verdict callback |
|
|
271
|
+
| `llmClient` | `undefined` | LLM transport override |
|
|
272
|
+
|
|
273
|
+
Methods:
|
|
274
|
+
|
|
275
|
+
- `await classify(text)` — classify a single input
|
|
276
|
+
- `await classifyBatch(texts, concurrency=10)` — classify multiple inputs
|
|
277
|
+
|
|
278
|
+
Behavior notes:
|
|
279
|
+
|
|
280
|
+
- Escalation condition is strictly `< threshold` (exactly equal does not escalate).
|
|
281
|
+
- If `personas` is empty, jury escalation is effectively disabled.
|
|
282
|
+
- If `maxDebateCostUsd` is exceeded, result falls back to primary classifier with `judgeStrategy` set to `cost_guard_primary_fallback`.
|
|
283
|
+
|
|
284
|
+
Stats: `jury.stats.total`, `fastPath`, `escalated`, `escalationRate`, `costSavingsVsAlwaysEscalate`.
|
|
285
|
+
|
|
286
|
+
### `DebateConfig` Options
|
|
287
|
+
|
|
288
|
+
| Option | Default | Meaning |
|
|
289
|
+
|---|---|---|
|
|
290
|
+
| `mode` | `deliberation` | Debate mode |
|
|
291
|
+
| `maxRounds` | `2` | Max deliberation rounds |
|
|
292
|
+
| `includePrimaryResult` | `true` | Include primary result in prompts |
|
|
293
|
+
| `includeConfidence` | `true` | Include confidence in prompt context |
|
|
294
|
+
|
|
295
|
+
### Personas
|
|
296
|
+
|
|
297
|
+
Persona fields: `name`, `role`, `systemPrompt`, `model="gpt-5-mini"`, `temperature=0.3`, `knownBias?`.
|
|
298
|
+
|
|
299
|
+
### Classifiers (API)
|
|
300
|
+
|
|
301
|
+
All classifiers implement `classify(text)` and expose `labels`.
|
|
302
|
+
|
|
303
|
+
- **FunctionClassifier**: `new FunctionClassifier(fn, labels)` where `fn` may return tuple or Promise tuple
|
|
304
|
+
- **LLMClassifier**: `new LLMClassifier({ model, labels, systemPrompt, llmClient, temperature })` — expects model JSON response with `label` and `confidence`; falls back to first label with `confidence=0` on parse failure
|
|
305
|
+
- **SklearnClassifier**: `new SklearnClassifier(model, labels, vectorizer?)` where model has `predictProba(...)`
|
|
306
|
+
- **HuggingFaceClassifier**: `new HuggingFaceClassifier({ modelName?, device?, pipeline? })` — uses injected `pipeline` or loads `@xenova/transformers`; must provide `modelName` or `pipeline`
|
|
307
|
+
|
|
308
|
+
### Judge Strategies (API)
|
|
309
|
+
|
|
310
|
+
- **MajorityVoteJudge**: `new MajorityVoteJudge()` — confidence = fraction of personas voting winning label
|
|
311
|
+
- **WeightedVoteJudge**: `new WeightedVoteJudge()` — confidence based on confidence-weighted label scores
|
|
312
|
+
- **LLMJudge**: `new LLMJudge({ model, systemPrompt, temperature, llmClient })` — falls back to primary result with `llm_judge_fallback_invalid_json` if JSON parse fails
|
|
313
|
+
- **BayesianJudge**: `new BayesianJudge(priors={})` — uses persona priors/reliability maps if provided
|
|
314
|
+
|
|
315
|
+
### Threshold Calibration
|
|
316
|
+
|
|
317
|
+
`new ThresholdCalibrator(jury)` then `await calibrate({ texts, labels, errorCost=10, escalationCost=0.05, thresholds? })`.
|
|
318
|
+
|
|
319
|
+
Report: `calibrationReport()` returns rows with threshold, accuracy, escalation rate, and total cost. `calibrate(...)` mutates `jury.threshold` to the best threshold.
|
|
320
|
+
|
|
321
|
+
### LLM Transport (`LiteLLMClient`)
|
|
322
|
+
|
|
323
|
+
- `new LiteLLMClient({ baseUrl?, apiKey?, timeoutMs? })`
|
|
324
|
+
- Falls back to env vars: `LITELLM_BASE_URL`, `OPENAI_BASE_URL` (default: `https://api.openai.com/v1`); `LITELLM_API_KEY`, `OPENAI_API_KEY`
|
|
325
|
+
- Sends `POST /chat/completions`
|
|
326
|
+
- Returns `{ content, tokens, costUsd }` — `costUsd` is always `undefined` (no viable npm cost-estimation library)
|
|
327
|
+
- Throws before request if no API key is configured
|
|
328
|
+
|
|
329
|
+
Temperature is automatically omitted for reasoning models (`gpt-5*`, `o1*`, `o3*`).
|
|
330
|
+
|
|
331
|
+
## Testing
|
|
332
|
+
|
|
333
|
+
```bash
|
|
334
|
+
npm test
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
### Real API Smoke Test
|
|
338
|
+
|
|
339
|
+
```bash
|
|
340
|
+
OPENAI_API_KEY="$OPENAI_API_KEY" node --test --experimental-strip-types tests/smoke/real-api.test.ts
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
## CLI
|
|
344
|
+
|
|
345
|
+
The CLI is for batch workflows. The primary interface is the TypeScript API above.
|
|
346
|
+
|
|
347
|
+
```bash
|
|
348
|
+
npm run build
|
|
349
|
+
node dist/cli/main.js classify \
|
|
350
|
+
--input input.jsonl \
|
|
351
|
+
--output verdicts.jsonl \
|
|
352
|
+
--classifier function \
|
|
353
|
+
--personas content_moderation \
|
|
354
|
+
--judge majority \
|
|
355
|
+
--judge-model gpt-5-mini \
|
|
356
|
+
--persona-model gpt-5-mini \
|
|
357
|
+
--threshold 0.7 \
|
|
358
|
+
--labels safe,unsafe
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
Calibration:
|
|
362
|
+
|
|
363
|
+
```bash
|
|
364
|
+
node dist/cli/main.js calibrate \
|
|
365
|
+
--input calibration.jsonl \
|
|
366
|
+
--classifier function \
|
|
367
|
+
--personas content_moderation \
|
|
368
|
+
--judge majority \
|
|
369
|
+
--judge-model gpt-5-mini \
|
|
370
|
+
--persona-model gpt-5-mini \
|
|
371
|
+
--labels safe,unsafe
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
Supported classifier specs: `function`, `llm:<model>`, `huggingface:<model>`.
|
|
375
|
+
|
|
376
|
+
## License
|
|
377
|
+
|
|
378
|
+
MIT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Public type surface of the calibration module (ThresholdCalibrator et al.).
export * from "./optimizer.ts";
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Runtime re-export of the calibration module (ThresholdCalibrator et al.).
export * from "./optimizer.js";
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { Jury } from "../jury/core.ts";
/** Inputs for a calibration sweep over candidate confidence thresholds. */
export type CalibrationOptions = {
    texts: string[];
    // Expected label per text; must be the same length as `texts`.
    labels: string[];
    // Cost charged per misclassification (default 10).
    errorCost?: number;
    // Cost charged per escalation (default 0.05).
    escalationCost?: number;
    // Candidate thresholds to evaluate (default 0.5..0.95 in 0.05 steps).
    thresholds?: number[];
};
/** One row of the calibration report: metrics observed at a single threshold. */
type CalibrationRow = {
    threshold: number;
    accuracy: number;
    escalationRate: number;
    totalCost: number;
};
/**
 * Sweeps candidate thresholds on a labeled dataset and selects the one
 * minimizing total cost; the winner is also assigned to the jury's threshold.
 */
export declare class ThresholdCalibrator {
    private jury;
    private rows;
    private bestThreshold;
    constructor(jury: Jury);
    /** Run the sweep; resolves to the best threshold (also set on the jury). */
    calibrate(options: CalibrationOptions): Promise<number>;
    /** Best threshold (null before calibrate has run) plus per-threshold metric rows. */
    calibrationReport(): {
        bestThreshold: number | null;
        rows: CalibrationRow[];
    };
}
export {};
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
export class ThresholdCalibrator {
|
|
2
|
+
jury;
|
|
3
|
+
rows = [];
|
|
4
|
+
bestThreshold = null;
|
|
5
|
+
constructor(jury) {
|
|
6
|
+
this.jury = jury;
|
|
7
|
+
}
|
|
8
|
+
async calibrate(options) {
|
|
9
|
+
const errorCost = options.errorCost ?? 10;
|
|
10
|
+
const escalationCost = options.escalationCost ?? 0.05;
|
|
11
|
+
const thresholds = options.thresholds ?? Array.from({ length: 10 }, (_v, idx) => Number((0.5 + idx * 0.05).toFixed(2)));
|
|
12
|
+
if (options.texts.length !== options.labels.length) {
|
|
13
|
+
throw new Error("texts and labels must have same length");
|
|
14
|
+
}
|
|
15
|
+
this.rows = [];
|
|
16
|
+
let bestThreshold = thresholds[0] ?? 0.7;
|
|
17
|
+
let bestCost = Number.POSITIVE_INFINITY;
|
|
18
|
+
for (const threshold of thresholds) {
|
|
19
|
+
let errors = 0;
|
|
20
|
+
let escalations = 0;
|
|
21
|
+
let correct = 0;
|
|
22
|
+
for (let i = 0; i < options.texts.length; i += 1) {
|
|
23
|
+
const text = options.texts[i];
|
|
24
|
+
const expected = options.labels[i];
|
|
25
|
+
const result = await this.jury.classifier.classify(text);
|
|
26
|
+
if (result.confidence < threshold) {
|
|
27
|
+
escalations += 1;
|
|
28
|
+
correct += 1;
|
|
29
|
+
}
|
|
30
|
+
else if (result.label === expected) {
|
|
31
|
+
correct += 1;
|
|
32
|
+
}
|
|
33
|
+
else {
|
|
34
|
+
errors += 1;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
const total = Math.max(1, options.texts.length);
|
|
38
|
+
const totalCost = errors * errorCost + escalations * escalationCost;
|
|
39
|
+
const row = {
|
|
40
|
+
threshold,
|
|
41
|
+
accuracy: correct / total,
|
|
42
|
+
escalationRate: escalations / total,
|
|
43
|
+
totalCost,
|
|
44
|
+
};
|
|
45
|
+
this.rows.push(row);
|
|
46
|
+
if (totalCost < bestCost) {
|
|
47
|
+
bestCost = totalCost;
|
|
48
|
+
bestThreshold = threshold;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
this.bestThreshold = bestThreshold;
|
|
52
|
+
this.jury.threshold = bestThreshold;
|
|
53
|
+
return bestThreshold;
|
|
54
|
+
}
|
|
55
|
+
calibrationReport() {
|
|
56
|
+
return {
|
|
57
|
+
bestThreshold: this.bestThreshold,
|
|
58
|
+
rows: [...this.rows],
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** Output of a single classification: winning label plus model confidence. */
export type ClassificationResult = {
    label: string;
    // Confidence in [0, 1]; compared against the jury's escalation threshold.
    confidence: number;
    // Provider-specific raw payload, kept for auditing/debugging.
    rawOutput?: unknown;
};
/** Minimal contract every primary classifier implements. */
export interface Classifier {
    labels: string[];
    classify(text: string): Promise<ClassificationResult>;
    // Optional batch entry point; adapters without a native batch path can
    // delegate to defaultClassifyBatch.
    classifyBatch?(texts: string[]): Promise<ClassificationResult[]>;
}
/** Shared batch fallback for classifiers lacking a native batch implementation. */
export declare function defaultClassifyBatch(classifier: Classifier, texts: string[]): Promise<ClassificationResult[]>;
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { ClassificationResult, Classifier } from "./base.ts";
/** Classifier adapter around a plain function returning a [label, confidence] tuple. */
export declare class FunctionClassifier implements Classifier {
    labels: string[];
    private fn;
    // `fn` may return the tuple synchronously or as a Promise.
    constructor(fn: (text: string) => [string, number] | Promise<[string, number]>, labels: string[]);
    classify(text: string): Promise<ClassificationResult>;
    classifyBatch(texts: string[]): Promise<ClassificationResult[]>;
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { defaultClassifyBatch } from "./base.js";
|
|
2
|
+
export class FunctionClassifier {
|
|
3
|
+
labels;
|
|
4
|
+
fn;
|
|
5
|
+
constructor(fn, labels) {
|
|
6
|
+
this.fn = fn;
|
|
7
|
+
this.labels = labels;
|
|
8
|
+
}
|
|
9
|
+
async classify(text) {
|
|
10
|
+
const [label, confidence] = await this.fn(text);
|
|
11
|
+
return {
|
|
12
|
+
label,
|
|
13
|
+
confidence,
|
|
14
|
+
rawOutput: { label, confidence },
|
|
15
|
+
};
|
|
16
|
+
}
|
|
17
|
+
async classifyBatch(texts) {
|
|
18
|
+
return defaultClassifyBatch(this, texts);
|
|
19
|
+
}
|
|
20
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { ClassificationResult, Classifier } from "./base.ts";
/** A single label/score pair as produced by a HF text-classification pipeline. */
export type HuggingFaceLabelScore = {
    label: string;
    score: number;
};
/** Pipeline callable; may return scores flat or nested one level, sync or async. */
export type HuggingFacePipeline = (text: string) => HuggingFaceLabelScore[] | Promise<HuggingFaceLabelScore[]> | HuggingFaceLabelScore[][] | Promise<HuggingFaceLabelScore[][]>;
/** Provide either `modelName` (lazy-loads @xenova/transformers) or an injected `pipeline`. */
export type HuggingFaceClassifierOptions = {
    modelName?: string;
    // Defaults to "cpu".
    device?: string;
    pipeline?: HuggingFacePipeline;
};
/** Classifier backed by a local HuggingFace text-classification pipeline. */
export declare class HuggingFaceClassifier implements Classifier {
    // Discovered from the first classification result (starts empty).
    labels: string[];
    private modelName?;
    private device;
    private pipeline?;
    constructor(options?: HuggingFaceClassifierOptions);
    classify(text: string): Promise<ClassificationResult>;
    private resolvePipeline;
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
export class HuggingFaceClassifier {
|
|
2
|
+
labels;
|
|
3
|
+
modelName;
|
|
4
|
+
device;
|
|
5
|
+
pipeline;
|
|
6
|
+
constructor(options = {}) {
|
|
7
|
+
this.labels = [];
|
|
8
|
+
this.modelName = options.modelName;
|
|
9
|
+
this.device = options.device ?? "cpu";
|
|
10
|
+
this.pipeline = options.pipeline;
|
|
11
|
+
}
|
|
12
|
+
async classify(text) {
|
|
13
|
+
const runner = await this.resolvePipeline();
|
|
14
|
+
const raw = await runner(text);
|
|
15
|
+
const normalized = Array.isArray(raw[0]) ? raw[0] : raw;
|
|
16
|
+
if (!normalized || normalized.length === 0) {
|
|
17
|
+
throw new Error("HuggingFace pipeline returned no scores");
|
|
18
|
+
}
|
|
19
|
+
let top = normalized[0];
|
|
20
|
+
for (const item of normalized.slice(1)) {
|
|
21
|
+
if (item.score > top.score) {
|
|
22
|
+
top = item;
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
if (this.labels.length === 0) {
|
|
26
|
+
this.labels = normalized.map((item) => item.label);
|
|
27
|
+
}
|
|
28
|
+
return {
|
|
29
|
+
label: top.label,
|
|
30
|
+
confidence: Number(top.score),
|
|
31
|
+
rawOutput: normalized,
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
async resolvePipeline() {
|
|
35
|
+
if (this.pipeline) {
|
|
36
|
+
return this.pipeline;
|
|
37
|
+
}
|
|
38
|
+
if (!this.modelName) {
|
|
39
|
+
throw new Error("Provide modelName or pipeline to HuggingFaceClassifier.");
|
|
40
|
+
}
|
|
41
|
+
try {
|
|
42
|
+
const transformers = (await import("@xenova/transformers"));
|
|
43
|
+
this.pipeline = await transformers.pipeline("text-classification", this.modelName, {
|
|
44
|
+
device: this.device,
|
|
45
|
+
});
|
|
46
|
+
return this.pipeline;
|
|
47
|
+
}
|
|
48
|
+
catch (error) {
|
|
49
|
+
throw new Error("Unable to initialize HuggingFace pipeline. Install @xenova/transformers or inject a pipeline.", { cause: error });
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { ClassificationResult, Classifier } from "./base.ts";
import type { LLMClient } from "../llm/client.ts";
/** Options for the LLM-backed primary classifier. */
export type LLMClassifierOptions = {
    // Defaults to "gpt-5-mini".
    model?: string;
    // Allowed labels; empty list means "any".
    labels?: string[];
    systemPrompt?: string;
    // Defaults to 0.
    temperature?: number;
    // Transport override; defaults to a fresh LiteLLMClient.
    llmClient?: LLMClient;
};
/**
 * Primary classifier that prompts an LLM to emit JSON {label, confidence}.
 * Falls back to the first configured label with confidence 0 on parse failure.
 */
export declare class LLMClassifier implements Classifier {
    labels: string[];
    private model;
    private systemPrompt;
    private temperature;
    private llmClient;
    constructor(options?: LLMClassifierOptions);
    classify(text: string): Promise<ClassificationResult>;
    classifyBatch(texts: string[]): Promise<ClassificationResult[]>;
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { LiteLLMClient } from "../llm/client.js";
|
|
2
|
+
import { safeJsonObject, stripMarkdown } from "../utils.js";
|
|
3
|
+
export class LLMClassifier {
|
|
4
|
+
labels;
|
|
5
|
+
model;
|
|
6
|
+
systemPrompt;
|
|
7
|
+
temperature;
|
|
8
|
+
llmClient;
|
|
9
|
+
constructor(options = {}) {
|
|
10
|
+
this.model = options.model ?? "gpt-5-mini";
|
|
11
|
+
this.labels = options.labels ?? [];
|
|
12
|
+
this.systemPrompt =
|
|
13
|
+
options.systemPrompt ?? "Classify the text and return JSON with fields label and confidence.";
|
|
14
|
+
this.temperature = options.temperature ?? 0;
|
|
15
|
+
this.llmClient = options.llmClient ?? new LiteLLMClient();
|
|
16
|
+
}
|
|
17
|
+
async classify(text) {
|
|
18
|
+
const prompt = [
|
|
19
|
+
"Classify the following text.",
|
|
20
|
+
`Labels: ${this.labels.join(", ") || "any"}`,
|
|
21
|
+
`Text: ${text}`,
|
|
22
|
+
"Respond ONLY with JSON: {\"label\":\"...\",\"confidence\":0.0-1.0}",
|
|
23
|
+
].join("\n");
|
|
24
|
+
const payload = await this.llmClient.complete(this.model, this.systemPrompt, prompt, this.temperature);
|
|
25
|
+
const parsed = safeJsonObject(stripMarkdown(payload.content));
|
|
26
|
+
const fallbackLabel = this.labels[0] ?? "unknown";
|
|
27
|
+
if (!parsed) {
|
|
28
|
+
return {
|
|
29
|
+
label: fallbackLabel,
|
|
30
|
+
confidence: 0,
|
|
31
|
+
rawOutput: { raw_content: payload.content, error: "invalid_json" },
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
return {
|
|
35
|
+
label: String(parsed.label ?? fallbackLabel),
|
|
36
|
+
confidence: Number(parsed.confidence ?? 0),
|
|
37
|
+
rawOutput: parsed,
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
async classifyBatch(texts) {
|
|
41
|
+
const out = [];
|
|
42
|
+
for (const text of texts) {
|
|
43
|
+
out.push(await this.classify(text));
|
|
44
|
+
}
|
|
45
|
+
return out;
|
|
46
|
+
}
|
|
47
|
+
}
|