ai-consensus-core 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Marcelo Ceccon / entropyvortex
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,415 @@
1
+ # ai-consensus-core
2
+
3
+ > Turn any set of AI models into a real roundtable.
4
+ > Production-grade Consensus Validation Protocol (CVP) for TypeScript — zero LLM-provider coupling, highly observable, shipped as a clean npm package.
5
+
6
+ [![npm](https://img.shields.io/npm/v/ai-consensus-core)](https://www.npmjs.com/package/ai-consensus-core)
7
+ [![license](https://img.shields.io/npm/l/ai-consensus-core)](./LICENSE)
8
+ [![types](https://img.shields.io/npm/types/ai-consensus-core)](#types)
9
+
10
+ This is the engine that powers [Roundtable](https://github.com/entropyvortex/roundtable) and the [`ai-consensus-mcp`](https://github.com/entropyvortex/ai-consensus-mcp) MCP server — extracted into a standalone library so anyone can wire multi-model debate into their own product.
11
+
12
+ ## Why this exists
13
+
14
+ Most "multi-agent" frameworks are toys.
15
+ This one is built for real work.
16
+
17
+ You configure any number of models — Grok, Claude, Gemini, DeepSeek, whatever — give each a persona, and hand it a question. You get:
18
+
19
+ - **Blind Round 1, then sequential debate.** Each model defends its take under full cross-visibility.
20
+ - **Confidence scoring + disagreement detection.** Deterministic, no extra LLM calls.
21
+ - **Early stopping** when the group converges.
22
+ - **Optional judge synthesis** — a non-voting model produces majority/minority/unresolved sections.
23
+ - **Full observability.** Typed event stream fires on every round, every participant, every confidence shift, every disagreement.
24
+ - **Zero provider coupling.** The library never imports a provider SDK. You plug in a `ModelCaller` once and use any backend.
25
+
26
+ ## Install
27
+
28
+ ```bash
29
+ npm install ai-consensus-core
30
+ # or
31
+ pnpm add ai-consensus-core
32
+ # or
33
+ yarn add ai-consensus-core
34
+ ```
35
+
36
+ ESM-only. Node ≥ 20. Runtime dependencies: `zod` + Node's built-in `events`. That's it.
37
+
38
+ ## 60-second example
39
+
40
+ ```ts
41
+ import {
42
+ ConsensusEngine,
43
+ PERSONAS,
44
+ type ModelCaller,
45
+ } from "ai-consensus-core";
46
+
47
+ // 1) Adapt your provider of choice to the ModelCaller shape.
48
+ // This one targets any OpenAI-compatible endpoint (Grok, Claude, OpenAI, Groq…).
49
+ const caller: ModelCaller = async ({ system, user, modelId, temperature, maxOutputTokens, signal }) => {
50
+ const res = await fetch("https://api.x.ai/v1/chat/completions", {
51
+ method: "POST",
52
+ signal,
53
+ headers: {
54
+ authorization: `Bearer ${process.env.GROK_API_KEY}`,
55
+ "content-type": "application/json",
56
+ },
57
+ body: JSON.stringify({
58
+ model: modelId,
59
+ temperature,
60
+ max_tokens: maxOutputTokens,
61
+ messages: [
62
+ { role: "system", content: system },
63
+ { role: "user", content: user },
64
+ ],
65
+ }),
66
+ });
67
+ const json = await res.json();
68
+ return { content: json.choices[0].message.content };
69
+ };
70
+
71
+ // 2) Wire up observability.
72
+ const engine = new ConsensusEngine(caller);
73
+
74
+ engine.on("roundStart", (e) => console.log(`▶ ${e.label}`));
75
+ engine.on("roundComplete", (e) => console.log(` score=${e.score}`));
76
+ engine.on("disagreementDetected", (e) =>
77
+ console.log(` ⚠ ${e.disagreement.label} (Δ=${e.disagreement.severity})`),
78
+ );
79
+
80
+ // 3) Run.
81
+ const result = await engine.run({
82
+ question: "Should early-stage startups adopt microservices from day one?",
83
+ participants: [
84
+ { id: "p1", modelId: "grok-4", persona: PERSONAS[0]! }, // Risk Analyst
85
+ { id: "p2", modelId: "grok-4", persona: PERSONAS[1]! }, // First-Principles
86
+ { id: "p3", modelId: "grok-4", persona: PERSONAS[6]! }, // Domain Expert
87
+ ],
88
+ maxRounds: 4,
89
+ judge: { modelId: "grok-4" },
90
+ });
91
+
92
+ console.log(`Final score: ${result.finalScore}`);
93
+ console.log(result.synthesis?.majorityPosition);
94
+ ```
95
+
96
+ ## Protocol diagram
97
+
98
+ ```
99
+ USER QUESTION
100
+
101
+
102
+ ┌─────────────────────────────────────────────────────────┐
103
+ │ ROUND 1 — INITIAL ANALYSIS │
104
+ │ (blind=true, parallel, no cross-visibility) │
105
+ │ │
106
+ │ ┌─────┐ ┌─────┐ ┌─────┐ ┌─────┐ │
107
+ │ │ P₁ │ │ P₂ │ │ P₃ │ │ Pₙ │ │
108
+ │ └──┬──┘ └──┬──┘ └──┬──┘ └──┬──┘ │
109
+ │ │ │ │ │ │
110
+ │ ModelCaller ModelCaller ... ModelCaller │
111
+ │ │ │ │ │ │
112
+ │ ▼ ▼ ▼ ▼ │
113
+ │ CONFIDENCE: N ← extracted from trailing line │
114
+ └─────────────────────────┬───────────────────────────────┘
115
+
116
+
117
+ score₁ = round(clamp(μ − 0.5·σ, 0, 100))
118
+
119
+
120
+ ┌─────────────────────────────────────────────────────────┐
121
+ │ ROUND 2 — COUNTERARGUMENTS │
122
+ │ (sequential, randomized order, full history) │
123
+ │ │
124
+ │ P? ──► P? ──► P? ──► P? │
125
+ │ Each participant sees every prior response │
126
+ │ from round 1 AND earlier in round 2. │
127
+ └─────────────────────────┬───────────────────────────────┘
128
+
129
+ ┌───────────────┴───────────────┐
130
+ │ │
131
+ ▼ ▼
132
+ |score₂ − score₁| ≤ Δ ───yes──► earlyStop event
133
+ │ no stopReason = "converged"
134
+
135
+ ┌─────────────────────────────────────────────────────────┐
136
+ │ ROUND 3 — EVIDENCE ASSESSMENT │
137
+ └─────────────────────────┬───────────────────────────────┘
138
+
139
+ ┌─────────────────────────────────────────────────────────┐
140
+ │ ROUND 4..N − 1 — SYNTHESIS & REFINEMENT │
141
+ │ ROUND N — FINAL SYNTHESIS │
142
+ │ (loops until maxRounds or convergence) │
143
+ └─────────────────────────┬───────────────────────────────┘
144
+
145
+ FINAL-ROUND RESPONSES
146
+
147
+ ┌─────────────┴─────────────┐
148
+ │ │
149
+ judge?=true (always)
150
+ │ │
151
+ ▼ ▼
152
+ ┌──────────────────┐ ConsensusResult {
153
+ │ JUDGE_PERSONA │ rounds, finalScore,
154
+ │ (non-voting) │ finalAverageConfidence,
155
+ │ │ stopReason, synthesis?
156
+ │ • Majority │ }
157
+ │ • Minority │
158
+ │ • Unresolved │
159
+ │ • JUDGE_CONFIDENCE
160
+ └──────────────────┘
161
+ ```
162
+
163
+ ### Phase contract
164
+
165
+ | Round | Phase | Label | Visibility |
166
+ | -------- | --------------------- | ------------------------------------ | -------------------- |
167
+ | 1 | `initial-analysis` | `Initial Analysis` | **blind** (parallel) |
168
+ | 2 | `counterarguments` | `Counterarguments` | full history |
169
+ | 3 | `evidence-assessment` | `Evidence Assessment` | full history |
170
+ | 4 … N−1 | `synthesis` | `Synthesis & Refinement (Round k)` | full history |
171
+ | N (last) | `synthesis` | `Final Synthesis` | full history |
172
+
173
+ - **Round 1 is blind by default.** Participants run in parallel and see no one else's response. Set `blindFirstRound: false` to make it sequential (rare — mostly for deterministic replay).
174
+ - **Rounds 2+ are sequential.** Speaking order is randomized unless `randomizeOrder: false`. Each speaker sees everyone who came before them — including earlier speakers in the current round.
175
+ - **Every response must end with `CONFIDENCE: N`** where N is an integer 0–100. Missing marker → 50 (neutral).
176
+ - **Consensus score** = `round(clamp(μ − 0.5·σ, 0, 100))`, where μ is the mean and σ the population standard deviation of the round's confidence values.
177
+ - **Disagreement detected** when two participants' confidences differ by ≥ 20 (tunable via `disagreementThreshold`).
178
+ - **Early stop** when `|score_k − score_{k−1}| ≤ 3` (tunable via `convergenceDelta`; disable with `earlyStop: false`). Only checked from round 2 onward.
179
+
180
+ ## The `ModelCaller` contract
181
+
182
+ The library's single extension point. Implement it once for your provider; the engine calls it for every participant and the judge.
183
+
184
+ ```ts
185
+ export type ModelCaller = (req: ModelCallRequest) => Promise<ModelCallResponse>;
186
+
187
+ export interface ModelCallRequest {
188
+ participantId: string; // "judge" for synthesis calls
189
+ modelId: string;
190
+ round: number;
191
+ phase: Phase; // "initial-analysis" | "counterarguments" | "evidence-assessment" | "synthesis"
192
+ system: string; // persona + phase instructions
193
+ user: string; // the question (or synthesis context for the judge)
194
+ temperature: number; // 0.7 participants, 0.3 judge (defaults — caller may override)
195
+ maxOutputTokens: number; // 1500 default
196
+ signal?: AbortSignal; // honor this
197
+ onToken?: (t: string) => void; // optional streaming sink
198
+ }
199
+
200
+ export interface ModelCallResponse {
201
+ content: string; // must include the trailing CONFIDENCE: N line
202
+ usage?: { inputTokens: number; outputTokens: number; totalTokens: number };
203
+ }
204
+ ```
205
+
206
+ **Implementation rules.**
207
+
208
+ 1. **Honor `signal`.** The engine propagates cancellation; if you ignore it, your consumers can't cancel a run.
209
+ 2. **Stream if you can.** Call `onToken` with each chunk; observers get real-time UI for free.
210
+ 3. **Don't re-throw `AbortError` as something else.** The engine short-circuits cleanly on it.
211
+ 4. **Don't swallow other errors.** Throw. The engine captures the error into `ParticipantResponse` and keeps running.
212
+ 5. **Return the full content verbatim.** Do not strip the trailing `CONFIDENCE:` line — the parser needs it.
213
+
214
+ ## Events
215
+
216
+ ```ts
217
+ engine.on("roundStart", (e: RoundStartEvent) => void);
218
+ engine.on("participantStart", (e: ParticipantStartEvent) => void);
219
+ engine.on("participantToken", (e: ParticipantTokenEvent) => void); // only fires if caller streams
220
+ engine.on("participantComplete", (e: ParticipantCompleteEvent) => void);
221
+ engine.on("confidenceUpdate", (e: ConfidenceUpdateEvent) => void);
222
+ engine.on("disagreementDetected", (e: DisagreementDetectedEvent) => void);
223
+ engine.on("roundComplete", (e: RoundCompleteEvent) => void);
224
+ engine.on("earlyStop", (e: EarlyStopEvent) => void);
225
+ engine.on("synthesisStart", (e: SynthesisStartEvent) => void);
226
+ engine.on("synthesisToken", (e: SynthesisTokenEvent) => void);
227
+ engine.on("synthesisComplete", (e: SynthesisCompleteEvent) => void);
228
+ engine.on("finalResult", (e: FinalResultEvent) => void);
229
+ engine.on("error", (err: Error) => void);
230
+ ```
231
+
232
+ Event order for one round of three participants:
233
+
234
+ ```
235
+ roundStart
236
+ participantStart (p1) → [participantToken × N if streaming] → participantComplete → confidenceUpdate
237
+ participantStart (p2) → …
238
+ participantStart (p3) → …
239
+ [disagreementDetected × 0..N]
240
+ roundComplete
241
+ [earlyStop?]
242
+ …next round
243
+ [synthesisStart → synthesisToken × N → synthesisComplete]
244
+ finalResult
245
+ ```
246
+
247
+ ## Options reference
248
+
249
+ ```ts
250
+ interface ConsensusOptions {
251
+ question: string; // required, non-empty
252
+ participants: Participant[]; // required, ≥ 2, unique ids
253
+
254
+ maxRounds?: number; // default 4, clamped to [1, 10]
255
+ earlyStop?: boolean; // default true
256
+ convergenceDelta?: number; // default 3
257
+ disagreementThreshold?: number; // default 20
258
+
259
+ blindFirstRound?: boolean; // default true
260
+ randomizeOrder?: boolean; // default true
261
+ participantTemperature?: number; // default 0.7
262
+ maxOutputTokens?: number; // default 1500
263
+
264
+ judge?: {
265
+ modelId: string;
266
+ caller?: ModelCaller; // defaults to engine's main caller
267
+ temperature?: number; // default 0.3
268
+ maxOutputTokens?: number; // default 1500
269
+ };
270
+
271
+ randomSeed?: number; // deterministic round-order shuffle
272
+ signal?: AbortSignal; // cancellation
273
+ }
274
+ ```
275
+
276
+ ## Personas
277
+
278
+ Exactly the seven personas from the battle-tested Roundtable playbook:
279
+
280
+ | id | Name | Role |
281
+ | --------------------- | -------------------------- | ------------------------------------------------------------------------ |
282
+ | `pessimist` | Risk Analyst | Surfaces failure modes, tail risks, second-order effects. |
283
+ | `first-principles` | First-Principles Engineer | Decomposes every claim to axioms; rejects analogies. |
284
+ | `vc-specialist` | VC Funds Specialist | Markets, moats, unit economics, defensibility. |
285
+ | `scientific-skeptic` | Scientific Skeptic | Demands evidence, questions methodology, flags fallacies. |
286
+ | `optimistic-futurist` | Optimistic Futurist | Exponential trends, paradigm shifts, grounded upside. |
287
+ | `devils-advocate` | Devil's Advocate | Constructs the strongest counter-arguments. |
288
+ | `domain-expert` | Domain Expert | Practical implementation knowledge, edge cases, reality checks. |
289
+
290
+ Plus one judge:
291
+
292
+ | id | Name | Role |
293
+ | ------- | ---------------- | --------------------------------------------------------------------------- |
294
+ | `judge` | Consensus Judge | Non-voting synthesizer. Produces Majority / Minority / Unresolved sections. |
295
+
296
+ ```ts
297
+ import { PERSONAS, JUDGE_PERSONA, getPersonaById } from "ai-consensus-core";
298
+
299
+ const riskAnalyst = getPersonaById("pessimist");
300
+ ```
301
+
302
+ ## Scoring
303
+
304
+ ```ts
305
+ import { consensusScore, detectDisagreements } from "ai-consensus-core";
306
+
307
+ consensusScore([85, 82, 78, 40]); // → 62 by the formula above (μ = 71.25, σ ≈ 18.21, so μ − 0.5·σ ≈ 62.14)
308
+
309
+ detectDisagreements({
310
+ round: 2,
311
+ responses, // ParticipantResponse[]
312
+ participants,
313
+ threshold: 20,
314
+ });
315
+ ```
316
+
317
+ ## Cancellation
318
+
319
+ ```ts
320
+ const ac = new AbortController();
321
+ setTimeout(() => ac.abort(), 5_000);
322
+
323
+ const result = await engine.run({ ...options, signal: ac.signal });
324
+ // result.stopReason === "aborted" if the timeout fires
325
+ ```
326
+
327
+ The signal is forwarded into every `ModelCaller` invocation. Any provider that respects `AbortSignal` (most do) tears down cleanly.
328
+
329
+ ## Deterministic replay
330
+
331
+ Pass `randomSeed` to make round-order shuffling reproducible. Combined with a deterministic `ModelCaller` (e.g. one that replays recorded responses), a whole run becomes bit-for-bit reproducible — perfect for snapshot tests.
332
+
333
+ ```ts
334
+ await engine.run({ ...options, randomSeed: 42 });
335
+ ```
336
+
337
+ ## Types
338
+
339
+ Everything is exported from the root:
340
+
341
+ ```ts
342
+ import type {
343
+ Persona,
344
+ Participant,
345
+ Phase,
346
+ TokenUsage,
347
+ ModelCaller,
348
+ ModelCallRequest,
349
+ ModelCallResponse,
350
+ ParticipantResponse,
351
+ Disagreement,
352
+ RoundResult,
353
+ SynthesisResult,
354
+ ConsensusResult,
355
+ ConsensusOptions,
356
+ StopReason,
357
+ ConsensusEventMap,
358
+ RoundStartEvent,
359
+ ParticipantStartEvent,
360
+ ParticipantCompleteEvent,
361
+ RoundCompleteEvent,
362
+ FinalResultEvent,
363
+ // …etc
364
+ } from "ai-consensus-core";
365
+ ```
366
+
367
+ Zod schemas are exported too, for boundary validation on your side:
368
+
369
+ ```ts
370
+ import { PersonaSchema, ParticipantSchema } from "ai-consensus-core";
371
+
372
+ ParticipantSchema.parse(untrustedInput);
373
+ ```
374
+
375
+ ## Development
376
+
377
+ ```bash
378
+ git clone https://github.com/entropyvortex/ai-consensus-core.git
379
+ cd ai-consensus-core
380
+ npm install
381
+ npm run test # 136 tests, vitest
382
+ npm run test:coverage
383
+ npm run build # emits ESM + .d.ts into dist/
384
+ ```
385
+
386
+ ## Design notes
387
+
388
+ - **Why `μ − 0.5·σ` and not median / majority vote?** A high mean with a tight spread should score higher than a high mean with one strong dissenter. A simple linear penalty on stddev does this cheaply and keeps the score on the same 0–100 scale as the raw confidences.
389
+ - **Why confidence-delta disagreements, not claim extraction?** Extracting claims from free text is fragile and expensive. A 20-point confidence gap is a strong, cheap, deterministic signal. If you want richer structure, run the judge.
390
+ - **Why sequential rounds 2+ instead of parallel?** The protocol wants each speaker to have full visibility of the conversation so far. Parallel would let participants ignore each other and defeat the debate.
391
+ - **Why the `CONFIDENCE: N` marker instead of structured outputs?** Every provider can emit a plain-text trailing line. Supporting structured outputs across five-plus providers is a coupling surface we didn't want.
392
+
393
+ ## Philosophy
394
+
395
+ Most multi-agent frameworks are toys. They hard-code a single provider, assume a single use case, or pile opinions on top of opinions until the engine is unshippable.
396
+
397
+ This library is the opposite of that. It's the minimum viable mechanism for multi-model consensus — no provider SDK, no CLI, no server, no opinions beyond the protocol itself. You bring the models and the shell; we bring the engine.
398
+
399
+ If you care about serious multi-AI reasoning, persistent agent memory, and safe, powerful tooling — this is the foundation layer.
400
+
401
+ ## See also
402
+
403
+ - [`ai-consensus-mcp`](https://github.com/entropyvortex/ai-consensus-mcp) — thin stdio MCP server that wraps this library and exposes `consensus` as a single tool for Claude Code / Cursor / Windsurf / any MCP host.
404
+
405
+ ## License
406
+
407
+ MIT
408
+
409
+ ---
410
+
411
+ **Part of the [entropyvortex](https://github.com/entropyvortex) stack** — practical, no-bullshit AI open source by [Marcelo Ceccon](https://github.com/marceloceccon).
412
+
413
+ Made with ❤️ in Brazil.
414
+
415
+ MIT License • Built to ship.
@@ -0,0 +1,30 @@
1
+ import { TypedEventEmitter } from "./events.js";
2
+ import type { ConsensusEventMap, ConsensusOptions, ConsensusResult, ModelCaller } from "./types.js";
3
+ declare const DEFAULTS: { // Values matching the documented defaults in the README options reference; re-exported below as CONSENSUS_DEFAULTS.
4
+ readonly maxRounds: 4; // documented default for maxRounds (README: clamped to [1, 10])
5
+ readonly earlyStop: true; // documented default for earlyStop
6
+ readonly convergenceDelta: 3; // README: early stop when the score moves by at most this much between consecutive rounds
7
+ readonly disagreementThreshold: 20; // README: a disagreement is flagged when two confidences differ by at least this much
8
+ readonly blindFirstRound: true; // README: round 1 runs in parallel with no cross-visibility
9
+ readonly randomizeOrder: true; // README: speaking order in rounds 2+ is randomized unless disabled
10
+ readonly participantTemperature: 0.7; // documented default for participantTemperature
11
+ readonly maxOutputTokens: 1500; // documented default for maxOutputTokens
12
+ readonly judgeTemperature: 0.3; // presumably the default for judge.temperature (README lists 0.3) — confirm against engine.ts
13
+ readonly judgeMaxOutputTokens: 1500; // presumably the default for judge.maxOutputTokens (README lists 1500) — confirm against engine.ts
14
+ };
15
+ declare const MAX_ROUNDS_CAP = 10; // presumably the upper bound applied to maxRounds (README: clamped to [1, 10]) — confirm against engine.ts
16
+ export declare class ConsensusEngine extends TypedEventEmitter<ConsensusEventMap> { // Protocol engine; the events it emits are typed by ConsensusEventMap.
17
+ #private; // placeholder the compiler emits for ECMAScript private members omitted from the declaration output
18
+ constructor(caller: ModelCaller); // caller: provider adapter; per the README it is invoked for every participant and, unless judge.caller is set, for the judge
19
+ /**
20
+ * Run the Consensus Validation Protocol end-to-end, emitting events throughout (see {@link ConsensusEventMap}).
21
+ *
22
+ * @param options - The question, the participants and the optional tuning fields of {@link ConsensusOptions}.
23
+ * @returns Resolves with the final {@link ConsensusResult}; per-participant ModelCaller failures are
24
+ * captured into the per-response `error` field and do not abort the loop.
25
+ * @throws Rejects only on `AbortError` from a cancelled run. NOTE(review): the README cancellation example reads `result.stopReason === "aborted"` after an abort, which implies a resolved promise — confirm whether an aborted run rejects or resolves.
26
+ */
27
+ run(options: ConsensusOptions): Promise<ConsensusResult>;
28
+ }
29
+ export { DEFAULTS as CONSENSUS_DEFAULTS, MAX_ROUNDS_CAP };
30
+ //# sourceMappingURL=engine.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../src/engine.ts"],"names":[],"mappings":"AAUA,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAqBhD,OAAO,KAAK,EACV,iBAAiB,EACjB,gBAAgB,EAChB,eAAe,EAEf,WAAW,EAOZ,MAAM,YAAY,CAAC;AAIpB,QAAA,MAAM,QAAQ;;;;;;;;;;;CAWJ,CAAC;AAEX,QAAA,MAAM,cAAc,KAAK,CAAC;AAK1B,qBAAa,eAAgB,SAAQ,iBAAiB,CAAC,iBAAiB,CAAC;;gBAG3D,MAAM,EAAE,WAAW;IAK/B;;;;;;;OAOG;IACG,GAAG,CAAC,OAAO,EAAE,gBAAgB,GAAG,OAAO,CAAC,eAAe,CAAC;CAyb/D;AA+ED,OAAO,EAAE,QAAQ,IAAI,kBAAkB,EAAE,cAAc,EAAE,CAAC"}