@mneme-ai/core 2.62.0 → 2.64.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent_manifest.d.ts.map +1 -1
- package/dist/agent_manifest.js +8 -0
- package/dist/agent_manifest.js.map +1 -1
- package/dist/diff_arena/adapters.d.ts +84 -0
- package/dist/diff_arena/adapters.d.ts.map +1 -0
- package/dist/diff_arena/adapters.js +172 -0
- package/dist/diff_arena/adapters.js.map +1 -0
- package/dist/diff_arena/consensus.d.ts +73 -0
- package/dist/diff_arena/consensus.d.ts.map +1 -0
- package/dist/diff_arena/consensus.js +138 -0
- package/dist/diff_arena/consensus.js.map +1 -0
- package/dist/diff_arena/index.d.ts +100 -0
- package/dist/diff_arena/index.d.ts.map +1 -0
- package/dist/diff_arena/index.js +230 -0
- package/dist/diff_arena/index.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/dist/time_crystal/gotcha_detector.d.ts +31 -0
- package/dist/time_crystal/gotcha_detector.d.ts.map +1 -0
- package/dist/time_crystal/gotcha_detector.js +81 -0
- package/dist/time_crystal/gotcha_detector.js.map +1 -0
- package/dist/time_crystal/index.d.ts +158 -0
- package/dist/time_crystal/index.d.ts.map +1 -0
- package/dist/time_crystal/index.js +289 -0
- package/dist/time_crystal/index.js.map +1 -0
- package/dist/time_crystal/problem_fingerprint.d.ts +43 -0
- package/dist/time_crystal/problem_fingerprint.d.ts.map +1 -0
- package/dist/time_crystal/problem_fingerprint.js +101 -0
- package/dist/time_crystal/problem_fingerprint.js.map +1 -0
- package/dist/time_crystal/ranking.d.ts +60 -0
- package/dist/time_crystal/ranking.d.ts.map +1 -0
- package/dist/time_crystal/ranking.js +90 -0
- package/dist/time_crystal/ranking.js.map +1 -0
- package/dist/truth_gate/claims.d.ts.map +1 -1
- package/dist/truth_gate/claims.js +38 -0
- package/dist/truth_gate/claims.js.map +1 -1
- package/dist/truth_gate/probes.d.ts.map +1 -1
- package/dist/truth_gate/probes.js +115 -0
- package/dist/truth_gate/probes.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v2.64.0 — ARENA vendor adapter abstraction.
|
|
3
|
+
*
|
|
4
|
+
* ARENA must work with ANY AI vendor. Rather than hard-code Anthropic /
|
|
5
|
+
* OpenAI / Google SDK clients (which would force vendor lock-in + bring
|
|
6
|
+
* in their dependencies + require live API keys for tests), we expose
|
|
7
|
+
* a thin `VendorAdapter` interface that anyone can implement.
|
|
8
|
+
*
|
|
9
|
+
* Mneme ships **3 built-in adapter kinds**:
|
|
10
|
+
*
|
|
11
|
+
* 1. MOCK — deterministic seeded response. Used for tests + offline
|
|
12
|
+
* demos. Always available; no API key needed.
|
|
13
|
+
*
|
|
14
|
+
* 2. HTTP — generic JSON-POST adapter pointed at any
|
|
15
|
+
* OpenAI-compatible chat completions endpoint. Works with
|
|
16
|
+
* Anthropic, OpenAI, Google AI Studio, OpenRouter, local
|
|
17
|
+
* ollama/llama.cpp servers. Requires base URL + API key in env.
|
|
18
|
+
*
|
|
19
|
+
* 3. CLI — spawns a subprocess (e.g. `gemini-cli ask "..."` /
|
|
20
|
+
* `grok-cli`). Captures stdout. Useful when the vendor only
|
|
21
|
+
* ships a CLI client. Refuses command names that fail the safe-name pattern check.
|
|
22
|
+
*
|
|
23
|
+
* Pure ESM. Defensive — every adapter returns a result envelope with
|
|
24
|
+
* `ok` boolean; never throws into the runner.
|
|
25
|
+
*/
|
|
26
|
+
import { spawnSync } from "node:child_process";
|
|
27
|
+
import { createHash } from "node:crypto";
|
|
28
|
+
/**
 * Build a mock vendor adapter that answers from a local responder function.
 *
 * @param {object} opts
 * @param {string} opts.name - Vendor label, echoed as `name` and as `vendor` on every result.
 * @param {(prompt: string) => string} [opts.responder] - Produces the reply text.
 *   Falls back to `defaultMockResponder(opts.name)` when omitted.
 * @param {number} [opts.confidence] - Confidence reported on success (default 0.7).
 * @param {number} [opts.simulatedLatencyMs] - When > 0, `ask` waits this long before answering.
 * @returns {{ name: string, kind: "mock", ask: (prompt: string) => Promise<object> }}
 *   Adapter whose `ask` always resolves to a result envelope with an `ok` flag.
 */
export function mockAdapter(opts) {
    return {
        name: opts.name,
        kind: "mock",
        async ask(prompt) {
            const t0 = performance.now();
            const elapsedMs = () => +(performance.now() - t0).toFixed(2);
            // Simulated latency uses setTimeout so other adapters can run in parallel.
            if (opts.simulatedLatencyMs && opts.simulatedLatencyMs > 0) {
                await new Promise((r) => setTimeout(r, opts.simulatedLatencyMs));
            }
            try {
                const text = (opts.responder ?? defaultMockResponder(opts.name))(prompt);
                return {
                    vendor: opts.name,
                    kind: "mock",
                    ok: true,
                    text,
                    confidence: opts.confidence ?? 0.7,
                    latencyMs: elapsedMs(),
                };
            }
            catch (e) {
                // A caller-supplied responder may throw; report it as a failed
                // envelope (same shape the http/cli adapters use) instead of rejecting.
                return {
                    vendor: opts.name, kind: "mock", ok: false, text: "", confidence: 0,
                    latencyMs: elapsedMs(),
                    reason: e?.message ?? String(e),
                };
            }
        },
    };
}
|
|
50
|
+
/**
 * Create the built-in deterministic responder for a mock vendor.
 *
 * The reply is chosen by hashing `vendorName|prompt` with SHA-256 and using
 * the first two digest bytes (big-endian) modulo the number of canned
 * replies, so the same (vendor, prompt) pair always yields the same text.
 *
 * @param {string} vendorName - Vendor label mixed into the hash and the reply prefix.
 * @returns {(prompt: string) => string} Responder that echoes up to 60 characters of the prompt.
 */
function defaultMockResponder(vendorName) {
    const cannedReplies = [
        "Based on my training, the answer involves multiple considerations.",
        "The most accurate response would acknowledge the nuance here.",
        "There are a few approaches; the canonical one is well-documented.",
        "This depends on the specific context, but in general terms:",
    ];
    return function respond(prompt) {
        const digest = createHash("sha256").update(`${vendorName}|${prompt}`).digest();
        // First 16 bits of the digest, equal to the first four hex characters.
        const pick = digest.readUInt16BE(0) % cannedReplies.length;
        const excerpt = prompt.slice(0, 60);
        return `[${vendorName}] ${cannedReplies[pick]} (re: ${excerpt})`;
    };
}
|
|
67
|
+
/**
 * Build an adapter that POSTs a chat-completions style JSON request.
 *
 * The request body is `{ model, messages: [{ role: "user", content: prompt }], max_tokens: 800 }`
 * and the reply text is read from `choices[0].message.content`.
 *
 * @param {object} opts
 * @param {string} opts.name - Vendor label, echoed as `vendor` on every result.
 * @param {string} opts.endpoint - URL passed to `fetch`.
 * @param {string} opts.apiKeyEnv - Name of the environment variable holding the API key.
 * @param {string} opts.model - Model identifier sent in the request body.
 * @param {Record<string, string>} [opts.headers] - Extra headers; these override the defaults.
 * @param {number} [opts.timeoutMs] - Abort deadline for the whole request (default 30000).
 * @returns {{ name: string, kind: "http", timeoutMs: number, ask: (prompt: string) => Promise<object> }}
 *   Adapter whose `ask` always resolves to a result envelope; failures carry `ok: false` and a `reason`.
 */
export function httpAdapter(opts) {
    return {
        name: opts.name,
        kind: "http",
        timeoutMs: opts.timeoutMs ?? 30000,
        async ask(prompt) {
            const t0 = performance.now();
            const failure = (reason) => ({
                vendor: opts.name, kind: "http", ok: false, text: "", confidence: 0,
                latencyMs: +(performance.now() - t0).toFixed(2),
                reason,
            });
            const apiKey = process.env[opts.apiKeyEnv];
            if (!apiKey) {
                return failure(`missing env ${opts.apiKeyEnv}`);
            }
            const timeoutMs = opts.timeoutMs ?? 30000;
            const controller = new AbortController();
            const timer = setTimeout(() => controller.abort(), timeoutMs);
            try {
                const res = await fetch(opts.endpoint, {
                    method: "POST",
                    headers: {
                        "content-type": "application/json",
                        "authorization": `Bearer ${apiKey}`,
                        ...(opts.headers ?? {}),
                    },
                    body: JSON.stringify({
                        model: opts.model,
                        messages: [{ role: "user", content: prompt }],
                        max_tokens: 800,
                    }),
                    signal: controller.signal,
                });
                if (!res.ok) {
                    return failure(`http ${res.status}`);
                }
                // The timer is still armed here, so a stalled body read is also bounded.
                const json = await res.json();
                const text = json.choices?.[0]?.message?.content ?? "";
                return {
                    vendor: opts.name,
                    kind: "http",
                    ok: text.length > 0,
                    text,
                    confidence: 0.7,
                    latencyMs: +(performance.now() - t0).toFixed(2),
                    meta: { model: json.model, requestId: json.id },
                    reason: text.length === 0 ? "empty response body" : undefined,
                };
            }
            catch (e) {
                // Report our own deadline explicitly rather than a generic abort message.
                return failure(controller.signal.aborted
                    ? `timeout after ${timeoutMs}ms`
                    : (e?.message ?? String(e)));
            }
            finally {
                // Always release the timer, including when fetch or json() throws.
                clearTimeout(timer);
            }
        },
    };
}
|
|
130
|
+
// Command names must be a lowercase letter followed by 1-40 of [a-z0-9_-];
// this rejects paths, spaces and shell metacharacters.
const SAFE_COMMAND_ALLOWLIST = /^[a-z][a-z0-9_-]{1,40}$/;
/**
 * Build an adapter that runs a vendor CLI and captures its stdout.
 *
 * Every occurrence of the literal `{prompt}` in `opts.args` is replaced with
 * the prompt text, verbatim. The command is run with `spawnSync` and the
 * argument array is passed straight through.
 *
 * NOTE(review): `spawnSync` blocks the event loop while the child runs, so
 * this adapter will not overlap with other adapters awaited concurrently.
 *
 * @param {object} opts
 * @param {string} opts.name - Vendor label, echoed as `vendor` on every result.
 * @param {string} opts.command - Executable name; must match `SAFE_COMMAND_ALLOWLIST`.
 * @param {string[]} [opts.args] - Argument templates; `{prompt}` is substituted.
 * @param {number} [opts.timeoutMs] - Child-process timeout (default 30000).
 * @returns {{ name: string, kind: "cli", timeoutMs: number, ask: (prompt: string) => Promise<object> }}
 *   Adapter whose `ask` always resolves to a result envelope; failures carry `ok: false` and a `reason`.
 */
export function cliAdapter(opts) {
    return {
        name: opts.name,
        kind: "cli",
        timeoutMs: opts.timeoutMs ?? 30000,
        async ask(prompt) {
            const t0 = performance.now();
            const elapsedMs = () => +(performance.now() - t0).toFixed(2);
            const failure = (reason) => ({
                vendor: opts.name, kind: "cli", ok: false, text: "", confidence: 0,
                latencyMs: elapsedMs(),
                reason,
            });
            if (!SAFE_COMMAND_ALLOWLIST.test(opts.command)) {
                return failure(`command '${opts.command}' refused by allowlist (only [a-z][a-z0-9_-]{1,40})`);
            }
            try {
                // A replacer function keeps `$&`, `$1`, `$$` in the prompt literal;
                // replaceAll fills every placeholder, not just the first.
                const resolvedArgs = (opts.args ?? []).map((a) => a.replaceAll("{prompt}", () => prompt));
                const r = spawnSync(opts.command, resolvedArgs, {
                    encoding: "utf8",
                    timeout: opts.timeoutMs ?? 30000,
                });
                const text = (r.stdout ?? "").trim();
                let reason;
                if (r.error) {
                    // Spawn-level failure (command not found, timeout, ...): status is null here.
                    reason = `spawn failed: ${r.error.message}`;
                }
                else if (r.status !== 0) {
                    reason = `exit=${r.status}: ${(r.stderr ?? "").slice(0, 120)}`;
                }
                else if (text.length === 0) {
                    reason = "empty stdout";
                }
                return {
                    vendor: opts.name,
                    kind: "cli",
                    ok: !r.error && r.status === 0 && text.length > 0,
                    text,
                    confidence: 0.7,
                    latencyMs: elapsedMs(),
                    reason,
                };
            }
            catch (e) {
                return failure(e?.message ?? String(e));
            }
        },
    };
}
|
|
172
|
+
//# sourceMappingURL=adapters.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapters.js","sourceRoot":"","sources":["../../src/diff_arena/adapters.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AA0CzC,MAAM,UAAU,WAAW,CAAC,IAAqB;IAC/C,OAAO;QACL,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,IAAI,EAAE,MAAM;QACZ,KAAK,CAAC,GAAG,CAAC,MAAc;YACtB,MAAM,EAAE,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC7B,mFAAmF;YACnF,IAAI,IAAI,CAAC,kBAAkB,IAAI,IAAI,CAAC,kBAAkB,GAAG,CAAC,EAAE,CAAC;gBAC3D,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC;YACnE,CAAC;YACD,MAAM,IAAI,GAAG,CAAC,IAAI,CAAC,SAAS,IAAI,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YACzE,OAAO;gBACL,MAAM,EAAE,IAAI,CAAC,IAAI;gBACjB,IAAI,EAAE,MAAM;gBACZ,EAAE,EAAE,IAAI;gBACR,IAAI;gBACJ,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,GAAG;gBAClC,SAAS,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;aAChD,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,SAAS,oBAAoB,CAAC,UAAkB;IAC9C,MAAM,OAAO,GAAG;QACd,oEAAoE;QACpE,+DAA+D;QAC/D,mEAAmE;QACnE,6DAA6D;KAC9D,CAAC;IACF,OAAO,CAAC,MAAc,EAAE,EAAE;QACxB,MAAM,CAAC,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,GAAG,GAAG,GAAG,MAAM,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC/E,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;QACzD,OAAO,IAAI,UAAU,KAAK,OAAO,CAAC,GAAG,CAAC,SAAS,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC;IACxE,CAAC,CAAC;AACJ,CAAC;AAiBD,MAAM,UAAU,WAAW,CAAC,IAAqB;IAC/C,OAAO;QACL,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,IAAI,EAAE,MAAM;QACZ,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,KAAK;QAClC,KAAK,CAAC,GAAG,CAAC,MAAc;YACtB,MAAM,EAAE,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC7B,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC3C,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO;oBACL,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC;oBACnE,SAAS,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;oBAC/
C,MAAM,EAAE,eAAe,IAAI,CAAC,SAAS,EAAE;iBACxC,CAAC;YACJ,CAAC;YACD,IAAI,CAAC;gBACH,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;gBACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC,CAAC;gBAC5E,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE;oBACrC,MAAM,EAAE,MAAM;oBACd,OAAO,EAAE;wBACP,cAAc,EAAE,kBAAkB;wBAClC,eAAe,EAAE,UAAU,MAAM,EAAE;wBACnC,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC;qBACxB;oBACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;wBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;wBAC7C,UAAU,EAAE,GAAG;qBAChB,CAAC;oBACF,MAAM,EAAE,UAAU,CAAC,MAAM;iBAC1B,CAAC,CAAC;gBACH,YAAY,CAAC,KAAK,CAAC,CAAC;gBACpB,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;oBACZ,OAAO;wBACL,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC;wBACnE,SAAS,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;wBAC/C,MAAM,EAAE,QAAQ,GAAG,CAAC,MAAM,EAAE;qBAC7B,CAAC;gBACJ,CAAC;gBACD,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAA0F,CAAC;gBACtH,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;gBACvD,OAAO;oBACL,MAAM,EAAE,IAAI,CAAC,IAAI;oBACjB,IAAI,EAAE,MAAM;oBACZ,EAAE,EAAE,IAAI,CAAC,MAAM,GAAG,CAAC;oBACnB,IAAI;oBACJ,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;oBAC/C,IAAI,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,SAAS,EAAE,IAAI,CAAC,EAAE,EAAE;oBAC/C,MAAM,EAAE,IAAI,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,SAAS;iBAC9D,CAAC;YACJ,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO;oBACL,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC;oBACnE,SAAS,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;oBAC/C,MAAM,EAAG,CAAW,CAAC,OAAO;iBAC7B,CAAC;YACJ,CAAC;QACH,CAAC;KACF,CAAC;AACJ,CAAC;AAaD,MAAM,sBAAsB,GAAG,yBAAyB,CAAC;AAEzD,MAAM,UAAU,UAAU,CAAC,IAAoB;IAC7C,OAAO;QACL,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,IAAI,EAAE,KAAK;QAC
X,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,KAAK;QAClC,KAAK,CAAC,GAAG,CAAC,MAAc;YACtB,MAAM,EAAE,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC7B,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC/C,OAAO;oBACL,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC;oBAClE,SAAS,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;oBAC/C,MAAM,EAAE,YAAY,IAAI,CAAC,OAAO,qDAAqD;iBACtF,CAAC;YACJ,CAAC;YACD,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC;YACzE,IAAI,CAAC;gBACH,MAAM,CAAC,GAAG,SAAS,CAAC,IAAI,CAAC,OAAO,EAAE,YAAY,EAAE;oBAC9C,QAAQ,EAAE,MAAM;oBAChB,OAAO,EAAE,IAAI,CAAC,SAAS,IAAI,KAAK;iBACjC,CAAC,CAAC;gBACH,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;gBACrC,OAAO;oBACL,MAAM,EAAE,IAAI,CAAC,IAAI;oBACjB,IAAI,EAAE,KAAK;oBACX,EAAE,EAAE,CAAC,CAAC,MAAM,KAAK,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;oBACrC,IAAI;oBACJ,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;oBAC/C,MAAM,EAAE,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,SAAS,CAAC;iBAClI,CAAC;YACJ,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO;oBACL,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC;oBAClE,SAAS,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;oBAC/C,MAAM,EAAG,CAAW,CAAC,OAAO;iBAC7B,CAAC;YACJ,CAAC;QACH,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v2.64.0 — ARENA multi-axis consensus scoring.
|
|
3
|
+
*
|
|
4
|
+
* Single-axis text-similarity (Jaccard on words) is too coarse: two
|
|
5
|
+
* answers can use overlapping vocabulary while disagreeing on the
|
|
6
|
+
* concrete numbers, OR disagree on phrasing while citing the same
|
|
7
|
+
* facts. ARENA scores 4 axes:
|
|
8
|
+
*
|
|
9
|
+
* 1. JACCARD — token-set overlap on bigrams (more discriminative than
|
|
10
|
+
* single-token Jaccard).
|
|
11
|
+
* 2. NUMERIC — do they cite the same numbers / versions / dates?
|
|
12
|
+
* Two answers about React that both say "19" agree on version.
|
|
13
|
+
* 3. SENTIMENT — assert vs hedge balance (one says "definitely",
|
|
14
|
+
* another says "perhaps" → disagreement on confidence).
|
|
15
|
+
* 4. LENGTH — log-ratio of word counts. Very different depth = often
|
|
16
|
+
* different scope.
|
|
17
|
+
*
|
|
18
|
+
* Composite consensus = weighted mean. Pairwise scores fold into an
|
|
19
|
+
* NxN matrix; per-vendor "outlier score" = 1 - avg agreement with
|
|
20
|
+
* everyone else. Identifies which vendor is the disagreer (or
|
|
21
|
+
* possibly: the only one with the right answer).
|
|
22
|
+
*
|
|
23
|
+
* Pure deterministic.
|
|
24
|
+
*/
|
|
25
|
+
/** Input to `computeConsensus`: one entry per vendor that answered. */
export interface ConsensusInput {
    /** Vendor label plus the raw answer text; only the text is scored. */
    responses: {
        vendor: string;
        text: string;
    }[];
}
|
|
32
|
+
/** Agreement between two vendor answers, broken down per axis. */
export interface PairwiseScore {
    /** Vendor label of the first answer. */
    a: string;
    /** Vendor label of the second answer. */
    b: string;
    /** Jaccard overlap of the two answers' word-bigram sets. */
    jaccard: number;
    /** Jaccard overlap of the numbers / versions / dates each answer cites. */
    numeric: number;
    /** Alignment of assert-vs-hedge tone; 1 means identical tone. */
    sentiment: number;
    /** Alignment of word counts based on their log-ratio, clamped at 0. */
    length: number;
    /** Weighted mean of the four axes (0.50 / 0.30 / 0.10 / 0.10). */
    composite: number;
}
|
|
41
|
+
/** How far a single vendor sits from the rest of the field. */
export interface VendorOutlier {
    vendor: string;
    /** Mean composite score over every pair this vendor takes part in. */
    meanAgreement: number;
    /** `1 - meanAgreement`; a higher value means more of an outlier. */
    outlierScore: number;
}
|
|
48
|
+
/** Aggregate result of comparing every pair of vendor answers. */
export interface ConsensusResult {
    /** Mean composite score across all pairs. */
    score: number;
    /** Bucketed score: "high" >= 0.70, "medium" >= 0.40, otherwise "low". */
    agreement: "high" | "medium" | "low";
    /** One entry per unordered pair of responses (N choose 2). */
    pairs: PairwiseScore[];
    /** Per-vendor outlier diagnosis. */
    outliers: VendorOutlier[];
    /** Tokens / numbers / facts that ALL vendors agree on (intersection). */
    commonFacts: string[];
    /** Tokens / numbers that a SINGLE vendor mentioned (disputed). */
    uniqueClaims: {
        vendor: string;
        claim: string;
    }[];
}
|
|
65
|
+
/**
 * Score the agreement between two vendor answers on all four axes.
 *
 * @param a - First vendor's label and answer text.
 * @param b - Second vendor's label and answer text.
 * @returns Per-axis scores plus the weighted composite.
 */
export declare function pairwiseScore(a: { vendor: string; text: string }, b: { vendor: string; text: string }): PairwiseScore;
|
|
72
|
+
/**
 * Compare every pair of vendor responses and summarise their agreement.
 *
 * @param input - The per-vendor responses to compare.
 * @returns Overall score, agreement bucket, pairwise scores, per-vendor
 *   outlier ranking, and the common / unique claims found.
 */
export declare function computeConsensus(input: ConsensusInput): ConsensusResult;
|
|
73
|
+
//# sourceMappingURL=consensus.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"consensus.d.ts","sourceRoot":"","sources":["../../src/diff_arena/consensus.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,MAAM,WAAW,cAAc;IAC7B,sEAAsE;IACtE,SAAS,EAAE,KAAK,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACpD;AAED,MAAM,WAAW,aAAa;IAC5B,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,6CAA6C;IAC7C,aAAa,EAAE,MAAM,CAAC;IACtB,yCAAyC;IACzC,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,uCAAuC;IACvC,KAAK,EAAE,MAAM,CAAC;IACd,mDAAmD;IACnD,SAAS,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACrC,2BAA2B;IAC3B,KAAK,EAAE,aAAa,EAAE,CAAC;IACvB,oCAAoC;IACpC,QAAQ,EAAE,aAAa,EAAE,CAAC;IAC1B,oEAAoE;IACpE,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,6DAA6D;IAC7D,YAAY,EAAE,KAAK,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACxD;AAuCD,wBAAgB,aAAa,CAAC,CAAC,EAAE;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,EAAE,CAAC,EAAE;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG,aAAa,CAqBrH;AAED,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,cAAc,GAAG,eAAe,CAiDvE"}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v2.64.0 — ARENA multi-axis consensus scoring.
|
|
3
|
+
*
|
|
4
|
+
* Single-axis text-similarity (Jaccard on words) is too coarse: two
|
|
5
|
+
* answers can use overlapping vocabulary while disagreeing on the
|
|
6
|
+
* concrete numbers, OR disagree on phrasing while citing the same
|
|
7
|
+
* facts. ARENA scores 4 axes:
|
|
8
|
+
*
|
|
9
|
+
* 1. JACCARD — token-set overlap on bigrams (more discriminative than
|
|
10
|
+
* single-token Jaccard).
|
|
11
|
+
* 2. NUMERIC — do they cite the same numbers / versions / dates?
|
|
12
|
+
* Two answers about React that both say "19" agree on version.
|
|
13
|
+
* 3. SENTIMENT — assert vs hedge balance (one says "definitely",
|
|
14
|
+
* another says "perhaps" → disagreement on confidence).
|
|
15
|
+
* 4. LENGTH — log-ratio of word counts. Very different depth = often
|
|
16
|
+
* different scope.
|
|
17
|
+
*
|
|
18
|
+
* Composite consensus = weighted mean. Pairwise scores fold into an
|
|
19
|
+
* NxN matrix; per-vendor "outlier score" = 1 - avg agreement with
|
|
20
|
+
* everyone else. Identifies which vendor is the disagreer (or
|
|
21
|
+
* possibly: the only one with the right answer).
|
|
22
|
+
*
|
|
23
|
+
* Pure deterministic.
|
|
24
|
+
*/
|
|
25
|
+
// Words and phrases that soften a claim; each one present counts toward the "hedged" side.
const HEDGES = [
    "may",
    "might",
    "could",
    "perhaps",
    "possibly",
    "seems",
    "appears",
    "approximately",
    "around",
    "about",
    "probably",
    "likely",
    "tends to",
];
// Words that state a claim without reservation; each one present counts toward the "absolute" side.
const ABSOLUTES = [
    "always",
    "never",
    "all",
    "every",
    "definitely",
    "certainly",
    "absolutely",
    "must",
    "guaranteed",
];
|
|
27
|
+
/**
 * Split text into lowercase word tokens of at least two characters.
 *
 * Separators are any run of characters that are not Unicode letters, marks
 * or numbers, so accented and non-Latin words survive. An ASCII-only split
 * would drop them entirely, leaving two non-English answers with no tokens.
 * For pure-ASCII input the result is the same as splitting on `[^a-z0-9]+`.
 *
 * @param {string} text - Raw answer text; null/undefined is treated as empty.
 * @returns {string[]} Tokens in order of appearance.
 */
function tokens(text) {
    return String(text ?? "")
        .toLowerCase()
        .split(/[^\p{L}\p{M}\p{N}]+/u)
        .filter((t) => t.length >= 2);
}
|
|
30
|
+
/**
 * Build the set of adjacent token pairs, each joined with "_".
 *
 * @param {string[]} toks - Tokens in reading order.
 * @returns {Set<string>} Distinct bigrams; empty when there are fewer than two tokens.
 */
function bigrams(toks) {
    const pairs = toks.slice(1).map((next, i) => `${toks[i]}_${next}`);
    return new Set(pairs);
}
|
|
36
|
+
/**
 * Jaccard similarity of two sets: |A ∩ B| / |A ∪ B|.
 *
 * Two empty sets count as identical (1); exactly one empty set counts as
 * fully disjoint (0).
 *
 * @param {Set<string>} a
 * @param {Set<string>} b
 * @returns {number} Similarity in the range 0..1.
 */
function jaccardSet(a, b) {
    const aEmpty = a.size === 0;
    const bEmpty = b.size === 0;
    if (aEmpty || bEmpty) {
        return aEmpty && bEmpty ? 1 : 0;
    }
    const shared = [...a].filter((item) => b.has(item)).length;
    const unionSize = a.size + b.size - shared;
    return shared / unionSize;
}
|
|
47
|
+
// Matches ISO dates (YYYY-MM-DD) and numbers / versions with an optional "v"
// prefix and up to three dotted parts. The date branch must come FIRST:
// regex alternation takes the first branch that matches, and the generic
// number branch would otherwise consume "2024" and split the date apart.
// A separate `\d{3,}` branch is unnecessary because `\d+` already covers it.
const NUM_RX = /\b(?:\d{4}-\d{2}-\d{2}|v?\d+(?:\.\d+){0,3})\b/gi;
/**
 * Collect the distinct numbers, versions and dates cited in a text.
 *
 * @param {string} text - Raw answer text.
 * @returns {Set<string>} Lowercased matches in order of first appearance.
 */
function extractNumbers(text) {
    // String.prototype.match with a /g regex starts from index 0 on every
    // call, so sharing the module-level regex here is safe.
    const m = text.match(NUM_RX) ?? [];
    return new Set(m.map((x) => x.toLowerCase()));
}
|
|
52
|
+
/**
 * Score how assertive a text is, from -1 (only hedges) to +1 (only absolutes).
 *
 * Counts how many distinct entries of `HEDGES` and `ABSOLUTES` occur in the
 * text as whole words or phrases. Matching on word boundaries matters: a
 * plain substring test would count "all" inside "usually" or "install",
 * "may" inside "mayor", and "must" inside "mustard".
 *
 * @param {string} text - Raw answer text.
 * @returns {number} `(absolutes - hedges) / (absolutes + hedges)`, or 0 when neither occurs.
 */
function sentimentScore(text) {
    // Normalise to single-space-separated lowercase words, padded with spaces,
    // so " word " and " two words " lookups only hit whole-word occurrences.
    const words = text.toLowerCase().split(/[^a-z0-9]+/).filter(Boolean);
    const padded = ` ${words.join(" ")} `;
    const countPresent = (lexicon) => lexicon.filter((w) => padded.includes(` ${w} `)).length;
    const h = countPresent(HEDGES);
    const a = countPresent(ABSOLUTES);
    if (h + a === 0) {
        return 0; // neutral
    }
    return (a - h) / (h + a); // -1 (hedged) … +1 (absolute)
}
|
|
65
|
+
/**
 * Score how closely two vendor answers agree on four axes.
 *
 * - jaccard: Jaccard overlap of word bigrams.
 * - numeric: Jaccard overlap of extracted numbers / versions / dates.
 * - sentiment: 1 - |sA - sB| / 2, where sA and sB are the sentiment scores.
 * - length: 1 - |ln(wordsA / wordsB)| / 3, clamped at 0.
 *
 * The composite is 0.50 * jaccard + 0.30 * numeric + 0.10 * sentiment +
 * 0.10 * length, computed from the unrounded axis values.
 *
 * @param {{ vendor: string, text: string }} a - First answer.
 * @param {{ vendor: string, text: string }} b - Second answer.
 * @returns {{ a: string, b: string, jaccard: number, numeric: number, sentiment: number, length: number, composite: number }}
 *   Scores rounded to four decimal places.
 */
export function pairwiseScore(a, b) {
    const round4 = (value) => +value.toFixed(4);
    const wordsA = tokens(a.text);
    const wordsB = tokens(b.text);
    const jaccard = jaccardSet(bigrams(wordsA), bigrams(wordsB));
    const numeric = jaccardSet(extractNumbers(a.text), extractNumbers(b.text));
    const toneGap = Math.abs(sentimentScore(a.text) - sentimentScore(b.text));
    const sentiment = 1 - toneGap / 2;
    // Treat an empty answer as one word so the ratio stays finite.
    const countA = Math.max(1, wordsA.length);
    const countB = Math.max(1, wordsB.length);
    const logRatio = Math.abs(Math.log(countA / countB));
    const length = Math.max(0, 1 - logRatio / 3);
    // Jaccard is the primary signal and numeric the strongest tie-breaker; sentiment and length are supportive.
    const composite = round4(0.50 * jaccard + 0.30 * numeric + 0.10 * sentiment + 0.10 * length);
    return {
        a: a.vendor,
        b: b.vendor,
        jaccard: round4(jaccard),
        numeric: round4(numeric),
        sentiment: round4(sentiment),
        length: round4(length),
        composite,
    };
}
|
|
87
|
+
/**
 * Compare every pair of vendor responses and summarise their agreement.
 *
 * With fewer than two responses there is nothing to compare: the result is a
 * perfect score with no pairs or outliers, and `commonFacts` holds the first
 * 20 tokens of the single response (if any).
 *
 * Otherwise:
 * - `score` is the mean pairwise composite, bucketed into `agreement`
 *   ("high" >= 0.70, "medium" >= 0.40, else "low").
 * - `outliers` lists each vendor's mean agreement, sorted with the largest
 *   outlier score first.
 * - `commonFacts` are the extracted numbers present in every response.
 * - `uniqueClaims` are the extracted numbers present in exactly one response.
 *
 * @param {{ responses: Array<{ vendor: string, text: string }> }} input
 * @returns {{ score: number, agreement: "high"|"medium"|"low", pairs: object[], outliers: object[], commonFacts: string[], uniqueClaims: Array<{ vendor: string, claim: string }> }}
 */
export function computeConsensus(input) {
    const responses = input.responses;
    const total = responses.length;
    if (total < 2) {
        const only = responses[0];
        return {
            score: 1,
            agreement: "high",
            pairs: [],
            outliers: [],
            commonFacts: only ? tokens(only.text).slice(0, 20) : [],
            uniqueClaims: [],
        };
    }

    // Every unordered pair (i < j), in row-major order.
    const pairs = responses.flatMap((left, i) => responses.slice(i + 1).map((right) => pairwiseScore(left, right)));

    let compositeSum = 0;
    for (const pair of pairs) {
        compositeSum += pair.composite;
    }
    const score = +(compositeSum / pairs.length).toFixed(4);

    let agreement = "low";
    if (score >= 0.70) {
        agreement = "high";
    }
    else if (score >= 0.40) {
        agreement = "medium";
    }

    // Per-vendor running totals over every pair the vendor takes part in.
    const vendorTotals = new Map();
    const tally = (vendor, composite) => {
        const entry = vendorTotals.get(vendor) ?? { sum: 0, count: 0 };
        entry.sum += composite;
        entry.count += 1;
        vendorTotals.set(vendor, entry);
    };
    for (const pair of pairs) {
        tally(pair.a, pair.composite);
        tally(pair.b, pair.composite);
    }

    const outliers = Array.from(vendorTotals, ([vendor, totals]) => {
        const meanAgreement = totals.count === 0 ? 0 : totals.sum / totals.count;
        return {
            vendor,
            meanAgreement: +meanAgreement.toFixed(4),
            outlierScore: +(1 - meanAgreement).toFixed(4),
        };
    });
    outliers.sort((x, y) => y.outlierScore - x.outlierScore);

    // Numbers cited by each response, and their union in order of first appearance.
    const numSets = responses.map((response) => extractNumbers(response.text));
    const allNumbers = new Set(numSets.flatMap((cited) => [...cited]));

    // Common facts: numbers that ALL vendors mentioned.
    const commonFacts = [...allNumbers].filter((num) => numSets.every((cited) => cited.has(num)));

    // Unique claims: numbers that exactly ONE vendor mentioned.
    const uniqueClaims = [];
    for (const num of allNumbers) {
        let holder = -1;
        let holders = 0;
        numSets.forEach((cited, idx) => {
            if (cited.has(num)) {
                holder = idx;
                holders += 1;
            }
        });
        if (holders === 1) {
            uniqueClaims.push({ vendor: responses[holder].vendor, claim: num });
        }
    }

    return { score, agreement, pairs, outliers, commonFacts, uniqueClaims };
}
|
|
138
|
+
//# sourceMappingURL=consensus.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"consensus.js","sourceRoot":"","sources":["../../src/diff_arena/consensus.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAwCH,MAAM,MAAM,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;AAC1J,MAAM,SAAS,GAAG,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;AAErH,SAAS,MAAM,CAAC,IAAY;IAC1B,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;AAC7E,CAAC;AAED,SAAS,OAAO,CAAC,IAAc;IAC7B,MAAM,CAAC,GAAG,IAAI,GAAG,EAAU,CAAC;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC7E,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,UAAU,CAAC,CAAc,EAAE,CAAc;IAChD,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC3C,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,IAAI,CAAC;QAAE,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,KAAK,EAAE,CAAC;IACzC,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,MAAM,GAAG,wDAAwD,CAAC;AAExE,SAAS,cAAc,CAAC,IAAY;IAClC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;IACnC,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;AAChD,CAAC;AAED,SAAS,cAAc,CAAC,IAAY;IAClC,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACjC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC;IACjB,KAAK,MAAM,CAAC,IAAI,MAAM;QAAE,IAAI,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC;YAAE,CAAC,EAAE,CAAC;IACnD,KAAK,MAAM,CAAC,IAAI,SAAS;QAAE,IAAI,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC;YAAE,CAAC,EAAE,CAAC;IACtD,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC,CAAC,UAAU;IACrC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,8BAA8
B;AAC1D,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,CAAmC,EAAE,CAAmC;IACpG,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC5B,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC1B,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC1B,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IACrC,MAAM,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACvC,MAAM,EAAE,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,EAAE,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAClC,yCAAyC;IACzC,MAAM,SAAS,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC;IAC5C,6DAA6D;IAC7D,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IACpC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IACpC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;IACvC,iGAAiG;IACjG,MAAM,SAAS,GAAG,CAAC,CAAC,IAAI,GAAG,OAAO,GAAG,IAAI,GAAG,OAAO,GAAG,IAAI,GAAG,SAAS,GAAG,IAAI,GAAG,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IACnG,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC;AAC3K,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,KAAqB;IACpD,MAAM,CAAC,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC;IACjC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QACV,OAAO;YACL,KAAK,EAAE,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE;YACpD,WAAW,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE;YACnF,YAAY,EAAE,EAAE;SACjB,CAAC;IACJ,CAAC;IACD,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAA
E,CAAC,EAAE;QAAE,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3D,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAE,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC;QACtE,CAAC;IACD,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IACtF,MAAM,SAAS,GAAG,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;IAE5E,+DAA+D;IAC/D,MAAM,YAAY,GAAG,IAAI,GAAG,EAA0C,CAAC;IACvE,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,EAAE,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;QACzD,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC,SAAS,CAAC;QAAC,EAAE,CAAC,KAAK,EAAE,CAAC;QAClC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC1B,MAAM,EAAE,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;QACzD,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC,SAAS,CAAC;QAAC,EAAE,CAAC,KAAK,EAAE,CAAC;QAClC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC5B,CAAC;IACD,MAAM,QAAQ,GAAoB,EAAE,CAAC;IACrC,KAAK,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,YAAY,EAAE,CAAC;QACvC,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC;QAC1D,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IACrH,CAAC;IACD,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC;IAEzD,oDAAoD;IACpD,MAAM,OAAO,GAAG,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACnE,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;IACrC,KAAK,MAAM,CAAC,IAAI,OAAO;QAAE,KAAK,MAAM,CAAC,IAAI,CAAC;YAAE,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC9D,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,
EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAE7F,4DAA4D;IAC5D,MAAM,YAAY,GAAoC,EAAE,CAAC;IACzD,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;QACxE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,YAAY,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAE,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;QACjE,CAAC;IACH,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,YAAY,EAAE,CAAC;AAC1E,CAAC"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v2.64.0 — DIFFERENTIAL ARENA: multi-vendor consensus by default.
|
|
3
|
+
*
|
|
4
|
+
* Continues Mneme MCP user-roadmap. v2.60-v2.63 quartet shipped
|
|
5
|
+
* (bodyguard / diplomat / conscience / memory); v2.64 adds the
|
|
6
|
+
* multi-vendor consensus primitive: when one agent (Claude) gets a
|
|
7
|
+
* prompt, it can silently delegate to Mneme `diff_arena.ask` which
|
|
8
|
+
* parallel-calls 2-3 OTHER vendors (GPT, Gemini, etc.) and returns the
|
|
9
|
+
* diff + Mneme-graded consensus back to Claude's context.
|
|
10
|
+
*
|
|
11
|
+
* User stops paying $50/mo to FIVE AI vendors. Pays $50/mo to Mneme;
|
|
12
|
+
* Mneme blends them.
|
|
13
|
+
*
|
|
14
|
+
* (Distinct from v2.18 ARENA which is the public scoreboard primitive.
|
|
15
|
+
* DIFF_ARENA = differential / consensus angle. Different namespace.)
|
|
16
|
+
*
|
|
17
|
+
* 5 wild innovations:
|
|
18
|
+
*
|
|
19
|
+
* 1. PLUGGABLE ADAPTERS (`adapters.ts`) — mock / http / cli kinds.
|
|
20
|
+
* Ships mock out of box; users plug real vendors via env keys.
|
|
21
|
+
* Vendor-agnostic — works with any OpenAI-compatible endpoint OR
|
|
22
|
+
* any CLI-based vendor (Gemini CLI, Grok CLI, ollama).
|
|
23
|
+
*
|
|
24
|
+
* 2. MULTI-AXIS CONSENSUS (`consensus.ts`) — 4-dimensional pairwise
|
|
25
|
+
* scoring: Jaccard bigram + numeric agreement + sentiment +
|
|
26
|
+
* length. Catches the case where two vendors use overlapping
|
|
27
|
+
* vocabulary but disagree on concrete numbers.
|
|
28
|
+
*
|
|
29
|
+
* 3. PER-VENDOR OUTLIER DIAGNOSIS — mean agreement of each vendor
|
|
30
|
+
* vs all others; identifies the disagreer (or sole-truth-bearer).
|
|
31
|
+
*
|
|
32
|
+
* 4. COMMON-FACTS / UNIQUE-CLAIMS EXTRACTION — pulls numbers/
|
|
33
|
+
* versions/dates ALL vendors agree on (trust these) and ones
|
|
34
|
+
* ONLY ONE vendor mentioned (verify these).
|
|
35
|
+
*
|
|
36
|
+
* 5. HMAC-CHAINED ARENA LEDGER (`.mneme/diff_arena/rounds.jsonl`)
|
|
37
|
+
* — every ask + per-vendor response recorded; same canonical-JSON
|
|
38
|
+
* convention as PASSPORT + MIRRAGE + TIME-CRYSTAL.
|
|
39
|
+
*
|
|
40
|
+
* Pure ESM. Defensive — every vendor call wrapped in promise-race
|
|
41
|
+
* timeout + try/catch; ARENA itself never throws.
|
|
42
|
+
*/
|
|
43
|
+
import { type VendorAdapter, type VendorResponse } from "./adapters.js";
|
|
44
|
+
import { type ConsensusResult } from "./consensus.js";
|
|
45
|
+
export interface AskInput {
|
|
46
|
+
prompt: string;
|
|
47
|
+
/** Vendor adapters to query in parallel. */
|
|
48
|
+
vendors: VendorAdapter[];
|
|
49
|
+
/** Per-vendor timeout ms (default 30000). */
|
|
50
|
+
timeoutMs?: number;
|
|
51
|
+
/** Optional ACGV grader: given a response's text, resolves to its verdict (CONFIRMED, REFUTED, ...). */
|
|
52
|
+
acgvGrader?: (text: string) => Promise<AcgvVerdict>;
|
|
53
|
+
/** Working directory for ledger persistence. */
|
|
54
|
+
cwd?: string;
|
|
55
|
+
/** Skip ledger append (tests). */
|
|
56
|
+
noLedger?: boolean;
|
|
57
|
+
}
|
|
58
|
+
export interface AcgvVerdict {
|
|
59
|
+
outcome: "CONFIRMED" | "REFUTED" | "INCONCLUSIVE" | "DISPUTED" | "IMPOSSIBLE";
|
|
60
|
+
evidence?: string;
|
|
61
|
+
confidence?: number;
|
|
62
|
+
}
|
|
63
|
+
export interface AskResult {
|
|
64
|
+
prompt: string;
|
|
65
|
+
at: string;
|
|
66
|
+
vendorsAsked: string[];
|
|
67
|
+
responses: Array<VendorResponse & {
|
|
68
|
+
acgv?: AcgvVerdict;
|
|
69
|
+
}>;
|
|
70
|
+
consensus: ConsensusResult;
|
|
71
|
+
/** Composed suggested answer that surfaces common facts + flags disputed claims. */
|
|
72
|
+
suggestedAnswer: string;
|
|
73
|
+
/** Latency for the round, in milliseconds (a single number). */
|
|
74
|
+
latencyMs: number;
|
|
75
|
+
/** HMAC of the canonical body. */
|
|
76
|
+
hmac: string;
|
|
77
|
+
}
|
|
78
|
+
interface LedgerEntry {
|
|
79
|
+
kind: "ask" | "vendor_response";
|
|
80
|
+
at: string;
|
|
81
|
+
roundId: string;
|
|
82
|
+
who: string;
|
|
83
|
+
detail: string;
|
|
84
|
+
prevHmac: string;
|
|
85
|
+
hmac: string;
|
|
86
|
+
}
|
|
87
|
+
export declare function readLedger(cwd: string): LedgerEntry[];
|
|
88
|
+
export declare function verifyLedgerChain(cwd: string): {
|
|
89
|
+
ok: boolean;
|
|
90
|
+
rows: number;
|
|
91
|
+
brokenAt?: number;
|
|
92
|
+
};
|
|
93
|
+
export declare function diffArenaAsk(input: AskInput): Promise<AskResult>;
|
|
94
|
+
export declare function verifyAskResult(r: AskResult): boolean;
|
|
95
|
+
export declare function renderArenaBanner(r: AskResult): string;
|
|
96
|
+
export { mockAdapter, httpAdapter, cliAdapter } from "./adapters.js";
|
|
97
|
+
export type { VendorAdapter, VendorResponse } from "./adapters.js";
|
|
98
|
+
export { computeConsensus, pairwiseScore } from "./consensus.js";
|
|
99
|
+
export type { ConsensusInput, ConsensusResult, PairwiseScore, VendorOutlier } from "./consensus.js";
|
|
100
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/diff_arena/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyCG;AAMH,OAAO,EAAwC,KAAK,aAAa,EAAE,KAAK,cAAc,EAAE,MAAM,eAAe,CAAC;AAC9G,OAAO,EAAmC,KAAK,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAQvF,MAAM,WAAW,QAAQ;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,4CAA4C;IAC5C,OAAO,EAAE,aAAa,EAAE,CAAC;IACzB,6CAA6C;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,sEAAsE;IACtE,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC;IACpD,4CAA4C;IAC5C,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,kCAAkC;IAClC,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,WAAW,GAAG,SAAS,GAAG,cAAc,GAAG,UAAU,GAAG,YAAY,CAAC;IAC9E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,EAAE,EAAE,MAAM,CAAC;IACX,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,EAAE,KAAK,CAAC,cAAc,GAAG;QAAE,IAAI,CAAC,EAAE,WAAW,CAAA;KAAE,CAAC,CAAC;IAC1D,SAAS,EAAE,eAAe,CAAC;IAC3B,oFAAoF;IACpF,eAAe,EAAE,MAAM,CAAC;IACxB,sCAAsC;IACtC,SAAS,EAAE,MAAM,CAAC;IAClB,kCAAkC;IAClC,IAAI,EAAE,MAAM,CAAC;CACd;AAmBD,UAAU,WAAW;IACnB,IAAI,EAAE,KAAK,GAAG,iBAAiB,CAAC;IAChC,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;CACd;AA0BD,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,WAAW,EAAE,CAIrD;AAED,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG;IAAE,EAAE,EAAE,OAAO,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAY/F;AAID,wBAAsB,YAAY,CAAC,KAAK,EAAE,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,CA4CtE;AAED,wBAAgB,eAAe,CAAC,CAAC,EAAE,SAAS,GAAG,OAAO,CAIrD;AA0BD,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,SAAS,GAAG,MAAM,CA0BtD;AAID,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AACrE,YAAY,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACnE,OAAO,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AACjE,YAAY,EAAE,cAAc,EAAE,eAAe,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC"}
|