@ijfw/memory-server 1.5.6 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ijfw-dashboard +20 -1
- package/package.json +4 -3
- package/src/audit-roster.js +89 -12
- package/src/brain/tiered-llm.js +57 -7
- package/src/cross-orchestrator-cli.js +344 -4
- package/src/cross-project-search.js +39 -1
- package/src/dashboard-server.js +7 -1
- package/src/dream/runner.mjs +560 -8
- package/src/handlers/brain-handler.js +101 -1
- package/src/importers/discover.js +1 -1
- package/src/memory/bench-metrics.js +289 -0
- package/src/memory/benchmark.js +1 -1
- package/src/memory/search.js +53 -1
- package/src/orchestrator/plan-checker.js +1 -1
- package/src/profile/audit.js +671 -0
- package/src/profile/capture.js +871 -0
- package/src/profile/derive-dialectic.js +242 -0
- package/src/profile/derive-heuristic.js +733 -0
- package/src/profile/derive.js +156 -0
- package/src/profile/egress.js +306 -0
- package/src/profile/eval/build-real-probes.mjs +197 -0
- package/src/profile/eval/corpus-from-reddit.mjs +166 -0
- package/src/profile/eval/corpus-from-reddit.test.mjs +121 -0
- package/src/profile/eval/corpus-from-transcripts.mjs +264 -0
- package/src/profile/eval/gate-b-behavior.mjs +420 -0
- package/src/profile/eval/gate-b-decision-run.mjs +171 -0
- package/src/profile/eval/gate-b-decision-run.test.mjs +141 -0
- package/src/profile/eval/gate-b-run.mjs +417 -0
- package/src/profile/eval/gate-b-run.test.mjs +204 -0
- package/src/profile/eval/gate-c-capture.mjs +323 -0
- package/src/profile/eval/harness.mjs +551 -0
- package/src/profile/eval/instrument-validation.mjs +248 -0
- package/src/profile/eval/instrument-validation.test.mjs +125 -0
- package/src/profile/eval/multi-subject-harness.mjs +106 -0
- package/src/profile/eval/multi-subject-harness.test.mjs +99 -0
- package/src/profile/eval/personas.test.mjs +83 -0
- package/src/profile/eval/plumbing.test.mjs +69 -0
- package/src/profile/eval/prereg.mjs +130 -0
- package/src/profile/eval/prereg.test.mjs +78 -0
- package/src/profile/eval/real-corpus.test.mjs +103 -0
- package/src/profile/eval/real-personas.mjs +109 -0
- package/src/profile/eval/run-real-corpus-concurrent.mjs +407 -0
- package/src/profile/eval/run-real-corpus.mjs +358 -0
- package/src/profile/eval/slug-quality.mjs +464 -0
- package/src/profile/eval/stylometry-features.js +85 -0
- package/src/profile/eval/stylometry-reference.js +16 -0
- package/src/profile/eval/stylometry.js +224 -0
- package/src/profile/eval/stylometry.test.mjs +103 -0
- package/src/profile/eval/synthetic-personas.js +91 -0
- package/src/profile/eval/verifier-features.mjs +170 -0
- package/src/profile/eval/verifier-logreg.mjs +74 -0
- package/src/profile/eval/verifier-pair.mjs +122 -0
- package/src/profile/eval/verifier-reference.mjs +68 -0
- package/src/profile/eval/verifier-scorer.mjs +30 -0
- package/src/profile/eval/wrong-target-control.mjs +168 -0
- package/src/profile/eval/wrong-target-control.test.mjs +124 -0
- package/src/profile/exemplar-capture.js +232 -0
- package/src/profile/exemplar-retrieve.js +138 -0
- package/src/profile/exemplar-store.js +314 -0
- package/src/profile/lock.js +64 -0
- package/src/profile/merge.js +624 -0
- package/src/profile/path-policy.js +213 -0
- package/src/profile/precision-stamp.mjs +151 -0
- package/src/profile/render-brief.js +717 -0
- package/src/profile/schema.js +244 -0
- package/src/profile/sensitivity.js +249 -0
- package/src/profile/serve.js +345 -0
- package/src/profile/store.js +261 -0
- package/src/profile/telemetry.js +289 -0
- package/src/recovery/checkpoint.js +7 -1
- package/src/server.js +185 -14
- package/src/.registry-meta-key.pem +0 -3
|
@@ -0,0 +1,420 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* profile/eval/gate-b-behavior.mjs — Cross-system profile bus, PHASE P5 (Gate B).
|
|
3
|
+
*
|
|
4
|
+
* GATE B — BEHAVIOR A/B (THE HEADLINE). Capture (Gate C) is necessary but NOT
|
|
5
|
+
* sufficient: PrefEval [2502.09597] shows even an EXPLICIT in-context preference
|
|
6
|
+
* is followed <10% of the time at ~10 turns. So the headline claim — "the profile
|
|
7
|
+
* changes what the agent does" — must be proven by BEHAVIOR, not by grading an
|
|
8
|
+
* internal artifact. Gate B runs the SAME agent task WITH vs WITHOUT the profile
|
|
9
|
+
* brief injected and measures preference-adherence IN THE OUTPUT.
|
|
10
|
+
*
|
|
11
|
+
* THE REAL BRIEF. The injected text is produced by the REAL renderBrief() — the
|
|
12
|
+
* exact string a host receives. We do not hand the agent a hand-tuned hint; we
|
|
13
|
+
* hand it what the production serving path emits.
|
|
14
|
+
*
|
|
15
|
+
 * THE FOUR ARMS (all mandatory per the audit; runGateB additionally runs a fifth, redacted `default` control arm):
|
|
16
|
+
* - baseline : empty profile -> empty brief (the floor; what the agent does
|
|
17
|
+
* with NO profile signal).
|
|
18
|
+
* - heuristic : the brief from the heuristic-derived profile (Gate C's
|
|
19
|
+
* profile) — the product as shipped.
|
|
20
|
+
* - dialectic : the brief from a profile that also ran the dialectic tier
|
|
21
|
+
* (heuristic-vs-dialectic ABLATION) — only differs when a
|
|
22
|
+
* dialectic transport is injected.
|
|
23
|
+
* - oracle : an in-prompt ORACLE brief stating the prefs verbatim (the
|
|
24
|
+
* CEILING — best case if capture were perfect). PrefEval shows
|
|
25
|
+
* even THIS is not 100%, which is why it's the ceiling, not 1.0.
|
|
26
|
+
*
|
|
27
|
+
* SCORING — preference-adherence is measured TWO ways:
|
|
28
|
+
* 1. OBJECTIVE (bias-free): does the output exhibit the user's prefs? (uses
|
|
29
|
+
* the held-out objective style target + literal preference checks). No judge.
|
|
30
|
+
* 2. JUDGED (bias-controlled): a pairwise, position-randomized, length-
|
|
31
|
+
* controlled, identity-masked judge picks the more on-profile output. We
|
|
32
|
+
* report κ between the judge and the objective rater so the judge's
|
|
33
|
+
* reliability is measured.
|
|
34
|
+
*
|
|
35
|
+
* STATS — paired McNemar + bootstrap CI, both from the REAL lab-study helpers
|
|
36
|
+
* (imported via harness.mjs, NOT re-derived).
|
|
37
|
+
*
|
|
38
|
+
* LIVE RUNNER — the agent run is gated behind IJFW_PROFILE_EVAL_LIVE with an
|
|
39
|
+
* INJECTABLE transport (resolveAgentTransport). Unit tests inject a deterministic
|
|
40
|
+
* fake agent that genuinely CONSUMES the injected brief (it adheres iff the brief
|
|
41
|
+
* tells it to) so the REAL scoring + REAL stat pipeline run end-to-end offline.
|
|
42
|
+
* Live runs construct a real local HTTP transport — the runner is genuinely
|
|
43
|
+
* runnable, not a stub.
|
|
44
|
+
*
|
|
45
|
+
* Zero deps. ESM. No stubs in the pipeline.
|
|
46
|
+
*
|
|
47
|
+
* Cites: PrefEval [2502.09597] · LLM-judge bias [2410.02736].
|
|
48
|
+
*/
|
|
49
|
+
|
|
50
|
+
import { renderBrief } from '../render-brief.js';
|
|
51
|
+
import { makeProfile } from '../schema.js';
|
|
52
|
+
import { deriveProfileFromSessions } from './gate-c-capture.mjs';
|
|
53
|
+
import {
|
|
54
|
+
makeHeldOutFixture,
|
|
55
|
+
resolveAgentTransport,
|
|
56
|
+
objectiveStyle,
|
|
57
|
+
styleDistance,
|
|
58
|
+
biasControlledJudge,
|
|
59
|
+
cohenKappa,
|
|
60
|
+
bootstrapCI,
|
|
61
|
+
mcnemar,
|
|
62
|
+
} from './harness.mjs';
|
|
63
|
+
|
|
64
|
+
/**
 * Build the in-prompt ORACLE brief (the ceiling arm). States the persona's
 * preferences verbatim, in the same descriptive voice renderBrief uses, so the
 * agent has the BEST-CASE signal. This is the upper bound on what perfect capture
 * could buy — not a guarantee of adherence (PrefEval: even explicit prefs are
 * often ignored), which is exactly why it is the ceiling.
 *
 * Subjects are de-duplicated across probes and listed in first-seen order.
 *
 * @param {Array<{goldSubjects?: Iterable<string>}>} [probes] probe set; `null`,
 *   `undefined`, and null/undefined entries are tolerated and contribute nothing.
 * @returns {string} the oracle brief, or '' when no probe carries a gold subject.
 */
export function buildOracleBrief(probes = []) {
  const subjects = new Set();
  // The `= []` default only covers `undefined`; guard an explicit `null` and
  // null probe entries so a sparse corpus cannot crash the ceiling arm.
  for (const probe of probes || []) {
    const gold = (probe && probe.goldSubjects) || [];
    for (const subject of gold) subjects.add(subject);
  }
  if (subjects.size === 0) return '';
  const lines = Array.from(subjects, (subject) => `- Observed preference: ${subject}`);
  return `User profile (observed patterns — informative, not directive):\n${lines.join('\n')}`;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Negation / contrast cues, as they appear AFTER apostrophes are stripped and
 * the text is split on non-letters (so "won't" is looked up as "wont").
 * Contractions are listed explicitly: a generic "ends with nt" test would also
 * flag ordinary words such as "indent", "want" or "current".
 */
const NEGATION_CUES = new Set([
  'no', 'not', 'never', 'without', 'avoid', 'unlike', 'instead',
  'rather', 'except', 'stop', 'skip', 'drop',
  // apostrophe-stripped contractions
  'wont', 'dont', 'cant', 'isnt', 'arent', 'wasnt', 'werent', 'doesnt', 'didnt',
  'shouldnt', 'wouldnt', 'couldnt', 'mustnt', 'mightnt', 'oughtnt', 'neednt',
  'hasnt', 'havent', 'hadnt', 'shant', 'aint',
]);

/**
 * Detect whether a keyword appears in a NEGATED / contrastive context, so we
 * never credit "I won't use tabs" or "unlike TypeScript" as adherence. Looks at
 * the FIRST match of `keywordRe` in the lower-cased text and scans the last few
 * words in a small character window BEFORE it for a negation cue. Lightweight +
 * deterministic (no NLP dep) — the goal is to stop the obvious false positives a
 * live model would produce, not perfect parsing.
 *
 * @param {*} text        text to inspect (coerced with String()).
 * @param {RegExp} keywordRe pattern for the keyword; applied to the LOWER-CASED
 *   text, so it should match lower-case input.
 * @returns {boolean} true iff a negation cue precedes the first keyword match.
 */
function isNegatedNear(text, keywordRe) {
  const WINDOW_CHARS = 40;
  const WINDOW_WORDS = 5;

  const lower = String(text).toLowerCase();
  const m = lower.match(keywordRe);
  if (!m) return false;

  // Use the position of the actual match. Searching for the matched text with
  // indexOf can land inside an earlier, longer word ("table" for /\btabs?\b/).
  // A global regex yields no `.index`, so fall back to indexOf in that case.
  const idx = typeof m.index === 'number' ? m.index : lower.indexOf(m[0]);

  const start = Math.max(0, idx - WINDOW_CHARS);
  let before = lower.slice(start, idx);
  // If the window begins mid-word, drop the leading fragment: the tail of
  // "casino" must not be read as the cue "no".
  if (start > 0 && /[a-z]/.test(lower[start - 1])) before = before.replace(/^[a-z]+/, '');

  // Strip straight and typographic apostrophes so contractions stay one token
  // ("won't" / "won’t" -> "wont"), then tokenize on anything that is not a-z.
  const words = before.replace(/['\u2019]/g, '').split(/[^a-z]+/).filter(Boolean);
  return words.slice(-WINDOW_WORDS).some((w) => NEGATION_CUES.has(w));
}
|
|
106
|
+
|
|
107
|
+
/**
 * Literal preference adherence: does the output text honor the probe's gold
 * preferences? OBJECTIVE, no judge. The check is adherence-SENSITIVE rather than
 * mention-sensitive: indentation prefs are read off the ACTUAL leading
 * whitespace of lines (not the word "tabs"), and keyword checks are
 * NEGATION-AWARE so "I won't use tabs" or "unlike TypeScript…" does NOT score
 * adherent.
 *
 * Facets are selected by substring match on the lower-cased gold subjects
 * ('terse', 'tab', 'typescript'). When no facet applies, the score falls back to
 * a style-distance match against the probe's held-out style target.
 *
 * @param {*} output  agent output (falsy values are treated as '').
 * @param {{goldSubjects?: string[], goldStyle?: object}} probe
 * @returns {0|1} 1 when at least half of the applicable facets pass (or the
 *   fallback style distance is <= 0.5), else 0.
 */
export function objectiveAdherence(output, probe) {
  const text = String(output || '');
  const goldSubjects = (probe.goldSubjects || []).map((s) => String(s).toLowerCase());
  const wants = (needle) => goldSubjects.some((g) => g.includes(needle));

  // One boolean per applicable facet, in evaluation order.
  const verdicts = [];

  // terseness: measured terseness must land within 0.25 of the target.
  if (wants('terse')) {
    const target = probe.goldStyle ? probe.goldStyle.terseness : 0.5;
    const measured = objectiveStyle(text).terseness;
    verdicts.push(Math.abs(measured - target) <= 0.25);
  }

  // tabs vs spaces: ACTUAL indentation behavior. Adherent iff some line's
  // leading whitespace contains a TAB and the first "tab(s)" mention is not
  // negated. A bare prose mention is not adherence.
  if (wants('tab')) {
    const tabIndented = text.split('\n').some((line) => /^[^\S\t]*\t/.test(line));
    verdicts.push(tabIndented && !isNegatedNear(text, /\btabs?\b/));
  }

  // typescript: genuine usage (annotation-like syntax, a .ts reference,
  // `interface`, or a `type X =` alias) OR an affirmative, non-negated mention.
  if (wants('typescript')) {
    const usesTs = /:\s*\w+\s*[),=]/.test(text)
      || /\.ts\b/.test(text)
      || /\binterface\b|\btype\s+\w+\s*=/.test(text);
    const affirmsTs = /typescript/i.test(text) && !isNegatedNear(text, /typescript/);
    verdicts.push(usesTs || affirmsTs);
  }

  if (verdicts.length === 0) {
    // No checkable facet for this probe -> style-distance match against the
    // probe's held-out style target (still objective).
    const target = probe.goldStyle || objectiveStyle('');
    return styleDistance(objectiveStyle(text), target) <= 0.5 ? 1 : 0;
  }

  const hits = verdicts.filter(Boolean).length;
  return hits / verdicts.length >= 0.5 ? 1 : 0;
}
|
|
159
|
+
|
|
160
|
+
/**
 * Run ONE arm over the probe set. Each probe's prompt is sent to the agent
 * transport with the arm's brief supplied as the system context (the brief is
 * the ONLY difference between arms). Probes are processed sequentially, in
 * order.
 *
 * @param {Function} transport  ({prompt, system, maxTokens, probe}) -> Promise<{text}>
 * @param {string}   brief      the injected brief (may be '')
 * @param {Array}    probes
 * @returns {Promise<{ outputs:string[], adherence:number[] }>} per-probe output
 *   text ('' when the reply carries no string `text`) and the matching 0/1
 *   objective-adherence vector.
 */
export async function runArm(transport, brief, probes) {
  const system = brief || '';
  const outputs = [];
  const adherence = [];

  for (const probe of probes) {
    // The brief rides in as SYSTEM context — how a host would passively inject
    // the profile ahead of the user's turn. The probe itself is passed through
    // so a fake transport can be brief-faithful in tests.
    const request = {
      prompt: probe.prompt || '',
      system,
      maxTokens: 256,
      probe,
    };
    // eslint-disable-next-line no-await-in-loop
    const reply = await transport(request);

    let text = '';
    if (reply && typeof reply.text === 'string') text = reply.text;

    outputs.push(text);
    adherence.push(objectiveAdherence(text, probe));
  }

  return { outputs, adherence };
}
|
|
190
|
+
|
|
191
|
+
/**
 * runGateB(corpus, opts) -> Gate B report.
 *
 * Renders one brief per arm (baseline, default, heuristic, dialectic, oracle),
 * runs every arm through the SAME agent transport, and reports paired McNemar
 * contrasts, per-arm bootstrap CIs, an optional bias-controlled judge with κ
 * against the objective rater, and the new-signal surfacing-latency probe.
 *
 * @param {object}   [corpus] { sessions, probes, negativeControl } (defaults to fixture)
 * @param {object}   [opts]
 * @param {Function} [opts.agent]            injected agent transport (tests/live)
 * @param {Function} [opts.judge]            injected pairwise judge ({first,second})->0|1
 * @param {Function} [opts._localTransport]  dialectic derivation arm (ablation)
 * @param {object}   [opts.env]
 * @param {number}   [opts.seed]             position-randomization + bootstrap seed (default 7)
 *
 * @returns {Promise<object>} arms (incl. the redacted `default` arm), paired
 *   McNemar (heuristic vs baseline), the redaction McNemar (default vs heuristic),
 *   bootstrap CIs per arm, oracle ceiling, ablation McNemar, judge report with κ
 *   (null unless a judge was injected), the new-signal SURFACING latency probe
 *   (NOT adaptation/retraction), the `live` flag, and the rendered briefs.
 *
 * @throws {Error} when no agent transport can be resolved. The message is more
 *   specific when opts.env.IJFW_PROFILE_EVAL_LIVE is set — a live behavioral
 *   gate must NOT silently fake a result.
 */
export async function runGateB(corpus, opts = {}) {
  const data = corpus || makeHeldOutFixture();
  const probes = Array.isArray(data.probes) ? data.probes : [];
  const seed = Number.isFinite(opts.seed) ? opts.seed : 7;
  const { env } = opts;

  const { transport, live } = resolveAgentTransport({ agent: opts.agent, env });
  if (!transport) {
    const liveRequested = Boolean(env && env.IJFW_PROFILE_EVAL_LIVE);
    throw new Error(
      liveRequested
        ? 'Gate B live mode requested (IJFW_PROFILE_EVAL_LIVE) but no agent transport '
          + 'could be resolved (no injected agent and no local URL). Refusing to fake a '
          + 'behavioral result.'
        : 'Gate B requires an agent transport (inject opts.agent for offline runs).',
    );
  }

  // ---- BRIEFS: everything except the oracle comes from the REAL renderBrief.
  //
  // `shareSensitive: true` models the per-host OPT-IN serving scenario: per the
  // original rationale, without the opt-in renderBrief redacts med/high prefs,
  // and those are the levers that drive behavior — so the no-share path would
  // test the redactor rather than the behavioral effect. The opt-in is set
  // EXPLICITLY here so the arm semantics stay honest; `redactFile` stays at its
  // default.
  const sharedRender = { env, shareSensitive: true };

  // baseline: empty profile -> empty brief ('' by construction).
  const baselineBrief = renderBrief(makeProfile(), sharedRender).text;

  // heuristic: the shipped product brief (heuristic-derived profile).
  const heuristicProfile = await deriveProfileFromSessions(data.sessions, { env });
  const heuristicBrief = renderBrief(heuristicProfile, sharedRender).text;

  // default: the SAME heuristic profile rendered with NO opt-in (shareSensitive
  // omitted). This puts the redactor in the SCORED path: adherence is expected
  // to drop toward baseline, showing the behavioral effect is opt-in-gated.
  const defaultBrief = renderBrief(heuristicProfile, { env }).text;

  // dialectic: heuristic + dialectic ABLATION (differs only if a transport is injected).
  const dialecticProfile = await deriveProfileFromSessions(data.sessions, {
    env,
    _localTransport: opts._localTransport,
  });
  const dialecticBrief = renderBrief(dialecticProfile, sharedRender).text;

  // oracle: in-prompt ceiling.
  const oracleBrief = buildOracleBrief(probes);

  // ---- RUN every arm through the SAME transport; only the brief differs.
  const baseline = await runArm(transport, baselineBrief, probes);
  const defaultArm = await runArm(transport, defaultBrief, probes);
  const heuristic = await runArm(transport, heuristicBrief, probes);
  const dialectic = await runArm(transport, dialecticBrief, probes);
  const oracle = await runArm(transport, oracleBrief, probes);

  // ---- HEADLINE: paired McNemar, heuristic (with profile) vs baseline
  // (without) — does injecting the profile flip non-adherent outputs to adherent?
  const headline = mcnemar(baseline.adherence, heuristic.adherence);

  // ---- Per-arm bootstrap CIs on the per-probe adherence vectors. Each arm has
  // its own seed offset.
  const summarize = (run, ciSeed) => ({
    adherence: run.adherence,
    outputs: run.outputs,
    ci: bootstrapCI(run.adherence, { seed: ciSeed }),
  });
  const arms = {
    baseline: summarize(baseline, seed),
    default: summarize(defaultArm, seed + 4),
    heuristic: summarize(heuristic, seed + 1),
    dialectic: summarize(dialectic, seed + 2),
    oracle: summarize(oracle, seed + 3),
  };

  // ---- REDACTOR CONTROL: paired McNemar, default (redacted) vs heuristic (shared).
  const redaction = mcnemar(defaultArm.adherence, heuristic.adherence);

  // ---- ABLATION: dialectic vs heuristic (does the LLM tier buy anything?).
  const ablation = mcnemar(heuristic.adherence, dialectic.adherence);

  // ---- BIAS-CONTROLLED JUDGE (optional second rater). Heuristic-vs-baseline
  // outputs go pairwise through the judge wrapper; κ is then taken between the
  // judge and the OBJECTIVE rater.
  let judgeReport = null;
  if (typeof opts.judge === 'function') {
    const pairs = probes.map((_, i) => ({ a: heuristic.outputs[i], b: baseline.outputs[i] }));
    const { preferA } = biasControlledJudge(pairs, opts.judge, { seed });
    // Objective rater: 1 iff heuristic objectively adhered AND baseline did not.
    const objectivePrefersA = probes.map((_, i) => (
      Number(heuristic.adherence[i] === 1 && baseline.adherence[i] === 0)
    ));
    // κ is passed through exactly as cohenKappa returns it; per the original
    // notes it is structured so a degenerate fixture is reported as such.
    judgeReport = {
      preferA,
      objectivePrefersA,
      kappa: cohenKappa(preferA, objectivePrefersA),
    };
  }

  // ---- NEW-SIGNAL SURFACING LATENCY (NOT "adaptation"): how many sessions a
  // CONTRADICTING preference needs before it surfaces in the brief.
  const surfacing = await newSignalSurfacingLatency(data, opts);

  return {
    arms,
    headline, // McNemar: heuristic vs baseline (the proof contrast)
    ablation, // McNemar: dialectic vs heuristic
    redaction, // McNemar: default (redacted) vs heuristic (the opt-in gate)
    oracleCeiling: arms.oracle.ci.point,
    judge: judgeReport, // null unless a judge was injected
    surfacing, // new-signal surfacing latency (NOT adaptation/retraction)
    live, // whether this was a live LLM run
    briefs: {
      baseline: baselineBrief,
      default: defaultBrief,
      heuristic: heuristicBrief,
      dialectic: dialecticBrief,
      oracle: oracleBrief,
    },
  };
}
|
|
339
|
+
|
|
340
|
+
/**
 * newSignalSurfacingLatency(corpus, opts) -> {
 *   surfaced, surfacingLatencySessions, contradicting, oldSubject,
 *   bothBeliefsPresent, retractionImplemented }.
 *
 * Measures ACCRUAL/SURFACING, NOT adaptation. Starting from the heuristic
 * profile derived from the corpus sessions, a CONTRADICTING correction
 * ("use spaces not tabs") is applied one synthetic session at a time (up to 6).
 * The probe reports the FIRST session index >= 3 at which the contradicting
 * phrase appears in the rendered (opt-in, lower-cased) brief.
 *
 * It does NOT measure retraction: `bothBeliefsPresent` records whether the old
 * belief ("use tabs not spaces") is still in the brief alongside the new one at
 * the moment of surfacing, and `retractionImplemented` is always false here.
 *
 * @param {object} [corpus] { sessions, ... } (defaults to the held-out fixture)
 * @param {object} [opts]
 * @param {object} [opts.env]
 * @returns {Promise<object>} see shape above; `surfacingLatencySessions` stays
 *   Infinity when the signal never surfaces within the session budget.
 */
export async function newSignalSurfacingLatency(corpus, opts = {}) {
  const data = corpus || makeHeldOutFixture();
  const { env } = opts;

  // Start from the established heuristic profile (old pref asserted).
  let profile = await deriveProfileFromSessions(data.sessions, { env });
  const oldSubject = 'use tabs not spaces';
  const contradicting = 'use spaces not tabs'; // a DIFFERENT subject (heuristic adds it)

  const { applyDelta } = await import('../merge.js');
  const { deriveProfile } = await import('../derive.js');

  // Pessimistic defaults; only overwritten if the new signal surfaces.
  const report = {
    surfaced: false,
    surfacingLatencySessions: Infinity,
    contradicting,
    oldSubject,
    bothBeliefsPresent: false,
    retractionImplemented: false, // asymmetric-decay (P1.4) not yet wired
  };

  const MAX_SESSIONS = 6;
  for (let session = 1; session <= MAX_SESSIONS; session += 1) {
    const sessionId = `surfacing-${session}`;
    const ts = new Date(Date.UTC(2026, 6, session)).toISOString();
    const feedback = [{
      session_id: sessionId, ts, kind: 'correction', phrase: contradicting, context: '',
    }];

    // eslint-disable-next-line no-await-in-loop
    const delta = await deriveProfile({ feedback, sessionId, host: 'claude' }, { env });
    profile = applyDelta(profile, delta);

    // shareSensitive: true — same opt-in serving scenario as runGateB, so the
    // contradicting preference is allowed to surface in the brief.
    const brief = renderBrief(profile, { env, shareSensitive: true }).text.toLowerCase();

    // Only count surfacing from session 3 on (evidence floor of >= 3).
    if (session < 3 || !brief.includes(contradicting)) continue;

    report.surfaced = true;
    report.surfacingLatencySessions = session;
    // The new belief is known to be present here, so coexistence hinges only on
    // whether the OLD belief is still asserted (surfacing, not adaptation).
    report.bothBeliefsPresent = brief.includes(oldSubject);
    break;
  }

  return report;
}
|
|
405
|
+
|
|
406
|
+
/**
 * adaptationLatency — DEPRECATED back-compat alias for
 * newSignalSurfacingLatency. The metric was renamed because it measures
 * new-signal SURFACING, not adaptation/retraction. The result carries every
 * field of the new report plus the legacy names: `flipped` (= surfaced) and
 * `latencySessions` (= surfacingLatencySessions).
 *
 * @param {object} [corpus]
 * @param {object} [opts]
 * @returns {Promise<object>}
 */
export async function adaptationLatency(corpus, opts = {}) {
  const report = await newSignalSurfacingLatency(corpus, opts);
  const { surfaced, surfacingLatencySessions } = report;
  return {
    ...report,
    flipped: surfaced,
    latencySessions: surfacingLatencySessions,
  };
}
|
|
416
|
+
|
|
417
|
+
export default {
|
|
418
|
+
runGateB, runArm, buildOracleBrief, objectiveAdherence,
|
|
419
|
+
newSignalSurfacingLatency, adaptationLatency,
|
|
420
|
+
};
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
// gate-b-decision-run.mjs — Gate B v2, Task T7. The Phase-2 decision runner and the
|
|
2
|
+
// honest decision gate the whole program exists to protect.
|
|
3
|
+
//
|
|
4
|
+
// Flow: validate instrument → (refuse to spend if it fails) → DESCRIPTIVE pilot →
|
|
5
|
+
// FRESH-seed confirmatory → decision gate. The verdict is computed from the CONFIRMATORY
|
|
6
|
+
// arm only; the pilot is descriptive and never feeds the statistic.
|
|
7
|
+
//
|
|
8
|
+
// Decision rules (hardened spec §6.3 + audit):
|
|
9
|
+
// instrument invalid → NULL (same-register discrimination unproven; NO spend; ship portability)
|
|
10
|
+
// baseline / register-echo PASS→ VOID (a no-signal arm beat the control ⇒ rig contaminated)
|
|
11
|
+
// real authors didn't carry it → NULL (synthetic can NEVER license the claim)
|
|
12
|
+
// derived arm PASSES → PASS (descriptor-only brief works — the product win)
|
|
13
|
+
// only few-shot-oracle PASSES → PASS_ORACLE → Phase 3 (descriptors plateau; exemplar lever)
|
|
14
|
+
// even few-shot-oracle NULLS → CUT (the real ceiling can't capture voice ⇒ cut the claim)
|
|
15
|
+
//
|
|
16
|
+
// CUT is licensed ONLY by a few-shot-oracle NULL. A renderable-ceiling (derived) NULL alone
|
|
17
|
+
// never CUTs — it routes to Phase 3.
|
|
18
|
+
|
|
19
|
+
import { deriveMinMeanMargin } from './prereg.mjs';
|
|
20
|
+
|
|
21
|
+
// Every verdict label the Gate B decision can produce.
export const VERDICTS = [
  'PASS',
  'PASS_ORACLE',
  'CUT',
  'NULL',
  'VOID',
];
|
|
22
|
+
|
|
23
|
+
/**
 * realArmsCarried producer: the verdict may be licensed ONLY if enough REAL
 * (headline-eligible, non-synthetic) personas were decidable. Synthetic personas
 * can never license the claim. Derived from the persona stamps — never an
 * injected literal in production.
 *
 * Counts DISTINCT personas: an id repeated in `decidableIds` is counted once, so
 * a duplicated entry cannot inflate the subject count past the minimum.
 *
 * @param {Array<{id: *, headlineEligible?: boolean, synthetic?: boolean}>} personas
 * @param {Array} decidableIds  ids of personas that were decidable; ids with no
 *   matching persona are ignored.
 * @param {number} minSubjects  minimum number of distinct real personas required.
 * @returns {boolean} true iff at least `minSubjects` distinct real personas were decidable.
 */
export function deriveRealArmsCarried(personas, decidableIds, minSubjects) {
  const byId = new Map(personas.map((p) => [p.id, p]));
  const realDecidable = new Set();
  for (const id of decidableIds) {
    const persona = byId.get(id);
    if (persona && persona.headlineEligible === true && persona.synthetic !== true) {
      realDecidable.add(id);
    }
  }
  return realDecidable.size >= minSubjects;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Pure decision over the confirmatory booleans. No I/O.
 *
 * Rules are applied in this order; the first that matches wins:
 *   1. instrument invalid                  -> NULL  (ship portability)
 *   2. baseline or register-echo passes    -> VOID  (rig contaminated)
 *   3. real authors did not carry it       -> NULL  (ship portability)
 *   4. derived arm passes                  -> PASS
 *   5. few-shot-oracle passes              -> PASS_ORACLE (next: phase 3)
 *   6. otherwise                           -> CUT   (ship portability)
 *
 * @param {object} input { instrumentValid, baselinePasses, registerEchoPasses,
 *   derivedPasses, oraclePasses, realArmsCarried }
 * @returns {object} { verdict, reason } plus `ship`, `claim` or `next` depending
 *   on the verdict.
 * @throws {Error} when the instrument is valid but `registerEchoPasses` or
 *   `realArmsCarried` is not a boolean — a safety rail that was never computed
 *   must not be read as a silent `false`.
 */
export function decideGateB(input) {
  const {
    instrumentValid,
    baselinePasses,
    registerEchoPasses,
    derivedPasses,
    oraclePasses,
    realArmsCarried,
  } = input;

  // Three outcomes fall back to shipping portability only.
  const shipPortability = (verdict, reason) => ({ verdict, ship: 'portability', reason });

  if (!instrumentValid) {
    return shipPortability(
      'NULL',
      'instrument invalid: same-register discrimination not established; no cloud spend',
    );
  }

  // Refuse loudly if a producer never computed the VOID/NULL rails, rather than
  // letting `undefined` wave a contaminated run through.
  const railsComputed = typeof registerEchoPasses === 'boolean'
    && typeof realArmsCarried === 'boolean';
  if (!railsComputed) {
    throw new Error('decideGateB: registerEchoPasses and realArmsCarried must be computed booleans — a dead safety rail must never read as silent-false');
  }

  if (baselinePasses || registerEchoPasses) {
    return {
      verdict: 'VOID',
      reason: 'a no-signal arm (baseline or register-only echo) passed the control — rig contaminated; re-examine before any claim',
    };
  }

  if (!realArmsCarried) {
    return shipPortability(
      'NULL',
      'real authors did not carry the verdict (synthetic personas cannot license "writes like you")',
    );
  }

  if (derivedPasses) {
    return {
      verdict: 'PASS',
      claim: 'writes in your voice (descriptor-only brief)',
      reason: 'derived style-axis-band brief passes the wrong-target control on real authors',
    };
  }

  if (oraclePasses) {
    return {
      verdict: 'PASS_ORACLE',
      next: 'phase-3-exemplar-lever',
      reason: 'few-shot-oracle passes but descriptors plateau; voice-match is achievable, derivation is the gap',
    };
  }

  // CUT is reached only when even the few-shot-oracle arm did not pass.
  return shipPortability(
    'CUT',
    'even the real-ceiling few-shot-oracle fails the wrong-target control — voice-match is not achievable; cut the claim honestly',
  );
}
|
|
81
|
+
|
|
82
|
+
// Reduce a confirmatory control result (T5 output) + per-arm Bonferroni alpha into the
|
|
83
|
+
// booleans decideGateB consumes. An arm "passes" iff its control verdict passes AND it
|
|
84
|
+
// beats baseline at its Bonferroni-adjusted alpha.
|
|
85
|
+
// Reduce a confirmatory control result into the boolean inputs decideGateB consumes.
//
//   control  - object with `perArm` (arm name -> { verdict: { passes }, vsBaseline? })
//              and a measured `registerEchoPasses`.
//   preReg   - accepted for signature compatibility; not read by this function.
//   options  - `{ realArmsCarried }`, which must already be a boolean.
//
// Returns { instrumentValid: true, baselinePasses, registerEchoPasses, derivedPasses,
// oraclePasses, realArmsCarried }.
//
// Throws Error when `control.registerEchoPasses` is undefined or null (the register-echo
// arm was never measured), or when `realArmsCarried` is not a boolean.
export function confirmatoryBooleans(control, preReg, { realArmsCarried }) {
  const pa = control.perArm;
  // The register-echo VOID rail must have been MEASURED. Both undefined and null mean
  // "no value": null would otherwise be coerced to false by Boolean() below and so slip
  // past decideGateB's typeof check as well. Refuse either one.
  if (control.registerEchoPasses === undefined || control.registerEchoPasses === null) {
    throw new Error(`confirmatoryBooleans: control.registerEchoPasses is ${String(control.registerEchoPasses)} — the register-echo arm was not run; the VOID rail is dead`);
  }
  if (typeof realArmsCarried !== 'boolean') {
    throw new Error('confirmatoryBooleans: realArmsCarried must be a computed boolean (use deriveRealArmsCarried)');
  }
  // An arm passes when its own verdict passes and, if a vsBaseline comparison is present,
  // that comparison reports beatsBaseline === true. A missing arm never passes.
  const armPasses = (arm) => {
    const a = pa[arm];
    if (!a) return false;
    // With no vsBaseline record the arm's own verdict alone decides.
    const beats = a.vsBaseline ? a.vsBaseline.beatsBaseline === true : true;
    return Boolean(a.verdict && a.verdict.passes && beats);
  };
  return {
    instrumentValid: true,
    baselinePasses: Boolean(pa.baseline && pa.baseline.verdict && pa.baseline.verdict.passes),
    registerEchoPasses: Boolean(control.registerEchoPasses),
    derivedPasses: armPasses('derived'),
    oraclePasses: armPasses('fewShotOracle'),
    realArmsCarried,
  };
}
|
|
111
|
+
|
|
112
|
+
// runGateBDecision(deps, preRegInput) — orchestrator. deps is injected so the runner is
|
|
113
|
+
// testable offline; the real deps wire validateInstrument + runHarness + wrongTargetControl
|
|
114
|
+
// + the cloud transport and reuse the existing allowedSys/allowedPr + budget guard.
|
|
115
|
+
//
|
|
116
|
+
// deps = {
|
|
117
|
+
// buildPreReg(input) -> preReg
|
|
118
|
+
// validate(preReg) -> { passes, ... } (T2; no spend)
|
|
119
|
+
// guard({phase}) -> asserts budget + allowedSys/allowedPr BEFORE every spend
|
|
120
|
+
// measure({seed, phase, preReg})-> confirmatory shape (runs harness+control for that seed)
|
|
121
|
+
// }
|
|
122
|
+
// Orchestrate one Gate B run: build the pre-registration, validate the instrument, then
// (only if validation passes) run a pilot phase and a confirmatory phase and decide.
//
//   deps        - injected collaborators: buildPreReg(input), validate(preReg),
//                 guard({ phase }), measure({ seed, phase, preReg, minMeanMargin }).
//   preRegInput - forwarded verbatim to deps.buildPreReg (defaults to {}).
//
// Resolves to a report object. When validation fails it contains
// { runId, preRegHash, spent: false, validation, verdict } and neither guard nor measure
// is called. Otherwise it contains { runId, preRegHash, spent: true, validation, seeds,
// pilot, confirmatory, verdict }, where `verdict` is computed from the confirmatory
// result only.
export async function runGateBDecision(deps, preRegInput = {}) {
  const preReg = deps.buildPreReg(preRegInput);
  const validation = await deps.validate(preReg);

  if (!validation.passes) {
    // Invalid instrument: stop before any guard/measure call.
    const refusal = decideGateB({ instrumentValid: false });
    return {
      runId: preReg.runId,
      preRegHash: preReg.hash,
      spent: false,
      validation,
      verdict: refusal,
    };
  }

  // The floor is derived from this run's validation result and preReg.floorK, and the
  // same value is handed to both phases.
  const minMeanMargin = deriveMinMeanMargin(validation, preReg.floorK);

  // Both seeds are unsigned 32-bit values; XOR with a non-zero constant guarantees the
  // confirmatory seed differs from the pilot seed.
  const pilotSeed = preReg.seeds.personaSeed >>> 0;
  const confirmSeed = (preReg.seeds.personaSeed ^ 0x5bd1e995) >>> 0;

  // One phase = guard first, then measure. The guard is awaited to completion before the
  // measurement starts.
  const runPhase = async (phase, seed) => {
    await deps.guard({ phase });
    const measured = await deps.measure({
      seed, phase, preReg, minMeanMargin,
    });
    return measured;
  };

  const pilot = await runPhase('pilot', pilotSeed); // descriptive only
  const confirmatory = await runPhase('confirmatory', confirmSeed);

  const {
    baselinePasses,
    registerEchoPasses,
    derivedPasses,
    oraclePasses,
    realArmsCarried,
  } = confirmatory;

  const verdict = decideGateB({
    instrumentValid: true,
    baselinePasses,
    registerEchoPasses,
    derivedPasses,
    oraclePasses,
    realArmsCarried,
  });

  return {
    runId: preReg.runId,
    preRegHash: preReg.hash,
    spent: true,
    validation,
    seeds: { pilotSeed, confirmSeed },
    pilot, // reported, but never fed into `verdict`
    confirmatory,
    verdict,
  };
}
|