@ijfw/memory-server 1.5.6 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/bin/ijfw-dashboard +20 -1
  2. package/package.json +4 -3
  3. package/src/audit-roster.js +89 -12
  4. package/src/brain/tiered-llm.js +57 -7
  5. package/src/cross-orchestrator-cli.js +344 -4
  6. package/src/cross-project-search.js +39 -1
  7. package/src/dashboard-server.js +7 -1
  8. package/src/dream/runner.mjs +560 -8
  9. package/src/handlers/brain-handler.js +101 -1
  10. package/src/importers/discover.js +1 -1
  11. package/src/memory/bench-metrics.js +289 -0
  12. package/src/memory/benchmark.js +1 -1
  13. package/src/memory/search.js +53 -1
  14. package/src/orchestrator/plan-checker.js +1 -1
  15. package/src/profile/audit.js +671 -0
  16. package/src/profile/capture.js +871 -0
  17. package/src/profile/derive-dialectic.js +242 -0
  18. package/src/profile/derive-heuristic.js +733 -0
  19. package/src/profile/derive.js +156 -0
  20. package/src/profile/egress.js +306 -0
  21. package/src/profile/eval/build-real-probes.mjs +197 -0
  22. package/src/profile/eval/corpus-from-reddit.mjs +166 -0
  23. package/src/profile/eval/corpus-from-reddit.test.mjs +121 -0
  24. package/src/profile/eval/corpus-from-transcripts.mjs +264 -0
  25. package/src/profile/eval/gate-b-behavior.mjs +420 -0
  26. package/src/profile/eval/gate-b-decision-run.mjs +171 -0
  27. package/src/profile/eval/gate-b-decision-run.test.mjs +141 -0
  28. package/src/profile/eval/gate-b-run.mjs +417 -0
  29. package/src/profile/eval/gate-b-run.test.mjs +204 -0
  30. package/src/profile/eval/gate-c-capture.mjs +323 -0
  31. package/src/profile/eval/harness.mjs +551 -0
  32. package/src/profile/eval/instrument-validation.mjs +248 -0
  33. package/src/profile/eval/instrument-validation.test.mjs +125 -0
  34. package/src/profile/eval/multi-subject-harness.mjs +106 -0
  35. package/src/profile/eval/multi-subject-harness.test.mjs +99 -0
  36. package/src/profile/eval/personas.test.mjs +83 -0
  37. package/src/profile/eval/plumbing.test.mjs +69 -0
  38. package/src/profile/eval/prereg.mjs +130 -0
  39. package/src/profile/eval/prereg.test.mjs +78 -0
  40. package/src/profile/eval/real-corpus.test.mjs +103 -0
  41. package/src/profile/eval/real-personas.mjs +109 -0
  42. package/src/profile/eval/run-real-corpus-concurrent.mjs +407 -0
  43. package/src/profile/eval/run-real-corpus.mjs +358 -0
  44. package/src/profile/eval/slug-quality.mjs +464 -0
  45. package/src/profile/eval/stylometry-features.js +85 -0
  46. package/src/profile/eval/stylometry-reference.js +16 -0
  47. package/src/profile/eval/stylometry.js +224 -0
  48. package/src/profile/eval/stylometry.test.mjs +103 -0
  49. package/src/profile/eval/synthetic-personas.js +91 -0
  50. package/src/profile/eval/verifier-features.mjs +170 -0
  51. package/src/profile/eval/verifier-logreg.mjs +74 -0
  52. package/src/profile/eval/verifier-pair.mjs +122 -0
  53. package/src/profile/eval/verifier-reference.mjs +68 -0
  54. package/src/profile/eval/verifier-scorer.mjs +30 -0
  55. package/src/profile/eval/wrong-target-control.mjs +168 -0
  56. package/src/profile/eval/wrong-target-control.test.mjs +124 -0
  57. package/src/profile/exemplar-capture.js +232 -0
  58. package/src/profile/exemplar-retrieve.js +138 -0
  59. package/src/profile/exemplar-store.js +314 -0
  60. package/src/profile/lock.js +64 -0
  61. package/src/profile/merge.js +624 -0
  62. package/src/profile/path-policy.js +213 -0
  63. package/src/profile/precision-stamp.mjs +151 -0
  64. package/src/profile/render-brief.js +717 -0
  65. package/src/profile/schema.js +244 -0
  66. package/src/profile/sensitivity.js +249 -0
  67. package/src/profile/serve.js +345 -0
  68. package/src/profile/store.js +261 -0
  69. package/src/profile/telemetry.js +289 -0
  70. package/src/recovery/checkpoint.js +7 -1
  71. package/src/server.js +185 -14
  72. package/src/.registry-meta-key.pem +0 -3
@@ -0,0 +1,122 @@
1
+ // verifier-pair.mjs — Gate B v3. Turn two authors' chunk-feature sets into ONE fixed
2
+ // pair-summary feature vector for the trained logistic verifier. ZERO deps.
3
+ //
4
+ // For an author pair (A,B) we compute, over all (or sampled) cross-author chunk pairs,
5
+ // the DISTRIBUTION of per-family Burrows-Delta distances (mean |z_a - z_b| over that
6
+ // family's dims — NOT a single channel cosine, which washes out the few idiosyncratic
7
+ // dims). We summarize each family's cross-pair distance distribution with
8
+ // mean/median/min/max + 10/25/75/90 percentiles + fraction-below-cutpoint, and add a
9
+ // within-author consistency term (how tight each author's own chunks cluster). These
10
+ // summaries are the verifier's input — the logreg learns the same/different boundary.
11
+
12
+ import { FAMILIES } from './verifier-features.mjs';
13
+
14
/**
 * Burrows-Delta-style distance for ONE chunk pair within ONE family: the mean of
 * |z_a - z_b| taken over that family's dimensions.
 *
 * @param {ArrayLike<number>} za - Family z-vector of the first chunk; its length sets the
 *   number of dimensions compared.
 * @param {ArrayLike<number>} zb - Family z-vector of the second chunk, read at the same
 *   indices as `za`.
 * @returns {number} Mean absolute difference, or 0 when `za` has no dimensions.
 */
function familyDelta(za, zb) {
  const dims = za.length;
  if (!dims) return 0;
  let total = 0;
  let k = 0;
  while (k < dims) {
    total += Math.abs(za[k] - zb[k]);
    k += 1;
  }
  return total / dims;
}
23
+
24
/**
 * Nearest-rank percentile of an ALREADY SORTED (ascending) array.
 *
 * The rank is `round(p/100 * (length - 1))`, clamped into the valid index range, so
 * p <= 0 yields the first element and p >= 100 yields the last.
 *
 * @param {number[]} sorted - Values in ascending order.
 * @param {number} p - Percentile in [0, 100].
 * @returns {number} The selected element, or 0 for an empty array.
 */
function percentile(sorted, p) {
  if (!sorted.length) return 0;
  const last = sorted.length - 1;
  const rank = Math.round((p / 100) * last);
  const clamped = Math.min(last, Math.max(0, rank));
  return sorted[clamped];
}
29
+
30
/**
 * Collapse a distribution of distances into a fixed 9-slot summary:
 * [mean, median, min, max, p10, p25, p75, p90, fraction <= cutpoint].
 *
 * @param {number[]} dists - Distances in any order; the input is not mutated.
 * @param {number} cutpoint - Threshold for the fraction-below slot (inclusive).
 * @returns {number[]} Nine numbers in the order above; all zeros for an empty input.
 */
function summarize(dists, cutpoint) {
  if (!dists.length) return [0, 0, 0, 0, 0, 0, 0, 0, 0];
  const asc = Array.from(dists).sort((x, y) => x - y);
  const count = asc.length;

  // Single pass over the sorted copy for the mean and the at-or-below-cutpoint count.
  let total = 0;
  let atOrBelow = 0;
  asc.forEach((d) => {
    total += d;
    if (d <= cutpoint) atOrBelow += 1;
  });

  const [p10, p25, median, p75, p90] = [10, 25, 50, 75, 90].map((p) => percentile(asc, p));
  return [
    total / count,
    median,
    asc[0],
    asc[count - 1],
    p10,
    p25,
    p75,
    p90,
    atOrBelow / count,
  ];
}

// Number of slots summarize() emits: mean,median,min,max,p10,p25,p75,p90,fracBelow
const SUMMARY_LEN = 9;
43
+
44
/**
 * Within-author consistency: for every family, the mean familyDelta between pairs of the
 * author's OWN chunks (low = tight idiolect).
 *
 * Chunks whose vector for a family is missing or empty are ignored for that family. The
 * pair count is softly bounded for speed: no new outer row is started once
 * 40 * (number of usable chunks) pairs have been accumulated, but a row that has been
 * started always runs to completion.
 *
 * @param {Object[]} chunkFeats - One features object per chunk, keyed by family name.
 * @returns {Object<string, number>} One scalar per family; 0 when fewer than two chunks
 *   carry that family.
 */
function withinConsistency(chunkFeats) {
  const PAIRS_PER_CHUNK = 40; // bound the within-author pair count for speed
  const tightness = {};
  for (const fam of FAMILIES) {
    const vectors = [];
    for (const chunk of chunkFeats) {
      const v = chunk[fam];
      if (v && v.length) vectors.push(v);
    }

    const count = vectors.length;
    let sum = 0;
    let pairs = 0;
    if (count >= 2) {
      const budget = PAIRS_PER_CHUNK * count;
      for (let i = 0; i < count; i += 1) {
        if (pairs >= budget) break;
        for (let j = i + 1; j < count; j += 1) {
          sum += familyDelta(vectors[i], vectors[j]);
          pairs += 1;
        }
      }
    }
    tightness[fam] = pairs > 0 ? sum / pairs : 0;
  }
  return tightness;
}
60
+
61
/**
 * Choose which cross-author chunk pairs (i from author A, j from author B) to compare.
 *
 * When all nA*nB pairs fit under `cap`, every pair is returned in row-major order and
 * `rng` is never called. Otherwise `cap` DISTINCT pairs are drawn by rejection sampling;
 * with a seeded `rng` the selection is reproducible and bounded for large authors.
 *
 * Hardening over plain rejection sampling (results are unchanged for any rng that
 * behaves like Math.random, i.e. returns values in [0, 1) with enough variety):
 *   - indices are clamped into range, so an rng that returns 1, a negative number or NaN
 *     cannot produce an out-of-bounds or NaN index;
 *   - the number of draws is bounded; if a degenerate rng (e.g. a constant stub) keeps
 *     producing duplicates, the remaining slots are filled deterministically in
 *     row-major order instead of looping forever.
 *
 * @param {number} nA - Number of chunks for author A.
 * @param {number} nB - Number of chunks for author B.
 * @param {number} cap - Maximum number of pairs to return.
 * @param {() => number} rng - Uniform random source, expected to return values in [0, 1).
 * @returns {Array<[number, number]>} Distinct [i, j] index pairs.
 */
function sampleCrossPairs(nA, nB, cap, rng) {
  const total = nA * nB;
  const out = [];
  if (total <= cap) {
    for (let i = 0; i < nA; i += 1) for (let j = 0; j < nB; j += 1) out.push([i, j]);
    return out;
  }

  // Map a draw u onto an index in [0, n - 1], tolerating u outside [0, 1) and NaN.
  const pick = (u, n) => {
    const idx = Math.floor(u * n);
    if (!(idx >= 0)) return 0;
    return Math.min(n - 1, idx);
  };

  // Far above the expected number of draws even when cap is just below total
  // (coupon-collector ~ total * ln(total)), so a healthy rng never reaches the limit.
  const maxAttempts = 64 * cap + 1024;
  const seen = new Set();
  let attempts = 0;
  while (out.length < cap && attempts < maxAttempts) {
    attempts += 1;
    const i = pick(rng(), nA);
    const j = pick(rng(), nB);
    const k = i * nB + j;
    if (seen.has(k)) continue;
    seen.add(k);
    out.push([i, j]);
  }

  // Degenerate-rng fallback: top up with the first unused pairs in row-major order.
  for (let k = 0; out.length < cap && k < total; k += 1) {
    if (seen.has(k)) continue;
    seen.add(k);
    out.push([Math.floor(k / nB), k % nB]);
  }
  return out;
}
81
+
82
/**
 * pairFeatures(featsA, featsB, cfg) -> Float64Array.
 *
 * Turns two authors' chunk-feature sets into ONE fixed-layout pair-summary vector. For
 * each family, in FAMILIES order, the vector holds:
 *   - the 9 summarize() slots of the cross-author family-delta distribution,
 *   - author A's within-consistency, author B's within-consistency, and their mean,
 *   - the separation ratio: mean cross distance / mean within-author distance.
 * That is SUMMARY_LEN + 4 numbers per family, the same length on every call.
 *
 * A cross pair contributes to a family only when BOTH chunks carry a usable vector for
 * it (non-empty on the A side, and at least as long on the B side). Previously only the
 * A side was checked, so a chunk missing a family on the B side threw inside
 * familyDelta and a shorter B vector leaked NaN into the summary.
 *
 * @param {Object[]} featsA - chunkFeatures objects for author A.
 * @param {Object[]} featsB - chunkFeatures objects for author B.
 * @param {Object} [cfg]
 * @param {number} [cfg.pairCap=400] - Max cross pairs to sample (a falsy value falls back to 400).
 * @param {() => number} [cfg.rng=Math.random] - Random source; pass a seeded one for reproducibility.
 * @param {Object<string, number>} [cfg.cutpoints] - Per-family cutpoint for the
 *   fraction-below slot; 2.0 is used where a family has no finite entry.
 * @returns {Float64Array} The pair-summary feature vector.
 */
export function pairFeatures(featsA, featsB, cfg = {}) {
  const cap = cfg.pairCap || 400;
  const rng = cfg.rng || Math.random;
  const cutByFam = cfg.cutpoints || {}; // per-family DEV-learned cutpoint
  const idxs = sampleCrossPairs(featsA.length, featsB.length, cap, rng);

  // collect cross-pair family-delta distributions
  const dists = {};
  for (const fam of FAMILIES) dists[fam] = [];
  for (const [i, j] of idxs) {
    const a = featsA[i];
    const b = featsB[j];
    for (const fam of FAMILIES) {
      const za = a[fam];
      const zb = b[fam];
      // familyDelta reads zb at every index of za, so zb must cover za's dimensions.
      if (za && za.length && zb && zb.length >= za.length) {
        dists[fam].push(familyDelta(za, zb));
      }
    }
  }

  const consA = withinConsistency(featsA);
  const consB = withinConsistency(featsB);

  const vec = [];
  for (const fam of FAMILIES) {
    const cut = Number.isFinite(cutByFam[fam]) ? cutByFam[fam] : 2.0;
    vec.push(...summarize(dists[fam], cut));
    // consistency context: each author's own tightness + the ratio cross/within
    const cA = consA[fam];
    const cB = consB[fam];
    // Summed in collection order (not sorted order) so the value stays bit-identical to
    // what the verifier was trained on.
    const crossMean = dists[fam].length
      ? dists[fam].reduce((acc, d) => acc + d, 0) / dists[fam].length
      : 0;
    const withinMean = (cA + cB) / 2;
    vec.push(cA, cB, withinMean);
    // separation ratio: cross distance vs the mean within-author distance; the 1e-6
    // floor avoids dividing by zero when both authors are perfectly tight.
    vec.push(crossMean / Math.max(1e-6, withinMean));
  }
  return Float64Array.from(vec);
}
119
+
120
+ export const PAIR_FEATURE_LEN = FAMILIES.length * (SUMMARY_LEN + 4);
121
+
122
+ export const __test = { familyDelta, summarize, withinConsistency, sampleCrossPairs };
@@ -0,0 +1,68 @@
1
+ // verifier-reference.mjs — Gate B v3, Task B (calibration regen). Build the per-family
2
+ // reference mean/SD from a set of TRAIN-author chunks (DEV, author-disjoint from the
3
+ // validation fold). Uses a percentile SD floor. Floored dims are NOT dropped from the
4
+ // scalar/affix families (fixed-length vectors are needed) — instead a robust SD floor
5
+ // keeps rare dims from blowing up to the z-cap. ZERO deps. Never reads sealed corpora.
6
+
7
+ import { relFreqFunc, relFreqTrigrams, relFreqPunct } from './stylometry-features.js';
8
+ import { FUNCTION_WORDS, TRIGRAM_KEYS, PUNCT_KEYS } from './stylometry-reference.js';
9
+ import { topicRobustScalars, affixCounts, relFreqAffix } from './verifier-features.mjs';
10
+
11
/**
 * Column-wise mean and (population) standard deviation of equal-length numeric rows,
 * with a percentile floor applied to the SDs.
 *
 * The floor is the `floorPct` percentile of the POSITIVE SDs, so rare near-constant
 * dims don't divide by ~0 (which sends them to the z-cap and dominates). Every SD that
 * is not strictly above the floor is raised to it. If no column has a positive SD the
 * floor is 1e-6.
 *
 * @param {Array<ArrayLike<number>>} rows - Non-empty list of rows; the first row's
 *   length defines the number of columns.
 * @param {number} floorPct - Percentile (0-100) of the positive SDs to use as the floor.
 *   Out-of-range or non-finite values are clamped into the valid range rather than
 *   indexing past the sorted SDs (which used to turn every SD into `undefined`).
 * @returns {{mean: number[], sd: number[]}} Per-column mean and floored SD.
 * @throws {TypeError} When `rows` is empty — there is nothing to build a reference from.
 */
function meanSd(rows, floorPct) {
  if (!rows || !rows.length) {
    throw new TypeError('meanSd: rows must be a non-empty array of feature rows');
  }
  const n = rows.length;
  const d = rows[0].length;
  const mean = Array.from({ length: d }, () => 0);
  const sd = Array.from({ length: d }, () => 0);

  for (const r of rows) for (let j = 0; j < d; j += 1) mean[j] += r[j];
  for (let j = 0; j < d; j += 1) mean[j] /= n;
  for (const r of rows) for (let j = 0; j < d; j += 1) sd[j] += (r[j] - mean[j]) ** 2;
  for (let j = 0; j < d; j += 1) sd[j] = Math.sqrt(sd[j] / n);

  const pos = sd.filter((x) => x > 0).sort((a, b) => a - b);
  let floor = 1e-6;
  if (pos.length) {
    const last = pos.length - 1;
    const rank = Math.floor((floorPct / 100) * last);
    const idx = Number.isFinite(rank) ? Math.min(last, Math.max(0, rank)) : 0;
    floor = pos[idx];
  }
  for (let j = 0; j < d; j += 1) if (!(sd[j] > floor)) sd[j] = floor;
  return { mean, sd };
}
26
+
27
/**
 * Pick the top-K most frequent affix keys across the train chunks (content-light:
 * 3-char prefixes/suffixes). Prefix and suffix counts returned by affixCounts are
 * accumulated into one shared tally.
 *
 * Ordering is deterministic: count descending, then key ascending.
 *
 * @param {Iterable<*>} chunks - Train chunks, each passed to affixCounts.
 * @param {number} k - Maximum number of keys to return.
 * @returns {string[]} Up to `k` affix keys, most frequent first.
 */
function pickAffixKeys(chunks, k) {
  const totals = Object.create(null);
  const tally = (bag) => {
    for (const affix in bag) totals[affix] = (totals[affix] || 0) + bag[affix];
  };
  for (const chunk of chunks) {
    const { pre, suf } = affixCounts(chunk);
    tally(pre);
    tally(suf);
  }

  const byCountThenKey = ([keyA, countA], [keyB, countB]) => {
    const diff = countB - countA;
    if (diff) return diff;
    return keyA < keyB ? -1 : 1;
  };
  const ranked = Object.entries(totals).sort(byCountThenKey);
  return ranked.slice(0, k).map((entry) => entry[0]);
}
41
+
42
/**
 * buildReference(trainChunks, opts) -> ref object consumed by chunkFeatures.
 *
 * For each feature family (function words, trigrams, punctuation, topic-robust scalars,
 * affixes) every train chunk is featurized and the per-dimension mean / floored SD is
 * computed with meanSd. The affix family uses the top `affixK` keys chosen by
 * pickAffixKeys over the same chunks.
 *
 * @param {Array<*>} trainChunks - Train chunks to derive the reference from.
 * @param {Object} [opts]
 * @param {number} [opts.floorPct=10] - Percentile passed to meanSd as the SD floor.
 * @param {number} [opts.affixK=150] - Number of affix keys to keep.
 * @returns {Object} `{ funcMean, funcSd, triMean, triSd, punctMean, punctSd, scalarMean,
 *   scalarSd, affixKeys, affixMean, affixSd, nTrainChunks }`.
 */
export function buildReference(trainChunks, opts = {}) {
  const floorPct = opts.floorPct ?? 10;
  const affixK = opts.affixK ?? 150;
  const affixKeys = pickAffixKeys(trainChunks, affixK);

  // Featurize every chunk with `featurize`, then reduce to column mean / floored SD.
  const referenceFor = (featurize) => meanSd(trainChunks.map((chunk) => featurize(chunk)), floorPct);

  const func = referenceFor((chunk) => relFreqFunc(chunk, FUNCTION_WORDS));
  const tri = referenceFor((chunk) => relFreqTrigrams(chunk, TRIGRAM_KEYS));
  const punct = referenceFor((chunk) => relFreqPunct(chunk, PUNCT_KEYS));
  const scalar = referenceFor((chunk) => topicRobustScalars(chunk));
  const affix = referenceFor((chunk) => relFreqAffix(chunk, affixKeys));

  return {
    funcMean: func.mean,
    funcSd: func.sd,
    triMean: tri.mean,
    triSd: tri.sd,
    punctMean: punct.mean,
    punctSd: punct.sd,
    scalarMean: scalar.mean,
    scalarSd: scalar.sd,
    affixKeys,
    affixMean: affix.mean,
    affixSd: affix.sd,
    nTrainChunks: trainChunks.length,
  };
}
@@ -0,0 +1,30 @@
1
+ // verifier-scorer.mjs — Gate B v3. Adapts the trained chunk-pair logistic verifier to
2
+ // the `scorer` interface validateInstrument expects: { vectorize, distance }.
3
+ //
4
+ // UNIT OF PAIRING = one CHUNK per "doc". vectorize(text) -> per-family z-vectors for that
5
+ // chunk (against the DEV-derived reference). distance(a,b) -> 1 - P(same-author) from the
6
+ // trained logreg over the single chunk-pair summary. Training (in the harness) uses the
7
+ // IDENTICAL single-pair summary representation, so train and score are consistent.
8
+
9
+ import { chunkFeatures } from './verifier-features.mjs';
10
+ import { pairFeatures } from './verifier-pair.mjs';
11
+ import { applyStandardizer, predictProba } from './verifier-logreg.mjs';
12
+
13
/**
 * Build the summary vector for a single chunk pair by treating each chunk as a
 * one-element chunk set and delegating to pairFeatures. The cutpoints in `cfg` are
 * expected to be the same at train and score time (DEV-learned, passed in).
 *
 * @param {Object} featA - chunkFeatures object of the first chunk.
 * @param {Object} featB - chunkFeatures object of the second chunk.
 * @param {Object} [cfg] - Forwarded unchanged to pairFeatures.
 * @returns {Float64Array} Pair-summary feature vector.
 */
export function chunkPairFeatures(featA, featB, cfg) {
  const onlyA = [featA];
  const onlyB = [featB];
  return pairFeatures(onlyA, onlyB, cfg);
}
18
+
19
/**
 * makeScorer(ref, model, standardizer, cfg) -> { vectorize, distance }.
 *
 * - vectorize(text): chunkFeatures of `text` against the reference `ref`.
 * - distance(a, b): 1 - P(same-author), where the probability comes from predictProba on
 *   the standardized chunk-pair summary. Higher = more different (validateInstrument
 *   convention).
 *
 * @param {Object} ref - Reference passed to chunkFeatures.
 * @param {Object} model - Trained model passed to predictProba.
 * @param {Object} standardizer - Standardizer passed to applyStandardizer.
 * @param {Object} [cfg={}] - Forwarded to chunkPairFeatures.
 * @returns {{vectorize: function(string): Object, distance: function(Object, Object): number}}
 */
export function makeScorer(ref, model, standardizer, cfg = {}) {
  const vectorize = (text) => chunkFeatures(text, ref);

  const distance = (a, b) => {
    const summary = chunkPairFeatures(a, b, cfg);
    const standardized = applyStandardizer(summary, standardizer);
    const pSameAuthor = predictProba(standardized, model);
    return 1 - pSameAuthor;
  };

  return { vectorize, distance };
}
@@ -0,0 +1,168 @@
1
+ // wrong-target-control.mjs — Gate B v2, Task T5. THE discriminator.
2
+ //
3
+ // For each subject P and arm, the margin is:
4
+ // m_P = distance(output, NEAREST same-register foreigner) − distance(output, OWN test)
5
+ // m_P > 0 means the styled output landed closer to P's OWN held-out fingerprint than to
6
+ // the CLOSEST same-register stranger. A generic register-obeyer is ~equidistant from all
7
+ // same-register targets ⇒ m≈0 ⇒ NULL. Only idiosyncratic voice capture wins.
8
+ //
9
+ // Audit must-fixes baked in:
10
+ // * Foreign pool restricted to the SAME register band (δ=0.15), by the register metric
11
+ // only — a register-obeyer cannot false-PASS.
12
+ // * NEAREST foreigner, never a centroid/mean of foreigners (centroid regresses to the
13
+ // population center and manufactures a positive margin).
14
+ // * Undecidable subjects (no same-register foreigner) are EXCLUDED — never fall back to
15
+ // a register-diverse pool.
16
+ // * Bootstrap resamples SUBJECTS (the margins array is length = nDecidableSubjects).
17
+ // * One-sided 99% lower bound = bootstrapCI(margins, {alpha:0.02}).lo.
18
+ // * Measured-scale floor (minMeanMargin) must bite: significant-but-trivial ⇒ NULL.
19
+ // * McNemar vs baseline reads .pValue and applies perTestAlpha locally (ignores the
20
+ // helper's hardcoded .significant=0.05).
21
+
22
+ import { fullStyleDistance, styleDistance } from './stylometry.js';
23
+ import { bootstrapCI, mcnemar } from '../../memory/bench-metrics.js';
24
+
25
/** Frozen default configuration for the wrong-target control. */
export const CONTROL_DEFAULTS = Object.freeze({
  // Register band (δ) used to select same-register foreigners.
  registerDelta: 0.15,
  // Passed to the two-sided bootstrap helper @0.02 → one-sided 99% lower bound.
  alpha: 0.02,
  // TEST-ONLY default; the real run derives floorK*(between-within) (prereg.deriveMinMeanMargin).
  minMeanMargin: 0.01,
  // Minimum standardized effect size (mean/sd) — kills "few subjects carry it".
  minDz: 0.5,
  // Bonferroni-adjusted at T7; default single-test 0.01.
  perTestAlpha: 0.01,
  // Bootstrap iteration count handed to bootstrapCI.
  bootstrapIters: 2000,
  // Seed handed to bootstrapCI.
  seed: 42,
});
34
+
35
/**
 * Arithmetic mean.
 * @param {number[]} xs
 * @returns {number} The mean, or NaN for an empty array.
 */
function mean(xs) {
  if (!xs.length) return NaN;
  let total = 0;
  xs.forEach((x) => { total += x; });
  return total / xs.length;
}

/**
 * Sample standard deviation (n - 1 denominator).
 * @param {number[]} xs
 * @returns {number} The standard deviation, or NaN when there are fewer than two values.
 */
function stdev(xs) {
  const n = xs.length;
  if (n < 2) return NaN;
  const mu = mean(xs);
  let sumSq = 0;
  xs.forEach((x) => { sumSq += (x - mu) ** 2; });
  return Math.sqrt(sumSq / (n - 1));
}
41
+
42
/**
 * Same-register foreigners of P: every OTHER persona whose register vector lies within
 * `delta` (inclusive) of P's, measured with styleDistance on `fingerprint.register`.
 * Register-only — independent of the authorship distance under test.
 *
 * @param {{id: *, fingerprint: {register: *}}} p - The subject persona.
 * @param {Array<{id: *, fingerprint: {register: *}}>} personas - Candidate pool (may include `p`).
 * @param {number} delta - Maximum register distance to count as "same register".
 * @returns {Array<Object>} Matching personas, in pool order.
 */
export function sameRegisterForeigners(p, personas, delta) {
  const ownRegister = p.fingerprint.register;
  const matches = [];
  for (const candidate of personas) {
    if (candidate.id === p.id) continue;
    if (styleDistance(ownRegister, candidate.fingerprint.register) <= delta) matches.push(candidate);
  }
  return matches;
}
48
+
49
/**
 * Margin of one arm output for subject P against the NEAREST same-register foreigner:
 * margin = distance(output, nearest foreigner) - distance(output, own fingerprint).
 * A positive margin means the output sits closer to P than to any listed foreigner.
 *
 * @param {*} armVector - Style vector of the arm's output.
 * @param {{fingerprint: *}} p - The subject persona.
 * @param {Array<{fingerprint: *}>} foreigners - Same-register foreigners of `p`.
 * @returns {{dOwn: number, dNearestForeign: number, margin: number}} `dNearestForeign`
 *   stays Infinity when `foreigners` is empty.
 */
export function subjectMargin(armVector, p, foreigners) {
  const dOwn = fullStyleDistance(armVector, p.fingerprint);
  const dNearestForeign = foreigners.reduce((closest, stranger) => {
    const d = fullStyleDistance(armVector, stranger.fingerprint);
    return d < closest ? d : closest;
  }, Infinity);
  const margin = dNearestForeign - dOwn;
  return { dOwn, dNearestForeign, margin };
}
59
+
60
/**
 * Significance at a given alpha, reading the raw p-value (NOT mcnemar's hardcoded
 * .significant). A non-finite or non-numeric p-value is never significant.
 *
 * @param {number} pValue
 * @param {number} alpha
 * @returns {boolean} True when `pValue` is a finite number strictly below `alpha`.
 */
export function significantAt(pValue, alpha) {
  if (!Number.isFinite(pValue)) return false;
  return pValue < alpha;
}
62
+
63
/**
 * Format a number with four decimals for verdict messages.
 * @param {*} x
 * @returns {string} `x.toFixed(4)` for finite numbers, otherwise the literal 'NaN'.
 */
function fmt4(x) {
  if (!Number.isFinite(x)) return 'NaN';
  return x.toFixed(4);
}
64
+
65
/**
 * Pure arm-level control verdict. PASS requires ALL of (spec §4.4 + §4.7):
 *   1. one-sided 99% CI lower bound > 0        (the effect is real)
 *   2. meanMargin >= measured-scale floor      (the effect is non-trivial in instrument units)
 *   3. majority of subjects positive (>50%)    (NOT a few subjects carrying it)
 *   4. standardized effect dz = mean/sd >= minDz (the effect is large relative to spread)
 *   5. sign-test pValue < perTestAlpha         (the majority is statistically significant)
 * A heavy-tailed margin that clears 1+2 but fails 3/4/5 is a NULL, not a PASS.
 *
 * Effect size rules:
 *   - sd finite and > 0  → dz = meanMargin / sd
 *   - sd exactly 0       → Infinity when meanMargin > 0 (perfectly consistent effect), else 0
 *   - sd anything else (NaN / undefined, e.g. fewer than two subjects) → dz = NaN, which
 *     FAILS leg 4. Previously this case was scored as Infinity for a positive mean, so an
 *     undefined spread passed the effect-size leg vacuously.
 *
 * @param {{ciLower: number, meanMargin: number, sd: number, pctPositive: number,
 *   signPValue: number}} armStats - Per-arm margin statistics.
 * @param {{minMeanMargin: number, perTestAlpha: number, minDz?: number}} cfg -
 *   Thresholds; `minDz` defaults to 0.5. `minMeanMargin` and `perTestAlpha` have no
 *   default here — when absent their legs fail.
 * @returns {{passes: boolean, reasons: string[], dz: number}} `reasons` lists every
 *   failed leg, in leg order; `passes` is true only when it is empty.
 */
export function controlVerdict(armStats, cfg) {
  const reasons = [];
  const {
    ciLower, meanMargin, sd, pctPositive, signPValue,
  } = armStats;
  const minDz = cfg.minDz ?? 0.5;

  let dz;
  if (Number.isFinite(sd) && sd > 0) {
    dz = meanMargin / sd;
  } else if (sd === 0) {
    dz = meanMargin > 0 ? Infinity : 0;
  } else {
    dz = NaN; // spread is undefined — cannot claim any effect size
  }

  if (!(Number.isFinite(ciLower) && ciLower > 0)) reasons.push(`CI-lower ${fmt4(ciLower)} not > 0`);
  if (!(Number.isFinite(meanMargin) && meanMargin >= cfg.minMeanMargin)) reasons.push(`meanMargin ${fmt4(meanMargin)} < floor ${cfg.minMeanMargin}`);
  if (!(Number.isFinite(pctPositive) && pctPositive > 0.5)) reasons.push(`pctPositive ${fmt4(pctPositive)} <= 0.5 (minority of subjects)`);
  // Written as !(dz >= minDz) so that a NaN dz fails the leg.
  if (!(dz >= minDz)) reasons.push(`dz ${Number.isFinite(dz) ? dz.toFixed(3) : dz} < ${minDz}`);
  if (!(Number.isFinite(signPValue) && signPValue < cfg.perTestAlpha)) reasons.push(`sign-test p ${fmt4(signPValue)} >= perTestAlpha ${cfg.perTestAlpha}`);

  return { passes: reasons.length === 0, reasons, dz };
}
90
+
91
/**
 * wrongTargetControl(harnessOut, personas, opts) → per-arm margin stats + verdicts.
 *
 * For every arm and every DECIDABLE subject (one with at least one same-register
 * foreigner) the margin is
 *   distance(arm output, NEAREST same-register foreigner) − distance(arm output, own fingerprint).
 * Subjects with no same-register foreigner are excluded; there is no fallback pool.
 *
 * Per arm the result carries: the raw margins and own-win indicators, mean / sd /
 * fraction positive, the bootstrap CI (`ciLower`, `ciPoint`), a sign test via mcnemar on
 * the loss/win indicators (`signPValue`, `signPositives`, `signNegatives`), the mean
 * register gap to the nearest foreigner, a McNemar comparison against the `baseline`
 * arm when one exists (`vsBaseline`), and the controlVerdict (`verdict`).
 *
 * Each foreigner distance is computed once per subject per arm: the same pass yields
 * both the nearest distance (for the margin) and the nearest foreigner (for the
 * register-gap diagnostic). Ties keep the first foreigner in pool order.
 *
 * @param {{arms: string[], results: Object<string, Object<string, {vector: *}>>}} harnessOut -
 *   Harness output; `results[personaId][arm].vector` is the arm's style vector.
 * @param {Array<{id: *, fingerprint: {register: *}}>} personas - All subjects.
 * @param {Object} [opts] - Overrides merged over CONTROL_DEFAULTS.
 * @returns {{cfg: Object, nSubjects: number, nDecidable: number, nUndecidable: number,
 *   decidableIds: Array, perArm: Object, registerEchoPasses: (boolean|undefined)}}
 * @throws {TypeError} When the harness has no result for a decidable subject / arm.
 */
export function wrongTargetControl(harnessOut, personas, opts = {}) {
  const cfg = { ...CONTROL_DEFAULTS, ...opts };
  const byId = new Map(personas.map((p) => [p.id, p]));

  // foreigners per persona (register-only); decidable = has >=1 same-register foreigner
  const foreignersById = {};
  for (const p of personas) foreignersById[p.id] = sameRegisterForeigners(p, personas, cfg.registerDelta);
  const decidableIds = personas.filter((p) => foreignersById[p.id].length > 0).map((p) => p.id);

  const perArm = {};
  for (const arm of harnessOut.arms) {
    const margins = [];
    const ownWin = [];
    const regGaps = [];
    for (const id of decidableIds) {
      const p = byId.get(id);
      const cell = harnessOut.results?.[id]?.[arm];
      if (!cell) {
        throw new TypeError(`wrongTargetControl: harness has no result for subject "${id}" in arm "${arm}"`);
      }
      const armVec = cell.vector;

      const dOwn = fullStyleDistance(armVec, p.fingerprint);
      // One pass over the foreigners: nearest distance AND which foreigner it was.
      let nearest = null;
      let dNearestForeign = Infinity;
      for (const q of foreignersById[id]) {
        const d = fullStyleDistance(armVec, q.fingerprint);
        if (d < dNearestForeign) {
          dNearestForeign = d;
          nearest = q;
        }
      }
      const margin = dNearestForeign - dOwn;
      margins.push(margin);
      ownWin.push(margin > 0 ? 1 : 0);
      // diagnostic: register distance to the nearest same-register foreigner
      if (nearest) regGaps.push(styleDistance(p.fingerprint.register, nearest.fingerprint.register));
    }
    const ownLoss = margins.map((m) => (m < 0 ? 1 : 0));
    const ci = bootstrapCI(margins, { iters: cfg.bootstrapIters, alpha: cfg.alpha, seed: cfg.seed });
    // Sign test on the loss/win indicators: b = #(margin>0), c = #(margin<0); two-sided
    // p on |b−c| (per the mcnemar helper's convention — margins of exactly 0 count as neither).
    const sign = mcnemar(ownLoss, ownWin);
    perArm[arm] = {
      arm,
      nDecidable: margins.length,
      margins,
      ownWin,
      meanMargin: mean(margins),
      sd: stdev(margins),
      pctPositive: mean(ownWin),
      signPValue: sign.pValue,
      signPositives: sign.b,
      signNegatives: sign.c,
      ciLower: ci.lo,
      ciPoint: ci.point,
      registerGap: mean(regGaps),
    };
  }

  // McNemar vs baseline for each verdict arm (own-match win indicator), pValue exposed.
  for (const arm of harnessOut.arms) {
    if (arm === 'baseline' || !perArm.baseline) continue;
    const m = mcnemar(perArm.baseline.ownWin, perArm[arm].ownWin);
    // mcnemar.pValue is TWO-SIDED (|b−c|), so the direction guard m.b > m.c is mandatory:
    // the arm must FLIP MORE subjects to own-match than baseline does, not merely differ.
    perArm[arm].vsBaseline = {
      b: m.b,
      c: m.c,
      pValue: m.pValue,
      beatsBaseline: significantAt(m.pValue, cfg.perTestAlpha) && m.b > m.c,
    };
  }

  // arm verdicts
  for (const arm of harnessOut.arms) perArm[arm].verdict = controlVerdict(perArm[arm], cfg);

  return {
    cfg,
    nSubjects: personas.length,
    nDecidable: decidableIds.length,
    nUndecidable: personas.length - decidableIds.length,
    decidableIds,
    perArm,
    // VOID rail input: PASS of a register-only-echo arm means the control is fooled by
    // register alone. undefined (no registerEcho arm present) ⇒ the decision layer THROWS
    // rather than silently treating the missing rail as false.
    registerEchoPasses: perArm.registerEcho ? perArm.registerEcho.verdict.passes : undefined,
  };
}
167
+
168
+ export const __test = { mean };
@@ -0,0 +1,124 @@
1
+ // Gate B v2 — Task T5: wrong-target control. The discriminator's guards: same-register
2
+ // foreign pool, undecidable exclusion, NEAREST (not centroid) margin, subject-level
3
+ // bootstrap, measured-scale floor, McNemar by p-value. Positive control (own output)
4
+ // PASSES; generic output NULLs.
5
+
6
+ import { test } from 'node:test';
7
+ import assert from 'node:assert/strict';
8
+ import {
9
+ wrongTargetControl, sameRegisterForeigners, subjectMargin, controlVerdict, significantAt,
10
+ } from './wrong-target-control.mjs';
11
+ import { mcnemar } from '../../memory/bench-metrics.js';
12
+ import { generatePersonaText } from './synthetic-personas.js';
13
+ import { fullStyleVector } from './stylometry.js';
14
+
15
/**
 * Test fixture: wrap a text sample as a persona whose fingerprint is its fullStyleVector.
 * @param {string} id - Persona id.
 * @param {string} text - Sample text to fingerprint.
 * @returns {{id: string, fingerprint: *}}
 */
function persona(id, text) {
  const fingerprint = fullStyleVector(text);
  return { id, fingerprint };
}
16
+ // formal personas (archetype 0) — same register, different authors. n=10 so the sign test
17
+ // can clear perTestAlpha=0.01 when every subject is positive (the positive control).
18
+ const FORMAL = Array.from({ length: 10 }, (_, i) => persona(`f${i}`, generatePersonaText(0, 17 * (i + 1), 16)));
19
+ // Unambiguously different register: emoji + contractions + exclamations + casual (several
20
+ // axes differ from formal, clearing the 0.15 band).
21
+ const CASUAL = persona('c0', "omg yes!! 🚀 lol i love it, it's so cool 😄 you're gonna wanna try it, honestly it's the best, i can't even, no cap!! 💯");
22
+
23
/**
 * Test fixture: build a harness-output stand-in where every (persona, arm) cell holds
 * `{ vector: vectorFor(persona, arm) }`.
 *
 * @param {string[]} arms - Arm names.
 * @param {Array<{id: string}>} personas - Personas to create result rows for.
 * @param {function(Object, string): *} vectorFor - Produces the vector for one cell;
 *   called persona by persona, arm by arm.
 * @returns {{arms: string[], personaIds: string[], results: Object}}
 */
function fakeHarness(arms, personas, vectorFor) {
  const rowFor = (p) => Object.fromEntries(
    arms.map((arm) => [arm, { vector: vectorFor(p, arm) }]),
  );
  const results = Object.fromEntries(personas.map((p) => [p.id, rowFor(p)]));
  const personaIds = personas.map((p) => p.id);
  return { arms, personaIds, results };
}
31
+
32
+ test('foreign pool is restricted to the SAME register band (δ=0.15)', () => {
33
+ const pool = [...FORMAL, CASUAL];
34
+ const f = sameRegisterForeigners(FORMAL[0], pool, 0.15).map((q) => q.id);
35
+ assert.ok(f.includes('f1'), 'a same-register formal peer is in the pool');
36
+ assert.ok(!f.includes('c0'), 'the different-register casual author is excluded');
37
+ });
38
+
39
+ test('undecidable subjects (no same-register foreigner) are EXCLUDED, no diverse fallback', () => {
40
+ const personas = [FORMAL[0], CASUAL]; // each other is a different register
41
+ const h = fakeHarness(['baseline'], personas, (p) => p.fingerprint);
42
+ const r = wrongTargetControl(h, personas);
43
+ assert.equal(r.nDecidable, 0);
44
+ assert.equal(r.nUndecidable, 2);
45
+ assert.deepEqual(r.decidableIds, []);
46
+ });
47
+
48
+ test('margin uses NEAREST foreigner, not a centroid', () => {
49
+ const p = FORMAL[0];
50
+ const foreigners = [FORMAL[1], FORMAL[2], FORMAL[3]];
51
+ // output IS a foreigner (f1) → nearest distance is 0 → margin must be negative
52
+ const sm = subjectMargin(FORMAL[1].fingerprint, p, foreigners);
53
+ assert.equal(sm.dNearestForeign, 0, 'nearest foreigner distance is 0 (output == f1)');
54
+ assert.ok(sm.margin < 0, 'matching a stranger yields a NEGATIVE margin (centroid would hide this)');
55
+ // output IS own → margin positive
56
+ const smOwn = subjectMargin(p.fingerprint, p, foreigners);
57
+ assert.ok(smOwn.margin > 0, 'matching OWN yields a positive margin');
58
+ });
59
+
60
+ test('POSITIVE control: own-output arm PASSES (margins positive, CI-lower>0, floor cleared)', () => {
61
+ const h = fakeHarness(['baseline'], FORMAL, (p) => p.fingerprint); // output == own
62
+ const r = wrongTargetControl(h, FORMAL);
63
+ assert.equal(r.nDecidable, 10);
64
+ assert.equal(r.perArm.baseline.margins.length, 10, 'bootstrap operates on SUBJECTS (=10), not probes');
65
+ assert.equal(r.perArm.baseline.verdict.passes, true);
66
+ assert.ok(r.perArm.baseline.ciLower > 0);
67
+ assert.ok(r.perArm.baseline.pctPositive > 0.5 && r.perArm.baseline.signPValue < 0.01);
68
+ });
69
+
70
+ test('NEGATIVE control: generic constant output does NOT pass the control (≈NULL)', () => {
71
+ const constVec = fullStyleVector(generatePersonaText(1, 999, 16)); // casual generic, matches no formal voice
72
+ const h = fakeHarness(['derived'], FORMAL, () => constVec);
73
+ const r = wrongTargetControl(h, FORMAL);
74
+ assert.equal(r.perArm.derived.verdict.passes, false, 'generic output is not closer to OWN than to nearest stranger');
75
+ });
76
+
77
+ const FULL_PASS = {
78
+ ciLower: 0.02, meanMargin: 0.05, sd: 0.05, pctPositive: 0.8, signPValue: 0.001,
79
+ };
80
+ const FULL_CFG = { minMeanMargin: 0.01, minDz: 0.5, perTestAlpha: 0.01 };
81
+
82
+ test('controlVerdict PASSES only when ALL legs hold (not vacuously false)', () => {
83
+ assert.equal(controlVerdict(FULL_PASS, FULL_CFG).passes, true);
84
+ });
85
+
86
+ test('measured-scale floor BITES: significant-but-trivial ⇒ NULL', () => {
87
+ assert.equal(controlVerdict({ ...FULL_PASS, meanMargin: 0.004 }, FULL_CFG).passes, false);
88
+ assert.equal(controlVerdict({ ...FULL_PASS, ciLower: -0.01 }, FULL_CFG).passes, false);
89
+ });
90
+
91
+ test('NEW LEG — effect size dz: a few subjects carrying it (high sd) ⇒ NULL', () => {
92
+ // mean clears floor and CI>0, but sd is huge ⇒ dz<0.5 ⇒ NULL (heavy-tail false-PASS killed)
93
+ const g = controlVerdict({ ...FULL_PASS, sd: 0.5 }, FULL_CFG);
94
+ assert.equal(g.passes, false);
95
+ assert.ok(g.reasons.some((r) => /dz/.test(r)));
96
+ });
97
+
98
+ test('NEW LEG — majority: <=50% of subjects positive ⇒ NULL', () => {
99
+ const g = controlVerdict({ ...FULL_PASS, pctPositive: 0.5 }, FULL_CFG);
100
+ assert.equal(g.passes, false);
101
+ assert.ok(g.reasons.some((r) => /pctPositive/.test(r)));
102
+ });
103
+
104
+ test('NEW LEG — sign-test: p >= perTestAlpha ⇒ NULL (even with mean+CI+majority)', () => {
105
+ const g = controlVerdict({ ...FULL_PASS, signPValue: 0.03 }, FULL_CFG);
106
+ assert.equal(g.passes, false);
107
+ assert.ok(g.reasons.some((r) => /sign-test/.test(r)));
108
+ });
109
+
110
+ test('McNemar reads p-value with perTestAlpha — ignores the helper hardcoded .significant(0.05)', () => {
111
+ const baseline = Array.from({ length: 20 }, () => 0);
112
+ const arm = Array.from({ length: 20 }, (_, i) => (i < 7 ? 1 : 0)); // 7 flips, 0 losses
113
+ const m = mcnemar(baseline, arm);
114
+ assert.ok(m.pValue > 0.01 && m.pValue < 0.05, `p=${m.pValue.toFixed(3)} lands between the alphas`);
115
+ assert.equal(significantAt(m.pValue, 0.01), false, 'not significant at the strict Bonferroni alpha');
116
+ assert.equal(significantAt(m.pValue, 0.05), true, 'would be "significant" at the lax 0.05 the helper hardcodes');
117
+ });
118
+
119
+ test('registerGap diagnostic is emitted for decidable arms', () => {
120
+ const h = fakeHarness(['baseline'], FORMAL, (p) => p.fingerprint);
121
+ const r = wrongTargetControl(h, FORMAL);
122
+ assert.ok(Number.isFinite(r.perArm.baseline.registerGap));
123
+ assert.ok(r.perArm.baseline.registerGap <= 0.15, 'nearest foreigner really is same-register');
124
+ });