npm - @ijfw/memory-server - Versions diffs - 1.5.5 → 1.6.0 - Mend

@ijfw/memory-server 1.5.5 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

package/bin/ijfw-dashboard +20 -1
package/package.json +4 -3
package/src/audit-roster.js +89 -12
package/src/brain/tiered-llm.js +57 -7
package/src/cross-orchestrator-cli.js +344 -4
package/src/cross-project-search.js +39 -1
package/src/dashboard-server.js +7 -1
package/src/dream/runner.mjs +560 -8
package/src/handlers/brain-handler.js +101 -1
package/src/importers/discover.js +1 -1
package/src/memory/bench-metrics.js +289 -0
package/src/memory/benchmark.js +1 -1
package/src/memory/search.js +53 -1
package/src/orchestrator/plan-checker.js +1 -1
package/src/profile/audit.js +671 -0
package/src/profile/capture.js +871 -0
package/src/profile/derive-dialectic.js +242 -0
package/src/profile/derive-heuristic.js +733 -0
package/src/profile/derive.js +156 -0
package/src/profile/egress.js +306 -0
package/src/profile/eval/build-real-probes.mjs +197 -0
package/src/profile/eval/corpus-from-reddit.mjs +166 -0
package/src/profile/eval/corpus-from-reddit.test.mjs +121 -0
package/src/profile/eval/corpus-from-transcripts.mjs +264 -0
package/src/profile/eval/gate-b-behavior.mjs +420 -0
package/src/profile/eval/gate-b-decision-run.mjs +171 -0
package/src/profile/eval/gate-b-decision-run.test.mjs +141 -0
package/src/profile/eval/gate-b-run.mjs +417 -0
package/src/profile/eval/gate-b-run.test.mjs +204 -0
package/src/profile/eval/gate-c-capture.mjs +323 -0
package/src/profile/eval/harness.mjs +551 -0
package/src/profile/eval/instrument-validation.mjs +248 -0
package/src/profile/eval/instrument-validation.test.mjs +125 -0
package/src/profile/eval/multi-subject-harness.mjs +106 -0
package/src/profile/eval/multi-subject-harness.test.mjs +99 -0
package/src/profile/eval/personas.test.mjs +83 -0
package/src/profile/eval/plumbing.test.mjs +69 -0
package/src/profile/eval/prereg.mjs +130 -0
package/src/profile/eval/prereg.test.mjs +78 -0
package/src/profile/eval/real-corpus.test.mjs +103 -0
package/src/profile/eval/real-personas.mjs +109 -0
package/src/profile/eval/run-real-corpus-concurrent.mjs +407 -0
package/src/profile/eval/run-real-corpus.mjs +358 -0
package/src/profile/eval/slug-quality.mjs +464 -0
package/src/profile/eval/stylometry-features.js +85 -0
package/src/profile/eval/stylometry-reference.js +16 -0
package/src/profile/eval/stylometry.js +224 -0
package/src/profile/eval/stylometry.test.mjs +103 -0
package/src/profile/eval/synthetic-personas.js +91 -0
package/src/profile/eval/verifier-features.mjs +170 -0
package/src/profile/eval/verifier-logreg.mjs +74 -0
package/src/profile/eval/verifier-pair.mjs +122 -0
package/src/profile/eval/verifier-reference.mjs +68 -0
package/src/profile/eval/verifier-scorer.mjs +30 -0
package/src/profile/eval/wrong-target-control.mjs +168 -0
package/src/profile/eval/wrong-target-control.test.mjs +124 -0
package/src/profile/exemplar-capture.js +232 -0
package/src/profile/exemplar-retrieve.js +138 -0
package/src/profile/exemplar-store.js +314 -0
package/src/profile/lock.js +64 -0
package/src/profile/merge.js +624 -0
package/src/profile/path-policy.js +213 -0
package/src/profile/precision-stamp.mjs +151 -0
package/src/profile/render-brief.js +717 -0
package/src/profile/schema.js +244 -0
package/src/profile/sensitivity.js +249 -0
package/src/profile/serve.js +345 -0
package/src/profile/store.js +261 -0
package/src/profile/telemetry.js +289 -0
package/src/recovery/checkpoint.js +7 -1
package/src/server.js +185 -14
package/src/.registry-meta-key.pem +0 -3

package/src/profile/eval/instrument-validation.mjs ADDED Viewed

@@ -0,0 +1,248 @@
+// instrument-validation.mjs — Gate B v2, Task T2. THE HARD PREREQUISITE GATE.
+//
+// Before any cloud spend, prove the metric (T1 fullStyleDistance) actually discriminates
+// INDIVIDUALS — not just registers. We compute same-author vs different-author distances
+// over a labeled corpus and score the discriminator with AUC. The PRIMARY gate is the
+// SAME-REGISTER AUC: can the metric tell apart two authors who write in the same register
+// (the only thing that makes the wrong-target control meaningful). If the same-register
+// discriminator is near chance, the whole behavioral question is unanswerable → report a
+// NULL and CUT; do NOT loosen the thresholds to proceed.
+//
+// Anti-bias discipline (audit must-fix): authors and pairs are selected by IDENTITY and
+// by REGISTER distance only — NEVER by the fullStyleDistance being scored. Selecting
+// "well-separated" authors by the scored metric would inflate AUC. The same-register
+// membership uses the 9-axis register metric (styleDistance), which is independent of the
+// authorship sub-vectors under test.
+import { fullStyleVector, fullStyleDistance, styleVector, styleDistance } from './stylometry.js';
+// Default gate thresholds and bootstrap settings. Frozen so the shared defaults cannot be
+// mutated; validateInstrument spreads preReg.validation over a copy of these values.
+export const DEFAULT_VALIDATION_CFG = Object.freeze({
+  registerDelta: 0.15,        // same-register band on the 9-axis register metric
+  aucFloor: 0.80,             // sanity floor on register-diverse AUC
+  sameRegisterCILower: 0.75,  // PRIMARY gate: bootstrap lower bound on same-register AUC
+  minSameRegister: 150,       // min same-register between-author pairs (else underpowered)
+  bootstrapB: 1000,
+  ciAlpha: 0.05,              // one-sided lower bound at this alpha
+  seed: 1,
+});
+// ---- deterministic RNG (mulberry32) so bootstrap is reproducible ----
+function mulberry32(seed) {
+  let a = seed >>> 0;
+  return function rng() {
+    a |= 0; a = (a + 0x6D2B79F5) | 0;
+    let t = Math.imul(a ^ (a >>> 15), 1 | a);
+    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
+    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
+  };
+}
+// Mann-Whitney AUC with higher score = positive class, average-rank tie handling.
+function aucHigherPos(posScores, negScores) {
+  const nP = posScores.length;
+  const nN = negScores.length;
+  if (!nP || !nN) return NaN;
+  const all = [];
+  for (const s of posScores) all.push({ s, p: 1 });
+  for (const s of negScores) all.push({ s, p: 0 });
+  all.sort((a, b) => a.s - b.s);
+  const N = all.length;
+  let rankSumPos = 0;
+  let i = 0;
+  while (i < N) {
+    let j = i;
+    while (j < N && all[j].s === all[i].s) j += 1;
+    const avgRank = (i + 1 + j) / 2; // ranks are 1-based; average over the tie block
+    for (let k = i; k < j; k += 1) if (all[k].p === 1) rankSumPos += avgRank;
+    i = j;
+  }
+  return (rankSumPos - (nP * (nP + 1)) / 2) / (nP * nN);
+}
+// Positive class = same-author pairs (should have LOW distance), so score = -distance
+// (higher = more "same"). AUC = P(same-author pair ranks closer than diff-author pair).
+function aucFromDistances(sameDists, diffDists) {
+  return aucHigherPos(sameDists.map((d) => -d), diffDists.map((d) => -d));
+}
+// Equal Error Rate over the distance threshold sweep.
+function equalErrorRate(sameDists, diffDists) {
+  if (!sameDists.length || !diffDists.length) return NaN;
+  const ts = [...new Set([...sameDists, ...diffDists])].sort((a, b) => a - b);
+  let best = 1;
+  for (const t of ts) {
+    const fnr = sameDists.filter((d) => d > t).length / sameDists.length; // same called diff
+    const fpr = diffDists.filter((d) => d <= t).length / diffDists.length; // diff called same
+    best = Math.min(best, Math.max(fnr, fpr));
+  }
+  return best;
+}
+function mean(xs) { return xs.length ? xs.reduce((a, b) => a + b, 0) / xs.length : NaN; }
+// Build flat sample list: one sample per document. Each carries authorId + precomputed
+// register (9-axis) and full (authorship) vectors so pairwise distance is cheap.
+//
+// OPT-IN (Gate B v3 trained verifier): if `scorer` is supplied it carries a custom
+// authorship representation + distance. `scorer.vectorize(text)` returns the opaque
+// authorship object stored on `sample.full`; `scorer.distance(a.full, b.full)` returns a
+// distance in [0,1] (higher = more different). The 9-axis REGISTER vector is ALWAYS the
+// shipped styleVector — the same-register tag must stay independent of the authorship
+// representation under test. When `scorer` is null, behavior is identical to before
+// (fullStyleVector + fullStyleDistance).
+function buildSamples(corpus, scorer = null) {
+  const samples = [];
+  for (const author of corpus) {
+    const docs = author.docs || [];
+    for (const text of docs) {
+      samples.push({
+        authorId: author.id,
+        register: styleVector(text),
+        full: scorer ? scorer.vectorize(text) : fullStyleVector(text),
+      });
+    }
+  }
+  return samples;
+}
+// Authorship distance: the injected scorer's distance, or the shipped composite.
+function authDistance(aFull, bFull, scorer) {
+  return scorer ? scorer.distance(aFull, bFull) : fullStyleDistance(aFull, bFull);
+}
+// Enumerate same-author and diff-author pair distances. Diff pairs are tagged with
+// whether they are SAME-REGISTER (register distance <= delta) — that tag depends ONLY on
+// the register metric, never on the authorship distance under test.
+function enumeratePairs(samples, delta, scorer = null) {
+  const same = [];
+  const diff = [];
+  const diffSameRegister = [];
+  for (let i = 0; i < samples.length; i += 1) {
+    for (let j = i + 1; j < samples.length; j += 1) {
+      const a = samples[i];
+      const b = samples[j];
+      const d = authDistance(a.full, b.full, scorer);
+      if (a.authorId === b.authorId) {
+        same.push(d);
+      } else {
+        diff.push(d);
+        if (styleDistance(a.register, b.register) <= delta) diffSameRegister.push(d);
+      }
+    }
+  }
+  return { same, diff, diffSameRegister };
+}
+// Author-level (cluster) bootstrap on the same-register AUC. Resamples AUTHORS with
+// replacement; same-author distances come from each resampled author's own docs, and the
+// same-register NEG pairs are drawn across DISTINCT resampled authors (a real author is
+// never compared to a duplicate of themselves — that would inject distance~0 foreigners).
+function bootstrapSameRegisterAucLower(corpus, delta, B, alpha, seed, scorer = null) {
+  const rng = mulberry32(seed);
+  const ids = corpus.map((a) => a.id);
+  const byId = new Map(corpus.map((a) => [a.id, a]));
+  const reps = [];
+  for (let b = 0; b < B; b += 1) {
+    const pick = [];
+    for (let k = 0; k < ids.length; k += 1) pick.push(ids[Math.floor(rng() * ids.length)]);
+    const distinct = [...new Set(pick)];
+    if (distinct.length < 2) continue;
+    // same-author distances: each occurrence contributes its within-author doc pairs
+    const sameD = [];
+    for (const id of pick) {
+      const s = buildSamples([byId.get(id)], scorer);
+      for (let i = 0; i < s.length; i += 1) {
+        for (let j = i + 1; j < s.length; j += 1) sameD.push(authDistance(s[i].full, s[j].full, scorer));
+      }
+    }
+    // same-register diff distances: across DISTINCT authors only
+    const distinctSamples = buildSamples(distinct.map((id) => byId.get(id)), scorer);
+    const negSR = [];
+    for (let i = 0; i < distinctSamples.length; i += 1) {
+      for (let j = i + 1; j < distinctSamples.length; j += 1) {
+        const a = distinctSamples[i];
+        const c = distinctSamples[j];
+        if (a.authorId !== c.authorId && styleDistance(a.register, c.register) <= delta) {
+          negSR.push(authDistance(a.full, c.full, scorer));
+        }
+      }
+    }
+    if (sameD.length && negSR.length) reps.push(aucFromDistances(sameD, negSR));
+  }
+  if (!reps.length) return { lower: NaN, reps: 0 };
+  reps.sort((a, b) => a - b);
+  const idx = Math.max(0, Math.floor(alpha * reps.length) - 1);
+  return { lower: reps[idx], reps: reps.length };
+}
+function fmt(x) { return Number.isFinite(x) ? x.toFixed(3) : 'NaN'; }
+// Pure gate decision over the computed metrics. Extracted so the threshold logic is
+// unit-testable independent of any fixture's AUC. NEVER loosens: every threshold is a
+// hard floor; a failed check is named for honest reporting at the decision gate.
+export function gateDecision(m, cfg) {
+  const checks = [];
+  if (!(Number.isFinite(m.auc) && m.auc >= cfg.aucFloor)) {
+    checks.push(`diverse AUC ${fmt(m.auc)} < ${cfg.aucFloor}`);
+  }
+  if (!(Number.isFinite(m.sameRegisterAucCILower) && m.sameRegisterAucCILower >= cfg.sameRegisterCILower)) {
+    checks.push(`same-register AUC CI-lower ${fmt(m.sameRegisterAucCILower)} < ${cfg.sameRegisterCILower}`);
+  }
+  if (!(m.nSameRegister >= cfg.minSameRegister)) {
+    checks.push(`nSameRegister ${m.nSameRegister} < ${cfg.minSameRegister}`);
+  }
+  if (!(Number.isFinite(m.withinMean) && Number.isFinite(m.betweenMean) && m.betweenMean > m.withinMean)) {
+    checks.push('betweenMean not > withinMean');
+  }
+  return { passes: checks.length === 0, failedChecks: checks };
+}
+// validateInstrument(corpus, preReg) → structured validation result + pass/fail gate.
+// corpus: [{ id, docs: [text, ...] }, ...]. preReg.validation overrides DEFAULT_VALIDATION_CFG.
+export function validateInstrument(corpus, preReg = {}) {
+  const cfg = { ...DEFAULT_VALIDATION_CFG, ...preReg.validation };
+  // OPT-IN scorer (Gate B v3 trained verifier). preReg.scorer = { vectorize, distance }.
+  // vectorize(text) -> opaque authorship object; distance(aFull, bFull) -> [0,1]. The
+  // corpus "docs" are the unit of pairing (for the verifier these are author CHUNKS, and
+  // distance is the trained-model same/diff distance for that chunk pair). When absent,
+  // behavior is byte-identical to the shipped frozen-composite path.
+  const scorer = preReg.scorer || null;
+  const samples = buildSamples(corpus, scorer);
+  const { same, diff, diffSameRegister } = enumeratePairs(samples, cfg.registerDelta, scorer);
+  const withinMean = mean(same);
+  const betweenMean = mean(diff);
+  const auc = aucFromDistances(same, diff);
+  const eer = equalErrorRate(same, diff);
+  const nSameRegister = diffSameRegister.length;
+  const sameRegisterAuc = nSameRegister ? aucFromDistances(same, diffSameRegister) : NaN;
+  const boot = nSameRegister
+    ? bootstrapSameRegisterAucLower(corpus, cfg.registerDelta, cfg.bootstrapB, cfg.ciAlpha, cfg.seed, scorer)
+    : { lower: NaN, reps: 0 };
+  const metrics = {
+    auc, sameRegisterAucCILower: boot.lower, nSameRegister, withinMean, betweenMean,
+  };
+  const { passes, failedChecks } = gateDecision(metrics, cfg);
+  return {
+    auc,
+    eer,
+    sameRegisterAuc,
+    sameRegisterAucCILower: boot.lower,
+    bootstrapReps: boot.reps,
+    withinMean,
+    betweenMean,
+    nWithin: same.length,
+    nBetween: diff.length,
+    nSameRegister,
+    cfg,
+    passes,
+    failedChecks,
+  };
+}
+// Internal helpers bundled under one export; the sibling test file destructures them from
+// here to exercise the AUC/EER math, pair enumeration and gate logic directly.
+export const __test = {
+  aucHigherPos, aucFromDistances, equalErrorRate, enumeratePairs, buildSamples, gateDecision,
+};

package/src/profile/eval/instrument-validation.test.mjs ADDED Viewed

@@ -0,0 +1,125 @@
+// Gate B v2 — Task T2: instrument-validation gate. Tests the GATE LOGIC and the
+// anti-bias selection rule on small fixtures. (The real >=150-pair / CI>=0.75 thresholds
+// are exercised at run time on the Reddit corpus, not here — here we prove the gate
+// REFUSES to pass when underpowered or below threshold, and never self-loosens.)
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { validateInstrument, __test } from './instrument-validation.mjs';
+const {
+  aucFromDistances, equalErrorRate, enumeratePairs, buildSamples, gateDecision,
+} = __test;
+// Thresholds handed to gateDecision in the pure-logic tests below.
+const GATE_CFG = {
+  aucFloor: 0.80, sameRegisterCILower: 0.75, minSameRegister: 150,
+};
+// A metrics object that clears every threshold in GATE_CFG; individual tests override one
+// field at a time to trip exactly one check.
+const PASSING = {
+  auc: 0.88, sameRegisterAucCILower: 0.78, nSameRegister: 200, withinMean: 0.3, betweenMean: 0.5,
+};
+// Three stylistically distinct authors, 4 docs each.
+const FORMAL = [
+  'The proposal, however, must be examined with care, for the implications of the decision extend well beyond the present matter.',
+  'It is therefore evident that the committee, which has reviewed the whole of the evidence, will arrive at the conclusion in due course.',
+  'The argument rests upon the assumption that the conditions of the experiment were, in the main, properly controlled and recorded.',
+  'One must consider, moreover, the degree to which the results of the study may be generalized beyond the original population.',
+];
+const CASUAL = [
+  "yeah i just think you're gonna like this one, it's kinda fun and you don't really have to do much honestly.",
+  'ok so i tried it and it is like, totally fine? you just click the thing and it kinda works, no big deal.',
+  "i dunno, you could do it that way i guess, but honestly i'd just keep it simple, it is way easier that way.",
+  "lol yeah that's basically what i did too, you just gotta be patient and it'll sort itself out eventually.",
+];
+const TERSE_TECH = [
+  'Run the migration. Check the logs. If the index is missing, rebuild it and retry the job.',
+  'Patch the handler. Add a guard for the null case. Ship it behind the flag, then enable ten percent.',
+  'Cache is stale. Bump the version key. Invalidate on write. Measure the hit rate after deploy.',
+  'Lock contention on the queue. Shard by tenant. Add backpressure. Alert if depth exceeds the threshold.',
+];
+// Corpus in the shape validateInstrument expects: [{ id, docs }].
+const CORPUS3 = [
+  { id: 'formal', docs: FORMAL },
+  { id: 'casual', docs: CASUAL },
+  { id: 'tech', docs: TERSE_TECH },
+];
+// --- helper math: AUC and EER on hand-checkable distance lists ---
+test('AUC helper: perfect separation = 1, reversed = 0, identical = 0.5', () => {
+  assert.equal(aucFromDistances([0.1, 0.2], [0.8, 0.9]), 1);
+  assert.equal(aucFromDistances([0.8, 0.9], [0.1, 0.2]), 0);
+  assert.equal(aucFromDistances([0.5, 0.5], [0.5, 0.5]), 0.5);
+});
+test('EER is 0 for perfectly separated distance distributions', () => {
+  assert.equal(equalErrorRate([0.1, 0.15, 0.2], [0.7, 0.8, 0.9]), 0);
+});
+// --- end-to-end validateInstrument on small fixtures ---
+test('betweenMean > withinMean on a 3-author x 4-doc fixture (metric discriminates)', () => {
+  const r = validateInstrument(CORPUS3);
+  assert.ok(r.betweenMean > r.withinMean, `between ${r.betweenMean.toFixed(3)} > within ${r.withinMean.toFixed(3)}`);
+  assert.ok(r.auc > 0.5, `auc ${r.auc.toFixed(3)} above chance`);
+});
+test('same-register slice returns nSameRegister>0 + finite AUC + bootstrap CI', () => {
+  // Two DIFFERENT formal authors (same register band, different func-word fingerprint).
+  const P = [
+    'The proposal, however, must be examined with the greatest care, for the consequences extend beyond the matter at hand.',
+    'It is therefore the case that the observer, having weighed the evidence, will tend toward the cautious conclusion.',
+  ];
+  const Q = [
+    'The findings are, consequently, of considerable importance; moreover, they bear directly upon the questions raised at the outset.',
+    'Whereas the earlier account emphasized the structural causes, the present analysis attends, hence, to the procedural ones.',
+  ];
+  const corpus2 = [{ id: 'P', docs: P }, { id: 'Q', docs: Q }];
+  // Fewer bootstrap replicates and a minSameRegister of 1 keep this fixture-sized run cheap.
+  const r = validateInstrument(corpus2, { validation: { bootstrapB: 300, minSameRegister: 1 } });
+  assert.ok(r.nSameRegister > 0, `nSameRegister ${r.nSameRegister} > 0`);
+  assert.ok(Number.isFinite(r.sameRegisterAuc), 'sameRegisterAuc finite');
+  assert.ok(Number.isFinite(r.sameRegisterAucCILower), 'CI lower finite');
+  assert.ok(r.bootstrapReps > 0, 'bootstrap produced replicates');
+});
+test('GATE refuses to pass when nSameRegister < 150 (underpowered), reported honestly', () => {
+  const r = validateInstrument(CORPUS3); // tiny fixture, far below 150
+  assert.equal(r.passes, false);
+  assert.ok(r.failedChecks.some((c) => /nSameRegister/.test(c)), `failedChecks names the gap: ${r.failedChecks}`);
+});
+// --- pure gateDecision logic: start from PASSING and break one threshold per test ---
+test('gateDecision PASSES only when ALL thresholds are met (not vacuously false)', () => {
+  assert.equal(gateDecision(PASSING, GATE_CFG).passes, true);
+});
+test('gateDecision FAILS on diverse AUC below floor — threshold not loosened', () => {
+  const g = gateDecision({ ...PASSING, auc: 0.79 }, GATE_CFG);
+  assert.equal(g.passes, false);
+  assert.ok(g.failedChecks.some((c) => /diverse AUC/.test(c)), g.failedChecks.join('; '));
+});
+test('gateDecision FAILS on same-register CI-lower below floor — the PRIMARY gate', () => {
+  const g = gateDecision({ ...PASSING, sameRegisterAucCILower: 0.74 }, GATE_CFG);
+  assert.equal(g.passes, false);
+  assert.ok(g.failedChecks.some((c) => /same-register AUC CI-lower/.test(c)), g.failedChecks.join('; '));
+});
+test('gateDecision FAILS on NaN same-register CI-lower (no same-register pairs found)', () => {
+  const g = gateDecision({ ...PASSING, sameRegisterAucCILower: NaN }, GATE_CFG);
+  assert.equal(g.passes, false);
+  assert.ok(g.failedChecks.some((c) => /same-register AUC CI-lower/.test(c)));
+});
+test('gateDecision FAILS on underpowered nSameRegister (<150), never auto-loosened', () => {
+  // 149 is one below the configured minimum of 150.
+  const g = gateDecision({ ...PASSING, nSameRegister: 149 }, GATE_CFG);
+  assert.equal(g.passes, false);
+  assert.ok(g.failedChecks.some((c) => /nSameRegister/.test(c)));
+});
+// Checks that enumeratePairs tags same-register pairs from the register vectors: with one
+// register-distant author in the mix, not every diff-author pair may land in the slice.
+test('SELECTION-BIAS GUARD: same-register membership tracks REGISTER only, not authorship distance', () => {
+  // X and Y: near-identical register (both terse imperative), different authorship content.
+  const xText = 'Run it. Check it. Ship it.';
+  const yText = 'Stop it. Fix it. Test it.';
+  // Z: very different register (long formal).
+  const zText = 'The committee, however, having deliberated at considerable length upon the whole of the available evidence, ultimately resolved to defer the decision until a later and more convenient occasion.';
+  const samples = buildSamples([{ id: 'X', docs: [xText] }, { id: 'Y', docs: [yText] }, { id: 'Z', docs: [zText] }]);
+  const { diffSameRegister, diff } = enumeratePairs(samples, 0.15);
+  // X-Y (same register) is included; pairs involving Z (different register) are not all included.
+  assert.ok(diffSameRegister.length >= 1, 'at least the same-register X-Y pair is included');
+  assert.ok(diffSameRegister.length < diff.length, 'the register-distant pairs (with Z) are excluded');
+});

package/src/profile/eval/multi-subject-harness.mjs ADDED Viewed

@@ -0,0 +1,106 @@
+// multi-subject-harness.mjs — Gate B v2, Task T4. Runs the arms for every subject and
+// scores each arm's output against the subject's HELD-OUT TEST fingerprint.
+//
+// Arms:
+//   baseline     — no style brief ('')  → anchor
+//   derived      — STYLE-AXIS-BAND-ONLY brief built from TRAIN register (no numbers, no
+//                  raw user prose). Verdict-bearing.
+//   fewShotOracle— raw OWN_TRAIN exemplars injected as "match this voice" (public corpus
+//                  only — see operator decision 1). The TRUE instrument ceiling. Verdict-bearing.
+//
+// Circularity kills enforced here:
+//   * Briefs are built from persona.trainDocs ONLY. The harness NEVER touches testDocs in a
+//     brief builder (AST guard test) and NEVER imports styleTargetFromAxes (AST guard test).
+//   * assertBriefNonLeaky throws if a non-baseline brief contains a verbatim OWN_test
+//     document or sits inside the func+trigram leak floor of OWN_test (prose-leak guard C1').
+//   * Scoring is PER-SUBJECT aggregated: all of a subject's probe outputs for an arm are
+//     concatenated into ONE authorship vector, so the high-dim func/trigram vector is stable
+//     (not per-probe noise).
+import { styleVector, fullStyleVector, fullStyleDistance } from './stylometry.js';
+// Arm names, in the order runHarness executes them for each persona.
+export const ARMS = ['baseline', 'derived', 'fewShotOracle'];
+export const VERDICT_ARMS = ['derived', 'fewShotOracle']; // baseline is the anchor, not a verdict
+// Probe tasks used by runHarness when the caller does not pass opts.probes.
+export const DEFAULT_PROBES = [
+  'Write a short note about your plans for the coming week.',
+  'Describe, in your own words, how you would approach a new project.',
+  'Give a brief reaction to a change a teammate just proposed.',
+];
+// Map the 9-axis register vector to neutral BAND words. No numbers; no raw user prose.
+function describeBands(reg) {
+  const band = (v, lo, hi, low, mid, high) => (v < lo ? low : v > hi ? high : mid);
+  return [
+    `length ${band(reg.terseness, 0.4, 0.6, 'expansive', 'moderate', 'very terse')}`,
+    `tone ${band(reg.formality, 0.15, 0.4, 'casual', 'neutral', 'formal')}`,
+    reg.emojiRate > 0.08 ? 'uses emoji' : 'no emoji',
+    `punctuation ${band(reg.punctProfile, 0.25, 0.5, 'sparse', 'moderate', 'heavy')}`,
+    `hedging ${band(reg.hedgeRate, 0.1, 0.3, 'direct', 'some', 'frequent')}`,
+  ].join('; ');
+}
+// Deterministic exemplar selection from TRAIN docs (longest-first for signal density).
+function pickExemplars(trainDocs, n) {
+  return [...trainDocs].sort((a, b) => b.length - a.length).slice(0, n);
+}
+// buildBriefs(persona, cfg) → { baseline, derived, fewShotOracle }. TRAIN-ONLY by
+// construction — references persona.trainDocs and never persona.testDocs.
+export function buildBriefs(persona, cfg = {}) {
+  const trainText = persona.trainDocs.join('\n');
+  const reg = styleVector(trainText);
+  const derived = `Write in this style — ${describeBands(reg)}.`;
+  const exemplars = pickExemplars(persona.trainDocs, cfg.nExemplars || 2);
+  const fewShotOracle = `Match the voice of these writing samples:\n${exemplars.map((e) => `"""${e}"""`).join('\n')}`;
+  return { baseline: '', derived, fewShotOracle };
+}
+const FUNC_TRI_WEIGHTS = { register: 0, func: 0.67, tri: 0.33, punct: 0 };
+// assertBriefNonLeaky(brief, persona, cfg): a non-baseline brief must not contain OWN_test
+// content. (1) no verbatim OWN_test document; (2) not inside the func+trigram leak floor of
+// OWN_test (catches near-verbatim test prose). Train exemplars (disjoint docs) clear this.
+export function assertBriefNonLeaky(brief, persona, cfg = {}) {
+  if (!brief) return; // baseline
+  const leakFloor = cfg.leakFloor ?? 0.02;
+  for (const td of persona.testDocs) {
+    if (td.length > 20 && brief.includes(td)) {
+      throw new Error(`brief leaks a verbatim OWN_test document (persona ${persona.id})`);
+    }
+  }
+  const d = fullStyleDistance(brief, persona.testDocs.join('\n'), FUNC_TRI_WEIGHTS);
+  if (d < leakFloor) {
+    throw new Error(`brief too close to OWN_test func+tri signature (${d.toFixed(3)} < ${leakFloor}, persona ${persona.id})`);
+  }
+}
+function buildPrompt(brief, task) {
+  return (brief ? `${brief}\n\n` : '') + `Task: ${task}`;
+}
+// runHarness(personas, { transport, probes, cfg }) → per-(subject, arm) aggregated result.
+// transport: async (prompt) => string. Real cloud transport at run time; a style-faithful
+// fake in tests.
+export async function runHarness(personas, opts) {
+  const { transport, probes = DEFAULT_PROBES, cfg = {} } = opts;
+  if (typeof transport !== 'function') throw new Error('runHarness requires a transport(prompt)=>text');
+  const results = {};
+  for (const p of personas) {
+    const briefs = buildBriefs(p, cfg);
+    for (const arm of ARMS) if (arm !== 'baseline') assertBriefNonLeaky(briefs[arm], p, cfg);
+    results[p.id] = {};
+    for (const arm of ARMS) {
+      const outputs = [];
+      for (const task of probes) {
+        // eslint-disable-next-line no-await-in-loop
+        outputs.push(String(await transport(buildPrompt(briefs[arm], task))));
+      }
+      const aggregated = outputs.join('\n'); // PER-SUBJECT aggregation → one vector/arm
+      const vector = fullStyleVector(aggregated);
+      results[p.id][arm] = { vector, distOwn: fullStyleDistance(vector, p.fingerprint), outputs };
+    }
+  }
+  return { personaIds: personas.map((p) => p.id), arms: ARMS, results };
+}
+// Module-private helpers gathered under a single export.
+export const __test = { describeBands, pickExemplars, buildPrompt };

package/src/profile/eval/multi-subject-harness.test.mjs ADDED Viewed

@@ -0,0 +1,99 @@
+// Gate B v2 — Task T4: multi-subject harness. The load-bearing guards: briefs are
+// TRAIN-only (dataflow + AST), the harness never imports styleTargetFromAxes, the leak
+// guard catches OWN_test prose in a brief, scoring is per-subject aggregated, and a
+// faithful agent makes the oracle beat baseline while a constant agent fabricates NO
+// advantage (the harness can't manufacture an arm difference on its own).
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import fs from 'node:fs';
+import { fileURLToPath } from 'node:url';
+import {
+  buildBriefs, assertBriefNonLeaky, runHarness, ARMS,
+} from './multi-subject-harness.mjs';
+import { generatePersonaText } from './synthetic-personas.js';
+import { fullStyleVector } from './stylometry.js';
+// Formal personas (archetype 0), distinct content per persona.
+function formalPersona(id, seed) {
+  const trainDocs = [generatePersonaText(0, seed + 1, 18), generatePersonaText(0, seed + 2, 18)];
+  const testDocs = [generatePersonaText(0, seed + 9001, 14)];
+  return {
+    id, synthetic: false, headlineEligible: true, trainDocs, testDocs,
+    fingerprint: fullStyleVector(testDocs.join('\n')),
+  };
+}
+// Three formal personas built from different seeds, shared by the tests below.
+const PERSONAS = [formalPersona('p1', 100), formalPersona('p2', 200), formalPersona('p3', 300)];
+const RELAX = { cfg: { leakFloor: 0.001 } }; // templated synthetic train≈test; relax 2nd-tier floor
+// Faithful agent: echoes injected exemplars (→ author style), else a fixed CASUAL default.
+function faithfulAgent(prompt) {
+  const ex = prompt.match(/"""([\s\S]*?)"""/g);
+  if (ex) return ex.map((s) => s.replace(/"""/g, '')).join(' ');
+  return 'yeah it just kinda works i think, pretty simple honestly, no big deal at all and you can just go with it.';
+}
+// Constant agent: same output for every prompt — no arm can differ.
+function constantAgent() { return 'the result, however, meets the stated goal; therefore the process follows the plan.'; }
+// --- brief construction: dataflow check, then a source-text check on the harness file ---
+test('buildBriefs: baseline is empty, arms differ only by brief, TRAIN-derived', () => {
+  const b = buildBriefs(PERSONAS[0]);
+  assert.equal(b.baseline, '');
+  assert.ok(b.derived.length > 0 && b.fewShotOracle.length > 0);
+  // fewShotOracle contains a TRAIN doc, never a TEST doc
+  assert.ok(PERSONAS[0].trainDocs.some((d) => b.fewShotOracle.includes(d)), 'oracle injects a train exemplar');
+  assert.ok(!PERSONAS[0].testDocs.some((d) => b.fewShotOracle.includes(d)), 'oracle never injects a test doc');
+});
+test('AST GUARD: buildBriefs references trainDocs and never testDocs; no styleTargetFromAxes', () => {
+  const src = fs.readFileSync(fileURLToPath(new URL('./multi-subject-harness.mjs', import.meta.url)), 'utf8');
+  // strip comments so the guard checks CODE, not its own documentation
+  const code = src.replace(/\/\/.*$/gm, '').replace(/\/\*[\s\S]*?\*\//g, '');
+  assert.ok(!/styleTargetFromAxes/.test(code), 'harness code must not import/use styleTargetFromAxes');
+  // extract the buildBriefs body by stable markers (next top-level statement)
+  // NOTE: the slice is taken from the RAW source, so comments inside buildBriefs are
+  // scanned by the two regex checks below as well.
+  const start = src.indexOf('export function buildBriefs');
+  const end = src.indexOf('const FUNC_TRI_WEIGHTS', start);
+  assert.ok(start !== -1 && end !== -1 && end > start, 'located buildBriefs body');
+  const body = src.slice(start, end);
+  assert.ok(/trainDocs/.test(body), 'buildBriefs uses trainDocs');
+  assert.ok(!/testDocs/.test(body), 'buildBriefs never touches testDocs');
+});
+// --- leak guard, per-subject aggregation, and the two fake-agent controls ---
+test('LEAK GUARD: verbatim OWN_test prose in a brief throws; a generic brief passes', () => {
+  const p = PERSONAS[0];
+  // positive control: brief IS the test text → leaky
+  assert.throws(() => assertBriefNonLeaky(p.testDocs[0], p), /leak|too close/i);
+  // the derived band brief (generic English) is func/tri-far from the persona's test → ok
+  const briefs = buildBriefs(p);
+  assert.doesNotThrow(() => assertBriefNonLeaky(briefs.derived, p));
+});
+test('per-subject AGGREGATION: one authorship vector per (subject, arm)', async () => {
+  const out = await runHarness(PERSONAS, { transport: faithfulAgent, ...RELAX });
+  assert.deepEqual(out.arms, ARMS);
+  for (const id of out.personaIds) {
+    for (const arm of ARMS) {
+      const r = out.results[id][arm];
+      assert.equal(r.vector.__full, true, 'a single full vector per arm');
+      assert.equal(r.outputs.length, 3, 'three probe outputs aggregated into the one vector');
+      assert.ok(Number.isFinite(r.distOwn));
+    }
+  }
+});
+test('FAITHFUL agent: fewShotOracle beats baseline (mean distance to OWN test)', async () => {
+  const out = await runHarness(PERSONAS, { transport: faithfulAgent, ...RELAX });
+  // Mean distOwn of one arm across all personas.
+  const mean = (arm) => out.personaIds.reduce((s, id) => s + out.results[id][arm].distOwn, 0) / out.personaIds.length;
+  assert.ok(mean('fewShotOracle') < mean('baseline'), `oracle ${mean('fewShotOracle').toFixed(3)} < baseline ${mean('baseline').toFixed(3)}`);
+});
+test('CONSTANT agent: harness fabricates NO arm advantage (all arms equal distance)', async () => {
+  const out = await runHarness(PERSONAS, { transport: constantAgent, ...RELAX });
+  for (const id of out.personaIds) {
+    assert.equal(out.results[id].fewShotOracle.distOwn, out.results[id].baseline.distOwn);
+    assert.equal(out.results[id].derived.distOwn, out.results[id].baseline.distOwn);
+  }
+});
+test('runHarness requires a transport function', async () => {
+  await assert.rejects(() => runHarness(PERSONAS, {}), /transport/);
+});
+});

package/src/profile/eval/personas.test.mjs ADDED Viewed

@@ -0,0 +1,83 @@
+// Gate B v2 — Task T3: persona loaders. Real authors (document-disjoint train/test,
+// seed-only selection) + synthetic (downgrade-only). The selection-bias guard is the
+// load-bearing test: persona order must NOT depend on the authorship distance under test.
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import {
+  splitAuthorDocs, makePersona, loadRealPersonas,
+} from './real-personas.mjs';
+import { makePersonas, generatePersonaText } from './synthetic-personas.js';
+import { fullStyleVector, fullStyleDistance } from './stylometry.js';
+// Relaxed loader options shared by the tests below: small token floors so the
+// tiny fixture authors qualify, plus a fixed seed.
+const SMALL = {
+  minTrainTokens: 15,
+  minTestTokens: 8,
+  seed: 1,
+};
+/**
+ * Build a fixture author with four distinct one-sentence documents.
+ * Each sentence is padded with filler words so it is intended to clear the small
+ * token floors used by these tests.
+ * @param {string} id - author identifier copied onto the result.
+ * @param {string} variant - text placed at the start of every document.
+ * @returns {{ id: string, docs: string[] }} the author record.
+ */
+function makeAuthor(id, variant) {
+  const docs = [];
+  for (let i = 0; i < 4; i += 1) {
+    docs.push(`${variant} document number ${i} with several extra filler words placed here to clear the token floor.`);
+  }
+  return { id, docs };
+}
+// The split must clear both token floors, share no document between the train and
+// test slices, and allocate every input document to exactly one slice.
+test('splitAuthorDocs produces DISJOINT train/test slices that clear the floors', () => {
+  const a = makeAuthor('a', 'alpha');
+  // Capture the fixture size before splitting so the total is not a magic number.
+  const totalDocs = a.docs.length;
+  const s = splitAuthorDocs(a, SMALL);
+  // Messages carry the actual values so a failure shows how far off the split was.
+  assert.ok(s.trainTokens >= SMALL.minTrainTokens, `trainTokens ${s.trainTokens} >= ${SMALL.minTrainTokens}`);
+  assert.ok(s.testTokens >= SMALL.minTestTokens, `testTokens ${s.testTokens} >= ${SMALL.minTestTokens}`);
+  const inter = s.trainDocs.filter((d) => s.testDocs.includes(d));
+  // Comparing against [] reports the overlapping documents themselves on failure.
+  assert.deepEqual(inter, [], 'no document appears in both slices');
+  // assert.equal (rather than assert.ok on a boolean) reports actual vs expected counts.
+  assert.equal(s.trainDocs.length + s.testDocs.length, totalDocs, 'every doc allocated exactly once');
+});
+// An author with only two very short documents must be rejected by makePersona.
+test('makePersona THROWS on an author too short to split with power', () => {
+  const shortAuthor = { id: 'tiny', docs: ['hi there', 'ok then'] };
+  const expectedError = /too short|< \d+ tokens/;
+  const buildPersona = () => makePersona(shortAuthor, SMALL);
+  assert.throws(buildPersona, expectedError);
+});
+// The persona's fingerprint must equal the full style vector of its held-out test
+// documents, and a real-author persona is flagged non-synthetic and headline-eligible.
+test('persona.fingerprint is the held-out TEST fingerprint (not train)', () => {
+  const author = makeAuthor('a', 'alpha');
+  const persona = makePersona(author, SMALL);
+
+  assert.equal(persona.fingerprint.__full, true);
+
+  // Recompute the vector from the joined test docs; distance to the stored one must be 0.
+  const testText = persona.testDocs.join('\n');
+  const recomputed = fullStyleVector(testText);
+  const gap = fullStyleDistance(persona.fingerprint, recomputed);
+  assert.equal(gap, 0, 'fingerprint == fullStyleVector(testDocs)');
+
+  assert.equal(persona.synthetic, false);
+  assert.equal(persona.headlineEligible, true);
+});
+// Two corpora with the same author ids but different text must load in the same
+// order, and a different seed must produce a different order.
+test('SELECTION-BIAS GUARD: persona order is a pure function of seed, not of content/distance', () => {
+  const authorIds = ['zeta', 'alpha', 'mike', 'delta'];
+  const buildCorpus = (variant) => authorIds.map((id) => makeAuthor(id, variant));
+  const loadedOrder = (corpus, opts) => loadRealPersonas(corpus, opts).map((persona) => persona.id);
+
+  const corpusA = buildCorpus('alpha-content');
+  // Same ids, different wording, so the authors' mutual style distances differ.
+  const corpusB = buildCorpus('totally different wording entirely');
+
+  const orderA = loadedOrder(corpusA, SMALL);
+  const orderB = loadedOrder(corpusB, SMALL);
+  assert.deepEqual(orderA, orderB, 'identical ids ⇒ identical order regardless of content');
+
+  // Re-load the first corpus under another seed: the order has to move with the seed.
+  const orderSeed2 = loadedOrder(corpusA, { ...SMALL, seed: 99 });
+  assert.notDeepEqual(orderA, orderSeed2);
+});
+// Asking for two authors from a corpus that holds one usable author and one
+// far-too-short author must throw with an "ingest more authors" message.
+test('loadRealPersonas THROWS (abort-and-ingest) when too few authors qualify', () => {
+  const usableAuthor = makeAuthor('a', 'x');
+  const shortAuthor = { id: 'short', docs: ['too', 'short'] };
+  const corpus = [usableAuthor, shortAuthor];
+  const loadTwo = () => loadRealPersonas(corpus, { ...SMALL, nAuthors: 2 });
+  assert.throws(loadTwo, /ingest more authors/);
+});
+// Synthetic personas must be flagged synthetic and not headline-eligible, carry a
+// full fingerprint, repeat exactly for identical arguments, and differ from one another.
+test('synthetic personas are stamped downgrade-only and deterministic', () => {
+  const firstRun = makePersonas(4, 7);
+  const secondRun = makePersonas(4, 7);
+  const testDocsOf = (personas) => personas.map((persona) => persona.testDocs);
+
+  assert.equal(firstRun.length, 4);
+  for (const persona of firstRun) {
+    assert.equal(persona.synthetic, true);
+    assert.equal(persona.headlineEligible, false);
+    assert.equal(persona.fingerprint.__full, true);
+  }
+
+  // Same arguments on both calls, so the generated test documents must match.
+  assert.deepEqual(testDocsOf(firstRun), testDocsOf(secondRun));
+
+  // The first two personas must not share a fingerprint.
+  const gap = fullStyleDistance(firstRun[0].fingerprint, firstRun[1].fingerprint);
+  assert.ok(gap > 0);
+});
+// Identical arguments must yield identical text; changing only the second
+// argument must change the text.
+// NOTE(review): the second argument is presumably the content seed — confirm against generatePersonaText.
+test('generatePersonaText: same style+content seed is deterministic; train/test differ', () => {
+  const firstCall = generatePersonaText(0, 123, 5);
+  const repeatCall = generatePersonaText(0, 123, 5);
+  assert.equal(firstCall, repeatCall);
+
+  const textSeedA = generatePersonaText(0, 1, 5);
+  const textSeedB = generatePersonaText(0, 9001, 5);
+  assert.notEqual(textSeedA, textSeedB);
+});