@ijfw/memory-server 1.5.6 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/bin/ijfw-dashboard +20 -1
  2. package/package.json +4 -3
  3. package/src/audit-roster.js +89 -12
  4. package/src/brain/tiered-llm.js +57 -7
  5. package/src/cross-orchestrator-cli.js +344 -4
  6. package/src/cross-project-search.js +39 -1
  7. package/src/dashboard-server.js +7 -1
  8. package/src/dream/runner.mjs +560 -8
  9. package/src/handlers/brain-handler.js +101 -1
  10. package/src/importers/discover.js +1 -1
  11. package/src/memory/bench-metrics.js +289 -0
  12. package/src/memory/benchmark.js +1 -1
  13. package/src/memory/search.js +53 -1
  14. package/src/orchestrator/plan-checker.js +1 -1
  15. package/src/profile/audit.js +671 -0
  16. package/src/profile/capture.js +871 -0
  17. package/src/profile/derive-dialectic.js +242 -0
  18. package/src/profile/derive-heuristic.js +733 -0
  19. package/src/profile/derive.js +156 -0
  20. package/src/profile/egress.js +306 -0
  21. package/src/profile/eval/build-real-probes.mjs +197 -0
  22. package/src/profile/eval/corpus-from-reddit.mjs +166 -0
  23. package/src/profile/eval/corpus-from-reddit.test.mjs +121 -0
  24. package/src/profile/eval/corpus-from-transcripts.mjs +264 -0
  25. package/src/profile/eval/gate-b-behavior.mjs +420 -0
  26. package/src/profile/eval/gate-b-decision-run.mjs +171 -0
  27. package/src/profile/eval/gate-b-decision-run.test.mjs +141 -0
  28. package/src/profile/eval/gate-b-run.mjs +417 -0
  29. package/src/profile/eval/gate-b-run.test.mjs +204 -0
  30. package/src/profile/eval/gate-c-capture.mjs +323 -0
  31. package/src/profile/eval/harness.mjs +551 -0
  32. package/src/profile/eval/instrument-validation.mjs +248 -0
  33. package/src/profile/eval/instrument-validation.test.mjs +125 -0
  34. package/src/profile/eval/multi-subject-harness.mjs +106 -0
  35. package/src/profile/eval/multi-subject-harness.test.mjs +99 -0
  36. package/src/profile/eval/personas.test.mjs +83 -0
  37. package/src/profile/eval/plumbing.test.mjs +69 -0
  38. package/src/profile/eval/prereg.mjs +130 -0
  39. package/src/profile/eval/prereg.test.mjs +78 -0
  40. package/src/profile/eval/real-corpus.test.mjs +103 -0
  41. package/src/profile/eval/real-personas.mjs +109 -0
  42. package/src/profile/eval/run-real-corpus-concurrent.mjs +407 -0
  43. package/src/profile/eval/run-real-corpus.mjs +358 -0
  44. package/src/profile/eval/slug-quality.mjs +464 -0
  45. package/src/profile/eval/stylometry-features.js +85 -0
  46. package/src/profile/eval/stylometry-reference.js +16 -0
  47. package/src/profile/eval/stylometry.js +224 -0
  48. package/src/profile/eval/stylometry.test.mjs +103 -0
  49. package/src/profile/eval/synthetic-personas.js +91 -0
  50. package/src/profile/eval/verifier-features.mjs +170 -0
  51. package/src/profile/eval/verifier-logreg.mjs +74 -0
  52. package/src/profile/eval/verifier-pair.mjs +122 -0
  53. package/src/profile/eval/verifier-reference.mjs +68 -0
  54. package/src/profile/eval/verifier-scorer.mjs +30 -0
  55. package/src/profile/eval/wrong-target-control.mjs +168 -0
  56. package/src/profile/eval/wrong-target-control.test.mjs +124 -0
  57. package/src/profile/exemplar-capture.js +232 -0
  58. package/src/profile/exemplar-retrieve.js +138 -0
  59. package/src/profile/exemplar-store.js +314 -0
  60. package/src/profile/lock.js +64 -0
  61. package/src/profile/merge.js +624 -0
  62. package/src/profile/path-policy.js +213 -0
  63. package/src/profile/precision-stamp.mjs +151 -0
  64. package/src/profile/render-brief.js +717 -0
  65. package/src/profile/schema.js +244 -0
  66. package/src/profile/sensitivity.js +249 -0
  67. package/src/profile/serve.js +345 -0
  68. package/src/profile/store.js +261 -0
  69. package/src/profile/telemetry.js +289 -0
  70. package/src/recovery/checkpoint.js +7 -1
  71. package/src/server.js +185 -14
  72. package/src/.registry-meta-key.pem +0 -3
@@ -0,0 +1,224 @@
1
+ // stylometry.js — a richer, judge-free style metric for the Gate B research
2
+ // program (docs/superpowers/plans/2026-06-08-gate-b-research-program.md, Phase 1.1).
3
+ //
4
+ // WHY: the old Gate B style metric was 3 shallow axes (length-as-terseness + a
5
+ // 12-word formality keyword count + an emoji bit). A skeptic correctly rejects
6
+ // "matched your length profile" as evidence of "writes in your voice", and a
7
+ // model trivially games it via maxTokens alone. This vector adds axes that move
8
+ // INDEPENDENTLY of length — lexical diversity, sentence-length variance,
9
+ // punctuation signature, hedging and contraction rates — so a real voice match
10
+ // must move several dimensions, not just shorten the output.
11
+ //
12
+ // Everything here is deterministic and dependency-free (no LLM, no network).
13
+ // All nine features are normalized to [0,1] so styleDistance is a well-behaved
14
+ // mean-absolute-difference in [0,1].
15
+
16
+ import {
17
+ FUNCTION_WORDS, TRIGRAM_KEYS, PUNCT_KEYS,
18
+ REF_MEAN_FUNC, REF_SD_FUNC, REF_MEAN_TRI, REF_SD_TRI, REF_MEAN_PUNCT, REF_SD_PUNCT,
19
+ } from './stylometry-reference.js';
20
+ import { relFreqFunc, relFreqTrigrams, relFreqPunct } from './stylometry-features.js';
21
+
22
+ const FORMAL_MARKERS = [
23
+ 'however', 'therefore', 'furthermore', 'moreover', 'nevertheless',
24
+ 'consequently', 'accordingly', 'thus', 'hence', 'whereas', 'regarding',
25
+ 'additionally', 'subsequently',
26
+ ];
27
+ const HEDGES = [
28
+ 'maybe', 'perhaps', 'possibly', 'probably', 'might', 'could', 'i think',
29
+ 'i guess', 'sort of', 'kind of', 'somewhat', 'arguably', 'presumably',
30
+ 'it seems', 'roughly', 'approximately',
31
+ ];
32
+ // Conservative emoji + pictograph ranges (no external dep).
33
+ // eslint-disable-next-line security/detect-unsafe-regex, no-misleading-character-class -- character class, no backtracking; intentional emoji + variation-selector (U+FE0F) class, match semantics must not change.
34
+ const EMOJI_RE = /[\u{1F300}-\u{1FAFF}\u{2600}-\u{27BF}\u{2190}-\u{21FF}\u{2B00}-\u{2BFF}\u{FE0F}]/gu;
35
+ const CONTRACTION_RE = /\b\w+'(?:s|t|re|ve|ll|d|m)\b/gi;
36
+
37
/**
 * Soft-saturating squash of a positive quantity: x / (x + scale).
 * Returns 0 whenever `x` or `scale` is not strictly positive (this also covers NaN).
 *
 * @param {number} x - Raw quantity.
 * @param {number} scale - Value of `x` at which the result is exactly 0.5.
 * @returns {number} 0 for non-positive inputs, otherwise x / (x + scale).
 */
function saturate(x, scale) {
  const usable = x > 0 && scale > 0; // comparisons are false for NaN
  return usable ? x / (x + scale) : 0;
}
41
/**
 * Clamp a number into the closed interval [0, 1].
 * NaN fails both comparisons and is returned unchanged.
 *
 * @param {number} x
 * @returns {number}
 */
function clamp01(x) {
  if (x < 0) return 0;
  if (x > 1) return 1;
  return x;
}
44
/**
 * Tokenize text into lower-cased word tokens: runs of ASCII letters, digits and
 * apostrophes.
 *
 * @param {*} text - Coerced with String().
 * @returns {string[]} Tokens in order of appearance; [] when nothing matches.
 */
function words(text) {
  const lowered = String(text).toLowerCase();
  const found = lowered.match(/[a-z0-9']+/gi);
  return found === null ? [] : found;
}
47
/**
 * Split text into sentences. A boundary is one or more of `.`, `!`, `?` followed by
 * a whitespace character or end of input, so "v1.2" is not split. Pieces are trimmed
 * and empty pieces dropped.
 *
 * @param {*} text - Coerced with String().
 * @returns {string[]} Trimmed, non-empty sentence strings without their terminators.
 */
function sentences(text) {
  const kept = [];
  for (const piece of String(text).split(/[.!?]+(?:\s|$)/)) {
    const trimmed = piece.trim();
    if (trimmed) kept.push(trimmed);
  }
  return kept;
}
50
/**
 * Count non-overlapping, whole-word occurrences of each term in `text`.
 *
 * The text is lower-cased before matching, so terms are expected in lower case.
 * Single words and multi-word phrases are treated alike: the term must start and end
 * on a word boundary, so "kind of" no longer matches inside "mankind of" (phrases
 * were previously matched as bare substrings). Terms are regex-escaped, so
 * metacharacters in a term are matched literally. Empty terms are skipped.
 *
 * @param {*} text - Text to scan; coerced with String().
 * @param {Iterable<string>} list - Lower-case words or phrases to count.
 * @returns {number} Total number of matches over all terms.
 */
function countMatches(text, list) {
  const haystack = String(text).toLowerCase();
  let total = 0;
  for (const term of list) {
    if (!term) continue; // an empty pattern would match at every boundary
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Only anchor with \b where the term edge is a word character; \b next to a
    // non-word edge would demand the opposite of what is intended.
    const lead = /^\w/.test(term) ? '\\b' : '';
    const trail = /\w$/.test(term) ? '\\b' : '';
    const hits = haystack.match(new RegExp(`${lead}${escaped}${trail}`, 'g'));
    if (hits) total += hits.length;
  }
  return total;
}
65
+
66
+ // The nine-axis style vector. Each component is in [0,1].
67
/**
 * Compute the nine-axis register vector for a text. Every component is passed
 * through clamp01 before being returned.
 *
 * Axes: terseness (1 - saturated char length), formality (FORMAL_MARKERS per word),
 * emojiRate (EMOJI_RE matches per char), typeTokenRatio (unique/total word tokens),
 * meanSentenceLen (saturated words per sentence), sentenceLenVar (saturated
 * stdev/mean of sentence lengths, 0 with fewer than two sentences), punctProfile
 * (punctuation per char), hedgeRate (HEDGES per word), contractionRate
 * (CONTRACTION_RE matches per word).
 *
 * @param {*} text - Falsy values are treated as the empty string.
 * @returns {{terseness:number, formality:number, emojiRate:number,
 *   typeTokenRatio:number, meanSentenceLen:number, sentenceLenVar:number,
 *   punctProfile:number, hedgeRate:number, contractionRate:number}}
 */
export function styleVector(text) {
  const source = String(text || '');
  const tokens = words(source);
  const wordCount = tokens.length;
  // Denominators are floored at 1 so empty input never divides by zero.
  const perWord = Math.max(wordCount, 1);
  const perChar = source.length || 1;
  const tally = (re) => (source.match(re) || []).length;

  // Sentence lengths in word tokens, plus their running total.
  const lens = [];
  let lenTotal = 0;
  for (const sentence of sentences(source)) {
    const len = words(sentence).length;
    lens.push(len);
    lenTotal += len;
  }
  const meanLen = lens.length ? lenTotal / lens.length : 0;

  // Coefficient of variation (population stdev / mean) of sentence length.
  let burstiness = 0;
  if (lens.length > 1 && meanLen > 0) {
    let squared = 0;
    for (const len of lens) squared += (len - meanLen) ** 2;
    burstiness = saturate(Math.sqrt(squared / lens.length) / meanLen, 0.6);
  }

  const diversity = wordCount ? new Set(tokens).size / wordCount : 0;

  return {
    terseness: clamp01(1 - saturate(perChar, 240)),
    formality: clamp01(saturate(countMatches(source, FORMAL_MARKERS) / perWord, 0.03)),
    emojiRate: clamp01(saturate(tally(EMOJI_RE) / perChar, 0.02)),
    typeTokenRatio: clamp01(diversity),
    meanSentenceLen: clamp01(saturate(meanLen, 22)),
    sentenceLenVar: clamp01(burstiness),
    punctProfile: clamp01(saturate(tally(/[!?;:,—–\-()"]/g) / perChar, 0.06)),
    hedgeRate: clamp01(saturate(countMatches(source, HEDGES) / perWord, 0.04)),
    contractionRate: clamp01(saturate(tally(CONTRACTION_RE) / perWord, 0.05)),
  };
}
120
+
121
+ export const STYLE_AXES = [
122
+ 'terseness', 'formality', 'emojiRate', 'typeTokenRatio', 'meanSentenceLen',
123
+ 'sentenceLenVar', 'punctProfile', 'hedgeRate', 'contractionRate',
124
+ ];
125
+
126
+ // Mean absolute difference over the nine axes → [0,1]. Accepts either raw text
127
+ // or pre-computed vectors. Optional per-axis weights (default equal).
128
/**
 * Weighted mean absolute difference between two register vectors over STYLE_AXES.
 *
 * @param {string|object} a - Raw text, or a pre-computed vector (any truthy object
 *   is used as-is; anything else goes through styleVector).
 * @param {string|object} b - Same as `a`.
 * @param {object|null} [weights=null] - Optional per-axis weights; an axis whose
 *   weight is missing or non-finite uses weight 1.
 * @returns {number} sum(w * |a - b|) / sum(w), or 0 when the weights sum to 0.
 *   Axes missing from a vector are read as 0.
 */
export function styleDistance(a, b, weights = null) {
  const toVector = (input) => ((input && typeof input === 'object') ? input : styleVector(input));
  const left = toVector(a);
  const right = toVector(b);

  let weighted = 0;
  let total = 0;
  STYLE_AXES.forEach((axis) => {
    const custom = weights && Number.isFinite(weights[axis]);
    const wt = custom ? weights[axis] : 1;
    weighted += wt * Math.abs((left[axis] ?? 0) - (right[axis] ?? 0));
    total += wt;
  });
  return total ? weighted / total : 0;
}
140
+
141
+ // ---------------------------------------------------------------------------
142
+ // AUTHORSHIP LAYER (Gate B v2) — function-word / char-trigram / punct-n-gram
143
+ // stylometry. The 9-axis styleVector above captures REGISTER (terse/formal/emoji);
144
+ // these sub-vectors capture INDIVIDUAL voice — the idiosyncratic markers that
145
+ // separate two same-register strangers. fullStyleDistance combines both.
146
+ // ---------------------------------------------------------------------------
147
+
148
+ // z-scores beyond this magnitude are clamped — guards against a function word that is
149
+ // rare-or-absent in the (small) reference corpus producing a blow-up component that
150
+ // would dominate the cosine direction.
151
+ const Z_CAP = 8;
152
+
153
+ // Standardize a raw relative-frequency vector against the FROZEN reference mean/sd.
154
+ // CRITICAL (anti-circularity): refMean/refSd are the imported constants from
155
+ // stylometry-reference.js — they are NEVER computed from `raw` (the scored text). The
156
+ // only operations on `raw` here are subtraction of a constant and division by a
157
+ // constant; no mean/variance is taken over the input. (Enforced by the anti-recompute
158
+ // guard test.)
159
/**
 * Z-standardize `raw` component-wise against a supplied reference mean/sd and clamp
 * each z-score to [-Z_CAP, Z_CAP].
 *
 * A zero (or otherwise falsy) reference sd falls back to 1e-6 instead of dividing by
 * zero, which would otherwise produce NaN/Infinity components.
 *
 * @param {ArrayLike<number>} raw - Relative frequencies of the scored text.
 * @param {ArrayLike<number>} refMean - Reference means, index-aligned with `raw`.
 * @param {ArrayLike<number>} refSd - Reference standard deviations, index-aligned.
 * @returns {number[]} Clamped z-scores, same length as `raw`.
 */
function zStandardize(raw, refMean, refSd) {
  const out = Array.from({ length: raw.length });
  for (let i = 0; i < raw.length; i += 1) {
    // Constant-only arithmetic on raw[i]: subtract a constant, divide by a constant.
    const z = (raw[i] - refMean[i]) / (refSd[i] || 1e-6);
    out[i] = Math.max(-Z_CAP, Math.min(Z_CAP, z));
  }
  return out;
}
168
+
169
+ // authorVector(text) → { func[180], tri[250], punct[24] } z-standardized sub-vectors.
170
+ // Pure function of (text, frozen constants): independent of any other scored text.
171
/**
 * Build the three authorship sub-vectors for a text, each z-standardized against the
 * imported reference constants.
 *
 * @param {string} text
 * @returns {{func:number[], tri:number[], punct:number[]}} Function-word,
 *   char-trigram and punctuation sub-vectors.
 */
export function authorVector(text) {
  const func = zStandardize(relFreqFunc(text, FUNCTION_WORDS), REF_MEAN_FUNC, REF_SD_FUNC);
  const tri = zStandardize(relFreqTrigrams(text, TRIGRAM_KEYS), REF_MEAN_TRI, REF_SD_TRI);
  const punct = zStandardize(relFreqPunct(text, PUNCT_KEYS), REF_MEAN_PUNCT, REF_SD_PUNCT);
  return { func, tri, punct };
}
178
+
179
+ // Frozen composite weights (spec §1.4). func-word distribution carries the most
180
+ // individual-voice signal, so it dominates; register is a minority contributor.
181
+ export const STYLO_WEIGHTS = Object.freeze({
182
+ register: 0.20, func: 0.50, tri: 0.25, punct: 0.05,
183
+ });
184
+
185
+ // fullStyleVector(text) → { register (9-axis), func, tri, punct }. Accepts an already
186
+ // computed full vector (idempotent) so callers can precompute once and reuse.
187
/**
 * Combine the register vector and the authorship sub-vectors into one object.
 * An object already tagged `__full === true` is returned as-is (same reference), so
 * callers can precompute once and pass the result back in.
 *
 * @param {string|object} text - Raw text or a previously computed full vector.
 * @returns {{__full:true, register:object, func:number[], tri:number[], punct:number[]}}
 */
export function fullStyleVector(text) {
  const alreadyFull = Boolean(text) && typeof text === 'object' && text.__full === true;
  if (alreadyFull) return text;

  const { func, tri, punct } = authorVector(text);
  return { __full: true, register: styleVector(text), func, tri, punct };
}
192
+
193
+ // Cosine distance mapped to [0,1]: (1 - cos)/2. Zero-norm handling: both zero → 0
194
+ // (identical/empty), exactly one zero → 0.5 (maximally undecided).
195
/**
 * Cosine distance rescaled to [0,1] as (1 - cos) / 2.
 *
 * Zero-norm handling: both vectors zero returns 0; exactly one zero returns 0.5.
 * Results below 1e-12 are snapped to 0 so that d(x, x) is exactly 0.
 *
 * @param {ArrayLike<number>} u
 * @param {ArrayLike<number>} v - Read at the indices of `u`.
 * @returns {number}
 */
function cosineDistance(u, v) {
  let inner = 0;
  let normU = 0;
  let normV = 0;
  for (let k = 0; k < u.length; k += 1) {
    inner += u[k] * v[k];
    normU += u[k] * u[k];
    normV += v[k] * v[k];
  }

  if (normU === 0 || normV === 0) {
    return normU === normV ? 0 : 0.5;
  }

  // Clamp guards against |cos| drifting past 1 through rounding.
  const cos = Math.min(1, Math.max(-1, inner / (Math.sqrt(normU) * Math.sqrt(normV))));
  const dist = (1 - cos) / 2;
  return dist < 1e-12 ? 0 : dist;
}
206
+
207
+ // fullStyleDistance(a, b) → weighted authorship+register distance in [0,1]. Accepts
208
+ // raw text OR fullStyleVector objects for either argument.
209
/**
 * Weighted combination of the register distance and the three authorship cosine
 * distances (func, tri, punct), normalized by the sum of the weights.
 *
 * Weight handling: `weights` may be null/undefined or partial. Any family whose
 * weight is missing or non-finite falls back to STYLO_WEIGHTS for that family
 * (mirroring styleDistance, where a missing axis weight falls back to its default).
 * Previously a partial object produced a NaN weight sum, which is falsy, so the
 * function silently returned 0. To exclude a family, pass an explicit 0.
 *
 * @param {string|object} a - Raw text or a fullStyleVector result.
 * @param {string|object} b - Raw text or a fullStyleVector result.
 * @param {{register?:number, func?:number, tri?:number, punct?:number}|null}
 *   [weights=STYLO_WEIGHTS]
 * @returns {number} Weighted mean of the four distances; 0 if the weights sum to 0.
 */
export function fullStyleDistance(a, b, weights = STYLO_WEIGHTS) {
  const fa = fullStyleVector(a);
  const fb = fullStyleVector(b);
  // Insertion order (register, func, tri, punct) fixes the summation order below.
  const distances = {
    register: styleDistance(fa.register, fb.register),
    func: cosineDistance(fa.func, fb.func),
    tri: cosineDistance(fa.tri, fb.tri),
    punct: cosineDistance(fa.punct, fb.punct),
  };

  let sum = 0;
  let wsum = 0;
  for (const family of Object.keys(distances)) {
    const given = weights?.[family];
    const wt = Number.isFinite(given) ? given : STYLO_WEIGHTS[family];
    sum += wt * distances[family];
    wsum += wt;
  }
  return wsum ? sum / wsum : 0;
}
220
+
221
+ export default {
222
+ styleVector, styleDistance, STYLE_AXES,
223
+ authorVector, fullStyleVector, fullStyleDistance, STYLO_WEIGHTS,
224
+ };
@@ -0,0 +1,103 @@
1
+ // Gate B v2 — authorship-layer stylometry (Task T1). These tests enforce the
2
+ // properties the design audit made load-bearing: the metric is a sane distance, it
3
+ // carries INDIVIDUAL-voice signal that survives length (not just register), and the
4
+ // z-standardization is provably NOT recomputed from the scored text (the anti-circularity
5
+ // guard — both structural source-scan and behavioral independence).
6
+
7
+ import { test } from 'node:test';
8
+ import assert from 'node:assert/strict';
9
+ import fs from 'node:fs';
10
+ import { fileURLToPath } from 'node:url';
11
+ import {
12
+ authorVector, fullStyleVector, fullStyleDistance, STYLO_WEIGHTS,
13
+ } from './stylometry.js';
14
+ import {
15
+ FUNCTION_WORDS, TRIGRAM_KEYS, PUNCT_KEYS, REFERENCE_SOURCE,
16
+ REF_MEAN_FUNC, REF_SD_FUNC,
17
+ } from './stylometry-reference.js';
18
+
19
+ // Same-author (formal/expository), two lengths.
20
+ const A_SHORT = 'The matter, however, is one which must be considered with the greatest of care; the consequences of a hasty decision are, as the records of the past have shown, considerable.';
21
+ const A_LONG = 'The matter, however, is one which must be considered with the greatest of care, for the consequences of a hasty decision are, as the records of the past have repeatedly shown, considerable. It is therefore the case that the wise observer, having weighed the whole of the evidence which is before him, will tend toward the conclusion that nothing of importance ought to be undertaken until the question itself has been examined with the patience that such a matter properly demands.';
22
+ // Different author (casual/personal), length-matched to A_SHORT.
23
+ const B_CASUAL = "yeah so i just think you're gonna really like it, it's kinda cool and you just click on the thing and then you're basically done, i mean it barely took me any time at all honestly.";
24
+
25
// authorVector must return three sub-vectors of fixed length, all finite, and give the
// same result when called again on the same text.
test('authorVector emits fixed-length func(180)/tri(250)/punct(24), deterministic', () => {
  const expectedLengths = { func: 180, tri: 250, punct: 24 };
  const families = Object.keys(expectedLengths);

  const first = authorVector(A_SHORT);
  for (const family of families) {
    assert.equal(first[family].length, expectedLengths[family]);
  }
  for (const family of families) {
    for (const component of first[family]) assert.ok(Number.isFinite(component));
  }

  // deterministic: a second call reproduces every sub-vector
  const second = authorVector(A_SHORT);
  for (const family of families) {
    assert.deepEqual(second[family], first[family]);
  }
});
37
+
38
// Basic metric properties: zero self-distance, range [0,1], symmetry.
test('fullStyleDistance is a sane metric: [0,1], symmetric, d(x,x)=0', () => {
  const selfDistance = fullStyleDistance(A_SHORT, A_SHORT);
  assert.equal(selfDistance, 0);

  const forward = fullStyleDistance(A_SHORT, B_CASUAL);
  const inRange = forward >= 0 && forward <= 1;
  assert.ok(inRange, `in [0,1], got ${forward}`);

  const backward = fullStyleDistance(B_CASUAL, A_SHORT);
  assert.ok(Math.abs(forward - backward) < 1e-12, 'symmetric');
});
45
+
46
// The same author at two lengths must score closer than two authors at one length.
test('NOT-GAMEABLE-BY-LENGTH: same-author/diff-length is CLOSER than diff-author/same-length', () => {
  const within = fullStyleDistance(A_SHORT, A_LONG);
  const across = fullStyleDistance(A_SHORT, B_CASUAL);
  const message = `expected diff-author (${across.toFixed(4)}) > same-author-diff-length (${within.toFixed(4)})`;
  assert.ok(across > within, message);
});
54
+
55
// Source-scan guard: reads stylometry.js as text and checks that standardization is
// wired to the imported reference constants and that the zStandardize body contains
// neither Math.sqrt nor a .reduce( call.
test('ANTI-RECOMPUTE (structural): standardization uses imported REF constants, not input stats', () => {
  const sourcePath = fileURLToPath(new URL('./stylometry.js', import.meta.url));
  const src = fs.readFileSync(sourcePath, 'utf8');

  // imports the frozen reference module
  const importsReference = /from '\.\/stylometry-reference\.js'/.test(src);
  assert.ok(importsReference, 'imports the frozen reference module');

  // REF constants are passed into the standardizer at the call site
  const callSite = /zStandardize\(\s*relFreqFunc\([^)]*\),\s*REF_MEAN_FUNC,\s*REF_SD_FUNC\)/;
  assert.ok(callSite.test(src),
    'authorVector standardizes func against imported REF_MEAN_FUNC/REF_SD_FUNC');

  // the standardizer body must not recompute a mean/variance over the scored input
  const m = src.match(/function zStandardize[\s\S]*?\n}/);
  assert.ok(m, 'found zStandardize body');
  const [body] = m;
  assert.ok(!/Math\.sqrt/.test(body), 'standardizer does not compute a stdev (no Math.sqrt) over input');
  assert.ok(!/\.reduce\(/.test(body), 'standardizer does not aggregate over the input vector');
});
68
+
69
// Behavioral guard: scoring other texts in between must not change authorVector(A_SHORT).
test('ANTI-RECOMPUTE (behavioral): authorVector(A) is independent of other scored texts', () => {
  const baseline = authorVector(A_SHORT);

  // score a pile of unrelated texts — must not mutate any global standardization state
  let round = 0;
  while (round < 50) {
    authorVector(`${B_CASUAL} ${round} ${A_LONG}`);
    fullStyleDistance(A_LONG, B_CASUAL);
    round += 1;
  }

  const rescored = authorVector(A_SHORT);
  for (const family of ['func', 'tri', 'punct']) {
    assert.deepEqual(rescored[family], baseline[family]);
  }
});
81
+
82
// Checks key-list and reference-array lengths, strictly positive func SDs, and that
// the REFERENCE_SOURCE description mentions "disjoint".
test('reference constants are frozen, aligned, and documented as persona-disjoint', () => {
  const expectedLengths = [
    [FUNCTION_WORDS, 180],
    [TRIGRAM_KEYS, 250],
    [PUNCT_KEYS, 24],
    [REF_MEAN_FUNC, 180],
    [REF_SD_FUNC, 180],
  ];
  for (const [list, expected] of expectedLengths) {
    assert.equal(list.length, expected);
  }

  const hasNonPositiveSd = REF_SD_FUNC.some((sd) => !(sd > 0));
  assert.ok(!hasNonPositiveSd, 'all SDs strictly positive (no divide-by-zero)');
  assert.match(REFERENCE_SOURCE, /disjoint/i);
});
91
+
92
// STYLO_WEIGHTS must be frozen, made of finite non-negative numbers with a positive
// sum (a zero sum would make fullStyleDistance return 0 for every pair), and must keep
// func as the dominant family.
test('STYLO_WEIGHTS is frozen and sums sensibly with func dominant', () => {
  assert.ok(Object.isFrozen(STYLO_WEIGHTS));

  const values = Object.values(STYLO_WEIGHTS);
  assert.ok(
    values.every((v) => Number.isFinite(v) && v >= 0),
    'every weight is a finite, non-negative number',
  );
  const total = values.reduce((acc, v) => acc + v, 0);
  assert.ok(total > 0, `weights must have a positive sum, got ${total}`);

  assert.ok(STYLO_WEIGHTS.func >= STYLO_WEIGHTS.tri);
  assert.ok(STYLO_WEIGHTS.func > STYLO_WEIGHTS.register);
});
97
+
98
// Passing a full vector back in must return the very same object, and its
// self-distance must be exactly 0.
test('fullStyleVector is idempotent on an already-computed full vector', () => {
  const computed = fullStyleVector(A_SHORT);
  assert.equal(fullStyleVector(computed), computed);
  assert.equal(fullStyleDistance(computed, computed), 0);
});
@@ -0,0 +1,91 @@
1
+ // synthetic-personas.js — Gate B v2, Task T3 (synthetic half). Deterministic, neutral,
2
+ // controlled-style personas. EXISTS ONLY to (a) exercise the harness plumbing offline
3
+ // and (b) add a SANITY/power check — NEVER to carry the verdict. Every synthetic persona
4
+ // is stamped { synthetic:true, headlineEligible:false } so the decision runner (T7) can
5
+ // only ever let real authors set the headline N and license the PASS/CUT.
6
+ //
7
+ // Anti-circularity note (C2): because synthetic text is generated FROM style archetypes
8
+ // and then scored by the SAME metric family, a synthetic "win" is partly tautological.
9
+ // That is exactly why synthetic is downgrade-only and never licenses the mission claim.
10
+
11
+ import { fullStyleVector } from './stylometry.js';
12
+
13
/**
 * Seeded 32-bit PRNG. The seed is coerced to an unsigned 32-bit integer; each call of
 * the returned function advances the internal state and yields a number in [0, 1).
 *
 * @param {number} seed
 * @returns {() => number} Deterministic generator for the given seed.
 */
function mulberry32(seed) {
  let state = seed >>> 0;
  const rng = () => {
    // All arithmetic is forced to 32-bit integers via `| 0` and Math.imul.
    state = ((state | 0) + 0x6D2B79F5) | 0;
    const mixed = Math.imul(state ^ (state >>> 15), 1 | state);
    const folded = (mixed + Math.imul(mixed ^ (mixed >>> 7), 61 | mixed)) ^ mixed;
    return ((folded ^ (folded >>> 14)) >>> 0) / 4294967296;
  };
  return rng;
}
22
/**
 * Pick one element of `arr` using a single draw from `rng`.
 *
 * @param {() => number} rng - Generator returning values in [0, 1).
 * @param {Array} arr
 * @returns {*} The chosen element (undefined for an empty array).
 */
function pick(rng, arr) {
  const index = Math.floor(rng() * arr.length);
  return arr[index];
}
23
+
24
+ const SUBJECTS = ['the system', 'a user', 'the team', 'one approach', 'the process', 'this method', 'the result', 'a change', 'the design', 'the plan'];
25
+ const PREDICATES = ['works as expected', 'needs another review', 'depends on the input', 'improves over time', 'handles the edge case', 'reduces the cost', 'follows the plan', 'meets the stated goal', 'remains stable under load', 'scales with demand'];
26
+
27
+ // Style archetypes control function-word / connector / punctuation habits — the features
28
+ // the authorship metric reads. Neutral content throughout.
29
+ const ARCHETYPES = [
30
+ { // formal
31
+ connectors: ['however', 'therefore', 'moreover', 'consequently', 'furthermore'],
32
+ join: (a, b, c) => `${cap(a)}, ${c}, ${b}.`,
33
+ },
34
+ { // casual
35
+ connectors: ['so', 'and', 'but', 'plus', 'anyway'],
36
+ join: (a, b, c) => `${a} ${c} it ${b}, honestly.`,
37
+ },
38
+ { // terse
39
+ connectors: ['', '', ''],
40
+ join: (a, b) => `${cap(a)} ${b}.`,
41
+ },
42
+ { // discursive
43
+ connectors: ['which means', 'in other words', 'that is to say', 'as it happens'],
44
+ join: (a, b, c) => `${cap(a)} ${b}, ${c} the outcome is broadly acceptable for now.`,
45
+ },
46
+ ];
47
/**
 * Upper-case the first character of a string, leaving the rest untouched.
 *
 * @param {string} s
 * @returns {string}
 */
function cap(s) {
  const head = s.charAt(0).toUpperCase();
  const tail = s.slice(1);
  return `${head}${tail}`;
}
48
+
49
+ // Generate neutral persona text in a fixed style (archetypeIdx) with content variation
50
+ // driven by contentSeed — so train and test slices share STYLE but differ in content.
51
/**
 * Generate `nSentences` sentences in one archetype's style, space-joined.
 * Each sentence draws a subject, then a predicate, then a connector from a PRNG
 * seeded with `contentSeed`, so the same arguments always produce the same text.
 *
 * @param {number} archetypeIdx - Index into ARCHETYPES, taken modulo its length.
 * @param {number} contentSeed - Seed, coerced to unsigned 32-bit.
 * @param {number} nSentences - Number of sentences to emit.
 * @returns {string}
 */
export function generatePersonaText(archetypeIdx, contentSeed, nSentences) {
  const style = ARCHETYPES[archetypeIdx % ARCHETYPES.length];
  const rng = mulberry32(contentSeed >>> 0);
  const parts = [];
  while (parts.length < nSentences) {
    // Draw order (subject, predicate, connector) determines the generated text.
    const subject = pick(rng, SUBJECTS);
    const predicate = pick(rng, PREDICATES);
    const connector = pick(rng, style.connectors);
    parts.push(style.join(subject, predicate, connector));
  }
  return parts.join(' ');
}
63
+
64
+ // makePersonas(n, seed) → n synthetic personas, same shape as real personas.
65
/**
 * Build `n` synthetic personas. Persona k uses archetype `k % ARCHETYPES.length`, gets
 * two 16-sentence train docs and one 12-sentence test doc (each from a different
 * content seed), and is stamped `synthetic: true, headlineEligible: false`.
 * `fingerprint` is the full style vector of the test docs joined by newlines.
 *
 * @param {number} n - Number of personas.
 * @param {number} [seed=1] - Base seed; also part of each persona id.
 * @returns {Array<{id:string, synthetic:true, headlineEligible:false, archetype:number,
 *   trainDocs:string[], testDocs:string[], fingerprint:object}>}
 */
export function makePersonas(n, seed = 1) {
  const personas = [];
  for (let k = 0; k < n; k += 1) {
    const archetype = k % ARCHETYPES.length;
    const base = (seed * 1000 + k) >>> 0;
    // distinct content seeds for train vs test → same style, different seeds
    const doc = (offset, count) => generatePersonaText(archetype, base + offset, count);
    const trainDocs = [doc(1, 16), doc(2, 16)];
    const testDocs = [doc(9001, 12)];
    personas.push({
      id: `syn-${seed}-${k}`,
      synthetic: true,
      headlineEligible: false,
      archetype,
      trainDocs,
      testDocs,
      fingerprint: fullStyleVector(testDocs.join('\n')),
    });
  }
  return personas;
}
90
+
91
+ export default { makePersonas, generatePersonaText };
@@ -0,0 +1,170 @@
1
+ // verifier-features.mjs — Gate B v3 (trained pairwise verifier), chunk-level feature
2
+ // extraction. ZERO deps. Reuses the shipped func(180)/tri(250)/punct(24) extractors and
3
+ // adds TOPIC-ROBUST per-chunk features (word-shape/casing, char-affix n-grams,
4
+ // sentence/paragraph length distributions, whitespace/quote/emoji/typo habits) so the
5
+ // downstream verifier keys on individual VOICE, not subreddit topic vocabulary.
6
+ //
7
+ // Does NOT import or mutate the shipped stylometry.js scoring path. The frozen
8
+ // reference here is INJECTED (built from DEV held-out authors by the harness), never the
9
+ // shipped REF_MEAN/SD — calibration regen requirement (Task B).
10
+
11
+ import { relFreqFunc, relFreqTrigrams, relFreqPunct } from './stylometry-features.js';
12
+ import { FUNCTION_WORDS, TRIGRAM_KEYS, PUNCT_KEYS } from './stylometry-reference.js';
13
+
14
+ const Z_CAP = 8;
15
+
16
+ // ---- chunking --------------------------------------------------------------
17
+ // Split an author's docs into whitespace-token chunks of `size` tokens. Punctuation is
18
+ // kept attached (raw whitespace split) so punct/affix features survive. We chunk WITHIN
19
+ // each doc (no cross-doc bleed) then concatenate the chunk lists.
20
/**
 * Split each doc into chunks of `size` whitespace-delimited tokens. Chunking is done
 * within each doc (chunks never span two docs) and tokens are re-joined with single
 * spaces, so punctuation stays attached to its token.
 *
 * Per doc:
 *  - fewer than floor(0.6 * size) tokens: kept whole only if it has >= 120 tokens,
 *    otherwise dropped;
 *  - otherwise: every full `size`-token chunk is kept, plus the tail if the tail has
 *    at least floor(0.6 * size) tokens (and is non-empty).
 *
 * Fixes over the previous version: a non-positive `size` no longer loops forever
 * (it is rejected), and `size === 1` no longer appends an empty-string tail chunk.
 *
 * @param {Iterable<*>} docs - Documents; each is coerced with String().
 * @param {number} size - Tokens per chunk; must be a positive integer.
 * @returns {string[]} All chunks, in doc order.
 * @throws {RangeError} If `size` is not a positive integer.
 */
export function chunkDocs(docs, size) {
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError(`chunkDocs: size must be a positive integer, got ${String(size)}`);
  }
  const minTokens = Math.floor(size * 0.6);
  const chunks = [];
  for (const doc of docs) {
    const toks = String(doc).split(/\s+/).filter(Boolean);
    if (toks.length < minTokens) {
      // NOTE(review): the absolute 120-token floor equals 60% of a 200-token chunk, so
      // this branch can only fire when size > 200 — confirm the intended threshold.
      if (toks.length >= 120) chunks.push(toks.join(' '));
      continue;
    }
    for (let i = 0; i + size <= toks.length; i += size) {
      chunks.push(toks.slice(i, i + size).join(' '));
    }
    // tail: keep if it is at least 60% of a chunk so long tails are not wasted
    const rem = toks.length % size;
    if (rem > 0 && rem >= minTokens) {
      chunks.push(toks.slice(toks.length - rem).join(' '));
    }
  }
  return chunks;
}
41
+
42
+ // ---- topic-robust scalar features (each is a function-word/shape RATE, topic-agnostic) -
43
/**
 * Compute 22 per-text scalar features: token casing/shape rates, sentence and
 * paragraph length statistics, whitespace/quote/emoji/typo-like habits, punctuation
 * and digit rates, mean word length and type-token ratio.
 *
 * Output order: allCaps, titleCase, allLower, digit-token rates (per whitespace
 * token); saturated mean sentence length; sentence-length stdev/mean; saturated mean
 * paragraph length; then saturated doubleSpace, newline, straightQuote, curlyQuote,
 * apostrophe, emoji, repeatRun, loneI, multiPunct, ellipsis, paren, dash, digit
 * rates; saturated mean word length; type-token ratio.
 *
 * @param {*} text - Coerced with String().
 * @returns {number[]} Array of 22 numbers.
 */
function topicRobustScalars(text) {
  const src = String(text);
  const charCount = src.length || 1;

  // Small local helpers.
  const tally = (re) => (src.match(re) || []).length;
  const letterRuns = (piece) => (piece.match(/[A-Za-z']+/g) || []).length;
  const trimmedPieces = (re) => src.split(re).map((x) => x.trim()).filter(Boolean);
  const average = (xs) => (xs.length ? xs.reduce((a, b) => a + b, 0) / xs.length : 0);
  const squash = (x, k) => (x > 0 ? x / (x + k) : 0);

  // word tokens (case-preserving for shape features)
  const rawToks = src.split(/\s+/).filter(Boolean);
  const tokCount = rawToks.length || 1;
  const alphaToks = src.match(/[A-Za-z]+(?:'[A-Za-z]+)?/g) || [];
  const wordCount = alphaToks.length || 1;

  // casing / word-shape: the three casing classes are mutually exclusive; the digit
  // flag is counted independently of them.
  const shape = { caps: 0, title: 0, lower: 0, digit: 0 };
  for (const tok of rawToks) {
    if (/^[A-Z0-9]{2,}$/.test(tok) && /[A-Z]/.test(tok)) {
      shape.caps += 1;
    } else if (/^[A-Z][a-z]+$/.test(tok)) {
      shape.title += 1;
    } else if (/^[a-z]+$/.test(tok)) {
      shape.lower += 1;
    }
    if (/\d/.test(tok)) shape.digit += 1;
  }

  // sentences: lengths in letter runs, ignoring sentences with none
  const sentLens = trimmedPieces(/[.!?]+(?:\s|$)/).map(letterRuns).filter((len) => len > 0);
  const meanSent = average(sentLens);
  let sdSent = 0;
  if (sentLens.length > 1 && meanSent > 0) {
    const squared = sentLens.reduce((a, b) => a + (b - meanSent) ** 2, 0);
    sdSent = Math.sqrt(squared / sentLens.length);
  }

  // paragraphs: blocks separated by two or more newlines
  const meanPara = average(trimmedPieces(/\n{2,}/).map(letterRuns));

  // mean word length and lexical diversity over alphabetic tokens
  const meanWordLen = alphaToks.reduce((a, t) => a + t.length, 0) / wordCount;
  const distinct = new Set(alphaToks.map((t) => t.toLowerCase())).size;

  return [
    shape.caps / tokCount,
    shape.title / tokCount,
    shape.lower / tokCount,
    shape.digit / tokCount,
    squash(meanSent, 22),
    meanSent > 0 ? sdSent / meanSent : 0,
    squash(meanPara, 60),
    squash(tally(/ {2,}/g) / tokCount, 0.05),
    squash(tally(/\n/g) / charCount, 0.02),
    squash(tally(/"/g) / charCount, 0.01),
    squash(tally(/[“”‘’]/g) / charCount, 0.01),
    squash(tally(/'/g) / charCount, 0.02),
    // intentional emoji + variation-selector (U+FE0F) class; match semantics must not change
    squash(tally(/[\u{1F300}-\u{1FAFF}\u{2600}-\u{27BF}\u{2B00}-\u{2BFF}\u{FE0F}]/gu) / charCount, 0.005),
    // "typo"-ish signals: repeated-letter runs (sooo), lowercase lone i, multi-punct
    squash(tally(/([a-zA-Z])\1\1+/g) / tokCount, 0.01),
    squash(tally(/\bi\b/g) / wordCount, 0.02),
    squash(tally(/[!?]{2,}/g) / charCount, 0.005),
    squash(tally(/\.{2,}/g) / charCount, 0.005),
    squash(tally(/[()]/g) / charCount, 0.02),
    squash(tally(/[-—–]/g) / charCount, 0.02),
    squash(tally(/\d/g) / charCount, 0.05),
    squash(meanWordLen, 5),
    distinct / wordCount,
  ];
}
119
+ export const N_SCALAR = 22;
120
+
121
+ // ---- char-affix n-grams (prefix/suffix 3-grams) — topic-robust morphology -----------
122
+ // We fix a key list per fold (the harness passes them in). Returns rel-freq over keys.
123
/**
 * Count 3-character prefixes and suffixes over the lower-cased tokens of a text.
 * Tokens are runs of two or more letters/apostrophes; only tokens of length >= 4
 * contribute affixes. Prefix keys are 'P' + first three chars, suffix keys are
 * 'S' + last three chars.
 *
 * @param {*} text - Coerced with String().
 * @returns {{pre:Object<string,number>, suf:Object<string,number>, nTok:number}}
 *   Prototype-less count tables and the token count (at least 1).
 */
export function affixCounts(text) {
  const toks = String(text).toLowerCase().match(/[a-z']{2,}/g) || [];
  const pre = Object.create(null);
  const suf = Object.create(null);
  const bump = (table, key) => { table[key] = (table[key] || 0) + 1; };

  toks
    .filter((tok) => tok.length >= 4)
    .forEach((tok) => {
      bump(pre, `P${tok.slice(0, 3)}`);
      bump(suf, `S${tok.slice(-3)}`);
    });

  return { pre, suf, nTok: toks.length || 1 };
}
137
/**
 * Relative frequency of each affix key in `text`: count / token count.
 * Keys starting with 'P' are looked up in the prefix table, all others in the suffix
 * table; keys not seen in the text yield 0.
 *
 * @param {*} text
 * @param {string[]} keys - Affix keys in the 'Pxxx' / 'Sxxx' form used by affixCounts.
 * @returns {number[]} One value per key, in key order.
 */
export function relFreqAffix(text, keys) {
  const counts = affixCounts(text);
  return keys.map((key) => {
    const table = key[0] === 'P' ? counts.pre : counts.suf;
    return (table[key] || 0) / counts.nTok;
  });
}
141
+
142
+ // ---- z-standardize against an INJECTED reference (DEV-derived) ----------------------
143
/**
 * Z-standardize `raw` component-wise against the supplied mean/sd and clamp every
 * z-score to [-Z_CAP, Z_CAP]. A falsy sd (e.g. 0) is replaced by 1e-6.
 *
 * @param {ArrayLike<number>} raw
 * @param {ArrayLike<number>} mean - Index-aligned with `raw`.
 * @param {ArrayLike<number>} sd - Index-aligned with `raw`.
 * @returns {number[]} Clamped z-scores, same length as `raw`.
 */
function zStd(raw, mean, sd) {
  return Array.from({ length: raw.length }, (_, i) => {
    const z = (raw[i] - mean[i]) / (sd[i] || 1e-6);
    return Math.max(-Z_CAP, Math.min(Z_CAP, z));
  });
}
152
+
153
+ // Per-chunk full feature object. `ref` = { funcMean, funcSd, triMean, triSd, punctMean,
154
+ // punctSd, scalarMean, scalarSd, affixKeys, affixMean, affixSd } built by the harness
155
+ // from DEV held-out authors. Families are kept SEPARATE so the pair stage can compute a
156
+ // per-family Burrows-Delta.
157
/**
 * Extract the z-standardized feature families for one chunk of text. Families are
 * returned separately rather than concatenated.
 *
 * @param {string} text - Chunk text.
 * @param {object} ref - Reference statistics: funcMean/funcSd, triMean/triSd,
 *   punctMean/punctSd, scalarMean/scalarSd, and optionally affixKeys with
 *   affixMean/affixSd.
 * @returns {{func:number[], tri:number[], punct:number[], scalar:number[],
 *   affix:number[]}} `affix` is [] when `ref.affixKeys` is missing or empty.
 */
export function chunkFeatures(text, ref) {
  return {
    func: zStd(relFreqFunc(text, FUNCTION_WORDS), ref.funcMean, ref.funcSd),
    tri: zStd(relFreqTrigrams(text, TRIGRAM_KEYS), ref.triMean, ref.triSd),
    punct: zStd(relFreqPunct(text, PUNCT_KEYS), ref.punctMean, ref.punctSd),
    scalar: zStd(topicRobustScalars(text), ref.scalarMean, ref.scalarSd),
    affix: (ref.affixKeys && ref.affixKeys.length)
      ? zStd(relFreqAffix(text, ref.affixKeys), ref.affixMean, ref.affixSd)
      : [],
  };
}
167
+
168
+ export const FAMILIES = ['func', 'tri', 'punct', 'scalar', 'affix'];
169
+
170
+ export { topicRobustScalars };
@@ -0,0 +1,74 @@
1
+ // verifier-logreg.mjs — Gate B v3. Plain-JS L2 logistic regression (batch gradient
2
+ // descent) over standardized pair-summary features. ZERO deps. Trains on same- vs
3
+ // different-author pair vectors; predicts P(same-author). That probability becomes the
4
+ // verifier score fed (as distance = 1 - p) into validateInstrument.
5
+
6
+ // Standardizer fit on TRAIN features only (mean/sd per column), applied to val.
7
/**
 * Fit a per-column standardizer: column means and population standard deviations
 * (divide by the row count). A column with zero sd gets 1e-6 instead.
 *
 * @param {ArrayLike<number>[]} rows - Non-empty list of equal-length feature rows; the
 *   width is taken from the first row.
 * @returns {{mean:Float64Array, sd:Float64Array}}
 */
export function fitStandardizer(rows) {
  const width = rows[0].length;
  const mean = new Float64Array(width);
  const sd = new Float64Array(width);

  for (let j = 0; j < width; j += 1) {
    let total = 0;
    for (const row of rows) total += row[j];
    const mu = total / rows.length;

    let squared = 0;
    for (const row of rows) squared += (row[j] - mu) ** 2;

    mean[j] = mu;
    sd[j] = Math.sqrt(squared / rows.length) || 1e-6;
  }
  return { mean, sd };
}
17
/**
 * Apply a fitted standardizer to one row: (value - mean) / sd per column.
 *
 * @param {ArrayLike<number>} row
 * @param {{mean:ArrayLike<number>, sd:ArrayLike<number>}} st - As returned by
 *   fitStandardizer.
 * @returns {Float64Array} Standardized row, same length as `row`.
 */
export function applyStandardizer(row, st) {
  return Float64Array.from(row, (value, j) => (value - st.mean[j]) / st.sd[j]);
}
22
+
23
/**
 * Logistic function 1 / (1 + e^-z). Math.exp is only ever called with a
 * non-positive argument, so it cannot overflow for large |z|.
 *
 * @param {number} z
 * @returns {number} Value in [0, 1].
 */
function sigmoid(z) {
  const decay = Math.exp(-Math.abs(z));
  return z >= 0 ? 1 / (1 + decay) : decay / (1 + decay);
}
27
+
28
+ // trainLogReg(X, y, opts) — X: array of standardized Float64Array rows, y: 0/1 labels.
29
+ // L2 (lambda) on weights (not bias). Class-balanced sample weights so the (rarer)
30
+ // same-author positives are not swamped by the abundant different-author negatives.
31
/**
 * Train a logistic-regression model with full-batch gradient descent.
 *
 * Each sample is weighted by n / (2 * classCount) for its class, and the gradient is
 * divided by the sum of those sample weights. The L2 penalty (lambda * w / n) is
 * applied to the weights only, not to the bias.
 *
 * @param {ArrayLike<number>[]} X - Non-empty list of feature rows; the width is taken
 *   from the first row.
 * @param {number[]} y - 0/1 labels, index-aligned with `X`.
 * @param {{lambda?:number, lr?:number, iters?:number}} [opts] - Defaults: lambda 1.0,
 *   lr 0.1, iters 4000.
 * @returns {{w:Float64Array, b:number}} Learned weights and bias.
 */
export function trainLogReg(X, y, opts = {}) {
  const lambda = opts.lambda ?? 1.0;
  const lr = opts.lr ?? 0.1;
  const iters = opts.iters ?? 4000;

  const n = X.length;
  const d = X[0].length;
  const w = new Float64Array(d);
  let b = 0;

  // Class-balanced sample weights; an absent class gets weight 1.
  const balance = (count) => (count ? n / (2 * count) : 1);
  const nPos = y.reduce((a, v) => a + v, 0);
  const wPos = balance(nPos);
  const wNeg = balance(n - nPos);

  for (let epoch = 0; epoch < iters; epoch += 1) {
    const gradW = new Float64Array(d);
    let gradB = 0;
    let weightTotal = 0;

    for (let i = 0; i < n; i += 1) {
      const row = X[i];
      let logit = b;
      for (let j = 0; j < d; j += 1) logit += w[j] * row[j];

      const sampleWeight = y[i] ? wPos : wNeg;
      const residual = (sigmoid(logit) - y[i]) * sampleWeight;
      for (let j = 0; j < d; j += 1) gradW[j] += residual * row[j];
      gradB += residual;
      weightTotal += sampleWeight;
    }

    const scale = 1 / (weightTotal || 1);
    for (let j = 0; j < d; j += 1) {
      gradW[j] = gradW[j] * scale + lambda * w[j] / n;
      w[j] -= lr * gradW[j];
    }
    b -= lr * gradB * scale;
  }
  return { w, b };
}
69
+
70
/**
 * Model output for one row: sigmoid(b + sum_j w[j] * row[j]).
 *
 * @param {ArrayLike<number>} row - Feature row.
 * @param {{w:ArrayLike<number>, b:number}} model - As returned by trainLogReg.
 * @returns {number} Value in [0, 1].
 */
export function predictProba(row, model) {
  let logit = model.b;
  for (const [j, value] of row.entries()) {
    logit += model.w[j] * value;
  }
  return sigmoid(logit);
}