@ijfw/memory-server 1.5.6 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ijfw-dashboard +20 -1
- package/package.json +4 -3
- package/src/audit-roster.js +89 -12
- package/src/brain/tiered-llm.js +57 -7
- package/src/cross-orchestrator-cli.js +344 -4
- package/src/cross-project-search.js +39 -1
- package/src/dashboard-server.js +7 -1
- package/src/dream/runner.mjs +560 -8
- package/src/handlers/brain-handler.js +101 -1
- package/src/importers/discover.js +1 -1
- package/src/memory/bench-metrics.js +289 -0
- package/src/memory/benchmark.js +1 -1
- package/src/memory/search.js +53 -1
- package/src/orchestrator/plan-checker.js +1 -1
- package/src/profile/audit.js +671 -0
- package/src/profile/capture.js +871 -0
- package/src/profile/derive-dialectic.js +242 -0
- package/src/profile/derive-heuristic.js +733 -0
- package/src/profile/derive.js +156 -0
- package/src/profile/egress.js +306 -0
- package/src/profile/eval/build-real-probes.mjs +197 -0
- package/src/profile/eval/corpus-from-reddit.mjs +166 -0
- package/src/profile/eval/corpus-from-reddit.test.mjs +121 -0
- package/src/profile/eval/corpus-from-transcripts.mjs +264 -0
- package/src/profile/eval/gate-b-behavior.mjs +420 -0
- package/src/profile/eval/gate-b-decision-run.mjs +171 -0
- package/src/profile/eval/gate-b-decision-run.test.mjs +141 -0
- package/src/profile/eval/gate-b-run.mjs +417 -0
- package/src/profile/eval/gate-b-run.test.mjs +204 -0
- package/src/profile/eval/gate-c-capture.mjs +323 -0
- package/src/profile/eval/harness.mjs +551 -0
- package/src/profile/eval/instrument-validation.mjs +248 -0
- package/src/profile/eval/instrument-validation.test.mjs +125 -0
- package/src/profile/eval/multi-subject-harness.mjs +106 -0
- package/src/profile/eval/multi-subject-harness.test.mjs +99 -0
- package/src/profile/eval/personas.test.mjs +83 -0
- package/src/profile/eval/plumbing.test.mjs +69 -0
- package/src/profile/eval/prereg.mjs +130 -0
- package/src/profile/eval/prereg.test.mjs +78 -0
- package/src/profile/eval/real-corpus.test.mjs +103 -0
- package/src/profile/eval/real-personas.mjs +109 -0
- package/src/profile/eval/run-real-corpus-concurrent.mjs +407 -0
- package/src/profile/eval/run-real-corpus.mjs +358 -0
- package/src/profile/eval/slug-quality.mjs +464 -0
- package/src/profile/eval/stylometry-features.js +85 -0
- package/src/profile/eval/stylometry-reference.js +16 -0
- package/src/profile/eval/stylometry.js +224 -0
- package/src/profile/eval/stylometry.test.mjs +103 -0
- package/src/profile/eval/synthetic-personas.js +91 -0
- package/src/profile/eval/verifier-features.mjs +170 -0
- package/src/profile/eval/verifier-logreg.mjs +74 -0
- package/src/profile/eval/verifier-pair.mjs +122 -0
- package/src/profile/eval/verifier-reference.mjs +68 -0
- package/src/profile/eval/verifier-scorer.mjs +30 -0
- package/src/profile/eval/wrong-target-control.mjs +168 -0
- package/src/profile/eval/wrong-target-control.test.mjs +124 -0
- package/src/profile/exemplar-capture.js +232 -0
- package/src/profile/exemplar-retrieve.js +138 -0
- package/src/profile/exemplar-store.js +314 -0
- package/src/profile/lock.js +64 -0
- package/src/profile/merge.js +624 -0
- package/src/profile/path-policy.js +213 -0
- package/src/profile/precision-stamp.mjs +151 -0
- package/src/profile/render-brief.js +717 -0
- package/src/profile/schema.js +244 -0
- package/src/profile/sensitivity.js +249 -0
- package/src/profile/serve.js +345 -0
- package/src/profile/store.js +261 -0
- package/src/profile/telemetry.js +289 -0
- package/src/recovery/checkpoint.js +7 -1
- package/src/server.js +185 -14
- package/src/.registry-meta-key.pem +0 -3
|
@@ -0,0 +1,733 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* profile/derive-heuristic.js — Cross-system profile bus, PHASE P2.
|
|
3
|
+
*
|
|
4
|
+
* The ZERO-LLM heuristic derivation tier — *the default that carries the
|
|
5
|
+
* profile* (design-v2 §5). It turns per-session METADATA (statistics about
|
|
6
|
+
* your messages, never raw transcripts — data minimization, §6) + structured
|
|
7
|
+
* feedback/outcome records into a single `ProfileDelta` that the P0 merge layer
|
|
8
|
+
* (`applyDelta` in ./merge.js) folds into the one global profile.
|
|
9
|
+
*
|
|
10
|
+
* PURITY CONTRACT (P2.4): every function here is a pure data transform.
|
|
11
|
+
* - No network (`fetch`/`http`/`https`).
|
|
12
|
+
* - No `child_process`.
|
|
13
|
+
* - No LLM tier (`tiered-llm.js` et al.).
|
|
14
|
+
* - No store writes (this module imports nothing from ./store.js; persistence
|
|
15
|
+
* is the merge layer's job via `mergeAndWrite`).
|
|
16
|
+
* A CI moat guard (P4.5) asserts the serving path never reaches the LLM tier;
|
|
17
|
+
* this module keeps that promise at the source.
|
|
18
|
+
*
|
|
19
|
+
* ── The merge contract (why the shapes below look the way they do) ──────────
|
|
20
|
+
* `applyDelta(profile, delta)` reads a delta of shape:
|
|
21
|
+
* {
|
|
22
|
+
* style?: { axis: { sample:0..1, weight?:number } },
|
|
23
|
+
* inferences?: Inference[], // see schema.makeInference
|
|
24
|
+
* expertise?: { domain: { accepts:number, n:number } },
|
|
25
|
+
* overlays?: { key: { style?, inferences? } },
|
|
26
|
+
* provenance?: { ...scalars },
|
|
27
|
+
* }
|
|
28
|
+
*
|
|
29
|
+
* EMA DOUBLE-APPLY DECISION (P2.1): merge.js `foldStyleAxis` OWNS the α≈0.15 EMA
|
|
30
|
+
* step and the Beta(α,β) mass update — it folds the raw `sample` on WRITE. So
|
|
31
|
+
* this derive step must emit the per-session OBSERVATION (`{sample,weight}`),
|
|
32
|
+
* NOT a pre-smoothed EMA, or the α would be applied twice. `deriveStyle`
|
|
33
|
+
* therefore returns normalized [0,1] samples; the merge does the smoothing.
|
|
34
|
+
*
|
|
35
|
+
* WILSON DECISION (P2.2): merge.js `mergeExpertise` accumulates `{accepts,n}`
|
|
36
|
+
* and recomputes `wilsonLowerBound` on write. So `deriveExpertise` emits raw
|
|
37
|
+
* `{accepts,n}` counts — NOT a precomputed band — and lets evidence accumulate
|
|
38
|
+
* across sessions. `expertiseBand` is a read-side classifier (used by briefs /
|
|
39
|
+
* tests) that requires N≥5 before naming a band. We re-implement the Wilson
|
|
40
|
+
* formula locally (identical to merge.js `wilsonLowerBound`) rather than edit
|
|
41
|
+
* P0 to export it — see the function comment.
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
import { makeInference } from './schema.js';
|
|
45
|
+
|
|
46
|
+
/**
 * Coerce `x` to a number and clamp it into the closed unit interval [0, 1].
 * Anything that does not coerce to a finite number (NaN, ±Infinity,
 * non-numeric strings, undefined) yields 0. Documented as mirroring the
 * clamp01 helper in merge.js.
 *
 * @param {*} x value to clamp
 * @returns {number} a number in [0, 1]
 */
function clamp01(x) {
  const value = Number(x);
  if (!Number.isFinite(value)) return 0;
  return value < 0 ? 0 : value > 1 ? 1 : value;
}
|
|
54
|
+
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
// P2.1 — EMA style fingerprint.
|
|
57
|
+
//
|
|
58
|
+
// Map per-session metadata signals onto the four style axes as [0,1] samples.
|
|
59
|
+
// Each mapping is a deterministic, monotone squash of an interpretable
|
|
60
|
+
// statistic. The merge applies the EMA — we only emit the observation.
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
|
|
63
|
+
/**
 * Saturating map of a non-negative magnitude `x` onto [0, 1), with `mid` as
 * the 0.5 crossover: x=0 -> 0, x=mid -> 0.5, x->∞ -> 1. Monotone-increasing in
 * x, so a single extreme (finite) session stays strictly below 1.
 *
 * Negative or non-numeric `x` is treated as 0; a non-positive (or NaN) `mid`
 * falls back to 1.
 *
 * @param {*} x magnitude to squash (coerced with Number)
 * @param {number} mid magnitude that should map to 0.5
 * @returns {number} sample in [0, 1]
 */
function saturate(x, mid) {
  const v = Math.max(0, Number(x) || 0);
  const m = mid > 0 ? mid : 1;
  // Infinity / (Infinity + m) is NaN, which downstream clamping would turn
  // into 0 — the opposite extreme. Return the documented limit instead.
  if (v === Infinity) return 1;
  return v / (v + m);
}
|
|
74
|
+
|
|
75
|
+
/**
 * Inverse saturating map: large `x` -> low sample (e.g. long messages -> low
 * terseness). Monotone-decreasing in x: x=0 -> 1, x=mid -> 0.5, x->∞ -> 0.
 *
 * @param {*} x magnitude to squash (coerced by `saturate`)
 * @param {number} mid magnitude that should map to 0.5
 * @returns {number} sample in [0, 1]
 */
function saturateInverse(x, mid) {
  const rising = saturate(x, mid);
  // If the forward map cannot produce a number (infinite magnitude), return
  // the documented limit 0 rather than propagating NaN to callers.
  return Number.isNaN(rising) ? 0 : 1 - rising;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Formality-axis blend weights. `formality_markers` is the primary signal;
 * `code_block_ratio` is a bounded nudge (code-heavy sessions read slightly
 * more formal). They sum to 1 so the convex blend of two [0,1] inputs stays in
 * [0,1], and a marker swing always dominates an equal-magnitude code swing.
 */
export const FORMALITY_MARKER_WEIGHT = 0.85;
export const FORMALITY_CODE_WEIGHT = 0.15;

/**
 * deriveStyle(meta) -> { formality, energy, terseness, emoji_use }, each an
 * observation `{ sample: 0..1, weight, trust? }`.
 *
 * `meta` is per-session aggregate metadata (NOT transcripts):
 *   - avg_msg_chars        mean user message length   -> terseness (inverse)
 *   - emoji_per_msg        mean emoji per message     -> emoji_use
 *   - code_block_ratio     fraction of msgs w/ code   -> formality (nudge)
 *   - formality_markers    0..1 formal-marker density -> formality (primary)
 *   - turn_cadence_per_min turns/minute               -> energy
 *   - trust_weight         optional per-host trust, clamped to [0,1]
 *
 * A signal counts as OBSERVED only when it is present and coerces to a finite
 * number. An observed axis gets weight 1. An unobserved axis is emitted as the
 * neutral 0.5 prior with weight 0.1, so missing or garbage metadata cannot
 * push an axis toward an extreme.
 *
 * Samples are raw per-session observations; per the file's design notes the
 * merge layer owns the EMA smoothing, so nothing here is pre-smoothed.
 *
 * @param {object} [meta] per-session metadata; non-objects are treated as {}
 * @returns {{formality:object, energy:object, terseness:object, emoji_use:object}}
 */
export function deriveStyle(meta = {}) {
  const m = meta && typeof meta === 'object' ? meta : {};

  const NEUTRAL_SAMPLE = 0.5;
  const OBSERVED_WEIGHT = 1;
  const ABSENT_WEIGHT = 0.1;

  // Thread the per-host trust through every observation. A missing or
  // non-finite trust is left undefined (the consumer decides the default);
  // a present one is clamped so an out-of-range value cannot amplify.
  const trust = (m.trust_weight !== undefined && m.trust_weight !== null && Number.isFinite(Number(m.trust_weight)))
    ? clamp01(m.trust_weight)
    : undefined;

  // Present AND numerically usable. Non-finite values are treated as absent,
  // mirroring the trust check above.
  const isObserved = (value) => value !== undefined && value !== null && Number.isFinite(Number(value));

  const obs = (sample, present) => {
    const o = present
      ? { sample: clamp01(sample), weight: OBSERVED_WEIGHT }
      : { sample: NEUTRAL_SAMPLE, weight: ABSENT_WEIGHT };
    if (trust !== undefined) o.trust = trust;
    return o;
  };

  const hasLen = isObserved(m.avg_msg_chars);
  const hasEmoji = isObserved(m.emoji_per_msg);
  const hasFormal = isObserved(m.formality_markers);
  const hasCode = isObserved(m.code_block_ratio);
  const hasCadence = isObserved(m.turn_cadence_per_min);

  // terseness: short messages => terse. ~120 chars is the 0.5 crossover.
  const terseness = obs(saturateInverse(m.avg_msg_chars, 120), hasLen);

  // emoji_use: emoji density. ~0.5 emoji/msg is the 0.5 crossover.
  const emoji_use = obs(saturate(m.emoji_per_msg, 0.5), hasEmoji);

  // formality: convex blend of two already-[0,1] signals, monotone-increasing
  // in both. The axis counts as observed when EITHER signal is present; a
  // missing input contributes 0 to the blend. The outer clamp01 is a guard
  // against floating-point overshoot.
  const formality = obs(
    clamp01(
      FORMALITY_MARKER_WEIGHT * (hasFormal ? clamp01(m.formality_markers) : 0)
      + FORMALITY_CODE_WEIGHT * (hasCode ? clamp01(m.code_block_ratio) : 0),
    ),
    hasFormal || hasCode,
  );

  // energy: turn cadence. ~4 turns/min is the 0.5 crossover.
  const energy = obs(saturate(m.turn_cadence_per_min, 4), hasCadence);

  return { formality, energy, terseness, emoji_use };
}
|
|
164
|
+
|
|
165
|
+
/** Number of evidence sessions an axis needs before it counts as "confirmed". */
export const STYLE_CONFIRM_MIN_SESSIONS = 5;

/**
 * styleAxisConfirmed(axis) -> boolean.
 *
 * Takes a merged style-axis object and reports whether its `evidence_count`
 * has reached STYLE_CONFIRM_MIN_SESSIONS. Anything that is not a non-null
 * object, or whose `evidence_count` is missing or non-numeric, is unconfirmed
 * (cold start).
 *
 * The check keys on the session counter rather than Beta mass: per the file's
 * design notes a single high-weight session can inflate α+β, whereas sessions
 * are the honest unit of evidence.
 *
 * @param {object} axis merged axis, expected to carry `evidence_count`
 * @returns {boolean} true once enough sessions have accrued
 */
export function styleAxisConfirmed(axis) {
  if (typeof axis !== 'object' || axis === null) return false;
  const sessions = Number(axis.evidence_count) || 0;
  return sessions >= STYLE_CONFIRM_MIN_SESSIONS;
}
|
|
181
|
+
|
|
182
|
+
// ---------------------------------------------------------------------------
|
|
183
|
+
// P2.2 — Wilson-score expertise.
|
|
184
|
+
//
|
|
185
|
+
// Per-domain lower-bound 95% CI on the accept-without-edit ratio. Outcome
|
|
186
|
+
// semantics (design-v2 §4):
|
|
187
|
+
// - accept -> strong positive (accepts += 1, n += 1)
|
|
188
|
+
// - edit-after -> negative (n += 1; NOT counted in accepts)
|
|
189
|
+
// - discard -> neutral (excluded from n entirely — no signal)
|
|
190
|
+
//
|
|
191
|
+
// We emit raw `{accepts,n}` counts; merge.js `mergeExpertise` accumulates them
|
|
192
|
+
// across sessions and recomputes `wilsonLowerBound` on write. `expertiseBand`
|
|
193
|
+
// classifies a MERGED record (which already carries the recomputed wilsonLB),
|
|
194
|
+
// requiring N>=5 before naming any band — below that, evidence is too thin.
|
|
195
|
+
// ---------------------------------------------------------------------------
|
|
196
|
+
|
|
197
|
+
/** Smallest sample size at which expertise may be banded (design-v2 §4: N>=5). */
export const EXPERTISE_MIN_N = 5;

/**
 * Band cut-points applied to the Wilson 95% LOWER bound. Cutting on the lower
 * bound rather than the point estimate keeps the band honest under small N.
 *
 * Reference calibration for the expert cut: 8/10 accepts has a Wilson LB of
 * about 0.49 and must read as 'expert'; 0.45 sits just beneath it so that
 * record clears with margin while noisier records do not. The proficient cut
 * 0.25 marks "clearly better than chance, not yet expert". Both comparisons
 * are inclusive (>=). These are read-side classifier constants only.
 */
export const EXPERT_WILSON_THRESHOLD = 0.45;
export const PROFICIENT_WILSON_THRESHOLD = 0.25;

/** Outcomes that count as a positive (accept-without-edit). */
const EXPERTISE_POSITIVE = new Set(['accept']);

/**
 * Outcomes that contribute to the denominator `n`.
 *
 * The spec (design-v2 §4) names exactly three authorship outcomes — accept,
 * edit-after, discard — and only the first two are counted; discard is neutral
 * and excluded from `n`. Aliases such as `reject`, `edit_after` or `edit` are
 * deliberately NOT accepted: treating an unknown string as a negative would
 * inflate `n` and depress the Wilson lower bound. Extend this set only when a
 * real authorship-outcome emitter lands, with a citation, not by guessing.
 */
const EXPERTISE_COUNTED = new Set([...EXPERTISE_POSITIVE, 'edit-after']);
|
|
233
|
+
|
|
234
|
+
/**
 * Lower bound of the Wilson score interval for `accepts` successes out of `n`
 * trials (default z = 1.96, i.e. 95%).
 *
 * Kept local on purpose: the file documents this as an identical copy of
 * merge.js `wilsonLowerBound`, so the arithmetic order below must not be
 * rearranged. It is needed here for `{accepts,n}` records that carry no
 * stored bound yet.
 *
 * @param {number} accepts number of positive outcomes
 * @param {number} n total counted outcomes; falsy or non-positive -> 0
 * @param {number} [z=1.96] normal quantile for the confidence level
 * @returns {number} lower bound clamped to [0, 1]
 */
function wilsonLB(accepts, n, z = 1.96) {
  const noEvidence = !n || n <= 0;
  if (noEvidence) return 0;

  const observedRate = accepts / n;
  const zSquared = z * z;
  const spread = Math.sqrt((observedRate * (1 - observedRate) + zSquared / (4 * n)) / n);
  const lower = (observedRate + zSquared / (2 * n) - z * spread) / (1 + zSquared / n);
  return clamp01(lower);
}
|
|
251
|
+
|
|
252
|
+
/**
 * deriveExpertise(records) -> { domain: { accepts, n } }.
 *
 * `records` is an array of `{ domain, outcome }` authorship outcomes for the
 * session. Only outcomes in EXPERTISE_COUNTED add to `n`; those also in
 * EXPERTISE_POSITIVE add to `accepts`. Everything else (discard, unknown
 * strings, malformed rows, blank domains) is skipped as "no signal".
 *
 * The result carries RAW counts only — no Wilson bound is computed here; per
 * the file's design notes the merge layer accumulates counts across sessions
 * and recomputes the bound itself.
 *
 * Domain names are untrusted strings, so counts are tallied in a Map and the
 * result is built with Object.fromEntries. A domain such as "__proto__",
 * "constructor" or "toString" therefore becomes an ordinary own property of
 * the result, instead of resolving to (and mutating) an inherited member of
 * Object.prototype.
 *
 * @param {Array<{domain:string, outcome:string}>} [records]
 * @returns {Object<string, {accepts:number, n:number}>} plain object keyed by
 *   trimmed domain, in first-seen order; `{}` for non-array input
 */
export function deriveExpertise(records = []) {
  if (!Array.isArray(records)) return {};

  const tallies = new Map();
  for (const rec of records) {
    if (!rec || typeof rec !== 'object') continue;
    const domain = typeof rec.domain === 'string' ? rec.domain.trim() : '';
    if (!domain) continue;
    const outcome = String(rec.outcome || '').toLowerCase();
    if (!EXPERTISE_COUNTED.has(outcome)) continue; // discard/unknown = neutral, no signal

    let tally = tallies.get(domain);
    if (!tally) {
      tally = { accepts: 0, n: 0 };
      tallies.set(domain, tally);
    }
    tally.n += 1;
    if (EXPERTISE_POSITIVE.has(outcome)) tally.accepts += 1;
  }
  return Object.fromEntries(tallies);
}
|
|
275
|
+
|
|
276
|
+
/**
 * expertiseBand(record) -> 'unknown' | 'novice' | 'proficient' | 'expert'.
 *
 * `record` is an expertise entry `{ accepts, n, wilsonLB? }`. Fewer than
 * EXPERTISE_MIN_N counted outcomes yields 'unknown' (cold start). Otherwise
 * the band is cut on the Wilson LOWER bound (conservative) at
 * EXPERT_WILSON_THRESHOLD / PROFICIENT_WILSON_THRESHOLD, both inclusive.
 *
 * A stored `wilsonLB` is used only when it is a finite number or a non-blank
 * numeric string. Anything else — notably `null`, which is what JSON
 * serialization produces for NaN — is treated as "no stored bound" and the
 * bound is recomputed from `{accepts, n}`. (A plain `Number(null)` check would
 * read null as a stored bound of 0 and band the record 'novice'.)
 *
 * @param {{accepts?:number, n?:number, wilsonLB?:number}} record
 * @returns {'unknown'|'novice'|'proficient'|'expert'}
 */
export function expertiseBand(record) {
  if (!record || typeof record !== 'object') return 'unknown';
  const n = Number(record.n) || 0;
  if (n < EXPERTISE_MIN_N) return 'unknown';

  const raw = record.wilsonLB;
  let stored = NaN;
  if (typeof raw === 'number') {
    stored = raw;
  } else if (typeof raw === 'string' && raw.trim() !== '') {
    stored = Number(raw);
  }
  const lb = Number.isFinite(stored)
    ? stored
    : wilsonLB(Number(record.accepts) || 0, n);

  if (lb >= EXPERT_WILSON_THRESHOLD) return 'expert';
  if (lb >= PROFICIENT_WILSON_THRESHOLD) return 'proficient';
  return 'novice';
}
|
|
296
|
+
|
|
297
|
+
// ---------------------------------------------------------------------------
|
|
298
|
+
// P2.3 — Preference tags from .session-feedback.jsonl.
|
|
299
|
+
//
|
|
300
|
+
// Record schema (verified against src/feedback-detector.js):
|
|
301
|
+
// { ts, kind, phrase, context } kind ∈ {correction, confirmation, preference, rule}
|
|
302
|
+
//
|
|
303
|
+
// Each record becomes a preference Inference (schema.makeInference shape) the
|
|
304
|
+
// merge dedupes by id (kind+subject) and evidence-accumulates. `correction` is
|
|
305
|
+
// the strongest signal (design-v2 §5: "corrections=strongest signal"); a bare
|
|
306
|
+
// `confirmation` is the weakest (it only reinforces, it doesn't assert a new
|
|
307
|
+
// preference).
|
|
308
|
+
//
|
|
309
|
+
// The subject is a normalized slug of the phrase so two sessions reporting the
|
|
310
|
+
// "same" preference dedupe to one atom across hosts. The original feedback kind
|
|
311
|
+
// is preserved in `value.kind` (a brief renders it; the merge keeps the most
|
|
312
|
+
// recent confirmation's value). We sequester these at confidence < a confirmed
|
|
313
|
+
// heuristic measurement so the dialectic tier (P3) can corroborate, never
|
|
314
|
+
// silently override (design-v2 §5 "heuristic is the floor").
|
|
315
|
+
// ---------------------------------------------------------------------------
|
|
316
|
+
|
|
317
|
+
/**
 * Value-level deny list for the FEEDBACK path: word forms that signal a
 * special category (religion, ethnicity, politics, sexuality, health,
 * genetics/biometrics, union membership, criminal record).
 *
 * The feedback path carries free text (`phrase` / `context`), so the risk is
 * special-category MEANING or direct identifiers reaching the global store;
 * a key-level gate is not enough. The list lives here rather than being
 * imported from capture.js so this module's import graph stays free of
 * fs/os/crypto. It mirrors capture.js SPECIAL_CATEGORY_KEYS in spirit only —
 * these are word forms, not attribute keys — so keep the two in sync by hand
 * if the key list grows.
 *
 * A hit on any of these REFUSES the whole inference.
 */
const SPECIAL_CATEGORY_TERMS = [
  /\breligion\b/i, /\breligious\b/i, /\bfaith\b/i, /\bchurch\b/i, /\bmosque\b/i, /\bsynagogue\b/i,
  /\brace\b/i, /\bethnic(?:ity)?\b/i,
  /\bpolitical\b/i, /\bpolitics\b/i, /\baffiliation\b/i,
  /\bsexual orientation\b/i, /\bsex life\b/i,
  /\bhealth\b/i, /\bmedical\b/i, /\bdiagnos(?:is|ed)\b/i, /\bcondition\b/i, /\bdisease\b/i,
  /\bdisab(?:led|ility)\b/i, /\bgenetic\b/i, /\bbiometric\b/i,
  /\btrade union\b/i, /\bcriminal\b/i, /\bconviction\b/i,
];

/**
 * Direct-identifier shapes removed from the content before it is slugged, so
 * no raw email / phone / SSN / card number can survive even inside a slug.
 * All carry the `g` flag because they are used with String#replace.
 */
const PII_PATTERNS = [
  /[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}/gi, // email
  // eslint-disable-next-line security/detect-unsafe-regex -- linear-time PII redactor: each repetition consumes a mandatory digit, no ambiguous/overlapping quantifier; not ReDoS-exploitable.
  /(?:\+?\d[\s().-]?){7,}\d/g, // phone-ish run of digits
  /\b\d{3}-\d{2}-\d{4}\b/g, // US SSN shape
  // eslint-disable-next-line security/detect-unsafe-regex -- linear-time PII redactor: each repetition consumes a mandatory digit, no ambiguous/overlapping quantifier; not ReDoS-exploitable.
  /\b(?:\d[ -]?){13,19}\b/g, // card-ish long digit run
];

/**
 * scrubPhrase(content, phrase, context) -> { ok, scrubbed }.
 *
 * Deny step: the trigger `phrase`, the surrounding `context` and the extracted
 * `content` are joined and scanned against SPECIAL_CATEGORY_TERMS. Any hit
 * returns `{ ok: false, scrubbed: '' }`, so a term anywhere on the feedback
 * surface blocks the inference.
 *
 * Scrub step: otherwise every PII_PATTERNS match in `content` (only) is
 * replaced by a single space and the result is returned with `ok: true`.
 *
 * Pure: no I/O and no state carried between calls.
 *
 * @param {*} content extracted preference content (the text that gets slugged)
 * @param {*} phrase matched trigger token
 * @param {*} context surrounding message snippet
 * @returns {{ok:boolean, scrubbed:string}}
 */
function scrubPhrase(content, phrase, context) {
  const text = String(content || '');
  const surface = [String(phrase || ''), String(context || ''), text].join(' ');

  if (SPECIAL_CATEGORY_TERMS.some((term) => term.test(surface))) {
    return { ok: false, scrubbed: '' };
  }

  const scrubbed = PII_PATTERNS.reduce(
    (remaining, pattern) => remaining.replace(pattern, ' '),
    text,
  );
  return { ok: true, scrubbed };
}
|
|
380
|
+
|
|
381
|
+
/**
 * Base confidence assigned to a preference inference, keyed by feedback kind.
 * `correction` is the strongest signal and a bare `confirmation` the weakest.
 * A kind that is not a key here has no confidence and is treated as unknown.
 * Frozen so the table cannot be altered at runtime.
 */
const FEEDBACK_KIND_CONFIDENCE = Object.freeze({
  correction: 0.7,
  rule: 0.65,
  preference: 0.55,
  confirmation: 0.4,
});

/**
 * Sensitivity tier per feedback kind: corrections, rules and preferences are
 * more revealing than a plain confirmation, so they sit one tier higher.
 * Frozen so the table cannot be altered at runtime.
 */
const FEEDBACK_KIND_SENSITIVITY = Object.freeze({
  correction: 'med',
  rule: 'med',
  preference: 'med',
  confirmation: 'low',
});
|
|
396
|
+
|
|
397
|
+
/**
 * Extract the preference OBJECT from a feedback row.
 *
 * In the live detector contract (src/feedback-detector.js, a shared file whose
 * wire format is not changed here) `phrase` is the matched TRIGGER TOKEN only
 * ("No,", "always use", "I prefer") and `context` is the fuller surrounding
 * message, so the actual preference lives in `context`. Slugging `phrase`
 * alone would key every correction on the trigger word.
 *
 * Steps:
 *   1. Take the longer of {context, phrase} as the working text. `phrase`
 *      wins when `context` is empty or shorter (the phrase-only wire shape).
 *   2. If the trigger occurs in that text, keep only what FOLLOWS it, unless
 *      nothing follows. The trigger is matched case-insensitively against the
 *      original text and must sit on a word boundary wherever its own edge is
 *      a letter/digit, so "no" is not found inside "know" or "not".
 *   3. Strip leading snippet artifacts (ellipsis, whitespace, punctuation).
 *   4. Fall back to the trigger if nothing else remains.
 *
 * Pure and deterministic. The result is NOT yet safe to persist: callers still
 * run it through the special-category / PII gate before slugging.
 *
 * @param {*} phrase matched trigger token
 * @param {*} context surrounding message snippet
 * @returns {string} the extracted preference text (may be '')
 */
function preferenceContent(phrase, context) {
  const trigger = String(phrase || '').trim();
  const message = String(context || '').trim();
  let text = message.length > trigger.length ? message : trigger;

  if (trigger) {
    // The trigger is escaped and used as a literal, so the pattern has no
    // quantifiers an attacker could exploit. Boundaries are added only on
    // edges that are themselves word characters ("No," has none on the right).
    const literal = trigger.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const wordChar = '[\\p{L}\\p{N}_]';
    const lead = /^[\p{L}\p{N}_]/u.test(trigger) ? `(?<!${wordChar})` : '';
    const trail = /[\p{L}\p{N}_]$/u.test(trigger) ? `(?!${wordChar})` : '';
    const hit = new RegExp(`${lead}${literal}${trail}`, 'iu').exec(text);
    if (hit) {
      const tail = text.slice(hit.index + hit[0].length).trim();
      // Adopt the remainder only when it carries content; if the trigger was
      // the whole (or the last part) of the text, keep the text as is.
      if (tail) text = tail;
    }
  }

  // Drop leading snippet artifacts (the detector's '…' lead-in marker, stray
  // separators) so they never pollute the slug.
  text = text.replace(/^[…\s,.!:;-]+/, '').trim();
  return text || trigger;
}
|
|
445
|
+
|
|
446
|
+
/**
 * Normalize a feedback phrase into a stable, deterministic subject slug so the
 * "same" preference dedupes across sessions/hosts.
 *
 * Pipeline: lowercase -> replace every run of characters outside [a-z0-9] and
 * whitespace with a space (punctuation, emoji, apostrophes, non-ASCII letters)
 * -> collapse whitespace -> trim -> cap at 80 characters -> trim the end again.
 * The final trim matters: the cap can land right after a word, leaving a
 * trailing space that would make the slug differ from the same phrase cut one
 * character shorter.
 *
 * @param {*} phrase text to slug (falsy -> '')
 * @returns {string} slug of at most 80 chars with no leading/trailing space
 */
function phraseToSubject(phrase) {
  return String(phrase || '')
    .toLowerCase()
    .replace(/[^a-z0-9\s]+/g, ' ') // drop punctuation/emoji (apostrophes too)
    .replace(/\s+/g, ' ')
    .trim()
    .slice(0, 80)
    .trimEnd();
}
|
|
459
|
+
|
|
460
|
+
/**
 * derivePreferences(records, ctx?) -> Inference[].
 *
 * @param {Array<{ts,kind,phrase,context}>} records  .session-feedback.jsonl rows
 * @param {{sessionId?:string, host?:string}} [ctx]  provenance for this session
 * @returns {Array} one `preference` Inference per admitted record (possibly empty)
 *
 * Maps each known-kind, non-empty-phrase record to a preference Inference.
 * Unknown kinds and empty phrases are dropped (high precision, low recall —
 * mirrors the detector's own posture). Deterministic + pure.
 *
 * A kind is "known" only when it is an OWN key of FEEDBACK_KIND_CONFIDENCE.
 * A bare `TABLE[kind]` lookup also walks the prototype chain, so on a plain
 * object table a row whose kind is "constructor" or "__proto__" would slip past
 * the unknown-kind gate with a non-numeric confidence.
 */
export function derivePreferences(records = [], ctx = {}) {
  const out = [];
  if (!Array.isArray(records)) return out;
  const sessionId = ctx && typeof ctx.sessionId === 'string' ? ctx.sessionId : null;
  const host = ctx && typeof ctx.host === 'string' ? ctx.host : null;
  const hasOwn = Object.prototype.hasOwnProperty;

  for (const rec of records) {
    if (!rec || typeof rec !== 'object') continue;
    const kind = String(rec.kind || '').toLowerCase();

    // Own-property lookup only: inherited names must not count as a known kind.
    if (!hasOwn.call(FEEDBACK_KIND_CONFIDENCE, kind)) continue; // unknown kind -> ignore
    const confidence = FEEDBACK_KIND_CONFIDENCE[kind];
    if (confidence === undefined) continue; // explicitly-undefined entry -> ignore

    // DEFECT 3 — recover the preference OBJECT. The live detector sets `phrase`
    // to the trigger token only and carries the content in `context`; slugging
    // `phrase` keyed the inference on the trigger ("no") instead of the
    // preference ("use tabs not spaces"). Extract the content FIRST, then slug
    // it. (Per the original note, the extractor returns `phrase` when `context`
    // is empty/shorter, so the phrase-only wire shape is unchanged.)
    const content = preferenceContent(rec.phrase, rec.context);

    // FIX 4 (CRITICAL-2) — value-level deny/scrub BEFORE minting. The gate is
    // handed the full surface (content + phrase + context); a refused gate
    // (`ok` falsy) means the record never becomes an inference, and the slug is
    // built from the gate's scrubbed text rather than the raw content.
    const gate = scrubPhrase(content, rec.phrase, rec.context);
    if (!gate.ok) continue; // special-category -> refuse this inference

    const subject = phraseToSubject(gate.scrubbed);
    if (!subject) continue; // empty/punctuation-only (or fully-scrubbed) phrase -> ignore

    // Only a parseable, post-epoch string timestamp is forwarded; anything else
    // is left undefined so makeInference applies its own fallback.
    const ts = typeof rec.ts === 'string' && Date.parse(rec.ts) > 0
      ? rec.ts
      : undefined;

    const sensitivity = hasOwn.call(FEEDBACK_KIND_SENSITIVITY, kind)
      ? FEEDBACK_KIND_SENSITIVITY[kind]
      : undefined;

    out.push(makeInference({
      kind: 'preference',
      subject,
      // FIX 4 (CRITICAL-2) — SLUG ONLY. The verbatim user phrase is never placed
      // in the atom: the value carries just the structured feedback kind, and
      // the slugged `subject` is the dedupe key. evidence_count stays 1 here;
      // per the original note that is below the brief surfacing floor
      // (evidence_count >= 3), so a single session cannot mint a
      // brief-surfacing preference — only cross-session accumulation can.
      value: { kind },
      confidence,
      evidence_count: 1,
      last_confirmed: ts,
      source_sessions: sessionId ? [sessionId] : [],
      source_hosts: host ? [host] : [],
      sensitivity: sensitivity || 'low',
    }));
  }
  return out;
}
|
|
530
|
+
|
|
531
|
+
// ---------------------------------------------------------------------------
|
|
532
|
+
// S4 — EDIT-DELTA derivation: the CORRECTION LOOP (the HEART of the product).
|
|
533
|
+
//
|
|
534
|
+
// A preference grounded in an ACTUAL edit-delta (diff of agent-proposal vs the
|
|
535
|
+
// user's committed final) is the strongest, cleanest signal — categorically
|
|
536
|
+
// above a regex trigger in a prompt. `derivePreferences` (above) maps the
|
|
537
|
+
// REGEX-detected feedback rows; THIS path maps the EDIT-DELTA evidence rows
|
|
538
|
+
// produced by capture.js `captureEditDelta` (the .session-edits.jsonl stream).
|
|
539
|
+
//
|
|
540
|
+
// An `edit-after` row (the user changed what the agent wrote — outcome
|
|
541
|
+
// 'edit-after', changed:true) becomes a `correction`-kind preference Inference,
|
|
542
|
+
// SUBJECT-slugged on the SCOPE + the cited committed direction so the SAME
|
|
543
|
+
// correction in the same scope dedupes/corroborates across sessions. The value
|
|
544
|
+
// carries the CITATION (the proposed/committed hashes + the bounded cited span)
|
|
545
|
+
// so the admission gate can prove the preference is grounded in a real edit.
|
|
546
|
+
//
|
|
547
|
+
// An `accept` row (the agent's edit landed unchanged) is NOT a preference — it
|
|
548
|
+
// is an authorship OUTCOME that flows into the expertise tier (deriveExpertise:
|
|
549
|
+
// accept = positive, edit-after = negative) via `editOutcomes` below.
|
|
550
|
+
// ---------------------------------------------------------------------------
|
|
551
|
+
|
|
552
|
+
/**
 * Confidence stamped on every correction inference that is grounded in an
 * edit-delta row (the file treats this as its strongest signal).
 */
export const EDIT_CORRECTION_CONFIDENCE = 0.7;
|
|
554
|
+
|
|
555
|
+
/**
 * editScopeKey(scope) -> short, stable slug naming the scope an edit applies to.
 *
 * Uses `scope.language` when truthy, otherwise `scope.file_pattern`, otherwise
 * the literal 'unknown' (also used when `scope` is not an object). The chosen
 * label is stringified and passed through phraseToSubject. Pure.
 *
 * @param {{language?:string, file_pattern?:string}} [scope]
 * @returns {string}
 */
function editScopeKey(scope) {
  const isScopeObject = Boolean(scope) && typeof scope === 'object';
  const label = isScopeObject
    ? (scope.language || scope.file_pattern || 'unknown')
    : 'unknown';
  return phraseToSubject(String(label));
}
|
|
563
|
+
|
|
564
|
+
/**
 * deriveEditPreferences(editRows, ctx?) -> Inference[].
 *
 * Maps each `edit-after` (changed) edit-delta row to a `correction`-kind
 * preference Inference grounded in the cited edit. `accept` / no-op rows produce
 * no preference (they are expertise signal, handled separately). The subject is
 * `scope::<cited-committed-slug>` (capped at 80 chars) so the same correction in
 * the same scope is ONE atom that corroborates across sessions. Per the file's
 * notes, the admission floor (evidence_count >= 3 across non-adjacent sessions)
 * is enforced by the merge layer, so a single accidental edit stays unconfirmed.
 * Pure + deterministic.
 *
 * @param {Array<object>} editRows  edit-delta rows; the fields read here are
 *   `outcome`, `changed`, `cited_span`, `direction`, `scope`, `ts`,
 *   `proposed_hash`, `committed_hash`
 * @param {{sessionId?:string, host?:string, sessionOrdinal?:number|string}} [ctx]
 *   provenance for this session
 * @returns {Array} one inference per admitted row (possibly empty)
 */
export function deriveEditPreferences(editRows = [], ctx = {}) {
  const out = [];
  if (!Array.isArray(editRows)) return out;
  const sessionId = ctx && typeof ctx.sessionId === 'string' ? ctx.sessionId : null;
  const host = ctx && typeof ctx.host === 'string' ? ctx.host : null;

  // Only a real number or a non-blank numeric string is an ordinal. A bare
  // Number(...) coercion would turn null / '' / false / [] into 0 (and true
  // into 1), stamping a session index onto rows whose ordinal was never
  // supplied and feeding the non-adjacency gate a made-up value.
  const rawOrdinal = ctx ? ctx.sessionOrdinal : undefined;
  const ordinalSupplied = typeof rawOrdinal === 'number'
    || (typeof rawOrdinal === 'string' && rawOrdinal.trim() !== '');
  const ordinal = ordinalSupplied && Number.isFinite(Number(rawOrdinal))
    ? Number(rawOrdinal)
    : undefined;

  for (const row of editRows) {
    if (!row || typeof row !== 'object') continue;
    const outcome = String(row.outcome || '').toLowerCase();
    if (outcome !== 'edit-after' || row.changed === false) continue; // accepts/no-ops -> not a preference

    // The cited committed span IS the citation; it is re-run through the scrub
    // gate so a refused span blocks the inference — never mint.
    const cited = String(row.cited_span || row.direction || '');
    const gate = scrubPhrase(cited, '', '');
    if (!gate.ok) continue; // special-category -> refuse this inference
    const citedSlug = phraseToSubject(gate.scrubbed);
    if (!citedSlug) continue; // empty / fully-scrubbed cited span -> no usable citation

    const scopeKey = editScopeKey(row.scope);
    const subject = `${scopeKey}::${citedSlug}`.slice(0, 80);

    // Accept an ISO-ish string that parses to a post-epoch instant as-is;
    // otherwise accept a positive numeric value as epoch milliseconds; otherwise
    // leave undefined so makeInference applies its own fallback.
    const ts = (typeof row.ts === 'string' && Date.parse(row.ts) > 0)
      ? row.ts
      : (Number.isFinite(Number(row.ts)) && Number(row.ts) > 0
        ? new Date(Number(row.ts)).toISOString()
        : undefined);

    const inference = makeInference({
      kind: 'correction',
      subject,
      // VALUE carries the CITATION: the proposed/committed content HASHES plus
      // the scope object. The cited text itself only appears, slugged, in the
      // subject.
      value: {
        kind: 'correction',
        scope: row.scope && typeof row.scope === 'object' ? row.scope : undefined,
        cited: { proposed_hash: row.proposed_hash, committed_hash: row.committed_hash },
      },
      confidence: EDIT_CORRECTION_CONFIDENCE,
      evidence_count: 1,
      last_confirmed: ts,
      source_sessions: sessionId ? [sessionId] : [],
      source_hosts: host ? [host] : [],
      sensitivity: 'med',
    });
    // Thread the session ordinal so the merge's non-adjacency admission gate can
    // tell apart "3 corroborations from 3 spread-out sessions" (confirm) from
    // "3 edits in one session / back-to-back sessions" (do NOT confirm).
    if (ordinal !== undefined) inference.source_ordinals = [ordinal];
    out.push(inference);
  }
  return out;
}
|
|
632
|
+
|
|
633
|
+
/**
 * editOutcomes(editRows) -> [{ domain, outcome }] for deriveExpertise.
 *
 * Translates edit-delta rows into authorship-outcome records. Only rows whose
 * lowercased `outcome` is 'accept' or 'edit-after' are kept; the file describes
 * the former as a positive signal and the latter as a negative one. The domain
 * is `scope.language` when truthy, else `scope.file_pattern`, trimmed; rows with
 * no usable domain are skipped. Pure.
 *
 * @param {Array<object>} [editRows]
 * @returns {Array<{domain:string, outcome:string}>}
 */
export function editOutcomes(editRows = []) {
  const signals = [];
  if (Array.isArray(editRows)) {
    for (const entry of editRows) {
      const isRecord = Boolean(entry) && typeof entry === 'object';
      if (!isRecord) continue;

      const outcome = String(entry.outcome || '').toLowerCase();
      const isTracked = outcome === 'accept' || outcome === 'edit-after';
      if (!isTracked) continue;

      let scope = {};
      if (entry.scope && typeof entry.scope === 'object') scope = entry.scope;

      const label = scope.language || scope.file_pattern || '';
      const domain = String(label).trim();
      if (domain) signals.push({ domain, outcome });
    }
  }
  return signals;
}
|
|
656
|
+
|
|
657
|
+
// ---------------------------------------------------------------------------
|
|
658
|
+
// P2.4 — Top-level: emit a single ProfileDelta.
|
|
659
|
+
//
|
|
660
|
+
// Composes the three heuristic signals into ONE delta the merge layer folds.
|
|
661
|
+
// No store writes (this module never imports ./store.js); persistence is
|
|
662
|
+
// `mergeAndWrite`'s job. PURE + DETERMINISTIC: the provenance recency stamp is
|
|
663
|
+
// derived from CONTENT (the max inference `last_confirmed` timestamp), NOT a wall clock — so two
|
|
664
|
+
// calls with the same input produce a deep-equal delta and the merge's
|
|
665
|
+
// commutative `updated` invariant is preserved.
|
|
666
|
+
// ---------------------------------------------------------------------------
|
|
667
|
+
|
|
668
|
+
/**
 * deriveHeuristic(input) -> ProfileDelta.
 *
 * Builds one delta object from the per-session signals. Nothing is persisted
 * here; the function only calls the derivers and assembles their results.
 *
 * @param {object} input
 * @param {object} [input.metadata]  per-session style metadata (see deriveStyle)
 * @param {Array}  [input.outcomes]  authorship outcomes (see deriveExpertise)
 * @param {Array}  [input.feedback]  .session-feedback.jsonl rows (see derivePreferences)
 * @param {Array}  [input.edits]     .session-edits.jsonl edit-delta rows (see
 *                                   deriveEditPreferences / editOutcomes)
 * @param {string} [input.sessionId] provenance for derived inferences
 * @param {string} [input.host]      provenance for derived inferences
 * @param {number} [input.sessionOrdinal] session index forwarded to the edit deriver
 * @returns {object} delta with any of `style`, `expertise`, `inferences`,
 *   `provenance`; a key is omitted when its source signal is absent, so an
 *   empty input yields `{}`.
 */
export function deriveHeuristic(input = {}) {
  const src = input && typeof input === 'object' ? input : {};
  const { sessionId, host, sessionOrdinal } = src;
  const provenanceCtx = { sessionId, host, sessionOrdinal };

  const delta = {};
  const editRows = Array.isArray(src.edits) ? src.edits : [];

  // Style: emitted only when a metadata object was actually supplied.
  const { metadata } = src;
  if (metadata && typeof metadata === 'object') {
    delta.style = deriveStyle(metadata);
  }

  // Expertise: explicit outcomes first, then the outcomes routed from the edit
  // rows; the key is dropped when the deriver returns an empty object.
  const explicitOutcomes = Array.isArray(src.outcomes) ? src.outcomes : [];
  const expertise = deriveExpertise([...explicitOutcomes, ...editOutcomes(editRows)]);
  if (Object.keys(expertise).length > 0) {
    delta.expertise = expertise;
  }

  // Inferences: edit-delta corrections first, then regex-detected feedback
  // preferences; the key is dropped when both sources are empty.
  const feedbackRows = Array.isArray(src.feedback) ? src.feedback : [];
  const inferences = [
    ...deriveEditPreferences(editRows, provenanceCtx),
    ...derivePreferences(feedbackRows, provenanceCtx),
  ];
  if (inferences.length > 0) {
    delta.inferences = inferences;
  }

  // Provenance: the newest parseable `last_confirmed` among the inferences —
  // derived from content rather than a wall clock so identical input gives an
  // identical delta. Skipped when no inference carries a post-epoch timestamp.
  const newest = inferences.reduce((latest, inference) => {
    const stamp = Date.parse(inference.last_confirmed);
    return Number.isFinite(stamp) && stamp > latest ? stamp : latest;
  }, -Infinity);
  if (Number.isFinite(newest) && newest > 0) {
    delta.provenance = { updated: new Date(newest).toISOString() };
  }

  return delta;
}
|