@ijfw/memory-server 1.5.6 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ijfw-dashboard +20 -1
- package/package.json +4 -3
- package/src/audit-roster.js +89 -12
- package/src/brain/tiered-llm.js +57 -7
- package/src/cross-orchestrator-cli.js +344 -4
- package/src/cross-project-search.js +39 -1
- package/src/dashboard-server.js +7 -1
- package/src/dream/runner.mjs +560 -8
- package/src/handlers/brain-handler.js +101 -1
- package/src/importers/discover.js +1 -1
- package/src/memory/bench-metrics.js +289 -0
- package/src/memory/benchmark.js +1 -1
- package/src/memory/search.js +53 -1
- package/src/orchestrator/plan-checker.js +1 -1
- package/src/profile/audit.js +671 -0
- package/src/profile/capture.js +871 -0
- package/src/profile/derive-dialectic.js +242 -0
- package/src/profile/derive-heuristic.js +733 -0
- package/src/profile/derive.js +156 -0
- package/src/profile/egress.js +306 -0
- package/src/profile/eval/build-real-probes.mjs +197 -0
- package/src/profile/eval/corpus-from-reddit.mjs +166 -0
- package/src/profile/eval/corpus-from-reddit.test.mjs +121 -0
- package/src/profile/eval/corpus-from-transcripts.mjs +264 -0
- package/src/profile/eval/gate-b-behavior.mjs +420 -0
- package/src/profile/eval/gate-b-decision-run.mjs +171 -0
- package/src/profile/eval/gate-b-decision-run.test.mjs +141 -0
- package/src/profile/eval/gate-b-run.mjs +417 -0
- package/src/profile/eval/gate-b-run.test.mjs +204 -0
- package/src/profile/eval/gate-c-capture.mjs +323 -0
- package/src/profile/eval/harness.mjs +551 -0
- package/src/profile/eval/instrument-validation.mjs +248 -0
- package/src/profile/eval/instrument-validation.test.mjs +125 -0
- package/src/profile/eval/multi-subject-harness.mjs +106 -0
- package/src/profile/eval/multi-subject-harness.test.mjs +99 -0
- package/src/profile/eval/personas.test.mjs +83 -0
- package/src/profile/eval/plumbing.test.mjs +69 -0
- package/src/profile/eval/prereg.mjs +130 -0
- package/src/profile/eval/prereg.test.mjs +78 -0
- package/src/profile/eval/real-corpus.test.mjs +103 -0
- package/src/profile/eval/real-personas.mjs +109 -0
- package/src/profile/eval/run-real-corpus-concurrent.mjs +407 -0
- package/src/profile/eval/run-real-corpus.mjs +358 -0
- package/src/profile/eval/slug-quality.mjs +464 -0
- package/src/profile/eval/stylometry-features.js +85 -0
- package/src/profile/eval/stylometry-reference.js +16 -0
- package/src/profile/eval/stylometry.js +224 -0
- package/src/profile/eval/stylometry.test.mjs +103 -0
- package/src/profile/eval/synthetic-personas.js +91 -0
- package/src/profile/eval/verifier-features.mjs +170 -0
- package/src/profile/eval/verifier-logreg.mjs +74 -0
- package/src/profile/eval/verifier-pair.mjs +122 -0
- package/src/profile/eval/verifier-reference.mjs +68 -0
- package/src/profile/eval/verifier-scorer.mjs +30 -0
- package/src/profile/eval/wrong-target-control.mjs +168 -0
- package/src/profile/eval/wrong-target-control.test.mjs +124 -0
- package/src/profile/exemplar-capture.js +232 -0
- package/src/profile/exemplar-retrieve.js +138 -0
- package/src/profile/exemplar-store.js +314 -0
- package/src/profile/lock.js +64 -0
- package/src/profile/merge.js +624 -0
- package/src/profile/path-policy.js +213 -0
- package/src/profile/precision-stamp.mjs +151 -0
- package/src/profile/render-brief.js +717 -0
- package/src/profile/schema.js +244 -0
- package/src/profile/sensitivity.js +249 -0
- package/src/profile/serve.js +345 -0
- package/src/profile/store.js +261 -0
- package/src/profile/telemetry.js +289 -0
- package/src/recovery/checkpoint.js +7 -1
- package/src/server.js +185 -14
- package/src/.registry-meta-key.pem +0 -3
|
@@ -0,0 +1,871 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* profile/capture.js — Cross-system profile bus, PHASE P1 (the INPUT side).
|
|
3
|
+
*
|
|
4
|
+
* Turns REAL sessions into the per-session METADATA the P2 heuristic derivation
|
|
5
|
+
* (./derive-heuristic.js `deriveStyle`) consumes. This is capture-hardening +
|
|
6
|
+
* anti-poison + anti-drift: the gate that decides what evidence is allowed to
|
|
7
|
+
* reach the one global profile, and in what shape.
|
|
8
|
+
*
|
|
9
|
+
* ── DATA MINIMIZATION (the load-bearing invariant) ──────────────────────────
|
|
10
|
+
* METADATA ONLY. We NEVER persist raw transcript text. `extractMessageMetadata`
|
|
11
|
+
* reduces a message to COUNTS (chars / emoji / code-block presence / formality
|
|
12
|
+
* marker hits) and discards the string immediately. The per-session accumulator
|
|
13
|
+
* and the wire record carry only numbers + booleans + an opaque salted identity.
|
|
14
|
+
* Storing message content anywhere is a P1 failure (see profile-capture.test).
|
|
15
|
+
*
|
|
16
|
+
* ── WIRING CONTRACT (other agents read this — match exactly) ────────────────
|
|
17
|
+
* OUTPUT FILE: <REPO_ROOT>/.ijfw/.session-style.jsonl — one JSON line per
|
|
18
|
+
* session, appended at SessionEnd:
|
|
19
|
+
* { ts, session_id, host, avg_msg_chars, emoji_rate, code_block_ratio,
|
|
20
|
+
* formality_markers, turn_cadence_s, msg_count,
|
|
21
|
+
* profile_influenced, global_eligible, quarantine_reason, identity,
|
|
22
|
+
* trust_weight, delta_cap }
|
|
23
|
+
*
|
|
24
|
+
* The five style fields line up with what `deriveStyle` consumes; `toDeriveMeta`
|
|
25
|
+
* renames the two that differ in unit (emoji_rate -> emoji_per_msg,
|
|
26
|
+
* turn_cadence_s -> turn_cadence_per_min) so the P2 input shape is exact.
|
|
27
|
+
* `formality_markers` and `code_block_ratio` are already in [0,1] as deriveStyle
|
|
28
|
+
* expects, so they pass through unchanged.
|
|
29
|
+
*
|
|
30
|
+
* ── LIFECYCLE WIRING ────────────────────────────────────────────────────────
|
|
31
|
+
* - Per message (UserPromptSubmit hook): `captureMessage({sessionId,text,...})`
|
|
32
|
+
* extracts metadata and folds it into the accumulator state file
|
|
33
|
+
* `.ijfw/.session-style-acc.json` (counts only). The hook ALREADY imports a
|
|
34
|
+
* module from mcp-server/src and ALREADY has the {session_id,prompt} payload
|
|
35
|
+
* — capture rides that same call site (see pre-prompt.sh wiring note in the
|
|
36
|
+
* P1 report). `profileInjected:true` is passed when a profile brief was
|
|
37
|
+
* injected into the turn, so the session can be excluded from re-derivation.
|
|
38
|
+
* - Session end (Stop hook): `flushSession({sessionId,...})` reads the
|
|
39
|
+
* accumulator, applies the hardening gates, appends ONE contract record, and
|
|
40
|
+
* clears the accumulator.
|
|
41
|
+
*
|
|
42
|
+
* Zero deps, Node ESM, no network, no child_process, no LLM.
|
|
43
|
+
*/
|
|
44
|
+
|
|
45
|
+
import {
|
|
46
|
+
readFileSync,
|
|
47
|
+
writeFileSync,
|
|
48
|
+
appendFileSync,
|
|
49
|
+
existsSync,
|
|
50
|
+
mkdirSync,
|
|
51
|
+
unlinkSync,
|
|
52
|
+
} from 'node:fs';
|
|
53
|
+
import { join } from 'node:path';
|
|
54
|
+
import { hostname } from 'node:os';
|
|
55
|
+
import { createHash } from 'node:crypto';
|
|
56
|
+
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
// Paths. All capture state is PROJECT-LOCAL under <repo>/.ijfw/ — unlike the
|
|
59
|
+
// global profile (homedir-rooted), the per-session capture stream is scoped to
|
|
60
|
+
// the project it was observed in. The dream/derive stage decides what, if
|
|
61
|
+
// anything, is promoted to the global profile (and the gates below decide what
|
|
62
|
+
// is even ELIGIBLE for that promotion).
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
// Names of the project-local state directory and the two files kept inside it.
const IJFW_DIR = '.ijfw';
const STYLE_FILE = '.session-style.jsonl';
const ACC_FILE = '.session-style-acc.json';

/**
 * Resolve the `.ijfw` directory for a project root.
 * A falsy `cwd` falls back to the current working directory of the process.
 */
function ijfwDir(cwd) {
  const base = cwd ? cwd : process.cwd();
  return join(base, IJFW_DIR);
}

/** Path of the per-session style stream file (`.session-style.jsonl`). */
export function styleFilePath(cwd) {
  const dir = ijfwDir(cwd);
  return join(dir, STYLE_FILE);
}

/** Path of the in-flight per-session accumulator file (`.session-style-acc.json`). */
export function accumulatorPath(cwd) {
  const dir = ijfwDir(cwd);
  return join(dir, ACC_FILE);
}

/** Create `dir` (and any missing parents) unless something already exists at that path. */
function ensureDir(dir) {
  if (existsSync(dir)) return;
  mkdirSync(dir, { recursive: true });
}
|
|
86
|
+
|
|
87
|
+
// ---------------------------------------------------------------------------
|
|
88
|
+
// P1.5 — PII / special-category deny-gate.
|
|
89
|
+
//
|
|
90
|
+
// We store interaction STYLE, never personal attributes. The deny-gate is a
|
|
91
|
+
// REAL check applied BEFORE persist: if any key in a candidate record names a
|
|
92
|
+
// special-category attribute (GDPR Art. 9 categories + obvious direct PII), the
|
|
93
|
+
// record is refused outright rather than redacted — we never want this class of
|
|
94
|
+
// data in the profile pipeline at all.
|
|
95
|
+
// ---------------------------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
/** Special-category / direct-PII attribute keys that must never be persisted. */
export const SPECIAL_CATEGORY_KEYS = Object.freeze([
  'race',
  'ethnicity',
  'religion',
  'religious_belief',
  'political_opinion',
  'political_affiliation',
  'sexual_orientation',
  'sex_life',
  'health',
  'health_condition',
  'medical',
  'disability',
  'genetic',
  'biometric',
  'trade_union',
  'criminal_record',
  // direct identifiers that are not interaction style
  'ssn',
  'national_id',
  'passport',
  'credit_card',
  'email',
  'phone',
  'address',
  'full_name',
  'date_of_birth',
  'dob',
]);

const SPECIAL_CATEGORY_SET = new Set(SPECIAL_CATEGORY_KEYS);

/**
 * Reduce a key to the snake_case spelling used in SPECIAL_CATEGORY_KEYS so
 * that `fullName`, `Full-Name`, ` full name ` and `FULL_NAME` all compare
 * equal to `full_name`.
 */
function canonicalAttributeKey(key) {
  return String(key)
    .trim()
    .replace(/([a-z0-9])([A-Z])/g, '$1_$2') // camelCase boundary -> underscore
    .replace(/[\s\-.]+/g, '_')              // spaces / hyphens / dots -> underscore
    .toLowerCase();
}

/**
 * assertNoSpecialCategory(attrs) -> { ok: true } | { ok: false, refused }.
 *
 * Refuses if ANY key names a special-category / direct-PII attribute.
 * Interaction-style metadata keys (avg_msg_chars, emoji_rate, …) are allowed.
 *
 * Matching is insensitive to case and to the key's spelling convention
 * (camelCase, kebab-case, spaces, surrounding whitespace). Nested objects and
 * arrays are inspected as well, because a nested value is serialized together
 * with its parent and would otherwise slip a denied key past the gate.
 *
 * The walk is breadth-first, so an offending top-level key is always reported
 * before a nested one, and a visited-set keeps cyclic inputs from looping.
 *
 * @param {*} attrs candidate record; non-objects are accepted as-is.
 * @returns {{ok: boolean, refused?: string}} `refused` is the offending key
 *   exactly as it was written in the input.
 */
export function assertNoSpecialCategory(attrs) {
  if (!attrs || typeof attrs !== 'object') return { ok: true };

  const seen = new Set([attrs]);
  const queue = [attrs];
  for (let i = 0; i < queue.length; i += 1) {
    const node = queue[i];
    for (const key of Object.keys(node)) {
      if (SPECIAL_CATEGORY_SET.has(canonicalAttributeKey(key))) {
        return { ok: false, refused: key };
      }
    }
    for (const value of Object.values(node)) {
      if (value && typeof value === 'object' && !seen.has(value)) {
        seen.add(value);
        queue.push(value);
      }
    }
  }
  return { ok: true };
}
|
|
146
|
+
|
|
147
|
+
// ---------------------------------------------------------------------------
|
|
148
|
+
// P1.6 — identity partitioning + shared-machine stance.
|
|
149
|
+
//
|
|
150
|
+
// The global profile contribution is bound to the OS user (+ a stable salt) so
|
|
151
|
+
// two OS users on one machine never blend into one profile. On an AMBIGUOUS
|
|
152
|
+
// shared machine (no resolvable user, or an explicit shared-machine signal) we
|
|
153
|
+
// REFUSE the global contribution — session-local capture still proceeds (we do
|
|
154
|
+
// not throw away the data), but `global_eligible` is false so the dream/derive
|
|
155
|
+
// stage will not promote it cross-machine.
|
|
156
|
+
// ---------------------------------------------------------------------------
|
|
157
|
+
|
|
158
|
+
/** Name of the environment variable that may override the identity salt. */
const IDENTITY_SALT_ENV = 'IJFW_IDENTITY_SALT';
/** Salt used when the override variable is unset or blank. */
const DEFAULT_SALT = 'ijfw-profile-identity-v1';

/**
 * Pick the OS user name out of an env-like object: the first truthy value of
 * USER, USERNAME, LOGNAME, stringified and trimmed. Returns '' when none is set.
 */
function resolveOsUser(env) {
  const source = env || {};
  const candidate = [source.USER, source.USERNAME, source.LOGNAME].find(Boolean);
  return String(candidate || '').trim();
}

/**
 * computeIdentity({env}) -> { identity, user, ambiguous }.
 *
 * `identity` is the first 24 hex characters of
 * sha256(`<salt>::<user or "UNKNOWN">::<machine hostname>`), so the raw
 * username is never exposed through it and the same username on a different
 * machine hashes differently.
 * `user` is the resolved OS user, or null when none could be resolved.
 * `ambiguous` is true when no user was resolved or IJFW_SHARED_MACHINE is
 * exactly '1' or 'true'.
 */
export function computeIdentity({ env } = {}) {
  const vars = env || {};
  const user = resolveOsUser(vars);

  const sharedSignal = ['1', 'true'].includes(vars.IJFW_SHARED_MACHINE);
  const ambiguous = !user || sharedSignal;

  // A blank (or whitespace-only) override falls back to the default salt.
  const rawSalt = vars[IDENTITY_SALT_ENV];
  const trimmedSalt = rawSalt ? String(rawSalt).trim() : '';
  const salt = trimmedSalt || DEFAULT_SALT;

  const basis = [salt, user || 'UNKNOWN', hostname()].join('::');
  const digest = createHash('sha256');
  digest.update(basis);
  const identity = digest.digest('hex').slice(0, 24);

  return { identity, user: user || null, ambiguous };
}
|
|
188
|
+
|
|
189
|
+
// ---------------------------------------------------------------------------
|
|
190
|
+
// P1.3 — context quarantine (anti-poison).
|
|
191
|
+
//
|
|
192
|
+
// CI / shared-runner / multi-tenant contexts must not poison the GLOBAL profile:
|
|
193
|
+
// a build agent's terse machine output is not the user's authorship style. We
|
|
194
|
+
// detect the common CI env vars + shared/multi-tenant path shapes and mark the
|
|
195
|
+
// session global-ineligible (session-local capture still records, for project
|
|
196
|
+
// scope, but it never reaches the cross-machine profile).
|
|
197
|
+
// ---------------------------------------------------------------------------
|
|
198
|
+
|
|
199
|
+
/** CI / automation environment variables (presence of any => CI context). */
const CI_ENV_VARS = Object.freeze([
  'CI',
  'CONTINUOUS_INTEGRATION',
  'GITHUB_ACTIONS',
  'GITLAB_CI',
  'BUILDKITE',
  'CIRCLECI',
  'TRAVIS',
  'JENKINS_URL',
  'TEAMCITY_VERSION',
  'TF_BUILD',
  'BITBUCKET_BUILD_NUMBER',
  'DRONE',
  'APPVEYOR',
  'CODEBUILD_BUILD_ID',
]);

/**
 * Path fragments that signal a shared runner / multi-tenant workspace.
 * Written with forward slashes; detectQuarantine normalizes the probed path
 * to that form before matching.
 */
const SHARED_PATH_PATTERNS = [
  /\/runner\/work\//i,        // GitHub Actions runner
  /\/var\/lib\/jenkins\//i,   // Jenkins
  /\/jenkins\/workspace\//i,
  /\/home\/circleci\//i,
  /\/builds\//i,              // GitLab CI default
  /\/buildkite\//i,
  /\/codebuild\//i,
];

/**
 * detectQuarantine({env, cwd}) -> { quarantined, reason }.
 *
 * Pure. Checks, in order:
 *   1. CI env vars — the first variable in CI_ENV_VARS whose value is set and
 *      is not '', '0' or 'false' (case-insensitive) yields
 *      `reason: "ci:<VAR>"`.
 *   2. Shared-path shapes — any SHARED_PATH_PATTERNS match on `cwd` yields
 *      `reason: "shared-path"`.
 * Otherwise `{ quarantined: false, reason: null }`.
 *
 * Before matching, `cwd` has backslashes converted to forward slashes (so
 * Windows-style paths such as `C:\jenkins\workspace\job` are recognized) and
 * is given a trailing slash (so a cwd that IS the shared directory, e.g.
 * `/home/circleci`, matches the same as anything beneath it).
 *
 * @param {{env?: object, cwd?: string}} [options]
 * @returns {{quarantined: boolean, reason: (string|null)}}
 */
export function detectQuarantine({ env, cwd } = {}) {
  const e = env || {};

  const isSet = (val) => {
    if (val === undefined || val === null) return false;
    const text = String(val);
    return text !== '' && text !== '0' && text.toLowerCase() !== 'false';
  };
  const ciVar = CI_ENV_VARS.find((name) => isSet(e[name]));
  if (ciVar !== undefined) {
    return { quarantined: true, reason: `ci:${ciVar}` };
  }

  const forwardSlashed = String(cwd || '').replace(/\\/g, '/');
  const probe = forwardSlashed.endsWith('/') ? forwardSlashed : `${forwardSlashed}/`;
  if (SHARED_PATH_PATTERNS.some((re) => re.test(probe))) {
    return { quarantined: true, reason: 'shared-path' };
  }
  return { quarantined: false, reason: null };
}
|
|
249
|
+
|
|
250
|
+
// ---------------------------------------------------------------------------
|
|
251
|
+
// P1.4 — per-host trust weighting + single-session influence cap + asymmetric
|
|
252
|
+
// decay.
|
|
253
|
+
//
|
|
254
|
+
// trustWeightForHost: first-party hosts (where we control the capture quality)
|
|
255
|
+
// are trusted more than unknown ones. Bounded (0,1].
|
|
256
|
+
//
|
|
257
|
+
// cappedDelta: a SINGLE session can never move an axis past STYLE_DELTA_CAP from
|
|
258
|
+
// its prior, no matter how extreme the sample or how high the weight/trust. This
|
|
259
|
+
// is the structural anti-drift guarantee the merge relies on.
|
|
260
|
+
//
|
|
261
|
+
// asymmetricStep: a CONTRADICTING signal (opposite side of the 0.5 midpoint from
|
|
262
|
+
// the current belief) adapts FASTER than a confirming one — so a real change in
|
|
263
|
+
// the user's behavior is reflected within a few sessions, while noise that
|
|
264
|
+
// merely re-confirms the current belief barely moves it. This is the classic
|
|
265
|
+
// "trust slowly, distrust quickly" asymmetry that keeps a stale profile from
|
|
266
|
+
// lagging a genuine shift.
|
|
267
|
+
// ---------------------------------------------------------------------------
|
|
268
|
+
|
|
269
|
+
/** Largest move, in either direction, that one step may apply to a style axis. */
export const STYLE_DELTA_CAP = 0.25;

/** Learning rate applied to a confirming signal (the slow rate). */
export const CONFIRM_ALPHA = 0.15;
/** Learning rate applied to a contradicting signal (the fast rate). */
export const CONTRADICT_ALPHA = 0.35;

/** Trust weight per known host name (lower-case keys). */
const HOST_TRUST = Object.freeze({
  'claude-code': 1.0,
  'claude': 1.0,
  'codex': 0.9,
  'gemini': 0.9,
  'cursor': 0.85,
  'windsurf': 0.85,
  'copilot': 0.8,
  'hermes': 0.8,
  'wayland': 0.8,
  'aider': 0.75,
});
/** Trust weight returned for any host not listed in HOST_TRUST. */
const UNKNOWN_HOST_TRUST = 0.5;

/**
 * trustWeightForHost(host) -> number.
 *
 * Looks the lower-cased, trimmed host name up in HOST_TRUST (own properties
 * only) and returns its weight; every other input yields UNKNOWN_HOST_TRUST.
 */
export function trustWeightForHost(host) {
  const normalized = String(host || '').toLowerCase().trim();
  return Object.prototype.hasOwnProperty.call(HOST_TRUST, normalized)
    ? HOST_TRUST[normalized]
    : UNKNOWN_HOST_TRUST;
}

/** Coerce to a number and clamp into [0,1]; non-finite input becomes 0. */
function clamp01(x) {
  const value = Number(x);
  if (!Number.isFinite(value)) return 0;
  return value < 0 ? 0 : value > 1 ? 1 : value;
}

/** Limit a signed step to the range [-STYLE_DELTA_CAP, +STYLE_DELTA_CAP]. */
function capStyleStep(step) {
  if (step > STYLE_DELTA_CAP) return STYLE_DELTA_CAP;
  if (step < -STYLE_DELTA_CAP) return -STYLE_DELTA_CAP;
  return step;
}

/**
 * cappedDelta(prior, sample, {weight, trust}) -> new value in [0,1].
 *
 * Moves `prior` toward `sample` by CONFIRM_ALPHA scaled with weight*trust
 * (each clamped to [0,1] first), limits that move to ±STYLE_DELTA_CAP, and
 * clamps the result back into [0,1].
 */
export function cappedDelta(prior, sample, { weight = 1, trust = 1 } = {}) {
  const from = clamp01(prior);
  const toward = clamp01(sample);
  const scale = clamp01(weight) * clamp01(trust);
  const move = capStyleStep((toward - from) * CONFIRM_ALPHA * scale);
  return clamp01(from + move);
}

/**
 * asymmetricStep(current, sample) -> new value in [0,1].
 *
 * When `sample` lies on the other side of the 0.5 midpoint from `current`
 * (0.5 itself counts as the upper side) the step uses CONTRADICT_ALPHA;
 * otherwise it uses CONFIRM_ALPHA. The step is limited to ±STYLE_DELTA_CAP
 * and the result is clamped into [0,1].
 */
export function asymmetricStep(current, sample) {
  const from = clamp01(current);
  const toward = clamp01(sample);
  const contradicting = (from >= 0.5) !== (toward >= 0.5);
  const rate = contradicting ? CONTRADICT_ALPHA : CONFIRM_ALPHA;
  const move = capStyleStep((toward - from) * rate);
  return clamp01(from + move);
}
|
|
345
|
+
|
|
346
|
+
// ---------------------------------------------------------------------------
|
|
347
|
+
// P1.1 — metadata-only message extraction.
|
|
348
|
+
//
|
|
349
|
+
// Reduce ONE message to counts and throw the text away. Emoji counting uses a
|
|
350
|
+
// Unicode property escape; formality markers are a small deterministic lexicon
|
|
351
|
+
// of polite/formal cues (mirrors feedback-detector's high-precision posture);
|
|
352
|
+
// code-block presence is a fenced-block check. NOTHING here returns the string.
|
|
353
|
+
// ---------------------------------------------------------------------------
|
|
354
|
+
|
|
355
|
+
// Emoji: every code point with the Unicode Extended_Pictographic property is
// one match (the `u` flag is required for property escapes). Matches are only
// counted, never kept.
const EMOJI_RE = /(\p{Extended_Pictographic})/gu;
// Code: a ``` or ~~~ fence anywhere, or a line opening with 4+ spaces followed
// by a non-whitespace character.
const CODE_FENCE_RE = /```|~~~|(?:^|\n) {4,}\S/; // fenced block or 4-space indent code
// Polite / formal cue lexicon; each pattern contributes at most one hit per message.
const FORMALITY_MARKERS = [
  /\bplease\b/i,
  /\bthank you\b/i,
  /\bthanks\b/i,
  /\bkindly\b/i,
  /\bcould you\b/i,
  /\bwould you\b/i,
  /\bI would appreciate\b/i,
  /\bregards\b/i,
  /\bsincerely\b/i,
  /\bcertainly\b/i,
  /\bfurthermore\b/i,
  /\bhowever\b/i,
];

/**
 * extractMessageMetadata(text) -> { chars, emojis, hasCode, formalityHits, words }.
 *
 * Reduces one message to counts and a boolean; the text itself is not part of
 * the result. Non-string input is treated as the empty string.
 *   chars         — string length (UTF-16 code units)
 *   emojis        — number of EMOJI_RE matches
 *   hasCode       — whether CODE_FENCE_RE matches
 *   formalityHits — how many distinct FORMALITY_MARKERS patterns match
 *   words         — whitespace-separated tokens of the trimmed text
 */
export function extractMessageMetadata(text) {
  const body = typeof text === 'string' ? text : '';

  const trimmed = body.trim();
  const words = trimmed === '' ? 0 : trimmed.split(/\s+/).length;

  const emojiMatches = body === '' ? null : body.match(EMOJI_RE);
  const emojis = emojiMatches === null ? 0 : emojiMatches.length;

  const hasCode = CODE_FENCE_RE.test(body);

  const formalityHits = FORMALITY_MARKERS.reduce(
    (hits, marker) => (marker.test(body) ? hits + 1 : hits),
    0,
  );

  return { chars: body.length, emojis, hasCode, formalityHits, words };
}
|
|
395
|
+
|
|
396
|
+
// ---------------------------------------------------------------------------
|
|
397
|
+
// Accumulator — per-session running totals (counts only). Read/modify/write of
|
|
398
|
+
// a small JSON file. Robust to a missing/corrupt accumulator (starts fresh).
|
|
399
|
+
// ---------------------------------------------------------------------------
|
|
400
|
+
|
|
401
|
+
/**
 * Build an empty per-session accumulator (version 1): all counters at zero,
 * `profile_influenced` false. `ts` seeds both `first_ts` and `last_ts` when it
 * is a finite number; otherwise both start as null. A falsy `sessionId` is
 * stored as null.
 */
function freshAccumulator(sessionId, ts) {
  const startTs = Number.isFinite(ts) ? ts : null;
  const zeroedCounters = {
    msg_count: 0,
    total_chars: 0,
    total_emojis: 0,
    code_msgs: 0,
    formality_msgs: 0, // messages that contained >=1 formality marker
    formality_hits: 0, // total marker hits (for density)
  };
  return {
    v: 1,
    session_id: sessionId || null,
    first_ts: startTs,
    last_ts: startTs,
    ...zeroedCounters,
    profile_influenced: false,
  };
}
|
|
416
|
+
|
|
417
|
+
/**
 * readAccumulator(cwd) -> accumulator object | null.
 *
 * Loads the per-session accumulator JSON for the project at `cwd`. Returns
 * null — so the caller starts fresh — when the file is missing, unreadable,
 * not valid JSON, or valid JSON of the wrong shape.
 *
 * The shape check matters: callers do `acc.<counter> += n`, so a parseable
 * file that lacks a counter (or holds a non-number there) would turn the
 * counter into NaN, which JSON.stringify then writes back as null and the
 * session's counts are silently lost. Such a file is treated as corrupt.
 */
function readAccumulator(cwd) {
  const p = accumulatorPath(cwd);
  if (!existsSync(p)) return null;
  try {
    const acc = JSON.parse(readFileSync(p, 'utf8'));
    if (!acc || typeof acc !== 'object' || Array.isArray(acc)) return null;
    const counters = [
      'msg_count',
      'total_chars',
      'total_emojis',
      'code_msgs',
      'formality_msgs',
      'formality_hits',
    ];
    const wellFormed = counters.every((k) => Number.isFinite(acc[k]) && acc[k] >= 0);
    return wellFormed ? acc : null;
  } catch {
    // unreadable / unparseable -> treat as absent (start fresh)
    return null;
  }
}
|
|
428
|
+
|
|
429
|
+
/**
 * Persist `acc` as compact JSON to the accumulator file of the project at
 * `cwd`, creating the `.ijfw` directory first if it is missing. Any
 * filesystem or serialization error propagates to the caller.
 */
function writeAccumulator(cwd, acc) {
  const dir = ijfwDir(cwd);
  ensureDir(dir);
  const target = accumulatorPath(cwd);
  const payload = JSON.stringify(acc);
  writeFileSync(target, payload, 'utf8');
}
|
|
433
|
+
|
|
434
|
+
/**
 * Delete the accumulator file of the project at `cwd` if it exists.
 * Best-effort: a failure to check or remove the file is deliberately ignored.
 */
function clearAccumulator(cwd) {
  const target = accumulatorPath(cwd);
  try {
    if (!existsSync(target)) return;
    unlinkSync(target);
  } catch {
    // best-effort: nothing to do if removal fails
  }
}
|
|
442
|
+
|
|
443
|
+
/**
 * captureMessage({sessionId, text, ts, profileInjected, cwd}) -> {ok} | {ok:false, code, error}.
 *
 * Per-message entrypoint (UserPromptSubmit hook). Extracts METADATA from `text`
 * via extractMessageMetadata, folds the counts into the per-session
 * accumulator file, and writes it back; the text itself is never stored.
 *
 * A new accumulator is started on the first message of a session, or when the
 * stored session_id differs from `sessionId` — a stale accumulator from a
 * previous session is replaced rather than blended.
 *
 * `ts` is the message time in epoch milliseconds. A missing timestamp
 * (undefined, null, a blank string, or a boolean) is treated as "no
 * timestamp": `Number(null)` / `Number('')` would otherwise coerce to 0 and
 * record the message at the Unix epoch, corrupting the first/last span the
 * cadence is derived from.
 *
 * `profileInjected:true` marks that a profile brief was injected into this turn
 * (P1.2) — the accumulator is then flagged profile_influenced for the rest of
 * the session.
 *
 * @returns {{ok: true} | {ok: false, code: 'EACC_WRITE', error: string}}
 */
export function captureMessage({ sessionId, text, ts, profileInjected, cwd } = {}) {
  const root = cwd || process.cwd();

  // Only real numeric input becomes a timestamp; see the doc note above.
  const tsMissing = ts === null
    || ts === undefined
    || typeof ts === 'boolean'
    || (typeof ts === 'string' && ts.trim() === '');
  const numTs = tsMissing ? NaN : Number(ts);

  let acc = readAccumulator(root);
  if (!acc || acc.session_id !== (sessionId || null)) {
    acc = freshAccumulator(sessionId || null, numTs);
  }

  const meta = extractMessageMetadata(text);
  acc.msg_count += 1;
  acc.total_chars += meta.chars;
  acc.total_emojis += meta.emojis;
  if (meta.hasCode) acc.code_msgs += 1;
  if (meta.formalityHits > 0) acc.formality_msgs += 1;
  acc.formality_hits += meta.formalityHits;
  if (Number.isFinite(numTs)) {
    if (acc.first_ts === null) acc.first_ts = numTs;
    acc.last_ts = numTs;
  }
  // P1.2: any brief-injected message taints the whole session for re-derivation.
  if (profileInjected === true) acc.profile_influenced = true;

  try {
    writeAccumulator(root, acc);
  } catch (err) {
    return { ok: false, code: 'EACC_WRITE', error: err.message };
  }
  return { ok: true };
}
|
|
484
|
+
|
|
485
|
+
// ---------------------------------------------------------------------------
|
|
486
|
+
// Resolve the capture host from env (which platform produced the session).
|
|
487
|
+
// ---------------------------------------------------------------------------
|
|
488
|
+
/**
 * Determine the capture host name from an env-like object: the first truthy
 * value of IJFW_HOST then IJFW_PLATFORM, stringified and trimmed. Falls back
 * to 'claude-code' when neither is set or the trimmed value is empty.
 */
function resolveHost(env) {
  const vars = env || {};
  const declared = [vars.IJFW_HOST, vars.IJFW_PLATFORM].find(Boolean);
  const cleaned = String(declared || 'claude-code').trim();
  return cleaned === '' ? 'claude-code' : cleaned;
}
|
|
493
|
+
|
|
494
|
+
/**
|
|
495
|
+
* flushSession({sessionId, ts, cwd, env, extraAttributes}) -> {ok}/{ok:false,...}.
|
|
496
|
+
*
|
|
497
|
+
* SessionEnd entrypoint. Reads the accumulator, computes the per-session contract
|
|
498
|
+
* record, applies ALL hardening gates, appends ONE JSON line to
|
|
499
|
+
* .ijfw/.session-style.jsonl, and clears the accumulator.
|
|
500
|
+
*
|
|
501
|
+
* Gate order (fail-closed BEFORE any write):
|
|
502
|
+
* 1. P1.5 PII deny-gate over `extraAttributes` and the derived record — refuse
|
|
503
|
+
* to persist anything carrying a special-category attribute.
|
|
504
|
+
* 2. P1.6 identity binding + shared-machine stance (sets identity; ambiguous
|
|
505
|
+
* machine => global ineligible).
|
|
506
|
+
* 3. P1.3 context quarantine (CI / shared path => global ineligible).
|
|
507
|
+
* 4. P1.2 profile_influenced flag carried through from the accumulator.
|
|
508
|
+
* 5. P1.4 per-host trust weight + delta cap stamped onto the record.
|
|
509
|
+
*/
|
|
510
|
+
export function flushSession({ sessionId, ts, cwd, env, extraAttributes } = {}) {
|
|
511
|
+
const root = cwd || process.cwd();
|
|
512
|
+
const e = env || {};
|
|
513
|
+
|
|
514
|
+
// P1.5 — deny-gate on any caller-supplied attributes BEFORE we do anything.
|
|
515
|
+
if (extraAttributes !== undefined) {
|
|
516
|
+
const gate = assertNoSpecialCategory(extraAttributes);
|
|
517
|
+
if (!gate.ok) {
|
|
518
|
+
return { ok: false, code: 'ESPECIAL_CATEGORY_DENIED', refused: gate.refused,
|
|
519
|
+
error: `special-category attribute refused before persist: ${gate.refused}` };
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
const acc = readAccumulator(root);
|
|
524
|
+
if (!acc || acc.msg_count <= 0) {
|
|
525
|
+
// Nothing captured this session — clean no-op, no empty record.
|
|
526
|
+
clearAccumulator(root);
|
|
527
|
+
return { ok: true, skipped: 'no-messages' };
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
const msgCount = acc.msg_count;
|
|
531
|
+
const avgChars = acc.total_chars / msgCount;
|
|
532
|
+
const emojiRate = acc.total_emojis / msgCount; // emoji per message
|
|
533
|
+
const codeRatio = clamp01(acc.code_msgs / msgCount); // fraction of msgs w/ code, [0,1]
|
|
534
|
+
// formality_markers as a [0,1] density: fraction of messages that carried a
|
|
535
|
+
// formal marker. This is exactly what deriveStyle expects (0..1 marker density).
|
|
536
|
+
const formality = clamp01(acc.formality_msgs / msgCount);
|
|
537
|
+
// Turn cadence in SECONDS between messages (the contract unit). With one
|
|
538
|
+
// message there is no inter-turn gap; report 0 (deriveStyle treats it as a
|
|
539
|
+
// present-but-zero cadence). With N messages over a span, mean gap = span/(N-1).
|
|
540
|
+
let cadenceS = 0;
|
|
541
|
+
if (msgCount > 1 && Number.isFinite(acc.first_ts) && Number.isFinite(acc.last_ts) && acc.last_ts > acc.first_ts) {
|
|
542
|
+
cadenceS = ((acc.last_ts - acc.first_ts) / 1000) / (msgCount - 1);
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
const host = resolveHost(e);
|
|
546
|
+
|
|
547
|
+
// P1.6 — identity binding + shared-machine stance.
|
|
548
|
+
const id = computeIdentity({ env: e });
|
|
549
|
+
|
|
550
|
+
// P1.3 — context quarantine.
|
|
551
|
+
const quar = detectQuarantine({ env: e, cwd: root });
|
|
552
|
+
|
|
553
|
+
// global eligibility: a session may contribute to the cross-machine GLOBAL
|
|
554
|
+
// profile only if it is NOT quarantined AND the identity is NOT ambiguous.
|
|
555
|
+
const globalEligible = !quar.quarantined && !id.ambiguous;
|
|
556
|
+
const quarantineReason = quar.quarantined
|
|
557
|
+
? quar.reason
|
|
558
|
+
: (id.ambiguous ? 'ambiguous-identity' : null);
|
|
559
|
+
|
|
560
|
+
// P1.4 — per-host trust + the single-session influence cap, stamped so the
|
|
561
|
+
// dream/derive + merge stages apply them.
|
|
562
|
+
const trustWeight = trustWeightForHost(host);
|
|
563
|
+
|
|
564
|
+
const record = {
|
|
565
|
+
ts: Number.isFinite(Number(ts)) ? Number(ts) : (Number.isFinite(acc.last_ts) ? acc.last_ts : Date.now()),
|
|
566
|
+
session_id: acc.session_id || sessionId || null,
|
|
567
|
+
host,
|
|
568
|
+
// ── style metadata (lines up with deriveStyle via toDeriveMeta) ──
|
|
569
|
+
avg_msg_chars: Math.round(avgChars * 100) / 100,
|
|
570
|
+
emoji_rate: Math.round(emojiRate * 1000) / 1000,
|
|
571
|
+
code_block_ratio: Math.round(codeRatio * 1000) / 1000,
|
|
572
|
+
formality_markers: Math.round(formality * 1000) / 1000,
|
|
573
|
+
turn_cadence_s: Math.round(cadenceS * 100) / 100,
|
|
574
|
+
msg_count: msgCount,
|
|
575
|
+
// ── hardening metadata ──
|
|
576
|
+
profile_influenced: acc.profile_influenced === true, // P1.2
|
|
577
|
+
global_eligible: globalEligible, // P1.3 + P1.6
|
|
578
|
+
quarantine_reason: quarantineReason,
|
|
579
|
+
identity: id.identity, // P1.6
|
|
580
|
+
trust_weight: trustWeight, // P1.4
|
|
581
|
+
delta_cap: STYLE_DELTA_CAP, // P1.4
|
|
582
|
+
};
|
|
583
|
+
|
|
584
|
+
// P1.5 — final deny-gate over the assembled record itself (defense in depth:
|
|
585
|
+
// no special-category key may have crept in via any path).
|
|
586
|
+
const recGate = assertNoSpecialCategory(record);
|
|
587
|
+
if (!recGate.ok) {
|
|
588
|
+
return { ok: false, code: 'ESPECIAL_CATEGORY_DENIED', refused: recGate.refused,
|
|
589
|
+
error: `special-category attribute refused before persist: ${recGate.refused}` };
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
// Merge caller-supplied NON-special attributes (already gated) — additive,
|
|
593
|
+
// never overwriting the contract fields.
|
|
594
|
+
if (extraAttributes && typeof extraAttributes === 'object') {
|
|
595
|
+
for (const [k, v] of Object.entries(extraAttributes)) {
|
|
596
|
+
if (!Object.prototype.hasOwnProperty.call(record, k)) record[k] = v;
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
try {
|
|
601
|
+
ensureDir(ijfwDir(root));
|
|
602
|
+
appendFileSync(styleFilePath(root), `${JSON.stringify(record)}\n`, 'utf8');
|
|
603
|
+
} catch (err) {
|
|
604
|
+
return { ok: false, code: 'EWRITE', error: err.message };
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
clearAccumulator(root);
|
|
608
|
+
return { ok: true, record };
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
// ---------------------------------------------------------------------------
|
|
612
|
+
// Adapter: wire record -> exact deriveStyle input shape.
|
|
613
|
+
//
|
|
614
|
+
// deriveStyle reads: avg_msg_chars, emoji_per_msg, code_block_ratio,
|
|
615
|
+
// formality_markers, turn_cadence_per_min. Two of our contract fields differ in
|
|
616
|
+
// unit/name: emoji_rate (per msg) -> emoji_per_msg; turn_cadence_s (seconds
|
|
617
|
+
// between turns) -> turn_cadence_per_min (turns per minute). We convert here so
|
|
618
|
+
// P2 never has to know the wire format.
|
|
619
|
+
// ---------------------------------------------------------------------------
|
|
620
|
+
|
|
621
|
+
/**
 * toDeriveMeta(record) -> the meta object handed to `deriveStyle`.
 *
 * Pure adapter from the persisted wire record to the derive-side field names.
 * A field is emitted only when its source is neither `undefined` nor `null`,
 * so a partial record produces a partial meta.
 *
 * Renames / conversions performed here:
 *   - emoji_rate     -> emoji_per_msg (value copied as-is)
 *   - turn_cadence_s -> turn_cadence_per_min (60 / seconds; a gap that is not
 *                       a finite positive number maps to 0)
 *   - trust_weight   -> copied when it coerces to a finite number, otherwise
 *                       looked up from `host` through trustWeightForHost
 *
 * @param {object} record a style record; any non-object is treated as empty
 * @returns {object} the derive meta
 */
export function toDeriveMeta(record) {
  const src = record && typeof record === 'object' ? record : {};
  const isPresent = (value) => value !== undefined && value !== null;
  const out = {};

  // [wire field, derive field] pairs whose value is copied through untouched.
  const copied = [
    ['avg_msg_chars', 'avg_msg_chars'],
    ['emoji_rate', 'emoji_per_msg'],
    ['code_block_ratio', 'code_block_ratio'],
    ['formality_markers', 'formality_markers'],
  ];
  for (const [wireKey, deriveKey] of copied) {
    if (isPresent(src[wireKey])) out[deriveKey] = src[wireKey];
  }

  if (isPresent(src.turn_cadence_s)) {
    // Seconds between turns -> turns per minute; a zero gap stays a present 0.
    const gapSeconds = Number(src.turn_cadence_s);
    const usable = Number.isFinite(gapSeconds) && gapSeconds > 0;
    out.turn_cadence_per_min = usable ? 60 / gapSeconds : 0;
  }

  // Carry the per-host trust weight so the merge fold can scale by it. Prefer
  // the stamped value; fall back to the host lookup for records without one.
  const stampedTrust = isPresent(src.trust_weight) ? Number(src.trust_weight) : NaN;
  if (Number.isFinite(stampedTrust)) {
    out.trust_weight = stampedTrust;
  } else if (isPresent(src.host)) {
    out.trust_weight = trustWeightForHost(src.host);
  }

  return out;
}
|
|
653
|
+
|
|
654
|
+
// ===========================================================================
|
|
655
|
+
// S4 — EDIT-DELTA CAPTURE: the CORRECTION LOOP (the HEART of the product).
|
|
656
|
+
//
|
|
657
|
+
// The strongest, currently-missing personalization signal is NOT a regex
|
|
658
|
+
// trigger in a prompt — it is the clean ground-truth DIFF between what the
|
|
659
|
+
// AGENT PROPOSED and what the USER COMMITTED it to. Per the cross-audit: that
|
|
660
|
+
// diff IS the citation. An X->Y correction ("agent wrote `var`, user changed it
|
|
661
|
+
// to `const`") is direct evidence of a preference, grounded in an actual edit.
|
|
662
|
+
//
|
|
663
|
+
// We observe this at the PostToolUse boundary for Edit/Write tools:
|
|
664
|
+
// - Edit tool_input: { file_path, old_string (PROPOSED prior), new_string (COMMITTED) }
|
|
665
|
+
// - Write tool_input: { file_path, content (COMMITTED), } (+ prior file content = PROPOSED)
|
|
666
|
+
// A first edit landing is the agent's proposal; a LATER edit to the SAME span/
|
|
667
|
+
// file is the user's correction of it. We record EVERY edit-delta as an
|
|
668
|
+
// evidence row carrying (a) a real CITED SPAN (PII-scrubbed excerpt + content
|
|
669
|
+
// hashes for dedupe), (b) SCOPE (file-pattern / language / repo-relative path),
|
|
670
|
+
// and (c) an authorship OUTCOME (accept vs edit-after) for the expertise tier.
|
|
671
|
+
//
|
|
672
|
+
// DATA MINIMIZATION (same invariant as the style stream): we DO NOT persist the
|
|
673
|
+
// full file. The cited span is a BOUNDED, PII-scrubbed excerpt of the changed
|
|
674
|
+
// region only; the proposed/committed bodies are reduced to salted content
|
|
675
|
+
// HASHES so two identical edits dedupe without ever storing raw secrets.
|
|
676
|
+
//
|
|
677
|
+
// WIRING CONTRACT (the dream runner consumes this exactly like the feedback
|
|
678
|
+
// stream): OUTPUT FILE <REPO_ROOT>/.ijfw/.session-edits.jsonl — one JSON line
|
|
679
|
+
// per observed edit-delta:
|
|
680
|
+
// { ts, session_id, host, scope:{file_pattern,language,repo_rel},
|
|
681
|
+
// outcome:'accept'|'edit-after', proposed_hash, committed_hash,
|
|
682
|
+
// cited_span, direction, identity, trust_weight }
|
|
683
|
+
// ===========================================================================
|
|
684
|
+
|
|
685
|
+
// Basename of the edit-delta evidence stream; editFilePath joins it onto ijfwDir(cwd).
const EDIT_FILE = '.session-edits.jsonl';
|
|
686
|
+
|
|
687
|
+
/**
 * Location of the append-only per-session edit-delta evidence stream (S4
 * contract): the EDIT_FILE basename inside the directory ijfwDir(cwd) returns.
 *
 * @param {string} cwd value forwarded unchanged to ijfwDir
 * @returns {string} path of the edit-delta stream file
 */
export function editFilePath(cwd) {
  const stateDir = ijfwDir(cwd);
  return join(stateDir, EDIT_FILE);
}
|
|
691
|
+
|
|
692
|
+
/**
 * Max length of the human-glanceable cited span kept for a changed region.
 * citedSpan applies it as a String.prototype.slice bound, so it is measured in
 * UTF-16 code units rather than in user-perceived characters.
 */
export const CITED_SPAN_MAX = 160;
|
|
694
|
+
|
|
695
|
+
// Extension -> coarse language label. Scope is "where this preference applies"
// (a file-pattern / language / repo-relative path) — NOT the raw absolute path,
// which could leak a username via /Users/<name>/ or /home/<name>/.
//
// Declared as language -> extensions groups and flattened into the frozen
// extension-keyed lookup; keys are lowercase extensions without the dot.
const EXT_LANGUAGE = Object.freeze(Object.fromEntries([
  ['javascript', ['js', 'mjs', 'cjs', 'jsx']],
  ['typescript', ['ts', 'tsx', 'mts', 'cts']],
  ['python', ['py']],
  ['ruby', ['rb']],
  ['go', ['go']],
  ['rust', ['rs']],
  ['java', ['java']],
  ['kotlin', ['kt']],
  ['c', ['c', 'h']],
  ['cpp', ['cc', 'cpp', 'hpp']],
  ['csharp', ['cs']],
  ['php', ['php']],
  ['swift', ['swift']],
  ['scala', ['scala']],
  ['shell', ['sh', 'bash', 'zsh']],
  ['markdown', ['md']],
  ['json', ['json']],
  ['yaml', ['yml', 'yaml']],
  ['toml', ['toml']],
  ['html', ['html']],
  ['css', ['css', 'scss']],
  ['sql', ['sql']],
].flatMap(([label, extensions]) => extensions.map((extension) => [extension, label]))));
|
|
707
|
+
|
|
708
|
+
/**
 * scopeForPath(filePath, {cwd}) -> { file_pattern, language, repo_rel }.
 *
 * Pure. Derives the SCOPE a preference grounded in an edit applies to, WITHOUT
 * persisting an absolute path (which can carry the OS username — a direct
 * identifier we never store).
 *
 * @param {string} filePath path of the edited file; either separator accepted
 * @param {{cwd?: string}} [options] `cwd` is the project root used for repo_rel
 * @returns {{file_pattern: string, language: string, repo_rel: string}}
 *   - file_pattern: `*.<ext>` for a file with an extension, else the basename
 *     (or `*` when there is no basename at all)
 *   - language: the EXT_LANGUAGE label for a known extension, else the raw
 *     extension, else `'unknown'`
 *   - repo_rel: the normalized path relative to `cwd` when the file resolves
 *     inside it; otherwise just the basename — never an absolute path and
 *     never a path that climbs out of the root
 */
export function scopeForPath(filePath, { cwd } = {}) {
  const raw = String(filePath || '').trim();
  // Basename + extension, robust to either path separator.
  const base = raw.split(/[/\\]/).filter(Boolean).pop() || '';
  const dot = base.lastIndexOf('.');
  // dot > 0 keeps dotfiles such as `.gitignore` extension-less.
  const ext = dot > 0 ? base.slice(dot + 1).toLowerCase() : '';
  const file_pattern = ext ? `*.${ext}` : (base || '*');

  // Own-property lookup only: the extension is caller-controlled text, and a
  // bare `EXT_LANGUAGE[ext]` would resolve inherited keys like `constructor`
  // or `__proto__` to a function/object instead of a label.
  const known = ext !== '' && Object.prototype.hasOwnProperty.call(EXT_LANGUAGE, ext);
  const language = known ? EXT_LANGUAGE[ext] : (ext || 'unknown');

  // repo-relative path ONLY when the file is inside the project root; otherwise
  // fall back to the basename so we never persist a homedir-rooted absolute path.
  let repo_rel = base;
  const root = String(cwd || '');
  if (root && raw) {
    const norm = raw.replace(/\\/g, '/');
    const nroot = root.replace(/\\/g, '/').replace(/\/+$/, '');
    if (norm.startsWith(`${nroot}/`)) {
      const inside = collapseRepoRel(norm.slice(nroot.length + 1));
      if (inside !== null) repo_rel = inside;
    }
  }
  return { file_pattern, language, repo_rel };
}

/**
 * Collapse `.`, empty and `..` segments of a forward-slash relative path.
 * Returns null when nothing is left or when a `..` would climb above the
 * starting directory, so a `<root>/../elsewhere` path is never reported as
 * repo-relative.
 */
function collapseRepoRel(rel) {
  const kept = [];
  for (const segment of rel.split('/')) {
    if (segment === '' || segment === '.') continue;
    if (segment === '..') {
      if (kept.length === 0) return null; // escapes the root
      kept.pop();
      continue;
    }
    kept.push(segment);
  }
  return kept.length > 0 ? kept.join('/') : null;
}
|
|
738
|
+
|
|
739
|
+
/**
 * Salted content hash of an edit body — a dedupe key, never the raw body.
 *
 * The salt is the trimmed value of the environment variable named by
 * IDENTITY_SALT_ENV when that is non-blank, otherwise DEFAULT_SALT. The digest
 * is SHA-256 over `<salt>::edit::<body>`, hex-encoded and cut to 16 characters.
 *
 * @param {*} s edit body; null/undefined hash as the empty string
 * @returns {string} 16 hex characters
 */
function contentHash(s) {
  const configured = process.env[IDENTITY_SALT_ENV];
  const trimmed = configured ? String(configured).trim() : '';
  const salt = trimmed || DEFAULT_SALT;
  const body = s == null ? '' : String(s);

  const hasher = createHash('sha256');
  hasher.update(`${salt}::edit::${body}`);
  return hasher.digest('hex').slice(0, 16);
}
|
|
746
|
+
|
|
747
|
+
// Direct-identifier patterns scrubbed from a cited span before persist. Mirrors
|
|
748
|
+
// derive-heuristic.js PII_PATTERNS in spirit (kept local — capture.js must not
|
|
749
|
+
// import the derive module, to keep the zero-LLM/zero-network import graph of
|
|
750
|
+
// the derive side clean). Keep in sync deliberately if either list grows.
|
|
751
|
+
const EDIT_PII_PATTERNS = [
|
|
752
|
+
/[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}/gi, // email
|
|
753
|
+
// eslint-disable-next-line security/detect-unsafe-regex -- linear-time PII redactor: each repetition consumes a mandatory digit, no ambiguous/overlapping quantifier; not ReDoS-exploitable.
|
|
754
|
+
/(?:\+?\d[\s().-]?){7,}\d/g, // phone-ish run of digits
|
|
755
|
+
/\b\d{3}-\d{2}-\d{4}\b/g, // US SSN shape
|
|
756
|
+
// eslint-disable-next-line security/detect-unsafe-regex -- linear-time PII redactor: each repetition consumes a mandatory digit, no ambiguous/overlapping quantifier; not ReDoS-exploitable.
|
|
757
|
+
/\b(?:\d[ -]?){13,19}\b/g, // card-ish long digit run
|
|
758
|
+
/\b[A-Za-z0-9_-]*(?:secret|token|api[_-]?key|password|passwd|bearer)[A-Za-z0-9_-]*\s*[:=]\s*\S+/gi, // assigned secret
|
|
759
|
+
];
|
|
760
|
+
|
|
761
|
+
/**
 * citedSpan(committed) -> a bounded, PII-scrubbed excerpt of the CHANGED
 * region. This is the human-glanceable citation ("you changed it TO this").
 *
 * Pure. Takes the committed side (the user's final), replaces every
 * EDIT_PII_PATTERNS match with a space, collapses whitespace runs to a single
 * space, trims, and caps the result at CITED_SPAN_MAX UTF-16 code units. When
 * the cap would land between the two halves of a surrogate pair the dangling
 * high surrogate is dropped, so the stored excerpt is always well-formed text
 * (it can then be one unit shorter than the cap).
 *
 * @param {*} committed committed body; null/undefined are treated as ''
 * @returns {string} the scrubbed excerpt, at most CITED_SPAN_MAX units long
 */
export function citedSpan(committed) {
  let s = String(committed == null ? '' : committed);
  for (const re of EDIT_PII_PATTERNS) s = s.replace(re, ' ');
  s = s.replace(/\s+/g, ' ').trim();
  if (s.length <= CITED_SPAN_MAX) return s;

  let end = CITED_SPAN_MAX;
  const lastUnit = s.charCodeAt(end - 1);
  // 0xD800–0xDBFF is the high-surrogate range: its partner sits past the cut.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  return s.slice(0, end);
}
|
|
773
|
+
|
|
774
|
+
/**
 * extractEditDelta({ proposed, committed, filePath, cwd }) ->
 *   { changed, outcome, scope, proposed_hash, committed_hash, cited_span, direction }
 *   | null.
 *
 * METADATA-MINIMIZED. The ground-truth correction signal.
 *
 * Returns null when there is nothing to record: `filePath` is blank, or both
 * bodies are empty (null/undefined count as empty). Otherwise:
 *   - `changed` is true when the committed body differs from the proposed one,
 *     and `outcome` is then `'edit-after'`; identical bodies give `'accept'`.
 *   - `scope` comes from scopeForPath(filePath, { cwd }).
 *   - `proposed_hash` / `committed_hash` are contentHash of each body.
 *   - `cited_span` and `direction` are the same citedSpan excerpt of the
 *     committed side; it is computed once and shared by both fields.
 *
 * @param {{proposed?: *, committed?: *, filePath?: string, cwd?: string}} [input]
 * @returns {object|null}
 */
export function extractEditDelta({ proposed, committed, filePath, cwd } = {}) {
  const fp = String(filePath || '').trim();
  if (!fp) return null;
  const prop = proposed == null ? '' : String(proposed);
  const comm = committed == null ? '' : String(committed);
  if (prop === '' && comm === '') return null; // nothing observed

  const changed = prop !== comm;
  // One scrub pass serves both fields (direction is an explicit alias).
  const span = citedSpan(comm);
  return {
    changed,
    outcome: changed ? 'edit-after' : 'accept',
    scope: scopeForPath(fp, { cwd }),
    proposed_hash: contentHash(prop),
    committed_hash: contentHash(comm),
    cited_span: span,
    direction: span,
  };
}
|
|
804
|
+
|
|
805
|
+
/**
 * captureEditDelta({ sessionId, filePath, proposed, committed, ts, cwd, env,
 *                    profileInjected })
 *   -> { ok:true, record }
 *    | { ok:true, skipped:'no-delta' }
 *    | { ok:false, code, error, refused? }.
 *
 * PostToolUse(Edit|Write) entrypoint. Computes the metadata-minimized edit-delta
 * and APPENDS one evidence row to the file editFilePath(root) names. Best-effort
 * and fail-soft: every stage that can fail is caught and reported through
 * `code`, so the hook never throws into the tool call:
 *   - EEXTRACT                 extractEditDelta threw
 *   - EASSEMBLE                host / identity / quarantine / gate lookup threw
 *   - ESPECIAL_CATEGORY_DENIED the assembled record failed the deny-gate
 *   - EWRITE                   creating the directory or appending failed
 *
 * The row is identity-bound + trust-weighted exactly like the style stream so
 * the global merge can apply the same eligibility gates.
 *
 * @param {object} [input]
 * @param {string} [input.sessionId] stored as `session_id` (null when falsy)
 * @param {string} [input.filePath] edited file; blank means nothing to record
 * @param {*} [input.proposed] body the agent proposed
 * @param {*} [input.committed] body that was committed
 * @param {*} [input.ts] timestamp; kept when it coerces to a finite number,
 *   otherwise Date.now() is used. null, booleans and blank strings count as
 *   absent (they would otherwise coerce to 0/1 and be stamped as the epoch).
 * @param {string} [input.cwd] project root; defaults to process.cwd()
 * @param {object} [input.env] environment handed to the host/identity/quarantine
 *   helpers; defaults to an empty object
 * @param {boolean} [input.profileInjected] only a strict `true` marks the row
 *   `profile_influenced`, so the dream runner can exclude self-reinforcing edits
 */
export function captureEditDelta({
  sessionId, filePath, proposed, committed, ts, cwd, env, profileInjected,
} = {}) {
  const root = cwd || process.cwd();
  const e = env || {};

  let delta;
  try {
    delta = extractEditDelta({ proposed, committed, filePath, cwd: root });
  } catch (err) {
    return { ok: false, code: 'EEXTRACT', error: err.message };
  }
  if (!delta) return { ok: true, skipped: 'no-delta' };

  let record;
  let recGate;
  try {
    const host = resolveHost(e);
    const id = computeIdentity({ env: e });
    const quar = detectQuarantine({ env: e, cwd: root });
    // Eligible for the GLOBAL profile only when neither gate objects.
    const globalEligible = !quar.quarantined && !id.ambiguous;
    const quarantineReason = quar.quarantined
      ? quar.reason
      : (id.ambiguous ? 'ambiguous-identity' : null);

    record = {
      ts: resolveEditTs(ts),
      session_id: sessionId || null,
      host,
      scope: delta.scope,
      outcome: delta.outcome, // 'accept' | 'edit-after'
      changed: delta.changed,
      proposed_hash: delta.proposed_hash,
      committed_hash: delta.committed_hash,
      cited_span: delta.cited_span, // bounded, PII-scrubbed citation
      direction: delta.direction,
      profile_influenced: profileInjected === true,
      global_eligible: globalEligible,
      quarantine_reason: quarantineReason,
      identity: id.identity,
      trust_weight: trustWeightForHost(host),
    };

    // Defense in depth: the assembled record is gated the same way the style
    // record is — a special-category attribute on any path is refused.
    recGate = assertNoSpecialCategory(record);
  } catch (err) {
    return { ok: false, code: 'EASSEMBLE', error: err.message };
  }

  if (!recGate.ok) {
    return { ok: false, code: 'ESPECIAL_CATEGORY_DENIED', refused: recGate.refused,
      error: `special-category attribute refused before persist: ${recGate.refused}` };
  }

  try {
    ensureDir(ijfwDir(root));
    appendFileSync(editFilePath(root), `${JSON.stringify(record)}\n`, 'utf8');
  } catch (err) {
    return { ok: false, code: 'EWRITE', error: err.message };
  }
  return { ok: true, record };
}

/**
 * Timestamp for an edit row: the caller's `ts` when it is a real numeric value,
 * else the current time. Number(null), Number('') and Number(false) are all a
 * finite 0, so those inputs are screened out before coercion.
 */
function resolveEditTs(ts) {
  const absent = ts == null
    || typeof ts === 'boolean'
    || (typeof ts === 'string' && ts.trim() === '');
  const numeric = absent ? NaN : Number(ts);
  return Number.isFinite(numeric) ? numeric : Date.now();
}
|