@druumen/sessions-db 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +249 -0
- package/LICENSE +201 -0
- package/NOTICE +10 -0
- package/README.md +250 -0
- package/cli/_write-helpers.mjs +99 -0
- package/cli/alias.mjs +115 -0
- package/cli/argparse.mjs +296 -0
- package/cli/close.mjs +116 -0
- package/cli/find.mjs +185 -0
- package/cli/format.mjs +277 -0
- package/cli/link-parent.mjs +133 -0
- package/cli/link.mjs +132 -0
- package/cli/rebuild.mjs +98 -0
- package/cli/sessions-db-session-start-main.mjs +454 -0
- package/cli/sessions-db-session-start.mjs +56 -0
- package/cli/sessions-db.mjs +119 -0
- package/cli/sweep.mjs +171 -0
- package/cli/tree.mjs +127 -0
- package/lib/git-context.mjs +479 -0
- package/lib/identity.mjs +616 -0
- package/lib/index.mjs +145 -0
- package/lib/init.mjs +185 -0
- package/lib/lock.mjs +86 -0
- package/lib/operations.mjs +490 -0
- package/lib/paths.mjs +199 -0
- package/lib/projection.mjs +496 -0
- package/lib/sanitize.mjs +131 -0
- package/lib/storage.mjs +759 -0
- package/lib/sweep.mjs +209 -0
- package/lib/transcript.mjs +230 -0
- package/lib/types.mjs +276 -0
- package/lib/uuid.mjs +116 -0
- package/lib/watch.mjs +217 -0
- package/package.json +53 -0
- package/types/git-context.d.mts +98 -0
- package/types/identity.d.mts +658 -0
- package/types/index.d.mts +10 -0
- package/types/index.d.ts +127 -0
- package/types/init.d.mts +53 -0
- package/types/lock.d.mts +18 -0
- package/types/operations.d.mts +204 -0
- package/types/paths.d.mts +54 -0
- package/types/projection.d.mts +79 -0
- package/types/sanitize.d.mts +39 -0
- package/types/storage.d.mts +276 -0
- package/types/sweep.d.mts +58 -0
- package/types/transcript.d.mts +59 -0
- package/types/types.d.mts +255 -0
- package/types/uuid.d.mts +17 -0
- package/types/watch.d.mts +33 -0
|
@@ -0,0 +1,496 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure projection logic for sessions-db.
|
|
3
|
+
*
|
|
4
|
+
* Events are appended to `events.jsonl` (SSoT). The projection cache
|
|
5
|
+
* (`sessions-db.json`) is a fold of all events: `events → reduce → state`.
|
|
6
|
+
* This module contains zero IO — it only knows how to fold one or more
|
|
7
|
+
* events into a projection object. The `storage.mjs` wrapper handles disk.
|
|
8
|
+
*
|
|
9
|
+
* Schema v0.2 — see Phase 1 ticket §2 "Projection schema".
|
|
10
|
+
*
|
|
11
|
+
* Idempotency contract:
|
|
12
|
+
* - Folding the same event sequence twice (each time starting from an empty
|
|
13
|
+
* projection) yields equivalent projections: sessions are merged, not
|
|
14
|
+
* duplicated, and arrays are deduped where their identity is well-defined.
|
|
15
|
+
* `_meta.event_count` is bumped once per applied event, so re-applying events onto an existing projection counts them again.
|
|
16
|
+
* - Reducers mutate `projection` in place and return the same reference;
|
|
17
|
+
* callers can use either the return value or the mutated input.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
const SCHEMA_VERSION = 2;
const FINGERPRINT_VERSIONS = ['first_human_prompt_v1', 'lineage_prefix_v1'];

/**
 * Create a brand-new projection: no sessions and zeroed metadata
 * (`event_count` 0, `last_event_id` and `updated` both null).
 *
 * Every call returns fresh objects, including its own copy of the
 * fingerprint-version list, so the result can be mutated freely.
 *
 * @returns {{ _meta: object, sessions: Record<string, object> }}
 */
export function emptyProjection() {
  const meta = {
    schema_version: SCHEMA_VERSION,
    fingerprint_versions: FINGERPRINT_VERSIONS.slice(),
    updated: null,
    event_count: 0,
    last_event_id: null,
  };
  const sessions = {};
  return { _meta: meta, sessions };
}
|
|
41
|
+
|
|
42
|
+
/**
 * Build a default session record for `stableId`.
 *
 * `ts` seeds both `created_at` and `last_progress_at`. When the caller has no
 * timestamp, both fields are stored as `null` rather than `undefined`:
 * `undefined` properties are dropped by JSON serialization, which would make
 * the persisted record differ in shape from the in-memory one.
 *
 * @param {string} stableId
 * @param {string} [ts] - ISO timestamp string used for both created_at and
 *   last_progress_at.
 * @returns {object} a fresh session record (no shared array/object references)
 */
export function emptySession(stableId, ts) {
  const observedAt = ts ?? null;
  return {
    stable_id: stableId,
    alias: null,
    claude_session_ids: [],
    transcript_files: [],
    fingerprints: {
      first_human_prompt_v1: null,
      lineage_prefix_v1: null,
    },
    parent_session_id: null,
    parent_candidate_ids: [],
    // Number of parent candidates omitted from the most recent session_seen
    // (last-write-wins). 0 means parent_candidate_ids is complete; >0 means
    // consumers should render "+ N more" or drill down into the event log.
    parent_candidates_omitted_count: 0,
    // How the most recent session_seen resolved this stable_id. Null until a
    // session_seen event carries an `identity_resolution` payload.
    identity_resolution: null,
    worktree_path_observed: null,
    worktree_realpath: null,
    worktree_registry_name: null,
    git_common_dir: null,
    branch_at_start: null,
    branch_current: null,
    head_at_start: null,
    head_last_seen: null,
    tasks: [],
    projects: [],
    activity_state: 'active',
    outcome: 'open',
    closed_at: null,
    closed_reason: null,
    created_at: observedAt,
    last_progress_at: observedAt,
    first_prompt_preview: null,
  };
}
|
|
92
|
+
|
|
93
|
+
/**
 * Apply a single event to a projection (mutating). Returns the same
 * projection reference for fluent chaining.
 *
 * Unknown ops leave the session untouched but are still counted in `_meta`,
 * so a newer event schema replayed by an older binary degrades cleanly and
 * drift stays detectable.
 *
 * `_meta.event_count` is incremented on every call; applying the same event
 * twice to the same projection therefore counts it twice.
 *
 * Session lookup and creation use own properties only. A `stable_id` such as
 * `__proto__` or `constructor` must never resolve to something inherited
 * from `Object.prototype`; otherwise the reducers would mutate a built-in
 * instead of a session record.
 *
 * @param {object} projection
 * @param {{ ts: string, event_id: string, op: string, stable_id: string,
 *           payload?: object }} event
 * @returns {object} projection
 * @throws {TypeError} when the projection lacks `sessions` / `_meta`, the
 *   event is not an object, or `event.stable_id` is not a non-empty string.
 */
export function applyEvent(projection, event) {
  if (
    !projection ||
    typeof projection !== 'object' ||
    !projection.sessions ||
    !projection._meta ||
    typeof projection._meta !== 'object'
  ) {
    // Validate _meta up front so a malformed projection is rejected before
    // any session has been created or mutated.
    throw new TypeError('applyEvent: projection missing or malformed');
  }
  if (!event || typeof event !== 'object') {
    throw new TypeError('applyEvent: event missing');
  }
  const { op, stable_id: stableId, ts } = event;
  if (typeof stableId !== 'string' || stableId.length === 0) {
    throw new TypeError('applyEvent: event.stable_id required');
  }

  // Every op is session-scoped, so eager creation is safe.
  const { sessions } = projection;
  const isOwn = Object.prototype.hasOwnProperty.call(sessions, stableId);
  let session = isOwn ? sessions[stableId] : undefined;
  if (!session) {
    session = emptySession(stableId, ts);
    // defineProperty (not plain assignment) so a key like `__proto__` becomes
    // an ordinary own property instead of re-pointing the map's prototype.
    Object.defineProperty(sessions, stableId, {
      value: session,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }

  switch (op) {
    case 'session_seen':
      reduceSessionSeen(session, event);
      break;
    case 'session_link':
      reduceSessionLink(session, event);
      break;
    case 'alias_set':
      reduceAliasSet(session, event);
      break;
    case 'parent_set':
      reduceParentSet(session, event);
      break;
    case 'close':
      reduceClose(session, event);
      break;
    case 'sweep':
      reduceSweep(session, event);
      break;
    case 'session_unlink':
      reduceSessionUnlink(session, event);
      break;
    case 'manual_link':
      reduceManualLink(session, event);
      break;
    default:
      // Unknown op — no-op on the session, still accounted for in _meta.
      break;
  }

  // `sweep` is a maintenance op and must not bump last_progress_at here; its
  // reducer handles `effective_last_progress` itself. Out-of-order events
  // are ignored via lexical compare, which is correct for ISO 8601 strings.
  if (op !== 'sweep' && ts && (!session.last_progress_at || ts > session.last_progress_at)) {
    session.last_progress_at = ts;
  }

  // last_event_id / updated follow the most recently applied event; a
  // missing value keeps the previous one.
  projection._meta.event_count += 1;
  projection._meta.last_event_id = event.event_id ?? projection._meta.last_event_id;
  projection._meta.updated = ts ?? projection._meta.updated;

  return projection;
}
|
|
176
|
+
|
|
177
|
+
/**
 * Replay `events` in order onto a brand-new projection and return it.
 *
 * A non-array argument is treated as "no events" and yields the empty
 * projection. Errors thrown by `applyEvent` for an individual event are not
 * caught here and abort the rebuild.
 *
 * @param {Array<object>} events
 * @returns {object} the rebuilt projection
 */
export function rebuildFromEvents(events) {
  const rebuilt = emptyProjection();
  if (Array.isArray(events)) {
    for (let i = 0; i < events.length; i += 1) {
      applyEvent(rebuilt, events[i]);
    }
  }
  return rebuilt;
}
|
|
191
|
+
|
|
192
|
+
// ---------------------------------------------------------------------------
|
|
193
|
+
// Per-op reducers (each isolated for testability).
|
|
194
|
+
// ---------------------------------------------------------------------------
|
|
195
|
+
|
|
196
|
+
/**
 * Return the identifier a parent-candidate entry is deduplicated by, or null
 * when it has none. Candidates derived from session_seen use `stable_id`,
 * manual links use `parent_id`, and `id` is accepted as a last resort.
 */
function sessionSeenCandidateId(candidate) {
  if (!candidate || typeof candidate !== 'object') return null;
  for (const key of ['stable_id', 'parent_id', 'id']) {
    const value = candidate[key];
    if (typeof value === 'string' && value.length > 0) return value;
  }
  return null;
}

/**
 * Fold a `session_seen` event into `session` (mutating it).
 *
 * Merge rules per field:
 *  - `claude_session_ids`: append, deduplicated.
 *  - `transcript_files`: keyed by `path`; a repeat observation is merged over
 *    the existing entry so the newest values win.
 *  - `fingerprints`, `branch_at_start`, `head_at_start`,
 *    `first_prompt_preview`, `cwd`: first observation wins.
 *  - worktree / git context, `identity_resolution`,
 *    `parent_candidates_omitted_count`: last write wins.
 *  - `parent_candidate_ids`: accumulated across observations, deduplicated by
 *    candidate id; entries without any id are skipped.
 *
 * Session records may come from a projection persisted by an older version,
 * so container fields that are missing are materialised before use instead
 * of assuming the current `emptySession` shape.
 *
 * @param {object} session - session record to update in place
 * @param {{ payload?: object }} event
 */
function reduceSessionSeen(session, event) {
  const p = event.payload ?? {};

  // Legacy shim: make sure every container this reducer appends to exists.
  if (!Array.isArray(session.claude_session_ids)) session.claude_session_ids = [];
  if (!Array.isArray(session.transcript_files)) session.transcript_files = [];
  if (!Array.isArray(session.parent_candidate_ids)) session.parent_candidate_ids = [];
  if (!session.fingerprints || typeof session.fingerprints !== 'object') {
    session.fingerprints = { first_human_prompt_v1: null, lineage_prefix_v1: null };
  }

  if (typeof p.claude_session_id === 'string' && p.claude_session_id.length > 0) {
    if (!session.claude_session_ids.includes(p.claude_session_id)) {
      session.claude_session_ids.push(p.claude_session_id);
    }
  }

  if (p.transcript_file && typeof p.transcript_file === 'object') {
    const tf = p.transcript_file;
    const idx = session.transcript_files.findIndex((t) => t && t.path === tf.path);
    if (idx === -1) {
      session.transcript_files.push({ ...tf });
    } else {
      session.transcript_files[idx] = { ...session.transcript_files[idx], ...tf };
    }
  }

  // Fingerprints: only filled while still unset.
  if (p.fingerprints && typeof p.fingerprints === 'object') {
    for (const key of ['first_human_prompt_v1', 'lineage_prefix_v1']) {
      if (session.fingerprints[key] == null && typeof p.fingerprints[key] === 'string') {
        session.fingerprints[key] = p.fingerprints[key];
      }
    }
  }

  // Recency-sensitive context: last write wins.
  setIfPresent(session, p, 'worktree_path_observed');
  setIfPresent(session, p, 'worktree_realpath');
  setIfPresent(session, p, 'worktree_registry_name');
  setIfPresent(session, p, 'git_common_dir');
  setIfPresent(session, p, 'branch_current');
  setIfPresent(session, p, 'head_last_seen');

  // Historical context: first write wins.
  setIfMissing(session, p, 'branch_at_start');
  setIfMissing(session, p, 'head_at_start');
  setIfMissing(session, p, 'first_prompt_preview');
  if (typeof p.cwd === 'string' && session.cwd == null) {
    session.cwd = p.cwd;
  }

  // identity_resolution: keep the latest, stored as a shallow copy so the
  // projection never aliases the caller's event object (same treatment as
  // transcript files and parent candidates).
  if (p.identity_resolution && typeof p.identity_resolution === 'object') {
    session.identity_resolution = Array.isArray(p.identity_resolution)
      ? [...p.identity_resolution]
      : { ...p.identity_resolution };
  }

  // parent_candidates_omitted_count: a finite, non-negative number replaces
  // the stored value; anything else (including absence) leaves it alone.
  if (typeof p.parent_candidates_omitted_count === 'number'
    && p.parent_candidates_omitted_count >= 0
    && Number.isFinite(p.parent_candidates_omitted_count)) {
    session.parent_candidates_omitted_count = p.parent_candidates_omitted_count;
  }
  // Records persisted before the field existed get it materialised as 0.
  if (typeof session.parent_candidates_omitted_count !== 'number') {
    session.parent_candidates_omitted_count = 0;
  }

  // parent_candidate_ids: additive across observations. Index the ids that
  // are already present once so the merge is linear rather than quadratic.
  if (Array.isArray(p.parent_candidate_ids)) {
    const knownIds = new Set();
    for (const existing of session.parent_candidate_ids) {
      const existingId = sessionSeenCandidateId(existing);
      if (existingId !== null) knownIds.add(existingId);
    }
    for (const candidate of p.parent_candidate_ids) {
      const candidateId = sessionSeenCandidateId(candidate);
      if (candidateId === null || knownIds.has(candidateId)) continue;
      knownIds.add(candidateId);
      session.parent_candidate_ids.push({ ...candidate });
    }
  }
}
|
|
319
|
+
|
|
320
|
+
/**
 * Fold a `session_link` event into `session`: add the tasks / projects named
 * in the payload, skipping non-strings, empty strings and ids that are
 * already present.
 *
 * Legacy guard: events carrying `payload.remove === true` are skipped
 * entirely. Per the migration note this reducer was written against, an
 * earlier `link --remove` emitted `session_link` events with that flag and
 * the flag was never honoured, so replaying them as adds would resurrect
 * links the operator had abandoned. They are deliberately NOT routed to the
 * unlink reducer either, because that could remove items that were
 * legitimately re-added by later events. Removal is expressed with the
 * separate `session_unlink` op.
 *
 * A session record whose `tasks` / `projects` is not an array (for example a
 * record persisted by an older version) gets the array created on demand
 * instead of throwing, matching the `Array.isArray` guard in
 * `reduceSessionUnlink`.
 *
 * @param {object} session - session record to update in place
 * @param {{ payload?: object }} event
 */
function reduceSessionLink(session, event) {
  const p = event.payload ?? {};

  if (p.remove === true) return;

  for (const field of ['tasks', 'projects']) {
    const incoming = p[field];
    if (!Array.isArray(incoming)) continue;
    if (!Array.isArray(session[field])) session[field] = [];
    const linked = session[field];
    for (const id of incoming) {
      if (typeof id === 'string' && id.length > 0 && !linked.includes(id)) {
        linked.push(id);
      }
    }
  }
}
|
|
361
|
+
|
|
362
|
+
/**
 * Fold an `alias_set` event into `session`.
 *
 * `payload.alias === null` clears the alias; a non-empty string replaces it.
 * Any other value (missing, empty string, non-string) leaves it unchanged.
 *
 * @param {object} session - session record to update in place
 * @param {{ payload?: object }} event
 */
function reduceAliasSet(session, event) {
  const requested = (event.payload ?? {}).alias;
  const isClear = requested === null;
  const isName = typeof requested === 'string' && requested.length > 0;
  if (isClear || isName) {
    session.alias = requested;
  }
}
|
|
372
|
+
|
|
373
|
+
/**
 * Fold a `parent_set` event into `session`.
 *
 * `payload.parent_session_id === null` clears the parent; a non-empty string
 * replaces it. Any other value leaves the current parent unchanged.
 *
 * @param {object} session - session record to update in place
 * @param {{ payload?: object }} event
 */
function reduceParentSet(session, event) {
  const parentId = (event.payload ?? {}).parent_session_id;
  if (parentId !== null && (typeof parentId !== 'string' || parentId.length === 0)) {
    return;
  }
  session.parent_session_id = parentId;
}
|
|
384
|
+
|
|
385
|
+
/**
 * Fold a `close` event into `session`.
 *
 *  - `outcome`: replaced when the payload carries a non-empty string.
 *  - `closed_at`: set to the event's `ts`; kept as-is when the event has no
 *    `ts`.
 *  - `closed_reason`: replaced when the payload carries a string or an
 *    explicit null; otherwise untouched.
 *
 * @param {object} session - session record to update in place
 * @param {{ ts?: string, payload?: object }} event
 */
function reduceClose(session, event) {
  const { outcome, closed_reason: reason } = event.payload ?? {};

  if (typeof outcome === 'string' && outcome.length > 0) {
    session.outcome = outcome;
  }

  session.closed_at = event.ts ?? session.closed_at;

  if (reason === null || typeof reason === 'string') {
    session.closed_reason = reason;
  }
}
|
|
398
|
+
|
|
399
|
+
/**
 * Fold a `sweep` event into `session`.
 *
 *  - `activity_state`: replaced when the payload carries a non-empty string.
 *  - `effective_last_progress`: when it is a string, it may move
 *    `last_progress_at` forward (lexical compare) or fill it in when unset;
 *    a sweep never moves `last_progress_at` backwards.
 *
 * @param {object} session - session record to update in place
 * @param {{ payload?: object }} event
 */
function reduceSweep(session, event) {
  const {
    activity_state: nextState,
    effective_last_progress: effective,
  } = event.payload ?? {};

  if (typeof nextState === 'string' && nextState.length > 0) {
    session.activity_state = nextState;
  }

  if (typeof effective !== 'string') return;

  const current = session.last_progress_at;
  if (!current || effective > current) {
    session.last_progress_at = effective;
  }
}
|
|
416
|
+
|
|
417
|
+
/**
 * Fold a `session_unlink` event into `session`: the removal counterpart of
 * `reduceSessionLink`, taking the same payload shape (`tasks` / `projects`).
 *
 * For each of the two fields, the valid ids named in the payload (non-empty
 * strings) are collected into a Set and filtered out of the session's
 * array. Removing an id that is not present is a no-op, and repeated ids in
 * the payload collapse in the Set. A field is left untouched when the
 * payload list is missing or empty, contains no valid id, or the session's
 * own value is not an array.
 *
 * @param {object} session - session record to update in place
 * @param {{ payload?: object }} event
 */
function reduceSessionUnlink(session, event) {
  const p = event.payload ?? {};

  for (const field of ['tasks', 'projects']) {
    const named = p[field];
    if (!Array.isArray(named) || named.length === 0) continue;

    const doomed = new Set();
    for (const id of named) {
      if (typeof id === 'string' && id.length > 0) doomed.add(id);
    }
    if (doomed.size === 0 || !Array.isArray(session[field])) continue;

    session[field] = session[field].filter((id) => !doomed.has(id));
  }
}
|
|
450
|
+
|
|
451
|
+
/**
 * Return the identifier a parent-candidate entry is deduplicated by in the
 * manual-link path, or null when it has none. `parent_id` is the key manual
 * links use; `id` is the historical fallback; `stable_id` is checked last so
 * candidates previously surfaced by session_seen are recognised as the same
 * parent.
 */
function manualLinkCandidateId(candidate) {
  if (!candidate || typeof candidate !== 'object') return null;
  if (typeof candidate.parent_id === 'string') return candidate.parent_id;
  if (typeof candidate.id === 'string') return candidate.id;
  if (typeof candidate.stable_id === 'string') return candidate.stable_id;
  return null;
}

/**
 * Fold a `manual_link` event into `session`: append the payload's parent
 * candidates (shallow-copied) to `session.parent_candidate_ids`.
 *
 * Non-object entries are skipped. Candidates that carry an id are
 * deduplicated against what the session already holds, including entries
 * keyed by `stable_id`. Candidates with no id at all cannot be deduplicated
 * and are appended as-is, so replaying such an event onto the same session
 * appends them again.
 *
 * A session whose `parent_candidate_ids` is missing or not an array gets an
 * empty array first; null entries already in the list are tolerated.
 *
 * @param {object} session - session record to update in place
 * @param {{ payload?: object }} event
 */
function reduceManualLink(session, event) {
  const p = event.payload ?? {};
  if (!Array.isArray(p.parent_candidate_ids)) return;

  if (!Array.isArray(session.parent_candidate_ids)) {
    session.parent_candidate_ids = [];
  }

  // Index existing ids once so the merge is linear.
  const knownIds = new Set();
  for (const existing of session.parent_candidate_ids) {
    const existingId = manualLinkCandidateId(existing);
    if (existingId !== null) knownIds.add(existingId);
  }

  for (const candidate of p.parent_candidate_ids) {
    if (!candidate || typeof candidate !== 'object') continue;
    const candidateId = manualLinkCandidateId(candidate);
    if (candidateId !== null) {
      if (knownIds.has(candidateId)) continue;
      knownIds.add(candidateId);
    }
    session.parent_candidate_ids.push({ ...candidate });
  }
}
|
|
479
|
+
|
|
480
|
+
// ---------------------------------------------------------------------------
|
|
481
|
+
// Helpers
|
|
482
|
+
// ---------------------------------------------------------------------------
|
|
483
|
+
|
|
484
|
+
/**
 * Last-write-wins copy: assign `source[key]` to `target[key]` unless the
 * source value is null or undefined.
 *
 * @param {object} target
 * @param {object} source
 * @param {string} key
 */
function setIfPresent(target, source, key) {
  const incoming = source[key];
  if (incoming == null) return;
  target[key] = incoming;
}
|
|
490
|
+
|
|
491
|
+
/**
 * First-write-wins copy: assign `source[key]` to `target[key]` only while
 * the target value is still null/undefined and the source value is neither.
 *
 * @param {object} target
 * @param {object} source
 * @param {string} key
 */
function setIfMissing(target, source, key) {
  const incoming = source[key];
  if (target[key] != null || incoming == null) return;
  target[key] = incoming;
}
|
package/lib/sanitize.mjs
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* First-prompt sanitizer for sessions-db.
|
|
3
|
+
*
|
|
4
|
+
* Why this exists: the first user message of a Claude Code transcript is
|
|
5
|
+
* routinely wrapped in injected blocks emitted by the IDE bridge or by the
|
|
6
|
+
* harness itself:
|
|
7
|
+
* - `<system-reminder>...</system-reminder>` — system/harness reminders.
|
|
8
|
+
* - `<system>...</system>` — generic system prompt envelope.
|
|
9
|
+
* - `<thinking>...</thinking>` — chain-of-thought leak guard.
|
|
10
|
+
* - `<tool_use>...</tool_use>` — assistant tool call (echoed back).
|
|
11
|
+
* - `<tool_result>...</tool_result>` — tool output echo.
|
|
12
|
+
* - `<parameter>...</parameter>` — tool call argument body.
|
|
13
|
+
* - `<ide_opened_file>...</ide_opened_file>` — IDE "user has this file
|
|
14
|
+
* open" hint, which leaks file paths.
|
|
15
|
+
* - `<ide_selection>...</ide_selection>` — IDE "user highlighted these
|
|
16
|
+
* lines" hint, which leaks selected source code into the prompt preview.
|
|
17
|
+
* - `<command-name>...</command-message>` — slash command wrapper.
|
|
18
|
+
* If we naively persisted that text to disk we would (a) leak file paths and
|
|
19
|
+
* other IDE state, and (b) blow the preview budget on noise instead of the
|
|
20
|
+
* user's actual prompt. So we NFKC-normalise first (fold fullwidth → ASCII so
|
|
21
|
+
* disguised tags get caught), strip the wrappers in two passes (defensive
|
|
22
|
+
* against a wrapper revealed only after a sibling is removed), then trim and
|
|
23
|
+
* truncate to a safe preview length (default 200) on a Unicode code-point
|
|
24
|
+
* boundary so surrogate pairs are never split.
|
|
25
|
+
*
|
|
26
|
+
* Note on HTML entities: we DO NOT entity-decode. `&lt;system-reminder&gt;`
|
|
27
|
+
* stays literally `&lt;system-reminder&gt;` in the preview — entities can be
|
|
28
|
+
* legitimate user content (e.g., quoted code), and decoding them before
|
|
29
|
+
* stripping would create a brand-new injection vector. The sanitizer's
|
|
30
|
+
* contract is byte-faithful pass-through for anything that is not an actual
|
|
31
|
+
* `<tag>...</tag>` wrapper.
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
// Opening tags use `<TAG\b[^>]*>` so a trailing space or attribute (e.g.
// `<system-reminder >` or `<system-reminder data-x="y">`) cannot bypass the
// match. `\b` anchors the tag name so `<system-reminderXYZ>` does NOT match.
// Closing tags use `<\/TAG\s*>` for the same reason: `</system-reminder >`
// (whitespace before `>`) must terminate the wrapper just like the exact
// form, otherwise the block would survive the strip.
// Bodies are matched lazily (`[\s\S]*?`) so each wrapper ends at its first
// closing tag, and across newlines.
const SYSTEM_REMINDER_RE = /<system-reminder\b[^>]*>[\s\S]*?<\/system-reminder\s*>/gi;
const SYSTEM_RE = /<system\b[^>]*>[\s\S]*?<\/system\s*>/gi;
const THINKING_RE = /<thinking\b[^>]*>[\s\S]*?<\/thinking\s*>/gi;
const TOOL_USE_RE = /<tool_use\b[^>]*>[\s\S]*?<\/tool_use\s*>/gi;
const TOOL_RESULT_RE = /<tool_result\b[^>]*>[\s\S]*?<\/tool_result\s*>/gi;
const PARAMETER_RE = /<parameter\b[^>]*>[\s\S]*?<\/parameter\s*>/gi;

const IDE_OPENED_RE = /<ide_opened_file\b[^>]*>[\s\S]*?<\/ide_opened_file\s*>/gi;
// IDE injects user's editor selection (highlighted source lines + file path).
// Discovered in production 2026-05-10 leaking selected code into preview.
const IDE_SELECTION_RE = /<ide_selection\b[^>]*>[\s\S]*?<\/ide_selection\s*>/gi;
// Slash-command wrapper opens with <command-name> and closes with the
// trailing </command-message> tag (not a typo — that is the actual shape).
const COMMAND_WRAPPER_RE = /<command-name\b[^>]*>[\s\S]*?<\/command-message\s*>/gi;
|
|
51
|
+
|
|
52
|
+
/**
 * Remove harness/system envelopes from `s`: `<system-reminder>`, `<system>`,
 * `<thinking>`, `<tool_use>`, `<tool_result>` and `<parameter>` blocks, each
 * together with its body. The patterns are applied one after another in
 * that order.
 *
 * @param {string} s
 * @returns {string} the stripped text, or '' when `s` is not a string
 */
export function stripSystemReminders(s) {
  if (typeof s !== 'string') return '';
  const envelopes = [
    SYSTEM_REMINDER_RE,
    SYSTEM_RE,
    THINKING_RE,
    TOOL_USE_RE,
    TOOL_RESULT_RE,
    PARAMETER_RE,
  ];
  let remaining = s;
  for (const pattern of envelopes) {
    remaining = remaining.replace(pattern, '');
  }
  return remaining;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Remove IDE/harness wrappers from `s`: `<ide_opened_file>` blocks,
 * `<ide_selection>` blocks and the slash-command wrapper
 * (`<command-name>` through `</command-message>`), applied in that order.
 *
 * @param {string} s
 * @returns {string} the stripped text, or '' when `s` is not a string
 */
export function stripIdeWrappers(s) {
  if (typeof s !== 'string') return '';
  const wrappers = [IDE_OPENED_RE, IDE_SELECTION_RE, COMMAND_WRAPPER_RE];
  return wrappers.reduce((text, pattern) => text.replace(pattern, ''), s);
}
|
|
84
|
+
|
|
85
|
+
/**
 * Sanitise a raw first-prompt string for safe persistence.
 *
 * Steps, in order:
 *   1. NFKC-normalise FIRST. Fullwidth bracket variants (e.g.
 *      `＜system-reminder＞`) only fold into ASCII `<>` under NFKC; stripping
 *      before normalising would let such a wrapper survive the strip.
 *   2. Strip system envelopes, then IDE/harness wrappers, and repeat until
 *      the text stops changing. Removing one wrapper can splice the
 *      surrounding text into a brand-new wrapper (e.g. `<sys` + removed
 *      block + `tem>...</system>`), and such splices can be nested to any
 *      depth, so a fixed number of passes is not enough. The number of
 *      passes is capped; input still changing at the cap is treated as
 *      hostile and yields '' (fail closed) rather than a preview that may
 *      still contain wrapper bodies.
 *   3. Convert CRLF to LF, collapse runs of 3+ newlines to a paragraph
 *      break, and trim.
 *   4. Truncate to `maxLen` code points (default 200), never splitting a
 *      surrogate pair. A truncated result ends in `…`, which counts toward
 *      `maxLen`.
 *
 * @param {string} raw
 * @param {{ maxLen?: number }} [opts] - `maxLen` must be a finite number > 0,
 *   otherwise the default is used. `null` is accepted and means "defaults".
 * @returns {string} the sanitised preview; '' when `raw` is not a string
 */
export function sanitizeFirstPrompt(raw, opts = {}) {
  if (typeof raw !== 'string') return '';
  const requestedLen = opts?.maxLen;
  const maxLen = Number.isFinite(requestedLen) && requestedLen > 0 ? requestedLen : 200;
  // Each pass that changes the text only removes characters, so the loop
  // always terminates; the cap merely bounds the work on adversarial input.
  const MAX_STRIP_PASSES = 32;

  // (1) NFKC first so fullwidth `<...>` becomes ASCII before the strip runs.
  let s = raw.normalize('NFKC');

  // (2) Strip to a fixed point.
  let previous;
  let passes = 0;
  do {
    if (passes === MAX_STRIP_PASSES) return '';
    previous = s;
    s = stripIdeWrappers(stripSystemReminders(s));
    passes += 1;
  } while (s !== previous);

  // (3) Whitespace tidy.
  s = s.replace(/\r\n/g, '\n').replace(/\n{3,}/g, '\n\n').trim();

  // Fast path: the code-point count never exceeds the UTF-16 length.
  if (s.length <= maxLen) return s;

  // (4) Array.from() iterates by code point, so slicing the array cannot
  // cut through a surrogate pair.
  const codePoints = Array.from(s);
  if (codePoints.length <= maxLen) return s;
  return codePoints.slice(0, Math.max(0, maxLen - 1)).join('') + '…';
}
|