@druumen/sessions-db 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/CHANGELOG.md +249 -0
  2. package/LICENSE +201 -0
  3. package/NOTICE +10 -0
  4. package/README.md +250 -0
  5. package/cli/_write-helpers.mjs +99 -0
  6. package/cli/alias.mjs +115 -0
  7. package/cli/argparse.mjs +296 -0
  8. package/cli/close.mjs +116 -0
  9. package/cli/find.mjs +185 -0
  10. package/cli/format.mjs +277 -0
  11. package/cli/link-parent.mjs +133 -0
  12. package/cli/link.mjs +132 -0
  13. package/cli/rebuild.mjs +98 -0
  14. package/cli/sessions-db-session-start-main.mjs +454 -0
  15. package/cli/sessions-db-session-start.mjs +56 -0
  16. package/cli/sessions-db.mjs +119 -0
  17. package/cli/sweep.mjs +171 -0
  18. package/cli/tree.mjs +127 -0
  19. package/lib/git-context.mjs +479 -0
  20. package/lib/identity.mjs +616 -0
  21. package/lib/index.mjs +145 -0
  22. package/lib/init.mjs +185 -0
  23. package/lib/lock.mjs +86 -0
  24. package/lib/operations.mjs +490 -0
  25. package/lib/paths.mjs +199 -0
  26. package/lib/projection.mjs +496 -0
  27. package/lib/sanitize.mjs +131 -0
  28. package/lib/storage.mjs +759 -0
  29. package/lib/sweep.mjs +209 -0
  30. package/lib/transcript.mjs +230 -0
  31. package/lib/types.mjs +276 -0
  32. package/lib/uuid.mjs +116 -0
  33. package/lib/watch.mjs +217 -0
  34. package/package.json +53 -0
  35. package/types/git-context.d.mts +98 -0
  36. package/types/identity.d.mts +658 -0
  37. package/types/index.d.mts +10 -0
  38. package/types/index.d.ts +127 -0
  39. package/types/init.d.mts +53 -0
  40. package/types/lock.d.mts +18 -0
  41. package/types/operations.d.mts +204 -0
  42. package/types/paths.d.mts +54 -0
  43. package/types/projection.d.mts +79 -0
  44. package/types/sanitize.d.mts +39 -0
  45. package/types/storage.d.mts +276 -0
  46. package/types/sweep.d.mts +58 -0
  47. package/types/transcript.d.mts +59 -0
  48. package/types/types.d.mts +255 -0
  49. package/types/uuid.d.mts +17 -0
  50. package/types/watch.d.mts +33 -0
package/lib/sweep.mjs ADDED
@@ -0,0 +1,209 @@
1
+ /**
2
+ * Pure sweep logic for sessions-db (Phase 5).
3
+ *
4
+ * `computeSweepTransitions` takes a projection snapshot + thresholds and
5
+ * returns the list of activity_state transitions the caller should write as
6
+ * `sweep` events. Pure: no IO, fully deterministic given (projection, now,
7
+ * idleThresholdDays, archiveThresholdDays). The caller (CLI sweep handler)
8
+ * is responsible for writing one event per transition through the normal
9
+ * `tryUpdateProjection` path so events.jsonl + projection cache stay in
10
+ * lockstep.
11
+ *
12
+ * State machine (terminal: archived):
13
+ *
14
+ * active ──ageDays >= idleThreshold──▶ idle
15
+ * active ──ageDays >= archiveThreshold──▶ archived
16
+ * idle ──ageDays >= archiveThreshold──▶ archived
17
+ * archived (terminal — never re-promoted by sweep)
18
+ *
19
+ * Idempotency:
20
+ * - Sweep does not generate a transition when the computed target equals
21
+ * the session's current activity_state. So invoking sweep twice on the
22
+ * same projection (with the same `now`) yields zero transitions on the
23
+ * second run.
24
+ * - `archived` is terminal — sweep never touches it. Operators rehydrate
25
+ * archived sessions explicitly (out of P5 scope).
26
+ *
27
+ * Effective last-progress = max ISO timestamp of:
28
+ * - session.last_progress_at
29
+ * - session.transcript_files[*].mtime
30
+ * - session.hive_watcher_last_seen (placeholder for future integration —
31
+ * hive-watcher will surface filesystem activity timestamps independent of
32
+ * hook-derived events; reading the field today is a no-op when absent)
33
+ *
34
+ * Why max instead of just last_progress_at? `last_progress_at` is bumped by
35
+ * hook-derived events (session_seen, alias_set, link, ...). A session whose
36
+ * transcript is being actively appended (long /loop run, codex-rescue) but
37
+ * which hasn't fired a hook in the sweep window would otherwise be flagged
38
+ * idle. Transcript mtimes from session_seen payloads + the future hive-
39
+ * watcher signal cover that gap.
40
+ */
41
+
42
// Number of milliseconds in one day; used to convert a millisecond age into
// whole days.
const MS_PER_DAY = 1000 * 60 * 60 * 24;

// Threshold fallbacks (in days) used when neither the caller's options nor
// the projection's `_meta` supply a usable value.
const DEFAULT_IDLE_THRESHOLD_DAYS = 14;
const DEFAULT_ARCHIVE_THRESHOLD_DAYS = 30;
45
+
46
/**
 * Compute desired activity_state transitions for all sessions in a projection.
 *
 * For every session that is not already `archived` and that carries at least
 * one parseable timestamp, the age in whole days (floor of `now` minus the
 * effective last-progress instant) is mapped onto a target state:
 *   - age >= archive threshold → `archived`
 *   - age >= idle threshold    → `idle`
 *   - otherwise                → `active`
 * A transition is emitted only when the target differs from the session's
 * current `activity_state`. This includes `idle → active` when an idle
 * session shows recent progress.
 *
 * Each threshold is resolved in order: the explicit option, then
 * `projection._meta` (`idle_threshold_days` / `archive_threshold_days`), then
 * the module default.
 *
 * @param {object} projection
 * @param {{
 *   now?: number,
 *   idleThresholdDays?: number,
 *   archiveThresholdDays?: number,
 * }} [opts] `now` is epoch milliseconds. A missing or non-finite value (NaN,
 *   ±Infinity) falls back to `Date.now()`.
 * @returns {Array<{
 *   stable_id: string,
 *   from_state: string,
 *   to_state: string,
 *   effective_last_progress: string,
 *   age_days: number,
 * }>} `from_state` is copied verbatim from the session record, so it is
 *   `undefined` when the record has no `activity_state`.
 */
export function computeSweepTransitions(projection, opts = {}) {
  // The default parameter only covers `undefined`; also tolerate `null`.
  const options = opts && typeof opts === 'object' ? opts : {};

  // Validate `now` the same way thresholds are validated. A NaN clock would
  // make every age NaN (all comparisons false → target 'active'), and an
  // infinite clock would archive every session.
  const now = Number.isFinite(options.now) ? options.now : Date.now();

  const meta = projection && projection._meta;
  const idleThreshold = pickThreshold(
    options.idleThresholdDays,
    meta && meta.idle_threshold_days,
    DEFAULT_IDLE_THRESHOLD_DAYS,
  );
  const archiveThreshold = pickThreshold(
    options.archiveThresholdDays,
    meta && meta.archive_threshold_days,
    DEFAULT_ARCHIVE_THRESHOLD_DAYS,
  );

  const sessions = projection && projection.sessions ? projection.sessions : {};
  const transitions = [];

  for (const [stableId, session] of Object.entries(sessions)) {
    if (!session || typeof session !== 'object') continue;

    // Terminal state: sweep never re-promotes archived sessions.
    if (session.activity_state === 'archived') continue;

    // With no parseable timestamp the age is unknowable. Skip the session
    // rather than treating it as epoch-old, which would archive it.
    if (!hasAnyParseableTimestamp(session)) continue;

    const effective = computeEffectiveLastProgress(session);
    const effectiveMs = Date.parse(effective);
    if (!Number.isFinite(effectiveMs)) continue;

    const ageDays = Math.floor((now - effectiveMs) / MS_PER_DAY);

    let target;
    if (ageDays >= archiveThreshold) target = 'archived';
    else if (ageDays >= idleThreshold) target = 'idle';
    else target = 'active';

    // Already in the target state: nothing to report. This is what makes a
    // repeated sweep over the same inputs produce no transitions.
    if (target === session.activity_state) continue;

    transitions.push({
      stable_id: stableId,
      from_state: session.activity_state,
      to_state: target,
      effective_last_progress: effective,
      age_days: ageDays,
    });
  }

  return transitions;
}
118
+
119
/**
 * Determine a session's effective "last progress" instant: the latest of
 *   - `session.last_progress_at`
 *   - every `session.transcript_files[*].mtime`
 *   - `session.hive_watcher_last_seen`
 *
 * Candidates are compared as epoch milliseconds (via `Date.parse`), never as
 * strings. String comparison is only correct when every timestamp shares the
 * same UTC offset and fractional precision, which cannot be assumed for
 * values written by different producers. The winner is re-emitted through
 * `Date#toISOString`, so the result is always in normalized `Z` form.
 *
 * Non-string, empty, and unparseable candidates are ignored. When nothing is
 * usable (including a non-object `session`), the Unix epoch is returned.
 *
 * @param {object} session
 * @returns {string} ISO 8601 timestamp (always Z, normalized)
 */
export function computeEffectiveLastProgress(session) {
  const epochIso = new Date(0).toISOString();
  if (!session || typeof session !== 'object') return epochIso;

  // Gather the raw candidates first; validation happens in one pass below.
  const rawCandidates = [session.last_progress_at];
  if (Array.isArray(session.transcript_files)) {
    for (const file of session.transcript_files) {
      if (file && typeof file === 'object') rawCandidates.push(file.mtime);
    }
  }
  rawCandidates.push(session.hive_watcher_last_seen);

  let latestMs = Number.NEGATIVE_INFINITY;
  for (const raw of rawCandidates) {
    if (typeof raw !== 'string' || raw === '') continue;
    const ms = Date.parse(raw);
    if (Number.isFinite(ms) && ms > latestMs) latestMs = ms;
  }

  return latestMs === Number.NEGATIVE_INFINITY
    ? epochIso
    : new Date(latestMs).toISOString();
}
171
+
172
/**
 * Report whether a session record carries at least one timestamp string that
 * `Date.parse` understands, looking at `last_progress_at`, each
 * `transcript_files[*].mtime`, and `hive_watcher_last_seen`.
 *
 * The sweep uses this to tell "nothing is known about this session's
 * recency" (skip it) apart from "the session is genuinely old" (transition
 * it).
 *
 * @param {object} session must be non-null; properties are read directly
 * @returns {boolean}
 */
function hasAnyParseableTimestamp(session) {
  const isParseable = (value) =>
    typeof value === 'string' && Number.isFinite(Date.parse(value));

  if (isParseable(session.last_progress_at)) return true;

  const files = session.transcript_files;
  if (Array.isArray(files) && files.some((file) => Boolean(file) && isParseable(file.mtime))) {
    return true;
  }

  return isParseable(session.hive_watcher_last_seen);
}
200
+
201
/**
 * Resolve a threshold by precedence: the explicit option first, then the
 * projection metadata value, then the fallback. A candidate is accepted only
 * when it is a finite number strictly greater than zero; anything else
 * (missing, non-numeric, NaN, ±Infinity, zero, negative) is passed over.
 *
 * @param {unknown} optsValue value supplied by the caller's options
 * @param {unknown} metaValue value read from the projection metadata
 * @param {number} fallback returned as-is when neither candidate qualifies
 * @returns {number}
 */
function pickThreshold(optsValue, metaValue, fallback) {
  for (const candidate of [optsValue, metaValue]) {
    const usable =
      typeof candidate === 'number' && Number.isFinite(candidate) && candidate > 0;
    if (usable) return candidate;
  }
  return fallback;
}
@@ -0,0 +1,230 @@
1
+ /**
2
+ * Claude Code transcript jsonl reader.
3
+ *
4
+ * Layout assumption: every transcript is `~/.claude/projects/<workspace-hash>/
5
+ * <session_uuid>.jsonl`, one JSON record per line. Records mix several types
6
+ * (`user`, `assistant`, `system`, `attachment`, `queue-operation`,
7
+ * `file-history-snapshot`, `ai-title`, `last-prompt`); only some carry the
8
+ * `uuid` / `parentUuid` lineage fields. We therefore consider only records
9
+ * with a `uuid` for firstUuid/lastUuid/firstParentUuid extraction.
10
+ *
11
+ * The "first human prompt" is the first `type === 'user'` record matching
12
+ * any of these (fallback chain, first hit wins):
13
+ * 1. `userType === 'external'` — empirical truth, what current Claude Code
14
+ * emits for human/IDE-originated messages.
15
+ * 2. `userType === 'human'` — what the design ticket assumed; included for
16
+ * forward compat if Claude Code ever switches to the spec value.
17
+ * 3. `message.role === 'user'` — semantic fallback when neither userType
18
+ * label is present (older harness builds, third-party tooling).
19
+ * If none match we leave firstHumanPromptRaw=null rather than mis-attributing
20
+ * a tool-result echo as the human's prompt.
21
+ *
22
+ * The reader is streaming (line-by-line) so it stays bounded on memory
23
+ * regardless of file size. We bail out before opening if the file exceeds
24
+ * `maxSizeMb` (default 50) and report `status: 'too_large'`.
25
+ */
26
+
27
+ import { createReadStream, statSync, readdirSync, existsSync } from 'node:fs';
28
+ import { createInterface } from 'node:readline';
29
+ import { join } from 'node:path';
30
+ import { homedir } from 'node:os';
31
+
32
// Size ceiling, in megabytes, applied when the caller gives no usable
// `maxSizeMb`.
const DEFAULT_MAX_MB = 50;

// Directory under the current user's home in which per-workspace transcript
// directories are looked up.
const CLAUDE_PROJECTS_ROOT = join(homedir(), '.claude', 'projects');
34
+
35
+ /**
36
+ * @typedef {{
37
+ * sessionId: string|null,
38
+ * firstUuid: string|null,
39
+ * lastUuid: string|null,
40
+ * firstParentUuid: string|null,
41
+ * recordCount: number,
42
+ * firstHumanPromptRaw: string|null,
43
+ * cwd: string|null,
44
+ * gitBranch: string|null,
45
+ * size: number,
46
+ * mtime: Date,
47
+ * status: 'ok' | 'corrupted' | 'too_large',
48
+ * }} TranscriptMeta
49
+ */
50
+
51
/**
 * Turn an absolute POSIX-style path into the dash-encoded workspace directory
 * name used under the projects root. Every `/` and every `.` becomes `-`, so
 * the leading slash yields the leading dash; all other characters are kept
 * unchanged.
 *
 * @param {string} cwd absolute path (must start with `/`)
 * @returns {string} e.g. `/Users/me/site.example` → `-Users-me-site-example`
 * @throws {TypeError} when `cwd` is not a string beginning with `/`
 */
export function workspaceHashFromCwd(cwd) {
  const isAbsolutePath = typeof cwd === 'string' && cwd.startsWith('/');
  if (!isAbsolutePath) {
    throw new TypeError(`workspaceHashFromCwd: expected absolute path, got ${cwd}`);
  }
  // Splitting on either separator and re-joining with '-' swaps each one 1:1.
  return cwd.split(/[/.]/).join('-');
}
66
+
67
/**
 * List every `.jsonl` transcript in a workspace's directory under the
 * projects root, sorted by mtime descending (newest first). Returns absolute
 * paths. Only regular files are considered; entries that cannot be stat'ed
 * are skipped.
 *
 * Returns an empty array when the workspace directory does not exist, when
 * it disappears between the existence check and the listing, or when the
 * resolved path is not a directory. Other filesystem errors (for example
 * permission problems) are rethrown.
 *
 * @param {string} workspaceHash dash-encoded hash, OR an absolute path
 *   (starting with `/`) that is hashed via `workspaceHashFromCwd`.
 * @returns {string[]}
 * @throws {TypeError} when `workspaceHash` is not a string
 */
export function listTranscriptFiles(workspaceHash) {
  if (typeof workspaceHash !== 'string') {
    throw new TypeError(
      `listTranscriptFiles: expected a workspace hash or absolute path, got ${typeof workspaceHash}`,
    );
  }

  const hash =
    workspaceHash.startsWith('/') ? workspaceHashFromCwd(workspaceHash) : workspaceHash;
  const dir = join(CLAUDE_PROJECTS_ROOT, hash);
  if (!existsSync(dir)) return [];

  let entries;
  try {
    entries = readdirSync(dir, { withFileTypes: true });
  } catch (err) {
    // existsSync is only a snapshot: the directory may vanish before it is
    // read, or the path may name a regular file. Both mean "no transcripts".
    if (err && (err.code === 'ENOENT' || err.code === 'ENOTDIR')) return [];
    throw err;
  }

  /** @type {{ path: string, mtime: number }[]} */
  const rows = [];
  for (const entry of entries) {
    if (!entry.isFile() || !entry.name.endsWith('.jsonl')) continue;
    const full = join(dir, entry.name);
    let st;
    try {
      st = statSync(full);
    } catch {
      // Best effort: a file that cannot be stat'ed is left out of the listing.
      continue;
    }
    rows.push({ path: full, mtime: st.mtimeMs });
  }

  rows.sort((a, b) => b.mtime - a.mtime);
  return rows.map((row) => row.path);
}
97
+
98
/**
 * Extract the plain text carried by a `message.content` value.
 *
 * A string is returned unchanged (including the empty string). For an array,
 * the `text` of every item shaped like `{ type: 'text', text: string }` is
 * collected in order and joined with newlines; all other items are ignored.
 * Any other input, or an array without a single text item, yields `null`.
 *
 * @param {unknown} content
 * @returns {string|null}
 */
function extractText(content) {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return null;

  const isTextItem = (item) =>
    Boolean(item) &&
    typeof item === 'object' &&
    item.type === 'text' &&
    typeof item.text === 'string';

  const texts = content.filter(isTextItem).map((item) => item.text);
  return texts.length > 0 ? texts.join('\n') : null;
}
120
+
121
/**
 * Parse a single transcript jsonl file and return its identity + lineage
 * metadata. The file is read line by line through a stream rather than being
 * loaded whole.
 *
 * Per non-empty line:
 *   - a line that is not valid JSON, or whose JSON value is not an object,
 *     counts as a parse error and is skipped;
 *   - otherwise it counts towards `recordCount`, and the first non-empty
 *     string `sessionId`, `cwd` and `gitBranch` seen are latched;
 *   - only records with a non-empty string `uuid` take part in lineage:
 *     the first one sets `firstUuid` / `firstParentUuid`, every one updates
 *     `lastUuid`;
 *   - `firstHumanPromptRaw` is the text of the first `type === 'user'` record
 *     with a `message` whose `userType` is `'external'` or `'human'`, or
 *     whose `message.role` is `'user'`, and that actually yields text.
 *
 * Resulting `status`:
 *   - `'too_large'` — file size exceeds the limit; the file is not opened;
 *   - `'corrupted'` — at least one line failed to parse and no record was
 *     recovered;
 *   - `'ok'` — everything else, including an empty file and a file where
 *     some lines were skipped but at least one record was read.
 *
 * @param {string} path absolute path to the jsonl file
 * @param {{ maxSizeMb?: number }} [opts] size limit in MiB; anything that is
 *   not a finite number > 0 falls back to the module default
 * @returns {Promise<TranscriptMeta>} rejects if the file cannot be stat'ed
 */
export async function parseTranscriptFile(path, opts = {}) {
  // The default parameter only covers `undefined`; also tolerate `null`.
  const requestedMb = opts && typeof opts === 'object' ? opts.maxSizeMb : undefined;
  const maxSizeMb = Number.isFinite(requestedMb) && requestedMb > 0
    ? requestedMb
    : DEFAULT_MAX_MB;

  const st = statSync(path);
  /** @type {TranscriptMeta} */
  const meta = {
    sessionId: null,
    firstUuid: null,
    lastUuid: null,
    firstParentUuid: null,
    recordCount: 0,
    firstHumanPromptRaw: null,
    cwd: null,
    gitBranch: null,
    size: st.size,
    mtime: st.mtime,
    status: 'ok',
  };

  // Bail out before opening the file when it is over the size limit.
  if (st.size > maxSizeMb * 1024 * 1024) {
    meta.status = 'too_large';
    return meta;
  }

  const stream = createReadStream(path, { encoding: 'utf8' });
  const rl = createInterface({ input: stream, crlfDelay: Infinity });

  let parseErrors = 0;

  try {
    for await (const line of rl) {
      if (line.length === 0) continue;

      let rec;
      try {
        rec = JSON.parse(line);
      } catch {
        parseErrors += 1;
        continue;
      }
      // Valid JSON that is not an object (number, string, null, ...) is not a
      // transcript record.
      if (!rec || typeof rec !== 'object') {
        parseErrors += 1;
        continue;
      }
      meta.recordCount += 1;

      // sessionId / cwd / gitBranch: the first non-empty string wins.
      if (meta.sessionId === null && typeof rec.sessionId === 'string' && rec.sessionId.length > 0) {
        meta.sessionId = rec.sessionId;
      }
      if (meta.cwd === null && typeof rec.cwd === 'string' && rec.cwd.length > 0) {
        meta.cwd = rec.cwd;
      }
      if (meta.gitBranch === null && typeof rec.gitBranch === 'string' && rec.gitBranch.length > 0) {
        meta.gitBranch = rec.gitBranch;
      }

      // Lineage tracking: only records with a `uuid` participate. The first
      // such record decides firstParentUuid (null when it has no string
      // parentUuid).
      if (typeof rec.uuid === 'string' && rec.uuid.length > 0) {
        if (meta.firstUuid === null) {
          meta.firstUuid = rec.uuid;
          meta.firstParentUuid = typeof rec.parentUuid === 'string' ? rec.parentUuid : null;
        }
        meta.lastUuid = rec.uuid;
      }

      // First human prompt: a matching record that yields no text (e.g. only
      // non-text content items) does not latch, so a later record can.
      if (
        meta.firstHumanPromptRaw === null &&
        rec.type === 'user' &&
        rec.message &&
        (
          rec.userType === 'external' ||
          rec.userType === 'human' ||
          rec.message.role === 'user'
        )
      ) {
        const text = extractText(rec.message.content);
        if (text !== null) meta.firstHumanPromptRaw = text;
      }
    }
  } finally {
    // Make sure the file stream is released even if iteration ends early;
    // this is a no-op when the stream has already closed.
    stream.destroy();
  }

  // Lines were skipped and not a single usable record was recovered.
  if (meta.recordCount === 0 && parseErrors > 0) {
    meta.status = 'corrupted';
  }

  return meta;
}