npm - @druumen/sessions-db - Versions diffs - 0.1.0 - Mend

@druumen/sessions-db 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/CHANGELOG.md +249 -0
package/LICENSE +201 -0
package/NOTICE +10 -0
package/README.md +250 -0
package/cli/_write-helpers.mjs +99 -0
package/cli/alias.mjs +115 -0
package/cli/argparse.mjs +296 -0
package/cli/close.mjs +116 -0
package/cli/find.mjs +185 -0
package/cli/format.mjs +277 -0
package/cli/link-parent.mjs +133 -0
package/cli/link.mjs +132 -0
package/cli/rebuild.mjs +98 -0
package/cli/sessions-db-session-start-main.mjs +454 -0
package/cli/sessions-db-session-start.mjs +56 -0
package/cli/sessions-db.mjs +119 -0
package/cli/sweep.mjs +171 -0
package/cli/tree.mjs +127 -0
package/lib/git-context.mjs +479 -0
package/lib/identity.mjs +616 -0
package/lib/index.mjs +145 -0
package/lib/init.mjs +185 -0
package/lib/lock.mjs +86 -0
package/lib/operations.mjs +490 -0
package/lib/paths.mjs +199 -0
package/lib/projection.mjs +496 -0
package/lib/sanitize.mjs +131 -0
package/lib/storage.mjs +759 -0
package/lib/sweep.mjs +209 -0
package/lib/transcript.mjs +230 -0
package/lib/types.mjs +276 -0
package/lib/uuid.mjs +116 -0
package/lib/watch.mjs +217 -0
package/package.json +53 -0
package/types/git-context.d.mts +98 -0
package/types/identity.d.mts +658 -0
package/types/index.d.mts +10 -0
package/types/index.d.ts +127 -0
package/types/init.d.mts +53 -0
package/types/lock.d.mts +18 -0
package/types/operations.d.mts +204 -0
package/types/paths.d.mts +54 -0
package/types/projection.d.mts +79 -0
package/types/sanitize.d.mts +39 -0
package/types/storage.d.mts +276 -0
package/types/sweep.d.mts +58 -0
package/types/transcript.d.mts +59 -0
package/types/types.d.mts +255 -0
package/types/uuid.d.mts +17 -0
package/types/watch.d.mts +33 -0

package/lib/sweep.mjs ADDED Viewed

@@ -0,0 +1,209 @@
+/**
+ * Pure sweep logic for sessions-db (Phase 5).
+ *
+ * `computeSweepTransitions` takes a projection snapshot + thresholds and
+ * returns the list of activity_state transitions the caller should write as
+ * `sweep` events. Pure: no IO, fully deterministic given (projection, now,
+ * idleThresholdDays, archiveThresholdDays). The caller (CLI sweep handler)
+ * is responsible for writing one event per transition through the normal
+ * `tryUpdateProjection` path so events.jsonl + projection cache stay in
+ * lockstep.
+ *
+ * State machine (terminal: archived):
+ *
+ *   active   ──ageDays >= idleThreshold──▶ idle
+ *   active   ──ageDays >= archiveThreshold──▶ archived
+ *   idle     ──ageDays >= archiveThreshold──▶ archived
+ *   idle     ──ageDays < idleThreshold──▶ active   (newer activity re-promotes)
+ *   archived (terminal — never re-promoted by sweep)
+ *
+ * Idempotency:
+ *   - Sweep does not generate a transition when the computed target equals
+ *     the session's current activity_state. So invoking sweep twice on the
+ *     same projection (with the same `now`) yields zero transitions on the
+ *     second run.
+ *   - `archived` is terminal — sweep never touches it. Operators rehydrate
+ *     archived sessions explicitly (out of P5 scope).
+ *
+ * Effective last-progress = max ISO timestamp of:
+ *   - session.last_progress_at
+ *   - session.transcript_files[*].mtime
+ *   - session.hive_watcher_last_seen   (placeholder for future integration —
+ *     hive-watcher will surface filesystem activity timestamps independent of
+ *     hook-derived events; reading the field today is a no-op when absent)
+ *
+ * Why max instead of just last_progress_at? `last_progress_at` is bumped by
+ * hook-derived events (session_seen, alias_set, link, ...). A session whose
+ * transcript is being actively appended (long /loop run, codex-rescue) but
+ * which hasn't fired a hook in the sweep window would otherwise be flagged
+ * idle. Transcript mtimes from session_seen payloads + the future hive-
+ * watcher signal cover that gap.
+ */
+const MS_PER_DAY = 24 * 60 * 60 * 1000;
+const DEFAULT_IDLE_THRESHOLD_DAYS = 14;
+const DEFAULT_ARCHIVE_THRESHOLD_DAYS = 30;
+/**
+ * Compute desired activity_state transitions for all sessions in a projection.
+ *
+ * @param {object} projection
+ * @param {{
+ *   now?: number,
+ *   idleThresholdDays?: number,
+ *   archiveThresholdDays?: number,
+ * }} [opts]
+ * @returns {Array<{
+ *   stable_id: string,
+ *   from_state: string,
+ *   to_state: string,
+ *   effective_last_progress: string,
+ *   age_days: number,
+ * }>}
+ */
+export function computeSweepTransitions(projection, opts = {}) {
+  const now = typeof opts.now === 'number' ? opts.now : Date.now();
+  const idleThreshold = pickThreshold(
+    opts.idleThresholdDays,
+    projection && projection._meta && projection._meta.idle_threshold_days,
+    DEFAULT_IDLE_THRESHOLD_DAYS,
+  );
+  const archiveThreshold = pickThreshold(
+    opts.archiveThresholdDays,
+    projection && projection._meta && projection._meta.archive_threshold_days,
+    DEFAULT_ARCHIVE_THRESHOLD_DAYS,
+  );
+  const sessions = projection && projection.sessions ? projection.sessions : {};
+  const transitions = [];
+  for (const [stableId, session] of Object.entries(sessions)) {
+    if (!session || typeof session !== 'object') continue;
+    // Terminal — sweep never re-promotes archived sessions. Operator must
+    // rehydrate explicitly (out of P5 scope).
+    if (session.activity_state === 'archived') continue;
+    const hasSignal = hasAnyParseableTimestamp(session);
+    if (!hasSignal) {
+      // Defensive: a session with no parseable timestamp at all means we
+      // genuinely cannot decide its age. Skip rather than treat it as
+      // infinitely old (which would archive every freshly-minted session
+      // that happened to be persisted before its first ts wrote).
+      continue;
+    }
+    const effective = computeEffectiveLastProgress(session);
+    const effectiveMs = Date.parse(effective);
+    if (!Number.isFinite(effectiveMs)) continue;
+    const ageMs = now - effectiveMs;
+    const ageDays = Math.floor(ageMs / MS_PER_DAY);
+    let target;
+    if (ageDays >= archiveThreshold) target = 'archived';
+    else if (ageDays >= idleThreshold) target = 'idle';
+    else target = 'active';
+    if (target === session.activity_state) continue;
+    transitions.push({
+      stable_id: stableId,
+      from_state: session.activity_state,
+      to_state: target,
+      effective_last_progress: effective,
+      age_days: ageDays,
+    });
+  }
+  return transitions;
+}
+/**
+ * Compute the effective "last progress" timestamp for a session — the max
+ * (latest) ISO 8601 timestamp across:
+ *   - session.last_progress_at
+ *   - session.transcript_files[*].mtime
+ *   - session.hive_watcher_last_seen   (future hive-watcher integration)
+ *
+ * Returns the epoch ISO string when no candidate is parseable.
+ *
+ * Implementation note (codex P5 round-1 fix): we MUST parse each candidate
+ * to epoch ms and compare numerically, then re-emit a normalized ISO 8601
+ * (Z) string. A naive lexicographic `candidates.sort().pop()` only works
+ * when every candidate is uniformly Z-suffixed with identical fractional
+ * precision — and that invariant is fragile in practice:
+ *   - transcript_files[*].mtime is sourced from the local fs `Stats.mtime`
+ *     and gets ISO-stringified at write time; on a host with non-UTC
+ *     TZ env the stringifier may emit `+02:00` offsets.
+ *   - hive_watcher_last_seen comes from a different writer with its own
+ *     formatter (sub-millisecond precision possible).
+ *   - operator-supplied --json fixtures may carry mixed precisions.
+ * Lex-sorting `2026-05-09T05:00:00+02:00` against `2026-05-09T04:00:00.000Z`
+ * picks the wrong winner; lex-sorting `...100Z` against `...100.500Z`
+ * picks the SHORTER string as larger because `0` < `5` at position 23 once
+ * the lengths diverge. Both are silent miscategorization → wrong sweep
+ * verdict. Date.parse() canonicalizes everything to a single epoch axis.
+ *
+ * @param {object} session
+ * @returns {string} ISO 8601 timestamp (always Z, normalized)
+ */
+export function computeEffectiveLastProgress(session) {
+  if (!session || typeof session !== 'object') {
+    return new Date(0).toISOString();
+  }
+  let maxEpoch = -Infinity;
+  const considerCandidate = (raw) => {
+    if (typeof raw !== 'string' || raw.length === 0) return;
+    const epoch = Date.parse(raw);
+    if (!Number.isFinite(epoch)) return;
+    if (epoch > maxEpoch) maxEpoch = epoch;
+  };
+  considerCandidate(session.last_progress_at);
+  if (Array.isArray(session.transcript_files)) {
+    for (const tf of session.transcript_files) {
+      if (tf && typeof tf === 'object') considerCandidate(tf.mtime);
+    }
+  }
+  considerCandidate(session.hive_watcher_last_seen);
+  if (maxEpoch === -Infinity) {
+    return new Date(0).toISOString();
+  }
+  return new Date(maxEpoch).toISOString();
+}
+/**
+ * Did the session record carry at least one parseable timestamp from any of
+ * the recognized signal sources? Sweep relies on this to distinguish "we
+ * really know nothing about this session's recency" (skip — defensive) from
+ * "the session is genuinely stale" (transition).
+ *
+ * Mirrors the candidate set in `computeEffectiveLastProgress` but returns a
+ * boolean rather than a timestamp.
+ */
+function hasAnyParseableTimestamp(session) {
+  if (typeof session.last_progress_at === 'string'
+      && Number.isFinite(Date.parse(session.last_progress_at))) {
+    return true;
+  }
+  if (Array.isArray(session.transcript_files)) {
+    for (const tf of session.transcript_files) {
+      if (tf && typeof tf.mtime === 'string'
+          && Number.isFinite(Date.parse(tf.mtime))) {
+        return true;
+      }
+    }
+  }
+  if (typeof session.hive_watcher_last_seen === 'string'
+      && Number.isFinite(Date.parse(session.hive_watcher_last_seen))) {
+    return true;
+  }
+  return false;
+}
+function pickThreshold(optsValue, metaValue, fallback) {
+  if (typeof optsValue === 'number' && Number.isFinite(optsValue) && optsValue > 0) {
+    return optsValue;
+  }
+  if (typeof metaValue === 'number' && Number.isFinite(metaValue) && metaValue > 0) {
+    return metaValue;
+  }
+  return fallback;
+}

package/lib/transcript.mjs ADDED Viewed

@@ -0,0 +1,230 @@
+/**
+ * Claude Code transcript jsonl reader.
+ *
+ * Layout assumption: every transcript is `~/.claude/projects/<workspace-hash>/
+ * <session_uuid>.jsonl`, one JSON record per line. Records mix several types
+ * (`user`, `assistant`, `system`, `attachment`, `queue-operation`,
+ * `file-history-snapshot`, `ai-title`, `last-prompt`); only some carry the
+ * `uuid` / `parentUuid` lineage fields. We therefore consider only records
+ * with a `uuid` for firstUuid/lastUuid/firstParentUuid extraction.
+ *
+ * The "first human prompt" is the first `type === 'user'` record matching
+ * any of these (fallback chain, first hit wins):
+ *   1. `userType === 'external'` — empirical truth, what current Claude Code
+ *      emits for human/IDE-originated messages.
+ *   2. `userType === 'human'` — what the design ticket assumed; included for
+ *      forward compat if Claude Code ever switches to the spec value.
+ *   3. `message.role === 'user'` — semantic fallback when neither userType
+ *      label is present (older harness builds, third-party tooling).
+ * If none match we leave firstHumanPromptRaw=null rather than mis-attributing
+ * a tool-result echo as the human's prompt.
+ *
+ * The reader is streaming (line-by-line) so it stays bounded on memory
+ * regardless of file size. We bail out before opening if the file exceeds
+ * `maxSizeMb` (default 50) and report `status: 'too_large'`.
+ */
+import { createReadStream, statSync, readdirSync, existsSync } from 'node:fs';
+import { createInterface } from 'node:readline';
+import { join } from 'node:path';
+import { homedir } from 'node:os';
+// Size ceiling in megabytes that parseTranscriptFile applies when
+// opts.maxSizeMb is not a positive finite number.
+const DEFAULT_MAX_MB = 50;
+// Directory under which per-workspace transcript directories are looked up:
+// <home>/.claude/projects. Resolved once, at module load.
+const CLAUDE_PROJECTS_ROOT = join(homedir(), '.claude', 'projects');
+/**
+ * @typedef {{
+ *   sessionId: string|null,
+ *   firstUuid: string|null,
+ *   lastUuid: string|null,
+ *   firstParentUuid: string|null,
+ *   recordCount: number,
+ *   firstHumanPromptRaw: string|null,
+ *   cwd: string|null,
+ *   gitBranch: string|null,
+ *   size: number,
+ *   mtime: Date,
+ *   status: 'ok' | 'corrupted' | 'too_large',
+ * }} TranscriptMeta
+ */
+/**
+ * Convert an absolute filesystem path to the dash-encoded workspace hash that
+ * Claude Code uses for the `~/.claude/projects/<hash>/` directory name. The
+ * encoding replaces every path separator and dot with a dash and keeps the
+ * leading dash that Claude Code itself prepends.
+ *
+ * @param {string} cwd absolute path
+ * @returns {string} e.g. `-Users-zm-leng-Documents-...-drummen-com-cn`
+ */
+export function workspaceHashFromCwd(cwd) {
+  if (typeof cwd !== 'string' || !cwd.startsWith('/')) {
+    throw new TypeError(`workspaceHashFromCwd: expected absolute path, got ${cwd}`);
+  }
+  return cwd.replace(/[/.]/g, '-');
+}
+/**
+ * List every `.jsonl` transcript in a workspace's Claude Code directory,
+ * sorted by mtime descending (newest first). Returns absolute paths.
+ *
+ * @param {string} workspaceHash dash-encoded hash, OR an absolute path that
+ *   we will hash for you.
+ * @returns {string[]}
+ */
+export function listTranscriptFiles(workspaceHash) {
+  const hash =
+    workspaceHash.startsWith('/') ? workspaceHashFromCwd(workspaceHash) : workspaceHash;
+  const dir = join(CLAUDE_PROJECTS_ROOT, hash);
+  if (!existsSync(dir)) return [];
+  /** @type {{ path: string, mtime: number }[]} */
+  const rows = [];
+  for (const entry of readdirSync(dir, { withFileTypes: true })) {
+    if (!entry.isFile() || !entry.name.endsWith('.jsonl')) continue;
+    const full = join(dir, entry.name);
+    let st;
+    try {
+      st = statSync(full);
+    } catch {
+      continue;
+    }
+    rows.push({ path: full, mtime: st.mtimeMs });
+  }
+  rows.sort((a, b) => b.mtime - a.mtime);
+  return rows.map((r) => r.path);
+}
+/**
+ * Pick out the human-readable text from a `message.content` field, which is
+ * either a plain string or an array of `{type, text}` objects. Non-text items
+ * (tool results, images, etc.) are dropped.
+ *
+ * @param {unknown} content
+ * @returns {string|null}
+ */
+function extractText(content) {
+  if (typeof content === 'string') return content;
+  if (Array.isArray(content)) {
+    const parts = [];
+    for (const item of content) {
+      if (item && typeof item === 'object' && item.type === 'text' && typeof item.text === 'string') {
+        parts.push(item.text);
+      }
+    }
+    if (parts.length === 0) return null;
+    return parts.join('\n');
+  }
+  return null;
+}
+/**
+ * Parse a single Claude Code transcript jsonl file and return its identity +
+ * lineage metadata. Streams the file line-by-line; never loads the whole
+ * thing into memory.
+ *
+ * @param {string} path absolute path to the jsonl file
+ * @param {{ maxSizeMb?: number }} [opts]
+ * @returns {Promise<TranscriptMeta>}
+ */
+export async function parseTranscriptFile(path, opts = {}) {
+  const maxSizeMb = Number.isFinite(opts.maxSizeMb) && opts.maxSizeMb > 0
+    ? opts.maxSizeMb
+    : DEFAULT_MAX_MB;
+  const st = statSync(path);
+  /** @type {TranscriptMeta} */
+  const meta = {
+    sessionId: null,
+    firstUuid: null,
+    lastUuid: null,
+    firstParentUuid: null,
+    recordCount: 0,
+    firstHumanPromptRaw: null,
+    cwd: null,
+    gitBranch: null,
+    size: st.size,
+    mtime: st.mtime,
+    status: 'ok',
+  };
+  if (st.size > maxSizeMb * 1024 * 1024) {
+    meta.status = 'too_large';
+    return meta;
+  }
+  const stream = createReadStream(path, { encoding: 'utf8' });
+  const rl = createInterface({ input: stream, crlfDelay: Infinity });
+  let sawAnyValidRecord = false;
+  let parseErrors = 0;
+  // Track first parent-bearing record separately so we can decide whether the
+  // file represents a fresh session (firstParentUuid === null) or a resume
+  // (parentUuid points into another file).
+  let firstUuidBearingRecord = null;
+  for await (const line of rl) {
+    if (line.length === 0) continue;
+    let rec;
+    try {
+      rec = JSON.parse(line);
+    } catch {
+      parseErrors += 1;
+      continue;
+    }
+    if (!rec || typeof rec !== 'object') {
+      parseErrors += 1;
+      continue;
+    }
+    sawAnyValidRecord = true;
+    meta.recordCount += 1;
+    // sessionId is consistent across the file; latch the first non-empty.
+    if (meta.sessionId === null && typeof rec.sessionId === 'string' && rec.sessionId.length > 0) {
+      meta.sessionId = rec.sessionId;
+    }
+    // cwd / gitBranch — first non-empty wins.
+    if (meta.cwd === null && typeof rec.cwd === 'string' && rec.cwd.length > 0) {
+      meta.cwd = rec.cwd;
+    }
+    if (meta.gitBranch === null && typeof rec.gitBranch === 'string' && rec.gitBranch.length > 0) {
+      meta.gitBranch = rec.gitBranch;
+    }
+    // Lineage tracking: only records with a `uuid` participate.
+    if (typeof rec.uuid === 'string' && rec.uuid.length > 0) {
+      if (firstUuidBearingRecord === null) {
+        firstUuidBearingRecord = rec;
+        meta.firstUuid = rec.uuid;
+        meta.firstParentUuid = typeof rec.parentUuid === 'string' ? rec.parentUuid : null;
+      }
+      meta.lastUuid = rec.uuid;
+    }
+    // First human prompt: type='user' AND any of the userType fallbacks
+    // (external = empirical, human = ticket spec, message.role = semantic).
+    if (
+      meta.firstHumanPromptRaw === null &&
+      rec.type === 'user' &&
+      rec.message &&
+      (
+        rec.userType === 'external' ||
+        rec.userType === 'human' ||
+        rec.message.role === 'user'
+      )
+    ) {
+      const text = extractText(rec.message.content);
+      if (text !== null) meta.firstHumanPromptRaw = text;
+    }
+  }
+  if (!sawAnyValidRecord) {
+    meta.status = parseErrors > 0 ? 'corrupted' : 'ok';
+  } else if (parseErrors > 0 && meta.recordCount === 0) {
+    // We skipped some lines but never recovered a usable record.
+    meta.status = 'corrupted';
+  }
+  return meta;
+}