claude-code-station 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,402 @@
1
+ /**
2
+ * ccs-scan-sessions.ts — Session indexing for the ccs scan engine.
3
+ *
4
+ * Extracted from ccs-scan.ts (review A-4): walks ~/.claude/projects/,
5
+ * parses session JSONL files with mtime-based caching, resolves each
6
+ * session's cwd to a registered repo, and keeps the sessions table in sync
7
+ * (upsert / delete-missing / remap-unmapped).
8
+ *
9
+ * ccs-scan.ts owns orchestration (repos.yml sync, repo_stats scan, CLI) and
10
+ * calls scanSessions() BEFORE the repo pass so repo_stats aggregates see
11
+ * fresh session data (audit C-1).
12
+ *
13
+ * Source of truth: docs/design/sqlite-schema.md
14
+ */
15
+
16
+ import { readdir, readFile, stat } from "node:fs/promises";
17
+ import { existsSync, realpathSync } from "node:fs";
18
+ import { join, basename } from "node:path";
19
+ import { homedir } from "node:os";
20
+ import type Database from "better-sqlite3";
21
+
22
+ import { getAllRepos, type RepoRow } from "./ccs-db.ts";
23
+ import { maskSecrets } from "./ccs-secrets.ts";
24
+ import { sanitizeSessionCwd, stripControlChars } from "./ccs-sanitize.ts";
25
+ import { extractText, nowIso, MAX_JSONL_SIZE, UUID_RE } from "./ccs-utils.ts";
26
+
27
+ // ---------------------------------------------------------------------------
28
+ // Constants
29
+ // ---------------------------------------------------------------------------
30
+
31
/** Maximum stored topic length; applied with `String.prototype.slice`. */
const MAX_TOPIC_LEN = 200;

/** Maximum stored summary length; applied with `String.prototype.slice`. */
const MAX_SUMMARY_LEN = 1000;

/**
 * Captures (group 1) the text between the `ECC:SUMMARY:START` and
 * `ECC:SUMMARY:END` HTML-comment markers. The capture is lazy and the regex
 * has no `g` flag, so `exec` yields the first marker pair only.
 */
const SUMMARY_RE =
  /<!--\s*ECC:SUMMARY:START\s*-->([\s\S]*?)<!--\s*ECC:SUMMARY:END\s*-->/;
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // JSONL parsing
38
+ // ---------------------------------------------------------------------------
39
+
40
/**
 * One parsed session, in the shape `parseSessionJsonl` returns and
 * `scanSessions` writes to the `sessions` table.
 */
export interface SessionParseResult {
  /** JSONL file base name with the `.jsonl` suffix removed. */
  uuid: string;
  /** The `projectDir` argument handed to the parser, passed through as-is. */
  project_dir: string;
  /** Sanitized and secret-masked working directory, or `"unknown"`. */
  cwd: string;
  /** First non-empty `gitBranch` seen, control characters stripped; else `null`. */
  branch: string | null;
  /** Smallest row timestamp, or the file mtime when no row has one. */
  started_at: string;
  /** Largest row timestamp, or the file mtime when no row has one. */
  last_activity_at: string;
  /** Number of rows whose `type` is `"user"` or `"assistant"`. */
  message_count: number;
  /** Cleaned, masked and truncated first user message; `null` if none. */
  topic: string | null;
  /** Masked and truncated text between the summary markers; `null` if absent. */
  summary: string | null;
  /** File size as supplied by the caller. */
  jsonl_size: number;
  /** File mtime as an ISO string, as supplied by the caller. */
  jsonl_mtime: string;
}
53
+
54
/**
 * Parse one session JSONL file into the row shape stored for a session.
 *
 * @param filePath   Path of the `<uuid>.jsonl` file to read.
 * @param projectDir Value copied verbatim into `project_dir`.
 * @param size       File size as measured by the caller; gates the read.
 * @param mtimeIso   File mtime as an ISO string; stored as `jsonl_mtime` and
 *                   used as the fallback for `started_at` /
 *                   `last_activity_at` when no row carries a timestamp.
 * @returns The parsed session, or `null` when the file base name fails
 *          `UUID_RE`, `size` is 0 or exceeds `MAX_JSONL_SIZE`, the file cannot
 *          be read, or it holds no non-blank lines.
 */
export async function parseSessionJsonl(
  filePath: string,
  projectDir: string,
  size: number,
  mtimeIso: string,
): Promise<SessionParseResult | null> {
  const fileBase = basename(filePath).replace(/\.jsonl$/, "");
  if (!UUID_RE.test(fileBase)) return null;
  if (size === 0 || size > MAX_JSONL_SIZE) return null;

  let content: string;
  try {
    content = await readFile(filePath, "utf-8");
  } catch {
    return null;
  }

  const lines = content.split("\n").filter((l) => l.trim().length > 0);
  if (lines.length === 0) return null;

  let cwd = "";
  let branch: string | null = null;
  let firstTs = "";
  let lastTs = "";
  let firstUserMsg = "";
  let msgCount = 0;

  for (const line of lines) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue;
    }
    // A line can be valid JSON without being an object. A bare `null` would
    // throw on the property reads below and abort the whole file, so one odd
    // line would keep the session from ever being indexed. Primitives carry
    // no fields, so skipping them loses nothing.
    if (parsed === null || typeof parsed !== "object") continue;
    const entry = parsed as Record<string, unknown>;

    // message_count means CONVERSATION messages (audit logic H-4) — JSONL
    // also carries summary/meta/tool rows which must not inflate the count.
    // ccs-preview-session.ts counts the same way.
    if (entry.type === "user" || entry.type === "assistant") msgCount++;
    if (!cwd && typeof entry.cwd === "string") cwd = entry.cwd;
    if (!branch && typeof entry.gitBranch === "string") {
      branch = entry.gitBranch || null;
    }
    const ts = typeof entry.timestamp === "string" ? entry.timestamp : "";
    if (ts) {
      // min/max by value, not by line order (audit logic M-6): sidechain rows
      // can be appended out of chronological order. ISO 8601 (always Z here)
      // sorts lexicographically, so string comparison is safe.
      if (!firstTs || ts < firstTs) firstTs = ts;
      if (!lastTs || ts > lastTs) lastTs = ts;
    }
    if (
      !firstUserMsg &&
      entry.type === "user" &&
      entry.message &&
      typeof entry.message === "object"
    ) {
      const raw = extractText(
        (entry.message as { content?: unknown }).content,
      );
      if (raw && !raw.includes("[Request interrupted by user")) {
        // Drop paired tags with their contents, then any leftover lone tags,
        // then collapse whitespace. If nothing is left, firstUserMsg stays
        // empty and the next user row is tried.
        firstUserMsg = raw
          .replace(/<[a-z_-]+>[\s\S]*?<\/[a-z_-]+>/gi, "")
          .replace(/<[^>]+>/g, "")
          .replace(/\s+/g, " ")
          .trim();
      }
    }
  }

  // summary: search whole content
  // Pipeline order matters: strip control chars first so secret patterns see
  // clean text, mask BEFORE truncation so a token straddling the
  // MAX_SUMMARY_LEN/MAX_TOPIC_LEN boundary is still redacted intact.
  const summaryBody = SUMMARY_RE.exec(content)?.[1];
  const summary =
    summaryBody !== undefined
      ? maskSecrets(stripControlChars(summaryBody.trim())).slice(
          0,
          MAX_SUMMARY_LEN,
        )
      : null;

  const topic = firstUserMsg
    ? maskSecrets(stripControlChars(firstUserMsg)).slice(0, MAX_TOPIC_LEN)
    : null;

  // Trust boundary (audit H-1/M-2/NEW-1): the JSONL cwd field is attacker
  // controllable and later reaches the fzf row, the Ctrl-Y clipboard command
  // and a shell `cd`. Gate on the RAW value first — reject any shell
  // metacharacter / control char (a rejected cwd degrades to the "unknown"
  // sentinel, which bin/ccs treats as non-launchable) — then mask any secret
  // embedded in an otherwise-clean path before it is stored/displayed (M-2).
  const cleanCwd = cwd ? sanitizeSessionCwd(cwd) : null;
  const safeCwd = cleanCwd ? maskSecrets(cleanCwd) : null;

  return {
    uuid: fileBase,
    project_dir: projectDir,
    cwd: safeCwd ?? "unknown",
    branch: branch ? stripControlChars(branch) || null : null,
    started_at: firstTs || mtimeIso,
    last_activity_at: lastTs || mtimeIso,
    message_count: msgCount,
    topic,
    summary,
    jsonl_size: size,
    jsonl_mtime: mtimeIso,
  };
}
163
+
164
+ // ---------------------------------------------------------------------------
165
+ // cwd → repo resolution
166
+ // ---------------------------------------------------------------------------
167
+
168
/**
 * Resolve a session cwd to a registered repo by LONGEST prefix match
 * (audit logic M-4): a session started in ~/Workspace/foo/packages/bar must
 * map to the repo registered at ~/Workspace/foo, and when repos nest, the
 * deepest path wins. Exact matches naturally win via longest-first ordering.
 *
 * Each root is also registered under its realpath (review DA-3): sessions
 * record the cwd the process actually ran in, so a repo registered via a
 * symlinked path (or under macOS /var -> /private/var aliasing) would never
 * match its sessions on the lexical path alone.
 *
 * Roots are normalised by dropping trailing slashes, so a repo registered as
 * `/a/b/` matches `/a/b` and everything beneath it. A root of `/` matches
 * every path that starts with `/`, at the lowest priority.
 *
 * @param repos Registered repos; each repo's `path` and `cwd` are both used
 *              as candidate roots when non-empty.
 * @returns A function mapping a cwd to the owning repo's name, or `null`
 *          when no root is an exact match or an ancestor directory.
 */
export function buildRepoResolver(
  repos: RepoRow[],
): (cwd: string) => string | null {
  const SLASH = 47; // "/".charCodeAt(0)

  // Drop trailing slashes but never shrink below one character, so "/" and
  // "///" both normalise to "/".
  const normalizeRoot = (p: string): string => {
    let end = p.length;
    while (end > 1 && p.charCodeAt(end - 1) === SLASH) end--;
    return p.slice(0, end);
  };

  const roots: Array<{ path: string; prefix: string; name: string }> = [];
  const seen = new Set<string>();
  const addRoot = (rawPath: string, name: string): void => {
    const path = normalizeRoot(rawPath);
    // NUL cannot occur in a filesystem path, so the key is unambiguous even
    // when the path or the name contains spaces.
    const key = `${path}\u0000${name}`;
    if (seen.has(key)) return;
    seen.add(key);
    // Matching on "<root>/" keeps /a/foobar from matching a root of /a/foo.
    roots.push({ path, prefix: path === "/" ? "/" : `${path}/`, name });
  };

  for (const r of repos) {
    for (const p of [r.path, r.cwd]) {
      if (!p) continue;
      addRoot(p, r.name);
      try {
        // addRoot de-duplicates, so an unchanged realpath is a no-op.
        addRoot(realpathSync(p), r.name);
      } catch {
        // path absent — the lexical root above is all we can match on
      }
    }
  }

  // Longest root first; ties keep registration order (stable sort).
  roots.sort((a, b) => b.path.length - a.path.length);

  return (cwd: string) => {
    for (const root of roots) {
      if (cwd === root.path || cwd.startsWith(root.prefix)) {
        return root.name;
      }
    }
    return null;
  };
}
212
+
213
+ // ---------------------------------------------------------------------------
214
+ // Sessions scan
215
+ // ---------------------------------------------------------------------------
216
+
217
/**
 * Bring the `sessions` table in line with the JSONL files found under
 * `~/.claude/projects/<project>/<uuid>.jsonl`.
 *
 * Steps, in order:
 *  1. upsert every session whose file mtime differs from the stored one;
 *  2. delete rows whose JSONL was not seen on disk during this scan;
 *  3. retry repo resolution for rows whose `repo_name` is still NULL.
 *
 * @param db Open database handle holding the `sessions` table.
 * @returns `indexed` counts upserted sessions. `skipped` counts files left
 *          alone because they were unchanged, oversized or unparseable. A
 *          file whose parse or upsert throws is logged to stderr and counted
 *          in neither.
 */
export async function scanSessions(
  db: Database.Database,
): Promise<{ indexed: number; skipped: number }> {
  const root = join(homedir(), ".claude", "projects");
  if (!existsSync(root)) return { indexed: 0, skipped: 0 };

  const resolveRepoName = buildRepoResolver(getAllRepos(db));

  // uuid -> stored jsonl_mtime; doubles as the "already indexed" set.
  const knownRows = db
    .prepare(`SELECT uuid, jsonl_mtime FROM sessions`)
    .all() as Array<{ uuid: string; jsonl_mtime: string }>;
  const knownMtimes = new Map<string, string>();
  knownRows.forEach((r) => knownMtimes.set(r.uuid, r.jsonl_mtime));

  // Filesystem probes that report failure as null instead of throwing.
  const listOrNull = async (dir: string): Promise<string[] | null> => {
    try {
      return await readdir(dir);
    } catch {
      return null;
    }
  };
  const statOrNull = async (target: string) => {
    try {
      return await stat(target);
    } catch {
      return null;
    }
  };

  const projectNames = await listOrNull(root);
  if (projectNames === null) return { indexed: 0, skipped: 0 };

  // indexed_at is bound as an explicit ISO 8601 value (nowIso) instead of
  // SQLite's `datetime('now')`, so every timestamp in the table shares one
  // format (audit logic M-1).
  const upsertStmt = db.prepare(
    `INSERT INTO sessions (
       uuid, repo_name, project_dir, cwd, branch,
       started_at, last_activity_at, message_count,
       topic, summary, jsonl_size, jsonl_mtime, indexed_at
     ) VALUES (
       @uuid, @repo_name, @project_dir, @cwd, @branch,
       @started_at, @last_activity_at, @message_count,
       @topic, @summary, @jsonl_size, @jsonl_mtime, @indexed_at
     )
     ON CONFLICT(uuid) DO UPDATE SET
       repo_name = excluded.repo_name,
       project_dir = excluded.project_dir,
       cwd = excluded.cwd,
       branch = excluded.branch,
       started_at = excluded.started_at,
       last_activity_at = excluded.last_activity_at,
       message_count = excluded.message_count,
       topic = excluded.topic,
       summary = excluded.summary,
       jsonl_size = excluded.jsonl_size,
       jsonl_mtime = excluded.jsonl_mtime,
       indexed_at = excluded.indexed_at`,
  );

  // For a file that grew past the size limit after being indexed: keep the
  // last good row but record the new size/mtime, so the mtime check skips it
  // on later scans instead of re-evaluating it each time (review C-4).
  const touchStmt = db.prepare(
    `UPDATE sessions
     SET jsonl_size = @jsonl_size,
         jsonl_mtime = @jsonl_mtime,
         indexed_at = @indexed_at
     WHERE uuid = @uuid`,
  );

  const seenOnDisk = new Set<string>();
  const tally = { indexed: 0, skipped: 0 };
  const SUFFIX = ".jsonl";

  for (const projName of projectNames) {
    const projPath = join(root, projName);
    const projInfo = await statOrNull(projPath);
    if (projInfo === null || !projInfo.isDirectory()) continue;

    const entries = await listOrNull(projPath);
    if (entries === null) continue;

    for (const fileName of entries) {
      if (!fileName.endsWith(SUFFIX)) continue;
      const uuid = fileName.slice(0, -SUFFIX.length);
      if (!UUID_RE.test(uuid)) continue;

      const fullPath = join(projPath, fileName);
      const info = await statOrNull(fullPath);
      if (info === null) continue;

      // Recorded before any skip decision, so a skipped file still protects
      // its row from the delete pass below.
      seenOnDisk.add(uuid);
      const mtimeIso = new Date(info.mtimeMs).toISOString();

      const unchanged = knownMtimes.get(uuid) === mtimeIso;
      const oversized = !unchanged && info.size > MAX_JSONL_SIZE;
      if (oversized && knownMtimes.has(uuid)) {
        touchStmt.run({
          jsonl_size: info.size,
          jsonl_mtime: mtimeIso,
          indexed_at: nowIso(),
          uuid,
        });
      }
      // An oversized file with no prior row is deliberately left unlisted.
      if (unchanged || oversized) {
        tally.skipped++;
        continue;
      }

      try {
        const parsed = await parseSessionJsonl(
          fullPath,
          projName,
          info.size,
          mtimeIso,
        );
        if (parsed === null) {
          tally.skipped++;
          continue;
        }
        const repoName = resolveRepoName(parsed.cwd);
        const row = {
          uuid: parsed.uuid,
          repo_name: repoName,
          project_dir: parsed.project_dir,
          cwd: parsed.cwd,
          branch: parsed.branch,
          started_at: parsed.started_at,
          last_activity_at: parsed.last_activity_at,
          message_count: parsed.message_count,
          topic: parsed.topic,
          summary: parsed.summary,
          jsonl_size: parsed.jsonl_size,
          jsonl_mtime: parsed.jsonl_mtime,
          indexed_at: nowIso(),
        };
        upsertStmt.run(row);
        tally.indexed++;
      } catch (err) {
        // One bad session must not stop the scan: report it and move on.
        const reason = err instanceof Error ? err.message : String(err);
        process.stderr.write(`[ccs-scan] session ${uuid}: ${reason}\n`);
      }
    }
  }

  // Drop rows whose JSONL was not seen this scan, in batches so the number
  // of bound parameters per statement stays bounded.
  const BATCH = 500;
  const stale = [...knownMtimes.keys()].filter((u) => !seenOnDisk.has(u));
  for (let offset = 0; offset < stale.length; offset += BATCH) {
    const batch = stale.slice(offset, offset + BATCH);
    const placeholders = batch.map(() => "?").join(",");
    db.prepare(`DELETE FROM sessions WHERE uuid IN (${placeholders})`).run(
      ...batch,
    );
  }

  // Re-resolve unmapped sessions (audit logic M-3): rows skipped by the mtime
  // check keep the repo_name chosen when they were first indexed, so a repo
  // registered later would never claim its older sessions. Only rows that are
  // still NULL are reconsidered.
  db.transaction(() => {
    const unmapped = db
      .prepare(`SELECT uuid, cwd FROM sessions WHERE repo_name IS NULL`)
      .all() as Array<{ uuid: string; cwd: string }>;
    const assignRepo = db.prepare(
      `UPDATE sessions SET repo_name = ? WHERE uuid = ?`,
    );
    for (const { uuid, cwd } of unmapped) {
      const name = resolveRepoName(cwd);
      if (name) assignRepo.run(name, uuid);
    }
  })();

  return { indexed: tally.indexed, skipped: tally.skipped };
}