@arcreflex/agent-transcripts 0.1.5 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,8 +4,12 @@
4
4
  * Parses session files from ~/.claude/projects/{project}/sessions/{session}.jsonl
5
5
  */
6
6
 
7
+ import { Glob } from "bun";
8
+ import { basename, join, relative } from "path";
9
+ import { stat } from "fs/promises";
7
10
  import type {
8
11
  Adapter,
12
+ DiscoveredSession,
9
13
  Transcript,
10
14
  Message,
11
15
  Warning,
@@ -13,6 +17,21 @@ import type {
13
17
  } from "../types.ts";
14
18
  import { extractToolSummary } from "../utils/summary.ts";
15
19
 
20
/**
 * Shape of the `sessions-index.json` file written by Claude Code.
 */
interface SessionsIndex {
  /** Index format version; index-based discovery only accepts the value 1. */
  version: number;
  /** The sessions listed by the index, one entry each. */
  entries: Array<SessionIndexEntry>;
}
27
+
28
/**
 * One session record inside `sessions-index.json`.
 */
interface SessionIndexEntry {
  sessionId: string;
  /** Path of the session file; discovery stats this path and reports it as the session path. */
  fullPath: string;
  /** Modification time as recorded in the index (unit not visible here — TODO confirm). */
  fileMtime: number;
  /** True for sidechain (subagent) sessions, which discovery skips. */
  isSidechain: boolean;
}
34
+
16
35
  // Claude Code JSONL record types
17
36
  interface ClaudeRecord {
18
37
  type: string;
@@ -25,6 +44,7 @@ interface ClaudeRecord {
25
44
  };
26
45
  content?: string;
27
46
  subtype?: string;
47
+ cwd?: string;
28
48
  }
29
49
 
30
50
  interface ContentBlock {
@@ -35,7 +55,7 @@ interface ContentBlock {
35
55
  name?: string;
36
56
  input?: Record<string, unknown>;
37
57
  tool_use_id?: string;
38
- content?: string;
58
+ content?: unknown; // Can be string, array, or other structure
39
59
  }
40
60
 
41
61
  /**
@@ -67,11 +87,42 @@ function parseJsonl(content: string): {
67
87
  return { records, warnings };
68
88
  }
69
89
 
90
/**
 * Resolve the closest ancestor that is a message record.
 *
 * Starting at `parentUuid`, follows `parentUuid` links through `allByUuid`
 * until a UUID contained in `messageUuids` is reached.
 *
 * @param parentUuid - UUID to start the walk from (the record's direct parent).
 * @param allByUuid - Lookup of every record by UUID, messages and non-messages alike.
 * @param messageUuids - UUIDs that count as message records.
 * @returns The first UUID on the chain that is in `messageUuids`, or
 *   undefined when the chain ends, leaves the known records, or loops.
 */
function findMessageAncestor(
  parentUuid: string | null | undefined,
  allByUuid: Map<string, ClaudeRecord>,
  messageUuids: Set<string>,
): string | undefined {
  const seen = new Set<string>();
  let cursor: string | null | undefined = parentUuid;

  // Stop on a missing link, or when a UUID repeats (cyclic parent chain).
  while (cursor && !seen.has(cursor)) {
    if (messageUuids.has(cursor)) return cursor;
    seen.add(cursor);
    cursor = allByUuid.get(cursor)?.parentUuid ?? null;
  }

  return undefined;
}
114
+
115
/** Value returned by `splitConversations`. */
interface SplitResult {
  /** The conversations found, each one a list of records. */
  conversations: Array<ClaudeRecord[]>;
  /**
   * For every message UUID, the UUID of its nearest message ancestor
   * (undefined when it has none).
   */
  resolvedParents: Map<string, string | undefined>;
}
120
+
70
121
  /**
71
122
  * Build message graph and find conversation boundaries.
72
- * Returns array of conversation groups (each is array of records in order).
123
+ * Returns conversations and a map of resolved parent references.
73
124
  */
74
- function splitConversations(records: ClaudeRecord[]): ClaudeRecord[][] {
125
+ function splitConversations(records: ClaudeRecord[]): SplitResult {
75
126
  // Filter to only message records (user, assistant, system with uuid)
76
127
  const messageRecords = records.filter(
77
128
  (r) =>
@@ -79,29 +130,52 @@ function splitConversations(records: ClaudeRecord[]): ClaudeRecord[][] {
79
130
  (r.type === "user" || r.type === "assistant" || r.type === "system"),
80
131
  );
81
132
 
82
- if (messageRecords.length === 0) return [];
83
-
84
- // Build parent → children map
85
- const byUuid = new Map<string, ClaudeRecord>();
86
- const children = new Map<string, string[]>();
133
+ if (messageRecords.length === 0) {
134
+ return { conversations: [], resolvedParents: new Map() };
135
+ }
87
136
 
88
- for (const rec of messageRecords) {
137
+ // Build UUID lookup for ALL records to track parent chains through non-messages
138
+ const allByUuid = new Map<string, ClaudeRecord>();
139
+ for (const rec of records) {
89
140
  if (rec.uuid) {
90
- byUuid.set(rec.uuid, rec);
91
- const parent = rec.parentUuid;
92
- if (parent) {
93
- const existing = children.get(parent) || [];
94
- existing.push(rec.uuid);
95
- children.set(parent, existing);
96
- }
141
+ allByUuid.set(rec.uuid, rec);
97
142
  }
98
143
  }
99
144
 
100
- // Find roots (no parent or parent not in our set)
145
+ // Set of message UUIDs for quick lookup
146
+ const messageUuids = new Set<string>();
147
+ for (const rec of messageRecords) {
148
+ if (rec.uuid) messageUuids.add(rec.uuid);
149
+ }
150
+
151
+ // Build parent → children map, resolving through non-message records
152
+ // Also track resolved parents for use in transformation
153
+ const byUuid = new Map<string, ClaudeRecord>();
154
+ const children = new Map<string, string[]>();
155
+ const resolvedParents = new Map<string, string | undefined>();
101
156
  const roots: string[] = [];
157
+
102
158
  for (const rec of messageRecords) {
103
- if (!rec.parentUuid || !byUuid.has(rec.parentUuid)) {
104
- if (rec.uuid) roots.push(rec.uuid);
159
+ if (!rec.uuid) continue;
160
+ byUuid.set(rec.uuid, rec);
161
+
162
+ // Find nearest message ancestor (walking through non-message records)
163
+ const ancestor = findMessageAncestor(
164
+ rec.parentUuid,
165
+ allByUuid,
166
+ messageUuids,
167
+ );
168
+
169
+ // Store resolved parent for this message
170
+ resolvedParents.set(rec.uuid, ancestor);
171
+
172
+ if (ancestor) {
173
+ const existing = children.get(ancestor) || [];
174
+ existing.push(rec.uuid);
175
+ children.set(ancestor, existing);
176
+ } else {
177
+ // No message ancestor - this is a root
178
+ roots.push(rec.uuid);
105
179
  }
106
180
  }
107
181
 
@@ -141,7 +215,7 @@ function splitConversations(records: ClaudeRecord[]): ClaudeRecord[][] {
141
215
  return ta - tb;
142
216
  });
143
217
 
144
- return conversations;
218
+ return { conversations, resolvedParents };
145
219
  }
146
220
 
147
221
  /**
@@ -170,16 +244,23 @@ function extractThinking(content: string | ContentBlock[]): string | undefined {
170
244
 
171
245
  /**
172
246
  * Extract tool calls from content blocks.
247
+ * Matches with results from the toolResults map.
173
248
  */
174
- function extractToolCalls(content: string | ContentBlock[]): ToolCall[] {
249
+ function extractToolCalls(
250
+ content: string | ContentBlock[],
251
+ toolResults: Map<string, string>,
252
+ ): ToolCall[] {
175
253
  if (typeof content === "string") return [];
176
254
 
177
255
  return content.flatMap((b) => {
178
- if (b.type === "tool_use" && b.name) {
256
+ if (b.type === "tool_use" && b.name && b.id) {
257
+ const result = toolResults.get(b.id);
179
258
  return [
180
259
  {
181
260
  name: b.name,
182
261
  summary: extractToolSummary(b.name, b.input || {}),
262
+ input: b.input,
263
+ result,
183
264
  },
184
265
  ];
185
266
  }
@@ -187,6 +268,39 @@ function extractToolCalls(content: string | ContentBlock[]): ToolCall[] {
187
268
  });
188
269
  }
189
270
 
271
/**
 * Safely convert tool result content to a string.
 *
 * Strings are returned unchanged and null/undefined become the empty string.
 * Anything else (arrays, objects, ...) is rendered as pretty-printed JSON,
 * falling back to `String(content)` when it cannot be serialized.
 *
 * @param content - Raw `content` value of a tool_result block.
 * @returns A string representation; never undefined.
 */
function stringifyToolResult(content: unknown): string {
  if (typeof content === "string") return content;
  if (content === null || content === undefined) return "";
  try {
    // JSON.stringify yields undefined (not a string) for values such as
    // functions and symbols, so fall back to String() to honor the return type.
    return JSON.stringify(content, null, 2) ?? String(content);
  } catch {
    // Thrown for e.g. circular structures and BigInt values.
    return String(content);
  }
}
285
+
286
/**
 * Collect the tool results carried by a message's content.
 *
 * Plain-string content has no blocks and therefore yields an empty map.
 * For block content, every `tool_result` block that has a `tool_use_id`
 * and a defined `content` contributes one entry.
 *
 * @returns Map of tool_use_id → stringified result content.
 */
function extractToolResults(
  content: string | ContentBlock[],
): Map<string, string> {
  if (typeof content === "string") return new Map<string, string>();

  return content.reduce((byToolUseId, block) => {
    const { type, tool_use_id: id, content: payload } = block;
    if (type === "tool_result" && id && payload !== undefined) {
      byToolUseId.set(id, stringifyToolResult(payload));
    }
    return byToolUseId;
  }, new Map<string, string>());
}
303
+
190
304
  /**
191
305
  * Check if a user message contains only tool results (no actual user text).
192
306
  */
@@ -211,8 +325,13 @@ function resolveParent(
211
325
  if (!parentUuid) return undefined;
212
326
 
213
327
  // Follow the chain through any skipped messages
328
+ const visited = new Set<string>();
214
329
  let current: string | undefined = parentUuid;
215
330
  while (current && skippedParents.has(current)) {
331
+ if (visited.has(current)) {
332
+ return undefined; // Cycle detected
333
+ }
334
+ visited.add(current);
216
335
  current = skippedParents.get(current);
217
336
  }
218
337
 
@@ -226,17 +345,36 @@ function transformConversation(
226
345
  records: ClaudeRecord[],
227
346
  sourcePath: string,
228
347
  warnings: Warning[],
348
+ resolvedParents: Map<string, string | undefined>,
229
349
  ): Transcript {
230
350
  const messages: Message[] = [];
231
- // Track skipped message UUIDs → their parent UUIDs for chain repair
351
+ // Track skipped message UUIDs → their resolved parent UUIDs for chain repair
232
352
  const skippedParents = new Map<string, string | undefined>();
233
353
 
354
+ // Collect all tool results from user messages (tool_use_id → result)
355
+ const allToolResults = new Map<string, string>();
356
+ for (const rec of records) {
357
+ if (rec.type === "user" && rec.message) {
358
+ const results = extractToolResults(rec.message.content);
359
+ for (const [id, content] of results) {
360
+ allToolResults.set(id, content);
361
+ }
362
+ }
363
+ }
364
+
365
+ let cwd: string | undefined;
366
+
234
367
  // First pass: identify which messages will be skipped
235
368
  for (const rec of records) {
236
369
  if (!rec.uuid) continue;
237
370
 
238
371
  let willSkip = false;
239
372
 
373
+ // Take the first cwd we find.
374
+ if (!cwd && rec.cwd) {
375
+ cwd = rec.cwd;
376
+ }
377
+
240
378
  if (rec.type === "user" && rec.message) {
241
379
  if (isToolResultOnly(rec.message.content)) {
242
380
  willSkip = true;
@@ -247,7 +385,7 @@ function transformConversation(
247
385
  } else if (rec.type === "assistant" && rec.message) {
248
386
  const text = extractText(rec.message.content);
249
387
  const thinking = extractThinking(rec.message.content);
250
- const toolCalls = extractToolCalls(rec.message.content);
388
+ const toolCalls = extractToolCalls(rec.message.content, allToolResults);
251
389
  // Only skip if no text, no thinking, AND no tool calls
252
390
  if (!text.trim() && !thinking && toolCalls.length === 0) {
253
391
  willSkip = true;
@@ -258,7 +396,8 @@ function transformConversation(
258
396
  }
259
397
 
260
398
  if (willSkip) {
261
- skippedParents.set(rec.uuid, rec.parentUuid || undefined);
399
+ // Use the resolved parent (already walked through non-message records)
400
+ skippedParents.set(rec.uuid, resolvedParents.get(rec.uuid));
262
401
  }
263
402
  }
264
403
 
@@ -266,7 +405,12 @@ function transformConversation(
266
405
  for (const rec of records) {
267
406
  const sourceRef = rec.uuid || "";
268
407
  const timestamp = rec.timestamp || new Date().toISOString();
269
- const parentMessageRef = resolveParent(rec.parentUuid, skippedParents);
408
+ // Start with the resolved parent (through non-message records),
409
+ // then walk through any skipped messages
410
+ const parentMessageRef = rec.uuid
411
+ ? resolveParent(resolvedParents.get(rec.uuid), skippedParents)
412
+ : undefined;
413
+ const rawJson = JSON.stringify(rec);
270
414
 
271
415
  if (rec.type === "user" && rec.message) {
272
416
  // Skip tool-result-only user messages (they're just tool responses)
@@ -279,13 +423,14 @@ function transformConversation(
279
423
  sourceRef,
280
424
  timestamp,
281
425
  parentMessageRef,
426
+ rawJson,
282
427
  content: text,
283
428
  });
284
429
  }
285
430
  } else if (rec.type === "assistant" && rec.message) {
286
431
  const text = extractText(rec.message.content);
287
432
  const thinking = extractThinking(rec.message.content);
288
- const toolCalls = extractToolCalls(rec.message.content);
433
+ const toolCalls = extractToolCalls(rec.message.content, allToolResults);
289
434
 
290
435
  // Add assistant message if there's text or thinking
291
436
  if (text.trim() || thinking) {
@@ -294,6 +439,7 @@ function transformConversation(
294
439
  sourceRef,
295
440
  timestamp,
296
441
  parentMessageRef,
442
+ rawJson,
297
443
  content: text,
298
444
  thinking,
299
445
  });
@@ -306,6 +452,7 @@ function transformConversation(
306
452
  sourceRef,
307
453
  timestamp,
308
454
  parentMessageRef,
455
+ rawJson,
309
456
  calls: toolCalls,
310
457
  });
311
458
  }
@@ -317,36 +464,144 @@ function transformConversation(
317
464
  sourceRef,
318
465
  timestamp,
319
466
  parentMessageRef,
467
+ rawJson,
320
468
  content: text,
321
469
  });
322
470
  }
323
471
  }
324
472
  }
325
473
 
474
+ // Compute time bounds from min/max across all messages (not array order,
475
+ // which is BFS traversal order and may not be chronological for branches)
476
+ let minTime = Infinity;
477
+ let maxTime = -Infinity;
478
+ for (const msg of messages) {
479
+ const t = new Date(msg.timestamp).getTime();
480
+ if (t < minTime) minTime = t;
481
+ if (t > maxTime) maxTime = t;
482
+ }
483
+ const now = new Date().toISOString();
484
+ const startTime = Number.isFinite(minTime)
485
+ ? new Date(minTime).toISOString()
486
+ : now;
487
+ const endTime = Number.isFinite(maxTime)
488
+ ? new Date(maxTime).toISOString()
489
+ : startTime;
490
+
326
491
  return {
327
492
  source: {
328
493
  file: sourcePath,
329
494
  adapter: "claude-code",
330
495
  },
331
- metadata: { warnings },
496
+ metadata: {
497
+ warnings,
498
+ messageCount: messages.length,
499
+ startTime,
500
+ endTime,
501
+ cwd,
502
+ },
332
503
  messages,
333
504
  };
334
505
  }
335
506
 
507
/**
 * Discover sessions listed in `<source>/sessions-index.json`.
 *
 * Sidechain (subagent) entries are ignored, and so are entries whose file
 * cannot be stat'ed. The reported mtime comes from the file on disk rather
 * than from the index.
 *
 * @param source - Directory expected to contain the index file.
 * @returns The discovered sessions, or undefined when the index cannot be
 *   read or parsed, has a version other than 1, or has no `entries` array.
 */
async function discoverFromIndex(
  source: string,
): Promise<DiscoveredSession[] | undefined> {
  try {
    const raw = await Bun.file(join(source, "sessions-index.json")).text();
    const parsed: SessionsIndex = JSON.parse(raw);

    const usable = parsed.version === 1 && Array.isArray(parsed.entries);
    if (!usable) return undefined;

    const found: DiscoveredSession[] = [];
    for (const { isSidechain, fullPath } of parsed.entries) {
      if (isSidechain) continue;

      try {
        const { mtime } = await stat(fullPath);
        found.push({
          path: fullPath,
          relativePath: relative(source, fullPath) || basename(fullPath),
          mtime: mtime.getTime(),
        });
      } catch {
        // The indexed file is gone (or unreadable): leave it out.
      }
    }
    return found;
  } catch {
    // Missing, unreadable, or malformed index.
    return undefined;
  }
}
550
+
551
/**
 * Discover sessions by scanning `source` for `**\/*.jsonl` files.
 *
 * Files located under any directory named `subagents` are skipped, and so
 * are files that cannot be stat'ed.
 *
 * @param source - Directory to scan.
 * @returns One entry per matching file, with its path relative to `source`
 *   and its current mtime in milliseconds.
 */
async function discoverByGlob(source: string): Promise<DiscoveredSession[]> {
  const sessions: DiscoveredSession[] = [];
  const glob = new Glob("**/*.jsonl");

  for await (const file of glob.scan({ cwd: source, absolute: false })) {
    // Compare whole directory segments rather than searching for the
    // substring "/subagents/": a relative path such as "subagents/x.jsonl"
    // has no leading slash, and some platforms use "\" as the separator.
    const directories = file.split(/[\\/]/).slice(0, -1);
    if (directories.includes("subagents")) continue;

    const fullPath = join(source, file);

    try {
      const fileStat = await stat(fullPath);
      sessions.push({
        path: fullPath,
        relativePath: file,
        mtime: fileStat.mtime.getTime(),
      });
    } catch {
      // Skip files we can't stat
    }
  }

  return sessions;
}
578
+
336
579
  export const claudeCodeAdapter: Adapter = {
337
580
  name: "claude-code",
338
- filePatterns: ["*.jsonl"],
581
+
582
+ async discover(source: string): Promise<DiscoveredSession[]> {
583
+ // Try index-based discovery first, fall back to glob
584
+ const fromIndex = await discoverFromIndex(source);
585
+ return fromIndex ?? (await discoverByGlob(source));
586
+ },
339
587
 
340
588
  parse(content: string, sourcePath: string): Transcript[] {
341
589
  const { records, warnings } = parseJsonl(content);
342
- const conversations = splitConversations(records);
590
+ const { conversations, resolvedParents } = splitConversations(records);
343
591
 
344
592
  if (conversations.length === 0) {
345
593
  // Return single empty transcript with warnings
594
+ const now = new Date().toISOString();
346
595
  return [
347
596
  {
348
597
  source: { file: sourcePath, adapter: "claude-code" },
349
- metadata: { warnings },
598
+ metadata: {
599
+ warnings,
600
+ messageCount: 0,
601
+ startTime: now,
602
+ endTime: now,
603
+ cwd: undefined,
604
+ },
350
605
  messages: [],
351
606
  },
352
607
  ];
@@ -354,12 +609,24 @@ export const claudeCodeAdapter: Adapter = {
354
609
 
355
610
  // For single conversation, include all warnings
356
611
  if (conversations.length === 1) {
357
- return [transformConversation(conversations[0], sourcePath, warnings)];
612
+ return [
613
+ transformConversation(
614
+ conversations[0],
615
+ sourcePath,
616
+ warnings,
617
+ resolvedParents,
618
+ ),
619
+ ];
358
620
  }
359
621
 
360
622
  // For multiple conversations, only first gets warnings
361
623
  return conversations.map((conv, i) =>
362
- transformConversation(conv, sourcePath, i === 0 ? warnings : []),
624
+ transformConversation(
625
+ conv,
626
+ sourcePath,
627
+ i === 0 ? warnings : [],
628
+ resolvedParents,
629
+ ),
363
630
  );
364
631
  },
365
632
  };
package/src/cache.ts ADDED
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Cache module for agent-transcripts.
3
+ *
4
+ * Stores derived content (rendered outputs, titles) keyed by source path,
5
+ * invalidated by content hash. Cache lives at ~/.cache/agent-transcripts/.
6
+ */
7
+
8
+ import { join } from "path";
9
+ import { homedir } from "os";
10
+ import { mkdir, rename, unlink } from "fs/promises";
11
+
12
+ const CACHE_DIR = join(homedir(), ".cache", "agent-transcripts");
13
+
14
/**
 * Derived content cached for a single segment. Every field is optional, so
 * an entry may hold any subset of them.
 */
export interface SegmentCache {
  /** Cached title of the segment. */
  title?: string;
  /** Cached output for the "html" format. */
  html?: string;
  /** Cached output for the "md" format. */
  md?: string;
}
19
+
20
/**
 * The cache record stored for one source file.
 */
export interface CacheEntry {
  /** Hash of the source content; a mismatch invalidates the entry. */
  contentHash: string;
  /** Cached data per segment, looked up by segment index. */
  segments: Array<SegmentCache>;
}
24
+
25
/**
 * Hash file content for cache invalidation.
 *
 * @param content - The text to hash.
 * @returns The `Bun.hash` value of `content`, rendered in hexadecimal.
 */
export function computeContentHash(content: string): string {
  const digest = Bun.hash(content);
  return digest.toString(16);
}
31
+
32
/**
 * Map a source file to the location of its cache file.
 *
 * The file name is the hexadecimal hash of the source path, which keeps
 * special characters in the path out of the file name.
 */
function getCachePath(sourcePath: string): string {
  const fileName = Bun.hash(sourcePath).toString(16) + ".json";
  return join(CACHE_DIR, fileName);
}
40
+
41
/**
 * Create the cache directory, including missing parents, if it is absent.
 * Directories are created with mode 0o755.
 */
async function ensureCacheDir(): Promise<void> {
  const options = { recursive: true, mode: 0o755 };
  await mkdir(CACHE_DIR, options);
}
47
+
48
/**
 * Load the cache entry for a source file.
 *
 * @param sourcePath - Path of the source file the cache belongs to.
 * @returns The stored entry, or undefined when no cache file can be read,
 *   it is not valid JSON, or the JSON does not have the shape of a
 *   `CacheEntry` (string `contentHash`, array of object `segments`).
 */
export async function loadCache(
  sourcePath: string,
): Promise<CacheEntry | undefined> {
  const cachePath = getCachePath(sourcePath);
  try {
    const parsed: unknown = JSON.parse(await Bun.file(cachePath).text());

    // Valid JSON is not necessarily a valid entry (e.g. `null`, `{}`, or a
    // file written by something else); treat any such content as corrupt.
    if (typeof parsed !== "object" || parsed === null) return undefined;
    const candidate = parsed as { contentHash?: unknown; segments?: unknown };
    if (typeof candidate.contentHash !== "string") return undefined;
    if (!Array.isArray(candidate.segments)) return undefined;
    const segmentsAreObjects = candidate.segments.every(
      (seg: unknown) => typeof seg === "object" && seg !== null,
    );
    if (!segmentsAreObjects) return undefined;

    return parsed as CacheEntry;
  } catch {
    // Missing/unreadable file or malformed JSON.
    return undefined;
  }
}
63
+
64
/**
 * Save the cache entry for a source file.
 *
 * The entry is written to a temporary file next to the target and then
 * renamed over it, so an existing cache file is never left half-written.
 * If either the write or the rename fails, the temporary file is removed
 * (best effort) and the original error is rethrown.
 *
 * @param sourcePath - Path of the source file the cache belongs to.
 * @param entry - Entry to persist.
 * @throws Whatever the directory creation, write, or rename step throws.
 */
export async function saveCache(
  sourcePath: string,
  entry: CacheEntry,
): Promise<void> {
  await ensureCacheDir();

  const cachePath = getCachePath(sourcePath);
  // pid + timestamp make the temp name specific to this process and call.
  const tmpPath = `${cachePath}.${process.pid}.${Date.now()}.tmp`;

  const content = JSON.stringify(entry, null, 2) + "\n";

  try {
    // The write is inside the try so that a failed or partial write is
    // cleaned up as well, not only a failed rename.
    await Bun.write(tmpPath, content);
    await rename(tmpPath, cachePath);
  } catch (err) {
    try {
      await unlink(tmpPath);
    } catch {
      // Ignore cleanup errors
    }
    throw err;
  }
}
91
+
92
/**
 * Return the cached segments if they can serve the requested format.
 *
 * The cache is usable only when its content hash equals `contentHash`, it
 * holds at least one segment, and every segment has a non-empty value for
 * `format`. A structurally malformed entry (no `segments` array, null
 * segments) counts as unusable instead of causing an exception.
 *
 * @param cached - Entry loaded from the cache, if any.
 * @param contentHash - Hash of the current source content.
 * @param format - Output format that must be present on every segment.
 * @returns The cached segments array, or undefined when the cache is unusable.
 */
export function getCachedSegments(
  cached: CacheEntry | undefined,
  contentHash: string,
  format: "html" | "md",
): SegmentCache[] | undefined {
  if (!cached || cached.contentHash !== contentHash) {
    return undefined;
  }

  // Entries come from disk, so do not trust that `segments` is an array.
  const { segments } = cached;
  if (!Array.isArray(segments) || segments.length === 0) {
    return undefined;
  }

  // Every segment must carry the requested format.
  const complete = segments.every((seg) => Boolean(seg?.[format]));
  return complete ? segments : undefined;
}
115
+
116
/**
 * Get the cached title of one segment.
 *
 * @param cached - Entry loaded from the cache, if any.
 * @param contentHash - Hash of the current source content.
 * @param segmentIndex - Index of the segment within the entry.
 * @returns The title, or undefined when there is no entry, the hash does
 *   not match, the segment does not exist, or it has no string title.
 */
export function getCachedTitle(
  cached: CacheEntry | undefined,
  contentHash: string,
  segmentIndex: number,
): string | undefined {
  if (!cached || cached.contentHash !== contentHash) {
    return undefined;
  }
  // Entries come from disk: tolerate a missing `segments` list rather than
  // throwing on a malformed cache file.
  const title: unknown = cached.segments?.[segmentIndex]?.title;
  return typeof title === "string" ? title : undefined;
}