@arcreflex/agent-transcripts 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CLAUDE.md CHANGED
@@ -4,7 +4,9 @@
4
4
 
5
5
  ## Architectural Notes
6
6
 
7
- - **Source paths are stable**: Session source paths (e.g., `~/.claude/projects/.../sessions/`) are standardized by the tools that create them. Don't over-engineer for path changes—use source paths as cache keys directly.
7
+ - **Source paths are stable**: Session source paths (e.g., `~/.claude/projects/.../sessions/`) are standardized by the tools that create them. Archive entries store the absolute source path for traceability.
8
+ - **Archive is the central store**: All derived data (titles, etc.) lives on archive entries. Rendered HTML is in-memory only (LRU in serve). No persistent cache layer.
9
+ - **Serve is snapshot-based**: `serve` loads the archive once at startup. It does not live-reload when `watch` archives new sessions. This is a known simplification — revisit if live-updating becomes important.
8
10
 
9
11
  ## Verification
10
12
 
package/README.md CHANGED
@@ -17,30 +17,35 @@ src/
17
17
  render.ts # Intermediate format → markdown
18
18
  render-html.ts # HTML transcript rendering
19
19
  render-index.ts # Index page rendering
20
- convert.ts # Full pipeline with provenance tracking
21
- sync.ts # Batch sync sessions → markdown
22
- serve.ts # HTTP server for dynamic transcript serving
23
- cache.ts # Content-hash-based caching (~/.cache/agent-transcripts/)
24
- title.ts # LLM title generation
20
+ convert.ts # Direct pipeline (parse → render, to stdout or directory)
21
+ archive.ts # Persistent archive store (~/.local/share/agent-transcripts/archive/)
22
+ watch.ts # Continuous archive updates via fs.watch + polling
23
+ serve.ts # HTTP server serving from archive with in-memory LRU
24
+ title.ts # LLM title generation (writes to archive entries)
25
25
  types.ts # Core types (Transcript, Message, Adapter)
26
26
  adapters/ # Source format adapters (currently: claude-code)
27
27
  utils/
28
28
  naming.ts # Deterministic output file naming
29
- provenance.ts # Source tracking via transcripts.json + YAML front matter
30
29
  summary.ts # Tool call summary extraction
31
30
  openrouter.ts # OpenRouter API client for title generation
32
31
  html.ts # HTML escaping utility
33
- tree.ts # Tree navigation utilities
32
+ tree.ts # Tree navigation and walkTranscriptTree generator
33
+ text.ts # Shared text utilities (truncate)
34
+ theme.ts # Shared CSS theme constants
34
35
  test/
35
36
  fixtures/ # Snapshot test inputs/outputs
36
37
  snapshots.test.ts
38
+ archive.test.ts
39
+ tree.test.ts
40
+ naming.test.ts
41
+ summary.test.ts
37
42
  ```
38
43
 
39
44
  ## Commands
40
45
 
41
46
  ```bash
42
47
  bun run check # typecheck + prettier
43
- bun run test # snapshot tests
48
+ bun run test # snapshot tests + archive tests
44
49
  bun run format # auto-format
45
50
  ```
46
51
 
@@ -50,9 +55,23 @@ bun run format # auto-format
50
55
  # Subcommands (convert is default if omitted)
51
56
  agent-transcripts convert <file> # Parse and render to stdout
52
57
  agent-transcripts convert <file> -o <dir> # Parse and render to directory
53
- agent-transcripts sync <dir> -o <out> # Batch sync sessions
54
- agent-transcripts serve <dir> # Serve transcripts via HTTP
55
- agent-transcripts serve <dir> -p 8080 # Serve on custom port
58
+
59
+ # Archive management
60
+ agent-transcripts archive <source> # Archive sessions from source dir
61
+ agent-transcripts archive <source> --archive-dir ~/my-archive
62
+
63
+ # Serving
64
+ agent-transcripts serve # Serve from default archive
65
+ agent-transcripts serve --archive-dir <dir> # Serve from custom archive
66
+ agent-transcripts serve -p 8080 # Custom port
67
+
68
+ # Watching
69
+ agent-transcripts watch <source> # Keep archive updated continuously
70
+ agent-transcripts watch <source> --poll-interval 60000
71
+
72
+ # Title generation
73
+ agent-transcripts title # Generate titles for archive entries
74
+ agent-transcripts title -f # Force regenerate all titles
56
75
 
57
76
  # Use "-" for stdin
58
77
  cat session.jsonl | agent-transcripts -
@@ -60,74 +79,60 @@ cat session.jsonl | agent-transcripts -
60
79
 
61
80
  ## Architecture
62
81
 
63
- Two-stage pipeline: Parse (source → intermediate) → Render (intermediate → markdown).
82
+ ```
83
+ Source (Claude Code sessions)
84
+ ↓ [archive / watch]
85
+ Archive (~/.local/share/agent-transcripts/archive/{sessionId}.json)
86
+ ↓ [serve]
87
+ HTML (rendered on demand, in-memory LRU)
88
+ ```
89
+
90
+ `convert` is a standalone direct pipeline (no archive dependency).
91
+
92
+ `serve` loads the archive once at startup — it won't pick up new sessions archived by a concurrent `watch` without a restart. Live-reloading could be added later (periodic re-listing or file-watch trigger) if needed.
64
93
 
65
94
  - Adapters handle source formats (see `src/adapters/index.ts` for registry)
66
95
  - Auto-detection: paths containing `.claude/` → claude-code adapter
67
96
  - Branching conversations preserved via `parentMessageRef` on messages
68
- - Provenance tracking via `transcripts.json` index + YAML front matter
69
97
  - Deterministic naming: `{datetime}-{sessionId}.md`
70
- - Sync uses sessions-index.json for discovery (claude-code), skipping subagent files
71
- - Sync uses content hash to skip unchanged sources (see Cache section)
72
98
 
73
- ### Cache
99
+ ### Archive
74
100
 
75
- Derived content (rendered outputs, LLM-generated titles) is cached at `~/.cache/agent-transcripts/`:
101
+ The archive is the central data store at `~/.local/share/agent-transcripts/archive/`:
76
102
 
77
103
  ```
78
- ~/.cache/agent-transcripts/
79
- {source-path-hash}.json → CacheEntry
104
+ ~/.local/share/agent-transcripts/archive/
105
+ {sessionId}.json → ArchiveEntry
80
106
  ```
81
107
 
82
108
  ```typescript
83
- interface CacheEntry {
84
- contentHash: string; // hash of source content (invalidation key)
85
- segments: Array<{
86
- title?: string; // LLM-generated title
87
- html?: string; // rendered HTML
88
- md?: string; // rendered markdown
89
- }>;
109
+ interface ArchiveEntry {
110
+ sessionId: string;
111
+ sourcePath: string; // absolute source path
112
+ sourceHash: string; // content hash (invalidation key)
113
+ adapterName: string;
114
+ adapterVersion: string; // e.g. "claude-code:1"
115
+ schemaVersion: number;
116
+ archivedAt: string; // ISO timestamp
117
+ title?: string; // harness-provided or LLM-generated
118
+ transcripts: Transcript[];
90
119
  }
91
120
  ```
92
121
 
93
- Cache is keyed by source path (hashed), invalidated by content hash. When source content changes, all cached data is invalidated and regenerated.
94
-
95
- ### transcripts.json
96
-
97
- The index file is a table of contents for the output directory:
98
-
99
- ```typescript
100
- interface TranscriptsIndex {
101
- version: 1;
102
- entries: {
103
- [outputFilename: string]: {
104
- source: string; // absolute path to source
105
- sessionId: string; // full session ID from filename
106
- segmentIndex?: number; // for multi-transcript sources (1-indexed)
107
- syncedAt: string; // ISO timestamp
108
- firstUserMessage: string; // first user message content
109
- title?: string; // copied from cache for convenience
110
- messageCount: number;
111
- startTime: string; // ISO timestamp
112
- endTime: string; // ISO timestamp
113
- cwd?: string; // working directory
114
- };
115
- };
116
- }
117
- ```
122
+ Freshness is determined by `sourceHash + adapterVersion + schemaVersion`. When any of these changes, the entry is re-archived.
118
123
 
119
124
  ## Key Types
120
125
 
121
126
  - `Transcript`: source info, warnings, messages array
122
127
  - `Message`: union of UserMessage | AssistantMessage | SystemMessage | ToolCallGroup | ErrorMessage
123
- - `Adapter`: name, discover function, parse function
128
+ - `Adapter`: name, version, discover function, parse function
124
129
 
125
130
  ### Titles
126
131
 
127
132
  Transcripts get titles from (in priority order):
128
133
 
129
134
  1. Harness-provided summary (e.g., Claude Code's sessions-index.json `summary` field)
130
- 2. Cached title from previous sync
135
+ 2. Existing title from previous archive entry
131
136
  3. LLM-generated title via OpenRouter (requires `OPENROUTER_API_KEY`)
132
137
 
133
138
  ## Adding an Adapter
@@ -144,4 +149,6 @@ Transcripts get titles from (in priority order):
144
149
 
145
150
  Snapshot-based: `*.input.jsonl` → parse → render → compare against `*.output.md`
146
151
 
152
+ Archive tests: real fixture files + temp dirs to verify archiving, freshness, listing.
153
+
147
154
  To update snapshots: manually edit the expected `.output.md` files.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arcreflex/agent-transcripts",
3
- "version": "0.1.10",
3
+ "version": "0.1.11",
4
4
  "description": "Transform AI coding agent session files into readable transcripts",
5
5
  "type": "module",
6
6
  "repository": {
@@ -815,6 +815,7 @@ async function discoverByGlob(source: string): Promise<DiscoveredSession[]> {
815
815
 
816
816
  export const claudeCodeAdapter: Adapter = {
817
817
  name: "claude-code",
818
+ version: "claude-code:1",
818
819
 
819
820
  async discover(source: string): Promise<DiscoveredSession[]> {
820
821
  // Try index-based discovery first, fall back to glob
@@ -30,16 +30,10 @@ export function detectAdapter(filePath: string): string | undefined {
30
30
  return undefined;
31
31
  }
32
32
 
33
- /**
34
- * Get adapter by name.
35
- */
36
33
  export function getAdapter(name: string): Adapter | undefined {
37
34
  return adapters[name];
38
35
  }
39
36
 
40
- /**
41
- * List available adapter names.
42
- */
43
37
  export function listAdapters(): string[] {
44
38
  return Object.keys(adapters);
45
39
  }
package/src/archive.ts ADDED
@@ -0,0 +1,267 @@
1
+ /**
2
+ * Archive module: persistent storage for parsed transcripts.
3
+ *
4
+ * Archive entries live at {archiveDir}/{sessionId}.json and contain
5
+ * the full parsed transcripts plus metadata for freshness checks.
6
+ */
7
+
8
+ import { join } from "path";
9
+ import { homedir } from "os";
10
+ import { mkdir, readdir, rename, unlink } from "fs/promises";
11
+ import type { Adapter, DiscoveredSession, Transcript } from "./types.ts";
12
+ import { extractSessionId } from "./utils/naming.ts";
13
+
14
+ export const DEFAULT_ARCHIVE_DIR = join(
15
+ homedir(),
16
+ ".local/share/agent-transcripts/archive",
17
+ );
18
+
19
+ const ARCHIVE_SCHEMA_VERSION = 1;
20
+
21
+ export interface ArchiveEntry {
22
+ sessionId: string;
23
+ sourcePath: string;
24
+ sourceHash: string;
25
+ adapterName: string;
26
+ adapterVersion: string;
27
+ schemaVersion: number;
28
+ archivedAt: string;
29
+ title?: string;
30
+ transcripts: Transcript[];
31
+ }
32
+
33
+ /** Lightweight per-transcript summary for indexing (no message bodies). */
34
+ export interface TranscriptSummary {
35
+ firstMessageTimestamp: string;
36
+ firstUserMessage: string;
37
+ metadata: Transcript["metadata"];
38
+ }
39
+
40
+ /** Entry header — full metadata but no message bodies. */
41
+ export interface ArchiveEntryHeader {
42
+ sessionId: string;
43
+ sourcePath: string;
44
+ sourceHash: string;
45
+ title?: string;
46
+ segments: TranscriptSummary[];
47
+ }
48
+
49
+ export interface ArchiveResult {
50
+ updated: string[];
51
+ current: string[];
52
+ errors: Array<{ sessionId: string; error: string }>;
53
+ }
54
+
55
+ export function computeContentHash(content: string): string {
56
+ return Bun.hash(content).toString(16);
57
+ }
58
+
59
+ /** Type guard: validates that a parsed JSON value has the shape of an ArchiveEntry. */
60
+ function isArchiveEntry(value: unknown): value is ArchiveEntry {
61
+ if (typeof value !== "object" || value === null) return false;
62
+ const v = value as Record<string, unknown>;
63
+ return (
64
+ typeof v.sessionId === "string" &&
65
+ typeof v.sourcePath === "string" &&
66
+ typeof v.sourceHash === "string" &&
67
+ typeof v.adapterName === "string" &&
68
+ typeof v.adapterVersion === "string" &&
69
+ typeof v.schemaVersion === "number" &&
70
+ typeof v.archivedAt === "string" &&
71
+ Array.isArray(v.transcripts)
72
+ );
73
+ }
74
+
75
+ export async function loadEntry(
76
+ archiveDir: string,
77
+ sessionId: string,
78
+ ): Promise<ArchiveEntry | undefined> {
79
+ let content: string;
80
+ try {
81
+ content = await Bun.file(join(archiveDir, `${sessionId}.json`)).text();
82
+ } catch (err: unknown) {
83
+ if (
84
+ err &&
85
+ typeof err === "object" &&
86
+ "code" in err &&
87
+ err.code === "ENOENT"
88
+ ) {
89
+ return undefined;
90
+ }
91
+ throw err;
92
+ }
93
+ const parsed: unknown = JSON.parse(content);
94
+ if (!isArchiveEntry(parsed)) {
95
+ console.error(`Warning: invalid archive entry for ${sessionId}, skipping`);
96
+ return undefined;
97
+ }
98
+ return parsed;
99
+ }
100
+
101
+ export async function saveEntry(
102
+ archiveDir: string,
103
+ entry: ArchiveEntry,
104
+ ): Promise<void> {
105
+ await mkdir(archiveDir, { recursive: true });
106
+
107
+ const filePath = join(archiveDir, `${entry.sessionId}.json`);
108
+ const tmpPath = `${filePath}.${process.pid}.${Date.now()}.tmp`;
109
+ const content = JSON.stringify(entry, null, 2) + "\n";
110
+
111
+ await Bun.write(tmpPath, content);
112
+ try {
113
+ await rename(tmpPath, filePath);
114
+ } catch (err) {
115
+ try {
116
+ await unlink(tmpPath);
117
+ } catch {}
118
+ throw err;
119
+ }
120
+ }
121
+
122
+ export function isFresh(
123
+ entry: ArchiveEntry,
124
+ sourceHash: string,
125
+ adapter: Adapter,
126
+ ): boolean {
127
+ return (
128
+ entry.sourceHash === sourceHash &&
129
+ entry.adapterVersion === adapter.version &&
130
+ entry.schemaVersion === ARCHIVE_SCHEMA_VERSION
131
+ );
132
+ }
133
+
134
+ export async function archiveSession(
135
+ archiveDir: string,
136
+ session: DiscoveredSession,
137
+ adapter: Adapter,
138
+ ): Promise<{ entry: ArchiveEntry; updated: boolean }> {
139
+ const sessionId = extractSessionId(session.path);
140
+ const content = await Bun.file(session.path).text();
141
+ const sourceHash = computeContentHash(content);
142
+
143
+ const existing = await loadEntry(archiveDir, sessionId);
144
+ if (existing && isFresh(existing, sourceHash, adapter)) {
145
+ // Still update title if harness summary changed
146
+ if (session.summary && existing.title !== session.summary) {
147
+ existing.title = session.summary;
148
+ await saveEntry(archiveDir, existing);
149
+ return { entry: existing, updated: true };
150
+ }
151
+ return { entry: existing, updated: false };
152
+ }
153
+
154
+ const transcripts = adapter.parse(content, session.path);
155
+
156
+ const entry: ArchiveEntry = {
157
+ sessionId,
158
+ sourcePath: session.path,
159
+ sourceHash,
160
+ adapterName: adapter.name,
161
+ adapterVersion: adapter.version,
162
+ schemaVersion: ARCHIVE_SCHEMA_VERSION,
163
+ archivedAt: new Date().toISOString(),
164
+ title: session.summary ?? existing?.title,
165
+ transcripts,
166
+ };
167
+
168
+ await saveEntry(archiveDir, entry);
169
+ return { entry, updated: true };
170
+ }
171
+
172
+ export async function archiveAll(
173
+ archiveDir: string,
174
+ sourceDir: string,
175
+ adapters: Adapter[],
176
+ options: { quiet?: boolean } = {},
177
+ ): Promise<ArchiveResult> {
178
+ const result: ArchiveResult = { updated: [], current: [], errors: [] };
179
+
180
+ for (const adapter of adapters) {
181
+ const sessions = await adapter.discover(sourceDir);
182
+
183
+ for (const session of sessions) {
184
+ const sessionId = extractSessionId(session.path);
185
+ try {
186
+ const { updated } = await archiveSession(archiveDir, session, adapter);
187
+ if (updated) {
188
+ result.updated.push(sessionId);
189
+ if (!options.quiet) {
190
+ console.error(`Archived: ${sessionId}`);
191
+ }
192
+ } else {
193
+ result.current.push(sessionId);
194
+ }
195
+ } catch (err) {
196
+ const message = err instanceof Error ? err.message : String(err);
197
+ result.errors.push({ sessionId, error: message });
198
+ if (!options.quiet) {
199
+ console.error(`Error archiving ${sessionId}: ${message}`);
200
+ }
201
+ }
202
+ }
203
+ }
204
+
205
+ return result;
206
+ }
207
+
208
+ async function readArchiveFiles<T>(
209
+ archiveDir: string,
210
+ transform: (entry: ArchiveEntry) => T,
211
+ ): Promise<T[]> {
212
+ let files: string[];
213
+ try {
214
+ files = await readdir(archiveDir);
215
+ } catch {
216
+ return [];
217
+ }
218
+
219
+ const results: T[] = [];
220
+ for (const file of files) {
221
+ if (!file.endsWith(".json")) continue;
222
+ try {
223
+ const content = await Bun.file(join(archiveDir, file)).text();
224
+ const parsed: unknown = JSON.parse(content);
225
+ if (!isArchiveEntry(parsed)) {
226
+ console.error(`Warning: invalid archive file ${file}, skipping`);
227
+ continue;
228
+ }
229
+ results.push(transform(parsed));
230
+ } catch {
231
+ // Skip corrupt/unreadable entries
232
+ }
233
+ }
234
+ return results;
235
+ }
236
+
237
+ export async function listEntries(archiveDir: string): Promise<ArchiveEntry[]> {
238
+ return readArchiveFiles(archiveDir, (entry) => entry);
239
+ }
240
+
241
+ function summarizeTranscript(t: Transcript): TranscriptSummary {
242
+ let firstUserMessage = "";
243
+ for (const msg of t.messages) {
244
+ if (msg.type === "user") {
245
+ firstUserMessage = msg.content;
246
+ break;
247
+ }
248
+ }
249
+ return {
250
+ firstMessageTimestamp: t.messages[0]?.timestamp ?? "",
251
+ firstUserMessage,
252
+ metadata: t.metadata,
253
+ };
254
+ }
255
+
256
+ /** Load entry headers only — reads each entry but discards message bodies. */
257
+ export async function listEntryHeaders(
258
+ archiveDir: string,
259
+ ): Promise<ArchiveEntryHeader[]> {
260
+ return readArchiveFiles(archiveDir, (entry) => ({
261
+ sessionId: entry.sessionId,
262
+ sourcePath: entry.sourcePath,
263
+ sourceHash: entry.sourceHash,
264
+ title: entry.title,
265
+ segments: entry.transcripts.map(summarizeTranscript),
266
+ }));
267
+ }