@arcreflex/agent-transcripts 0.1.9 → 0.1.11

This diff shows the changes between publicly released versions of the package as they appear in the supported public registries. It is provided for informational purposes only.
package/src/archive.ts ADDED
@@ -0,0 +1,267 @@
+ /**
+  * Archive module: persistent storage for parsed transcripts.
+  *
+  * Archive entries live at {archiveDir}/{sessionId}.json and contain
+  * the full parsed transcripts plus metadata for freshness checks.
+  */
+
+ import { join } from "path";
+ import { homedir } from "os";
+ import { mkdir, readdir, rename, unlink } from "fs/promises";
+ import type { Adapter, DiscoveredSession, Transcript } from "./types.ts";
+ import { extractSessionId } from "./utils/naming.ts";
+
+ export const DEFAULT_ARCHIVE_DIR = join(
+   homedir(),
+   ".local/share/agent-transcripts/archive",
+ );
+
+ const ARCHIVE_SCHEMA_VERSION = 1;
+
+ export interface ArchiveEntry {
+   sessionId: string;
+   sourcePath: string;
+   sourceHash: string;
+   adapterName: string;
+   adapterVersion: string;
+   schemaVersion: number;
+   archivedAt: string;
+   title?: string;
+   transcripts: Transcript[];
+ }
+
+ /** Lightweight per-transcript summary for indexing (no message bodies). */
+ export interface TranscriptSummary {
+   firstMessageTimestamp: string;
+   firstUserMessage: string;
+   metadata: Transcript["metadata"];
+ }
+
+ /** Entry header — full metadata but no message bodies. */
+ export interface ArchiveEntryHeader {
+   sessionId: string;
+   sourcePath: string;
+   sourceHash: string;
+   title?: string;
+   segments: TranscriptSummary[];
+ }
+
+ export interface ArchiveResult {
+   updated: string[];
+   current: string[];
+   errors: Array<{ sessionId: string; error: string }>;
+ }
+
+ export function computeContentHash(content: string): string {
+   return Bun.hash(content).toString(16);
+ }
+
+ /** Type guard: validates that a parsed JSON value has the shape of an ArchiveEntry. */
+ function isArchiveEntry(value: unknown): value is ArchiveEntry {
+   if (typeof value !== "object" || value === null) return false;
+   const v = value as Record<string, unknown>;
+   return (
+     typeof v.sessionId === "string" &&
+     typeof v.sourcePath === "string" &&
+     typeof v.sourceHash === "string" &&
+     typeof v.adapterName === "string" &&
+     typeof v.adapterVersion === "string" &&
+     typeof v.schemaVersion === "number" &&
+     typeof v.archivedAt === "string" &&
+     Array.isArray(v.transcripts)
+   );
+ }
+
+ export async function loadEntry(
+   archiveDir: string,
+   sessionId: string,
+ ): Promise<ArchiveEntry | undefined> {
+   let content: string;
+   try {
+     content = await Bun.file(join(archiveDir, `${sessionId}.json`)).text();
+   } catch (err: unknown) {
+     if (
+       err &&
+       typeof err === "object" &&
+       "code" in err &&
+       err.code === "ENOENT"
+     ) {
+       return undefined;
+     }
+     throw err;
+   }
+   const parsed: unknown = JSON.parse(content);
+   if (!isArchiveEntry(parsed)) {
+     console.error(`Warning: invalid archive entry for ${sessionId}, skipping`);
+     return undefined;
+   }
+   return parsed;
+ }
+
+ export async function saveEntry(
+   archiveDir: string,
+   entry: ArchiveEntry,
+ ): Promise<void> {
+   await mkdir(archiveDir, { recursive: true });
+
+   const filePath = join(archiveDir, `${entry.sessionId}.json`);
+   const tmpPath = `${filePath}.${process.pid}.${Date.now()}.tmp`;
+   const content = JSON.stringify(entry, null, 2) + "\n";
+
+   await Bun.write(tmpPath, content);
+   try {
+     await rename(tmpPath, filePath);
+   } catch (err) {
+     try {
+       await unlink(tmpPath);
+     } catch {}
+     throw err;
+   }
+ }
+
+ export function isFresh(
+   entry: ArchiveEntry,
+   sourceHash: string,
+   adapter: Adapter,
+ ): boolean {
+   return (
+     entry.sourceHash === sourceHash &&
+     entry.adapterVersion === adapter.version &&
+     entry.schemaVersion === ARCHIVE_SCHEMA_VERSION
+   );
+ }
+
+ export async function archiveSession(
+   archiveDir: string,
+   session: DiscoveredSession,
+   adapter: Adapter,
+ ): Promise<{ entry: ArchiveEntry; updated: boolean }> {
+   const sessionId = extractSessionId(session.path);
+   const content = await Bun.file(session.path).text();
+   const sourceHash = computeContentHash(content);
+
+   const existing = await loadEntry(archiveDir, sessionId);
+   if (existing && isFresh(existing, sourceHash, adapter)) {
+     // Still update title if harness summary changed
+     if (session.summary && existing.title !== session.summary) {
+       existing.title = session.summary;
+       await saveEntry(archiveDir, existing);
+       return { entry: existing, updated: true };
+     }
+     return { entry: existing, updated: false };
+   }
+
+   const transcripts = adapter.parse(content, session.path);
+
+   const entry: ArchiveEntry = {
+     sessionId,
+     sourcePath: session.path,
+     sourceHash,
+     adapterName: adapter.name,
+     adapterVersion: adapter.version,
+     schemaVersion: ARCHIVE_SCHEMA_VERSION,
+     archivedAt: new Date().toISOString(),
+     title: session.summary ?? existing?.title,
+     transcripts,
+   };
+
+   await saveEntry(archiveDir, entry);
+   return { entry, updated: true };
+ }
+
+ export async function archiveAll(
+   archiveDir: string,
+   sourceDir: string,
+   adapters: Adapter[],
+   options: { quiet?: boolean } = {},
+ ): Promise<ArchiveResult> {
+   const result: ArchiveResult = { updated: [], current: [], errors: [] };
+
+   for (const adapter of adapters) {
+     const sessions = await adapter.discover(sourceDir);
+
+     for (const session of sessions) {
+       const sessionId = extractSessionId(session.path);
+       try {
+         const { updated } = await archiveSession(archiveDir, session, adapter);
+         if (updated) {
+           result.updated.push(sessionId);
+           if (!options.quiet) {
+             console.error(`Archived: ${sessionId}`);
+           }
+         } else {
+           result.current.push(sessionId);
+         }
+       } catch (err) {
+         const message = err instanceof Error ? err.message : String(err);
+         result.errors.push({ sessionId, error: message });
+         if (!options.quiet) {
+           console.error(`Error archiving ${sessionId}: ${message}`);
+         }
+       }
+     }
+   }
+
+   return result;
+ }
+
+ async function readArchiveFiles<T>(
+   archiveDir: string,
+   transform: (entry: ArchiveEntry) => T,
+ ): Promise<T[]> {
+   let files: string[];
+   try {
+     files = await readdir(archiveDir);
+   } catch {
+     return [];
+   }
+
+   const results: T[] = [];
+   for (const file of files) {
+     if (!file.endsWith(".json")) continue;
+     try {
+       const content = await Bun.file(join(archiveDir, file)).text();
+       const parsed: unknown = JSON.parse(content);
+       if (!isArchiveEntry(parsed)) {
+         console.error(`Warning: invalid archive file ${file}, skipping`);
+         continue;
+       }
+       results.push(transform(parsed));
+     } catch {
+       // Skip corrupt/unreadable entries
+     }
+   }
+   return results;
+ }
+
+ export async function listEntries(archiveDir: string): Promise<ArchiveEntry[]> {
+   return readArchiveFiles(archiveDir, (entry) => entry);
+ }
+
+ function summarizeTranscript(t: Transcript): TranscriptSummary {
+   let firstUserMessage = "";
+   for (const msg of t.messages) {
+     if (msg.type === "user") {
+       firstUserMessage = msg.content;
+       break;
+     }
+   }
+   return {
+     firstMessageTimestamp: t.messages[0]?.timestamp ?? "",
+     firstUserMessage,
+     metadata: t.metadata,
+   };
+ }
+
+ /** Load entry headers only — reads each entry but discards message bodies. */
+ export async function listEntryHeaders(
+   archiveDir: string,
+ ): Promise<ArchiveEntryHeader[]> {
+   return readArchiveFiles(archiveDir, (entry) => ({
+     sessionId: entry.sessionId,
+     sourcePath: entry.sourcePath,
+     sourceHash: entry.sourceHash,
+     title: entry.title,
+     segments: entry.transcripts.map(summarizeTranscript),
+   }));
+ }
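Note: a minimal usage sketch of the new archive module (not part of the package diff). It assumes the Bun runtime used throughout the package and the adapters module that cli.ts imports below; the source directory path is a placeholder.

import { archiveAll, listEntryHeaders, DEFAULT_ARCHIVE_DIR } from "./archive.ts";
import { getAdapters } from "./adapters/index.ts";

// Archive every session discovered under a source directory (placeholder path),
// then list lightweight headers without loading message bodies.
const result = await archiveAll(DEFAULT_ARCHIVE_DIR, "/path/to/sessions", getAdapters(), {
  quiet: true,
});
console.error(
  `${result.updated.length} updated, ${result.current.length} current, ${result.errors.length} errors`,
);

const headers = await listEntryHeaders(DEFAULT_ARCHIVE_DIR);
for (const header of headers) {
  console.log(header.sessionId, header.title ?? header.segments[0]?.firstUserMessage ?? "");
}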
package/src/cli.ts CHANGED
@@ -15,20 +15,12 @@ import {
  } from "cmd-ts";
  import { parseToTranscripts } from "./parse.ts";
  import { renderTranscript } from "./render.ts";
- import { sync, type OutputFormat } from "./sync.ts";
  import { convertToDirectory } from "./convert.ts";
  import { generateTitles } from "./title.ts";
  import { serve } from "./serve.ts";
-
- // Custom type for format option
- const formatType = {
-   async from(value: string): Promise<OutputFormat> {
-     if (value !== "md" && value !== "html") {
-       throw new Error(`Invalid format: ${value}. Must be "md" or "html".`);
-     }
-     return value;
-   },
- };
+ import { archiveAll, DEFAULT_ARCHIVE_DIR } from "./archive.ts";
+ import { getAdapters } from "./adapters/index.ts";
+ import { ArchiveWatcher } from "./watch.ts";

  // Shared options
  const inputArg = positional({
@@ -57,57 +49,47 @@ const headOpt = option({
    description: "Render branch ending at this message ID (default: latest)",
  });

- // Sync subcommand
- const syncCmd = command({
-   name: "sync",
-   description: "Sync session files to transcripts (markdown or HTML)",
+ const archiveDirOpt = option({
+   type: optional(string),
+   long: "archive-dir",
+   description: `Archive directory (default: ${DEFAULT_ARCHIVE_DIR})`,
+ });
+
+ // Archive subcommand
+ const archiveCmd = command({
+   name: "archive",
+   description: "Archive session files from source directory",
    args: {
      source: positional({
        type: string,
        displayName: "source",
        description: "Source directory to scan for session files",
      }),
-     output: option({
-       type: string,
-       long: "output",
-       short: "o",
-       description: "Output directory for transcripts",
-     }),
-     format: option({
-       type: optional(formatType),
-       long: "format",
-       description: "Output format: md (default) or html",
-     }),
-     noTitle: flag({
-       long: "no-title",
-       description: "Skip LLM title generation (for HTML format)",
-     }),
-     force: flag({
-       long: "force",
-       short: "f",
-       description: "Re-render all sessions, ignoring mtime",
-     }),
+     archiveDir: archiveDirOpt,
      quiet: flag({
        long: "quiet",
        short: "q",
        description: "Suppress progress output",
      }),
    },
-   async handler({ source, output, format, noTitle, force, quiet }) {
-     await sync({ source, output, format, noTitle, force, quiet });
+   async handler({ source, archiveDir, quiet }) {
+     const dir = archiveDir ?? DEFAULT_ARCHIVE_DIR;
+     const result = await archiveAll(dir, source, getAdapters(), { quiet });
+
+     if (!quiet) {
+       console.error(
+         `\nArchive complete: ${result.updated.length} updated, ${result.current.length} current, ${result.errors.length} errors`,
+       );
+     }
    },
  });

  // Title subcommand
  const titleCmd = command({
    name: "title",
-   description: "Generate LLM titles for transcripts.json entries",
+   description: "Generate LLM titles for archive entries",
    args: {
-     output: positional({
-       type: string,
-       displayName: "output",
-       description: "Output directory containing transcripts.json",
-     }),
+     archiveDir: archiveDirOpt,
      force: flag({
        long: "force",
        short: "f",
@@ -119,21 +101,21 @@ const titleCmd = command({
        description: "Suppress progress output",
      }),
    },
-   async handler({ output, force, quiet }) {
-     await generateTitles({ outputDir: output, force, quiet });
+   async handler({ archiveDir, force, quiet }) {
+     await generateTitles({
+       archiveDir: archiveDir ?? undefined,
+       force,
+       quiet,
+     });
    },
  });

  // Serve subcommand
  const serveCmd = command({
    name: "serve",
-   description: "Serve transcripts via HTTP (dynamic rendering with caching)",
+   description: "Serve transcripts from archive via HTTP",
    args: {
-     source: positional({
-       type: string,
-       displayName: "source",
-       description: "Source directory to scan for session files",
-     }),
+     archiveDir: archiveDirOpt,
      port: option({
        type: optional(number),
        long: "port",
@@ -145,13 +127,66 @@ const serveCmd = command({
        short: "q",
        description: "Suppress request logging",
      }),
-     noCache: flag({
-       long: "no-cache",
-       description: "Bypass HTML cache (for development)",
+   },
+   async handler({ archiveDir, port, quiet }) {
+     await serve({
+       archiveDir: archiveDir ?? undefined,
+       port: port ?? 3000,
+       quiet,
+     });
+   },
+ });
+
+ // Watch subcommand
+ const watchCmd = command({
+   name: "watch",
+   description: "Watch source directories and keep archive updated",
+   args: {
+     source: positional({
+       type: string,
+       displayName: "source",
+       description: "Source directory to watch for session files",
+     }),
+     archiveDir: archiveDirOpt,
+     pollInterval: option({
+       type: optional(number),
+       long: "poll-interval",
+       description: "Poll interval in milliseconds (default: 30000)",
+     }),
+     quiet: flag({
+       long: "quiet",
+       short: "q",
+       description: "Suppress progress output",
      }),
    },
-   async handler({ source, port, quiet, noCache }) {
-     await serve({ source, port: port ?? 3000, quiet, noCache });
+   async handler({ source, archiveDir, pollInterval, quiet }) {
+     const watcher = new ArchiveWatcher([source], {
+       archiveDir: archiveDir ?? undefined,
+       pollIntervalMs: pollInterval ?? undefined,
+       quiet,
+       onUpdate(result) {
+         if (!quiet && result.updated.length > 0) {
+           console.error(`Updated: ${result.updated.join(", ")}`);
+         }
+       },
+       onError(error) {
+         console.error(`Watch error: ${error.message}`);
+       },
+     });
+
+     if (!quiet) {
+       console.error(`Watching ${source}...`);
+     }
+
+     await watcher.start();
+
+     process.on("SIGINT", () => {
+       if (!quiet) {
+         console.error("\nStopping watcher...");
+       }
+       watcher.stop();
+       process.exit(0);
+     });
    },
  });

@@ -174,7 +209,6 @@ const convertCmd = command({
    },
    async handler({ input, output, adapter, head }) {
      if (output && isDirectoryOutput(output)) {
-       // Directory output: use provenance tracking
        await convertToDirectory({
          input,
          outputDir: output,
@@ -182,23 +216,21 @@ const convertCmd = command({
          head,
        });
      } else if (output) {
-       // Explicit file output: not supported anymore (use directory)
        console.error(
          "Error: Explicit file output not supported. Use a directory path instead.",
        );
        process.exit(1);
      } else {
-       // No output: stream to stdout
        const { transcripts } = await parseToTranscripts({ input, adapter });
        for (let i = 0; i < transcripts.length; i++) {
-         if (i > 0) console.log(); // blank line between transcripts
-         console.log(renderTranscript(transcripts[i], head));
+         if (i > 0) console.log();
+         console.log(renderTranscript(transcripts[i], { head }));
        }
      }
    },
  });

- const SUBCOMMANDS = ["convert", "sync", "title", "serve"] as const;
+ const SUBCOMMANDS = ["convert", "archive", "title", "serve", "watch"] as const;

  // Main CLI with subcommands
  const cli = subcommands({
@@ -206,9 +238,10 @@ const cli = subcommands({
    description: "Transform agent session files to readable transcripts",
    cmds: {
      convert: convertCmd,
-     sync: syncCmd,
+     archive: archiveCmd,
      title: titleCmd,
      serve: serveCmd,
+     watch: watchCmd,
    },
  });
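Note: a sketch of embedding the watcher outside the CLI (not part of the package diff). The constructor and option shapes mirror the watch and title handlers above; the source directory is a placeholder, and onUpdate is kept synchronous with an explicit catch because the diff does not show whether ArchiveWatcher awaits the callback.

import { ArchiveWatcher } from "./watch.ts";
import { generateTitles } from "./title.ts";

// Keep the archive current and regenerate titles whenever sessions change.
const watcher = new ArchiveWatcher(["/path/to/sessions"], {
  pollIntervalMs: 60_000,
  quiet: true,
  onUpdate(result) {
    if (result.updated.length > 0) {
      // Fire-and-forget; option shape copied from the title handler above.
      generateTitles({ archiveDir: undefined, force: false, quiet: true }).catch((err) =>
        console.error(err instanceof Error ? err.message : String(err)),
      );
    }
  },
  onError(error) {
    console.error(`Watch error: ${error.message}`);
  },
});

await watcher.start();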
package/src/convert.ts CHANGED
@@ -1,25 +1,15 @@
  /**
-  * Convert command: full pipeline with provenance tracking.
+  * Convert command: parse source and render to markdown.
   *
-  * When output is a directory, uses provenance tracking via transcripts.json
-  * index to manage output files.
+  * Standalone pipeline with no archive dependency. Directory output
+  * writes markdown files with deterministic names.
   */

- import { join } from "path";
+ import { join, resolve } from "path";
  import { mkdir } from "fs/promises";
  import { parseToTranscripts } from "./parse.ts";
  import { renderTranscript } from "./render.ts";
- import { generateOutputName, extractSessionId } from "./utils/naming.ts";
- import {
-   loadIndex,
-   saveIndex,
-   removeEntriesForSource,
-   restoreEntries,
-   deleteOutputFiles,
-   setEntry,
-   normalizeSourcePath,
-   extractFirstUserMessage,
- } from "./utils/provenance.ts";
+ import { generateOutputName } from "./utils/naming.ts";

  export interface ConvertToDirectoryOptions {
    input: string;
@@ -28,91 +18,34 @@ export interface ConvertToDirectoryOptions {
    head?: string;
  }

- /**
-  * Convert source file to markdown in output directory.
-  * Uses provenance tracking to replace existing outputs.
-  */
  export async function convertToDirectory(
    options: ConvertToDirectoryOptions,
  ): Promise<void> {
    const { input, outputDir, adapter, head } = options;

-   // Ensure output directory exists
    await mkdir(outputDir, { recursive: true });

-   // Parse input to transcripts
    const { transcripts, inputPath } = await parseToTranscripts({
      input,
      adapter,
    });

-   // Normalize source path for consistent index keys
-   const sourcePath = normalizeSourcePath(inputPath);
-
-   // Load index and handle existing outputs
-   const index = await loadIndex(outputDir);
-
-   // Remove old entries (save for restoration on error)
-   const removedEntries =
-     sourcePath !== "<stdin>" ? removeEntriesForSource(index, sourcePath) : [];
-
-   const sessionId = extractSessionId(inputPath);
-   const newOutputs: string[] = [];
+   const sourcePath = inputPath === "<stdin>" ? undefined : resolve(inputPath);

-   try {
-     // Generate fresh outputs
-     for (let i = 0; i < transcripts.length; i++) {
-       const transcript = transcripts[i];
-       const segmentIndex = transcripts.length > 1 ? i + 1 : undefined;
+   for (let i = 0; i < transcripts.length; i++) {
+     const transcript = transcripts[i];
+     const segmentIndex = transcripts.length > 1 ? i + 1 : undefined;

-       // Generate deterministic name
-       const baseName = generateOutputName(transcript, inputPath);
-       const suffix = segmentIndex ? `_${segmentIndex}` : "";
-       const relativePath = `${baseName}${suffix}.md`;
-       const outputPath = join(outputDir, relativePath);
+     const baseName = generateOutputName(transcript, inputPath);
+     const suffix = segmentIndex ? `_${segmentIndex}` : "";
+     const relativePath = `${baseName}${suffix}.md`;
+     const outputPath = join(outputDir, relativePath);

-       // Render with provenance front matter
-       const markdown = renderTranscript(transcript, {
-         head,
-         sourcePath: sourcePath !== "<stdin>" ? sourcePath : undefined,
-       });
-       await Bun.write(outputPath, markdown);
-       newOutputs.push(relativePath);
-
-       // Update index (only for non-stdin sources)
-       if (sourcePath !== "<stdin>") {
-         setEntry(index, relativePath, {
-           source: sourcePath,
-           sessionId,
-           segmentIndex,
-           syncedAt: new Date().toISOString(),
-           firstUserMessage: extractFirstUserMessage(transcript),
-           messageCount: transcript.metadata.messageCount,
-           startTime: transcript.metadata.startTime,
-           endTime: transcript.metadata.endTime,
-           cwd: transcript.metadata.cwd,
-         });
-       }
-
-       console.error(`Wrote: ${outputPath}`);
-     }
-
-     // Success: delete old output files (after new ones are written)
-     const oldFilenames = removedEntries.map((e) => e.filename);
-     const toDelete = oldFilenames.filter((f) => !newOutputs.includes(f));
-     if (toDelete.length > 0) {
-       await deleteOutputFiles(outputDir, toDelete);
-     }
-   } catch (error) {
-     // Clean up any newly written files before restoring old entries
-     if (newOutputs.length > 0) {
-       await deleteOutputFiles(outputDir, newOutputs);
-     }
-     // Restore old entries on error to preserve provenance
-     restoreEntries(index, removedEntries);
-     throw error;
+     const markdown = renderTranscript(transcript, {
+       head,
+       sourcePath,
+     });
+     await Bun.write(outputPath, markdown);
+     console.error(`Wrote: ${outputPath}`);
    }
-
-   // Save index
-   await saveIndex(outputDir, index);
  }
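Note: a minimal sketch of calling the simplified converter directly (not part of the package diff). The input path is a placeholder; adapter is passed as undefined to match what the convert handler above forwards when the option is omitted, and the optional head is left out.

import { convertToDirectory } from "./convert.ts";

// Render every transcript segment of one session file into ./transcripts,
// letting generateOutputName pick deterministic file names.
await convertToDirectory({
  input: "/path/to/session.jsonl",
  outputDir: "./transcripts",
  adapter: undefined,
});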
package/src/parse.ts CHANGED
@@ -15,9 +15,6 @@ export interface ParseResult {
    inputPath: string;
  }

- /**
-  * Read input content from file or stdin.
-  */
  async function readInput(
    input: string,
  ): Promise<{ content: string; path: string }>