@arcreflex/agent-transcripts 0.1.4 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/render.ts CHANGED
@@ -1,47 +1,9 @@
1
1
  /**
2
- * Render command: intermediate JSON → markdown
2
+ * Render: intermediate transcript format → markdown
3
3
  */
4
4
 
5
- import { basename, dirname, join } from "path";
6
- import { mkdir } from "fs/promises";
7
5
  import type { Transcript, Message, ToolCall } from "./types.ts";
8
6
 
9
- export interface RenderOptions {
10
- input?: string; // file path, undefined for stdin
11
- output?: string; // output path
12
- head?: string; // render branch ending at this message ID
13
- }
14
-
15
- /**
16
- * Read transcript from file or stdin.
17
- */
18
- async function readTranscript(
19
- input?: string,
20
- ): Promise<{ transcript: Transcript; path: string }> {
21
- let content: string;
22
- let path: string;
23
-
24
- if (input) {
25
- content = await Bun.file(input).text();
26
- path = input;
27
- } else {
28
- const chunks: string[] = [];
29
- const reader = Bun.stdin.stream().getReader();
30
-
31
- while (true) {
32
- const { done, value } = await reader.read();
33
- if (done) break;
34
- chunks.push(new TextDecoder().decode(value));
35
- }
36
-
37
- content = chunks.join("");
38
- path = "<stdin>";
39
- }
40
-
41
- const transcript = JSON.parse(content) as Transcript;
42
- return { transcript, path };
43
- }
44
-
45
7
  /**
46
8
  * Format a single tool call.
47
9
  */
@@ -213,15 +175,33 @@ function tracePath(target: string, parents: Map<string, string>): string[] {
213
175
  return path;
214
176
  }
215
177
 
178
+ export interface RenderTranscriptOptions {
179
+ head?: string; // render branch ending at this message ID
180
+ sourcePath?: string; // absolute source path for front matter provenance
181
+ }
182
+
216
183
  /**
217
184
  * Render transcript to markdown with branch awareness.
218
185
  */
219
186
  export function renderTranscript(
220
187
  transcript: Transcript,
221
- head?: string,
188
+ options: RenderTranscriptOptions | string = {},
222
189
  ): string {
190
+ // Support legacy signature: renderTranscript(transcript, head?: string)
191
+ const opts: RenderTranscriptOptions =
192
+ typeof options === "string" ? { head: options } : options;
193
+ const { head, sourcePath } = opts;
194
+
223
195
  const lines: string[] = [];
224
196
 
197
+ // YAML front matter (for provenance tracking)
198
+ if (sourcePath) {
199
+ lines.push("---");
200
+ lines.push(`source: ${sourcePath}`);
201
+ lines.push("---");
202
+ lines.push("");
203
+ }
204
+
225
205
  // Header
226
206
  lines.push("# Transcript");
227
207
  lines.push("");
@@ -315,48 +295,3 @@ export function renderTranscript(
315
295
 
316
296
  return lines.join("\n");
317
297
  }
318
-
319
- /**
320
- * Determine output path for markdown.
321
- */
322
- function getOutputPath(inputPath: string, outputOption?: string): string {
323
- if (outputOption) {
324
- // If it has an extension, use as-is
325
- if (outputOption.match(/\.\w+$/)) {
326
- return outputOption;
327
- }
328
- // Treat as directory
329
- const base =
330
- inputPath === "<stdin>"
331
- ? "transcript"
332
- : basename(inputPath).replace(/\.json$/, "");
333
- return join(outputOption, `${base}.md`);
334
- }
335
-
336
- // Default: same name in cwd
337
- const base =
338
- inputPath === "<stdin>"
339
- ? "transcript"
340
- : basename(inputPath).replace(/\.json$/, "");
341
- return join(process.cwd(), `${base}.md`);
342
- }
343
-
344
- /**
345
- * Render intermediate JSON to markdown.
346
- */
347
- export async function render(options: RenderOptions): Promise<void> {
348
- const { transcript, path: inputPath } = await readTranscript(options.input);
349
-
350
- const markdown = renderTranscript(transcript, options.head);
351
-
352
- if (options.output) {
353
- const outputPath = getOutputPath(inputPath, options.output);
354
- // Ensure directory exists
355
- await mkdir(dirname(outputPath), { recursive: true });
356
- await Bun.write(outputPath, markdown);
357
- console.error(`Wrote: ${outputPath}`);
358
- } else {
359
- // Default: print to stdout
360
- console.log(markdown);
361
- }
362
- }
package/src/sync.ts CHANGED
@@ -3,15 +3,25 @@
3
3
  *
4
4
  * Discovers session files in source directory, parses them,
5
5
  * and writes rendered markdown to output directory.
6
- * Output structure mirrors source structure with extension changed.
6
+ * Tracks provenance via transcripts.json index.
7
7
  */
8
8
 
9
- import { Glob } from "bun";
10
- import { dirname, join, relative } from "path";
11
- import { mkdir, stat } from "fs/promises";
9
+ import { dirname, join } from "path";
10
+ import { mkdir } from "fs/promises";
12
11
  import { getAdapters } from "./adapters/index.ts";
13
- import type { Adapter } from "./types.ts";
12
+ import type { Adapter, DiscoveredSession } from "./types.ts";
14
13
  import { renderTranscript } from "./render.ts";
14
+ import { generateOutputName, extractSessionId } from "./utils/naming.ts";
15
+ import {
16
+ loadIndex,
17
+ saveIndex,
18
+ isStale,
19
+ setEntry,
20
+ removeEntriesForSource,
21
+ restoreEntries,
22
+ deleteOutputFiles,
23
+ normalizeSourcePath,
24
+ } from "./utils/provenance.ts";
15
25
 
16
26
  export interface SyncOptions {
17
27
  source: string;
@@ -26,80 +36,10 @@ export interface SyncResult {
26
36
  errors: number;
27
37
  }
28
38
 
29
- interface SessionFile {
30
- path: string;
31
- relativePath: string;
32
- mtime: number;
39
+ interface SessionFile extends DiscoveredSession {
33
40
  adapter: Adapter;
34
41
  }
35
42
 
36
- /**
37
- * Discover session files for a specific adapter.
38
- */
39
- async function discoverForAdapter(
40
- source: string,
41
- adapter: Adapter,
42
- ): Promise<SessionFile[]> {
43
- const sessions: SessionFile[] = [];
44
-
45
- for (const pattern of adapter.filePatterns) {
46
- const glob = new Glob(`**/${pattern}`);
47
-
48
- for await (const file of glob.scan({ cwd: source, absolute: false })) {
49
- const fullPath = join(source, file);
50
-
51
- try {
52
- const fileStat = await stat(fullPath);
53
- sessions.push({
54
- path: fullPath,
55
- relativePath: file,
56
- mtime: fileStat.mtime.getTime(),
57
- adapter,
58
- });
59
- } catch {
60
- // Skip files we can't stat
61
- }
62
- }
63
- }
64
-
65
- return sessions;
66
- }
67
-
68
- /**
69
- * Compute output path for a session file.
70
- * Mirrors input structure, changing extension to .md.
71
- */
72
- function computeOutputPath(
73
- relativePath: string,
74
- outputDir: string,
75
- suffix?: string,
76
- ): string {
77
- // Replace extension with .md
78
- const mdPath = relativePath.replace(/\.[^.]+$/, ".md");
79
- // Add suffix if provided (for multiple transcripts from same file)
80
- const finalPath = suffix ? mdPath.replace(/\.md$/, `${suffix}.md`) : mdPath;
81
- return join(outputDir, finalPath);
82
- }
83
-
84
- /**
85
- * Check if output file needs to be re-rendered based on mtime.
86
- */
87
- async function needsSync(
88
- outputPath: string,
89
- sourceMtime: number,
90
- force: boolean,
91
- ): Promise<boolean> {
92
- if (force) return true;
93
-
94
- try {
95
- const outputStat = await stat(outputPath);
96
- return outputStat.mtime.getTime() < sourceMtime;
97
- } catch {
98
- // Output doesn't exist, needs sync
99
- return true;
100
- }
101
- }
102
-
103
43
  /**
104
44
  * Sync session files from source to output directory.
105
45
  */
@@ -108,11 +48,24 @@ export async function sync(options: SyncOptions): Promise<SyncResult> {
108
48
 
109
49
  const result: SyncResult = { synced: 0, skipped: 0, errors: 0 };
110
50
 
111
- // Discover sessions for each adapter
51
+ // Ensure output directory exists
52
+ await mkdir(output, { recursive: true });
53
+
54
+ // Load index
55
+ const index = await loadIndex(output);
56
+ if (!quiet && Object.keys(index.entries).length > 0) {
57
+ console.error(
58
+ `Found ${Object.keys(index.entries).length} existing transcript(s) in index`,
59
+ );
60
+ }
61
+
62
+ // Discover sessions from all adapters
112
63
  const sessions: SessionFile[] = [];
113
64
  for (const adapter of getAdapters()) {
114
- const adapterSessions = await discoverForAdapter(source, adapter);
115
- sessions.push(...adapterSessions);
65
+ const discovered = await adapter.discover(source);
66
+ for (const session of discovered) {
67
+ sessions.push({ ...session, adapter });
68
+ }
116
69
  }
117
70
 
118
71
  if (!quiet) {
@@ -121,41 +74,91 @@ export async function sync(options: SyncOptions): Promise<SyncResult> {
121
74
 
122
75
  // Process each session
123
76
  for (const session of sessions) {
77
+ // Normalize source path for consistent index keys
78
+ const sourcePath = normalizeSourcePath(session.path);
79
+
124
80
  try {
125
81
  // Read and parse using the adapter that discovered this file
126
82
  const content = await Bun.file(session.path).text();
127
83
  const transcripts = session.adapter.parse(content, session.path);
128
84
 
129
- // Process each transcript (usually just one per file)
130
- for (let i = 0; i < transcripts.length; i++) {
131
- const transcript = transcripts[i];
132
- const suffix = transcripts.length > 1 ? `_${i + 1}` : undefined;
133
- const outputPath = computeOutputPath(
134
- session.relativePath,
135
- output,
136
- suffix,
137
- );
138
-
139
- // Check if sync needed
140
- if (!(await needsSync(outputPath, session.mtime, force))) {
141
- if (!quiet) {
142
- console.error(`Skip (up to date): ${outputPath}`);
143
- }
144
- result.skipped++;
145
- continue;
85
+ // Check if sync needed (force or stale)
86
+ const needsUpdate =
87
+ force ||
88
+ isStale(index, sourcePath, session.mtime, transcripts.length, output);
89
+
90
+ if (!needsUpdate) {
91
+ if (!quiet) {
92
+ console.error(`Skip (up to date): ${session.relativePath}`);
146
93
  }
94
+ result.skipped++;
95
+ continue;
96
+ }
147
97
 
148
- // Ensure output directory exists
149
- await mkdir(dirname(outputPath), { recursive: true });
98
+ // Remove entries from index (save for potential restoration on error)
99
+ const removedEntries = removeEntriesForSource(index, sourcePath);
150
100
 
151
- // Render and write
152
- const markdown = renderTranscript(transcript);
153
- await Bun.write(outputPath, markdown);
101
+ // Track new outputs for this session
102
+ const newOutputs: string[] = [];
103
+ const sessionId = extractSessionId(session.path);
154
104
 
155
- if (!quiet) {
156
- console.error(`Synced: ${outputPath}`);
105
+ try {
106
+ // Generate fresh outputs for all transcripts
107
+ for (let i = 0; i < transcripts.length; i++) {
108
+ const transcript = transcripts[i];
109
+ const segmentIndex = transcripts.length > 1 ? i + 1 : undefined;
110
+
111
+ // Generate deterministic name
112
+ const baseName = generateOutputName(transcript, session.path);
113
+ const suffix = segmentIndex ? `_${segmentIndex}` : "";
114
+ const relativeDir = dirname(session.relativePath);
115
+ const relativePath =
116
+ relativeDir === "."
117
+ ? `${baseName}${suffix}.md`
118
+ : join(relativeDir, `${baseName}${suffix}.md`);
119
+ const outputPath = join(output, relativePath);
120
+
121
+ // Ensure output directory exists
122
+ await mkdir(dirname(outputPath), { recursive: true });
123
+
124
+ // Render with provenance front matter and write
125
+ const markdown = renderTranscript(transcript, {
126
+ sourcePath,
127
+ });
128
+ await Bun.write(outputPath, markdown);
129
+ newOutputs.push(relativePath);
130
+
131
+ // Update index
132
+ setEntry(index, relativePath, {
133
+ source: sourcePath,
134
+ sourceMtime: session.mtime,
135
+ sessionId,
136
+ segmentIndex,
137
+ syncedAt: new Date().toISOString(),
138
+ });
139
+
140
+ if (!quiet) {
141
+ console.error(`Synced: ${outputPath}`);
142
+ }
157
143
  }
144
+
145
+ // Success: delete old output files (after new ones are written)
146
+ const oldFilenames = removedEntries.map((e) => e.filename);
147
+ // Only delete files that aren't being reused
148
+ const toDelete = oldFilenames.filter((f) => !newOutputs.includes(f));
149
+ if (toDelete.length > 0) {
150
+ await deleteOutputFiles(output, toDelete, quiet);
151
+ }
152
+
158
153
  result.synced++;
154
+ } catch (error) {
155
+ // Clean up any newly written files before restoring old entries
156
+ if (newOutputs.length > 0) {
157
+ await deleteOutputFiles(output, newOutputs, quiet);
158
+ }
159
+ // Restore old entries on error to preserve provenance
160
+ restoreEntries(index, removedEntries);
161
+ throw error;
159
162
  }
160
163
  } catch (error) {
161
164
  const message = error instanceof Error ? error.message : String(error);
@@ -164,6 +167,9 @@ export async function sync(options: SyncOptions): Promise<SyncResult> {
164
167
  }
165
168
  }
166
169
 
170
+ // Save index
171
+ await saveIndex(output, index);
172
+
167
173
  // Summary
168
174
  if (!quiet) {
169
175
  console.error(
package/src/types.ts CHANGED
@@ -65,13 +65,22 @@ export interface ErrorMessage extends BaseMessage {
65
65
  content: string;
66
66
  }
67
67
 
68
+ /**
69
+ * A session file discovered by an adapter.
70
+ */
71
+ export interface DiscoveredSession {
72
+ path: string;
73
+ relativePath: string;
74
+ mtime: number;
75
+ }
76
+
68
77
  /**
69
78
  * Adapter interface - each source format implements this.
70
79
  */
71
80
  export interface Adapter {
72
81
  name: string;
73
- /** Glob patterns for discovering session files (e.g., ["*.jsonl"]) */
74
- filePatterns: string[];
82
+ /** Discover session files in the given directory */
83
+ discover(source: string): Promise<DiscoveredSession[]>;
75
84
  /** Parse source content into one or more transcripts (split by conversation) */
76
85
  parse(content: string, sourcePath: string): Transcript[];
77
86
  }
package/src/utils/naming.ts CHANGED
@@ -1,173 +1,60 @@
1
1
  /**
2
2
  * Output file naming utilities.
3
3
  *
4
- * Generates descriptive filenames for transcripts:
5
- * - With OpenRouter API key: yyyy-mm-dd-{llm-generated-slug}.{ext}
6
- * - Without: yyyy-mm-dd-{input-filename-prefix}.{ext}
4
+ * Generates deterministic filenames: {datetime}-{sessionId}.md
7
5
  */
8
6
 
9
- import type { Transcript, UserMessage } from "../types.ts";
7
+ import type { Transcript } from "../types.ts";
10
8
  import { basename } from "path";
11
9
 
12
- export interface NamingOptions {
13
- apiKey?: string; // OpenRouter API key
14
- model?: string; // Default: google/gemini-2.0-flash-001
15
- }
16
-
17
- const DEFAULT_MODEL = "google/gemini-2.0-flash-001";
18
- const SLUG_MAX_LENGTH = 40;
19
-
20
10
  /**
21
- * Extract date from transcript's first message timestamp.
11
+ * Extract date and time from transcript's first message timestamp.
12
+ * Returns format: yyyy-mm-dd-hhmm (24-hour, local time)
22
13
  */
23
- function extractDate(transcript: Transcript): string {
14
+ function extractDateTime(transcript: Transcript): string {
24
15
  const firstMessage = transcript.messages[0];
25
- if (firstMessage?.timestamp) {
26
- const date = new Date(firstMessage.timestamp);
27
- if (!isNaN(date.getTime())) {
28
- return date.toISOString().slice(0, 10); // yyyy-mm-dd
29
- }
30
- }
31
- // Fallback to current date
32
- return new Date().toISOString().slice(0, 10);
33
- }
34
-
35
- /**
36
- * Extract context from transcript for LLM summarization.
37
- * Uses first few user messages, truncated.
38
- */
39
- function extractContext(transcript: Transcript): string {
40
- const userMessages = transcript.messages.filter(
41
- (m): m is UserMessage => m.type === "user",
42
- );
16
+ const date = firstMessage?.timestamp
17
+ ? new Date(firstMessage.timestamp)
18
+ : new Date();
43
19
 
44
- const chunks: string[] = [];
45
- let totalLength = 0;
46
- const maxLength = 500;
47
-
48
- for (const msg of userMessages.slice(0, 3)) {
49
- const content = msg.content.slice(0, 200);
50
- if (totalLength + content.length > maxLength) break;
51
- chunks.push(content);
52
- totalLength += content.length;
20
+ if (isNaN(date.getTime())) {
21
+ return formatDateTime(new Date());
53
22
  }
54
-
55
- return chunks.join("\n\n");
23
+ return formatDateTime(date);
56
24
  }
57
25
 
58
- /**
59
- * Sanitize a string into a valid URL slug.
60
- */
61
- function sanitizeSlug(input: string): string {
62
- return input
63
- .toLowerCase()
64
- .replace(/[^a-z0-9\s-]/g, "") // remove special chars
65
- .replace(/\s+/g, "-") // spaces to hyphens
66
- .replace(/-+/g, "-") // collapse multiple hyphens
67
- .replace(/^-|-$/g, "") // trim leading/trailing hyphens
68
- .slice(0, SLUG_MAX_LENGTH);
26
+ function formatDateTime(date: Date): string {
27
+ const year = date.getFullYear();
28
+ const month = String(date.getMonth() + 1).padStart(2, "0");
29
+ const day = String(date.getDate()).padStart(2, "0");
30
+ const hours = String(date.getHours()).padStart(2, "0");
31
+ const minutes = String(date.getMinutes()).padStart(2, "0");
32
+ return `${year}-${month}-${day}-${hours}${minutes}`;
69
33
  }
70
34
 
71
35
  /**
72
- * Generate slug via OpenRouter API.
36
+ * Extract session ID from the input filename.
37
+ * Returns the full session ID (filename without extension) for traceability.
73
38
  */
74
- async function generateSlugViaLLM(
75
- context: string,
76
- options: NamingOptions,
77
- ): Promise<string | null> {
78
- const { apiKey, model = DEFAULT_MODEL } = options;
79
- if (!apiKey || !context.trim()) return null;
80
-
81
- try {
82
- const response = await fetch(
83
- "https://openrouter.ai/api/v1/chat/completions",
84
- {
85
- method: "POST",
86
- headers: {
87
- Authorization: `Bearer ${apiKey}`,
88
- "Content-Type": "application/json",
89
- },
90
- body: JSON.stringify({
91
- model,
92
- messages: [
93
- {
94
- role: "user",
95
- content: `Generate a 2-4 word URL slug (lowercase, hyphenated) summarizing this conversation topic. Reply with ONLY the slug, nothing else.\n\n${context}`,
96
- },
97
- ],
98
- max_tokens: 20,
99
- }),
100
- },
101
- );
102
-
103
- if (!response.ok) {
104
- console.error(
105
- `OpenRouter API error: ${response.status} ${response.statusText}`,
106
- );
107
- return null;
108
- }
109
-
110
- const data = (await response.json()) as {
111
- choices?: Array<{ message?: { content?: string } }>;
112
- };
113
- const content = data.choices?.[0]?.message?.content?.trim();
114
-
115
- if (!content) return null;
116
-
117
- const slug = sanitizeSlug(content);
118
- return slug || null;
119
- } catch (error) {
120
- console.error(
121
- `OpenRouter API call failed: ${error instanceof Error ? error.message : error}`,
122
- );
123
- return null;
124
- }
125
- }
126
-
127
- /**
128
- * Generate fallback slug from input filename.
129
- */
130
- function generateFallbackSlug(inputPath: string): string {
131
- return extractFileId(inputPath, 8) || "transcript";
132
- }
133
-
134
- /**
135
- * Extract a short identifier from the input filename.
136
- * Used as a suffix for traceability back to source.
137
- */
138
- function extractFileId(inputPath: string, length = 6): string {
39
+ export function extractSessionId(inputPath: string): string {
139
40
  if (inputPath === "<stdin>") {
140
- return "";
41
+ return "stdin";
141
42
  }
142
43
 
143
44
  const name = basename(inputPath);
144
- const base = name.replace(/\.jsonl?$/, "");
145
- // Take first N chars, sanitize, and clean up any trailing hyphens
146
- return sanitizeSlug(base.slice(0, length)).replace(/-+$/, "");
45
+ // Remove .jsonl or .json extension
46
+ return name.replace(/\.jsonl?$/, "");
147
47
  }
148
48
 
149
49
  /**
150
50
  * Generate output base name for a transcript.
151
- * Returns string like "2024-01-15-implement-auth-flow-abc123"
51
+ * Returns format: "2024-01-15-1423-{sessionId}"
152
52
  */
153
- export async function generateOutputName(
53
+ export function generateOutputName(
154
54
  transcript: Transcript,
155
55
  inputPath: string,
156
- options: NamingOptions = {},
157
- ): Promise<string> {
158
- const date = extractDate(transcript);
159
- const fileId = extractFileId(inputPath);
160
-
161
- // Try LLM-generated slug if API key available
162
- if (options.apiKey) {
163
- const context = extractContext(transcript);
164
- const slug = await generateSlugViaLLM(context, options);
165
- if (slug) {
166
- return fileId ? `${date}-${slug}-${fileId}` : `${date}-${slug}`;
167
- }
168
- }
169
-
170
- // Fallback to input filename prefix (no need for fileId suffix, it's already the slug)
171
- const slug = generateFallbackSlug(inputPath);
172
- return `${date}-${slug}`;
56
+ ): string {
57
+ const dateTime = extractDateTime(transcript);
58
+ const sessionId = extractSessionId(inputPath);
59
+ return `${dateTime}-${sessionId}`;
173
60
  }