@arcreflex/agent-transcripts 0.1.5 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,173 +1,60 @@
1
1
  /**
2
2
  * Output file naming utilities.
3
3
  *
4
- * Generates descriptive filenames for transcripts:
5
- * - With OpenRouter API key: yyyy-mm-dd-{llm-generated-slug}.{ext}
6
- * - Without: yyyy-mm-dd-{input-filename-prefix}.{ext}
4
+ * Generates deterministic filenames: {datetime}-{sessionId}.md
7
5
  */
8
6
 
9
- import type { Transcript, UserMessage } from "../types.ts";
7
+ import type { Transcript } from "../types.ts";
10
8
  import { basename } from "path";
11
9
 
12
- export interface NamingOptions {
13
- apiKey?: string; // OpenRouter API key
14
- model?: string; // Default: google/gemini-2.0-flash-001
15
- }
16
-
17
- const DEFAULT_MODEL = "google/gemini-2.0-flash-001";
18
- const SLUG_MAX_LENGTH = 40;
19
-
20
10
  /**
21
- * Extract date from transcript's first message timestamp.
11
+ * Extract date and time from transcript's first message timestamp.
12
+ * Returns format: yyyy-mm-dd-hhmm (24-hour, local time)
22
13
  */
23
- function extractDate(transcript: Transcript): string {
14
+ function extractDateTime(transcript: Transcript): string {
24
15
  const firstMessage = transcript.messages[0];
25
- if (firstMessage?.timestamp) {
26
- const date = new Date(firstMessage.timestamp);
27
- if (!isNaN(date.getTime())) {
28
- return date.toISOString().slice(0, 10); // yyyy-mm-dd
29
- }
30
- }
31
- // Fallback to current date
32
- return new Date().toISOString().slice(0, 10);
33
- }
34
-
35
- /**
36
- * Extract context from transcript for LLM summarization.
37
- * Uses first few user messages, truncated.
38
- */
39
- function extractContext(transcript: Transcript): string {
40
- const userMessages = transcript.messages.filter(
41
- (m): m is UserMessage => m.type === "user",
42
- );
16
+ const date = firstMessage?.timestamp
17
+ ? new Date(firstMessage.timestamp)
18
+ : new Date();
43
19
 
44
- const chunks: string[] = [];
45
- let totalLength = 0;
46
- const maxLength = 500;
47
-
48
- for (const msg of userMessages.slice(0, 3)) {
49
- const content = msg.content.slice(0, 200);
50
- if (totalLength + content.length > maxLength) break;
51
- chunks.push(content);
52
- totalLength += content.length;
20
+ if (isNaN(date.getTime())) {
21
+ return formatDateTime(new Date());
53
22
  }
54
-
55
- return chunks.join("\n\n");
23
+ return formatDateTime(date);
56
24
  }
57
25
 
58
- /**
59
- * Sanitize a string into a valid URL slug.
60
- */
61
- function sanitizeSlug(input: string): string {
62
- return input
63
- .toLowerCase()
64
- .replace(/[^a-z0-9\s-]/g, "") // remove special chars
65
- .replace(/\s+/g, "-") // spaces to hyphens
66
- .replace(/-+/g, "-") // collapse multiple hyphens
67
- .replace(/^-|-$/g, "") // trim leading/trailing hyphens
68
- .slice(0, SLUG_MAX_LENGTH);
26
+ function formatDateTime(date: Date): string {
27
+ const year = date.getFullYear();
28
+ const month = String(date.getMonth() + 1).padStart(2, "0");
29
+ const day = String(date.getDate()).padStart(2, "0");
30
+ const hours = String(date.getHours()).padStart(2, "0");
31
+ const minutes = String(date.getMinutes()).padStart(2, "0");
32
+ return `${year}-${month}-${day}-${hours}${minutes}`;
69
33
  }
70
34
 
71
35
  /**
72
- * Generate slug via OpenRouter API.
36
+ * Extract session ID from the input filename.
37
+ * Returns the full session ID (filename without extension) for traceability.
73
38
  */
74
- async function generateSlugViaLLM(
75
- context: string,
76
- options: NamingOptions,
77
- ): Promise<string | null> {
78
- const { apiKey, model = DEFAULT_MODEL } = options;
79
- if (!apiKey || !context.trim()) return null;
80
-
81
- try {
82
- const response = await fetch(
83
- "https://openrouter.ai/api/v1/chat/completions",
84
- {
85
- method: "POST",
86
- headers: {
87
- Authorization: `Bearer ${apiKey}`,
88
- "Content-Type": "application/json",
89
- },
90
- body: JSON.stringify({
91
- model,
92
- messages: [
93
- {
94
- role: "user",
95
- content: `Generate a 2-4 word URL slug (lowercase, hyphenated) summarizing this conversation topic. Reply with ONLY the slug, nothing else.\n\n${context}`,
96
- },
97
- ],
98
- max_tokens: 20,
99
- }),
100
- },
101
- );
102
-
103
- if (!response.ok) {
104
- console.error(
105
- `OpenRouter API error: ${response.status} ${response.statusText}`,
106
- );
107
- return null;
108
- }
109
-
110
- const data = (await response.json()) as {
111
- choices?: Array<{ message?: { content?: string } }>;
112
- };
113
- const content = data.choices?.[0]?.message?.content?.trim();
114
-
115
- if (!content) return null;
116
-
117
- const slug = sanitizeSlug(content);
118
- return slug || null;
119
- } catch (error) {
120
- console.error(
121
- `OpenRouter API call failed: ${error instanceof Error ? error.message : error}`,
122
- );
123
- return null;
124
- }
125
- }
126
-
127
- /**
128
- * Generate fallback slug from input filename.
129
- */
130
- function generateFallbackSlug(inputPath: string): string {
131
- return extractFileId(inputPath, 8) || "transcript";
132
- }
133
-
134
- /**
135
- * Extract a short identifier from the input filename.
136
- * Used as a suffix for traceability back to source.
137
- */
138
- function extractFileId(inputPath: string, length = 6): string {
39
+ export function extractSessionId(inputPath: string): string {
139
40
  if (inputPath === "<stdin>") {
140
- return "";
41
+ return "stdin";
141
42
  }
142
43
 
143
44
  const name = basename(inputPath);
144
- const base = name.replace(/\.jsonl?$/, "");
145
- // Take first N chars, sanitize, and clean up any trailing hyphens
146
- return sanitizeSlug(base.slice(0, length)).replace(/-+$/, "");
45
+ // Remove .jsonl or .json extension
46
+ return name.replace(/\.jsonl?$/, "");
147
47
  }
148
48
 
149
49
  /**
150
50
  * Generate output base name for a transcript.
151
- * Returns string like "2024-01-15-implement-auth-flow-abc123"
51
+ * Returns format: "2024-01-15-1423-{sessionId}"
152
52
  */
153
- export async function generateOutputName(
53
+ export function generateOutputName(
154
54
  transcript: Transcript,
155
55
  inputPath: string,
156
- options: NamingOptions = {},
157
- ): Promise<string> {
158
- const date = extractDate(transcript);
159
- const fileId = extractFileId(inputPath);
160
-
161
- // Try LLM-generated slug if API key available
162
- if (options.apiKey) {
163
- const context = extractContext(transcript);
164
- const slug = await generateSlugViaLLM(context, options);
165
- if (slug) {
166
- return fileId ? `${date}-${slug}-${fileId}` : `${date}-${slug}`;
167
- }
168
- }
169
-
170
- // Fallback to input filename prefix (no need for fileId suffix, it's already the slug)
171
- const slug = generateFallbackSlug(inputPath);
172
- return `${date}-${slug}`;
56
+ ): string {
57
+ const dateTime = extractDateTime(transcript);
58
+ const sessionId = extractSessionId(inputPath);
59
+ return `${dateTime}-${sessionId}`;
173
60
  }
@@ -0,0 +1,116 @@
1
+ /**
2
+ * OpenRouter API client for LLM-based title generation.
3
+ *
4
+ * Uses Gemini 2.5 Flash for fast, cheap title generation.
5
+ * Gracefully handles missing API key or API failures.
6
+ */
7
+
8
+ const OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions";
9
+ const MODEL = "google/gemini-2.5-flash";
10
+
11
+ // Approximate token limit for context (conservative estimate)
12
+ // Gemini Flash has 1M context, but we don't need anywhere near that
13
+ const MAX_CHARS = 32000; // ~8k tokens
14
+
15
+ /**
16
+ * Truncate content with middle-cut strategy.
17
+ * Keeps beginning and end, removes middle if over limit.
18
+ */
19
+ function truncateMiddle(content: string, maxChars: number): string {
20
+ if (content.length <= maxChars) return content;
21
+
22
+ const halfLimit = Math.floor(maxChars / 2);
23
+ const start = content.slice(0, halfLimit);
24
+ const end = content.slice(-halfLimit);
25
+
26
+ return `${start}\n\n[... middle truncated ...]\n\n${end}`;
27
+ }
28
+
29
+ interface OpenRouterResponse {
30
+ choices?: Array<{
31
+ message?: {
32
+ content?: string;
33
+ };
34
+ }>;
35
+ error?: {
36
+ message?: string;
37
+ };
38
+ }
39
+
40
+ /**
41
+ * Generate a title for a transcript using OpenRouter.
42
+ *
43
+ * @param markdownContent - The full markdown transcript
44
+ * @returns Generated title, or undefined if generation fails/skipped
45
+ */
46
+ export async function generateTitle(
47
+ markdownContent: string,
48
+ ): Promise<string | undefined> {
49
+ const apiKey = process.env.OPENROUTER_API_KEY;
50
+
51
+ if (!apiKey) {
52
+ // Silently skip - no API key means user doesn't want title generation
53
+ return undefined;
54
+ }
55
+
56
+ const truncated = truncateMiddle(markdownContent, MAX_CHARS);
57
+
58
+ const prompt = `Generate a concise title (5-10 words) for this AI coding session transcript. The title should capture the main task or topic discussed.
59
+
60
+ Reply with just the title, no quotes, no punctuation at the end, no explanation.
61
+
62
+ Transcript:
63
+ ${truncated}`;
64
+
65
+ try {
66
+ const response = await fetch(OPENROUTER_API_URL, {
67
+ method: "POST",
68
+ headers: {
69
+ Authorization: `Bearer ${apiKey}`,
70
+ "Content-Type": "application/json",
71
+ "HTTP-Referer": "https://github.com/arcreflex/agent-transcripts",
72
+ "X-Title": "agent-transcripts",
73
+ },
74
+ body: JSON.stringify({
75
+ model: MODEL,
76
+ messages: [{ role: "user", content: prompt }],
77
+ max_tokens: 50,
78
+ temperature: 0.3,
79
+ }),
80
+ });
81
+
82
+ if (!response.ok) {
83
+ const text = await response.text();
84
+ console.error(
85
+ `Warning: OpenRouter API error (${response.status}): ${text.slice(0, 200)}`,
86
+ );
87
+ return undefined;
88
+ }
89
+
90
+ const data = (await response.json()) as OpenRouterResponse;
91
+
92
+ if (data.error) {
93
+ console.error(
94
+ `Warning: OpenRouter error: ${data.error.message || "Unknown error"}`,
95
+ );
96
+ return undefined;
97
+ }
98
+
99
+ const title = data.choices?.[0]?.message?.content?.trim();
100
+
101
+ if (!title) {
102
+ console.error("Warning: OpenRouter returned empty title");
103
+ return undefined;
104
+ }
105
+
106
+ // Clean up: remove quotes if present, trim trailing punctuation
107
+ return title
108
+ .replace(/^["']|["']$/g, "")
109
+ .replace(/[.!?]+$/, "")
110
+ .trim();
111
+ } catch (err) {
112
+ const message = err instanceof Error ? err.message : String(err);
113
+ console.error(`Warning: OpenRouter request failed: ${message}`);
114
+ return undefined;
115
+ }
116
+ }
@@ -2,113 +2,211 @@
2
2
  * Provenance tracking utilities.
3
3
  *
4
4
  * Tracks the relationship between source files and output transcripts
5
- * via YAML front matter, enabling update-in-place behavior.
5
+ * via transcripts.json index (primary) and YAML front matter (for self-documenting files).
6
6
  */
7
7
 
8
- import { Glob } from "bun";
9
- import { join } from "path";
10
- import { stat, unlink } from "fs/promises";
8
+ import { join, resolve } from "path";
9
+ import { rename, unlink } from "fs/promises";
10
+
11
+ const INDEX_FILENAME = "transcripts.json";
12
+
13
+ // ============================================================================
14
+ // Index Types
15
+ // ============================================================================
16
+
17
+ export interface TranscriptEntry {
18
+ source: string; // absolute path to source
19
+ sessionId: string; // full session ID from source filename
20
+ segmentIndex?: number; // for multi-transcript sources (1-indexed)
21
+ syncedAt: string; // ISO timestamp
22
+ firstUserMessage: string; // first user message content (for display)
23
+ title?: string; // copied from cache for index.html convenience
24
+ messageCount: number;
25
+ startTime: string; // ISO timestamp
26
+ endTime: string; // ISO timestamp
27
+ cwd?: string;
28
+ }
29
+
30
+ export interface TranscriptsIndex {
31
+ version: 1;
32
+ entries: Record<string, TranscriptEntry>; // outputFilename → entry
33
+ }
34
+
35
+ // ============================================================================
36
+ // Path Utilities
37
+ // ============================================================================
38
+
39
+ /**
40
+ * Normalize a source path to absolute for consistent index keys.
41
+ */
42
+ export function normalizeSourcePath(sourcePath: string): string {
43
+ if (sourcePath === "<stdin>") return sourcePath;
44
+ return resolve(sourcePath);
45
+ }
46
+
47
+ // ============================================================================
48
+ // Index I/O
49
+ // ============================================================================
11
50
 
12
51
  /**
13
- * Extract source path from YAML front matter.
14
- * Returns null if no front matter or no source field.
52
+ * Load transcripts.json index from output directory.
53
+ * Returns empty index if file doesn't exist. Warns on corrupt file.
15
54
  */
16
- export function extractSourceFromFrontMatter(content: string): string | null {
17
- // Match YAML front matter at start of file
18
- const match = content.match(/^---\n([\s\S]*?)\n---/);
19
- if (!match) return null;
20
-
21
- // Extract source field (simple line-based parsing)
22
- const frontMatter = match[1];
23
- const sourceLine = frontMatter
24
- .split("\n")
25
- .find((line) => line.startsWith("source:"));
26
- if (!sourceLine) return null;
27
-
28
- return sourceLine.replace(/^source:\s*/, "").trim();
55
+ export async function loadIndex(outputDir: string): Promise<TranscriptsIndex> {
56
+ const indexPath = join(outputDir, INDEX_FILENAME);
57
+ try {
58
+ const content = await Bun.file(indexPath).text();
59
+ const data = JSON.parse(content) as TranscriptsIndex;
60
+ // Validate version
61
+ if (data.version !== 1) {
62
+ console.error(
63
+ `Warning: Unknown index version ${data.version}, creating fresh index`,
64
+ );
65
+ return { version: 1, entries: {} };
66
+ }
67
+ return data;
68
+ } catch (err) {
69
+ // Distinguish between missing file (expected) and corrupt file (unexpected)
70
+ const isEnoent =
71
+ err instanceof Error && (err as NodeJS.ErrnoException).code === "ENOENT";
72
+ if (!isEnoent) {
73
+ console.error(
74
+ `Warning: Could not parse index file, starting fresh: ${err instanceof Error ? err.message : String(err)}`,
75
+ );
76
+ }
77
+ return { version: 1, entries: {} };
78
+ }
29
79
  }
30
80
 
31
81
  /**
32
- * Scan output directory for existing transcripts.
33
- * Returns map from absolute source path all output file paths for that source.
82
+ * Save transcripts.json index to output directory.
83
+ * Uses atomic write (write to .tmp, then rename) to prevent corruption.
34
84
  */
35
- export async function scanOutputDirectory(
85
+ export async function saveIndex(
36
86
  outputDir: string,
37
- ): Promise<Map<string, string[]>> {
38
- const sourceToOutputs = new Map<string, string[]>();
39
- const glob = new Glob("**/*.md");
87
+ index: TranscriptsIndex,
88
+ ): Promise<void> {
89
+ const indexPath = join(outputDir, INDEX_FILENAME);
90
+ const tmpPath = `${indexPath}.tmp`;
40
91
 
41
- for await (const file of glob.scan({ cwd: outputDir, absolute: false })) {
42
- const fullPath = join(outputDir, file);
92
+ const content = JSON.stringify(index, null, 2) + "\n";
93
+ await Bun.write(tmpPath, content);
94
+ try {
95
+ await rename(tmpPath, indexPath);
96
+ } catch (err) {
97
+ // Clean up temp file on failure
43
98
  try {
44
- const content = await Bun.file(fullPath).text();
45
- const sourcePath = extractSourceFromFrontMatter(content);
46
- if (sourcePath) {
47
- const existing = sourceToOutputs.get(sourcePath) || [];
48
- existing.push(fullPath);
49
- sourceToOutputs.set(sourcePath, existing);
50
- }
99
+ await unlink(tmpPath);
51
100
  } catch {
52
- // Skip files we can't read
101
+ // Ignore cleanup errors
102
+ }
103
+ throw err;
104
+ }
105
+ }
106
+
107
+ // ============================================================================
108
+ // Index Operations
109
+ // ============================================================================
110
+
111
+ /**
112
+ * Get all output filenames for a given source path.
113
+ */
114
+ export function getOutputsForSource(
115
+ index: TranscriptsIndex,
116
+ sourcePath: string,
117
+ ): string[] {
118
+ const outputs: string[] = [];
119
+ for (const [filename, entry] of Object.entries(index.entries)) {
120
+ if (entry.source === sourcePath) {
121
+ outputs.push(filename);
53
122
  }
54
123
  }
124
+ return outputs;
125
+ }
55
126
 
56
- return sourceToOutputs;
127
+ /**
128
+ * Set or update an entry in the index.
129
+ * outputPath should be relative to the output directory.
130
+ */
131
+ export function setEntry(
132
+ index: TranscriptsIndex,
133
+ outputPath: string,
134
+ entry: TranscriptEntry,
135
+ ): void {
136
+ index.entries[outputPath] = entry;
57
137
  }
58
138
 
59
139
  /**
60
- * Find existing outputs for a specific source path.
140
+ * Remove all entries for a given source path.
141
+ * Returns the removed entries (for potential restoration on error).
61
142
  */
62
- export async function findExistingOutputs(
63
- outputDir: string,
143
+ export function removeEntriesForSource(
144
+ index: TranscriptsIndex,
64
145
  sourcePath: string,
65
- ): Promise<string[]> {
66
- const allOutputs = await scanOutputDirectory(outputDir);
67
- return allOutputs.get(sourcePath) || [];
146
+ ): Array<{ filename: string; entry: TranscriptEntry }> {
147
+ const removed: Array<{ filename: string; entry: TranscriptEntry }> = [];
148
+ for (const [filename, entry] of Object.entries(index.entries)) {
149
+ if (entry.source === sourcePath) {
150
+ removed.push({ filename, entry });
151
+ delete index.entries[filename];
152
+ }
153
+ }
154
+ return removed;
155
+ }
156
+
157
+ /**
158
+ * Restore previously removed entries to the index.
159
+ */
160
+ export function restoreEntries(
161
+ index: TranscriptsIndex,
162
+ entries: Array<{ filename: string; entry: TranscriptEntry }>,
163
+ ): void {
164
+ for (const { filename, entry } of entries) {
165
+ index.entries[filename] = entry;
166
+ }
68
167
  }
69
168
 
169
+ // ============================================================================
170
+ // File Operations
171
+ // ============================================================================
172
+
70
173
  /**
71
- * Delete existing output files, with warnings on failure.
174
+ * Delete output files, with warnings on failure.
72
175
  */
73
- export async function deleteExistingOutputs(
74
- paths: string[],
176
+ export async function deleteOutputFiles(
177
+ outputDir: string,
178
+ filenames: string[],
75
179
  quiet = false,
76
180
  ): Promise<void> {
77
- for (const oldPath of paths) {
181
+ for (const filename of filenames) {
182
+ const fullPath = join(outputDir, filename);
78
183
  try {
79
- await unlink(oldPath);
184
+ await unlink(fullPath);
80
185
  if (!quiet) {
81
- console.error(`Deleted: ${oldPath}`);
186
+ console.error(`Deleted: ${fullPath}`);
82
187
  }
83
188
  } catch (err) {
84
- // Warn but continue - file may already be gone or have permission issues
85
189
  const msg = err instanceof Error ? err.message : String(err);
86
- console.error(`Warning: could not delete ${oldPath}: ${msg}`);
190
+ console.error(`Warning: could not delete ${fullPath}: ${msg}`);
87
191
  }
88
192
  }
89
193
  }
90
194
 
195
+ // ============================================================================
196
+ // Transcript Metadata Extraction
197
+ // ============================================================================
198
+
199
+ import type { Transcript } from "../types.ts";
200
+
91
201
  /**
92
- * Check if any outputs are stale relative to source mtime.
202
+ * Extract the first user message from a transcript.
203
+ * Returns empty string if no user message found.
93
204
  */
94
- export async function hasStaleOutputs(
95
- existingOutputs: string[],
96
- expectedCount: number,
97
- sourceMtime: number,
98
- ): Promise<boolean> {
99
- if (existingOutputs.length !== expectedCount) return true;
100
-
101
- for (const outputPath of existingOutputs) {
102
- try {
103
- const outputStat = await stat(outputPath);
104
- if (outputStat.mtime.getTime() < sourceMtime) {
105
- return true;
106
- }
107
- } catch {
108
- // Output doesn't exist
109
- return true;
205
+ export function extractFirstUserMessage(transcript: Transcript): string {
206
+ for (const msg of transcript.messages) {
207
+ if (msg.type === "user") {
208
+ return msg.content;
110
209
  }
111
210
  }
112
-
113
- return false;
211
+ return "";
114
212
  }