@arcreflex/agent-transcripts 0.1.5 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/render.ts CHANGED
@@ -1,47 +1,9 @@
1
1
  /**
2
- * Render command: intermediate JSON → markdown
2
+ * Render: intermediate transcript format → markdown
3
3
  */
4
4
 
5
- import { basename, dirname, join } from "path";
6
- import { mkdir } from "fs/promises";
7
5
  import type { Transcript, Message, ToolCall } from "./types.ts";
8
6
 
9
- export interface RenderOptions {
10
- input: string; // file path, or "-" for stdin
11
- output?: string; // output path
12
- head?: string; // render branch ending at this message ID
13
- }
14
-
15
- /**
16
- * Read transcript from file or stdin.
17
- */
18
- async function readTranscript(
19
- input: string,
20
- ): Promise<{ transcript: Transcript; path: string }> {
21
- let content: string;
22
- let path: string;
23
-
24
- if (input !== "-") {
25
- content = await Bun.file(input).text();
26
- path = input;
27
- } else {
28
- const chunks: string[] = [];
29
- const reader = Bun.stdin.stream().getReader();
30
-
31
- while (true) {
32
- const { done, value } = await reader.read();
33
- if (done) break;
34
- chunks.push(new TextDecoder().decode(value));
35
- }
36
-
37
- content = chunks.join("");
38
- path = "<stdin>";
39
- }
40
-
41
- const transcript = JSON.parse(content) as Transcript;
42
- return { transcript, path };
43
- }
44
-
45
7
  /**
46
8
  * Format a single tool call.
47
9
  */
@@ -333,48 +295,3 @@ export function renderTranscript(
333
295
 
334
296
  return lines.join("\n");
335
297
  }
336
-
337
- /**
338
- * Determine output path for markdown.
339
- */
340
- function getOutputPath(inputPath: string, outputOption?: string): string {
341
- if (outputOption) {
342
- // If it has an extension, use as-is
343
- if (outputOption.match(/\.\w+$/)) {
344
- return outputOption;
345
- }
346
- // Treat as directory
347
- const base =
348
- inputPath === "<stdin>"
349
- ? "transcript"
350
- : basename(inputPath).replace(/\.json$/, "");
351
- return join(outputOption, `${base}.md`);
352
- }
353
-
354
- // Default: same name in cwd
355
- const base =
356
- inputPath === "<stdin>"
357
- ? "transcript"
358
- : basename(inputPath).replace(/\.json$/, "");
359
- return join(process.cwd(), `${base}.md`);
360
- }
361
-
362
- /**
363
- * Render intermediate JSON to markdown.
364
- */
365
- export async function render(options: RenderOptions): Promise<void> {
366
- const { transcript, path: inputPath } = await readTranscript(options.input);
367
-
368
- const markdown = renderTranscript(transcript, options.head);
369
-
370
- if (options.output) {
371
- const outputPath = getOutputPath(inputPath, options.output);
372
- // Ensure directory exists
373
- await mkdir(dirname(outputPath), { recursive: true });
374
- await Bun.write(outputPath, markdown);
375
- console.error(`Wrote: ${outputPath}`);
376
- } else {
377
- // Default: print to stdout
378
- console.log(markdown);
379
- }
380
- }
package/src/sync.ts CHANGED
@@ -3,21 +3,24 @@
3
3
  *
4
4
  * Discovers session files in source directory, parses them,
5
5
  * and writes rendered markdown to output directory.
6
- * Uses LLM-generated descriptive names when API key is available.
7
- * Tracks provenance via YAML front matter to correlate updates.
6
+ * Tracks provenance via transcripts.json index.
8
7
  */
9
8
 
10
- import { Glob } from "bun";
11
9
  import { dirname, join } from "path";
12
- import { mkdir, stat } from "fs/promises";
10
+ import { mkdir } from "fs/promises";
13
11
  import { getAdapters } from "./adapters/index.ts";
14
- import type { Adapter } from "./types.ts";
12
+ import type { Adapter, DiscoveredSession } from "./types.ts";
15
13
  import { renderTranscript } from "./render.ts";
16
- import { generateOutputName, type NamingOptions } from "./utils/naming.ts";
14
+ import { generateOutputName, extractSessionId } from "./utils/naming.ts";
17
15
  import {
18
- scanOutputDirectory,
19
- deleteExistingOutputs,
20
- hasStaleOutputs,
16
+ loadIndex,
17
+ saveIndex,
18
+ isStale,
19
+ setEntry,
20
+ removeEntriesForSource,
21
+ restoreEntries,
22
+ deleteOutputFiles,
23
+ normalizeSourcePath,
21
24
  } from "./utils/provenance.ts";
22
25
 
23
26
  export interface SyncOptions {
@@ -25,7 +28,6 @@ export interface SyncOptions {
25
28
  output: string;
26
29
  force?: boolean;
27
30
  quiet?: boolean;
28
- naming?: NamingOptions;
29
31
  }
30
32
 
31
33
  export interface SyncResult {
@@ -34,70 +36,36 @@ export interface SyncResult {
34
36
  errors: number;
35
37
  }
36
38
 
37
- interface SessionFile {
38
- path: string;
39
- relativePath: string;
40
- mtime: number;
39
+ interface SessionFile extends DiscoveredSession {
41
40
  adapter: Adapter;
42
41
  }
43
42
 
44
- /**
45
- * Discover session files for a specific adapter.
46
- */
47
- async function discoverForAdapter(
48
- source: string,
49
- adapter: Adapter,
50
- ): Promise<SessionFile[]> {
51
- const sessions: SessionFile[] = [];
52
-
53
- for (const pattern of adapter.filePatterns) {
54
- const glob = new Glob(`**/${pattern}`);
55
-
56
- for await (const file of glob.scan({ cwd: source, absolute: false })) {
57
- const fullPath = join(source, file);
58
-
59
- try {
60
- const fileStat = await stat(fullPath);
61
- sessions.push({
62
- path: fullPath,
63
- relativePath: file,
64
- mtime: fileStat.mtime.getTime(),
65
- adapter,
66
- });
67
- } catch {
68
- // Skip files we can't stat
69
- }
70
- }
71
- }
72
-
73
- return sessions;
74
- }
75
-
76
43
  /**
77
44
  * Sync session files from source to output directory.
78
45
  */
79
46
  export async function sync(options: SyncOptions): Promise<SyncResult> {
80
- const { source, output, force = false, quiet = false, naming } = options;
47
+ const { source, output, force = false, quiet = false } = options;
81
48
 
82
49
  const result: SyncResult = { synced: 0, skipped: 0, errors: 0 };
83
50
 
84
- // Scan output directory for existing transcripts (source → output paths)
85
- const existingOutputs = await scanOutputDirectory(output);
86
- if (!quiet && existingOutputs.size > 0) {
87
- const totalFiles = [...existingOutputs.values()].reduce(
88
- (sum, paths) => sum + paths.length,
89
- 0,
90
- );
51
+ // Ensure output directory exists
52
+ await mkdir(output, { recursive: true });
53
+
54
+ // Load index
55
+ const index = await loadIndex(output);
56
+ if (!quiet && Object.keys(index.entries).length > 0) {
91
57
  console.error(
92
- `Found ${totalFiles} existing transcript(s) from ${existingOutputs.size} source(s)`,
58
+ `Found ${Object.keys(index.entries).length} existing transcript(s) in index`,
93
59
  );
94
60
  }
95
61
 
96
- // Discover sessions for each adapter
62
+ // Discover sessions from all adapters
97
63
  const sessions: SessionFile[] = [];
98
64
  for (const adapter of getAdapters()) {
99
- const adapterSessions = await discoverForAdapter(source, adapter);
100
- sessions.push(...adapterSessions);
65
+ const discovered = await adapter.discover(source);
66
+ for (const session of discovered) {
67
+ sessions.push({ ...session, adapter });
68
+ }
101
69
  }
102
70
 
103
71
  if (!quiet) {
@@ -106,22 +74,19 @@ export async function sync(options: SyncOptions): Promise<SyncResult> {
106
74
 
107
75
  // Process each session
108
76
  for (const session of sessions) {
77
+ // Normalize source path for consistent index keys
78
+ const sourcePath = normalizeSourcePath(session.path);
79
+
109
80
  try {
110
81
  // Read and parse using the adapter that discovered this file
111
82
  const content = await Bun.file(session.path).text();
112
83
  const transcripts = session.adapter.parse(content, session.path);
113
84
 
114
- // Get all existing outputs for this source
115
- const existingPaths = existingOutputs.get(session.path) || [];
116
-
117
- // Check if sync needed (force, count mismatch, or any stale)
85
+ // Check if sync needed (force or stale)
118
86
  const needsUpdate =
119
87
  force ||
120
- (await hasStaleOutputs(
121
- existingPaths,
122
- transcripts.length,
123
- session.mtime,
124
- ));
88
+ isStale(index, sourcePath, session.mtime, transcripts.length, output);
89
+
125
90
  if (!needsUpdate) {
126
91
  if (!quiet) {
127
92
  console.error(`Skip (up to date): ${session.relativePath}`);
@@ -130,39 +95,71 @@ export async function sync(options: SyncOptions): Promise<SyncResult> {
130
95
  continue;
131
96
  }
132
97
 
133
- // Delete existing outputs before regenerating
134
- await deleteExistingOutputs(existingPaths, quiet);
135
-
136
- // Generate fresh outputs for all transcripts
137
- for (let i = 0; i < transcripts.length; i++) {
138
- const transcript = transcripts[i];
139
- const suffix = transcripts.length > 1 ? `_${i + 1}` : undefined;
140
-
141
- // Generate descriptive name, preserving directory structure
142
- const baseName = await generateOutputName(
143
- transcript,
144
- session.path,
145
- naming || {},
146
- );
147
- const finalName = suffix ? `${baseName}${suffix}` : baseName;
148
- const relativeDir = dirname(session.relativePath);
149
- const outputPath = join(output, relativeDir, `${finalName}.md`);
150
-
151
- // Ensure output directory exists
152
- await mkdir(dirname(outputPath), { recursive: true });
153
-
154
- // Render with provenance front matter and write
155
- const markdown = renderTranscript(transcript, {
156
- sourcePath: session.path,
157
- });
158
- await Bun.write(outputPath, markdown);
98
+ // Remove entries from index (save for potential restoration on error)
99
+ const removedEntries = removeEntriesForSource(index, sourcePath);
159
100
 
160
- if (!quiet) {
161
- console.error(`Synced: ${outputPath}`);
101
+ // Track new outputs for this session
102
+ const newOutputs: string[] = [];
103
+ const sessionId = extractSessionId(session.path);
104
+
105
+ try {
106
+ // Generate fresh outputs for all transcripts
107
+ for (let i = 0; i < transcripts.length; i++) {
108
+ const transcript = transcripts[i];
109
+ const segmentIndex = transcripts.length > 1 ? i + 1 : undefined;
110
+
111
+ // Generate deterministic name
112
+ const baseName = generateOutputName(transcript, session.path);
113
+ const suffix = segmentIndex ? `_${segmentIndex}` : "";
114
+ const relativeDir = dirname(session.relativePath);
115
+ const relativePath =
116
+ relativeDir === "."
117
+ ? `${baseName}${suffix}.md`
118
+ : join(relativeDir, `${baseName}${suffix}.md`);
119
+ const outputPath = join(output, relativePath);
120
+
121
+ // Ensure output directory exists
122
+ await mkdir(dirname(outputPath), { recursive: true });
123
+
124
+ // Render with provenance front matter and write
125
+ const markdown = renderTranscript(transcript, {
126
+ sourcePath,
127
+ });
128
+ await Bun.write(outputPath, markdown);
129
+ newOutputs.push(relativePath);
130
+
131
+ // Update index
132
+ setEntry(index, relativePath, {
133
+ source: sourcePath,
134
+ sourceMtime: session.mtime,
135
+ sessionId,
136
+ segmentIndex,
137
+ syncedAt: new Date().toISOString(),
138
+ });
139
+
140
+ if (!quiet) {
141
+ console.error(`Synced: ${outputPath}`);
142
+ }
143
+ }
144
+
145
+ // Success: delete old output files (after new ones are written)
146
+ const oldFilenames = removedEntries.map((e) => e.filename);
147
+ // Only delete files that aren't being reused
148
+ const toDelete = oldFilenames.filter((f) => !newOutputs.includes(f));
149
+ if (toDelete.length > 0) {
150
+ await deleteOutputFiles(output, toDelete, quiet);
162
151
  }
163
- }
164
152
 
165
- result.synced++;
153
+ result.synced++;
154
+ } catch (error) {
155
+ // Clean up any newly written files before restoring old entries
156
+ if (newOutputs.length > 0) {
157
+ await deleteOutputFiles(output, newOutputs, quiet);
158
+ }
159
+ // Restore old entries on error to preserve provenance
160
+ restoreEntries(index, removedEntries);
161
+ throw error;
162
+ }
166
163
  } catch (error) {
167
164
  const message = error instanceof Error ? error.message : String(error);
168
165
  console.error(`Error: ${session.relativePath}: ${message}`);
@@ -170,6 +167,9 @@ export async function sync(options: SyncOptions): Promise<SyncResult> {
170
167
  }
171
168
  }
172
169
 
170
+ // Save index
171
+ await saveIndex(output, index);
172
+
173
173
  // Summary
174
174
  if (!quiet) {
175
175
  console.error(
package/src/types.ts CHANGED
@@ -65,13 +65,22 @@ export interface ErrorMessage extends BaseMessage {
65
65
  content: string;
66
66
  }
67
67
 
68
+ /**
69
+ * A session file discovered by an adapter.
70
+ */
71
+ export interface DiscoveredSession {
72
+ path: string;
73
+ relativePath: string;
74
+ mtime: number;
75
+ }
76
+
68
77
  /**
69
78
  * Adapter interface - each source format implements this.
70
79
  */
71
80
  export interface Adapter {
72
81
  name: string;
73
- /** Glob patterns for discovering session files (e.g., ["*.jsonl"]) */
74
- filePatterns: string[];
82
+ /** Discover session files in the given directory */
83
+ discover(source: string): Promise<DiscoveredSession[]>;
75
84
  /** Parse source content into one or more transcripts (split by conversation) */
76
85
  parse(content: string, sourcePath: string): Transcript[];
77
86
  }
package/src/utils/naming.ts CHANGED
@@ -1,173 +1,60 @@
1
1
  /**
2
2
  * Output file naming utilities.
3
3
  *
4
- * Generates descriptive filenames for transcripts:
5
- * - With OpenRouter API key: yyyy-mm-dd-{llm-generated-slug}.{ext}
6
- * - Without: yyyy-mm-dd-{input-filename-prefix}.{ext}
4
+ * Generates deterministic filenames: {datetime}-{sessionId}.md
7
5
  */
8
6
 
9
- import type { Transcript, UserMessage } from "../types.ts";
7
+ import type { Transcript } from "../types.ts";
10
8
  import { basename } from "path";
11
9
 
12
- export interface NamingOptions {
13
- apiKey?: string; // OpenRouter API key
14
- model?: string; // Default: google/gemini-2.0-flash-001
15
- }
16
-
17
- const DEFAULT_MODEL = "google/gemini-2.0-flash-001";
18
- const SLUG_MAX_LENGTH = 40;
19
-
20
10
  /**
21
- * Extract date from transcript's first message timestamp.
11
+ * Extract date and time from transcript's first message timestamp.
12
+ * Returns format: yyyy-mm-dd-hhmm (24-hour, local time)
22
13
  */
23
- function extractDate(transcript: Transcript): string {
14
+ function extractDateTime(transcript: Transcript): string {
24
15
  const firstMessage = transcript.messages[0];
25
- if (firstMessage?.timestamp) {
26
- const date = new Date(firstMessage.timestamp);
27
- if (!isNaN(date.getTime())) {
28
- return date.toISOString().slice(0, 10); // yyyy-mm-dd
29
- }
30
- }
31
- // Fallback to current date
32
- return new Date().toISOString().slice(0, 10);
33
- }
34
-
35
- /**
36
- * Extract context from transcript for LLM summarization.
37
- * Uses first few user messages, truncated.
38
- */
39
- function extractContext(transcript: Transcript): string {
40
- const userMessages = transcript.messages.filter(
41
- (m): m is UserMessage => m.type === "user",
42
- );
16
+ const date = firstMessage?.timestamp
17
+ ? new Date(firstMessage.timestamp)
18
+ : new Date();
43
19
 
44
- const chunks: string[] = [];
45
- let totalLength = 0;
46
- const maxLength = 500;
47
-
48
- for (const msg of userMessages.slice(0, 3)) {
49
- const content = msg.content.slice(0, 200);
50
- if (totalLength + content.length > maxLength) break;
51
- chunks.push(content);
52
- totalLength += content.length;
20
+ if (isNaN(date.getTime())) {
21
+ return formatDateTime(new Date());
53
22
  }
54
-
55
- return chunks.join("\n\n");
23
+ return formatDateTime(date);
56
24
  }
57
25
 
58
- /**
59
- * Sanitize a string into a valid URL slug.
60
- */
61
- function sanitizeSlug(input: string): string {
62
- return input
63
- .toLowerCase()
64
- .replace(/[^a-z0-9\s-]/g, "") // remove special chars
65
- .replace(/\s+/g, "-") // spaces to hyphens
66
- .replace(/-+/g, "-") // collapse multiple hyphens
67
- .replace(/^-|-$/g, "") // trim leading/trailing hyphens
68
- .slice(0, SLUG_MAX_LENGTH);
26
+ function formatDateTime(date: Date): string {
27
+ const year = date.getFullYear();
28
+ const month = String(date.getMonth() + 1).padStart(2, "0");
29
+ const day = String(date.getDate()).padStart(2, "0");
30
+ const hours = String(date.getHours()).padStart(2, "0");
31
+ const minutes = String(date.getMinutes()).padStart(2, "0");
32
+ return `${year}-${month}-${day}-${hours}${minutes}`;
69
33
  }
70
34
 
71
35
  /**
72
- * Generate slug via OpenRouter API.
36
+ * Extract session ID from the input filename.
37
+ * Returns the full session ID (filename without extension) for traceability.
73
38
  */
74
- async function generateSlugViaLLM(
75
- context: string,
76
- options: NamingOptions,
77
- ): Promise<string | null> {
78
- const { apiKey, model = DEFAULT_MODEL } = options;
79
- if (!apiKey || !context.trim()) return null;
80
-
81
- try {
82
- const response = await fetch(
83
- "https://openrouter.ai/api/v1/chat/completions",
84
- {
85
- method: "POST",
86
- headers: {
87
- Authorization: `Bearer ${apiKey}`,
88
- "Content-Type": "application/json",
89
- },
90
- body: JSON.stringify({
91
- model,
92
- messages: [
93
- {
94
- role: "user",
95
- content: `Generate a 2-4 word URL slug (lowercase, hyphenated) summarizing this conversation topic. Reply with ONLY the slug, nothing else.\n\n${context}`,
96
- },
97
- ],
98
- max_tokens: 20,
99
- }),
100
- },
101
- );
102
-
103
- if (!response.ok) {
104
- console.error(
105
- `OpenRouter API error: ${response.status} ${response.statusText}`,
106
- );
107
- return null;
108
- }
109
-
110
- const data = (await response.json()) as {
111
- choices?: Array<{ message?: { content?: string } }>;
112
- };
113
- const content = data.choices?.[0]?.message?.content?.trim();
114
-
115
- if (!content) return null;
116
-
117
- const slug = sanitizeSlug(content);
118
- return slug || null;
119
- } catch (error) {
120
- console.error(
121
- `OpenRouter API call failed: ${error instanceof Error ? error.message : error}`,
122
- );
123
- return null;
124
- }
125
- }
126
-
127
- /**
128
- * Generate fallback slug from input filename.
129
- */
130
- function generateFallbackSlug(inputPath: string): string {
131
- return extractFileId(inputPath, 8) || "transcript";
132
- }
133
-
134
- /**
135
- * Extract a short identifier from the input filename.
136
- * Used as a suffix for traceability back to source.
137
- */
138
- function extractFileId(inputPath: string, length = 6): string {
39
+ export function extractSessionId(inputPath: string): string {
139
40
  if (inputPath === "<stdin>") {
140
- return "";
41
+ return "stdin";
141
42
  }
142
43
 
143
44
  const name = basename(inputPath);
144
- const base = name.replace(/\.jsonl?$/, "");
145
- // Take first N chars, sanitize, and clean up any trailing hyphens
146
- return sanitizeSlug(base.slice(0, length)).replace(/-+$/, "");
45
+ // Remove .jsonl or .json extension
46
+ return name.replace(/\.jsonl?$/, "");
147
47
  }
148
48
 
149
49
  /**
150
50
  * Generate output base name for a transcript.
151
- * Returns string like "2024-01-15-implement-auth-flow-abc123"
51
+ * Returns format: "2024-01-15-1423-{sessionId}"
152
52
  */
153
- export async function generateOutputName(
53
+ export function generateOutputName(
154
54
  transcript: Transcript,
155
55
  inputPath: string,
156
- options: NamingOptions = {},
157
- ): Promise<string> {
158
- const date = extractDate(transcript);
159
- const fileId = extractFileId(inputPath);
160
-
161
- // Try LLM-generated slug if API key available
162
- if (options.apiKey) {
163
- const context = extractContext(transcript);
164
- const slug = await generateSlugViaLLM(context, options);
165
- if (slug) {
166
- return fileId ? `${date}-${slug}-${fileId}` : `${date}-${slug}`;
167
- }
168
- }
169
-
170
- // Fallback to input filename prefix (no need for fileId suffix, it's already the slug)
171
- const slug = generateFallbackSlug(inputPath);
172
- return `${date}-${slug}`;
56
+ ): string {
57
+ const dateTime = extractDateTime(transcript);
58
+ const sessionId = extractSessionId(inputPath);
59
+ return `${dateTime}-${sessionId}`;
173
60
  }