npm - @arcreflex/agent-transcripts - Versions diffs - 0.1.4 → 0.1.8 - Mend

@arcreflex/agent-transcripts 0.1.4 → 0.1.8

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/.github/workflows/publish.yml +5 -0
package/CLAUDE.md +10 -0
package/README.md +44 -5
package/package.json +1 -1
package/src/adapters/claude-code.ts +97 -1
package/src/cli.ts +45 -75
package/src/convert.ts +126 -0
package/src/parse.ts +12 -106
package/src/render.ts +20 -85
package/src/sync.ts +109 -103
package/src/types.ts +11 -2
package/src/utils/naming.ts +30 -143
package/src/utils/provenance.ts +228 -0
package/test/snapshots.test.ts +39 -33

package/src/render.ts CHANGED Viewed

@@ -1,47 +1,9 @@
 /**
- * Render command: intermediate JSON → markdown
+ * Render: intermediate transcript format → markdown
  */
-import { basename, dirname, join } from "path";
-import { mkdir } from "fs/promises";
 import type { Transcript, Message, ToolCall } from "./types.ts";
-export interface RenderOptions {
-  input?: string; // file path, undefined for stdin
-  output?: string; // output path
-  head?: string; // render branch ending at this message ID
-}
-/**
- * Read transcript from file or stdin.
- */
-async function readTranscript(
-  input?: string,
-): Promise<{ transcript: Transcript; path: string }> {
-  let content: string;
-  let path: string;
-  if (input) {
-    content = await Bun.file(input).text();
-    path = input;
-  } else {
-    const chunks: string[] = [];
-    const reader = Bun.stdin.stream().getReader();
-    while (true) {
-      const { done, value } = await reader.read();
-      if (done) break;
-      chunks.push(new TextDecoder().decode(value));
-    }
-    content = chunks.join("");
-    path = "<stdin>";
-  }
-  const transcript = JSON.parse(content) as Transcript;
-  return { transcript, path };
-}
 /**
  * Format a single tool call.
  */
@@ -213,15 +175,33 @@ function tracePath(target: string, parents: Map<string, string>): string[] {
   return path;
 }
+export interface RenderTranscriptOptions {
+  head?: string; // render branch ending at this message ID
+  sourcePath?: string; // absolute source path for front matter provenance
+}
 /**
  * Render transcript to markdown with branch awareness.
  */
 export function renderTranscript(
   transcript: Transcript,
-  head?: string,
+  options: RenderTranscriptOptions | string = {},
 ): string {
+  // Support legacy signature: renderTranscript(transcript, head?: string)
+  const opts: RenderTranscriptOptions =
+    typeof options === "string" ? { head: options } : options;
+  const { head, sourcePath } = opts;
   const lines: string[] = [];
+  // YAML front matter (for provenance tracking)
+  if (sourcePath) {
+    lines.push("---");
+    lines.push(`source: ${sourcePath}`);
+    lines.push("---");
+    lines.push("");
+  }
   // Header
   lines.push("# Transcript");
   lines.push("");
@@ -315,48 +295,3 @@ export function renderTranscript(
   return lines.join("\n");
 }
-/**
- * Determine output path for markdown.
- */
-function getOutputPath(inputPath: string, outputOption?: string): string {
-  if (outputOption) {
-    // If it has an extension, use as-is
-    if (outputOption.match(/\.\w+$/)) {
-      return outputOption;
-    }
-    // Treat as directory
-    const base =
-      inputPath === "<stdin>"
-        ? "transcript"
-        : basename(inputPath).replace(/\.json$/, "");
-    return join(outputOption, `${base}.md`);
-  }
-  // Default: same name in cwd
-  const base =
-    inputPath === "<stdin>"
-      ? "transcript"
-      : basename(inputPath).replace(/\.json$/, "");
-  return join(process.cwd(), `${base}.md`);
-}
-/**
- * Render intermediate JSON to markdown.
- */
-export async function render(options: RenderOptions): Promise<void> {
-  const { transcript, path: inputPath } = await readTranscript(options.input);
-  const markdown = renderTranscript(transcript, options.head);
-  if (options.output) {
-    const outputPath = getOutputPath(inputPath, options.output);
-    // Ensure directory exists
-    await mkdir(dirname(outputPath), { recursive: true });
-    await Bun.write(outputPath, markdown);
-    console.error(`Wrote: ${outputPath}`);
-  } else {
-    // Default: print to stdout
-    console.log(markdown);
-  }
-}

package/src/sync.ts CHANGED Viewed

@@ -3,15 +3,25 @@
  *
  * Discovers session files in source directory, parses them,
  * and writes rendered markdown to output directory.
- * Output structure mirrors source structure with extension changed.
+ * Tracks provenance via transcripts.json index.
  */
-import { Glob } from "bun";
-import { dirname, join, relative } from "path";
-import { mkdir, stat } from "fs/promises";
+import { dirname, join } from "path";
+import { mkdir } from "fs/promises";
 import { getAdapters } from "./adapters/index.ts";
-import type { Adapter } from "./types.ts";
+import type { Adapter, DiscoveredSession } from "./types.ts";
 import { renderTranscript } from "./render.ts";
+import { generateOutputName, extractSessionId } from "./utils/naming.ts";
+import {
+  loadIndex,
+  saveIndex,
+  isStale,
+  setEntry,
+  removeEntriesForSource,
+  restoreEntries,
+  deleteOutputFiles,
+  normalizeSourcePath,
+} from "./utils/provenance.ts";
 export interface SyncOptions {
   source: string;
@@ -26,80 +36,10 @@ export interface SyncResult {
   errors: number;
 }
-interface SessionFile {
-  path: string;
-  relativePath: string;
-  mtime: number;
+interface SessionFile extends DiscoveredSession {
   adapter: Adapter;
 }
-/**
- * Discover session files for a specific adapter.
- */
-async function discoverForAdapter(
-  source: string,
-  adapter: Adapter,
-): Promise<SessionFile[]> {
-  const sessions: SessionFile[] = [];
-  for (const pattern of adapter.filePatterns) {
-    const glob = new Glob(`**/${pattern}`);
-    for await (const file of glob.scan({ cwd: source, absolute: false })) {
-      const fullPath = join(source, file);
-      try {
-        const fileStat = await stat(fullPath);
-        sessions.push({
-          path: fullPath,
-          relativePath: file,
-          mtime: fileStat.mtime.getTime(),
-          adapter,
-        });
-      } catch {
-        // Skip files we can't stat
-      }
-    }
-  }
-  return sessions;
-}
-/**
- * Compute output path for a session file.
- * Mirrors input structure, changing extension to .md.
- */
-function computeOutputPath(
-  relativePath: string,
-  outputDir: string,
-  suffix?: string,
-): string {
-  // Replace extension with .md
-  const mdPath = relativePath.replace(/\.[^.]+$/, ".md");
-  // Add suffix if provided (for multiple transcripts from same file)
-  const finalPath = suffix ? mdPath.replace(/\.md$/, `${suffix}.md`) : mdPath;
-  return join(outputDir, finalPath);
-}
-/**
- * Check if output file needs to be re-rendered based on mtime.
- */
-async function needsSync(
-  outputPath: string,
-  sourceMtime: number,
-  force: boolean,
-): Promise<boolean> {
-  if (force) return true;
-  try {
-    const outputStat = await stat(outputPath);
-    return outputStat.mtime.getTime() < sourceMtime;
-  } catch {
-    // Output doesn't exist, needs sync
-    return true;
-  }
-}
 /**
  * Sync session files from source to output directory.
  */
@@ -108,11 +48,24 @@ export async function sync(options: SyncOptions): Promise<SyncResult> {
   const result: SyncResult = { synced: 0, skipped: 0, errors: 0 };
-  // Discover sessions for each adapter
+  // Ensure output directory exists
+  await mkdir(output, { recursive: true });
+  // Load index
+  const index = await loadIndex(output);
+  if (!quiet && Object.keys(index.entries).length > 0) {
+    console.error(
+      `Found ${Object.keys(index.entries).length} existing transcript(s) in index`,
+    );
+  }
+  // Discover sessions from all adapters
   const sessions: SessionFile[] = [];
   for (const adapter of getAdapters()) {
-    const adapterSessions = await discoverForAdapter(source, adapter);
-    sessions.push(...adapterSessions);
+    const discovered = await adapter.discover(source);
+    for (const session of discovered) {
+      sessions.push({ ...session, adapter });
+    }
   }
   if (!quiet) {
@@ -121,41 +74,91 @@ export async function sync(options: SyncOptions): Promise<SyncResult> {
   // Process each session
   for (const session of sessions) {
+    // Normalize source path for consistent index keys
+    const sourcePath = normalizeSourcePath(session.path);
     try {
       // Read and parse using the adapter that discovered this file
       const content = await Bun.file(session.path).text();
       const transcripts = session.adapter.parse(content, session.path);
-      // Process each transcript (usually just one per file)
-      for (let i = 0; i < transcripts.length; i++) {
-        const transcript = transcripts[i];
-        const suffix = transcripts.length > 1 ? `_${i + 1}` : undefined;
-        const outputPath = computeOutputPath(
-          session.relativePath,
-          output,
-          suffix,
-        );
-        // Check if sync needed
-        if (!(await needsSync(outputPath, session.mtime, force))) {
-          if (!quiet) {
-            console.error(`Skip (up to date): ${outputPath}`);
-          }
-          result.skipped++;
-          continue;
+      // Check if sync needed (force or stale)
+      const needsUpdate =
+        force ||
+        isStale(index, sourcePath, session.mtime, transcripts.length, output);
+      if (!needsUpdate) {
+        if (!quiet) {
+          console.error(`Skip (up to date): ${session.relativePath}`);
         }
+        result.skipped++;
+        continue;
+      }
-        // Ensure output directory exists
-        await mkdir(dirname(outputPath), { recursive: true });
+      // Remove entries from index (save for potential restoration on error)
+      const removedEntries = removeEntriesForSource(index, sourcePath);
-        // Render and write
-        const markdown = renderTranscript(transcript);
-        await Bun.write(outputPath, markdown);
+      // Track new outputs for this session
+      const newOutputs: string[] = [];
+      const sessionId = extractSessionId(session.path);
-        if (!quiet) {
-          console.error(`Synced: ${outputPath}`);
+      try {
+        // Generate fresh outputs for all transcripts
+        for (let i = 0; i < transcripts.length; i++) {
+          const transcript = transcripts[i];
+          const segmentIndex = transcripts.length > 1 ? i + 1 : undefined;
+          // Generate deterministic name
+          const baseName = generateOutputName(transcript, session.path);
+          const suffix = segmentIndex ? `_${segmentIndex}` : "";
+          const relativeDir = dirname(session.relativePath);
+          const relativePath =
+            relativeDir === "."
+              ? `${baseName}${suffix}.md`
+              : join(relativeDir, `${baseName}${suffix}.md`);
+          const outputPath = join(output, relativePath);
+          // Ensure output directory exists
+          await mkdir(dirname(outputPath), { recursive: true });
+          // Render with provenance front matter and write
+          const markdown = renderTranscript(transcript, {
+            sourcePath,
+          });
+          await Bun.write(outputPath, markdown);
+          newOutputs.push(relativePath);
+          // Update index
+          setEntry(index, relativePath, {
+            source: sourcePath,
+            sourceMtime: session.mtime,
+            sessionId,
+            segmentIndex,
+            syncedAt: new Date().toISOString(),
+          });
+          if (!quiet) {
+            console.error(`Synced: ${outputPath}`);
+          }
         }
+        // Success: delete old output files (after new ones are written)
+        const oldFilenames = removedEntries.map((e) => e.filename);
+        // Only delete files that aren't being reused
+        const toDelete = oldFilenames.filter((f) => !newOutputs.includes(f));
+        if (toDelete.length > 0) {
+          await deleteOutputFiles(output, toDelete, quiet);
+        }
         result.synced++;
+      } catch (error) {
+        // Clean up any newly written files before restoring old entries
+        if (newOutputs.length > 0) {
+          await deleteOutputFiles(output, newOutputs, quiet);
+        }
+        // Restore old entries on error to preserve provenance
+        restoreEntries(index, removedEntries);
+        throw error;
       }
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
@@ -164,6 +167,9 @@ export async function sync(options: SyncOptions): Promise<SyncResult> {
     }
   }
+  // Save index
+  await saveIndex(output, index);
   // Summary
   if (!quiet) {
     console.error(

package/src/types.ts CHANGED Viewed

@@ -65,13 +65,22 @@ export interface ErrorMessage extends BaseMessage {
   content: string;
 }
+/**
+ * A session file discovered by an adapter.
+ */
+export interface DiscoveredSession {
+  path: string;
+  relativePath: string;
+  mtime: number;
+}
 /**
  * Adapter interface - each source format implements this.
  */
 export interface Adapter {
   name: string;
-  /** Glob patterns for discovering session files (e.g., ["*.jsonl"]) */
-  filePatterns: string[];
+  /** Discover session files in the given directory */
+  discover(source: string): Promise<DiscoveredSession[]>;
   /** Parse source content into one or more transcripts (split by conversation) */
   parse(content: string, sourcePath: string): Transcript[];
 }

package/src/utils/naming.ts CHANGED Viewed

@@ -1,173 +1,60 @@
 /**
  * Output file naming utilities.
  *
- * Generates descriptive filenames for transcripts:
- * - With OpenRouter API key: yyyy-mm-dd-{llm-generated-slug}.{ext}
- * - Without: yyyy-mm-dd-{input-filename-prefix}.{ext}
+ * Generates deterministic filenames: {datetime}-{sessionId}.md
  */
-import type { Transcript, UserMessage } from "../types.ts";
+import type { Transcript } from "../types.ts";
 import { basename } from "path";
-export interface NamingOptions {
-  apiKey?: string; // OpenRouter API key
-  model?: string; // Default: google/gemini-2.0-flash-001
-}
-const DEFAULT_MODEL = "google/gemini-2.0-flash-001";
-const SLUG_MAX_LENGTH = 40;
 /**
- * Extract date from transcript's first message timestamp.
+ * Extract date and time from transcript's first message timestamp.
+ * Returns format: yyyy-mm-dd-hhmm (24-hour, local time)
  */
-function extractDate(transcript: Transcript): string {
+function extractDateTime(transcript: Transcript): string {
   const firstMessage = transcript.messages[0];
-  if (firstMessage?.timestamp) {
-    const date = new Date(firstMessage.timestamp);
-    if (!isNaN(date.getTime())) {
-      return date.toISOString().slice(0, 10); // yyyy-mm-dd
-    }
-  }
-  // Fallback to current date
-  return new Date().toISOString().slice(0, 10);
-}
-/**
- * Extract context from transcript for LLM summarization.
- * Uses first few user messages, truncated.
- */
-function extractContext(transcript: Transcript): string {
-  const userMessages = transcript.messages.filter(
-    (m): m is UserMessage => m.type === "user",
-  );
+  const date = firstMessage?.timestamp
+    ? new Date(firstMessage.timestamp)
+    : new Date();
-  const chunks: string[] = [];
-  let totalLength = 0;
-  const maxLength = 500;
-  for (const msg of userMessages.slice(0, 3)) {
-    const content = msg.content.slice(0, 200);
-    if (totalLength + content.length > maxLength) break;
-    chunks.push(content);
-    totalLength += content.length;
+  if (isNaN(date.getTime())) {
+    return formatDateTime(new Date());
   }
-  return chunks.join("\n\n");
+  return formatDateTime(date);
 }
-/**
- * Sanitize a string into a valid URL slug.
- */
-function sanitizeSlug(input: string): string {
-  return input
-    .toLowerCase()
-    .replace(/[^a-z0-9\s-]/g, "") // remove special chars
-    .replace(/\s+/g, "-") // spaces to hyphens
-    .replace(/-+/g, "-") // collapse multiple hyphens
-    .replace(/^-|-$/g, "") // trim leading/trailing hyphens
-    .slice(0, SLUG_MAX_LENGTH);
+function formatDateTime(date: Date): string {
+  const year = date.getFullYear();
+  const month = String(date.getMonth() + 1).padStart(2, "0");
+  const day = String(date.getDate()).padStart(2, "0");
+  const hours = String(date.getHours()).padStart(2, "0");
+  const minutes = String(date.getMinutes()).padStart(2, "0");
+  return `${year}-${month}-${day}-${hours}${minutes}`;
 }
 /**
- * Generate slug via OpenRouter API.
+ * Extract session ID from the input filename.
+ * Returns the full session ID (filename without extension) for traceability.
  */
-async function generateSlugViaLLM(
-  context: string,
-  options: NamingOptions,
-): Promise<string | null> {
-  const { apiKey, model = DEFAULT_MODEL } = options;
-  if (!apiKey || !context.trim()) return null;
-  try {
-    const response = await fetch(
-      "https://openrouter.ai/api/v1/chat/completions",
-      {
-        method: "POST",
-        headers: {
-          Authorization: `Bearer ${apiKey}`,
-          "Content-Type": "application/json",
-        },
-        body: JSON.stringify({
-          model,
-          messages: [
-            {
-              role: "user",
-              content: `Generate a 2-4 word URL slug (lowercase, hyphenated) summarizing this conversation topic. Reply with ONLY the slug, nothing else.\n\n${context}`,
-            },
-          ],
-          max_tokens: 20,
-        }),
-      },
-    );
-    if (!response.ok) {
-      console.error(
-        `OpenRouter API error: ${response.status} ${response.statusText}`,
-      );
-      return null;
-    }
-    const data = (await response.json()) as {
-      choices?: Array<{ message?: { content?: string } }>;
-    };
-    const content = data.choices?.[0]?.message?.content?.trim();
-    if (!content) return null;
-    const slug = sanitizeSlug(content);
-    return slug || null;
-  } catch (error) {
-    console.error(
-      `OpenRouter API call failed: ${error instanceof Error ? error.message : error}`,
-    );
-    return null;
-  }
-}
-/**
- * Generate fallback slug from input filename.
- */
-function generateFallbackSlug(inputPath: string): string {
-  return extractFileId(inputPath, 8) || "transcript";
-}
-/**
- * Extract a short identifier from the input filename.
- * Used as a suffix for traceability back to source.
- */
-function extractFileId(inputPath: string, length = 6): string {
+export function extractSessionId(inputPath: string): string {
   if (inputPath === "<stdin>") {
-    return "";
+    return "stdin";
   }
   const name = basename(inputPath);
-  const base = name.replace(/\.jsonl?$/, "");
-  // Take first N chars, sanitize, and clean up any trailing hyphens
-  return sanitizeSlug(base.slice(0, length)).replace(/-+$/, "");
+  // Remove .jsonl or .json extension
+  return name.replace(/\.jsonl?$/, "");
 }
 /**
  * Generate output base name for a transcript.
- * Returns string like "2024-01-15-implement-auth-flow-abc123"
+ * Returns format: "2024-01-15-1423-{sessionId}"
  */
-export async function generateOutputName(
+export function generateOutputName(
   transcript: Transcript,
   inputPath: string,
-  options: NamingOptions = {},
-): Promise<string> {
-  const date = extractDate(transcript);
-  const fileId = extractFileId(inputPath);
-  // Try LLM-generated slug if API key available
-  if (options.apiKey) {
-    const context = extractContext(transcript);
-    const slug = await generateSlugViaLLM(context, options);
-    if (slug) {
-      return fileId ? `${date}-${slug}-${fileId}` : `${date}-${slug}`;
-    }
-  }
-  // Fallback to input filename prefix (no need for fileId suffix, it's already the slug)
-  const slug = generateFallbackSlug(inputPath);
-  return `${date}-${slug}`;
+): string {
+  const dateTime = extractDateTime(transcript);
+  const sessionId = extractSessionId(inputPath);
+  return `${dateTime}-${sessionId}`;
 }