npm - @arcreflex/agent-transcripts - Versions diffs - 0.1.5 → 0.1.8 - Mend

@arcreflex/agent-transcripts 0.1.5 → 0.1.8

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/.github/workflows/publish.yml +5 -0
package/README.md +33 -17
package/package.json +1 -1
package/src/adapters/claude-code.ts +97 -1
package/src/cli.ts +11 -65
package/src/convert.ts +88 -40
package/src/parse.ts +7 -101
package/src/render.ts +1 -84
package/src/sync.ts +97 -97
package/src/types.ts +11 -2
package/src/utils/naming.ts +30 -143
package/src/utils/provenance.ts +187 -73
package/test/snapshots.test.ts +39 -33

package/src/render.ts CHANGED Viewed

@@ -1,47 +1,9 @@
 /**
- * Render command: intermediate JSON → markdown
+ * Render: intermediate transcript format → markdown
  */
-import { basename, dirname, join } from "path";
-import { mkdir } from "fs/promises";
 import type { Transcript, Message, ToolCall } from "./types.ts";
-export interface RenderOptions {
-  input: string; // file path, or "-" for stdin
-  output?: string; // output path
-  head?: string; // render branch ending at this message ID
-}
-/**
- * Read transcript from file or stdin.
- */
-async function readTranscript(
-  input: string,
-): Promise<{ transcript: Transcript; path: string }> {
-  let content: string;
-  let path: string;
-  if (input !== "-") {
-    content = await Bun.file(input).text();
-    path = input;
-  } else {
-    const chunks: string[] = [];
-    const reader = Bun.stdin.stream().getReader();
-    while (true) {
-      const { done, value } = await reader.read();
-      if (done) break;
-      chunks.push(new TextDecoder().decode(value));
-    }
-    content = chunks.join("");
-    path = "<stdin>";
-  }
-  const transcript = JSON.parse(content) as Transcript;
-  return { transcript, path };
-}
 /**
  * Format a single tool call.
  */
@@ -333,48 +295,3 @@ export function renderTranscript(
   return lines.join("\n");
 }
-/**
- * Determine output path for markdown.
- */
-function getOutputPath(inputPath: string, outputOption?: string): string {
-  if (outputOption) {
-    // If it has an extension, use as-is
-    if (outputOption.match(/\.\w+$/)) {
-      return outputOption;
-    }
-    // Treat as directory
-    const base =
-      inputPath === "<stdin>"
-        ? "transcript"
-        : basename(inputPath).replace(/\.json$/, "");
-    return join(outputOption, `${base}.md`);
-  }
-  // Default: same name in cwd
-  const base =
-    inputPath === "<stdin>"
-      ? "transcript"
-      : basename(inputPath).replace(/\.json$/, "");
-  return join(process.cwd(), `${base}.md`);
-}
-/**
- * Render intermediate JSON to markdown.
- */
-export async function render(options: RenderOptions): Promise<void> {
-  const { transcript, path: inputPath } = await readTranscript(options.input);
-  const markdown = renderTranscript(transcript, options.head);
-  if (options.output) {
-    const outputPath = getOutputPath(inputPath, options.output);
-    // Ensure directory exists
-    await mkdir(dirname(outputPath), { recursive: true });
-    await Bun.write(outputPath, markdown);
-    console.error(`Wrote: ${outputPath}`);
-  } else {
-    // Default: print to stdout
-    console.log(markdown);
-  }
-}

package/src/sync.ts CHANGED Viewed

@@ -3,21 +3,24 @@
  *
  * Discovers session files in source directory, parses them,
  * and writes rendered markdown to output directory.
- * Uses LLM-generated descriptive names when API key is available.
- * Tracks provenance via YAML front matter to correlate updates.
+ * Tracks provenance via transcripts.json index.
  */
-import { Glob } from "bun";
 import { dirname, join } from "path";
-import { mkdir, stat } from "fs/promises";
+import { mkdir } from "fs/promises";
 import { getAdapters } from "./adapters/index.ts";
-import type { Adapter } from "./types.ts";
+import type { Adapter, DiscoveredSession } from "./types.ts";
 import { renderTranscript } from "./render.ts";
-import { generateOutputName, type NamingOptions } from "./utils/naming.ts";
+import { generateOutputName, extractSessionId } from "./utils/naming.ts";
 import {
-  scanOutputDirectory,
-  deleteExistingOutputs,
-  hasStaleOutputs,
+  loadIndex,
+  saveIndex,
+  isStale,
+  setEntry,
+  removeEntriesForSource,
+  restoreEntries,
+  deleteOutputFiles,
+  normalizeSourcePath,
 } from "./utils/provenance.ts";
 export interface SyncOptions {
@@ -25,7 +28,6 @@ export interface SyncOptions {
   output: string;
   force?: boolean;
   quiet?: boolean;
-  naming?: NamingOptions;
 }
 export interface SyncResult {
@@ -34,70 +36,36 @@ export interface SyncResult {
   errors: number;
 }
-interface SessionFile {
-  path: string;
-  relativePath: string;
-  mtime: number;
+interface SessionFile extends DiscoveredSession {
   adapter: Adapter;
 }
-/**
- * Discover session files for a specific adapter.
- */
-async function discoverForAdapter(
-  source: string,
-  adapter: Adapter,
-): Promise<SessionFile[]> {
-  const sessions: SessionFile[] = [];
-  for (const pattern of adapter.filePatterns) {
-    const glob = new Glob(`**/${pattern}`);
-    for await (const file of glob.scan({ cwd: source, absolute: false })) {
-      const fullPath = join(source, file);
-      try {
-        const fileStat = await stat(fullPath);
-        sessions.push({
-          path: fullPath,
-          relativePath: file,
-          mtime: fileStat.mtime.getTime(),
-          adapter,
-        });
-      } catch {
-        // Skip files we can't stat
-      }
-    }
-  }
-  return sessions;
-}
 /**
  * Sync session files from source to output directory.
  */
 export async function sync(options: SyncOptions): Promise<SyncResult> {
-  const { source, output, force = false, quiet = false, naming } = options;
+  const { source, output, force = false, quiet = false } = options;
   const result: SyncResult = { synced: 0, skipped: 0, errors: 0 };
-  // Scan output directory for existing transcripts (source → output paths)
-  const existingOutputs = await scanOutputDirectory(output);
-  if (!quiet && existingOutputs.size > 0) {
-    const totalFiles = [...existingOutputs.values()].reduce(
-      (sum, paths) => sum + paths.length,
-      0,
-    );
+  // Ensure output directory exists
+  await mkdir(output, { recursive: true });
+  // Load index
+  const index = await loadIndex(output);
+  if (!quiet && Object.keys(index.entries).length > 0) {
     console.error(
-      `Found ${totalFiles} existing transcript(s) from ${existingOutputs.size} source(s)`,
+      `Found ${Object.keys(index.entries).length} existing transcript(s) in index`,
     );
   }
-  // Discover sessions for each adapter
+  // Discover sessions from all adapters
   const sessions: SessionFile[] = [];
   for (const adapter of getAdapters()) {
-    const adapterSessions = await discoverForAdapter(source, adapter);
-    sessions.push(...adapterSessions);
+    const discovered = await adapter.discover(source);
+    for (const session of discovered) {
+      sessions.push({ ...session, adapter });
+    }
   }
   if (!quiet) {
@@ -106,22 +74,19 @@ export async function sync(options: SyncOptions): Promise<SyncResult> {
   // Process each session
   for (const session of sessions) {
+    // Normalize source path for consistent index keys
+    const sourcePath = normalizeSourcePath(session.path);
     try {
       // Read and parse using the adapter that discovered this file
       const content = await Bun.file(session.path).text();
       const transcripts = session.adapter.parse(content, session.path);
-      // Get all existing outputs for this source
-      const existingPaths = existingOutputs.get(session.path) || [];
-      // Check if sync needed (force, count mismatch, or any stale)
+      // Check if sync needed (force or stale)
       const needsUpdate =
         force ||
-        (await hasStaleOutputs(
-          existingPaths,
-          transcripts.length,
-          session.mtime,
-        ));
+        isStale(index, sourcePath, session.mtime, transcripts.length, output);
       if (!needsUpdate) {
         if (!quiet) {
           console.error(`Skip (up to date): ${session.relativePath}`);
@@ -130,39 +95,71 @@ export async function sync(options: SyncOptions): Promise<SyncResult> {
         continue;
       }
-      // Delete existing outputs before regenerating
-      await deleteExistingOutputs(existingPaths, quiet);
-      // Generate fresh outputs for all transcripts
-      for (let i = 0; i < transcripts.length; i++) {
-        const transcript = transcripts[i];
-        const suffix = transcripts.length > 1 ? `_${i + 1}` : undefined;
-        // Generate descriptive name, preserving directory structure
-        const baseName = await generateOutputName(
-          transcript,
-          session.path,
-          naming || {},
-        );
-        const finalName = suffix ? `${baseName}${suffix}` : baseName;
-        const relativeDir = dirname(session.relativePath);
-        const outputPath = join(output, relativeDir, `${finalName}.md`);
-        // Ensure output directory exists
-        await mkdir(dirname(outputPath), { recursive: true });
-        // Render with provenance front matter and write
-        const markdown = renderTranscript(transcript, {
-          sourcePath: session.path,
-        });
-        await Bun.write(outputPath, markdown);
+      // Remove entries from index (save for potential restoration on error)
+      const removedEntries = removeEntriesForSource(index, sourcePath);
-        if (!quiet) {
-          console.error(`Synced: ${outputPath}`);
+      // Track new outputs for this session
+      const newOutputs: string[] = [];
+      const sessionId = extractSessionId(session.path);
+      try {
+        // Generate fresh outputs for all transcripts
+        for (let i = 0; i < transcripts.length; i++) {
+          const transcript = transcripts[i];
+          const segmentIndex = transcripts.length > 1 ? i + 1 : undefined;
+          // Generate deterministic name
+          const baseName = generateOutputName(transcript, session.path);
+          const suffix = segmentIndex ? `_${segmentIndex}` : "";
+          const relativeDir = dirname(session.relativePath);
+          const relativePath =
+            relativeDir === "."
+              ? `${baseName}${suffix}.md`
+              : join(relativeDir, `${baseName}${suffix}.md`);
+          const outputPath = join(output, relativePath);
+          // Ensure output directory exists
+          await mkdir(dirname(outputPath), { recursive: true });
+          // Render with provenance front matter and write
+          const markdown = renderTranscript(transcript, {
+            sourcePath,
+          });
+          await Bun.write(outputPath, markdown);
+          newOutputs.push(relativePath);
+          // Update index
+          setEntry(index, relativePath, {
+            source: sourcePath,
+            sourceMtime: session.mtime,
+            sessionId,
+            segmentIndex,
+            syncedAt: new Date().toISOString(),
+          });
+          if (!quiet) {
+            console.error(`Synced: ${outputPath}`);
+          }
+        }
+        // Success: delete old output files (after new ones are written)
+        const oldFilenames = removedEntries.map((e) => e.filename);
+        // Only delete files that aren't being reused
+        const toDelete = oldFilenames.filter((f) => !newOutputs.includes(f));
+        if (toDelete.length > 0) {
+          await deleteOutputFiles(output, toDelete, quiet);
         }
-      }
-      result.synced++;
+        result.synced++;
+      } catch (error) {
+        // Clean up any newly written files before restoring old entries
+        if (newOutputs.length > 0) {
+          await deleteOutputFiles(output, newOutputs, quiet);
+        }
+        // Restore old entries on error to preserve provenance
+        restoreEntries(index, removedEntries);
+        throw error;
+      }
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       console.error(`Error: ${session.relativePath}: ${message}`);
@@ -170,6 +167,9 @@ export async function sync(options: SyncOptions): Promise<SyncResult> {
     }
   }
+  // Save index
+  await saveIndex(output, index);
   // Summary
   if (!quiet) {
     console.error(

package/src/types.ts CHANGED Viewed

@@ -65,13 +65,22 @@ export interface ErrorMessage extends BaseMessage {
   content: string;
 }
+/**
+ * A session file discovered by an adapter.
+ */
+export interface DiscoveredSession {
+  path: string;
+  relativePath: string;
+  mtime: number;
+}
 /**
  * Adapter interface - each source format implements this.
  */
 export interface Adapter {
   name: string;
-  /** Glob patterns for discovering session files (e.g., ["*.jsonl"]) */
-  filePatterns: string[];
+  /** Discover session files in the given directory */
+  discover(source: string): Promise<DiscoveredSession[]>;
   /** Parse source content into one or more transcripts (split by conversation) */
   parse(content: string, sourcePath: string): Transcript[];
 }

package/src/utils/naming.ts CHANGED Viewed

@@ -1,173 +1,60 @@
 /**
  * Output file naming utilities.
  *
- * Generates descriptive filenames for transcripts:
- * - With OpenRouter API key: yyyy-mm-dd-{llm-generated-slug}.{ext}
- * - Without: yyyy-mm-dd-{input-filename-prefix}.{ext}
+ * Generates deterministic filenames: {datetime}-{sessionId}.md
  */
-import type { Transcript, UserMessage } from "../types.ts";
+import type { Transcript } from "../types.ts";
 import { basename } from "path";
-export interface NamingOptions {
-  apiKey?: string; // OpenRouter API key
-  model?: string; // Default: google/gemini-2.0-flash-001
-}
-const DEFAULT_MODEL = "google/gemini-2.0-flash-001";
-const SLUG_MAX_LENGTH = 40;
 /**
- * Extract date from transcript's first message timestamp.
+ * Extract date and time from transcript's first message timestamp.
+ * Returns format: yyyy-mm-dd-hhmm (24-hour, local time)
  */
-function extractDate(transcript: Transcript): string {
+function extractDateTime(transcript: Transcript): string {
   const firstMessage = transcript.messages[0];
-  if (firstMessage?.timestamp) {
-    const date = new Date(firstMessage.timestamp);
-    if (!isNaN(date.getTime())) {
-      return date.toISOString().slice(0, 10); // yyyy-mm-dd
-    }
-  }
-  // Fallback to current date
-  return new Date().toISOString().slice(0, 10);
-}
-/**
- * Extract context from transcript for LLM summarization.
- * Uses first few user messages, truncated.
- */
-function extractContext(transcript: Transcript): string {
-  const userMessages = transcript.messages.filter(
-    (m): m is UserMessage => m.type === "user",
-  );
+  const date = firstMessage?.timestamp
+    ? new Date(firstMessage.timestamp)
+    : new Date();
-  const chunks: string[] = [];
-  let totalLength = 0;
-  const maxLength = 500;
-  for (const msg of userMessages.slice(0, 3)) {
-    const content = msg.content.slice(0, 200);
-    if (totalLength + content.length > maxLength) break;
-    chunks.push(content);
-    totalLength += content.length;
+  if (isNaN(date.getTime())) {
+    return formatDateTime(new Date());
   }
-  return chunks.join("\n\n");
+  return formatDateTime(date);
 }
-/**
- * Sanitize a string into a valid URL slug.
- */
-function sanitizeSlug(input: string): string {
-  return input
-    .toLowerCase()
-    .replace(/[^a-z0-9\s-]/g, "") // remove special chars
-    .replace(/\s+/g, "-") // spaces to hyphens
-    .replace(/-+/g, "-") // collapse multiple hyphens
-    .replace(/^-|-$/g, "") // trim leading/trailing hyphens
-    .slice(0, SLUG_MAX_LENGTH);
+function formatDateTime(date: Date): string {
+  const year = date.getFullYear();
+  const month = String(date.getMonth() + 1).padStart(2, "0");
+  const day = String(date.getDate()).padStart(2, "0");
+  const hours = String(date.getHours()).padStart(2, "0");
+  const minutes = String(date.getMinutes()).padStart(2, "0");
+  return `${year}-${month}-${day}-${hours}${minutes}`;
 }
 /**
- * Generate slug via OpenRouter API.
+ * Extract session ID from the input filename.
+ * Returns the full session ID (filename without extension) for traceability.
  */
-async function generateSlugViaLLM(
-  context: string,
-  options: NamingOptions,
-): Promise<string | null> {
-  const { apiKey, model = DEFAULT_MODEL } = options;
-  if (!apiKey || !context.trim()) return null;
-  try {
-    const response = await fetch(
-      "https://openrouter.ai/api/v1/chat/completions",
-      {
-        method: "POST",
-        headers: {
-          Authorization: `Bearer ${apiKey}`,
-          "Content-Type": "application/json",
-        },
-        body: JSON.stringify({
-          model,
-          messages: [
-            {
-              role: "user",
-              content: `Generate a 2-4 word URL slug (lowercase, hyphenated) summarizing this conversation topic. Reply with ONLY the slug, nothing else.\n\n${context}`,
-            },
-          ],
-          max_tokens: 20,
-        }),
-      },
-    );
-    if (!response.ok) {
-      console.error(
-        `OpenRouter API error: ${response.status} ${response.statusText}`,
-      );
-      return null;
-    }
-    const data = (await response.json()) as {
-      choices?: Array<{ message?: { content?: string } }>;
-    };
-    const content = data.choices?.[0]?.message?.content?.trim();
-    if (!content) return null;
-    const slug = sanitizeSlug(content);
-    return slug || null;
-  } catch (error) {
-    console.error(
-      `OpenRouter API call failed: ${error instanceof Error ? error.message : error}`,
-    );
-    return null;
-  }
-}
-/**
- * Generate fallback slug from input filename.
- */
-function generateFallbackSlug(inputPath: string): string {
-  return extractFileId(inputPath, 8) || "transcript";
-}
-/**
- * Extract a short identifier from the input filename.
- * Used as a suffix for traceability back to source.
- */
-function extractFileId(inputPath: string, length = 6): string {
+export function extractSessionId(inputPath: string): string {
   if (inputPath === "<stdin>") {
-    return "";
+    return "stdin";
   }
   const name = basename(inputPath);
-  const base = name.replace(/\.jsonl?$/, "");
-  // Take first N chars, sanitize, and clean up any trailing hyphens
-  return sanitizeSlug(base.slice(0, length)).replace(/-+$/, "");
+  // Remove .jsonl or .json extension
+  return name.replace(/\.jsonl?$/, "");
 }
 /**
  * Generate output base name for a transcript.
- * Returns string like "2024-01-15-implement-auth-flow-abc123"
+ * Returns format: "2024-01-15-1423-{sessionId}"
  */
-export async function generateOutputName(
+export function generateOutputName(
   transcript: Transcript,
   inputPath: string,
-  options: NamingOptions = {},
-): Promise<string> {
-  const date = extractDate(transcript);
-  const fileId = extractFileId(inputPath);
-  // Try LLM-generated slug if API key available
-  if (options.apiKey) {
-    const context = extractContext(transcript);
-    const slug = await generateSlugViaLLM(context, options);
-    if (slug) {
-      return fileId ? `${date}-${slug}-${fileId}` : `${date}-${slug}`;
-    }
-  }
-  // Fallback to input filename prefix (no need for fileId suffix, it's already the slug)
-  const slug = generateFallbackSlug(inputPath);
-  return `${date}-${slug}`;
+): string {
+  const dateTime = extractDateTime(transcript);
+  const sessionId = extractSessionId(inputPath);
+  return `${dateTime}-${sessionId}`;
 }