npm - @arcreflex/agent-transcripts - Versions diffs - 0.1.5 → 0.1.9 - Mend

@arcreflex/agent-transcripts 0.1.5 → 0.1.9

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (25)

package/.github/workflows/publish.yml +5 -0
package/CLAUDE.md +4 -0
package/README.md +70 -17
package/bun.lock +89 -0
package/package.json +3 -2
package/src/adapters/claude-code.ts +300 -33
package/src/cache.ts +129 -0
package/src/cli.ts +95 -68
package/src/convert.ts +82 -42
package/src/parse.ts +7 -101
package/src/render-html.ts +1096 -0
package/src/render-index.ts +611 -0
package/src/render.ts +7 -194
package/src/serve.ts +308 -0
package/src/sync.ts +211 -98
package/src/title.ts +172 -0
package/src/types.ts +18 -2
package/src/utils/html.ts +12 -0
package/src/utils/naming.ts +30 -143
package/src/utils/openrouter.ts +116 -0
package/src/utils/provenance.ts +167 -69
package/src/utils/tree.ts +116 -0
package/test/fixtures/claude/non-message-parents.input.jsonl +9 -0
package/test/fixtures/claude/non-message-parents.output.md +30 -0
package/test/snapshots.test.ts +39 -33

package/src/utils/naming.ts CHANGED

@@ -1,173 +1,60 @@
 /**
  * Output file naming utilities.
  *
- * Generates descriptive filenames for transcripts:
- * - With OpenRouter API key: yyyy-mm-dd-{llm-generated-slug}.{ext}
- * - Without: yyyy-mm-dd-{input-filename-prefix}.{ext}
+ * Generates deterministic filenames: {datetime}-{sessionId}.md
  */
-import type { Transcript, UserMessage } from "../types.ts";
+import type { Transcript } from "../types.ts";
 import { basename } from "path";
-export interface NamingOptions {
-  apiKey?: string; // OpenRouter API key
-  model?: string; // Default: google/gemini-2.0-flash-001
-}
-const DEFAULT_MODEL = "google/gemini-2.0-flash-001";
-const SLUG_MAX_LENGTH = 40;
 /**
- * Extract date from transcript's first message timestamp.
+ * Extract date and time from transcript's first message timestamp.
+ * Returns format: yyyy-mm-dd-hhmm (24-hour, local time)
  */
-function extractDate(transcript: Transcript): string {
+function extractDateTime(transcript: Transcript): string {
   const firstMessage = transcript.messages[0];
-  if (firstMessage?.timestamp) {
-    const date = new Date(firstMessage.timestamp);
-    if (!isNaN(date.getTime())) {
-      return date.toISOString().slice(0, 10); // yyyy-mm-dd
-    }
-  }
-  // Fallback to current date
-  return new Date().toISOString().slice(0, 10);
-}
-/**
- * Extract context from transcript for LLM summarization.
- * Uses first few user messages, truncated.
- */
-function extractContext(transcript: Transcript): string {
-  const userMessages = transcript.messages.filter(
-    (m): m is UserMessage => m.type === "user",
-  );
+  const date = firstMessage?.timestamp
+    ? new Date(firstMessage.timestamp)
+    : new Date();
-  const chunks: string[] = [];
-  let totalLength = 0;
-  const maxLength = 500;
-  for (const msg of userMessages.slice(0, 3)) {
-    const content = msg.content.slice(0, 200);
-    if (totalLength + content.length > maxLength) break;
-    chunks.push(content);
-    totalLength += content.length;
+  if (isNaN(date.getTime())) {
+    return formatDateTime(new Date());
   }
-  return chunks.join("\n\n");
+  return formatDateTime(date);
 }
-/**
- * Sanitize a string into a valid URL slug.
- */
-function sanitizeSlug(input: string): string {
-  return input
-    .toLowerCase()
-    .replace(/[^a-z0-9\s-]/g, "") // remove special chars
-    .replace(/\s+/g, "-") // spaces to hyphens
-    .replace(/-+/g, "-") // collapse multiple hyphens
-    .replace(/^-|-$/g, "") // trim leading/trailing hyphens
-    .slice(0, SLUG_MAX_LENGTH);
+function formatDateTime(date: Date): string {
+  const year = date.getFullYear();
+  const month = String(date.getMonth() + 1).padStart(2, "0");
+  const day = String(date.getDate()).padStart(2, "0");
+  const hours = String(date.getHours()).padStart(2, "0");
+  const minutes = String(date.getMinutes()).padStart(2, "0");
+  return `${year}-${month}-${day}-${hours}${minutes}`;
 }
 /**
- * Generate slug via OpenRouter API.
+ * Extract session ID from the input filename.
+ * Returns the full session ID (filename without extension) for traceability.
  */
-async function generateSlugViaLLM(
-  context: string,
-  options: NamingOptions,
-): Promise<string | null> {
-  const { apiKey, model = DEFAULT_MODEL } = options;
-  if (!apiKey || !context.trim()) return null;
-  try {
-    const response = await fetch(
-      "https://openrouter.ai/api/v1/chat/completions",
-      {
-        method: "POST",
-        headers: {
-          Authorization: `Bearer ${apiKey}`,
-          "Content-Type": "application/json",
-        },
-        body: JSON.stringify({
-          model,
-          messages: [
-            {
-              role: "user",
-              content: `Generate a 2-4 word URL slug (lowercase, hyphenated) summarizing this conversation topic. Reply with ONLY the slug, nothing else.\n\n${context}`,
-            },
-          ],
-          max_tokens: 20,
-        }),
-      },
-    );
-    if (!response.ok) {
-      console.error(
-        `OpenRouter API error: ${response.status} ${response.statusText}`,
-      );
-      return null;
-    }
-    const data = (await response.json()) as {
-      choices?: Array<{ message?: { content?: string } }>;
-    };
-    const content = data.choices?.[0]?.message?.content?.trim();
-    if (!content) return null;
-    const slug = sanitizeSlug(content);
-    return slug || null;
-  } catch (error) {
-    console.error(
-      `OpenRouter API call failed: ${error instanceof Error ? error.message : error}`,
-    );
-    return null;
-  }
-}
-/**
- * Generate fallback slug from input filename.
- */
-function generateFallbackSlug(inputPath: string): string {
-  return extractFileId(inputPath, 8) || "transcript";
-}
-/**
- * Extract a short identifier from the input filename.
- * Used as a suffix for traceability back to source.
- */
-function extractFileId(inputPath: string, length = 6): string {
+export function extractSessionId(inputPath: string): string {
   if (inputPath === "<stdin>") {
-    return "";
+    return "stdin";
   }
   const name = basename(inputPath);
-  const base = name.replace(/\.jsonl?$/, "");
-  // Take first N chars, sanitize, and clean up any trailing hyphens
-  return sanitizeSlug(base.slice(0, length)).replace(/-+$/, "");
+  // Remove .jsonl or .json extension
+  return name.replace(/\.jsonl?$/, "");
 }
 /**
  * Generate output base name for a transcript.
- * Returns string like "2024-01-15-implement-auth-flow-abc123"
+ * Returns format: "2024-01-15-1423-{sessionId}"
  */
-export async function generateOutputName(
+export function generateOutputName(
   transcript: Transcript,
   inputPath: string,
-  options: NamingOptions = {},
-): Promise<string> {
-  const date = extractDate(transcript);
-  const fileId = extractFileId(inputPath);
-  // Try LLM-generated slug if API key available
-  if (options.apiKey) {
-    const context = extractContext(transcript);
-    const slug = await generateSlugViaLLM(context, options);
-    if (slug) {
-      return fileId ? `${date}-${slug}-${fileId}` : `${date}-${slug}`;
-    }
-  }
-  // Fallback to input filename prefix (no need for fileId suffix, it's already the slug)
-  const slug = generateFallbackSlug(inputPath);
-  return `${date}-${slug}`;
+): string {
+  const dateTime = extractDateTime(transcript);
+  const sessionId = extractSessionId(inputPath);
+  return `${dateTime}-${sessionId}`;
 }

package/src/utils/openrouter.ts ADDED

@@ -0,0 +1,116 @@
+/**
+ * OpenRouter API client for LLM-based title generation.
+ *
+ * Uses Gemini 2.5 Flash for fast, cheap title generation.
+ * Gracefully handles missing API key or API failures.
+ */
+const OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions";
+const MODEL = "google/gemini-2.5-flash";
+// Approximate token limit for context (conservative estimate)
+// Gemini Flash has 1M context, but we don't need anywhere near that
+const MAX_CHARS = 32000; // ~8k tokens
+/**
+ * Truncate content with middle-cut strategy.
+ * Keeps beginning and end, removes middle if over limit.
+ */
+function truncateMiddle(content: string, maxChars: number): string {
+  if (content.length <= maxChars) return content;
+  const halfLimit = Math.floor(maxChars / 2);
+  const start = content.slice(0, halfLimit);
+  const end = content.slice(-halfLimit);
+  return `${start}\n\n[... middle truncated ...]\n\n${end}`;
+}
+interface OpenRouterResponse {
+  choices?: Array<{
+    message?: {
+      content?: string;
+    };
+  }>;
+  error?: {
+    message?: string;
+  };
+}
+/**
+ * Generate a title for a transcript using OpenRouter.
+ *
+ * @param markdownContent - The full markdown transcript
+ * @returns Generated title, or undefined if generation fails/skipped
+ */
+export async function generateTitle(
+  markdownContent: string,
+): Promise<string | undefined> {
+  const apiKey = process.env.OPENROUTER_API_KEY;
+  if (!apiKey) {
+    // Silently skip - no API key means user doesn't want title generation
+    return undefined;
+  }
+  const truncated = truncateMiddle(markdownContent, MAX_CHARS);
+  const prompt = `Generate a concise title (5-10 words) for this AI coding session transcript. The title should capture the main task or topic discussed.
+Reply with just the title, no quotes, no punctuation at the end, no explanation.
+Transcript:
+${truncated}`;
+  try {
+    const response = await fetch(OPENROUTER_API_URL, {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+        "Content-Type": "application/json",
+        "HTTP-Referer": "https://github.com/arcreflex/agent-transcripts",
+        "X-Title": "agent-transcripts",
+      },
+      body: JSON.stringify({
+        model: MODEL,
+        messages: [{ role: "user", content: prompt }],
+        max_tokens: 50,
+        temperature: 0.3,
+      }),
+    });
+    if (!response.ok) {
+      const text = await response.text();
+      console.error(
+        `Warning: OpenRouter API error (${response.status}): ${text.slice(0, 200)}`,
+      );
+      return undefined;
+    }
+    const data = (await response.json()) as OpenRouterResponse;
+    if (data.error) {
+      console.error(
+        `Warning: OpenRouter error: ${data.error.message || "Unknown error"}`,
+      );
+      return undefined;
+    }
+    const title = data.choices?.[0]?.message?.content?.trim();
+    if (!title) {
+      console.error("Warning: OpenRouter returned empty title");
+      return undefined;
+    }
+    // Clean up: remove quotes if present, trim trailing punctuation
+    return title
+      .replace(/^["']|["']$/g, "")
+      .replace(/[.!?]+$/, "")
+      .trim();
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    console.error(`Warning: OpenRouter request failed: ${message}`);
+    return undefined;
+  }
+}

package/src/utils/provenance.ts CHANGED

@@ -2,113 +2,211 @@
  * Provenance tracking utilities.
  *
  * Tracks the relationship between source files and output transcripts
- * via YAML front matter, enabling update-in-place behavior.
+ * via transcripts.json index (primary) and YAML front matter (for self-documenting files).
  */
-import { Glob } from "bun";
-import { join } from "path";
-import { stat, unlink } from "fs/promises";
+import { join, resolve } from "path";
+import { rename, unlink } from "fs/promises";
+const INDEX_FILENAME = "transcripts.json";
+// ============================================================================
+// Index Types
+// ============================================================================
+export interface TranscriptEntry {
+  source: string; // absolute path to source
+  sessionId: string; // full session ID from source filename
+  segmentIndex?: number; // for multi-transcript sources (1-indexed)
+  syncedAt: string; // ISO timestamp
+  firstUserMessage: string; // first user message content (for display)
+  title?: string; // copied from cache for index.html convenience
+  messageCount: number;
+  startTime: string; // ISO timestamp
+  endTime: string; // ISO timestamp
+  cwd?: string;
+}
+export interface TranscriptsIndex {
+  version: 1;
+  entries: Record<string, TranscriptEntry>; // outputFilename → entry
+}
+// ============================================================================
+// Path Utilities
+// ============================================================================
+/**
+ * Normalize a source path to absolute for consistent index keys.
+ */
+export function normalizeSourcePath(sourcePath: string): string {
+  if (sourcePath === "<stdin>") return sourcePath;
+  return resolve(sourcePath);
+}
+// ============================================================================
+// Index I/O
+// ============================================================================
 /**
- * Extract source path from YAML front matter.
- * Returns null if no front matter or no source field.
+ * Load transcripts.json index from output directory.
+ * Returns empty index if file doesn't exist. Warns on corrupt file.
  */
-export function extractSourceFromFrontMatter(content: string): string | null {
-  // Match YAML front matter at start of file
-  const match = content.match(/^---\n([\s\S]*?)\n---/);
-  if (!match) return null;
-  // Extract source field (simple line-based parsing)
-  const frontMatter = match[1];
-  const sourceLine = frontMatter
-    .split("\n")
-    .find((line) => line.startsWith("source:"));
-  if (!sourceLine) return null;
-  return sourceLine.replace(/^source:\s*/, "").trim();
+export async function loadIndex(outputDir: string): Promise<TranscriptsIndex> {
+  const indexPath = join(outputDir, INDEX_FILENAME);
+  try {
+    const content = await Bun.file(indexPath).text();
+    const data = JSON.parse(content) as TranscriptsIndex;
+    // Validate version
+    if (data.version !== 1) {
+      console.error(
+        `Warning: Unknown index version ${data.version}, creating fresh index`,
+      );
+      return { version: 1, entries: {} };
+    }
+    return data;
+  } catch (err) {
+    // Distinguish between missing file (expected) and corrupt file (unexpected)
+    const isEnoent =
+      err instanceof Error && (err as NodeJS.ErrnoException).code === "ENOENT";
+    if (!isEnoent) {
+      console.error(
+        `Warning: Could not parse index file, starting fresh: ${err instanceof Error ? err.message : String(err)}`,
+      );
+    }
+    return { version: 1, entries: {} };
+  }
 }
 /**
- * Scan output directory for existing transcripts.
- * Returns map from absolute source path → all output file paths for that source.
+ * Save transcripts.json index to output directory.
+ * Uses atomic write (write to .tmp, then rename) to prevent corruption.
  */
-export async function scanOutputDirectory(
+export async function saveIndex(
   outputDir: string,
-): Promise<Map<string, string[]>> {
-  const sourceToOutputs = new Map<string, string[]>();
-  const glob = new Glob("**/*.md");
+  index: TranscriptsIndex,
+): Promise<void> {
+  const indexPath = join(outputDir, INDEX_FILENAME);
+  const tmpPath = `${indexPath}.tmp`;
-  for await (const file of glob.scan({ cwd: outputDir, absolute: false })) {
-    const fullPath = join(outputDir, file);
+  const content = JSON.stringify(index, null, 2) + "\n";
+  await Bun.write(tmpPath, content);
+  try {
+    await rename(tmpPath, indexPath);
+  } catch (err) {
+    // Clean up temp file on failure
     try {
-      const content = await Bun.file(fullPath).text();
-      const sourcePath = extractSourceFromFrontMatter(content);
-      if (sourcePath) {
-        const existing = sourceToOutputs.get(sourcePath) || [];
-        existing.push(fullPath);
-        sourceToOutputs.set(sourcePath, existing);
-      }
+      await unlink(tmpPath);
     } catch {
-      // Skip files we can't read
+      // Ignore cleanup errors
+    }
+    throw err;
+  }
+}
+// ============================================================================
+// Index Operations
+// ============================================================================
+/**
+ * Get all output filenames for a given source path.
+ */
+export function getOutputsForSource(
+  index: TranscriptsIndex,
+  sourcePath: string,
+): string[] {
+  const outputs: string[] = [];
+  for (const [filename, entry] of Object.entries(index.entries)) {
+    if (entry.source === sourcePath) {
+      outputs.push(filename);
     }
   }
+  return outputs;
+}
-  return sourceToOutputs;
+/**
+ * Set or update an entry in the index.
+ * outputPath should be relative to the output directory.
+ */
+export function setEntry(
+  index: TranscriptsIndex,
+  outputPath: string,
+  entry: TranscriptEntry,
+): void {
+  index.entries[outputPath] = entry;
 }
 /**
- * Find existing outputs for a specific source path.
+ * Remove all entries for a given source path.
+ * Returns the removed entries (for potential restoration on error).
  */
-export async function findExistingOutputs(
-  outputDir: string,
+export function removeEntriesForSource(
+  index: TranscriptsIndex,
   sourcePath: string,
-): Promise<string[]> {
-  const allOutputs = await scanOutputDirectory(outputDir);
-  return allOutputs.get(sourcePath) || [];
+): Array<{ filename: string; entry: TranscriptEntry }> {
+  const removed: Array<{ filename: string; entry: TranscriptEntry }> = [];
+  for (const [filename, entry] of Object.entries(index.entries)) {
+    if (entry.source === sourcePath) {
+      removed.push({ filename, entry });
+      delete index.entries[filename];
+    }
+  }
+  return removed;
+}
+/**
+ * Restore previously removed entries to the index.
+ */
+export function restoreEntries(
+  index: TranscriptsIndex,
+  entries: Array<{ filename: string; entry: TranscriptEntry }>,
+): void {
+  for (const { filename, entry } of entries) {
+    index.entries[filename] = entry;
+  }
 }
+// ============================================================================
+// File Operations
+// ============================================================================
 /**
- * Delete existing output files, with warnings on failure.
+ * Delete output files, with warnings on failure.
  */
-export async function deleteExistingOutputs(
-  paths: string[],
+export async function deleteOutputFiles(
+  outputDir: string,
+  filenames: string[],
   quiet = false,
 ): Promise<void> {
-  for (const oldPath of paths) {
+  for (const filename of filenames) {
+    const fullPath = join(outputDir, filename);
     try {
-      await unlink(oldPath);
+      await unlink(fullPath);
       if (!quiet) {
-        console.error(`Deleted: ${oldPath}`);
+        console.error(`Deleted: ${fullPath}`);
       }
     } catch (err) {
-      // Warn but continue - file may already be gone or have permission issues
       const msg = err instanceof Error ? err.message : String(err);
-      console.error(`Warning: could not delete ${oldPath}: ${msg}`);
+      console.error(`Warning: could not delete ${fullPath}: ${msg}`);
     }
   }
 }
+// ============================================================================
+// Transcript Metadata Extraction
+// ============================================================================
+import type { Transcript } from "../types.ts";
 /**
- * Check if any outputs are stale relative to source mtime.
+ * Extract the first user message from a transcript.
+ * Returns empty string if no user message found.
  */
-export async function hasStaleOutputs(
-  existingOutputs: string[],
-  expectedCount: number,
-  sourceMtime: number,
-): Promise<boolean> {
-  if (existingOutputs.length !== expectedCount) return true;
-  for (const outputPath of existingOutputs) {
-    try {
-      const outputStat = await stat(outputPath);
-      if (outputStat.mtime.getTime() < sourceMtime) {
-        return true;
-      }
-    } catch {
-      // Output doesn't exist
-      return true;
+export function extractFirstUserMessage(transcript: Transcript): string {
+  for (const msg of transcript.messages) {
+    if (msg.type === "user") {
+      return msg.content;
     }
   }
-  return false;
+  return "";
 }