npm - @arcreflex/agent-transcripts - Versions diffs - 0.1.9 → 0.1.11 - Mend

@arcreflex/agent-transcripts 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/CLAUDE.md +3 -1
package/README.md +71 -52
package/package.json +1 -1
package/scripts/infer-cc-types.prose +87 -0
package/src/adapters/claude-code.ts +291 -53
package/src/adapters/index.ts +0 -6
package/src/archive.ts +267 -0
package/src/cli.ts +96 -63
package/src/convert.ts +19 -86
package/src/parse.ts +0 -3
package/src/render-html.ts +38 -195
package/src/render-index.ts +15 -178
package/src/render.ts +25 -88
package/src/serve.ts +124 -215
package/src/title.ts +24 -102
package/src/types.ts +5 -0
package/src/utils/naming.ts +8 -13
package/src/utils/summary.ts +1 -4
package/src/utils/text.ts +5 -0
package/src/utils/theme.ts +152 -0
package/src/utils/tree.ts +85 -1
package/src/watch.ts +178 -0
package/test/archive.test.ts +264 -0
package/test/fixtures/claude/branching.input.jsonl +6 -0
package/test/fixtures/claude/branching.output.md +25 -0
package/test/naming.test.ts +98 -0
package/test/summary.test.ts +144 -0
package/test/tree.test.ts +217 -0
package/tsconfig.json +1 -1
package/src/cache.ts +0 -129
package/src/sync.ts +0 -294
package/src/utils/provenance.ts +0 -212

package/CLAUDE.md CHANGED Viewed

@@ -4,7 +4,9 @@
 ## Architectural Notes
-- **Source paths are stable**: Session source paths (e.g., `~/.claude/projects/.../sessions/`) are standardized by the tools that create them. Don't over-engineer for path changes—use source paths as cache keys directly.
+- **Source paths are stable**: Session source paths (e.g., `~/.claude/projects/.../sessions/`) are standardized by the tools that create them. Archive entries store the absolute source path for traceability.
+- **Archive is the central store**: All derived data (titles, etc.) lives on archive entries. Rendered HTML is in-memory only (LRU in serve). No persistent cache layer.
+- **Serve is snapshot-based**: `serve` loads the archive once at startup. It does not live-reload when `watch` archives new sessions. This is a known simplification — revisit if live-updating becomes important.
 ## Verification

package/README.md CHANGED Viewed

@@ -17,30 +17,35 @@ src/
   render.ts       # Intermediate format → markdown
   render-html.ts  # HTML transcript rendering
   render-index.ts # Index page rendering
-  convert.ts      # Full pipeline with provenance tracking
-  sync.ts         # Batch sync sessions → markdown
-  serve.ts        # HTTP server for dynamic transcript serving
-  cache.ts        # Content-hash-based caching (~/.cache/agent-transcripts/)
-  title.ts        # LLM title generation
+  convert.ts      # Direct pipeline (parse → render to stdout or directory)
+  archive.ts      # Persistent archive store (~/.local/share/agent-transcripts/archive/)
+  watch.ts        # Continuous archive updates via fs.watch + polling
+  serve.ts        # HTTP server serving from archive with in-memory LRU
+  title.ts        # LLM title generation (writes to archive entries)
   types.ts        # Core types (Transcript, Message, Adapter)
   adapters/       # Source format adapters (currently: claude-code)
   utils/
     naming.ts     # Deterministic output file naming
-    provenance.ts # Source tracking via transcripts.json + YAML front matter
     summary.ts    # Tool call summary extraction
     openrouter.ts # OpenRouter API client for title generation
     html.ts       # HTML escaping utility
-    tree.ts       # Tree navigation utilities
+    tree.ts       # Tree navigation and walkTranscriptTree generator
+    text.ts       # Shared text utilities (truncate)
+    theme.ts      # Shared CSS theme constants
 test/
   fixtures/       # Snapshot test inputs/outputs
   snapshots.test.ts
+  archive.test.ts
+  tree.test.ts
+  naming.test.ts
+  summary.test.ts
 ```
 ## Commands
 ```bash
 bun run check        # typecheck + prettier
-bun run test         # snapshot tests
+bun run test         # snapshot tests + archive tests
 bun run format       # auto-format
 ```
@@ -50,9 +55,23 @@ bun run format       # auto-format
 # Subcommands (convert is default if omitted)
 agent-transcripts convert <file>              # Parse and render to stdout
 agent-transcripts convert <file> -o <dir>     # Parse and render to directory
-agent-transcripts sync <dir> -o <out>         # Batch sync sessions
-agent-transcripts serve <dir>                 # Serve transcripts via HTTP
-agent-transcripts serve <dir> -p 8080         # Serve on custom port
+# Archive management
+agent-transcripts archive <source>            # Archive sessions from source dir
+agent-transcripts archive <source> --archive-dir ~/my-archive
+# Serving
+agent-transcripts serve                       # Serve from default archive
+agent-transcripts serve --archive-dir <dir>   # Serve from custom archive
+agent-transcripts serve -p 8080               # Custom port
+# Watching
+agent-transcripts watch <source>              # Keep archive updated continuously
+agent-transcripts watch <source> --poll-interval 60000
+# Title generation
+agent-transcripts title                       # Generate titles for archive entries
+agent-transcripts title -f                    # Force regenerate all titles
 # Use "-" for stdin
 cat session.jsonl | agent-transcripts -
@@ -60,67 +79,61 @@ cat session.jsonl | agent-transcripts -
 ## Architecture
-Two-stage pipeline: Parse (source → intermediate) → Render (intermediate → markdown).
+```
+Source (Claude Code sessions)
+    ↓ [archive / watch]
+Archive (~/.local/share/agent-transcripts/archive/{sessionId}.json)
+    ↓ [serve]
+HTML (rendered on demand, in-memory LRU)
+```
+`convert` is a standalone direct pipeline (no archive dependency).
+`serve` loads the archive once at startup — it won't pick up new sessions archived by a concurrent `watch` without a restart. Live-reloading could be added later (periodic re-listing or file-watch trigger) if needed.
 - Adapters handle source formats (see `src/adapters/index.ts` for registry)
 - Auto-detection: paths containing `.claude/` → claude-code adapter
 - Branching conversations preserved via `parentMessageRef` on messages
-- Provenance tracking via `transcripts.json` index + YAML front matter
 - Deterministic naming: `{datetime}-{sessionId}.md`
-- Sync uses sessions-index.json for discovery (claude-code), skipping subagent files
-- Sync uses content hash to skip unchanged sources (see Cache section)
-### Cache
+### Archive
-Derived content (rendered outputs, LLM-generated titles) is cached at `~/.cache/agent-transcripts/`:
+The archive is the central data store at `~/.local/share/agent-transcripts/archive/`:
 ```
-~/.cache/agent-transcripts/
-  {source-path-hash}.json  →  CacheEntry
+~/.local/share/agent-transcripts/archive/
+  {sessionId}.json  →  ArchiveEntry
 ```
 ```typescript
-interface CacheEntry {
-  contentHash: string; // hash of source content (invalidation key)
-  segments: Array<{
-    title?: string; // LLM-generated title
-    html?: string; // rendered HTML
-    md?: string; // rendered markdown
-  }>;
+interface ArchiveEntry {
+  sessionId: string;
+  sourcePath: string; // absolute source path
+  sourceHash: string; // content hash (invalidation key)
+  adapterName: string;
+  adapterVersion: string; // e.g. "claude-code:1"
+  schemaVersion: number;
+  archivedAt: string; // ISO timestamp
+  title?: string; // harness-provided or LLM-generated
+  transcripts: Transcript[];
 }
 ```
-Cache is keyed by source path (hashed), invalidated by content hash. When source content changes, all cached data is invalidated and regenerated.
-### transcripts.json
-The index file is a table of contents for the output directory:
-```typescript
-interface TranscriptsIndex {
-  version: 1;
-  entries: {
-    [outputFilename: string]: {
-      source: string; // absolute path to source
-      sessionId: string; // full session ID from filename
-      segmentIndex?: number; // for multi-transcript sources (1-indexed)
-      syncedAt: string; // ISO timestamp
-      firstUserMessage: string; // first user message content
-      title?: string; // copied from cache for convenience
-      messageCount: number;
-      startTime: string; // ISO timestamp
-      endTime: string; // ISO timestamp
-      cwd?: string; // working directory
-    };
-  };
-}
-```
+Freshness is determined by `sourceHash + adapterVersion + schemaVersion`. When any of these changes, the entry is re-archived.
 ## Key Types
 - `Transcript`: source info, warnings, messages array
 - `Message`: union of UserMessage | AssistantMessage | SystemMessage | ToolCallGroup | ErrorMessage
-- `Adapter`: name, discover function, parse function
+- `Adapter`: name, version, discover function, parse function
+### Titles
+Transcripts get titles from (in priority order):
+1. Harness-provided summary (e.g., Claude Code's sessions-index.json `summary` field)
+2. Existing title from previous archive entry
+3. LLM-generated title via OpenRouter (requires `OPENROUTER_API_KEY`)
 ## Adding an Adapter
@@ -128,8 +141,14 @@ interface TranscriptsIndex {
 2. Register in `src/adapters/index.ts` (adapters map + detection rules)
 3. Add test fixtures in `test/fixtures/<name>/`
+## Development Scripts
+- `scripts/infer-cc-types.prose`: open-prose program to infer types from real CC session data
 ## Tests
 Snapshot-based: `*.input.jsonl` → parse → render → compare against `*.output.md`
+Archive tests: real fixture files + temp dirs to verify archiving, freshness, listing.
 To update snapshots: manually edit the expected `.output.md` files.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@arcreflex/agent-transcripts",
-  "version": "0.1.9",
+  "version": "0.1.11",
   "description": "Transform AI coding agent session files into readable transcripts",
   "type": "module",
   "repository": {

package/scripts/infer-cc-types.prose ADDED Viewed

@@ -0,0 +1,87 @@
+# Infer TypeScript types from Claude Code session data
+#
+# Usage: prose run scripts/infer-cc-types.prose
+#
+# Examines real CC session directories and updates the type definitions
+# in src/adapters/claude-code.ts to match the current data format.
+# Agent that synthesizes TypeScript types from JSON examples
+agent type-inferrer:
+  model: sonnet
+  prompt: """
+    You analyze JSON data samples and synthesize TypeScript type definitions.
+    Your output should be:
+    - Clean, minimal TypeScript interfaces
+    - JSDoc comments explaining non-obvious fields
+    - Optional fields marked with ?
+    - Union types where the data shows multiple shapes
+    - `unknown` (never `any`) for truly dynamic data
+    When you see multiple examples, infer which fields are always present
+    (required) vs sometimes present (optional).
+  """
+# Discover CC session data locations (once)
+let discovery = session "Find CC session data"
+  model: sonnet
+  prompt: """
+    Find Claude Code session data on this system.
+    1. Search for sessions-index.json files under ~/.claude
+    2. For each, note the directory path and count of .jsonl files
+    Return a structured list of what you found.
+    If nothing found, say so clearly.
+  """
+# Iterate: infer types → implement → check → repeat if needed
+let feedback = "(no feedback)"
+loop until **feedback indicates success** (max: 5):
+  # Parallel type inference
+  parallel:
+    index_types = session: type-inferrer
+      prompt: """
+        Analyze the sessions-index.json files from the discovered locations.
+        Generate TypeScript interfaces for:
+        - SessionsIndex (the root object)
+        - SessionIndexEntry (each entry in the entries array)
+      """
+      context: { discovery, feedback }
+    record_types = session: type-inferrer
+      prompt: """
+        Analyze Claude Code session .jsonl files (sample 20-30 lines from a few files).
+        Generate TypeScript types describing JSONL records.
+        - ClaudeRecord (the JSONL line structure)
+        - ContentBlock (the message.content array elements)
+      """
+      context: { discovery, feedback }
+  # Implement and verify
+  feedback = session "Update adapter types"
+    model: opus
+    prompt: """
+      Update the type definitions in src/adapters/claude-code.ts to match these inferred types.
+      ## Inferred from sessions-index.json
+      {index_types}
+      ## Inferred from JSONL records
+      {record_types}
+      ## Task
+      1. Read the current types in src/adapters/claude-code.ts
+      2. Edit to match the inferred types (add new fields, fix types, etc.)
+      3. Run: bun run typecheck
+      If typecheck succeeds and you're happy with the types, output a message indicating success.
+      If typecheck fails, assess whether the failures reflect a real need to change the code or a flaw in the type design.
+        If they reflect a real need to change the code, and you're happy with the types, make the changes and output a message indicating success.
+      Otherwise: output feedback requesting revisions to the type design.
+    """
+    context: { index_types, record_types }