npm - agentel - Versions diffs - 0.2.3 → 0.2.4 - Mend

agentel 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md +1 -1
package/docs/history-source-handling.md +24 -24
package/docs/release.md +1 -1
package/package.json +1 -2
package/src/importers/providers.js +1 -1
package/src/importers.js +91 -17
package/src/search.js +51 -25
package/agentlog-spec.md +0 -558

package/README.md CHANGED Viewed

@@ -41,7 +41,7 @@ ref for repeatable installs:
 ```sh
 npm install -g brianlzhou/agentlog
 # or
-npm install -g brianlzhou/agentlog#v0.2.3
+npm install -g brianlzhou/agentlog#v0.2.4
 agentlog init
 ```

package/docs/history-source-handling.md CHANGED Viewed

@@ -183,30 +183,30 @@ package-prefixed scheme.
 | Source type | Version |
 | --- | --- |
-| `codex-cli-history` | `0.2.3.0` |
-| `codex-desktop-history` | `0.2.3.0` |
-| `cli-history` | `0.2.3.0` |
-| `claude-sdk-history` | `0.2.3.0` |
-| `claude-code-desktop-metadata` | `0.2.3.0` |
-| `claude-workspace-desktop` | `0.2.3.0` |
-| `cursor-workspace-sqlite` | `0.2.3.0` |
-| `cursor-global-sqlite` | `0.2.3.0` |
-| `cursor-raw-sqlite-salvage` | `0.2.3.0` |
-| `cursor-agent-transcripts` | `0.2.3.0` |
-| `devin-cli-history` | `0.2.3.0` |
-| `gemini-cli-history` | `0.2.3.0` |
-| `cline-task-history` | `0.2.3.0` |
-| `opencode-history` | `0.2.3.0` |
-| `opencode-sqlite-history` | `0.2.3.0` |
-| `aider-chat-history` | `0.2.3.0` |
-| `antigravity-history` | `0.2.3.0` |
-| `antigravity-trajectory-summary` | `0.2.3.0` |
-| `windsurf-trajectory-export` | `0.2.3.0` |
-| `web-chat-export` | `0.2.3.0` |
-| `chatgpt-export` | `0.2.3.0` |
-| `claude-web-export` | `0.2.3.0` |
-| `claude-web-memory` | `0.2.3.0` |
-| `import` | `0.2.3.0` |
+| `codex-cli-history` | `0.2.4.0` |
+| `codex-desktop-history` | `0.2.4.0` |
+| `cli-history` | `0.2.4.0` |
+| `claude-sdk-history` | `0.2.4.0` |
+| `claude-code-desktop-metadata` | `0.2.4.0` |
+| `claude-workspace-desktop` | `0.2.4.0` |
+| `cursor-workspace-sqlite` | `0.2.4.0` |
+| `cursor-global-sqlite` | `0.2.4.0` |
+| `cursor-raw-sqlite-salvage` | `0.2.4.0` |
+| `cursor-agent-transcripts` | `0.2.4.0` |
+| `devin-cli-history` | `0.2.4.0` |
+| `gemini-cli-history` | `0.2.4.0` |
+| `cline-task-history` | `0.2.4.0` |
+| `opencode-history` | `0.2.4.0` |
+| `opencode-sqlite-history` | `0.2.4.0` |
+| `aider-chat-history` | `0.2.4.0` |
+| `antigravity-history` | `0.2.4.0` |
+| `antigravity-trajectory-summary` | `0.2.4.0` |
+| `windsurf-trajectory-export` | `0.2.4.0` |
+| `web-chat-export` | `0.2.4.0` |
+| `chatgpt-export` | `0.2.4.0` |
+| `claude-web-export` | `0.2.4.0` |
+| `claude-web-memory` | `0.2.4.0` |
+| `import` | `0.2.4.0` |
 `cursor-sqlite-history` and `antigravity-brain` are compatibility aliases for
 older labels. Fingerprints include the parser version prefix, so changing the

package/docs/release.md CHANGED Viewed

@@ -65,5 +65,5 @@ After tagging and pushing the release, sanity-check both public install forms:
 ```sh
 npm install -g agentel
-npm install -g brianlzhou/agentlog#v0.2.3
+npm install -g brianlzhou/agentlog#v0.2.4
 ```

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agentel",
-  "version": "0.2.3",
+  "version": "0.2.4",
   "description": "Local-first archive and recall layer for agent coding sessions.",
   "type": "commonjs",
   "license": "MIT",
@@ -39,7 +39,6 @@
     "docs/code-reference.md",
     "docs/history-source-handling.md",
     "docs/release.md",
-    "agentlog-spec.md",
     "README.md",
     "LICENSE"
   ],

package/src/importers/providers.js CHANGED Viewed

@@ -79,7 +79,7 @@ const PROVIDER_ADAPTERS = [
     sourceType: "opencode-history",
     label: "OpenCode",
     run: ({ helpers, since, options, env }) =>
-      helpers.importStructuredProvider("opencode", helpers.readOpenCodeSessions(env, options), since, options, env)
+      helpers.importStructuredProvider("opencode", helpers.readOpenCodeSessions(env, { ...options, since }), since, options, env)
   },
   {
     source: "aider",

package/src/importers.js CHANGED Viewed

@@ -4149,6 +4149,7 @@ const CURSOR_RAW_ASSISTANT_MERGE_MIN_SCORE = 32;
 const CURSOR_RAW_ASSISTANT_MERGE_MIN_OVERLAP = 2;
 const CURSOR_ABSOLUTE_PATH_RE = /(?:file:\/\/)?\/(?:Users|home|Volumes|private|tmp|var)\/[^\s"'`<>{}|]+/g;
 const SQLITE_QUERY_TIMEOUT_MS = 30 * 1000;
+const OPENCODE_SQLITE_BATCH_SIZE = 100;
 function readCursorRawSqliteSalvageSessionsFromDb(dbPath, options = {}) {
   const files = cursorRawSqliteFilesForDb(dbPath);
@@ -6544,7 +6545,18 @@ function readOpenCodeSessions(env = process.env, options = {}) {
   const sessions = [];
   reportDiscoveryProgress(options, { current: 0, total: dbs.length, message: "reading OpenCode SQLite stores" });
   for (let index = 0; index < dbs.length; index++) {
-    const dbSessions = readOpenCodeSqliteSessionsFromDb(dbs[index]);
+    let dbSessions = [];
+    try {
+      dbSessions = readOpenCodeSqliteSessionsFromDb(dbs[index], options);
+    } catch (error) {
+      reportDiscoveryProgress(options, {
+        current: index + 1,
+        total: dbs.length,
+        message: `SQLite skipped: ${error.message}`,
+        path: dbs[index]
+      });
+      continue;
+    }
     sessions.push(...dbSessions);
     reportDiscoveryProgress(options, {
       current: index + 1,
@@ -6656,12 +6668,14 @@ function openCodeMessageSessionIds(root) {
     .sort((a, b) => a.localeCompare(b));
 }
-function readOpenCodeSqliteSessionsFromDb(dbPath) {
+function readOpenCodeSqliteSessionsFromDb(dbPath, options = {}) {
   if (!safeStat(dbPath)) return [];
   if (!sqliteTableExists(dbPath, "session") || !sqliteTableExists(dbPath, "message") || !sqliteTableExists(dbPath, "part")) return [];
-  const sessionRows = readOpenCodeSqliteSessionRows(dbPath);
-  const messageRows = readOpenCodeSqliteMessageRows(dbPath);
-  const partRows = readOpenCodeSqlitePartRows(dbPath);
+  const sessionRows = readOpenCodeSqliteSessionRows(dbPath, options);
+  if (!sessionRows.length) return [];
+  const sessionIds = sessionRows.map((row) => row.id).filter(Boolean);
+  const messageRows = sortOpenCodeSqliteRows(readOpenCodeSqliteMessageRows(dbPath, sessionIds), ["session_id", "time_created", "id"]);
+  const partRows = sortOpenCodeSqliteRows(readOpenCodeSqlitePartRows(dbPath, sessionIds), ["session_id", "message_id", "time_created", "id"]);
   const messagesBySession = groupRowsBy(messageRows, "session_id");
   const partsByMessage = groupRowsBy(partRows, "message_id");
   const storageRoot = path.join(path.dirname(dbPath), "storage");
@@ -6709,11 +6723,12 @@ function readOpenCodeSqliteSessionsFromDb(dbPath) {
   return sessions;
 }
-function readOpenCodeSqliteSessionRows(dbPath) {
+function readOpenCodeSqliteSessionRows(dbPath, options = {}) {
   const sessionColumns = sqliteTableColumns(dbPath, "session");
   if (!sessionColumns.has("id")) return [];
   const projectColumns = sqliteTableExists(dbPath, "project") ? sqliteTableColumns(dbPath, "project") : new Set();
   const canJoinProject = sessionColumns.has("project_id") && projectColumns.has("id");
+  const timestampExpr = openCodeSqliteSessionTimestampExpr(sessionColumns);
   const selects = [
     "s.id",
     sqliteSelectMaybe(sessionColumns, "s", "project_id"),
@@ -6735,7 +6750,11 @@ function readOpenCodeSqliteSessionRows(dbPath) {
   ];
   const queryParts = [`select ${selects.join(", ")}`, "from session s"];
   if (canJoinProject) queryParts.push("left join project p on p.id = s.project_id");
-  if (sessionColumns.has("time_archived")) queryParts.push("where coalesce(s.time_archived, 0) = 0");
+  const where = [];
+  if (sessionColumns.has("time_archived")) where.push("coalesce(s.time_archived, 0) = 0");
+  const sinceCondition = openCodeSqliteSinceCondition(timestampExpr, options.since);
+  if (sinceCondition) where.push(sinceCondition);
+  if (where.length) queryParts.push(`where ${where.join(" and ")}`);
   const orderColumns = [];
   if (sessionColumns.has("time_updated")) orderColumns.push("s.time_updated desc");
   if (sessionColumns.has("time_created")) orderColumns.push("s.time_created desc");
@@ -6744,7 +6763,23 @@ function readOpenCodeSqliteSessionRows(dbPath) {
   return readSqliteJson(dbPath, queryParts.join(" "), "OpenCode SQLite sessions");
 }
-function readOpenCodeSqliteMessageRows(dbPath) {
+function openCodeSqliteSessionTimestampExpr(sessionColumns) {
+  const candidates = ["time_updated", "time_created"].filter((column) => sessionColumns.has(column)).map((column) => `s.${column}`);
+  if (!candidates.length) return "";
+  return candidates.length === 1 ? candidates[0] : `coalesce(${candidates.join(", ")})`;
+}
+function openCodeSqliteSinceCondition(timestampExpr, since) {
+  if (!timestampExpr || !since) return "";
+  const sinceTime = since instanceof Date ? since.getTime() : Date.parse(since);
+  if (!Number.isFinite(sinceTime)) return "";
+  const sinceMs = Math.floor(sinceTime);
+  const sinceSeconds = Math.floor(sinceTime / 1000);
+  const sinceIso = new Date(sinceTime).toISOString();
+  return `((${timestampExpr} is not null) and ((abs(${timestampExpr}) > 1000000000000 and ${timestampExpr} >= ${sinceMs}) or (abs(${timestampExpr}) <= 1000000000000 and ${timestampExpr} >= ${sinceSeconds}) or (typeof(${timestampExpr}) = 'text' and datetime(${timestampExpr}) >= datetime(${sqlQuote(sinceIso)}))))`;
+}
+function readOpenCodeSqliteMessageRows(dbPath, sessionIds = []) {
   const columns = sqliteTableColumns(dbPath, "message");
   if (!columns.has("id") || !columns.has("session_id")) return [];
   const selects = [
@@ -6754,13 +6789,16 @@ function readOpenCodeSqliteMessageRows(dbPath) {
     sqliteSelectMaybe(columns, "message", "time_updated"),
     sqliteSelectMaybe(columns, "message", "data")
   ];
-  const orderColumns = ["session_id"];
-  if (columns.has("time_created")) orderColumns.push("time_created");
-  orderColumns.push("id");
-  return readSqliteJson(dbPath, `select ${selects.join(", ")} from message order by ${orderColumns.join(", ")}`, "OpenCode SQLite messages");
+  return readOpenCodeSqliteRowsForSessionIds(
+    dbPath,
+    "message",
+    selects,
+    sessionIds,
+    "OpenCode SQLite messages"
+  );
 }
-function readOpenCodeSqlitePartRows(dbPath) {
+function readOpenCodeSqlitePartRows(dbPath, sessionIds = []) {
   const columns = sqliteTableColumns(dbPath, "part");
   if (!columns.has("id") || !columns.has("message_id") || !columns.has("session_id")) return [];
   const selects = [
@@ -6771,10 +6809,46 @@ function readOpenCodeSqlitePartRows(dbPath) {
     sqliteSelectMaybe(columns, "part", "time_updated"),
     sqliteSelectMaybe(columns, "part", "data")
   ];
-  const orderColumns = ["session_id", "message_id"];
-  if (columns.has("time_created")) orderColumns.push("time_created");
-  orderColumns.push("id");
-  return readSqliteJson(dbPath, `select ${selects.join(", ")} from part order by ${orderColumns.join(", ")}`, "OpenCode SQLite parts");
+  return readOpenCodeSqliteRowsForSessionIds(
+    dbPath,
+    "part",
+    selects,
+    sessionIds,
+    "OpenCode SQLite parts"
+  );
+}
+function readOpenCodeSqliteRowsForSessionIds(dbPath, tableName, selects, sessionIds, label) {
+  const ids = [...new Set((sessionIds || []).map((id) => String(id || "")).filter(Boolean))];
+  if (!ids.length) return [];
+  const rows = [];
+  for (let index = 0; index < ids.length; index += OPENCODE_SQLITE_BATCH_SIZE) {
+    const batch = ids.slice(index, index + OPENCODE_SQLITE_BATCH_SIZE);
+    const query = [
+      `select ${selects.join(", ")}`,
+      `from ${tableName}`,
+      `where session_id in (${batch.map(sqlQuote).join(",")})`
+    ].join(" ");
+    rows.push(...readSqliteJson(dbPath, query, label));
+  }
+  return rows;
+}
+function sortOpenCodeSqliteRows(rows, keys) {
+  return rows.sort((left, right) => {
+    for (const key of keys) {
+      const result = compareOpenCodeSqliteValues(left?.[key], right?.[key]);
+      if (result) return result;
+    }
+    return 0;
+  });
+}
+function compareOpenCodeSqliteValues(left, right) {
+  const leftNumber = Number(left);
+  const rightNumber = Number(right);
+  if (Number.isFinite(leftNumber) && Number.isFinite(rightNumber) && leftNumber !== rightNumber) return leftNumber - rightNumber;
+  return String(left || "").localeCompare(String(right || ""));
 }
 function openCodeSqliteMessagesFromRow(row, partRows, index) {

package/src/search.js CHANGED Viewed

@@ -17,6 +17,8 @@ const SQLITE_BUILD_BATCH_SIZE = 100;
 const RIPGREP_SEARCH_TIMEOUT_MS = 8000;
 const RIPGREP_BATCH_FILE_COUNT = 200;
 const MARKDOWN_MATCHES_PER_FILE = 3;
+const FTS_SEARCH_BATCH_SIZE = 250;
+const FTS_MAX_SCAN_ROWS = 5000;
 const _indexCache = {
   path: "",
   mtimeMs: 0,
@@ -671,33 +673,29 @@ function searchFtsSessions(query, queryTokens, context, env = process.env) {
   if (!ftsIndexAvailable(env, { noStaleCheck: Boolean(context.options.noRebuild || context.options.allowStaleFts) })) return null;
   const matchQuery = ftsMatchQuery(query);
   if (!matchQuery) return [];
-  const candidateLimit = Math.max(context.limit * 8, 80);
-  const rows = sqliteJson(
-    ftsPath,
-    [
-      "SELECT",
-      "  d.doc_id, d.session_id, d.provider, d.source_type, d.repo_canonical, d.repo_display,",
-      "  d.scope_canonical, d.cwd, d.title, d.started_at, d.occurred_at, d.role,",
-      "  d.event_id, d.event_kind, d.message_index, d.path, d.matched_text,",
-      "  snippet(docs_fts, 0, '', '', '...', 32) AS excerpt,",
-      "  bm25(docs_fts) AS rank",
-      "FROM docs_fts",
-      "JOIN docs d ON d.rowid = docs_fts.rowid",
-      `WHERE docs_fts MATCH ${sqliteString(matchQuery)}`,
-      "ORDER BY rank ASC, d.occurred_at DESC, d.started_at DESC",
-      `LIMIT ${candidateLimit};`
-    ].join("\n")
-  );
-  if (!rows) return null;
   const bySession = new Map();
-  for (const row of rows) {
-    const doc = ftsRowToDoc(row);
-    if (!matchesSessionFilter(doc, { ...context.filter, includeWebChats: context.includeWebChats, since: context.since })) continue;
-    if (!context.options.repo && context.repo && doc.repoCanonical === context.repo) {
-      row.rank = Number(row.rank || 0) - 0.05;
+  const batchSize = Math.max(context.limit * 20, FTS_SEARCH_BATCH_SIZE);
+  const maxScanRows = Math.max(batchSize, Math.min(FTS_MAX_SCAN_ROWS, context.limit * 500));
+  let offset = 0;
+  while (offset < maxScanRows && bySession.size < context.limit) {
+    const rows = ftsSearchRows(ftsPath, matchQuery, {
+      limit: Math.min(batchSize, maxScanRows - offset),
+      offset,
+      context
+    });
+    if (!rows) return null;
+    if (!rows.length) break;
+    offset += rows.length;
+    for (const row of rows) {
+      const doc = ftsRowToDoc(row);
+      if (!matchesSessionFilter(doc, { ...context.filter, includeWebChats: context.includeWebChats, since: context.since })) continue;
+      if (!context.options.repo && context.repo && doc.repoCanonical === context.repo) {
+        row.rank = Number(row.rank || 0) - 0.05;
+      }
+      if (!bySession.has(doc.sessionId)) bySession.set(doc.sessionId, { doc, row });
+      if (bySession.size >= context.limit) break;
     }
-    if (!bySession.has(doc.sessionId)) bySession.set(doc.sessionId, { doc, row });
-    if (bySession.size >= context.limit) break;
+    if (rows.length < batchSize) break;
   }
   return [...bySession.values()].slice(0, context.limit).map(({ doc, row }) => ({
     session_id: doc.sessionId,
@@ -720,6 +718,34 @@ function searchFtsSessions(query, queryTokens, context, env = process.env) {
   }));
 }
+function ftsSearchRows(ftsPath, matchQuery, options) {
+  const clauses = [`docs_fts MATCH ${sqliteString(matchQuery)}`];
+  const filter = options.context?.filter || {};
+  if (filter.provider) clauses.push(`d.provider = ${sqliteString(filter.provider)}`);
+  const sourceTypes = filter.sourceTypes?.length ? filter.sourceTypes : filter.sourceType ? [filter.sourceType] : [];
+  if (sourceTypes.length) clauses.push(`d.source_type IN (${sourceTypes.map(sqliteString).join(", ")})`);
+  if (options.context?.since) {
+    const since = sqliteString(options.context.since.toISOString());
+    clauses.push(`(d.started_at >= ${since} OR (d.started_at = '' AND d.occurred_at >= ${since}))`);
+  }
+  return sqliteJson(
+    ftsPath,
+    [
+      "SELECT",
+      "  d.doc_id, d.session_id, d.provider, d.source_type, d.repo_canonical, d.repo_display,",
+      "  d.scope_canonical, d.cwd, d.title, d.started_at, d.occurred_at, d.role,",
+      "  d.event_id, d.event_kind, d.message_index, d.path, d.matched_text,",
+      "  snippet(docs_fts, 0, '', '', '...', 32) AS excerpt,",
+      "  bm25(docs_fts) AS rank",
+      "FROM docs_fts",
+      "JOIN docs d ON d.rowid = docs_fts.rowid",
+      `WHERE ${clauses.join(" AND ")}`,
+      "ORDER BY rank ASC, d.occurred_at DESC, d.started_at DESC",
+      `LIMIT ${Math.max(1, Number(options.limit) || FTS_SEARCH_BATCH_SIZE)} OFFSET ${Math.max(0, Number(options.offset) || 0)};`
+    ].join("\n")
+  );
+}
 function ftsRowToDoc(row) {
   return {
     id: row.doc_id || "",

package/agentlog-spec.md DELETED Viewed

@@ -1,558 +0,0 @@
-# agentlog — spec v0.2
-A weekend-buildable archive and recall layer for agent coding sessions across Codex, ChatGPT exports, Claude, Gemini, Antigravity, Devin, Cursor, and Windsurf trajectory exports. Local-first, optionally cloud-backed via any S3-compatible storage. Web chats from ChatGPT and Claude.ai are importable via their official export flows. Windsurf's encrypted local Cascade cache remains disabled, but downloaded trajectory Markdown is importable.
-## What it does
-Agentlog optimizes for four product constraints:
-1. **Preserve the source history as-is.** Import copies raw source files into the archive before normalizing them, so agentlog can re-parse sessions as provider formats change.
-2. **Create a durable recall substrate.** Readable markdown plus canonical event JSONL is the source of truth for `/recall` skills, MCP tools, coding agents, and standardization across providers.
-3. **Show full histories clearly.** The CLI and local web viewer must make complete conversation histories easy to browse, search, export, and inspect without hiding tool calls or context.
-4. **Sync across machines.** Local storage remains canonical, but every archive object should be syncable to S3-compatible storage or another cloud target for backup, restore, and multi-device recall.
-Three layers, separable, in priority order:
-1. **Archive** — every agent session is captured as readable markdown plus raw transcripts, redacted at ingest, written to S3-compatible storage keyed by canonical repo identity.
-2. **Recall** — an MCP server exposing one tool, `search_past_sessions(query, repo, limit)`, that searches canonical event JSONL first and falls back to markdown/transcript retrieval. Available to any MCP-capable agent and wrapped by installable agent commands/skills.
-3. **Notify** — optional `/buzz`-style Slack skill that posts session summaries on completion. Cut from v0; ships separately as `agentlog-slack`.
-## Architecture
-```
-┌─────────────────────────────────────────────────┐
-│  Agents (Claude Code, Codex, Devin, Cursor)     │
-└──────────────┬──────────────────────────────────┘
-               │
-        ┌──────┴───────┐
-        │              │
-   OTel push       File tail / poll
-   (Claude Code)   (Codex JSONL,
-   (Cowork)        Cursor SQLite)
-        │              │
-        └──────┬───────┘
-               ▼
-   ┌───────────────────────┐         ┌──────────────────┐
-   │  agentlog supervisor  │◄────────│  Web chat import │
-   │  ├─ collector         │         │  (Claude.ai,     │
-   │  ├─ openobserve       │         │   ChatGPT export │
-   │  ├─ codex-watcher     │         │   files)         │
-   │  ├─ cursor-poller     │         └──────────────────┘
-   │  ├─ indexer           │
-   │  └─ importer          │
-   └──────────┬────────────┘
-              ▼
-   ┌───────────────────────┐
-   │  S3-compatible bucket │
-   │  (R2 / S3 / B2 /      │
-   │   MinIO / etc)        │
-   └──────────┬────────────┘
-              ▼
-   ┌───────────────────────┐
-   │  agentlog-recall MCP  │      ┌─────────────────────┐
-   │  (stdio, on-demand)   │      │  agentlog history   │
-   │  search_past_sessions │      │  (cchv-based)       │
-   └───────────────────────┘      └─────────────────────┘
-```
-## Process model
-**One supervisor, several workers.** The supervisor is the only process the user thinks about. It manages child processes, handles restarts with backoff, exposes a control socket at `~/.agentlog/control.sock`, and unifies logging.
-**Always-on workers (run inside the supervisor):**
-- OTel collector — receives Claude Code's OTLP pushes, ~40MB RAM
-- OpenObserve — local-mode storage and query, ~100MB RAM (skipped in remote/team mode)
-- Codex watcher — `fsnotify` on `~/.codex/sessions/`, ~10MB RAM, idle when no Codex activity
-- Devin/Cursor pollers — SQLite/transcript scans for configured local sources
-- Indexer — runs every 10 minutes if there are unindexed sessions; pauses on battery
-- Importer — runs at low priority during backfill operations; idle otherwise
-**On-demand workers (launched by their caller):**
-- `agentlog-recall` MCP server — spawned by the agent client over stdio when a session starts; killed when the session ends. No always-on cost.
-**Total always-on footprint, solo install:** ~150MB RAM, near-zero idle CPU. Comparable to a menu-bar chat app.
-**Team mode inverts this:** developer machines run only the collector and watchers (~50MB total) and forward OTLP to a team server that runs OpenObserve, the indexer, and the recall HTTP endpoint.
-## Auto-start at login
-`init` offers to install one platform-native login item for the local watcher. Default: yes, with explicit opt-out.
-**Prompt during init:**
-```
-Background Watcher
-The supervisor is agentlog's local watcher: it imports new history,
-refreshes indexes, and runs scheduled cloud sync.
-Leave this checked to install one login item. Uncheck it for manual-only mode.
-  1  [x] Start watcher at login
-      installs one user-level login item; unchecked means no continuous
-      watching until you run agentlog watcher start
-```
-**Per-platform implementation:**
-- **macOS:** `~/Library/LaunchAgents/com.agentlog.supervisor.plist`. `RunAtLoad: true`, `KeepAlive: { SuccessfulExit: false }` (restart on crash, not on clean shutdown). Logs to `~/Library/Logs/agentlog/`. Loaded with `launchctl load -w`.
-- **Linux:** systemd user unit at `~/.config/systemd/user/agentlog.service`. `Type=simple`, `Restart=on-failure`, `RestartSec=5`. Enabled with `systemctl --user enable --now`. Init detects whether `loginctl enable-linger` is needed and prompts separately: "Keep agentlog running when you're logged out? [y/N]" — default no.
-- **Windows:** Scheduled Task triggered at logon (`schtasks /create /tn "agentlog" /tr ... /sc onlogon`). Service-based install is a v1 enhancement.
-**Critical detail:** the launch agent runs `agentlog watcher start --foreground`, not `agentlog watcher start`. Foreground mode keeps the OS supervisor (launchd/systemd/Task Scheduler) as the parent — detaching breaks process tracking and crash restart.
-**Lifecycle commands:**
-```
-agentlog watcher login enable     # writes the launch agent/unit/task
-agentlog watcher login disable    # removes auto-start, keeps agentlog installed
-agentlog watcher login status     # shows current state
-agentlog uninstall [--keep-data]
-```
-`uninstall` is exhaustively tested: removes the launch agent, the config, the binaries, optionally the data (with confirmation). For a tool handling sensitive data, "remove all traces" must actually work.
-## Resource awareness
-The supervisor respects laptop realities:
-- **Power state.** On battery, the indexer interval lengthens from 10 to 30 minutes, compaction is skipped, and speculative pre-fetching is disabled.
-- **Network state.** If storage backend is remote and we're offline, writes spool to `~/.agentlog/spool/` and flush on reconnect. The OTLP collector already does this for spans; the same pattern extends to S3 writes.
-- **Sleep/wake.** On wake, supervisor verifies child health and restarts any that died during sleep.
-- **Cursor presence.** Cursor poller checks `pgrep -x Cursor` before each poll cycle; sleeps entirely when Cursor is not running.
-These are v0 features, not v1, because they're the difference between a tool people keep installed and a tool people uninstall after a week.
-## Storage layer
-**Backend: anything S3-compatible.** Backend choice is a config matter, not a code matter. Supported in `init`:
-- **Local** (default first-run) — `~/.agentlog/data/`, no cloud account
-- **R2** (recommended for personal/small team) — Cloudflare, no egress fees, free 10GB tier
-- **S3** (recommended for AWS-native teams)
-- **Custom endpoint** — covers B2, MinIO, Wasabi, Tigris, Hetzner, etc.
-**Bucket layout:**
-```
-s3://<bucket>/agentlog/
-  devices/
-    <device-name>/
-      sessions/
-        repo=<canonical-repo-key>/
-          provider=<claude_code|codex|cursor|devin>/
-          year=2026/month=04/day=26/
-            session=<session_id>.conversation.md
-            session=<session_id>.transcript.jsonl
-            session=<session_id>.metadata.json
-            session=<session_id>.events.jsonl
-        scope=<claude-web|chatgpt>/
-          year=2026/month=04/day=26/
-            session=<session_id>.conversation.md
-            session=<session_id>.transcript.jsonl
-            session=<session_id>.metadata.json
-            session=<session_id>.events.jsonl
-      indexes/
-        bm25/...       # local keyword/BM25-style index over events/transcripts
-  snapshots/
-    20260504T173000Z/
-      <device-name>/
-        sessions/...
-```
-Markdown conversations are the primary human-readable representation because
-agents and humans can inspect them with ordinary filesystem tools. Raw
-transcripts are stored alongside as immutable JSONL for provenance and
-re-indexing. `events.jsonl` is the canonical machine-readable recall substrate:
-one provider-independent JSONL event stream with `session.started`,
-`prompt.submitted`, `response.generated`, `tool.called`, and `tool.completed`.
-Structured analytics artifacts such as Parquet/OTel spans are optional siblings,
-not the default recall substrate.
-Every importer has a centralized semantic parser version in
-`src/parser-versions.js`. Parser versions are included in archive metadata and
-import fingerprints. The first npm release uses `1.0.0` as the baseline for
-every source type. After release, when parser output changes for the same raw
-input, the source-type version must be bumped in the same change so stale
-archives can be replaced.
-**Migration:** `agentlog sync configure` records the remote target through an interactive picker in terminals, while non-interactive scripts can still pass `--target`, `--endpoint`, and credentials. Choosing an existing remote opens useful next actions instead of only echoing the current config. `agentlog sync` uploads the same markdown-primary object layout to any S3-compatible target under `agentlog/devices/<device-name>/...`; terminal runs pick a remote, preview the upload-only plan, and confirm before writing. Local→R2 is a one-shot upload and then an incremental supervisor upload. Normal sync does not delete remote objects. `agentlog sync replace` is the explicit repair path: it previews the selected remote, requires typed confirmation, deletes only the current device namespace, and then uploads the current local archive. `agentlog sync wipe` is delete-only, asks for remote and scope, previews the chosen target and prefix, requires typed confirmation, and is followed by `agentlog sync` when the user wants to rebuild a remote copy from local state. Wipe scopes include the current device namespace, one snapshot, all snapshots, the configured prefix, and the bucket/root. Receive-only and two-way sync should read other device namespaces and merge normalized archive metadata without interpreting absence on one device as a delete. `agentlog sync snapshot` lists existing snapshots, asks for a name, confirms, and writes redundant point-in-time copies under `agentlog/snapshots/<timestamp>/<device-name>/...`.
-## Repo keying
-Every span and record gets `agentlog.repo.canonical`, derived in this order:
-1. `git config --get remote.origin.url` from the session's `cwd`, normalized: lowercase host, strip protocol, strip `.git`, strip trailing slash. `git@github.com:User/Repo.git` → `github.com/user/repo`.
-2. First-commit SHA fallback: `git rev-list --max-parents=0 HEAD` → `firstcommit:<sha>`.
-3. Non-git fallback: SHA-256 hash of the session's `cwd` path, normalized to be home-relative → `path:<sha256>`.
-Repo-level override at `.agentlog.yaml`:
-```yaml
-canonical_repo: github.com/myorg/private-name
-aliases:
-  - github.com/myorg/old-name
-```
-Web chat imports use `agentlog.scope.canonical` instead (e.g. `claude-web`, `chatgpt`) — see Web chat import section.
-## Redaction (at ingest, not at query)
-Three layers in the collector before anything hits storage:
-1. **Built-in patterns** (always on):
-   - AWS keys, OpenAI/Anthropic keys, GitHub tokens, Slack tokens
-   - JWT-shaped strings, private key blocks
-   - High-entropy strings >32 chars in `KEY=value` shapes
-2. **Env-var value scrubbing.** If `.env` files are read in a session, configured variable values are scrubbed wherever they appear in transcripts.
-3. **User-defined patterns** in `~/.agentlog/redaction.yaml`:
-```yaml
-patterns:
-  - name: internal_api
-    regex: 'https://[a-z]+\.internal\.acme\.com/[^\s]+'
-env_vars: [API_KEY, DATABASE_URL, STRIPE_SECRET]
-allowlist_repos:
-  - github.com/acme/public-docs
-```
-Each session gets a `redaction_summary` span: counts by category, no content. Users can audit "did this leak anything" without seeing what leaked.
-`agentlog show <session-id> --unredacted` re-renders un-redacted from local cache. Local-only — never works on remote/team archives.
-**Honesty about limits:** pattern-based redaction catches credentials. It does not catch personal/sensitive content (medical, legal, financial conversations). This matters especially for web chat imports, which is why those default to local-only storage.
-## Per-provider capture
-### Claude Code & Claude Cowork
-Native OTel. `init` merges into `~/.claude/settings.json`:
-```json
-{
-  "env": {
-    "CLAUDE_CODE_ENABLE_TELEMETRY": "1",
-    "OTEL_METRICS_EXPORTER": "otlp",
-    "OTEL_LOGS_EXPORTER": "otlp",
-    "OTEL_EXPORTER_OTLP_PROTOCOL": "http/json",
-    "OTEL_EXPORTER_OTLP_ENDPOINT": "http://localhost:4318",
-    "OTEL_LOG_USER_PROMPTS": "1",
-    "OTEL_RESOURCE_ATTRIBUTES": "service.name=claude-code,agentlog.user=<user>"
-  }
-}
-```
-### Codex CLI
-`fsnotify` watcher on `~/.codex/sessions/YYYY/MM/DD/`. It decompresses `.jsonl.zst` files, normalizes them to OTel `gen_ai.*` spans, and posts them to the local collector. Watcher state (file offsets) is kept in the SQLite database `~/.agentlog/state/codex-cursor.db`. ~200 lines of Go. To be deleted once OpenAI ships native OTel.
-### Cursor
-SQLite/transcript poller, 30-second interval, only active when `Cursor` process detected. Reads older `~/Library/Application Support/Cursor/User/workspaceStorage/*/state.vscdb` stores and newer `~/.cursor/projects/<project>/agent-transcripts/` JSON/JSONL transcripts on macOS/Linux. macOS Full Disk Access prompt documented with screenshots.
-### Devin
-SQLite importer for Devin for Terminal. Reads
-`~/.local/share/devin/cli/sessions.db`, reconstructs the visible message branch
-from `sessions.main_chain_id` plus `message_nodes.parent_node_id`, skips Devin's
-injected context messages, and archives user, assistant, and tool messages under
-provider `devin`. `AGENTLOG_DEVIN_SESSIONS_DB` can point at an alternate
-database.
-### Web chat import (Claude.ai, ChatGPT)
-Web chats don't have a real-time hook — Anthropic and OpenAI provide periodic export files only. agentlog imports these as one-shot operations per export.
-**Flow:**
-1. User exports from Claude.ai (Settings → Privacy → Export) or ChatGPT (Settings → Data Controls → Export)
-2. Email arrives with download link
-3. User runs: `agentlog import claude-web --file <downloaded file>` or `agentlog import chatgpt --file <downloaded file>`
-**Storage scope:** local-only by default, **even if the agentlog instance is configured for a team backend.** Web chats often contain personal content; opt-in required to share with team. Override with `--scope team`.
-**Repo keying:** web chats are stored under `scope=claude-web` or `scope=chatgpt` rather than a repo key. Heuristic repo inference (matching code blocks and error messages against known repos) deferred to v1.
-**Recall behavior:** excluded from agent-initiated `search_past_sessions` calls by default. Included in human-initiated `agentlog history` searches. Override via `--include-web-chats` flag on recall queries.
-**Frequency:** designed for periodic re-import, not continuous capture. Realistic cadence is "monthly or when the user remembers." Detection of new exports in `~/Downloads/` and prompting is a v1 enhancement.
-**Confirmation prompt for team-configured installs:**
-```
-$ agentlog import claude-web --file ...
-Storage backend: team (s3://acme-agentlog/)
-Web chats often contain personal content. By default they'll be
-stored only in your local archive, not the team archive.
-  1) Local only (recommended)
-  2) Team archive (your conversations will be visible to teammates)
-  3) Cancel
-```
-## Importing existing CLI history
-`init` scans for existing CLI conversations and offers to import them. The default scope is "last 30 days" — recent enough to be useful for recall, narrow enough to avoid surprises from old archives whose contents users may have forgotten.
-**Discovery during init:**
-```
-Scanning for existing conversations...
-✓ Codex CLI: 89 sessions across 12 projects (oldest: 2025-09-15)
-✓ Codex Desktop: 14 sessions across 3 projects (oldest: 2026-02-01)
-✓ Claude Code CLI: 247 sessions across 18 projects (oldest: 2025-11-03)
-✓ Claude Code Desktop: 4 sessions (oldest: 2026-02-04)
-✓ Claude Workspace: 7 sessions (oldest: 2026-02-04)
-✓ Gemini CLI: 2 sessions (oldest: 2026-03-01)
-✓ Antigravity: 2 sessions (oldest: 2025-11-19)
-✓ Devin CLI: 3 sessions (oldest: 2026-04-28)
-✓ Cursor: 31 sessions across 4 workspaces (oldest: 2026-01-02)
-Import existing history?
-  1) Last 30 days (default)
-  2) Everything
-  3) Choose specific repos
-  4) Skip for now
-```
-Import runs as a background worker at lower priority than live ingestion. Progress visible via `agentlog import status`. Idempotent on re-run (tracks imported session IDs in state DB). Sessions whose `cwd` no longer exists fall back to path-hash repo keying.
-**Standalone command:**
-```
-agentlog import [--source codex-cli|codex-desktop|claude|claude-code-desktop|claude-workspace|claude-sdk|gemini-cli|antigravity|devin-cli|cursor|all]
-                [--since 30d|all]
-                [--repos <list>]
-                [--dry-run]
-```
-`--dry-run` shows what would be imported without doing it.
-**Web chat import is a separate command** — it requires a file argument and has different default storage scope. Not part of the init flow because exports take time to generate.
-## Collector
-A single Go binary wrapping the upstream OTel collector with three custom processors:
-1. `repokeyprocessor` — derives canonical repo key from `cwd`, or scope key for web chats
-2. `redactionprocessor` — runs the three redaction layers
-3. `agentnormalizer` — for file-tail providers and import sources, converts ingested events into OTel spans matching `gen_ai.*` semantic conventions
-Exporters: OTLP→OpenObserve for spans/metrics/logs; direct `s3exporter` for raw transcripts. Both share S3 credentials.
-## Recall layer
-Separate binary, `agentlog-recall`. Spawned by agent clients over stdio (preferred) or run as HTTP server for team mode.
-**One MCP tool:**
-```
-search_past_sessions(query: string, repo?: string, limit?: int = 10,
-                     include_web_chats?: bool = false)
-  → list of message excerpts with session links
-```
-**Recall pipeline:**
-- Builds a local keyword/BM25-style index over `events.jsonl` when present
-- Indexes prompt, response, tool-call, and tool-result event text independently
-- Aggregates event hits back to sessions for CLI/MCP compatibility
-- Returns optional `event_id`, `event_kind`, `message_index`, and matched text
-- Falls back to transcript/markdown search for legacy archives without events
-**Retrieval:** event-first over canonical JSONL. `repo` parameter is a hard filter.
-Without it, results are weighted toward the calling agent's current `cwd` repo.
-Web chats are excluded unless `include_web_chats=true`.
-**No memory promotion in v0.** Raw evidence with good retrieval beats lossy summarization. Add summarization in v1 only if v0 retrieval proves insufficient.
-**Adding to agents:**
-```
-agentlog integrations add-to claude    # writes ~/.claude/mcp.json, /commands/recall.md, and /skills/agentlog-recall/SKILL.md
-agentlog integrations add-to cursor    # writes ~/.cursor/mcp.json
-agentlog integrations add-to codex     # writes ~/.codex/config.toml
-```
-Generated recall commands and skills should let the agent choose the first
-`agentlog history` query from the user's request. They should prefer concise,
-distinctive search terms over blindly passing the full `/recall` argument string
-through to the CLI. Skill-style files should include a concise command table,
-workflow, query-selection guidance, archive/filter hints, important rules, and
-troubleshooting. Archive hints should note that sessions live under
-`~/.agentlog/data/agentlog/sessions/repo=<repo-or-path-key>/provider=<provider>/...`,
-git repos use canonical keys like `github.com/org/repo`, non-git directories may
-use stable `path:<hash>` keys, and `agentlog history --repo "<repo-or-path>"`
-matches canonical repo keys, local `cwd`, display labels, web scopes, and path
-fragments.
-## History viewer
-v0 ships a dependency-free local viewer behind `agentlog web`. It lists sessions in a repo tree sorted by last updated time, pages large folders with a load-more control, searches the same event-first recall index, filters by repo/provider/date, and opens full conversations through the CLI API. The static viewer follows shadcn/ui-style tokens and compact button/input/select/sidebar patterns without requiring a frontend build step. Stable `path:<hash>` keys remain valid archive identifiers for folders without git identity, but the viewer displays the local folder path. The transcript pane defaults to readable chat bubbles for user, assistant, system, and tool messages, with a markdown toggle for the canonical archive file. Tool rendering reads canonical events or normalized metadata first, uses category/icon/target fields for consistent Bash/edit/read/search/web/task/skill/MCP cards, and uses raw text patterns only for legacy archives.
-**Commands:**
-```
-agentlog history                          # native app pointed at archive
-agentlog web                              # web UI on localhost:7824
-agentlog history "query" --provider codex-cli
-agentlog history --repo github.com/org/repo
-agentlog history --since 7d
-agentlog history --include-web-chats
-agentlog show <session-id>
-```
-The viewer's search hits the same retrieval endpoint as the recall MCP server — humans and agents see the same world (with the human-vs-agent default scope difference for web chats).
-**For headless/server contexts** (SSH'd into a dev box), `agentlog web` serves the UI on a local port with a bearer-token-in-URL auth pattern.
-**Team mode** serves cchv as a web service at the same endpoint as the OTLP collector, gated by the same auth. v0 team mode: "everyone sees everything," documented as such. Per-user filtering and permissions in v1.
-## CLI surface
-Complete user-facing commands:
-```
-# Setup and lifecycle
-agentlog init [--storage local|r2|s3|custom] [--remote URL]
-agentlog watcher start [--foreground]
-agentlog watcher stop
-agentlog watcher logs [--follow]
-agentlog watcher login <enable|disable|status>
-agentlog status
-agentlog config <show|path|get|set|setup|sources> [args]
-agentlog sync configure
-agentlog sync [--endpoint <url>] [--bucket <name>] [--access-key-id <id>] [--secret-access-key <key>] [--prefix agentlog] [--yes|--dry-run]
-agentlog sync snapshot [--name <label>] [--yes|--dry-run]
-agentlog sync replace
-agentlog sync wipe [--scope device|snapshot|snapshots|prefix|bucket] [--snapshot-name <name>] [--yes|--dry-run]
-agentlog doctor [--json]
-agentlog uninstall [--keep-data]
-# Capture management
-agentlog show <session-id> --unredacted
-agentlog redact reapply
-agentlog index <pause|resume|status>
-# Import
-agentlog import [--source codex-cli|codex-desktop|claude|claude-code-desktop|claude-workspace|claude-sdk|gemini-cli|antigravity|devin-cli|cursor|all] [--since 30d|all] [--repos <list>] [--dry-run]
-agentlog import claude-web --file <path> [--scope local|team]
-agentlog import chatgpt --file <path> [--scope local|team]
-agentlog import status
-# Recall
-agentlog mcp serve
-agentlog integrations add-to <codex|claude|gemini|antigravity|cursor>
-agentlog integrations recall [target]
-agentlog index rebuild
-# Viewing
-agentlog history [query] [--repo <repo>] [--provider <provider>] [--since <duration>] [--include-web-chats]
-agentlog web [--port <port>] [--no-open]
-agentlog show <session-id> [--json|--path|--open]
-# Team mode
-agentlog mcp serve
-```
-## Setup flows
-### Solo, local only (~90 seconds)
-```
-brew install agentlog
-agentlog init                    # picks local storage, prompts for the login watcher
-                                 # → "Start watcher at login"
-                                 # → scans for existing history
-                                 # → "Import last 30 days? [Y/n]"
-                                 # → if checked, writes launch agent and starts supervisor
-                                 # → import runs in background
-```
-After `init` completes, prints:
-```
-✓ Launch agent installed at ~/Library/LaunchAgents/com.agentlog.supervisor.plist
-✓ Service started (PID 47291)
-✓ Collector listening on localhost:4318
-✓ Claude Code config updated at ~/.claude/settings.json
-✓ Background import started: 247 sessions queued (~25min)
-View your history: `agentlog history`
-Try it out: open Claude Code, have a quick conversation, then run
-`agentlog status` to see it captured.
-```
-### Solo, R2-backed (~5 minutes)
-```
-brew install agentlog
-agentlog init --storage r2
-# → opens dash.cloudflare.com/?to=/:account/r2/api-tokens
-# → user pastes credentials back into CLI
-# → agentlog creates bucket if needed, validates write
-# → same auto-start and import prompts as above
-```
-### Team (afternoon for operator, ~1 minute per developer)
-```
-# Operator, once:
-terraform apply              # ships agentlog/deploy-aws or deploy-cloudflare
-                             # outputs OTLP endpoint and bootstrap token
-# Each developer:
-agentlog init --remote https://agentlog.myteam.com --token <token>
-                             # auto-start prompt; import scoped to team policy;
-                             # no local OpenObserve (team server runs it)
-```
-Companion deployment modules: `agentlog/deploy-aws` (ECS + ALB + S3), `agentlog/deploy-cloudflare` (R2 + Workers for auth proxy), `agentlog/deploy-fly` (Fly.io single-region). The Terraform module is the unsexy linchpin for team adoption.
-## Privacy and data handling commitments
-Codified because they shape every other decision:
-1. **No phone-home telemetry.** agentlog itself ships zero usage analytics anywhere. Any future opt-in metrics live on a separate channel.
-2. **No filesystem scanning.** Only specific known paths: `~/.codex/`, `~/.claude/`, `~/.gemini/`, `~/.local/share/devin/cli/sessions.db`, Cursor's storage, `~/.agentlog/`, plus user-specified import file paths. Windsurf encrypted cache paths are excluded while Cascade transcripts remain encrypted; user-selected downloaded trajectory Markdown files are allowed.
-3. **No process inspection beyond `pgrep`.** We check whether Cursor is running. We don't introspect what it's doing.
-4. **Redaction at ingest, not query.** Credentials that match the redaction patterns never land in storage in the first place.
-5. **Reveal is local-only.** Un-redacted content is never reconstructible from team/remote archives.
-6. **Web chats default to local-only.** Even on team-configured installs. Explicit override required to share.
-7. **Agent recall excludes web chats by default.** Human-initiated history viewing includes them.
-8. **Redaction limits are stated honestly.** Pattern-based redaction catches credentials, not sensitive personal content.
-9. **Uninstall removes everything.** Tested.
-## What's deferred
-- **Web UI beyond cchv** — v0.2 if cchv proves insufficient
-- **Slack notify skill** (`agentlog-slack`) — separate repo, consumes from archive; v0.2
-- **Other web chat sources** (Gemini, Perplexity, Grok) — v1, same pattern, different parsers
-- **Heuristic repo inference for web chats** — v1
-- **Auto-detection of new export files in `~/Downloads/`** — v1
-- **Summary generation / "revival packets"** — v1, only if raw retrieval proves insufficient
-- **Cross-machine session linking** — v1, depends on canonical repo keying landing solid
-- **SSO** — v1, enterprise tier
-- **Per-user permissions for team viewing** — v1
-- **Cursor extension for richer capture** — v1 if SQLite proves too lossy
-- **Windows Service install** — v1, currently Scheduled Task only
-- **Direct integration with Claude.ai/ChatGPT desktop app local stores** — probably never; respect the boundary
-## What this is not
-- Not a memory-curation product. Memorix and Hindsight occupy that space; agentlog can be their substrate.
-- Not an enterprise observability tool. SigNoz/Datadog/Honeycomb are better and accept the same OTel feeds.
-- Not a session-replay tool.
-- Not a Slack bot. Slack integration is downstream of the archive, intentionally.
-- Not a real-time capture tool for web chats. Those are import-only by design.
-## The differentiator
-A redaction-first, repo-keyed, S3-compatible archive substrate that the agent itself can query via MCP, with backfill of existing CLI history and import paths for web chats. Every piece exists separately. The value is in composing them under one CLI with a setup flow that doesn't take an afternoon and a privacy story that doesn't require trusting a vendor.
-If it's good, it disappears: the user forgets it's running, their agents quietly get smarter at their repos over time, and accumulated debugging knowledge stops evaporating between sessions.
-That's v0.