npm - agentboss - Versions diffs - 0.1.0 - Mend

agentboss 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

package/README.md +34 -0
package/bin/aboss.js +288 -0
package/client/dist/assets/index-C1wFD_Vo.css +1 -0
package/client/dist/assets/index-DBj1Ujlx.js +137 -0
package/client/dist/index.html +34 -0
package/package.json +64 -0
package/server/analysis/daily-aggregator.js +258 -0
package/server/analysis/difficulty.js +129 -0
package/server/analysis/dimensions/ai-knowledge.js +172 -0
package/server/analysis/dimensions/ai-tools.js +161 -0
package/server/analysis/dimensions/judgement.js +107 -0
package/server/analysis/dimensions/llm-merge.js +57 -0
package/server/analysis/dimensions/output-quality.js +167 -0
package/server/analysis/dimensions/problem-definition.js +104 -0
package/server/analysis/dimensions/system-thinking.js +225 -0
package/server/analysis/evidence-builder.js +104 -0
package/server/analysis/job.js +273 -0
package/server/analysis/report-builder.js +581 -0
package/server/analysis/scoring-v2.js +72 -0
package/server/analysis/text-signals.js +179 -0
package/server/analysis/thresholds-v2.js +358 -0
package/server/api/advice.js +124 -0
package/server/api/analysis.js +141 -0
package/server/api/execution.js +330 -0
package/server/api/metrics.js +277 -0
package/server/api/overview.js +308 -0
package/server/api/project.js +255 -0
package/server/api/reports.js +125 -0
package/server/api/sessions.js +118 -0
package/server/api/settings.js +119 -0
package/server/db/connection.js +175 -0
package/server/db/queries.js +1051 -0
package/server/db/schema.js +487 -0
package/server/etl/active-time.js +150 -0
package/server/etl/backfill-subagents.js +178 -0
package/server/etl/claude-code.js +826 -0
package/server/etl/detect.js +341 -0
package/server/etl/judge-filter.js +117 -0
package/server/etl/opencode.js +606 -0
package/server/execution/job.js +662 -0
package/server/execution/prompt.js +227 -0
package/server/execution/runner.js +218 -0
package/server/index.js +94 -0
package/server/llm/advice-prompt.js +339 -0
package/server/llm/advice.js +384 -0
package/server/llm/analysis-prompt.js +162 -0
package/server/llm/cli-runner.js +249 -0
package/server/llm/judge-prompts.js +179 -0
package/server/llm/judge.js +118 -0
package/server/llm/project-advice-prompt.js +332 -0
package/server/llm/project-advice.js +491 -0
package/server/llm/session-analyzer.js +122 -0
package/server/utils/project.js +80 -0

package/server/etl/claude-code.js ADDED Viewed

@@ -0,0 +1,826 @@
+/**
+ * Claude Code ETL collector for Agent Boss
+ *
+ * Reads session, message, and tool-call data from Claude Code's JSONL/JSON
+ * files under ~/.claude/ and writes unified rows into boss.db.
+ *
+ * Source layout (§4.3):
+ *   ~/.claude/
+ *     projects/
+ *       <encoded-path>/           (e.g. "-Users-user-project")
+ *         sessions-index.json     (JSON array of session metadata)
+ *         <sessionId>.jsonl       (one JSON object per line)
+ *
+ * @author Felix
+ */
+const fs = require('fs');
+const path = require('path');
+const { saveDb } = require('../db/connection');
+const { isJudgePrompt } = require('./judge-filter');
+const {
+  upsertSession,
+  bulkInsertMessages,
+  bulkInsertParts,
+  bulkInsertToolCalls,
+  getEtlState,
+  updateEtlState,
+} = require('../db/queries');
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+/** Tag stored in the `source` field of mapped rows; also the ETL-state key. */
+const SOURCE = 'claude-code';
+/** Number of sessions handled between two ETL-state checkpoints. */
+const BATCH_SIZE = 50;
+/**
+ * Rough per-token pricing, expressed in USD per million tokens and keyed by
+ * model family.  Only plain input and output tokens are priced here.
+ */
+const MODEL_PRICING = {
+  // Haiku-class models
+  haiku: { input: 0.25, output: 1.25 },
+  // Sonnet-class models
+  sonnet: { input: 3, output: 15 },
+  // Opus-class models
+  opus: { input: 15, output: 75 },
+};
+// ---------------------------------------------------------------------------
+// Date / time helpers
+// ---------------------------------------------------------------------------
+/**
+ * Reduce an ISO 8601 timestamp to its UTC calendar date (YYYY-MM-DD).
+ *
+ * @param {string|null} iso  Timestamp to convert; falsy values yield null.
+ * @returns {string|null}    The date portion, or null when `iso` is missing
+ *                           or cannot be converted.
+ */
+function isoToDate(iso) {
+  if (!iso) {
+    return null;
+  }
+  let day;
+  try {
+    // toISOString() throws for an invalid Date, which lands in the catch.
+    const utcStamp = new Date(iso).toISOString();
+    day = utcStamp.substring(0, 10);
+  } catch (_) {
+    day = null;
+  }
+  return day;
+}
+/**
+ * Compute the difference between two ISO timestamps in whole minutes.
+ *
+ * The result is rounded to the nearest minute and clamped at zero, so an
+ * end time earlier than the start time yields 0.  Missing or unparseable
+ * timestamps also yield 0.
+ *
+ * @param {string} startIso
+ * @param {string} endIso
+ * @returns {number}  Non-negative integer number of minutes.
+ */
+function diffMinutes(startIso, endIso) {
+  if (!startIso || !endIso) return 0;
+  try {
+    const startMs = new Date(startIso).getTime();
+    const endMs = new Date(endIso).getTime();
+    // getTime() reports an unparseable timestamp as NaN instead of throwing,
+    // and Math.max(0, NaN) is NaN — so invalid input must be rejected here.
+    if (!Number.isFinite(startMs) || !Number.isFinite(endMs)) return 0;
+    return Math.max(0, Math.round((endMs - startMs) / 60000));
+  } catch (_) {
+    return 0;
+  }
+}
+// ---------------------------------------------------------------------------
+// Cost estimation
+// ---------------------------------------------------------------------------
+/**
+ * Pick the pricing tier that matches a model ID.
+ *
+ * The match is a case-insensitive substring test for "opus" and then
+ * "haiku"; anything else, including a missing ID, uses the Sonnet tier.
+ *
+ * @param {string|null} modelId
+ * @returns {{input: number, output: number}}
+ */
+function getPricing(modelId) {
+  const name = modelId ? modelId.toLowerCase() : '';
+  for (const tier of ['opus', 'haiku']) {
+    if (name.includes(tier)) {
+      return MODEL_PRICING[tier];
+    }
+  }
+  return MODEL_PRICING.sonnet;
+}
+/**
+ * Estimate the USD cost of a request from its token counts.
+ *
+ * Falsy token counts are treated as zero.  The rates come from getPricing()
+ * and are expressed per million tokens.
+ *
+ * @param {number} inputTokens
+ * @param {number} outputTokens
+ * @param {string|null} modelId
+ * @returns {number}
+ */
+function estimateCost(inputTokens, outputTokens, modelId) {
+  const { input: inputRate, output: outputRate } = getPricing(modelId);
+  const inputCost = ((inputTokens || 0) * inputRate) / 1_000_000;
+  const outputCost = ((outputTokens || 0) * outputRate) / 1_000_000;
+  return inputCost + outputCost;
+}
+// ---------------------------------------------------------------------------
+// Encoded path helpers
+// ---------------------------------------------------------------------------
+/**
+ * Turn a Claude Code encoded directory name back into a filesystem path.
+ * e.g. "-Users-user-project" → "/Users/user/project"
+ *
+ * Every dash becomes a slash, so the decoding is lossy: a dash that was part
+ * of a real directory name is converted as well.
+ *
+ * @param {string} encodedName
+ * @returns {string}
+ */
+function decodeProjectPath(encodedName) {
+  const segments = encodedName.split('-');
+  return segments.join('/');
+}
+// ---------------------------------------------------------------------------
+// JSONL parsing
+// ---------------------------------------------------------------------------
+/**
+ * Read and parse a JSONL file, returning an array of parsed objects paired
+ * with their 1-based line number.
+ *
+ * Blank lines are ignored.  Lines that are not valid JSON, and lines whose
+ * JSON value is not an object (`null`, numbers, strings, booleans), are
+ * skipped and reported through `log`, so callers can safely read properties
+ * from every returned `data`.  An unreadable file is logged and produces an
+ * empty array.
+ *
+ * @param {string} filePath
+ * @param {(msg: string) => void} log
+ * @returns {{lineNumber: number, data: object}[]}
+ */
+function readJsonl(filePath, log) {
+  let content;
+  try {
+    content = fs.readFileSync(filePath, 'utf8');
+  } catch (err) {
+    log(`Cannot read JSONL file ${filePath}: ${err.message}`);
+    return [];
+  }
+  const lines = content.split('\n');
+  const results = [];
+  for (let i = 0; i < lines.length; i++) {
+    // trim() also drops the "\r" left behind by CRLF line endings.
+    const line = lines[i].trim();
+    if (!line) continue;
+    let data;
+    try {
+      data = JSON.parse(line);
+    } catch (err) {
+      log(`JSONL parse error at ${filePath}:${i + 1}: ${err.message}`);
+      continue;
+    }
+    // "null" or "42" parse fine but are not entries; a null here would make
+    // every downstream `data.type` access throw.
+    if (data === null || typeof data !== 'object') {
+      log(`JSONL entry at ${filePath}:${i + 1} is not an object; skipped`);
+      continue;
+    }
+    results.push({ lineNumber: i + 1, data });
+  }
+  return results;
+}
+// ---------------------------------------------------------------------------
+// Mapping functions
+// ---------------------------------------------------------------------------
+/**
+ * Find the model name that appears most often in `message.model` across the
+ * parsed JSONL entries.  On a tie the model seen first wins.
+ *
+ * @param {{lineNumber: number, data: object}[]} entries
+ * @returns {string|null}  The winning model, or null when no entry names one.
+ */
+function findMostCommonModel(entries) {
+  const tally = {};
+  entries.forEach(({ data }) => {
+    const name = data.message && data.message.model;
+    if (!name) return;
+    tally[name] = (tally[name] || 0) + 1;
+  });
+  let winner = null;
+  let winnerHits = 0;
+  Object.keys(tally).forEach((name) => {
+    const hits = tally[name];
+    // Strictly greater, so an earlier model keeps the lead on equal counts.
+    if (hits > winnerHits) {
+      winner = name;
+      winnerHits = hits;
+    }
+  });
+  return winner;
+}
+/**
+ * Opening tags of the synthetic user messages Claude Code injects for
+ * housekeeping — slash-command echoes, stdout captures, resume caveats and
+ * so on.  These XML-tagged blobs are not what the human typed, so they are
+ * unsuitable as session titles.
+ */
+const SYNTHETIC_USER_TAGS = [
+  '<local-command-caveat',
+  '<local-command-stdout',
+  '<local-command-stderr',
+  '<command-name',
+  '<command-message',
+  '<command-args',
+  '<bash-input',
+  '<bash-stdout',
+  '<bash-stderr',
+  '<system-reminder',
+  '<user-memory-input',
+];
+/**
+ * Tell whether a piece of user text is synthetic: either blank after
+ * stripping leading whitespace, or starting with one of SYNTHETIC_USER_TAGS.
+ * Non-string input is never considered synthetic.
+ *
+ * @param {*} text
+ * @returns {boolean}
+ */
+function isSyntheticUserText(text) {
+  if (typeof text !== 'string') {
+    return false;
+  }
+  const body = text.trimStart();
+  if (body.length === 0) {
+    return true;
+  }
+  for (const tag of SYNTHETIC_USER_TAGS) {
+    if (body.startsWith(tag)) {
+      return true;
+    }
+  }
+  return false;
+}
+/**
+ * Derive a session title from the first *real* user message, ignoring the
+ * synthetic XML-tagged housekeeping messages Claude Code inserts.
+ *
+ * For array content only the first non-empty text (a bare string block or a
+ * block's `text`) is considered.  The title is capped at 100 characters.
+ *
+ * @param {{lineNumber: number, data: object}[]} entries
+ * @returns {string|null}  The title, or null when no usable message exists.
+ */
+function extractTitle(entries) {
+  for (const entry of entries) {
+    const record = entry.data;
+    if (record.type !== 'user') continue;
+    const content = record.message && record.message.content;
+    let candidate = null;
+    if (typeof content === 'string') {
+      candidate = content;
+    } else if (Array.isArray(content)) {
+      const firstText = content
+        .map((block) => (typeof block === 'string' ? block : (block && block.text)))
+        .find((value) => typeof value === 'string' && value.length > 0);
+      if (firstText !== undefined) {
+        candidate = firstText;
+      }
+    }
+    if (candidate && !isSyntheticUserText(candidate)) {
+      // slice() returns short strings unchanged, so no length check is needed.
+      return candidate.slice(0, 100);
+    }
+  }
+  return null;
+}
+/**
+ * Sum the `message.usage` token counters over every JSONL entry of a
+ * session.  Entries without usage data are ignored and missing counters
+ * count as zero.
+ *
+ * @param {{lineNumber: number, data: object}[]} entries
+ * @returns {{input: number, output: number, cacheRead: number, cacheWrite: number}}
+ */
+function aggregateTokens(entries) {
+  const totals = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
+  entries.forEach(({ data }) => {
+    const usage = data.message && data.message.usage;
+    if (!usage) return;
+    totals.input += usage.input_tokens || 0;
+    totals.output += usage.output_tokens || 0;
+    totals.cacheRead += usage.cache_read_input_tokens || 0;
+    totals.cacheWrite += usage.cache_creation_input_tokens || 0;
+  });
+  return totals;
+}
+/**
+ * Build the unified session row from a sessions-index entry and the parsed
+ * JSONL entries of that session.
+ *
+ * Counts, token totals, the dominant model and the title all come from the
+ * JSONL entries.  Start and end times prefer the index entry and fall back to
+ * the first and last JSONL timestamps.
+ *
+ * @param {object} sessionMeta   Entry from sessions-index.json
+ * @param {{lineNumber: number, data: object}[]} entries  Parsed JSONL lines
+ * @param {string} projectPath   Decoded project path
+ * @returns {object}
+ */
+function mapSession(sessionMeta, entries, projectPath) {
+  let messageCount = 0;
+  let toolCallCount = 0;
+  let errorCount = 0;
+  for (const { data } of entries) {
+    if (data.type === 'user' || data.type === 'assistant') {
+      messageCount += 1;
+    }
+    if (data.toolName) {
+      toolCallCount += 1;
+    }
+    const toolFailed = Boolean(data.toolUseResult && data.toolUseResult.error);
+    const stoppedOnError = Boolean(
+      data.message && data.message.stop_reason === 'error'
+    );
+    if (toolFailed || stoppedOnError) {
+      errorCount += 1;
+    }
+  }
+  const tokens = aggregateTokens(entries);
+  const model = findMostCommonModel(entries);
+  // The index entry may lack created/modified (for instance when sessionMeta
+  // was synthesised from a directory listing), so keep the JSONL timestamps
+  // at hand as a fallback.
+  const stamps = [];
+  for (const entry of entries) {
+    const stamp = entry.data && entry.data.timestamp;
+    if (typeof stamp === 'string' && stamp) {
+      stamps.push(stamp);
+    }
+  }
+  const firstTs = stamps.length > 0 ? stamps[0] : null;
+  const lastTs = stamps.length > 0 ? stamps[stamps.length - 1] : null;
+  const startedAt = sessionMeta.created || firstTs || null;
+  const endedAt = sessionMeta.modified || lastTs || startedAt || null;
+  return {
+    id: sessionMeta.sessionId,
+    source: SOURCE,
+    date: isoToDate(startedAt),
+    started_at: startedAt,
+    ended_at: endedAt,
+    duration_minutes: diffMinutes(startedAt, endedAt),
+    active_minutes: null, // calculated separately by active-time calculator
+    message_count: messageCount,
+    tokens_input: tokens.input,
+    tokens_output: tokens.output,
+    tokens_reasoning: 0,
+    tokens_cache_read: tokens.cacheRead,
+    tokens_cache_write: tokens.cacheWrite,
+    cost_usd: estimateCost(tokens.input, tokens.output, model),
+    project: sessionMeta.projectPath || projectPath || null,
+    title: extractTitle(entries),
+    model,
+    error_count: errorCount,
+    tool_call_count: toolCallCount,
+    summary_additions: 0,
+    summary_deletions: 0,
+    summary_files: 0,
+    reverted: 0,
+    time_compacting: 0,
+  };
+}
+/**
+ * Flatten a Claude Code tool_result `content` payload into a short string.
+ *
+ * A string is used as is.  For an array, bare strings and the `text` of
+ * blocks that carry one are joined with newlines; other blocks are ignored.
+ * Any other payload yields ''.  The result is trimmed and, beyond 1024
+ * characters, cut to 1024 characters followed by an ellipsis.
+ *
+ * @param {*} content
+ * @returns {string}
+ */
+function stringifyToolResult(content) {
+  if (content == null) {
+    return '';
+  }
+  let flat = '';
+  if (typeof content === 'string') {
+    flat = content;
+  } else if (Array.isArray(content)) {
+    flat = content
+      .flatMap((block) => {
+        if (typeof block === 'string') return [block];
+        if (block && typeof block.text === 'string') return [block.text];
+        return [];
+      })
+      .join('\n');
+  }
+  flat = flat.trim();
+  return flat.length > 1024 ? flat.slice(0, 1024) + '…' : flat;
+}
+/**
+ * Build the unified message row for one user/assistant JSONL entry.
+ *
+ * The message content may be a plain string or an array of typed blocks:
+ *   - {type:"text", text:"..."}            assistant prose / user message
+ *   - {type:"thinking", thinking:"..."}    extended-thinking trace
+ *   - {type:"tool_use", name, input}       skipped here; tool calls are
+ *                                          recorded separately
+ *   - {type:"tool_result", content:...}    tool output fed back to the model
+ * The extracted text (used by the raw-conversation replay) is capped at
+ * 4096 characters; `content_length` is the sum of the piece lengths before
+ * any "[thinking]" / "[tool_result]" prefix is added.
+ *
+ * @param {string} sessionId
+ * @param {number} lineNumber
+ * @param {object} data  Parsed JSONL line
+ * @returns {object}
+ */
+function mapMessage(sessionId, lineNumber, data) {
+  const message = data.message;
+  const usage = (message && message.usage) || {};
+  const modelId = (message && message.model) || null;
+  const content = message && message.content;
+  const pieces = [];
+  let contentLength = 0;
+  const record = (length, rendered) => {
+    contentLength += length;
+    pieces.push(rendered);
+  };
+  // Treat a plain string as a single-block list so both shapes share one loop.
+  let blocks = [];
+  if (typeof content === 'string') {
+    blocks = [content];
+  } else if (Array.isArray(content)) {
+    blocks = content;
+  }
+  for (const block of blocks) {
+    if (typeof block === 'string') {
+      record(block.length, block);
+      continue;
+    }
+    if (!block || typeof block !== 'object') continue;
+    if (typeof block.text === 'string' && block.text.length) {
+      record(block.text.length, block.text);
+    } else if (typeof block.thinking === 'string' && block.thinking.length) {
+      record(block.thinking.length, `[thinking] ${block.thinking}`);
+    } else if (block.type === 'tool_result') {
+      const piece = stringifyToolResult(block.content);
+      if (piece) {
+        record(piece.length, `[tool_result] ${piece}`);
+      }
+    }
+    // tool_use blocks fall through on purpose: repeating them in the
+    // transcript would only duplicate the tool-call timeline.
+  }
+  let text = pieces.join('\n').trim() || null;
+  if (text && text.length > 4096) {
+    text = text.slice(0, 4096);
+  }
+  const inputTokens = usage.input_tokens || 0;
+  const outputTokens = usage.output_tokens || 0;
+  return {
+    id: `cc_${sessionId}_${lineNumber}`,
+    session_id: sessionId,
+    source: SOURCE,
+    role: data.type,
+    timestamp: data.timestamp || null,
+    tokens_input: inputTokens,
+    tokens_output: outputTokens,
+    tokens_reasoning: 0,
+    cost_usd: estimateCost(inputTokens, outputTokens, modelId),
+    content_length: contentLength,
+    is_error: 0,
+    model_id: modelId,
+    text,
+  };
+}
+/**
+ * Build the unified part row for one JSONL entry.
+ *
+ * The part ID is derived from the session ID and line number, so two parts
+ * built from the same line share an ID.
+ *
+ * @param {string} sessionId
+ * @param {number} lineNumber
+ * @param {object} data  Parsed JSONL line
+ * @param {string|null} messageId  ID of the parent message (if applicable)
+ * @returns {object}
+ */
+function mapPart(sessionId, lineNumber, data, messageId) {
+  const { type, timestamp } = data;
+  const part = {
+    id: `cc_${sessionId}_part_${lineNumber}`,
+    message_id: messageId,
+    session_id: sessionId,
+    source: SOURCE,
+    type: type || null,
+    timestamp: timestamp || null,
+  };
+  return part;
+}
+/**
+ * Pull the target file path out of a tool input object.
+ *
+ * The keys `path`, `file_path` and `filePath` are checked in that order and
+ * the first one holding a string wins.
+ *
+ * @param {object|null} toolInput
+ * @returns {string|null}  The path, or null when none of the keys is a string.
+ */
+function extractTargetFile(toolInput) {
+  if (!toolInput) {
+    return null;
+  }
+  for (const key of ['path', 'file_path', 'filePath']) {
+    const value = toolInput[key];
+    if (typeof value === 'string') {
+      return value;
+    }
+  }
+  return null;
+}
+/**
+ * Build the unified tool-call row for a JSONL entry that carries `toolName`.
+ *
+ * A call is marked 'error' when `toolUseResult.error` is truthy; that value,
+ * converted to a string, becomes the error message.
+ *
+ * @param {string} sessionId
+ * @param {number} lineNumber
+ * @param {object} data  Parsed JSONL line
+ * @returns {object}
+ */
+function mapToolCall(sessionId, lineNumber, data) {
+  const rowId = `cc_${sessionId}_tool_${lineNumber}`;
+  const failed = Boolean(data.toolUseResult && data.toolUseResult.error);
+  return {
+    id: rowId,
+    // NOTE(review): part_id repeats the tool-call ID, whereas part rows use
+    // the "cc_<session>_part_<line>" form — confirm this is intended.
+    part_id: rowId,
+    session_id: sessionId,
+    source: SOURCE,
+    tool_name: data.toolName || null,
+    timestamp: data.timestamp || null,
+    status: failed ? 'error' : 'success',
+    error_message: failed ? String(data.toolUseResult.error) : null,
+    target_file: extractTargetFile(data.toolInput),
+  };
+}
+// ---------------------------------------------------------------------------
+// Core ETL: process a single session
+// ---------------------------------------------------------------------------
+/**
+ * Process a single Claude Code session: read its JSONL file, parse entries,
+ * map to unified rows, and write to boss.db.
+ *
+ * @param {object}  bossDb       sql.js database instance (boss.db)
+ * @param {string}  projectDir   Path to the project directory under projects/
+ * @param {object}  sessionMeta  Entry from sessions-index.json
+ * @param {string}  projectPath  Decoded project path
+ * @param {(msg: string) => void} log  Progress callback
+ * @returns {{messages: number, parts: number, toolCalls: number, errors: number}}
+ */
+/**
+ * Return the plain text of a user JSONL entry: the content itself when it is
+ * a string, otherwise the first array block that is a string or has a string
+ * `text`.  Yields '' when nothing matches.  Used only for judge-artifact
+ * detection.
+ *
+ * @param {object} data  Parsed JSONL line
+ * @returns {string}
+ */
+function entryUserText(data) {
+  const content = data.message && data.message.content;
+  if (typeof content === 'string') {
+    return content;
+  }
+  if (!Array.isArray(content)) {
+    return '';
+  }
+  const hit = content.find(
+    (block) => typeof block === 'string' || (block && typeof block.text === 'string')
+  );
+  if (hit === undefined) {
+    return '';
+  }
+  return typeof hit === 'string' ? hit : hit.text;
+}
+/**
+ * Import one Claude Code session: read its JSONL file, map the entries to
+ * unified rows and write them to the database.
+ *
+ * Nothing is written when the JSONL file is missing or has no parsable
+ * entries, or when the first user entry is recognised as a judge prompt (the
+ * result then also carries `skipped: true`).
+ *
+ * @param {object}  bossDb       Database handle passed through to the queries
+ * @param {string}  projectDir   Directory that holds the session's JSONL file
+ * @param {object}  sessionMeta  Session metadata; `sessionId` names the file
+ * @param {string}  projectPath  Decoded project path
+ * @param {(msg: string) => void} log  Progress callback
+ * @returns {{messages: number, parts: number, toolCalls: number, errors: number, skipped?: boolean}}
+ */
+function processSession(bossDb, projectDir, sessionMeta, projectPath, log) {
+  const sessionId = sessionMeta.sessionId;
+  const emptyResult = () => ({ messages: 0, parts: 0, toolCalls: 0, errors: 0 });
+  const jsonlPath = path.join(projectDir, `${sessionId}.jsonl`);
+  if (!fs.existsSync(jsonlPath)) {
+    log(`Session JSONL not found: ${jsonlPath}`);
+    return emptyResult();
+  }
+  const entries = readJsonl(jsonlPath, log);
+  if (entries.length === 0) {
+    return emptyResult();
+  }
+  // Sessions produced by our own LLM judge (`claude -p` logs each judge call
+  // as a session) must not be imported again, or they would feed back into
+  // the analysis.  Only the first user entry is inspected.
+  const firstUserEntry = entries.find(({ data }) => data.type === 'user');
+  if (firstUserEntry && isJudgePrompt(entryUserText(firstUserEntry.data))) {
+    return { ...emptyResult(), skipped: true };
+  }
+  // --- Build unified rows ---
+  const messages = [];
+  const parts = [];
+  const toolCalls = [];
+  for (const { lineNumber, data } of entries) {
+    const isChatEntry = data.type === 'user' || data.type === 'assistant';
+    if (isChatEntry) {
+      const messageId = `cc_${sessionId}_${lineNumber}`;
+      messages.push(mapMessage(sessionId, lineNumber, data));
+      parts.push(mapPart(sessionId, lineNumber, data, messageId));
+    }
+    if (!data.toolName) continue;
+    toolCalls.push(mapToolCall(sessionId, lineNumber, data));
+    // A tool-call entry gets a part too, attached to the most recently
+    // collected message (of either role) when there is one.
+    const parentId =
+      messages.length > 0 ? messages[messages.length - 1].id : null;
+    parts.push(mapPart(sessionId, lineNumber, data, parentId));
+  }
+  const errorCount = toolCalls.filter((tc) => tc.status === 'error').length;
+  // --- Session row ---
+  const sessionObj = mapSession(sessionMeta, entries, projectPath);
+  // Replace mapSession's own estimate with the count of failed tool calls.
+  sessionObj.error_count = errorCount;
+  // --- Write to boss.db ---
+  upsertSession(bossDb, sessionObj);
+  bulkInsertMessages(bossDb, messages);
+  bulkInsertParts(bossDb, parts);
+  bulkInsertToolCalls(bossDb, toolCalls);
+  return {
+    messages: messages.length,
+    parts: parts.length,
+    toolCalls: toolCalls.length,
+    errors: errorCount,
+  };
+}
+// ---------------------------------------------------------------------------
+// Main entry point
+// ---------------------------------------------------------------------------
+/**
+ * Produce the session metadata list for one project directory.
+ *
+ * Uses sessions-index.json when it exists and holds a JSON array.  Otherwise
+ * (file missing, unparsable, or not an array) one minimal entry is
+ * synthesised per *.jsonl file, with `modified` set to the file's mtime so
+ * the incremental-sync watermark still works.
+ *
+ * @param {string} projectDir   Absolute path of the project directory
+ * @param {string} folderName   Directory name, used in log messages
+ * @param {(msg: string) => void} log
+ * @returns {object[]|null}  Session entries, or null when the directory
+ *                           cannot be read or contains no .jsonl files
+ */
+function loadSessionsIndex(projectDir, folderName, log) {
+  const indexPath = path.join(projectDir, 'sessions-index.json');
+  if (fs.existsSync(indexPath)) {
+    try {
+      const parsed = JSON.parse(fs.readFileSync(indexPath, 'utf8'));
+      if (Array.isArray(parsed)) {
+        return parsed;
+      }
+      log(`sessions-index.json in ${folderName} is not an array; falling back to JSONL scan`);
+    } catch (err) {
+      log(`Cannot parse sessions-index.json in ${folderName} (${err.message}); falling back to JSONL scan`);
+    }
+  }
+  let files;
+  try {
+    files = fs.readdirSync(projectDir);
+  } catch (err) {
+    log(`Cannot read ${folderName}: ${err.message}`);
+    return null;
+  }
+  const synthesised = [];
+  for (const f of files) {
+    if (!f.endsWith('.jsonl')) continue;
+    const sessionId = f.slice(0, -'.jsonl'.length);
+    let mtimeIso;
+    try {
+      mtimeIso = fs.statSync(path.join(projectDir, f)).mtime.toISOString();
+    } catch (_) {
+      // Best effort: a file that cannot be stat'ed is still listed, stamped
+      // with the current time.
+      mtimeIso = new Date().toISOString();
+    }
+    synthesised.push({ sessionId, modified: mtimeIso });
+  }
+  if (!synthesised.length) {
+    log(`No .jsonl files in ${folderName}, skipping`);
+    return null;
+  }
+  return synthesised;
+}
+/**
+ * Collect data from Claude Code and write to boss.db.
+ *
+ * Performs incremental sync based on etl_state.last_session_time: on the
+ * first run every session is processed, afterwards only sessions whose
+ * `modified` is greater than the stored watermark.  The ETL state and the
+ * database are checkpointed every BATCH_SIZE sessions and once more at the
+ * end.  Each session is individually wrapped in try/catch so that one bad
+ * file never stops the whole ETL run.
+ *
+ * NOTE(review): `modified` values are compared with `>` as is — presumably
+ * they are all ISO 8601 strings in the same format; confirm.
+ *
+ * @param {object}  bossDb         sql.js database instance
+ * @param {string}  claudeCodePath path to ~/.claude/
+ * @param {object}  [options]      { onProgress: (msg) => void }
+ * @returns {Promise<{sessionCount: number, messageCount: number, partCount: number, toolCallCount: number, errorSessionCount: number}>}
+ */
+async function collectClaudeCode(bossDb, claudeCodePath, options = {}) {
+  const log = options.onProgress || (() => {});
+  const totals = {
+    sessionCount: 0,
+    messageCount: 0,
+    partCount: 0,
+    toolCallCount: 0,
+    errorSessionCount: 0,
+  };
+  // -- 1. Read ETL watermark --------------------------------------------------
+  const etlState = getEtlState(bossDb, SOURCE);
+  const lastSessionTime = etlState ? etlState.last_session_time || null : null;
+  log(`ETL watermark: last_session_time = ${lastSessionTime}`);
+  // -- 2. Find all project directories ----------------------------------------
+  const projectsDir = path.join(claudeCodePath, 'projects');
+  if (!fs.existsSync(projectsDir) || !fs.statSync(projectsDir).isDirectory()) {
+    log(`Projects directory not found: ${projectsDir}`);
+    return totals;
+  }
+  let projectFolders;
+  try {
+    projectFolders = fs
+      .readdirSync(projectsDir, { withFileTypes: true })
+      .filter((d) => d.isDirectory());
+  } catch (err) {
+    log(`Cannot read projects directory: ${err.message}`);
+    return totals;
+  }
+  log(`Found ${projectFolders.length} project folder(s)`);
+  let latestSessionTime = lastSessionTime;
+  let latestSessionId = etlState ? etlState.last_session_id || null : null;
+  let processedInBatch = 0;
+  // -- 3. Iterate over project directories ------------------------------------
+  for (const folder of projectFolders) {
+    const projectDir = path.join(projectsDir, folder.name);
+    const projectPath = decodeProjectPath(folder.name) || folder.name;
+    // 3a. Session list from sessions-index.json or, failing that, from the
+    //     *.jsonl files on disk.
+    const sessionsIndex = loadSessionsIndex(projectDir, folder.name, log);
+    if (!sessionsIndex) {
+      continue;
+    }
+    // 3b. Filter sessions: only process those modified after last_session_time
+    const newSessions = sessionsIndex.filter((s) => {
+      if (!s || !s.sessionId) return false;
+      if (!lastSessionTime) return true; // first run: process all
+      if (!s.modified) return false;
+      return s.modified > lastSessionTime;
+    });
+    if (newSessions.length === 0) {
+      continue;
+    }
+    log(
+      `Project ${folder.name}: ${newSessions.length} new session(s) ` +
+        `(of ${sessionsIndex.length} total)`
+    );
+    // 3c. Process each new session
+    for (const sessionMeta of newSessions) {
+      try {
+        const result = processSession(
+          bossDb,
+          projectDir,
+          sessionMeta,
+          projectPath,
+          log
+        );
+        if (!result.skipped) {
+          totals.sessionCount++;
+          totals.messageCount += result.messages;
+          totals.partCount += result.parts;
+          totals.toolCallCount += result.toolCalls;
+        }
+        // Track watermark (use modified timestamp as the high-water mark).
+        // A session without `modified` must never move it: otherwise the
+        // watermark would be overwritten with `undefined` and the session ID
+        // recorded without a matching time.
+        const modified = sessionMeta.modified;
+        if (modified && (!latestSessionTime || modified > latestSessionTime)) {
+          latestSessionTime = modified;
+          latestSessionId = sessionMeta.sessionId;
+        }
+      } catch (err) {
+        totals.errorSessionCount++;
+        log(
+          `Error processing session ${sessionMeta.sessionId}: ${err.message}`
+        );
+      }
+      // Periodic save after each batch
+      processedInBatch++;
+      if (processedInBatch >= BATCH_SIZE) {
+        updateEtlState(bossDb, SOURCE, {
+          last_sync_at: new Date().toISOString(),
+          last_session_id: latestSessionId,
+          last_session_time: latestSessionTime,
+          status: 'running',
+        });
+        saveDb();
+        log(
+          `Batch checkpoint: ${totals.sessionCount} sessions processed so far`
+        );
+        processedInBatch = 0;
+      }
+    }
+  }
+  // -- 4. Final ETL state update ----------------------------------------------
+  updateEtlState(bossDb, SOURCE, {
+    last_sync_at: new Date().toISOString(),
+    last_session_id: latestSessionId,
+    last_session_time: latestSessionTime,
+    status: 'idle',
+  });
+  saveDb();
+  log(
+    `ETL complete: ${totals.sessionCount} sessions, ` +
+      `${totals.messageCount} messages, ${totals.partCount} parts, ` +
+      `${totals.toolCallCount} tool calls` +
+      (totals.errorSessionCount
+        ? `, ${totals.errorSessionCount} failed`
+        : '')
+  );
+  return totals;
+}
+// ---------------------------------------------------------------------------
+// Exports
+// ---------------------------------------------------------------------------
+// Only the collector entry point is exported; every other function in this
+// file stays module-private.
+module.exports = { collectClaudeCode };