npm - voyageai-cli - Versions diffs - 1.22.0 → 1.23.0 - Mend

voyageai-cli 1.22.0 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/package.json +4 -2
package/src/cli.js +4 -0
package/src/commands/chat.js +503 -0
package/src/commands/demo.js +75 -0
package/src/commands/embed.js +10 -0
package/src/commands/index.js +1 -1
package/src/commands/init.js +34 -97
package/src/commands/mcp-server.js +49 -0
package/src/commands/ping.js +52 -0
package/src/commands/pipeline.js +17 -3
package/src/commands/playground.js +186 -0
package/src/commands/purge.js +3 -1
package/src/commands/refresh.js +3 -1
package/src/commands/rerank.js +10 -0
package/src/commands/scaffold.js +1 -2
package/src/lib/chat.js +252 -0
package/src/lib/codegen.js +5 -4
package/src/lib/config.js +5 -1
package/src/lib/cost.js +352 -0
package/src/lib/explanations.js +260 -0
package/src/lib/history.js +260 -0
package/src/lib/llm.js +485 -0
package/src/lib/preflight.js +281 -0
package/src/lib/prompt.js +111 -0
package/src/lib/wizard-cli.js +135 -0
package/src/lib/wizard-steps-chat.js +171 -0
package/src/lib/wizard-steps-init.js +174 -0
package/src/lib/wizard.js +222 -0
package/src/mcp/schemas/index.js +102 -0
package/src/mcp/server.js +162 -0
package/src/mcp/tools/embedding.js +67 -0
package/src/mcp/tools/ingest.js +89 -0
package/src/mcp/tools/management.js +132 -0
package/src/mcp/tools/retrieval.js +209 -0
package/src/mcp/tools/utility.js +219 -0
package/src/playground/index.html +1195 -199

package/src/lib/explanations.js CHANGED Viewed

@@ -1151,6 +1151,175 @@ const concepts = {
     ],
   },
+  'auto-embedding': {
+    title: 'MongoDB Auto-Embedding',
+    summary: 'Automatic vector embedding generation in Atlas Vector Search',
+    content: [
+      `${pc.bold('What is Auto-Embedding?')}`,
+      `${pc.cyan('Auto-Embedding')} is a MongoDB Atlas Vector Search feature (currently in Preview)`,
+      `that automatically generates vector embeddings for text fields using Voyage AI`,
+      `models — no embedding code required.`,
+      ``,
+      `${pc.bold('How it works:')}`,
+      `  ${pc.dim('1.')} Configure your vector search index with ${pc.cyan('autoEmbed')} type`,
+      `  ${pc.dim('2.')} Specify which text field to embed and which Voyage AI model to use`,
+      `  ${pc.dim('3.')} MongoDB automatically generates embeddings when documents are inserted/updated`,
+      `  ${pc.dim('4.')} At query time, pass natural language text — MongoDB embeds it automatically`,
+      ``,
+      `${pc.bold('Supported models:')}`,
+      `  ${pc.cyan('voyage-4-lite')}   — High-volume, cost-sensitive applications`,
+      `  ${pc.cyan('voyage-4')}        — Balanced performance (recommended)`,
+      `  ${pc.cyan('voyage-4-large')}  — Maximum accuracy for complex relationships`,
+      `  ${pc.cyan('voyage-code-3')}   — Code search and technical documentation`,
+      ``,
+      `${pc.bold('Index definition example:')}`,
+      `  ${pc.dim('{')}`,
+      `    ${pc.dim('"mappings": {')}`,
+      `      ${pc.dim('"fields": {')}`,
+      `        ${pc.cyan('"summary"')}: ${pc.dim('{')}`,
+      `          ${pc.dim('"type": "')}${pc.cyan('autoEmbed')}${pc.dim('",')}`,
+      `          ${pc.dim('"model": "voyage-4"')}`,
+      `        ${pc.dim('}')}`,
+      `      ${pc.dim('}')}`,
+      `    ${pc.dim('}')}`,
+      `  ${pc.dim('}')}`,
+      ``,
+      `${pc.bold('Query syntax:')} Use ${pc.cyan('query.text')} in $vectorSearch instead of ${pc.cyan('queryVector')}:`,
+      `  ${pc.dim('$vectorSearch: {')}`,
+      `    ${pc.dim('index: "myIndex",')}`,
+      `    ${pc.dim('path: "summary",')}`,
+      `    ${pc.cyan('query: { text: "properties near amusement parks" }')},`,
+      `    ${pc.dim('numCandidates: 100,')}`,
+      `    ${pc.dim('limit: 10')}`,
+      `  ${pc.dim('}')}`,
+      ``,
+      `${pc.bold('API keys:')}`,
+      `Auto-Embedding uses Voyage AI API keys configured during mongot deployment.`,
+      `Best practice: use separate keys for indexing vs. querying to avoid rate limit`,
+      `conflicts. Keys can be created from Atlas (AI Models section) or Voyage AI directly.`,
+      ``,
+      `${pc.bold('Current limitations (Preview):')}`,
+      `  ${pc.dim('•')} ${pc.yellow('Not yet available')} on Atlas clusters (only self-managed Community Edition)`,
+      `  ${pc.dim('•')} Not available on local Atlas deployments via Atlas CLI`,
+      `  ${pc.dim('•')} Not available on MongoDB Enterprise Edition`,
+      `  ${pc.dim('•')} Available via Docker, tarball, package manager, or Kubernetes with 8.2+ CE`,
+      ``,
+      `${pc.bold('When to use Auto-Embedding:')}`,
+      `  ${pc.dim('•')} Simple use cases where you want zero embedding code`,
+      `  ${pc.dim('•')} Single-field text embedding scenarios`,
+      `  ${pc.dim('•')} When your data changes frequently and you want automatic sync`,
+      `  ${pc.dim('•')} Self-managed MongoDB deployments`,
+      ``,
+      `${pc.bold('When to use vai (manual embedding) instead:')}`,
+      `  ${pc.dim('•')} Atlas clusters (auto-embedding not yet available)`,
+      `  ${pc.dim('•')} Custom chunking strategies needed`,
+      `  ${pc.dim('•')} Multi-field or multi-collection embeddings`,
+      `  ${pc.dim('•')} Reranking pipelines (auto-embedding doesn't include reranking)`,
+      `  ${pc.dim('•')} Quantization (int8/binary) for storage optimization`,
+      `  ${pc.dim('•')} Multimodal embeddings (images + text)`,
+    ].join('\n'),
+    links: [
+      'https://www.mongodb.com/docs/atlas/atlas-vector-search/crud-embeddings/create-embeddings-automatic/',
+      'https://www.mongodb.com/docs/voyageai/management/api-keys/',
+    ],
+    tryIt: [
+      'vai explain vai-vs-auto-embedding',
+      'vai explain vector-search',
+      'vai models --type embedding',
+    ],
+  },
+  'vai-vs-auto-embedding': {
+    title: 'VAI vs Auto-Embedding — When to Use Each',
+    summary: 'Choosing between manual embedding pipelines and MongoDB auto-embedding',
+    content: [
+      `Both ${pc.cyan('vai')} (manual embedding) and ${pc.cyan('MongoDB Auto-Embedding')} use the same`,
+      `Voyage AI models, but they serve different use cases and deployment scenarios.`,
+      ``,
+      `${pc.bold('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━')}`,
+      ``,
+      `${pc.bold(pc.cyan('VAI (Manual Embedding Pipeline)'))}`,
+      ``,
+      `You embed text explicitly using ${pc.cyan('vai embed')}, ${pc.cyan('vai pipeline')}, or ${pc.cyan('vai store')},`,
+      `then store the vectors in any database. Full control over every step.`,
+      ``,
+      `${pc.bold('Use vai when:')}`,
+      `  ${pc.green('✓')} Using ${pc.cyan('MongoDB Atlas clusters')} (auto-embedding not available yet)`,
+      `  ${pc.green('✓')} Using ${pc.cyan('any vector database')} (Pinecone, Weaviate, Qdrant, etc.)`,
+      `  ${pc.green('✓')} You need ${pc.cyan('custom chunking')} (sentence, paragraph, semantic, sliding window)`,
+      `  ${pc.green('✓')} You need ${pc.cyan('reranking')} (vai supports two-stage retrieval pipelines)`,
+      `  ${pc.green('✓')} You want ${pc.cyan('quantization')} (int8, binary) for storage optimization`,
+      `  ${pc.green('✓')} You need ${pc.cyan('multimodal embeddings')} (images + text)`,
+      `  ${pc.green('✓')} You need ${pc.cyan('flexible dimensions')} (256, 512, 1024, 2048)`,
+      `  ${pc.green('✓')} You want to ${pc.cyan('mix models')} (embed docs with -large, query with -lite)`,
+      `  ${pc.green('✓')} You need ${pc.cyan('batch processing')} with custom concurrency/rate limiting`,
+      `  ${pc.green('✓')} You're building ${pc.cyan('RAG pipelines')} with custom retrieval logic`,
+      ``,
+      `${pc.dim('Workflow:')}`,
+      `  ${pc.cyan('vai chunk')} → ${pc.cyan('vai embed')} → ${pc.cyan('vai store')} → ${pc.cyan('vai search')} → ${pc.cyan('vai rerank')}`,
+      ``,
+      `${pc.bold('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━')}`,
+      ``,
+      `${pc.bold(pc.cyan('MongoDB Auto-Embedding'))}`,
+      ``,
+      `MongoDB automatically generates embeddings when you insert/update documents.`,
+      `No embedding code needed — just configure your index and insert data.`,
+      ``,
+      `${pc.bold('Use Auto-Embedding when:')}`,
+      `  ${pc.green('✓')} Using ${pc.cyan('self-managed MongoDB Community Edition')} (8.2+)`,
+      `  ${pc.green('✓')} You want ${pc.cyan('zero embedding code')} — simplest possible setup`,
+      `  ${pc.green('✓')} You're embedding a ${pc.cyan('single text field')} per collection`,
+      `  ${pc.green('✓')} Your data ${pc.cyan('changes frequently')} and you want automatic sync`,
+      `  ${pc.green('✓')} You don't need reranking, quantization, or multimodal`,
+      `  ${pc.green('✓')} Standard chunking is sufficient (or you pre-chunk your data)`,
+      ``,
+      `${pc.dim('Workflow:')}`,
+      `  ${pc.cyan('db.collection.insertOne({text: "..."})')} → embeddings auto-generated`,
+      `  ${pc.cyan('$vectorSearch: {query: {text: "..."}}')} → query auto-embedded`,
+      ``,
+      `${pc.bold('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━')}`,
+      ``,
+      `${pc.bold('FEATURE COMPARISON')}`,
+      ``,
+      `${pc.dim('Feature                    vai           Auto-Embedding')}`,
+      `${pc.dim('─────────────────────────────────────────────────────────────────')}`,
+      `Atlas clusters             ${pc.green('Yes')}           ${pc.yellow('Not yet')}`,
+      `Self-managed CE 8.2+       ${pc.green('Yes')}           ${pc.green('Yes')}`,
+      `Other vector DBs           ${pc.green('Yes')}           ${pc.dim('No')}`,
+      `Custom chunking            ${pc.green('Yes')}           ${pc.dim('No')}`,
+      `Reranking                  ${pc.green('Yes')}           ${pc.dim('No')}`,
+      `Quantization               ${pc.green('Yes')}           ${pc.dim('No')}`,
+      `Multimodal                 ${pc.green('Yes')}           ${pc.dim('No')}`,
+      `Flexible dimensions        ${pc.green('Yes')}           ${pc.dim('No')}`,
+      `Mix query/doc models       ${pc.green('Yes')}           ${pc.dim('No')}`,
+      `Auto-sync on update        ${pc.dim('Manual')}        ${pc.green('Yes')}`,
+      `Zero code setup            ${pc.dim('No')}            ${pc.green('Yes')}`,
+      ``,
+      `${pc.bold('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━')}`,
+      ``,
+      `${pc.bold('RECOMMENDATION')}`,
+      ``,
+      `${pc.dim('•')} For ${pc.cyan('Atlas users')}: Use vai — auto-embedding isn't available yet`,
+      `${pc.dim('•')} For ${pc.cyan('production RAG')}: Use vai — you'll want reranking and custom chunking`,
+      `${pc.dim('•')} For ${pc.cyan('quick prototypes')} on self-managed CE: Auto-embedding is faster to set up`,
+      `${pc.dim('•')} For ${pc.cyan('complex pipelines')}: vai gives you full control over every step`,
+      ``,
+      `${pc.bold('Migration path:')} Start with auto-embedding for simplicity, then migrate to`,
+      `vai when you need advanced features. The models are the same — your embeddings`,
+      `will be compatible.`,
+    ].join('\n'),
+    links: [
+      'https://www.mongodb.com/docs/atlas/atlas-vector-search/crud-embeddings/create-embeddings-automatic/',
+      'https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/',
+    ],
+    tryIt: [
+      'vai explain auto-embedding',
+      'vai pipeline --help',
+      'vai chunk --help',
+      'vai rerank --help',
+    ],
+  },
   'eval-comparison': {
     title: 'Evaluation Comparison — vai eval compare',
     summary: 'Compare configurations and track quality over time',
@@ -1199,6 +1368,72 @@ const concepts = {
       'vai eval compare --test-set test.jsonl --configs baseline.json,experiment.json',
     ],
   },
+  chat: {
+    title: 'RAG Chat',
+    summary: 'How vai chat works — retrieval-augmented conversational AI',
+    content: [
+      `${pc.cyan('vai chat')} adds a conversational layer on top of your existing RAG pipeline.`,
+      `It connects your embedded documents to an LLM for grounded Q&A with citations.`,
+      ``,
+      `${pc.bold('The Two-Stage Pipeline:')}`,
+      ``,
+      `  ┌─────────────────────────────────────────────────────┐`,
+      `  │  ${pc.cyan('STAGE 1: RETRIEVAL')} (Voyage AI + MongoDB)            │`,
+      `  │                                                     │`,
+      `  │  Your question → Voyage AI creates an embedding     │`,
+      `  │  → MongoDB Atlas finds similar document chunks      │`,
+      `  │  → Voyage AI reranks for better relevance           │`,
+      `  │                                                     │`,
+      `  │  Output: Top 5 relevant text chunks                 │`,
+      `  ├─────────────────────────────────────────────────────┤`,
+      `  │  ${pc.cyan('STAGE 2: GENERATION')} (Your chosen LLM)              │`,
+      `  │                                                     │`,
+      `  │  Those text chunks + your question → sent to LLM    │`,
+      `  │  → LLM reads the context and writes an answer       │`,
+      `  │  → Response streamed back with citations             │`,
+      `  │                                                     │`,
+      `  │  Output: Conversational answer                      │`,
+      `  └─────────────────────────────────────────────────────┘`,
+      ``,
+      `${pc.bold('Key insight:')} The LLM ${pc.cyan('never sees embedding vectors')}. It receives`,
+      `plain text — the retrieved document chunks — and produces plain text.`,
+      `Voyage AI finds the right documents; the LLM reads them and writes`,
+      `an answer. They are completely independent systems.`,
+      ``,
+      `${pc.bold('What goes where:')}`,
+      ``,
+      `  ${pc.dim('Voyage AI API')}    ← your question text, document chunks for reranking`,
+      `  ${pc.dim('Your MongoDB')}     ← embedded documents, chat history, session data`,
+      `  ${pc.dim('LLM Provider')}     ← system prompt, retrieved chunks, question, history`,
+      `  ${pc.dim('  (Ollama)')}       ← fully local, nothing leaves your machine`,
+      ``,
+      `${pc.bold('Supported LLM Providers:')}`,
+      ``,
+      `  ${pc.cyan('anthropic')}   Claude (API key required)`,
+      `  ${pc.cyan('openai')}      GPT-4o and others (API key required)`,
+      `  ${pc.cyan('ollama')}      Fully local, free (requires Ollama installed)`,
+      ``,
+      `${pc.bold('Conversation History:')}`,
+      `Previous turns are included so follow-up questions work naturally.`,
+      `History is stored in your MongoDB (collection: ${pc.dim('vai_chat_history')}).`,
+      `Sessions can be resumed with: ${pc.cyan('vai chat --session <id>')}`,
+      ``,
+      `${pc.bold('Slash Commands (inside chat):')}`,
+      `  ${pc.cyan('/sources')}    Show sources from last response`,
+      `  ${pc.cyan('/context')}    Show retrieved document chunks`,
+      `  ${pc.cyan('/history')}    List recent chat sessions`,
+      `  ${pc.cyan('/session')}    Show current session ID`,
+      `  ${pc.cyan('/export')}     Export to Markdown or JSON`,
+      `  ${pc.cyan('/clear')}      Clear conversation`,
+      `  ${pc.cyan('/model')}      Show or switch LLM model`,
+    ].join('\n'),
+    links: ['https://github.com/mrlynn/voyageai-cli#chat'],
+    tryIt: [
+      'vai config set llm-provider anthropic',
+      'vai config set llm-api-key YOUR_KEY',
+      'vai chat --db myapp --collection knowledge',
+    ],
+  },
 };
 /**
@@ -1320,6 +1555,23 @@ const aliases = {
   'save-results': 'eval-comparison',
   'a-b-test': 'eval-comparison',
   regression: 'eval-comparison',
+  // Auto-embedding aliases
+  'auto-embedding': 'auto-embedding',
+  'auto-embed': 'auto-embedding',
+  autoembed: 'auto-embedding',
+  'autoEmbed': 'auto-embedding',
+  'automatic-embedding': 'auto-embedding',
+  'automatic-embeddings': 'auto-embedding',
+  'atlas-auto-embed': 'auto-embedding',
+  'mongodb-auto-embedding': 'auto-embedding',
+  'zero-code': 'auto-embedding',
+  // VAI vs Auto-embedding aliases
+  'vai-vs-auto-embedding': 'vai-vs-auto-embedding',
+  'vai-vs-autoembedding': 'vai-vs-auto-embedding',
+  'manual-vs-auto': 'vai-vs-auto-embedding',
+  'auto-vs-manual': 'vai-vs-auto-embedding',
+  'which-approach': 'vai-vs-auto-embedding',
+  'embedding-approach': 'vai-vs-auto-embedding',
   // Provider comparison aliases
   'provider-comparison': 'provider-comparison',
   providers: 'provider-comparison',
@@ -1338,6 +1590,14 @@ const aliases = {
   'vs-anthropic': 'provider-comparison',
   competitors: 'provider-comparison',
   alternatives: 'provider-comparison',
+  // Chat aliases
+  chat: 'chat',
+  'vai-chat': 'chat',
+  'rag-chat': 'chat',
+  conversation: 'chat',
+  conversational: 'chat',
+  'chat-history': 'chat',
+  llm: 'chat',
 };
 /**

package/src/lib/history.js ADDED Viewed

@@ -0,0 +1,260 @@
+'use strict';
+const crypto = require('crypto');
+/**
+ * Chat History Manager
+ *
+ * Manages conversation sessions with in-memory storage
+ * and optional MongoDB persistence.
+ */
+/**
+ * Generate a new session ID.
+ * @returns {string}
+ */
+function generateSessionId() {
+  return crypto.randomUUID();
+}
+/**
+ * In-memory history store for a single session.
+ */
+class ChatHistory {
+  /**
+   * @param {object} [opts]
+   * @param {string} [opts.sessionId] - Resume an existing session
+   * @param {number} [opts.maxTurns] - Max turns to keep (default 20)
+   * @param {object} [opts.mongo] - { client, collection } for persistence
+   */
+  constructor(opts = {}) {
+    this.sessionId = opts.sessionId || generateSessionId();
+    this.maxTurns = opts.maxTurns || 20;
+    this.turns = []; // Array of { role, content, context?, metadata?, timestamp }
+    this._mongo = opts.mongo || null;
+  }
+  /**
+   * Load existing session from MongoDB.
+   * @returns {Promise<boolean>} true if session was found and loaded
+   */
+  async load() {
+    if (!this._mongo) return false;
+    try {
+      const docs = await this._mongo.collection
+        .find({ sessionId: this.sessionId })
+        .sort({ timestamp: 1 })
+        .limit(this.maxTurns * 2) // user + assistant turns
+        .toArray();
+      if (docs.length === 0) return false;
+      this.turns = docs.map(d => ({
+        role: d.role,
+        content: d.content,
+        context: d.context || undefined,
+        metadata: d.metadata || undefined,
+        timestamp: d.timestamp,
+      }));
+      return true;
+    } catch {
+      // Persistence failure is non-fatal
+      return false;
+    }
+  }
+  /**
+   * Add a turn to history and optionally persist.
+   * @param {object} turn - { role, content, context?, metadata? }
+   */
+  async addTurn(turn) {
+    const entry = {
+      ...turn,
+      timestamp: new Date(),
+    };
+    this.turns.push(entry);
+    // Trim to maxTurns (keep pairs)
+    const maxEntries = this.maxTurns * 2;
+    if (this.turns.length > maxEntries) {
+      this.turns = this.turns.slice(-maxEntries);
+    }
+    // Persist to MongoDB if available
+    if (this._mongo) {
+      try {
+        await this._mongo.collection.insertOne({
+          sessionId: this.sessionId,
+          ...entry,
+        });
+      } catch {
+        // Persistence failure is non-fatal — chat continues in-memory
+      }
+    }
+  }
+  /**
+   * Get conversation history as message array for the LLM.
+   * Returns only role + content (no metadata).
+   * @returns {Array<{role: string, content: string}>}
+   */
+  getMessages() {
+    return this.turns.map(t => ({ role: t.role, content: t.content }));
+  }
+  /**
+   * Get the last assistant turn's context docs.
+   * @returns {Array|null}
+   */
+  getLastContext() {
+    for (let i = this.turns.length - 1; i >= 0; i--) {
+      if (this.turns[i].role === 'assistant' && this.turns[i].context) {
+        return this.turns[i].context;
+      }
+    }
+    return null;
+  }
+  /**
+   * Get the last assistant turn's sources formatted for display.
+   * @returns {Array<{source: string, score: number}>|null}
+   */
+  getLastSources() {
+    const ctx = this.getLastContext();
+    if (!ctx) return null;
+    return ctx.map(d => ({
+      source: d.source || d.metadata?.source || 'unknown',
+      score: d.score,
+    }));
+  }
+  /**
+   * Clear conversation history (keep session ID).
+   */
+  clear() {
+    this.turns = [];
+  }
+  /**
+   * Get conversation history trimmed to fit a token budget.
+   * Uses ~4 chars per token estimate. Prioritizes recent turns.
+   * @param {number} [maxTokens=8000] - Token budget for history
+   * @returns {Array<{role: string, content: string}>}
+   */
+  getMessagesWithBudget(maxTokens = 8000) {
+    const messages = this.getMessages();
+    if (messages.length === 0) return [];
+    let totalChars = 0;
+    const maxChars = maxTokens * 4;
+    const result = [];
+    // Work backwards from most recent
+    for (let i = messages.length - 1; i >= 0; i--) {
+      const charCount = messages[i].content.length;
+      if (totalChars + charCount > maxChars && result.length > 0) break;
+      result.unshift(messages[i]);
+      totalChars += charCount;
+    }
+    return result;
+  }
+  /**
+   * Export conversation to markdown.
+   * @returns {string}
+   */
+  exportMarkdown() {
+    const lines = [
+      `# Chat Session: ${this.sessionId}`,
+      `_Exported: ${new Date().toISOString()}_`,
+      '',
+    ];
+    for (const turn of this.turns) {
+      if (turn.role === 'user') {
+        lines.push(`**You:** ${turn.content}`);
+      } else if (turn.role === 'assistant') {
+        lines.push(`**Assistant:** ${turn.content}`);
+        if (turn.context && turn.context.length > 0) {
+          lines.push('');
+          lines.push('Sources:');
+          for (const doc of turn.context) {
+            const src = doc.source || doc.metadata?.source || 'unknown';
+            lines.push(`- ${src} (${doc.score?.toFixed(2) || 'N/A'})`);
+          }
+        }
+      }
+      lines.push('');
+    }
+    return lines.join('\n');
+  }
+  /**
+   * Export conversation to JSON.
+   * @returns {object}
+   */
+  exportJSON() {
+    return {
+      sessionId: this.sessionId,
+      exportedAt: new Date().toISOString(),
+      turns: this.turns,
+    };
+  }
+  /**
+   * Ensure MongoDB indexes exist for chat history.
+   * Called once on first persist.
+   * @param {import('mongodb').Collection} collection
+   */
+  static async ensureIndexes(collection) {
+    try {
+      await collection.createIndex(
+        { sessionId: 1, timestamp: 1 },
+        { background: true }
+      );
+    } catch {
+      // Index creation failure is non-fatal
+    }
+  }
+}
+/**
+ * List recent chat sessions from MongoDB.
+ * @param {import('mongodb').Collection} collection
+ * @param {number} [limit=10]
+ * @returns {Promise<Array<{sessionId: string, firstMessage: string, lastActivity: Date, turnCount: number}>>}
+ */
+async function listSessions(collection, limit = 10) {
+  const pipeline = [
+    {
+      $group: {
+        _id: '$sessionId',
+        firstMessage: { $first: '$content' },
+        firstRole: { $first: '$role' },
+        lastActivity: { $max: '$timestamp' },
+        turnCount: { $sum: 1 },
+      },
+    },
+    { $sort: { lastActivity: -1 } },
+    { $limit: limit },
+  ];
+  const sessions = await collection.aggregate(pipeline).toArray();
+  return sessions.map(s => ({
+    sessionId: s._id,
+    firstMessage: s.firstRole === 'user' ? s.firstMessage : '(continued)',
+    lastActivity: s.lastActivity,
+    turnCount: s.turnCount,
+  }));
+}
+module.exports = {
+  generateSessionId,
+  ChatHistory,
+  listSessions,
+};