npm - voyageai-cli - Versions diffs - 1.26.0 → 1.27.0 - Mend

voyageai-cli 1.26.0 → 1.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/package.json +1 -1
package/src/commands/chat.js +281 -78
package/src/commands/doctor.js +157 -14
package/src/commands/playground.js +233 -19
package/src/lib/chat.js +170 -4
package/src/lib/llm.js +304 -2
package/src/lib/mongo.js +6 -6
package/src/lib/prompt.js +60 -1
package/src/lib/tool-registry.js +194 -0
package/src/mcp/tools/embedding.js +55 -43
package/src/mcp/tools/ingest.js +74 -67
package/src/mcp/tools/management.js +60 -48
package/src/mcp/tools/retrieval.js +181 -163
package/src/mcp/tools/utility.js +171 -153
package/src/playground/icons/dark/128.png +0 -0
package/src/playground/icons/dark/16.png +0 -0
package/src/playground/icons/dark/256.png +0 -0
package/src/playground/icons/dark/32.png +0 -0
package/src/playground/icons/dark/64.png +0 -0
package/src/playground/icons/light/128.png +0 -0
package/src/playground/icons/light/16.png +0 -0
package/src/playground/icons/light/256.png +0 -0
package/src/playground/icons/light/32.png +0 -0
package/src/playground/icons/light/64.png +0 -0
package/src/playground/index.html +2769 -27

package/src/lib/chat.js CHANGED Viewed

@@ -3,8 +3,9 @@
 /**
  * Chat Orchestrator
  *
- * Coordinates the retrieval pipeline (embed → search → rerank)
+ * Coordinates the retrieval pipeline (embed -> search -> rerank)
  * with LLM generation and history management.
+ * Supports both pipeline mode (fixed RAG) and agent mode (tool-calling).
  */
 const { generateEmbeddings, apiRequest } = require('./api');
@@ -32,12 +33,12 @@ function resolveSourceLabel(doc) {
   return doc.source || meta.source || doc._id?.toString() || 'unknown';
 }
 const { getMongoCollection } = require('./mongo');
-const { buildMessages } = require('./prompt');
+const { buildMessages, buildAgentMessages } = require('./prompt');
 const { getDefaultModel, DEFAULT_RERANK_MODEL } = require('./catalog');
 const { loadProject } = require('./project');
 /**
- * Perform retrieval: embed query → vector search → optional rerank.
+ * Perform retrieval: embed query -> vector search -> optional rerank.
  *
  * @param {object} params
  * @param {string} params.query - User's question
@@ -154,7 +155,7 @@ async function retrieve({ query, db, collection, opts = {} }) {
 }
 /**
- * Execute a single chat turn: retrieve context → build prompt → generate response.
+ * Execute a single chat turn: retrieve context -> build prompt -> generate response.
  *
  * @param {object} params
  * @param {string} params.query - User's question
@@ -246,7 +247,172 @@ async function* chatTurn({ query, db, collection, llm, history, opts = {} }) {
   };
 }
+/**
+ * Execute a single agent chat turn: LLM decides which tools to call.
+ *
+ * Repeatedly calls llm.chatWithTools() with the running message list. A
+ * 'tool_calls' response has each call executed through the tool registry and
+ * its result appended to the message list before the next iteration; the first
+ * 'text' response ends the turn. If no text response arrives within
+ * maxIterations calls, a fixed fallback message is emitted instead.
+ *
+ * The user/assistant pair is written to history only when the turn ends (text
+ * response or iteration cap). The intermediate tool-call and tool-result
+ * messages live only in the local message list and are not stored.
+ *
+ * Errors thrown by executeTool() are caught and handed back to the LLM as an
+ * "Error: <message>" tool result. Errors thrown by llm.chatWithTools() are not
+ * caught here and propagate to whoever is consuming the generator.
+ *
+ * NOTE(review): a response whose type is neither 'text' nor 'tool_calls'
+ * matches neither branch, so the loop re-sends the unchanged message list and
+ * silently uses up an iteration.
+ * NOTE(review): assumes executeTool() resolves to an object; a null/undefined
+ * result would throw at `result.content` outside the try/catch. Confirm
+ * against tool-registry.
+ *
+ * @param {object} params
+ * @param {string} params.query - User's question
+ * @param {object} params.llm - LLM provider instance (must have chatWithTools,
+ *   formatAssistantToolCall and formatToolResult; `name` and `model` are read too)
+ * @param {import('./history').ChatHistory} params.history - Chat history
+ *   (getMessagesWithBudget and addTurn are called on it)
+ * @param {object} [params.opts] - Additional options
+ * @param {string} [params.opts.systemPrompt] - Override agent system prompt
+ * @param {number} [params.opts.maxIterations] - Max tool-calling iterations (default 10;
+ *   any falsy value, including 0, also yields 10)
+ * @param {string} [params.opts.db] - Default database for tool calls
+ *   (used only when the call's own `db` argument is missing or falsy)
+ * @param {string} [params.opts.collection] - Default collection for tool calls
+ *   (used only when the call's own `collection` argument is missing or falsy)
+ * @returns {AsyncGenerator<{type: string, data: any}>}
+ *   Yields: { type: 'tool_call', data: { name, args, result, error, timeMs } }
+ *             (one per executed tool call, after it finishes)
+ *           { type: 'chunk', data: string }
+ *             (emitted once, carrying the complete response text)
+ *           { type: 'done', data: { fullResponse, toolCalls, metadata } }
+ *             (always the last event)
+ */
+async function* agentChatTurn({ query, llm, history, opts = {} }) {
+  const { getToolDefinitions, executeTool } = require('./tool-registry'); // required inside the function, not at module top
+  const maxIterations = opts.maxIterations || 10; // falsy values (undefined, 0, ...) fall back to 10
+  const start = Date.now();
+  // 1. Build initial messages (history is capped via getMessagesWithBudget(8000); the budget's unit is not visible here)
+  const initialMessages = buildAgentMessages({
+    query,
+    history: history.getMessagesWithBudget(8000),
+    systemPrompt: opts.systemPrompt,
+  });
+  // 2. Get tool definitions for this provider (every provider name other than 'anthropic' gets the 'openai' format)
+  const format = llm.name === 'anthropic' ? 'anthropic' : 'openai';
+  const tools = getToolDefinitions(format);
+  // Track messages for the tool-calling loop (mutable copy)
+  const messages = [...initialMessages];
+  const toolCallLog = [];
+  // 3. Agent loop: one chatWithTools() request per iteration
+  for (let iteration = 0; iteration < maxIterations; iteration++) {
+    const response = await llm.chatWithTools(messages, tools);
+    // Text response: done
+    if (response.type === 'text') {
+      const fullResponse = response.content;
+      yield { type: 'chunk', data: fullResponse }; // whole answer in a single chunk
+      const totalTimeMs = Date.now() - start;
+      // Store turns in history (only the question and the final answer)
+      await history.addTurn({ role: 'user', content: query });
+      await history.addTurn({
+        role: 'assistant',
+        content: fullResponse,
+        metadata: {
+          mode: 'agent',
+          llmProvider: llm.name,
+          llmModel: llm.model,
+          toolCallCount: toolCallLog.length,
+          iterationCount: iteration + 1, // iteration is zero-based
+          totalTimeMs,
+        },
+      });
+      yield {
+        type: 'done',
+        data: {
+          fullResponse,
+          toolCalls: toolCallLog,
+          metadata: {
+            mode: 'agent',
+            iterationCount: iteration + 1,
+            toolCallCount: toolCallLog.length,
+            totalTimeMs,
+          },
+        },
+      };
+      return;
+    }
+    // Tool calls: execute each and continue loop
+    if (response.type === 'tool_calls') {
+      // Append assistant tool-call message (provider-specific shape)
+      messages.push(llm.formatAssistantToolCall(response));
+      for (const call of response.calls) {
+        const callStart = Date.now();
+        let result;
+        let error = null;
+        // Inject default db/collection if not provided (a falsy value from the LLM counts as not provided)
+        const args = { ...call.arguments };
+        if (opts.db && !args.db) args.db = opts.db;
+        if (opts.collection && !args.collection) args.collection = opts.collection;
+        try {
+          result = await executeTool(call.name, args);
+        } catch (err) {
+          error = err.message;
+          result = { content: [{ type: 'text', text: `Error: ${err.message}` }] }; // failure is reported to the LLM instead of aborting the turn
+        }
+        const callTimeMs = Date.now() - callStart;
+        // Extract text content from result for the LLM (content items without a truthy `text` are JSON-stringified)
+        const resultText = result.content
+          ? result.content.map(c => c.text || JSON.stringify(c)).join('\n')
+          : JSON.stringify(result.structuredContent || {});
+        // Append tool result message (third argument flags an error; see each provider's formatToolResult for how it is used)
+        messages.push(llm.formatToolResult(call.id, resultText, !!error));
+        const logEntry = {
+          name: call.name,
+          args,
+          result: result.structuredContent || null, // only structuredContent is logged; text-only results are recorded as null
+          error,
+          timeMs: callTimeMs,
+        };
+        toolCallLog.push(logEntry);
+        yield { type: 'tool_call', data: logEntry };
+      }
+      // Continue loop to let LLM see results and decide next action
+      continue;
+    }
+  }
+  // Max iterations reached: yield a fallback message (fixed text; it is not generated from the tool results)
+  const fallback = 'I reached the maximum number of tool-calling iterations. Here is what I found so far based on the tool results above.';
+  yield { type: 'chunk', data: fallback };
+  await history.addTurn({ role: 'user', content: query });
+  await history.addTurn({
+    role: 'assistant',
+    content: fallback,
+    metadata: {
+      mode: 'agent',
+      llmProvider: llm.name,
+      llmModel: llm.model,
+      toolCallCount: toolCallLog.length,
+      iterationCount: maxIterations,
+      totalTimeMs: Date.now() - start,
+      maxIterationsReached: true,
+    },
+  });
+  yield {
+    type: 'done',
+    data: {
+      fullResponse: fallback,
+      toolCalls: toolCallLog,
+      metadata: {
+        mode: 'agent',
+        iterationCount: maxIterations,
+        toolCallCount: toolCallLog.length,
+        totalTimeMs: Date.now() - start, // NOTE(review): measured again here, so it can differ slightly from the value stored in history above
+        maxIterationsReached: true,
+      },
+    },
+  };
+}
 module.exports = {
   retrieve,
   chatTurn,
+  agentChatTurn, // agent-mode (tool-calling) counterpart of chatTurn
 };

package/src/lib/llm.js CHANGED Viewed

@@ -6,8 +6,8 @@ const { loadProject } = require('./project');
 /**
  * LLM Provider Adapter
  *
- * Provider-agnostic LLM client with streaming support.
- * Uses native fetch — zero new dependencies.
+ * Provider-agnostic LLM client with streaming and tool-calling support.
+ * Uses native fetch, zero new dependencies.
  */
 // Provider default models
@@ -107,6 +107,8 @@ class AnthropicProvider {
     }
   }
+  get supportsTools() { return true; } // always true: this provider defines chatWithTools()
   async *chat(messages, options = {}) {
     const model = options.model || this.model;
     const maxTokens = options.maxTokens || 4096;
@@ -156,6 +158,112 @@ class AnthropicProvider {
     });
   }
+  /**
+   * Non-streaming tool-calling request.
+   *
+   * POSTs to `${baseUrl}/v1/messages` and classifies the reply: if the
+   * response content holds at least one `tool_use` block the result is
+   * 'tool_calls', otherwise the `text` blocks are concatenated into a 'text'
+   * result. When tool_use blocks are present, any text blocks in the same
+   * response are not returned as `content`; they remain available only
+   * through `_raw`.
+   *
+   * Only the first message with role 'system' is sent (as the top-level
+   * `system` field); all system-role messages are removed from `messages`.
+   *
+   * @param {Array} messages - Conversation messages
+   * @param {Array} tools - Tool definitions in Anthropic format
+   * @param {object} [options]
+   * @param {string} [options.model] - Overrides this.model for this request
+   * @param {number} [options.maxTokens] - Sent as max_tokens (4096 when falsy)
+   * @returns {Promise<{type: 'text'|'tool_calls', content?: string, calls?: Array, stopReason: string}>}
+   *   'tool_calls' results also carry `_raw`, the response's content-block
+   *   array as received; each entry of `calls` is `{ id, name, arguments }`.
+   * @throws {Error} When the HTTP response is not OK; the message contains the
+   *   status code and the response body text.
+   */
+  async chatWithTools(messages, tools, options = {}) {
+    const model = options.model || this.model;
+    const maxTokens = options.maxTokens || 4096;
+    const systemMsg = messages.find(m => m.role === 'system'); // first system message only
+    const nonSystemMsgs = messages.filter(m => m.role !== 'system');
+    const body = {
+      model,
+      max_tokens: maxTokens,
+      stream: false,
+      messages: nonSystemMsgs,
+      tools,
+    };
+    if (systemMsg) {
+      body.system = systemMsg.content;
+    }
+    const res = await fetch(`${this.baseUrl}/v1/messages`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'x-api-key': this.apiKey,
+        'anthropic-version': '2023-06-01',
+      },
+      body: JSON.stringify(body),
+    });
+    if (!res.ok) {
+      const errBody = await res.text();
+      throw new Error(`Anthropic API error (${res.status}): ${errBody}`);
+    }
+    const json = await res.json();
+    const stopReason = json.stop_reason || 'end_turn'; // 'end_turn' is assumed when the field is absent
+    // Check for tool_use blocks (their presence takes precedence over any text blocks)
+    const toolBlocks = (json.content || []).filter(b => b.type === 'tool_use');
+    if (toolBlocks.length > 0) {
+      return {
+        type: 'tool_calls',
+        calls: toolBlocks.map(b => ({
+          id: b.id,
+          name: b.name,
+          arguments: b.input,
+        })),
+        stopReason,
+        _raw: json.content, // kept so formatAssistantToolCall can replay the turn exactly as received
+      };
+    }
+    // Text response (all text blocks joined with no separator)
+    const textBlocks = (json.content || []).filter(b => b.type === 'text');
+    return {
+      type: 'text',
+      content: textBlocks.map(b => b.text).join(''),
+      stopReason,
+    };
+  }
+  /**
+   * Format a tool-calling response as an assistant message.
+   *
+   * When the response still carries `_raw` (the content blocks as received by
+   * chatWithTools), those blocks are reused unchanged as the message content.
+   * Otherwise the content is rebuilt as one `tool_use` block per entry in
+   * `response.calls`.
+   *
+   * @param {object} response - Response from chatWithTools
+   * @returns {{role: string, content: Array}}
+   */
+  formatAssistantToolCall(response) {
+    if (response._raw) {
+      return { role: 'assistant', content: response._raw };
+    }
+    return {
+      role: 'assistant',
+      content: response.calls.map(c => ({
+        type: 'tool_use',
+        id: c.id,
+        name: c.name,
+        input: c.arguments,
+      })),
+    };
+  }
+  /**
+   * Format a tool result as a user message.
+   *
+   * The message content is a single `tool_result` block that references the
+   * originating call through `tool_use_id`. The `is_error` key is present
+   * (set to true) only when isError is truthy; otherwise it is omitted.
+   *
+   * @param {string} callId - Tool call ID
+   * @param {string} content - Stringified result
+   * @param {boolean} [isError=false]
+   * @returns {{role: string, content: Array}}
+   */
+  formatToolResult(callId, content, isError = false) {
+    return {
+      role: 'user',
+      content: [{
+        type: 'tool_result',
+        tool_use_id: callId,
+        content,
+        ...(isError && { is_error: true }), // spreading `false` adds no keys
+      }],
+    };
+  }
   async ping() {
     try {
       const res = await fetch(`${this.baseUrl}/v1/messages`, {
@@ -202,6 +310,8 @@ class OpenAIProvider {
     }
   }
+  get supportsTools() { return true; } // always true: this provider defines chatWithTools()
   async *chat(messages, options = {}) {
     const model = options.model || this.model;
     const maxTokens = options.maxTokens || 4096;
@@ -242,6 +352,103 @@ class OpenAIProvider {
     });
   }
+  /**
+   * Non-streaming tool-calling request (OpenAI format).
+   *
+   * POSTs to `${baseUrl}/v1/chat/completions` and inspects only the first
+   * choice. A non-empty `tool_calls` array on its message produces a
+   * 'tool_calls' result; otherwise the message content (or '' when missing)
+   * is returned as a 'text' result.
+   *
+   * NOTE(review): string-typed tool arguments are passed to JSON.parse without
+   * a try/catch, so malformed JSON from the model surfaces as a thrown
+   * SyntaxError from this method.
+   *
+   * @param {Array} messages - Conversation messages
+   * @param {Array} tools - Tool definitions in OpenAI format
+   * @param {object} [options]
+   * @param {string} [options.model] - Overrides this.model for this request
+   * @param {number} [options.maxTokens] - Sent as max_tokens (4096 when falsy)
+   * @returns {Promise<{type: 'text'|'tool_calls', content?: string, calls?: Array, stopReason: string}>}
+   *   'tool_calls' results also carry `_raw`, the first choice's message
+   *   object as received; each entry of `calls` is `{ id, name, arguments }`
+   *   with `arguments` already parsed into a value.
+   * @throws {Error} When the HTTP response is not OK; the message contains the
+   *   status code and the response body text.
+   */
+  async chatWithTools(messages, tools, options = {}) {
+    const model = options.model || this.model;
+    const maxTokens = options.maxTokens || 4096;
+    const body = {
+      model,
+      max_tokens: maxTokens,
+      stream: false,
+      messages,
+      tools,
+    };
+    const res = await fetch(`${this.baseUrl}/v1/chat/completions`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': `Bearer ${this.apiKey}`,
+      },
+      body: JSON.stringify(body),
+    });
+    if (!res.ok) {
+      const errBody = await res.text();
+      throw new Error(`OpenAI API error (${res.status}): ${errBody}`);
+    }
+    const json = await res.json();
+    const choice = json.choices?.[0] || {}; // only the first choice is considered
+    const msg = choice.message || {};
+    const stopReason = choice.finish_reason || 'stop'; // 'stop' is assumed when the field is absent
+    if (msg.tool_calls && msg.tool_calls.length > 0) {
+      return {
+        type: 'tool_calls',
+        calls: msg.tool_calls.map(tc => ({
+          id: tc.id,
+          name: tc.function.name,
+          arguments: typeof tc.function.arguments === 'string'
+            ? JSON.parse(tc.function.arguments)
+            : tc.function.arguments,
+        })),
+        stopReason,
+        _raw: msg, // kept so formatAssistantToolCall can replay the message exactly as received
+      };
+    }
+    return {
+      type: 'text',
+      content: msg.content || '',
+      stopReason,
+    };
+  }
+  /**
+   * Format a tool-calling response as an assistant message.
+   *
+   * When the response still carries `_raw` (the assistant message object kept
+   * by chatWithTools), that object is returned as-is. Otherwise an assistant
+   * message is rebuilt with `content: null` and one `function` tool call per
+   * entry in `response.calls`, with the arguments re-serialized to a JSON
+   * string.
+   *
+   * @param {object} response - Response from chatWithTools
+   * @returns {{role: string, content: string|null, tool_calls: Array}}
+   */
+  formatAssistantToolCall(response) {
+    if (response._raw) {
+      return response._raw;
+    }
+    return {
+      role: 'assistant',
+      content: null,
+      tool_calls: response.calls.map(c => ({
+        id: c.id,
+        type: 'function',
+        function: {
+          name: c.name,
+          arguments: JSON.stringify(c.arguments),
+        },
+      })),
+    };
+  }
+  /**
+   * Format a tool result as a tool message.
+   *
+   * Only two parameters are declared; any further argument a caller passes
+   * (for example an error flag) is ignored, so a failed tool call is
+   * distinguishable only through the text of `content`.
+   *
+   * @param {string} callId - Tool call ID
+   * @param {string} content - Stringified result
+   * @returns {{role: string, tool_call_id: string, content: string}}
+   */
+  formatToolResult(callId, content) {
+    return {
+      role: 'tool',
+      tool_call_id: callId,
+      content,
+    };
+  }
   async ping() {
     try {
       const res = await fetch(`${this.baseUrl}/v1/models`, {
@@ -268,6 +475,8 @@ class OllamaProvider {
     this.baseUrl = config.baseUrl || PROVIDER_BASE_URLS.ollama;
   }
+  get supportsTools() { return true; } // always true: this provider defines chatWithTools()
   async *chat(messages, options = {}) {
     const model = options.model || this.model;
     const stream = options.stream !== false;
@@ -303,6 +512,99 @@ class OllamaProvider {
     });
   }
+  /**
+   * Non-streaming tool-calling request (OpenAI-compatible format).
+   *
+   * POSTs to `${baseUrl}/v1/chat/completions` with only a Content-Type header
+   * (no credentials) and no max_tokens field. Only the first choice is
+   * inspected: a non-empty `tool_calls` array on its message produces a
+   * 'tool_calls' result; otherwise the message content (or '' when missing)
+   * is returned as a 'text' result.
+   *
+   * NOTE(review): a tool call without an id gets `call_${Date.now()}`. Several
+   * id-less calls in one response are mapped in the same synchronous pass and
+   * can therefore receive the same id. The generated id is also not written
+   * back into `_raw`, which formatAssistantToolCall returns unchanged, so the
+   * replayed assistant message and the later tool-result message may not
+   * reference the same id.
+   * NOTE(review): string-typed tool arguments are passed to JSON.parse without
+   * a try/catch, so malformed JSON from the model surfaces as a thrown
+   * SyntaxError from this method.
+   *
+   * @param {Array} messages - Conversation messages
+   * @param {Array} tools - Tool definitions in OpenAI format
+   * @param {object} [options]
+   * @param {string} [options.model] - Overrides this.model for this request
+   * @returns {Promise<{type: 'text'|'tool_calls', content?: string, calls?: Array, stopReason: string}>}
+   *   'tool_calls' results also carry `_raw`, the first choice's message
+   *   object as received; each entry of `calls` is `{ id, name, arguments }`
+   *   with `arguments` already parsed into a value.
+   * @throws {Error} When the HTTP response is not OK; the message contains the
+   *   status code and the response body text.
+   */
+  async chatWithTools(messages, tools, options = {}) {
+    const model = options.model || this.model;
+    const body = {
+      model,
+      stream: false,
+      messages,
+      tools,
+    };
+    const res = await fetch(`${this.baseUrl}/v1/chat/completions`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body),
+    });
+    if (!res.ok) {
+      const errBody = await res.text();
+      throw new Error(`Ollama API error (${res.status}): ${errBody}`);
+    }
+    const json = await res.json();
+    const choice = json.choices?.[0] || {}; // only the first choice is considered
+    const msg = choice.message || {};
+    const stopReason = choice.finish_reason || 'stop'; // 'stop' is assumed when the field is absent
+    if (msg.tool_calls && msg.tool_calls.length > 0) {
+      return {
+        type: 'tool_calls',
+        calls: msg.tool_calls.map(tc => ({
+          id: tc.id || `call_${Date.now()}`, // fallback id when the response supplies none (see NOTE above)
+          name: tc.function.name,
+          arguments: typeof tc.function.arguments === 'string'
+            ? JSON.parse(tc.function.arguments)
+            : tc.function.arguments,
+        })),
+        stopReason,
+        _raw: msg, // original message; does not contain any fallback id generated above
+      };
+    }
+    return {
+      type: 'text',
+      content: msg.content || '',
+      stopReason,
+    };
+  }
+  /**
+   * Format a tool-calling response as an assistant message.
+   * (Same as OpenAI format since Ollama uses OpenAI-compatible API)
+   *
+   * When the response still carries `_raw` (the assistant message object kept
+   * by chatWithTools), that object is returned as-is. Otherwise an assistant
+   * message is rebuilt with `content: null` and one `function` tool call per
+   * entry in `response.calls`, with the arguments re-serialized to a JSON
+   * string.
+   *
+   * @param {object} response - Response from chatWithTools
+   * @returns {{role: string, content: string|null, tool_calls: Array}}
+   */
+  formatAssistantToolCall(response) {
+    if (response._raw) {
+      return response._raw;
+    }
+    return {
+      role: 'assistant',
+      content: null,
+      tool_calls: response.calls.map(c => ({
+        id: c.id,
+        type: 'function',
+        function: {
+          name: c.name,
+          arguments: JSON.stringify(c.arguments),
+        },
+      })),
+    };
+  }
+  /**
+   * Format a tool result as a tool message.
+   *
+   * Only two parameters are declared; any further argument a caller passes
+   * (for example an error flag) is ignored, so a failed tool call is
+   * distinguishable only through the text of `content`.
+   *
+   * @param {string} callId - Tool call ID
+   * @param {string} content - Stringified result
+   * @returns {{role: string, tool_call_id: string, content: string}}
+   */
+  formatToolResult(callId, content) {
+    return {
+      role: 'tool',
+      tool_call_id: callId,
+      content,
+    };
+  }
   async ping() {
     try {
       const res = await fetch(`${this.baseUrl}/v1/models`);

package/src/lib/mongo.js CHANGED Viewed

@@ -1,7 +1,7 @@
 'use strict';
 /**
- * Get MongoDB URI or exit with a helpful error.
+ * Get MongoDB URI or throw with a helpful error.
  * Checks: env var → config file.
  * @returns {string}
  */
@@ -9,11 +9,11 @@ function requireMongoUri() {
   const { getConfigValue } = require('./config');
   const uri = process.env.MONGODB_URI || getConfigValue('mongodbUri');
   if (!uri) {
-    console.error('Error: MONGODB_URI is not set.');
-    console.error('');
-    console.error('Option 1: export MONGODB_URI="mongodb+srv://user:pass@cluster.mongodb.net/"');
-    console.error('Option 2: vai config set mongodb-uri "mongodb+srv://user:pass@cluster.mongodb.net/"');
-    process.exit(1);
+    throw new Error(
+      'MONGODB_URI is not set.\n' +
+      'Option 1: export MONGODB_URI="mongodb+srv://user:pass@cluster.mongodb.net/"\n' +
+      'Option 2: vai config set mongodb-uri "mongodb+srv://user:pass@cluster.mongodb.net/"'
+    );
   }
   return uri;
 }

package/src/lib/prompt.js CHANGED Viewed

@@ -5,6 +5,7 @@
  *
  * Constructs the message array sent to the LLM from
  * retrieved documents, conversation history, and user query.
+ * Supports both pipeline mode (fixed RAG) and agent mode (tool-calling).
  */
 const DEFAULT_SYSTEM_PROMPT = `You are an assistant powered by a retrieval-augmented generation (RAG) pipeline built with Voyage AI embeddings and MongoDB Atlas Vector Search. Your answers are grounded in documents retrieved from the user's knowledge base.
@@ -23,6 +24,32 @@ const DEFAULT_SYSTEM_PROMPT = `You are an assistant powered by a retrieval-augme
 4. Be concise. Prefer short, direct answers. Use lists or structure when it aids clarity.
 5. For follow-up questions, rely on the newly retrieved context for that turn. Prior context may be stale.`;
+const AGENT_SYSTEM_PROMPT = `You are an AI assistant with access to a suite of Voyage AI and MongoDB Atlas tools. You can search knowledge bases, embed text, compare documents, explore collections, and more. Use your tools to answer the user's questions accurately.
+## Available tools
+- **vai_query**: Full RAG pipeline (embed, vector search, rerank). Use this as your primary tool for answering questions from the knowledge base.
+- **vai_search**: Raw vector search without reranking. Faster, useful for exploratory queries.
+- **vai_rerank**: Rerank candidate documents against a query. Use when you have documents from another source.
+- **vai_embed**: Get the raw embedding vector for a text. Use for debugging or custom logic.
+- **vai_similarity**: Compare two texts semantically. Returns a cosine similarity score.
+- **vai_collections**: List available collections with document counts and vector index info. Call this first if you need to discover which knowledge bases exist.
+- **vai_models**: List available Voyage AI models with pricing. Use when the user asks about model options.
+- **vai_topics**: List educational topics that vai can explain.
+- **vai_explain**: Get a detailed explanation of a topic (embeddings, RAG, vector search, etc).
+- **vai_estimate**: Estimate costs for embedding and query operations.
+- **vai_ingest**: Add new content to a collection (chunk, embed, store).
+## Answering rules
+1. Always use tools to retrieve information before answering. Do not guess or make up facts.
+2. Cite sources from tool results using [Source: <label>] format.
+3. You may call multiple tools in sequence. For example: vai_collections to discover collections, then vai_query to search one.
+4. If a tool returns no results or errors, explain what happened and suggest alternatives.
+5. Be concise. Prefer short, direct answers. Use lists or structure when it aids clarity.
+6. For questions about Voyage AI concepts, use vai_explain rather than answering from memory.
+7. If the user asks you to ingest content, use vai_ingest. Confirm what was stored.`; // default system prompt for agent mode; buildAgentMessages uses it when no override is supplied
 /**
  * Format retrieved documents into a context block.
  * @param {Array<{source: string, text: string, score: number}>} docs
@@ -66,7 +93,7 @@ ${customPrompt}`;
 }
 /**
- * Build the message array for the LLM.
+ * Build the message array for the LLM (pipeline mode).
  *
  * @param {object} params
  * @param {string} params.query - Current user question
@@ -103,9 +130,41 @@ function buildMessages({ query, contextDocs = [], history = [], systemPrompt })
   return messages;
 }
+/**
+ * Build the message array for agent mode (no context injection).
+ * The agent fetches its own context via tool calls.
+ *
+ * The result is always ordered: one system message, then the history turns
+ * in the order given, then the current query as the final user message.
+ * Each history turn is copied down to `{ role, content }`; any other
+ * properties on the turn objects are not carried over.
+ *
+ * @param {object} params
+ * @param {string} params.query - Current user question
+ * @param {Array} [params.history] - Previous conversation turns [{role, content}]
+ * @param {string} [params.systemPrompt] - Override the agent system prompt
+ *   (any falsy value, including an empty string, selects AGENT_SYSTEM_PROMPT)
+ * @returns {Array<{role: string, content: string}>}
+ */
+function buildAgentMessages({ query, history = [], systemPrompt }) {
+  const messages = [];
+  // 1. Agent system prompt (caller override, else the built-in default)
+  messages.push({
+    role: 'system',
+    content: systemPrompt || AGENT_SYSTEM_PROMPT,
+  });
+  // 2. Conversation history (only role and content are kept)
+  for (const turn of history) {
+    messages.push({ role: turn.role, content: turn.content });
+  }
+  // 3. Current user message (no context injection, agent decides what to fetch)
+  messages.push({ role: 'user', content: query });
+  return messages;
+}
 module.exports = {
   DEFAULT_SYSTEM_PROMPT,
+  AGENT_SYSTEM_PROMPT, // default system prompt for agent mode
   buildSystemPrompt,
   formatContextBlock,
   buildMessages,
+  buildAgentMessages, // agent-mode counterpart of buildMessages (no context injection)
 };