npm - voyageai-cli - Versions diffs - 1.26.0 → 1.26.1 - Mend

voyageai-cli 1.26.0 → 1.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/package.json +1 -1
package/src/commands/chat.js +281 -78
package/src/commands/playground.js +42 -19
package/src/lib/chat.js +170 -4
package/src/lib/llm.js +304 -2
package/src/lib/mongo.js +6 -6
package/src/lib/prompt.js +60 -1
package/src/lib/tool-registry.js +194 -0
package/src/mcp/tools/embedding.js +55 -43
package/src/mcp/tools/ingest.js +74 -67
package/src/mcp/tools/management.js +60 -48
package/src/mcp/tools/retrieval.js +181 -163
package/src/mcp/tools/utility.js +171 -153
package/src/playground/index.html +508 -10

package/src/lib/tool-registry.js ADDED Viewed

@@ -0,0 +1,194 @@
+'use strict';
+/**
+ * Tool Registry
+ *
+ * Single source of truth mapping MCP Zod schemas to LLM tool definitions
+ * and dispatching tool execution. Bridges the MCP tool handlers with the
+ * agent chat loop.
+ */
+const { z } = require('zod');
+const schemas = require('../mcp/schemas');
+// Lazy-loaded handlers to avoid circular deps
+let _handlers;
+function getHandlers() {
+  if (!_handlers) {
+    const { handleVaiQuery, handleVaiSearch, handleVaiRerank } = require('../mcp/tools/retrieval');
+    const { handleVaiEmbed, handleVaiSimilarity } = require('../mcp/tools/embedding');
+    const { handleVaiCollections, handleVaiModels } = require('../mcp/tools/management');
+    const { handleVaiTopics, handleVaiExplain, handleVaiEstimate } = require('../mcp/tools/utility');
+    const { handleVaiIngest } = require('../mcp/tools/ingest');
+    _handlers = {
+      vai_query: handleVaiQuery,
+      vai_search: handleVaiSearch,
+      vai_rerank: handleVaiRerank,
+      vai_embed: handleVaiEmbed,
+      vai_similarity: handleVaiSimilarity,
+      vai_collections: handleVaiCollections,
+      vai_models: handleVaiModels,
+      vai_topics: handleVaiTopics,
+      vai_explain: handleVaiExplain,
+      vai_estimate: handleVaiEstimate,
+      vai_ingest: handleVaiIngest,
+    };
+  }
+  return _handlers;
+}
+/**
+ * Tool definitions: name, description, and schema key for each tool.
+ *
+ * - `name` is the tool identifier; executeTool() looks entries up by it.
+ * - `description` is the text handed to the LLM by getToolDefinitions().
+ * - `schemaKey` is the property of `../mcp/schemas` holding the tool's
+ *   Zod field definitions.
+ */
+const TOOL_DEFINITIONS = [
+  {
+    name: 'vai_query',
+    description: 'Full RAG query: embeds the question with Voyage AI, runs vector search against MongoDB Atlas, and reranks results. Use this when you need to answer a question using the knowledge base.',
+    schemaKey: 'querySchema',
+  },
+  {
+    name: 'vai_search',
+    description: 'Raw vector similarity search without reranking. Faster than vai_query but results are ordered by vector distance only. Use for exploratory searches or when you plan to rerank separately.',
+    schemaKey: 'searchSchema',
+  },
+  {
+    name: 'vai_rerank',
+    description: 'Rerank documents against a query using Voyage AI reranker. Takes a query and candidate documents, returns them reordered by relevance.',
+    schemaKey: 'rerankSchema',
+  },
+  {
+    name: 'vai_embed',
+    description: 'Embed text using a Voyage AI model and return the vector representation. Use for custom similarity logic, storing vectors, or debugging.',
+    schemaKey: 'embedSchema',
+  },
+  {
+    name: 'vai_similarity',
+    description: 'Compare two texts semantically by embedding both and computing cosine similarity. Returns a score from -1 to 1.',
+    schemaKey: 'similaritySchema',
+  },
+  {
+    name: 'vai_collections',
+    description: 'List available MongoDB collections with document counts and vector index information. Use to discover which knowledge bases exist.',
+    schemaKey: 'collectionsSchema',
+  },
+  {
+    name: 'vai_models',
+    description: 'List available Voyage AI models with capabilities and pricing. Use when selecting a model or comparing options.',
+    schemaKey: 'modelsSchema',
+  },
+  {
+    name: 'vai_topics',
+    description: 'List all available educational topics. Call this to discover what vai can explain.',
+    schemaKey: 'topicsSchema',
+  },
+  {
+    name: 'vai_explain',
+    description: 'Get a detailed explanation of a topic (embeddings, vector search, RAG, MoE, etc). Supports fuzzy matching.',
+    schemaKey: 'explainSchema',
+  },
+  {
+    name: 'vai_estimate',
+    description: 'Estimate costs for Voyage AI embedding and query operations at various scales.',
+    schemaKey: 'estimateSchema',
+  },
+  {
+    name: 'vai_ingest',
+    description: 'Add a document to a collection: chunks the text, embeds each chunk with Voyage AI, and stores in MongoDB Atlas.',
+    schemaKey: 'ingestSchema',
+  },
+];
+/**
+ * Convert a Zod schema fields object (as used in MCP schemas) to JSON Schema.
+ * Strips fields with defaults from the required array so the LLM doesn't
+ * have to provide them.
+ *
+ * @param {object} zodFields - Plain object of Zod field definitions
+ * @returns {object} JSON Schema object
+ */
+function zodSchemaToJsonSchema(zodFields) {
+  const obj = z.object(zodFields);
+  const jsonSchema = z.toJSONSchema(obj);
+  // Remove $schema key (not needed for tool definitions)
+  delete jsonSchema['$schema'];
+  // Strip fields with 'default' from required array.
+  // LLMs should not be forced to provide values that have defaults.
+  if (jsonSchema.required && jsonSchema.properties) {
+    jsonSchema.required = jsonSchema.required.filter(key => {
+      const prop = jsonSchema.properties[key];
+      return prop && !('default' in prop);
+    });
+    if (jsonSchema.required.length === 0) delete jsonSchema.required;
+  }
+  return jsonSchema;
+}
+/**
+ * Get tool definitions formatted for a specific LLM provider.
+ *
+ * @param {'anthropic'|'openai'|'ollama'} format - Provider format
+ * @returns {Array} Tool definitions in provider-specific format
+ */
+function getToolDefinitions(format) {
+  return TOOL_DEFINITIONS.map(def => {
+    const zodFields = schemas[def.schemaKey];
+    const inputSchema = zodSchemaToJsonSchema(zodFields);
+    if (format === 'anthropic') {
+      return {
+        name: def.name,
+        description: def.description,
+        input_schema: inputSchema,
+      };
+    }
+    // OpenAI / Ollama format
+    return {
+      type: 'function',
+      function: {
+        name: def.name,
+        description: def.description,
+        parameters: inputSchema,
+      },
+    };
+  });
+}
+/**
+ * Execute a tool by name with the given arguments.
+ * Validates args against the Zod schema, then calls the handler.
+ *
+ * @param {string} name - Tool name (e.g. 'vai_query')
+ * @param {object} args - Tool arguments
+ * @returns {Promise<{structuredContent: object, content: Array}>}
+ */
+async function executeTool(name, args) {
+  const handlers = getHandlers();
+  const handler = handlers[name];
+  if (!handler) {
+    throw new Error(`Unknown tool: "${name}". Available: ${Object.keys(handlers).join(', ')}`);
+  }
+  // Find the schema for validation
+  const def = TOOL_DEFINITIONS.find(d => d.name === name);
+  if (!def) throw new Error(`No schema found for tool: "${name}"`);
+  const zodFields = schemas[def.schemaKey];
+  const zodObj = z.object(zodFields);
+  // Validate and apply defaults
+  const validated = zodObj.parse(args);
+  return handler(validated);
+}
+module.exports = {
+  TOOL_DEFINITIONS,
+  zodSchemaToJsonSchema,
+  getToolDefinitions,
+  executeTool,
+};

package/src/mcp/tools/embedding.js CHANGED Viewed

@@ -3,65 +3,77 @@
 const { generateEmbeddings } = require('../../lib/api');
 const { cosineSimilarity } = require('../../lib/math');
+/**
+ * Handler for vai_embed: embed text and return the vector.
+ * @param {object} input - Validated input matching embedSchema
+ * @returns {Promise<{structuredContent: object, content: Array}>}
+ */
+async function handleVaiEmbed(input) {
+  const embedOpts = { model: input.model, inputType: input.inputType };
+  if (input.dimensions) embedOpts.dimensions = input.dimensions;
+  const result = await generateEmbeddings([input.text], embedOpts);
+  const vector = result.data[0].embedding;
+  const structured = {
+    text: input.text.slice(0, 100) + (input.text.length > 100 ? '...' : ''),
+    model: input.model,
+    vector,
+    dimensions: vector.length,
+    inputType: input.inputType,
+  };
+  return {
+    structuredContent: structured,
+    content: [{ type: 'text', text: `Embedded text (${vector.length} dimensions, model: ${input.model}, type: ${input.inputType}). Vector: [${vector.slice(0, 5).map(v => v.toFixed(4)).join(', ')}, ... ${vector.length - 5} more]` }],
+  };
+}
+/**
+ * Handler for vai_similarity: compare two texts semantically.
+ * @param {object} input - Validated input matching similaritySchema
+ * @returns {Promise<{structuredContent: object, content: Array}>}
+ */
+async function handleVaiSimilarity(input) {
+  const result = await generateEmbeddings([input.text1, input.text2], {
+    model: input.model,
+    inputType: 'document',
+  });
+  const vec1 = result.data[0].embedding;
+  const vec2 = result.data[1].embedding;
+  const similarity = cosineSimilarity(vec1, vec2);
+  return {
+    structuredContent: {
+      text1: input.text1.slice(0, 100) + (input.text1.length > 100 ? '...' : ''),
+      text2: input.text2.slice(0, 100) + (input.text2.length > 100 ? '...' : ''),
+      similarity,
+      model: input.model,
+    },
+    content: [{ type: 'text', text: `Similarity: ${similarity.toFixed(4)} (model: ${input.model})\nText 1: "${input.text1.slice(0, 80)}..."\nText 2: "${input.text2.slice(0, 80)}..."` }],
+  };
+}
 /**
  * Register embedding tools: vai_embed, vai_similarity
  * @param {import('@modelcontextprotocol/sdk/server/mcp.js').McpServer} server
  * @param {object} schemas
  */
 function registerEmbeddingTools(server, schemas) {
-  // vai_embed — embed text and return the vector
   server.tool(
     'vai_embed',
     'Embed text using a Voyage AI model and return the vector representation. Use when you need the raw embedding vector for custom similarity logic, storing in another system, or debugging.',
     schemas.embedSchema,
-    async (input) => {
-      const embedOpts = { model: input.model, inputType: input.inputType };
-      if (input.dimensions) embedOpts.dimensions = input.dimensions;
-      const result = await generateEmbeddings([input.text], embedOpts);
-      const vector = result.data[0].embedding;
-      const structured = {
-        text: input.text.slice(0, 100) + (input.text.length > 100 ? '...' : ''),
-        model: input.model,
-        vector,
-        dimensions: vector.length,
-        inputType: input.inputType,
-      };
-      return {
-        structuredContent: structured,
-        content: [{ type: 'text', text: `Embedded text (${vector.length} dimensions, model: ${input.model}, type: ${input.inputType}). Vector: [${vector.slice(0, 5).map(v => v.toFixed(4)).join(', ')}, ... ${vector.length - 5} more]` }],
-      };
-    }
+    handleVaiEmbed
   );
-  // vai_similarity — compare two texts
   server.tool(
     'vai_similarity',
     'Compare two texts semantically by embedding both and computing cosine similarity. Returns a score from -1 (opposite) to 1 (identical). Use for duplicate detection, relevance checking, or topic comparison.',
     schemas.similaritySchema,
-    async (input) => {
-      const result = await generateEmbeddings([input.text1, input.text2], {
-        model: input.model,
-        inputType: 'document',
-      });
-      const vec1 = result.data[0].embedding;
-      const vec2 = result.data[1].embedding;
-      const similarity = cosineSimilarity(vec1, vec2);
-      return {
-        structuredContent: {
-          text1: input.text1.slice(0, 100) + (input.text1.length > 100 ? '...' : ''),
-          text2: input.text2.slice(0, 100) + (input.text2.length > 100 ? '...' : ''),
-          similarity,
-          model: input.model,
-        },
-        content: [{ type: 'text', text: `Similarity: ${similarity.toFixed(4)} (model: ${input.model})\nText 1: "${input.text1.slice(0, 80)}..."\nText 2: "${input.text2.slice(0, 80)}..."` }],
-      };
-    }
+    handleVaiSimilarity
   );
 }
-module.exports = { registerEmbeddingTools };
+module.exports = { registerEmbeddingTools, handleVaiEmbed, handleVaiSimilarity };

package/src/mcp/tools/ingest.js CHANGED Viewed

@@ -7,83 +7,90 @@ const { loadProject } = require('../../lib/project');
 const { getDefaultModel } = require('../../lib/catalog');
 /**
- * Register the vai_ingest tool (write operation).
- * @param {import('@modelcontextprotocol/sdk/server/mcp.js').McpServer} server
- * @param {object} schemas
+ * Handler for vai_ingest: chunk, embed, and store a document.
+ * @param {object} input - Validated input matching ingestSchema
+ * @returns {Promise<{structuredContent: object, content: Array}>}
  */
-function registerIngestTool(server, schemas) {
-  server.tool(
-    'vai_ingest',
-    'Add a document to a collection: chunks the text, embeds each chunk with Voyage AI, and stores them in MongoDB Atlas. Use when the user provides new content to add to the knowledge base.',
-    schemas.ingestSchema,
-    async (input) => {
-      const { config: proj } = loadProject();
-      const db = input.db || proj.db;
-      const collName = input.collection || proj.collection;
-      if (!db) throw new Error('No database specified. Pass db parameter or configure via vai init.');
-      if (!collName) throw new Error('No collection specified. Pass collection parameter or configure via vai init.');
+async function handleVaiIngest(input) {
+  const { config: proj } = loadProject();
+  const db = input.db || proj.db;
+  const collName = input.collection || proj.collection;
+  if (!db) throw new Error('No database specified. Pass db parameter or configure via vai init.');
+  if (!collName) throw new Error('No collection specified. Pass collection parameter or configure via vai init.');
-      const model = input.model || proj.model || getDefaultModel();
-      const start = Date.now();
+  const model = input.model || proj.model || getDefaultModel();
+  const start = Date.now();
-      // Step 1: Chunk the text
-      const chunks = chunk(input.text, {
-        strategy: input.chunkStrategy,
-        size: input.chunkSize,
-      });
+  // Step 1: Chunk the text
+  const chunks = chunk(input.text, {
+    strategy: input.chunkStrategy,
+    size: input.chunkSize,
+  });
-      if (chunks.length === 0) {
-        return {
-          structuredContent: { source: input.source || 'unknown', chunksCreated: 0, collection: collName },
-          content: [{ type: 'text', text: 'No chunks produced — text may be too short or empty.' }],
-        };
-      }
+  if (chunks.length === 0) {
+    return {
+      structuredContent: { source: input.source || 'unknown', chunksCreated: 0, collection: collName },
+      content: [{ type: 'text', text: 'No chunks produced — text may be too short or empty.' }],
+    };
+  }
-      // Step 2: Embed all chunks
-      const embedResult = await generateEmbeddings(chunks, {
+  // Step 2: Embed all chunks
+  const embedResult = await generateEmbeddings(chunks, {
+    model,
+    inputType: 'document',
+  });
+  // Step 3: Store in MongoDB
+  const { client, collection: coll } = await getMongoCollection(db, collName);
+  try {
+    const docs = chunks.map((text, i) => ({
+      text,
+      embedding: embedResult.data[i].embedding,
+      source: input.source || 'mcp-ingest',
+      metadata: {
+        ...(input.metadata || {}),
+        ingestedAt: new Date().toISOString(),
+        chunkIndex: i,
+        totalChunks: chunks.length,
         model,
-        inputType: 'document',
-      });
+        chunkStrategy: input.chunkStrategy,
+      },
+    }));
-      // Step 3: Store in MongoDB
-      const { client, collection: coll } = await getMongoCollection(db, collName);
-      try {
-        const docs = chunks.map((text, i) => ({
-          text,
-          embedding: embedResult.data[i].embedding,
-          source: input.source || 'mcp-ingest',
-          metadata: {
-            ...(input.metadata || {}),
-            ingestedAt: new Date().toISOString(),
-            chunkIndex: i,
-            totalChunks: chunks.length,
-            model,
-            chunkStrategy: input.chunkStrategy,
-          },
-        }));
+    await coll.insertMany(docs);
+    const timeMs = Date.now() - start;
-        await coll.insertMany(docs);
-        const timeMs = Date.now() - start;
+    const structured = {
+      source: input.source || 'mcp-ingest',
+      chunksCreated: chunks.length,
+      collection: collName,
+      database: db,
+      model,
+      timeMs,
+      metadata: input.metadata || {},
+    };
-        const structured = {
-          source: input.source || 'mcp-ingest',
-          chunksCreated: chunks.length,
-          collection: collName,
-          database: db,
-          model,
-          timeMs,
-          metadata: input.metadata || {},
-        };
+    return {
+      structuredContent: structured,
+      content: [{ type: 'text', text: `Ingested "${input.source || 'document'}" into ${db}.${collName}: ${chunks.length} chunks embedded with ${model} (${timeMs}ms)` }],
+    };
+  } finally {
+    await client.close();
+  }
+}
-        return {
-          structuredContent: structured,
-          content: [{ type: 'text', text: `Ingested "${input.source || 'document'}" into ${db}.${collName}: ${chunks.length} chunks embedded with ${model} (${timeMs}ms)` }],
-        };
-      } finally {
-        await client.close();
-      }
-    }
+/**
+ * Register the vai_ingest tool (write operation).
+ * @param {import('@modelcontextprotocol/sdk/server/mcp.js').McpServer} server
+ * @param {object} schemas
+ */
+function registerIngestTool(server, schemas) {
+  server.tool(
+    'vai_ingest',
+    'Add a document to a collection: chunks the text, embeds each chunk with Voyage AI, and stores them in MongoDB Atlas. Use when the user provides new content to add to the knowledge base.',
+    schemas.ingestSchema,
+    handleVaiIngest
   );
 }
-module.exports = { registerIngestTool };
+module.exports = { registerIngestTool, handleVaiIngest };

package/src/mcp/tools/management.js CHANGED Viewed

@@ -4,70 +4,82 @@ const { MODEL_CATALOG } = require('../../lib/catalog');
 const { loadProject } = require('../../lib/project');
 const { introspectCollections } = require('../../lib/workflow-utils');
+/**
+ * Handler for vai_collections: list collections with vector index info.
+ * @param {object} input - Validated input matching collectionsSchema
+ * @returns {Promise<{structuredContent: object, content: Array}>}
+ */
+async function handleVaiCollections(input) {
+  const { config: proj } = loadProject();
+  const dbName = input.db || proj.db;
+  if (!dbName) throw new Error('No database specified. Pass db parameter or configure via vai init.');
+  const collections = await introspectCollections(dbName);
+  return {
+    structuredContent: { database: dbName, collections },
+    content: [{
+      type: 'text',
+      text: `Database: ${dbName}\n\n${collections.map(c =>
+        `• ${c.name} — ${c.documentCount} docs${c.hasVectorIndex ? ` ✓ vector index (${c.embeddingField}, ${c.dimensions}d)` : ''}`
+      ).join('\n')}`,
+    }],
+  };
+}
+/**
+ * Handler for vai_models: list Voyage AI models.
+ * @param {object} input - Validated input matching modelsSchema
+ * @returns {Promise<{structuredContent: object, content: Array}>}
+ */
+async function handleVaiModels(input) {
+  let models = MODEL_CATALOG.filter(m => !m.legacy && !m.unreleased);
+  if (input.category !== 'all') {
+    models = models.filter(m => m.type === input.category);
+  }
+  const mapped = models.map(m => ({
+    id: m.name,
+    name: m.name,
+    type: m.type,
+    dimensions: m.dimensions,
+    maxTokens: m.maxTokens,
+    pricePerMToken: m.pricePerMToken,
+    ...(m.architecture && { architecture: m.architecture }),
+    ...(m.sharedSpace && { sharedSpace: m.sharedSpace }),
+  }));
+  return {
+    structuredContent: { category: input.category, models: mapped },
+    content: [{
+      type: 'text',
+      text: `Available ${input.category === 'all' ? '' : input.category + ' '}models:\n\n${mapped.map(m =>
+        `• ${m.name} (${m.type}) — ${m.dimensions}d, $${m.pricePerMToken}/M tokens`
+      ).join('\n')}`,
+    }],
+  };
+}
 /**
  * Register management tools: vai_collections, vai_models
  * @param {import('@modelcontextprotocol/sdk/server/mcp.js').McpServer} server
  * @param {object} schemas
  */
 function registerManagementTools(server, schemas) {
-  // vai_collections — list collections with vector index info
   server.tool(
     'vai_collections',
     'List available MongoDB collections with document counts and vector index information. Use at the start of a task to discover which knowledge bases exist, or when the user mentions a topic and you need to find the right collection.',
     schemas.collectionsSchema,
-    async (input) => {
-      const { config: proj } = loadProject();
-      const dbName = input.db || proj.db;
-      if (!dbName) throw new Error('No database specified. Pass db parameter or configure via vai init.');
-      const collections = await introspectCollections(dbName);
-      return {
-        structuredContent: { database: dbName, collections },
-        content: [{
-          type: 'text',
-          text: `Database: ${dbName}\n\n${collections.map(c =>
-            `• ${c.name} — ${c.documentCount} docs${c.hasVectorIndex ? ` ✓ vector index (${c.embeddingField}, ${c.dimensions}d)` : ''}`
-          ).join('\n')}`,
-        }],
-      };
-    }
+    handleVaiCollections
   );
-  // vai_models — list Voyage AI models
   server.tool(
     'vai_models',
     'List available Voyage AI models with capabilities, benchmarks, and pricing. Use when selecting a model for embedding or reranking, or when the user asks about model tradeoffs.',
     schemas.modelsSchema,
-    async (input) => {
-      let models = MODEL_CATALOG.filter(m => !m.legacy && !m.unreleased);
-      if (input.category !== 'all') {
-        models = models.filter(m => m.type === input.category);
-      }
-      const mapped = models.map(m => ({
-        id: m.name,
-        name: m.name,
-        type: m.type,
-        dimensions: m.dimensions,
-        maxTokens: m.maxTokens,
-        pricePerMToken: m.pricePerMToken,
-        ...(m.architecture && { architecture: m.architecture }),
-        ...(m.sharedSpace && { sharedSpace: m.sharedSpace }),
-      }));
-      return {
-        structuredContent: { category: input.category, models: mapped },
-        content: [{
-          type: 'text',
-          text: `Available ${input.category === 'all' ? '' : input.category + ' '}models:\n\n${mapped.map(m =>
-            `• ${m.name} (${m.type}) — ${m.dimensions}d, $${m.pricePerMToken}/M tokens`
-          ).join('\n')}`,
-        }],
-      };
-    }
+    handleVaiModels
   );
 }
-module.exports = { registerManagementTools, introspectCollections };
+module.exports = { registerManagementTools, handleVaiCollections, handleVaiModels, introspectCollections };