npm - @mastra/rag - Versions diffs - 0.1.23 → 0.2.0-alpha.1 - Mend

@mastra/rag 0.1.23 → 0.2.0-alpha.1

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (12)

package/.turbo/turbo-build.log +7 -7
package/CHANGELOG.md +36 -0
package/dist/_tsup-dts-rollup.d.cts +263 -5
package/dist/_tsup-dts-rollup.d.ts +263 -5
package/dist/index.cjs +92 -13
package/dist/index.js +92 -13
package/package.json +8 -5
package/src/graph-rag/index.ts +1 -1
package/src/rerank/index.ts +4 -3
package/src/tools/graph-rag.ts +23 -3
package/src/tools/vector-query.ts +28 -4
package/src/utils/convert-sources.ts +43 -0

package/dist/index.js CHANGED Viewed

@@ -3,6 +3,7 @@ import { z } from 'zod';
 import { parse } from 'node-html-better-parser';
 import { encodingForModel, getEncoding } from 'js-tiktoken';
 import { CohereRelevanceScorer, MastraAgentRelevanceScorer } from '@mastra/core/relevance';
+import { Big } from 'big.js';
 import { createTool } from '@mastra/core/tools';
 import { embed } from 'ai';
@@ -5948,9 +5949,9 @@ async function rerank(results, query, model, options) {
     ...DEFAULT_WEIGHTS,
     ...options.weights
   };
-  const totalWeights = Object.values(weights).reduce((sum, weight) => sum + weight, 0);
-  if (totalWeights !== 1) {
-    throw new Error("Weights must add up to 1");
+  const sum = Object.values(weights).reduce((acc, w) => acc.plus(w.toString()), new Big(0));
+  if (!sum.eq(1)) {
+    throw new Error(`Weights must add up to 1. Got ${sum} from ${weights}`);
   }
   const resultLength = results.length;
   const queryAnalysis = queryEmbedding ? analyzeQueryEmbedding(queryEmbedding) : null;
@@ -6269,12 +6270,44 @@ var filterDescription = `JSON-formatted criteria to refine search results.
 - IMPORTANT: Always ensure JSON is properly closed with matching brackets
 - Multiple filters can be combined`;
+// src/utils/convert-sources.ts
+var convertToSources = (results) => {
+  return results.map((result) => {
+    if ("content" in result) {
+      return {
+        id: result.id,
+        vector: result.embedding || [],
+        score: result.score,
+        metadata: result.metadata,
+        document: result.content || ""
+      };
+    }
+    if ("result" in result) {
+      return {
+        id: result.result.id,
+        vector: result.result.vector || [],
+        score: result.score,
+        metadata: result.result.metadata,
+        document: result.result.document || ""
+      };
+    }
+    return {
+      id: result.id,
+      vector: result.vector || [],
+      score: result.score,
+      metadata: result.metadata,
+      document: result.document || ""
+    };
+  });
+};
 // src/tools/graph-rag.ts
 var createGraphRAGTool = ({
   vectorStoreName,
   indexName,
   model,
   enableFilter = false,
+  includeSources = true,
   graphOptions = {
     dimension: 1536,
     randomWalkSteps: 100,
@@ -6299,8 +6332,27 @@ var createGraphRAGTool = ({
   return createTool({
     id: toolId,
     inputSchema,
+    // Output schema includes `sources`, which exposes the full set of retrieved chunks (QueryResult objects)
+    // Each source contains all information needed to reference
+    // the original document, chunk, and similarity score.
     outputSchema: z.object({
-      relevantContext: z.any()
+      // Array of metadata or content for compatibility with prior usage
+      relevantContext: z.any(),
+      // Array of full retrieval result objects
+      sources: z.array(
+        z.object({
+          id: z.string(),
+          // Unique chunk/document identifier
+          metadata: z.any(),
+          // All metadata fields (document ID, etc.)
+          vector: z.array(z.number()),
+          // Embedding vector (if available)
+          score: z.number(),
+          // Similarity score for this retrieval
+          document: z.string()
+          // Full chunk/document text (if available)
+        })
+      )
     }),
     description: toolDescription,
     execute: async ({ context: { queryText, topK, filter }, mastra }) => {
@@ -6320,7 +6372,7 @@ var createGraphRAGTool = ({
           if (logger) {
             logger.error("Vector store not found", { vectorStoreName });
           }
-          return { relevantContext: [] };
+          return { relevantContext: [], sources: [] };
         }
         let queryFilter = {};
         if (enableFilter) {
@@ -6379,8 +6431,10 @@ var createGraphRAGTool = ({
         if (logger) {
           logger.debug("Returning relevant context chunks", { count: relevantChunks.length });
         }
+        const sources = includeSources ? convertToSources(rerankedResults) : [];
         return {
-          relevantContext: relevantChunks
+          relevantContext: relevantChunks,
+          sources
         };
       } catch (err) {
         if (logger) {
@@ -6390,7 +6444,7 @@ var createGraphRAGTool = ({
             errorStack: err instanceof Error ? err.stack : void 0
           });
         }
-        return { relevantContext: [] };
+        return { relevantContext: [], sources: [] };
       }
     }
   });
@@ -6400,6 +6454,8 @@ var createVectorQueryTool = ({
   indexName,
   model,
   enableFilter = false,
+  includeVectors = false,
+  includeSources = true,
   reranker,
   id,
   description
@@ -6417,8 +6473,27 @@ var createVectorQueryTool = ({
   return createTool({
     id: toolId,
     inputSchema,
+    // Output schema includes `sources`, which exposes the full set of retrieved chunks (QueryResult objects)
+    // Each source contains all information needed to reference
+    // the original document, chunk, and similarity score.
     outputSchema: z.object({
-      relevantContext: z.any()
+      // Array of metadata or content for compatibility with prior usage
+      relevantContext: z.any(),
+      // Array of full retrieval result objects
+      sources: z.array(
+        z.object({
+          id: z.string(),
+          // Unique chunk/document identifier
+          metadata: z.any(),
+          // All metadata fields (document ID, etc.)
+          vector: z.array(z.number()),
+          // Embedding vector (if available)
+          score: z.number(),
+          // Similarity score for this retrieval
+          document: z.string()
+          // Full chunk/document text (if available)
+        })
+      )
     }),
     description: toolDescription,
     execute: async ({ context: { queryText, topK, filter }, mastra }) => {
@@ -6438,7 +6513,7 @@ var createVectorQueryTool = ({
           if (logger) {
             logger.error("Vector store not found", { vectorStoreName });
           }
-          return { relevantContext: [] };
+          return { relevantContext: [], sources: [] };
         }
         let queryFilter = {};
         if (enableFilter && filter) {
@@ -6462,7 +6537,8 @@ var createVectorQueryTool = ({
           queryText,
           model,
           queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
-          topK: topKValue
+          topK: topKValue,
+          includeVectors
         });
         if (logger) {
           logger.debug("vectorQuerySearch returned results", { count: results.length });
@@ -6482,14 +6558,17 @@ var createVectorQueryTool = ({
           if (logger) {
             logger.debug("Returning reranked relevant context chunks", { count: relevantChunks2.length });
           }
-          return { relevantContext: relevantChunks2 };
+          const sources2 = includeSources ? convertToSources(rerankedResults) : [];
+          return { relevantContext: relevantChunks2, sources: sources2 };
         }
         const relevantChunks = results.map((result) => result?.metadata);
         if (logger) {
           logger.debug("Returning relevant context chunks", { count: relevantChunks.length });
         }
+        const sources = includeSources ? convertToSources(results) : [];
         return {
-          relevantContext: relevantChunks
+          relevantContext: relevantChunks,
+          sources
         };
       } catch (err) {
         if (logger) {
@@ -6499,7 +6578,7 @@ var createVectorQueryTool = ({
             errorStack: err instanceof Error ? err.stack : void 0
           });
         }
-        return { relevantContext: [] };
+        return { relevantContext: [], sources: [] };
       }
     }
   });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mastra/rag",
-  "version": "0.1.23",
+  "version": "0.2.0-alpha.1",
   "description": "",
   "type": "module",
   "main": "dist/index.js",
@@ -22,19 +22,21 @@
   "license": "Elastic-2.0",
   "dependencies": {
     "@paralleldrive/cuid2": "^2.2.2",
+    "big.js": "^7.0.1",
     "js-tiktoken": "^1.0.19",
     "node-html-better-parser": "^1.4.7",
     "pathe": "^2.0.3",
-    "zod": "^3.24.3",
-    "@mastra/core": "^0.9.4"
+    "zod": "^3.24.3"
   },
   "peerDependencies": {
-    "ai": "^4.0.0"
+    "ai": "^4.0.0",
+    "@mastra/core": "^0.9.4"
   },
   "devDependencies": {
     "@ai-sdk/cohere": "latest",
     "@ai-sdk/openai": "latest",
     "@microsoft/api-extractor": "^7.52.5",
+    "@types/big.js": "^6.2.2",
     "@types/node": "^20.17.27",
     "ai": "^4.2.2",
     "dotenv": "^16.4.7",
@@ -42,7 +44,8 @@
     "tsup": "^8.4.0",
     "typescript": "^5.8.2",
     "vitest": "^3.1.2",
-    "@internal/lint": "0.0.5"
+    "@internal/lint": "0.0.5",
+    "@mastra/core": "0.10.0-alpha.1"
   },
   "keywords": [
     "rag",

package/src/graph-rag/index.ts CHANGED Viewed

@@ -16,7 +16,7 @@ export interface GraphNode {
   metadata?: Record<string, any>;
 }
-interface RankedNode extends GraphNode {
+export interface RankedNode extends GraphNode {
   score: number;
 }

package/src/rerank/index.ts CHANGED Viewed

@@ -2,6 +2,7 @@ import type { MastraLanguageModel } from '@mastra/core/agent';
 import { MastraAgentRelevanceScorer, CohereRelevanceScorer } from '@mastra/core/relevance';
 import type { RelevanceScoreProvider } from '@mastra/core/relevance';
 import type { QueryResult } from '@mastra/core/vector';
+import { Big } from 'big.js';
 // Default weights for different scoring components (must add up to 1)
 const DEFAULT_WEIGHTS = {
@@ -102,9 +103,9 @@ export async function rerank(
   };
   //weights must add up to 1
-  const totalWeights = Object.values(weights).reduce((sum, weight) => sum + weight, 0);
-  if (totalWeights !== 1) {
-    throw new Error('Weights must add up to 1');
+  const sum = Object.values(weights).reduce((acc: Big, w: number) => acc.plus(w.toString()), new Big(0));
+  if (!sum.eq(1)) {
+    throw new Error(`Weights must add up to 1. Got ${sum} from ${weights}`);
   }
   const resultLength = results.length;

package/src/tools/graph-rag.ts CHANGED Viewed

@@ -10,12 +10,14 @@ import {
   topKDescription,
   queryTextDescription,
 } from '../utils';
+import { convertToSources } from '../utils/convert-sources';
 export const createGraphRAGTool = ({
   vectorStoreName,
   indexName,
   model,
   enableFilter = false,
+  includeSources = true,
   graphOptions = {
     dimension: 1536,
     randomWalkSteps: 100,
@@ -29,6 +31,7 @@ export const createGraphRAGTool = ({
   indexName: string;
   model: EmbeddingModel<string>;
   enableFilter?: boolean;
+  includeSources?: boolean;
   graphOptions?: {
     dimension?: number;
     randomWalkSteps?: number;
@@ -37,7 +40,7 @@ export const createGraphRAGTool = ({
   };
   id?: string;
   description?: string;
-}): ReturnType<typeof createTool> => {
+}) => {
   const toolId = id || `GraphRAG ${vectorStoreName} ${indexName} Tool`;
   const toolDescription = description || defaultGraphRagDescription();
   // Initialize GraphRAG
@@ -59,8 +62,22 @@ export const createGraphRAGTool = ({
   return createTool({
     id: toolId,
     inputSchema,
+    // Output schema includes `sources`, which exposes the full set of retrieved chunks (QueryResult objects)
+    // Each source contains all information needed to reference
+    // the original document, chunk, and similarity score.
     outputSchema: z.object({
+      // Array of metadata or content for compatibility with prior usage
       relevantContext: z.any(),
+      // Array of full retrieval result objects
+      sources: z.array(
+        z.object({
+          id: z.string(), // Unique chunk/document identifier
+          metadata: z.any(), // All metadata fields (document ID, etc.)
+          vector: z.array(z.number()), // Embedding vector (if available)
+          score: z.number(), // Similarity score for this retrieval
+          document: z.string(), // Full chunk/document text (if available)
+        }),
+      ),
     }),
     description: toolDescription,
     execute: async ({ context: { queryText, topK, filter }, mastra }) => {
@@ -86,7 +103,7 @@ export const createGraphRAGTool = ({
           if (logger) {
             logger.error('Vector store not found', { vectorStoreName });
           }
-          return { relevantContext: [] };
+          return { relevantContext: [], sources: [] };
         }
         let queryFilter = {};
@@ -154,8 +171,11 @@ export const createGraphRAGTool = ({
         if (logger) {
           logger.debug('Returning relevant context chunks', { count: relevantChunks.length });
         }
+        // `sources` exposes the full retrieval objects
+        const sources = includeSources ? convertToSources(rerankedResults) : [];
         return {
           relevantContext: relevantChunks,
+          sources,
         };
       } catch (err) {
         if (logger) {
@@ -165,7 +185,7 @@ export const createGraphRAGTool = ({
             errorStack: err instanceof Error ? err.stack : undefined,
           });
         }
-        return { relevantContext: [] };
+        return { relevantContext: [], sources: [] };
       }
     },
   });

package/src/tools/vector-query.ts CHANGED Viewed

@@ -11,12 +11,15 @@ import {
   topKDescription,
   queryTextDescription,
 } from '../utils';
+import { convertToSources } from '../utils/convert-sources';
 export const createVectorQueryTool = ({
   vectorStoreName,
   indexName,
   model,
   enableFilter = false,
+  includeVectors = false,
+  includeSources = true,
   reranker,
   id,
   description,
@@ -25,10 +28,12 @@ export const createVectorQueryTool = ({
   indexName: string;
   model: EmbeddingModel<string>;
   enableFilter?: boolean;
+  includeVectors?: boolean;
+  includeSources?: boolean;
   reranker?: RerankConfig;
   id?: string;
   description?: string;
-}): ReturnType<typeof createTool> => {
+}) => {
   const toolId = id || `VectorQuery ${vectorStoreName} ${indexName} Tool`;
   const toolDescription = description || defaultVectorQueryDescription();
   // Create base schema with required fields
@@ -47,8 +52,22 @@ export const createVectorQueryTool = ({
   return createTool({
     id: toolId,
     inputSchema,
+    // Output schema includes `sources`, which exposes the full set of retrieved chunks (QueryResult objects)
+    // Each source contains all information needed to reference
+    // the original document, chunk, and similarity score.
     outputSchema: z.object({
+      // Array of metadata or content for compatibility with prior usage
       relevantContext: z.any(),
+      // Array of full retrieval result objects
+      sources: z.array(
+        z.object({
+          id: z.string(), // Unique chunk/document identifier
+          metadata: z.any(), // All metadata fields (document ID, etc.)
+          vector: z.array(z.number()), // Embedding vector (if available)
+          score: z.number(), // Similarity score for this retrieval
+          document: z.string(), // Full chunk/document text (if available)
+        }),
+      ),
     }),
     description: toolDescription,
     execute: async ({ context: { queryText, topK, filter }, mastra }) => {
@@ -75,7 +94,7 @@ export const createVectorQueryTool = ({
           if (logger) {
             logger.error('Vector store not found', { vectorStoreName });
           }
-          return { relevantContext: [] };
+          return { relevantContext: [], sources: [] };
         }
         // Get relevant chunks from the vector database
         let queryFilter = {};
@@ -103,6 +122,7 @@ export const createVectorQueryTool = ({
           model,
           queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : undefined,
           topK: topKValue,
+          includeVectors,
         });
         if (logger) {
           logger.debug('vectorQuerySearch returned results', { count: results.length });
@@ -122,15 +142,19 @@ export const createVectorQueryTool = ({
           if (logger) {
             logger.debug('Returning reranked relevant context chunks', { count: relevantChunks.length });
           }
-          return { relevantContext: relevantChunks };
+          const sources = includeSources ? convertToSources(rerankedResults) : [];
+          return { relevantContext: relevantChunks, sources };
         }
         const relevantChunks = results.map(result => result?.metadata);
         if (logger) {
           logger.debug('Returning relevant context chunks', { count: relevantChunks.length });
         }
+        // `sources` exposes the full retrieval objects
+        const sources = includeSources ? convertToSources(results) : [];
         return {
           relevantContext: relevantChunks,
+          sources,
         };
       } catch (err) {
         if (logger) {
@@ -140,7 +164,7 @@ export const createVectorQueryTool = ({
             errorStack: err instanceof Error ? err.stack : undefined,
           });
         }
-        return { relevantContext: [] };
+        return { relevantContext: [], sources: [] };
       }
     },
   });

package/src/utils/convert-sources.ts ADDED Viewed

@@ -0,0 +1,43 @@
+import type { QueryResult } from '@mastra/core';
+import type { RankedNode } from '../graph-rag';
+import type { RerankResult } from '../rerank';
+type SourceInput = QueryResult | RankedNode | RerankResult;
+/**
+ * Convert an array of source inputs (QueryResult, RankedNode, or RerankResult) to an array of sources.
+ * @param results Array of source inputs to convert.
+ * @returns Array of sources.
+ */
+export const convertToSources = (results: SourceInput[]) => {
+  return results.map(result => {
+    // RankedNode
+    if ('content' in result) {
+      return {
+        id: result.id,
+        vector: result.embedding || [],
+        score: result.score,
+        metadata: result.metadata,
+        document: result.content || '',
+      };
+    }
+    // RerankResult
+    if ('result' in result) {
+      return {
+        id: result.result.id,
+        vector: result.result.vector || [],
+        score: result.score,
+        metadata: result.result.metadata,
+        document: result.result.document || '',
+      };
+    }
+    // QueryResult
+    return {
+      id: result.id,
+      vector: result.vector || [],
+      score: result.score,
+      metadata: result.metadata,
+      document: result.document || '',
+    };
+  });
+};