npm - @mastra/rag - Versions diffs - 0.1.24-alpha.0 → 0.2.0-alpha.1 - Mend

@mastra/rag 0.1.24-alpha.0 → 0.2.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/.turbo/turbo-build.log +7 -7
package/CHANGELOG.md +23 -0
package/dist/_tsup-dts-rollup.d.cts +263 -5
package/dist/_tsup-dts-rollup.d.ts +263 -5
package/dist/index.cjs +92 -13
package/dist/index.js +92 -13
package/package.json +8 -5
package/src/graph-rag/index.ts +1 -1
package/src/rerank/index.ts +4 -3
package/src/tools/graph-rag.ts +23 -3
package/src/tools/vector-query.ts +28 -4
package/src/utils/convert-sources.ts +43 -0

package/dist/_tsup-dts-rollup.d.ts CHANGED Viewed

@@ -3,9 +3,13 @@ import type { EmbeddingModel } from 'ai';
 import type { MastraLanguageModel } from '@mastra/core/agent';
 import type { MastraVector } from '@mastra/core/vector';
 import type { QueryResult } from '@mastra/core/vector';
+import type { QueryResult as QueryResult_2 } from '@mastra/core';
 import type { TiktokenEncoding } from 'js-tiktoken';
 import type { TiktokenModel } from 'js-tiktoken';
+import { Tool } from '@mastra/core/tools';
+import { ToolExecutionContext } from '@mastra/core';
 import type { VectorFilter } from '@mastra/core/vector/filter';
+import { z } from 'zod';
 /** @deprecated Import from @mastra/astra instead */
 declare const ASTRA_PROMPT = "When querying Astra, you can ONLY use the operators listed below. Any other operators will be rejected.\nImportant: Don't explain how to construct the filter - use the specified operators and fields to search the content and return relevant results.\nIf a user tries to give an explicit operator that is not supported, reject the filter entirely and let them know that the operator is not supported.\n\nBasic Comparison Operators:\n- $eq: Exact match (default when using field: value)\n  Example: { \"category\": \"electronics\" }\n- $ne: Not equal\n  Example: { \"category\": { \"$ne\": \"electronics\" } }\n- $gt: Greater than\n  Example: { \"price\": { \"$gt\": 100 } }\n- $gte: Greater than or equal\n  Example: { \"price\": { \"$gte\": 100 } }\n- $lt: Less than\n  Example: { \"price\": { \"$lt\": 100 } }\n- $lte: Less than or equal\n  Example: { \"price\": { \"$lte\": 100 } }\n\nArray Operators:\n- $in: Match any value in array\n  Example: { \"category\": { \"$in\": [\"electronics\", \"books\"] } }\n- $nin: Does not match any value in array\n  Example: { \"category\": { \"$nin\": [\"electronics\", \"books\"] } }\n- $all: Match all values in array\n  Example: { \"tags\": { \"$all\": [\"premium\", \"sale\"] } }\n\nLogical Operators:\n- $and: Logical AND (can be implicit or explicit)\n  Implicit Example: { \"price\": { \"$gt\": 100 }, \"category\": \"electronics\" }\n  Explicit Example: { \"$and\": [{ \"price\": { \"$gt\": 100 } }, { \"category\": \"electronics\" }] }\n- $or: Logical OR\n  Example: { \"$or\": [{ \"price\": { \"$lt\": 50 } }, { \"category\": \"books\" }] }\n- $not: Logical NOT\n  Example: { \"$not\": { \"category\": \"electronics\" } }\n\nElement Operators:\n- $exists: Check if field exists\n  Example: { \"rating\": { \"$exists\": true } }\n\nSpecial Operators:\n- $size: Array length check\n  Example: { \"tags\": { \"$size\": 2 } }\n\nRestrictions:\n- Regex patterns are not supported\n- Only $and, $or, and $not logical 
operators are supported\n- Nested fields are supported using dot notation\n- Multiple conditions on the same field are supported with both implicit and explicit $and\n- Empty arrays in $in/$nin will return no results\n- A non-empty array is required for $all operator\n- Only logical operators ($and, $or, $not) can be used at the top level\n- All other operators must be used within a field condition\n  Valid: { \"field\": { \"$gt\": 100 } }\n  Valid: { \"$and\": [...] }\n  Invalid: { \"$gt\": 100 }\n- Logical operators must contain field conditions, not direct operators\n  Valid: { \"$and\": [{ \"field\": { \"$gt\": 100 } }] }\n  Invalid: { \"$and\": [{ \"$gt\": 100 }] }\n- $not operator:\n  - Must be an object\n  - Cannot be empty\n  - Can be used at field level or top level\n  - Valid: { \"$not\": { \"field\": \"value\" } }\n  - Valid: { \"field\": { \"$not\": { \"$eq\": \"value\" } } }\n- Other logical operators ($and, $or):\n  - Can only be used at top level or nested within other logical operators\n  - Can not be used on a field level, or be nested inside a field\n  - Can not be used inside an operator\n  - Valid: { \"$and\": [{ \"field\": { \"$gt\": 100 } }] }\n  - Valid: { \"$or\": [{ \"$and\": [{ \"field\": { \"$gt\": 100 } }] }] }\n  - Invalid: { \"field\": { \"$and\": [{ \"$gt\": 100 }] } }\n  - Invalid: { \"field\": { \"$or\": [{ \"$gt\": 100 }] } }\n  - Invalid: { \"field\": { \"$gt\": { \"$and\": [{...}] } } }\n\nExample Complex Query:\n{\n  \"$and\": [\n    { \"category\": { \"$in\": [\"electronics\", \"computers\"] } },\n    { \"price\": { \"$gte\": 100, \"$lte\": 1000 } },\n    { \"tags\": { \"$all\": [\"premium\"] } },\n    { \"rating\": { \"$exists\": true, \"$gt\": 4 } },\n    { \"$or\": [\n      { \"stock\": { \"$gt\": 0 } },\n      { \"preorder\": true }\n    ]}\n  ]\n}";
@@ -143,6 +147,19 @@ declare type ChunkStrategy = 'recursive' | 'character' | 'token' | 'markdown' |
 export { ChunkStrategy }
 export { ChunkStrategy as ChunkStrategy_alias_1 }
+/**
+ * Convert an array of source inputs (QueryResult, RankedNode, or RerankResult) into a
+ * uniform array of source records, one per input and in the same order.
+ *
+ * Each record carries `id`, `vector`, `score`, `metadata` and `document`. In the bundled
+ * implementation (dist/index.cjs in this diff) a missing vector becomes `[]` and a
+ * missing document becomes `""`; `metadata` is passed through and may be `undefined`.
+ *
+ * @param results Array of source inputs to convert.
+ * @returns Array of source records, one for each element of `results`.
+ */
+export declare const convertToSources: (results: SourceInput[]) => {
+    id: string;
+    vector: number[];
+    score: number;
+    metadata: Record<string, any> | undefined;
+    document: string;
+}[];
 declare const createDocumentChunkerTool: ({ doc, params, }: {
     doc: MDocument;
     params?: ChunkParams;
@@ -151,11 +168,12 @@ export { createDocumentChunkerTool }
 export { createDocumentChunkerTool as createDocumentChunkerTool_alias_1 }
 export { createDocumentChunkerTool as createDocumentChunkerTool_alias_2 }
-declare const createGraphRAGTool: ({ vectorStoreName, indexName, model, enableFilter, graphOptions, id, description, }: {
+declare const createGraphRAGTool: ({ vectorStoreName, indexName, model, enableFilter, includeSources, graphOptions, id, description, }: {
     vectorStoreName: string;
     indexName: string;
     model: EmbeddingModel<string>;
     enableFilter?: boolean;
+    includeSources?: boolean;
     graphOptions?: {
         dimension?: number;
         randomWalkSteps?: number;
@@ -164,20 +182,258 @@ declare const createGraphRAGTool: ({ vectorStoreName, indexName, model, enableFi
     };
     id?: string;
     description?: string;
-}) => ReturnType<typeof createTool>;
+}) => Tool<z.ZodObject<{
+queryText: z.ZodString;
+topK: z.ZodNumber;
+}, "passthrough", z.ZodTypeAny, z.objectOutputType<{
+queryText: z.ZodString;
+topK: z.ZodNumber;
+}, z.ZodTypeAny, "passthrough">, z.objectInputType<{
+queryText: z.ZodString;
+topK: z.ZodNumber;
+}, z.ZodTypeAny, "passthrough">>, z.ZodObject<{
+relevantContext: z.ZodAny;
+sources: z.ZodArray<z.ZodObject<{
+id: z.ZodString;
+metadata: z.ZodAny;
+vector: z.ZodArray<z.ZodNumber, "many">;
+score: z.ZodNumber;
+document: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+id: string;
+vector: number[];
+score: number;
+document: string;
+metadata?: any;
+}, {
+id: string;
+vector: number[];
+score: number;
+document: string;
+metadata?: any;
+}>, "many">;
+}, "strip", z.ZodTypeAny, {
+sources: {
+id: string;
+vector: number[];
+score: number;
+document: string;
+metadata?: any;
+}[];
+relevantContext?: any;
+}, {
+sources: {
+id: string;
+vector: number[];
+score: number;
+document: string;
+metadata?: any;
+}[];
+relevantContext?: any;
+}>, ToolExecutionContext<z.ZodObject<{
+queryText: z.ZodString;
+topK: z.ZodNumber;
+}, "passthrough", z.ZodTypeAny, z.objectOutputType<{
+queryText: z.ZodString;
+topK: z.ZodNumber;
+}, z.ZodTypeAny, "passthrough">, z.objectInputType<{
+queryText: z.ZodString;
+topK: z.ZodNumber;
+}, z.ZodTypeAny, "passthrough">>>> & {
+    inputSchema: z.ZodObject<{
+        queryText: z.ZodString;
+        topK: z.ZodNumber;
+    }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
+        queryText: z.ZodString;
+        topK: z.ZodNumber;
+    }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
+        queryText: z.ZodString;
+        topK: z.ZodNumber;
+    }, z.ZodTypeAny, "passthrough">>;
+    outputSchema: z.ZodObject<{
+        relevantContext: z.ZodAny;
+        sources: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            metadata: z.ZodAny;
+            vector: z.ZodArray<z.ZodNumber, "many">;
+            score: z.ZodNumber;
+            document: z.ZodString;
+        }, "strip", z.ZodTypeAny, {
+            id: string;
+            vector: number[];
+            score: number;
+            document: string;
+            metadata?: any;
+        }, {
+            id: string;
+            vector: number[];
+            score: number;
+            document: string;
+            metadata?: any;
+        }>, "many">;
+    }, "strip", z.ZodTypeAny, {
+        sources: {
+            id: string;
+            vector: number[];
+            score: number;
+            document: string;
+            metadata?: any;
+        }[];
+        relevantContext?: any;
+    }, {
+        sources: {
+            id: string;
+            vector: number[];
+            score: number;
+            document: string;
+            metadata?: any;
+        }[];
+        relevantContext?: any;
+    }>;
+    execute: (context: ToolExecutionContext<z.ZodObject<{
+    queryText: z.ZodString;
+    topK: z.ZodNumber;
+    }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
+    queryText: z.ZodString;
+    topK: z.ZodNumber;
+    }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
+    queryText: z.ZodString;
+    topK: z.ZodNumber;
+    }, z.ZodTypeAny, "passthrough">>>) => Promise<any>;
+};
 export { createGraphRAGTool }
 export { createGraphRAGTool as createGraphRAGTool_alias_1 }
 export { createGraphRAGTool as createGraphRAGTool_alias_2 }
-declare const createVectorQueryTool: ({ vectorStoreName, indexName, model, enableFilter, reranker, id, description, }: {
+declare const createVectorQueryTool: ({ vectorStoreName, indexName, model, enableFilter, includeVectors, includeSources, reranker, id, description, }: {
     vectorStoreName: string;
     indexName: string;
     model: EmbeddingModel<string>;
     enableFilter?: boolean;
+    includeVectors?: boolean;
+    includeSources?: boolean;
     reranker?: RerankConfig;
     id?: string;
     description?: string;
-}) => ReturnType<typeof createTool>;
+}) => Tool<z.ZodObject<{
+queryText: z.ZodString;
+topK: z.ZodNumber;
+}, "passthrough", z.ZodTypeAny, z.objectOutputType<{
+queryText: z.ZodString;
+topK: z.ZodNumber;
+}, z.ZodTypeAny, "passthrough">, z.objectInputType<{
+queryText: z.ZodString;
+topK: z.ZodNumber;
+}, z.ZodTypeAny, "passthrough">>, z.ZodObject<{
+relevantContext: z.ZodAny;
+sources: z.ZodArray<z.ZodObject<{
+id: z.ZodString;
+metadata: z.ZodAny;
+vector: z.ZodArray<z.ZodNumber, "many">;
+score: z.ZodNumber;
+document: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+id: string;
+vector: number[];
+score: number;
+document: string;
+metadata?: any;
+}, {
+id: string;
+vector: number[];
+score: number;
+document: string;
+metadata?: any;
+}>, "many">;
+}, "strip", z.ZodTypeAny, {
+sources: {
+id: string;
+vector: number[];
+score: number;
+document: string;
+metadata?: any;
+}[];
+relevantContext?: any;
+}, {
+sources: {
+id: string;
+vector: number[];
+score: number;
+document: string;
+metadata?: any;
+}[];
+relevantContext?: any;
+}>, ToolExecutionContext<z.ZodObject<{
+queryText: z.ZodString;
+topK: z.ZodNumber;
+}, "passthrough", z.ZodTypeAny, z.objectOutputType<{
+queryText: z.ZodString;
+topK: z.ZodNumber;
+}, z.ZodTypeAny, "passthrough">, z.objectInputType<{
+queryText: z.ZodString;
+topK: z.ZodNumber;
+}, z.ZodTypeAny, "passthrough">>>> & {
+    inputSchema: z.ZodObject<{
+        queryText: z.ZodString;
+        topK: z.ZodNumber;
+    }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
+        queryText: z.ZodString;
+        topK: z.ZodNumber;
+    }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
+        queryText: z.ZodString;
+        topK: z.ZodNumber;
+    }, z.ZodTypeAny, "passthrough">>;
+    outputSchema: z.ZodObject<{
+        relevantContext: z.ZodAny;
+        sources: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            metadata: z.ZodAny;
+            vector: z.ZodArray<z.ZodNumber, "many">;
+            score: z.ZodNumber;
+            document: z.ZodString;
+        }, "strip", z.ZodTypeAny, {
+            id: string;
+            vector: number[];
+            score: number;
+            document: string;
+            metadata?: any;
+        }, {
+            id: string;
+            vector: number[];
+            score: number;
+            document: string;
+            metadata?: any;
+        }>, "many">;
+    }, "strip", z.ZodTypeAny, {
+        sources: {
+            id: string;
+            vector: number[];
+            score: number;
+            document: string;
+            metadata?: any;
+        }[];
+        relevantContext?: any;
+    }, {
+        sources: {
+            id: string;
+            vector: number[];
+            score: number;
+            document: string;
+            metadata?: any;
+        }[];
+        relevantContext?: any;
+    }>;
+    execute: (context: ToolExecutionContext<z.ZodObject<{
+    queryText: z.ZodString;
+    topK: z.ZodNumber;
+    }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
+    queryText: z.ZodString;
+    topK: z.ZodNumber;
+    }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
+    queryText: z.ZodString;
+    topK: z.ZodNumber;
+    }, z.ZodTypeAny, "passthrough">>>) => Promise<any>;
+};
 export { createVectorQueryTool }
 export { createVectorQueryTool as createVectorQueryTool_alias_1 }
 export { createVectorQueryTool as createVectorQueryTool_alias_2 }
@@ -622,7 +878,7 @@ declare class QuestionsAnsweredExtractor extends BaseExtractor {
 export { QuestionsAnsweredExtractor }
 export { QuestionsAnsweredExtractor as QuestionsAnsweredExtractor_alias_1 }
-declare interface RankedNode extends GraphNode {
+export declare interface RankedNode extends GraphNode {
     score: number;
 }
@@ -796,6 +1052,8 @@ declare interface ScoringDetails {
     };
 }
+declare type SourceInput = QueryResult_2 | RankedNode | RerankResult; // element type of the `results` array accepted by convertToSources
 export declare function splitTextOnTokens({ text, tokenizer }: {
     text: string;
     tokenizer: Tokenizer;

package/dist/index.cjs CHANGED Viewed

@@ -5,6 +5,7 @@ var zod = require('zod');
 var nodeHtmlBetterParser = require('node-html-better-parser');
 var jsTiktoken = require('js-tiktoken');
 var relevance = require('@mastra/core/relevance');
+var big_js = require('big.js');
 var tools = require('@mastra/core/tools');
 var ai = require('ai');
@@ -5950,9 +5951,9 @@ async function rerank(results, query, model, options) {
     ...DEFAULT_WEIGHTS,
     ...options.weights
   };
-  const totalWeights = Object.values(weights).reduce((sum, weight) => sum + weight, 0);
-  if (totalWeights !== 1) {
-    throw new Error("Weights must add up to 1");
+  // Add the weights up with big.js instead of native number addition, so the
+  // equality check below is not thrown off by binary floating-point rounding.
+  const sum = Object.values(weights).reduce((acc, w) => acc.plus(w.toString()), new big_js.Big(0));
+  if (!sum.eq(1)) {
+    // Serialize the weights explicitly: interpolating the object itself would
+    // render as "[object Object]" and hide the offending values.
+    throw new Error(`Weights must add up to 1. Got ${sum} from ${JSON.stringify(weights)}`);
   }
   const resultLength = results.length;
   const queryAnalysis = queryEmbedding ? analyzeQueryEmbedding(queryEmbedding) : null;
@@ -6271,12 +6272,44 @@ var filterDescription = `JSON-formatted criteria to refine search results.
 - IMPORTANT: Always ensure JSON is properly closed with matching brackets
 - Multiple filters can be combined`;
+/* src/utils/convert-sources.ts
+ *
+ * convertToSources maps every entry of `results` to one record of the form
+ * { id, vector, score, metadata, document }, keeping the input order.
+ * Three input shapes are told apart by the keys they carry, checked in this order:
+ *   1. a "content" key -> document is read from `content` and vector from `embedding`;
+ *                         id, score and metadata are read directly from the entry;
+ *   2. a "result" key  -> id, vector, metadata and document are read from the nested
+ *                         `result`, while score is read from the outer entry;
+ *   3. anything else   -> all five fields are read directly from the entry.
+ * A missing (falsy) vector becomes [] and a missing (falsy) document becomes "".
+ * NOTE(review): shapes 1-3 presumably correspond to RankedNode, RerankResult and
+ * QueryResult; those types are declared outside this diff, so confirm against them.
+ */
+var convertToSources = (results) => {
+  return results.map((result) => {
+    if ("content" in result) {
+      return {
+        id: result.id,
+        vector: result.embedding || [],
+        score: result.score,
+        metadata: result.metadata,
+        document: result.content || ""
+      };
+    }
+    if ("result" in result) {
+      return {
+        id: result.result.id,
+        vector: result.result.vector || [],
+        score: result.score,
+        metadata: result.result.metadata,
+        document: result.result.document || ""
+      };
+    }
+    return {
+      id: result.id,
+      vector: result.vector || [],
+      score: result.score,
+      metadata: result.metadata,
+      document: result.document || ""
+    };
+  });
+};
 // src/tools/graph-rag.ts
 var createGraphRAGTool = ({
   vectorStoreName,
   indexName,
   model,
   enableFilter = false,
+  includeSources = true,
   graphOptions = {
     dimension: 1536,
     randomWalkSteps: 100,
@@ -6301,8 +6334,27 @@ var createGraphRAGTool = ({
   return tools.createTool({
     id: toolId,
     inputSchema,
+    // Output schema includes `sources`, which exposes the full set of retrieved chunks (QueryResult objects)
+    // Each source contains all information needed to reference
+    // the original document, chunk, and similarity score.
     outputSchema: zod.z.object({
-      relevantContext: zod.z.any()
+      // Array of metadata or content for compatibility with prior usage
+      relevantContext: zod.z.any(),
+      // Array of full retrieval result objects
+      sources: zod.z.array(
+        zod.z.object({
+          id: zod.z.string(),
+          // Unique chunk/document identifier
+          metadata: zod.z.any(),
+          // All metadata fields (document ID, etc.)
+          vector: zod.z.array(zod.z.number()),
+          // Embedding vector (if available)
+          score: zod.z.number(),
+          // Similarity score for this retrieval
+          document: zod.z.string()
+          // Full chunk/document text (if available)
+        })
+      )
     }),
     description: toolDescription,
     execute: async ({ context: { queryText, topK, filter }, mastra }) => {
@@ -6322,7 +6374,7 @@ var createGraphRAGTool = ({
           if (logger) {
             logger.error("Vector store not found", { vectorStoreName });
           }
-          return { relevantContext: [] };
+          return { relevantContext: [], sources: [] };
         }
         let queryFilter = {};
         if (enableFilter) {
@@ -6381,8 +6433,10 @@ var createGraphRAGTool = ({
         if (logger) {
           logger.debug("Returning relevant context chunks", { count: relevantChunks.length });
         }
+        const sources = includeSources ? convertToSources(rerankedResults) : [];
         return {
-          relevantContext: relevantChunks
+          relevantContext: relevantChunks,
+          sources
         };
       } catch (err) {
         if (logger) {
@@ -6392,7 +6446,7 @@ var createGraphRAGTool = ({
             errorStack: err instanceof Error ? err.stack : void 0
           });
         }
-        return { relevantContext: [] };
+        return { relevantContext: [], sources: [] };
       }
     }
   });
@@ -6402,6 +6456,8 @@ var createVectorQueryTool = ({
   indexName,
   model,
   enableFilter = false,
+  includeVectors = false,
+  includeSources = true,
   reranker,
   id,
   description
@@ -6419,8 +6475,27 @@ var createVectorQueryTool = ({
   return tools.createTool({
     id: toolId,
     inputSchema,
+    // Output schema includes `sources`, which exposes the full set of retrieved chunks (QueryResult objects)
+    // Each source contains all information needed to reference
+    // the original document, chunk, and similarity score.
     outputSchema: zod.z.object({
-      relevantContext: zod.z.any()
+      // Array of metadata or content for compatibility with prior usage
+      relevantContext: zod.z.any(),
+      // Array of full retrieval result objects
+      sources: zod.z.array(
+        zod.z.object({
+          id: zod.z.string(),
+          // Unique chunk/document identifier
+          metadata: zod.z.any(),
+          // All metadata fields (document ID, etc.)
+          vector: zod.z.array(zod.z.number()),
+          // Embedding vector (if available)
+          score: zod.z.number(),
+          // Similarity score for this retrieval
+          document: zod.z.string()
+          // Full chunk/document text (if available)
+        })
+      )
     }),
     description: toolDescription,
     execute: async ({ context: { queryText, topK, filter }, mastra }) => {
@@ -6440,7 +6515,7 @@ var createVectorQueryTool = ({
           if (logger) {
             logger.error("Vector store not found", { vectorStoreName });
           }
-          return { relevantContext: [] };
+          return { relevantContext: [], sources: [] };
         }
         let queryFilter = {};
         if (enableFilter && filter) {
@@ -6464,7 +6539,8 @@ var createVectorQueryTool = ({
           queryText,
           model,
           queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
-          topK: topKValue
+          topK: topKValue,
+          includeVectors
         });
         if (logger) {
           logger.debug("vectorQuerySearch returned results", { count: results.length });
@@ -6484,14 +6560,17 @@ var createVectorQueryTool = ({
           if (logger) {
             logger.debug("Returning reranked relevant context chunks", { count: relevantChunks2.length });
           }
-          return { relevantContext: relevantChunks2 };
+          const sources2 = includeSources ? convertToSources(rerankedResults) : [];
+          return { relevantContext: relevantChunks2, sources: sources2 };
         }
         const relevantChunks = results.map((result) => result?.metadata);
         if (logger) {
           logger.debug("Returning relevant context chunks", { count: relevantChunks.length });
         }
+        const sources = includeSources ? convertToSources(results) : [];
         return {
-          relevantContext: relevantChunks
+          relevantContext: relevantChunks,
+          sources
         };
       } catch (err) {
         if (logger) {
@@ -6501,7 +6580,7 @@ var createVectorQueryTool = ({
             errorStack: err instanceof Error ? err.stack : void 0
           });
         }
-        return { relevantContext: [] };
+        return { relevantContext: [], sources: [] };
       }
     }
   });