@mastra/rag 2.1.2 → 2.1.3-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/CHANGELOG.md +11 -0
  2. package/LICENSE.md +15 -0
  3. package/dist/docs/SKILL.md +3 -3
  4. package/dist/docs/assets/SOURCE_MAP.json +1 -1
  5. package/dist/docs/references/docs-rag-chunking-and-embedding.md +5 -5
  6. package/dist/docs/references/docs-rag-graph-rag.md +2 -2
  7. package/dist/docs/references/docs-rag-overview.md +2 -2
  8. package/dist/docs/references/docs-rag-retrieval.md +16 -16
  9. package/dist/docs/references/reference-rag-chunk.md +40 -40
  10. package/dist/docs/references/reference-rag-database-config.md +19 -15
  11. package/dist/docs/references/reference-rag-document.md +13 -13
  12. package/dist/docs/references/reference-rag-extract-params.md +31 -31
  13. package/dist/docs/references/reference-rag-graph-rag.md +16 -16
  14. package/dist/docs/references/reference-rag-rerank.md +28 -20
  15. package/dist/docs/references/reference-rag-rerankWithScorer.md +27 -19
  16. package/dist/docs/references/reference-tools-document-chunker-tool.md +11 -11
  17. package/dist/docs/references/reference-tools-graph-rag-tool.md +23 -25
  18. package/dist/docs/references/reference-tools-vector-query-tool.md +47 -35
  19. package/dist/document/validation.d.ts.map +1 -1
  20. package/dist/index.cjs +6 -5
  21. package/dist/index.cjs.map +1 -1
  22. package/dist/index.js +6 -5
  23. package/dist/index.js.map +1 -1
  24. package/dist/tools/document-chunker.d.ts +1 -3
  25. package/dist/tools/document-chunker.d.ts.map +1 -1
  26. package/dist/tools/graph-rag.d.ts +5 -19
  27. package/dist/tools/graph-rag.d.ts.map +1 -1
  28. package/dist/tools/vector-query.d.ts +5 -19
  29. package/dist/tools/vector-query.d.ts.map +1 -1
  30. package/dist/utils/tool-schemas.d.ts +9 -47
  31. package/dist/utils/tool-schemas.d.ts.map +1 -1
  32. package/package.json +9 -9
@@ -2,7 +2,7 @@
2
2
 
3
3
  The `createVectorQueryTool()` function creates a tool for semantic search over vector stores. It supports filtering, reranking, database-specific configurations, and integrates with various vector store backends.
4
4
 
5
- ## Basic Usage
5
+ ## Basic usage
6
6
 
7
7
  ```typescript
8
8
  import { createVectorQueryTool } from '@mastra/rag'
@@ -19,55 +19,67 @@ const queryTool = createVectorQueryTool({
19
19
 
20
20
  > **Note:** **Parameter requirements:** Most fields can be set at creation as defaults, and some can be overridden at runtime via the request context or input. If a required field is missing from both creation and runtime, an error is thrown. The `model`, `id`, and `description` fields can only be set at creation time.
21
21
 
22
- **id?:** (`string`): Custom ID for the tool. By default: 'VectorQuery {vectorStoreName} {indexName} Tool'. (Set at creation only.)
22
+ **id** (`string`): Custom ID for the tool. By default: 'VectorQuery {vectorStoreName} {indexName} Tool'. (Set at creation only.)
23
23
 
24
- **description?:** (`string`): Custom description for the tool. By default: 'Access the knowledge base to find information needed to answer user questions' (Set at creation only.)
24
+ **description** (`string`): Custom description for the tool. By default: 'Access the knowledge base to find information needed to answer user questions'. (Set at creation only.)
25
25
 
26
- **model:** (`EmbeddingModel`): Embedding model to use for vector search. (Set at creation only.)
26
+ **model** (`EmbeddingModel`): Embedding model to use for vector search. (Set at creation only.)
27
27
 
28
- **vectorStoreName:** (`string`): Name of the vector store to query. (Can be set at creation or overridden at runtime.)
28
+ **vectorStoreName** (`string`): Name of the vector store to query. (Can be set at creation or overridden at runtime.)
29
29
 
30
- **indexName:** (`string`): Name of the index within the vector store. (Can be set at creation or overridden at runtime.)
30
+ **indexName** (`string`): Name of the index within the vector store. (Can be set at creation or overridden at runtime.)
31
31
 
32
- **enableFilter?:** (`boolean`): Enable filtering of results based on metadata. (Set at creation only, but will be automatically enabled if a filter is provided in the request context.) (Default: `false`)
32
+ **enableFilter** (`boolean`): Enable filtering of results based on metadata. (Set at creation only, but will be automatically enabled if a filter is provided in the request context.) (Default: `false`)
33
33
 
34
- **includeVectors?:** (`boolean`): Include the embedding vectors in the results. (Can be set at creation or overridden at runtime.) (Default: `false`)
34
+ **includeVectors** (`boolean`): Include the embedding vectors in the results. (Can be set at creation or overridden at runtime.) (Default: `false`)
35
35
 
36
- **includeSources?:** (`boolean`): Include the full retrieval objects in the results. (Can be set at creation or overridden at runtime.) (Default: `true`)
36
+ **includeSources** (`boolean`): Include the full retrieval objects in the results. (Can be set at creation or overridden at runtime.) (Default: `true`)
37
37
 
38
- **reranker?:** (`RerankConfig`): Options for reranking results. (Can be set at creation or overridden at runtime.)
38
+ **reranker** (`RerankConfig`): Options for reranking results. (Can be set at creation or overridden at runtime.)
39
39
 
40
- **databaseConfig?:** (`DatabaseConfig`): Database-specific configuration options for optimizing queries. (Can be set at creation or overridden at runtime.)
40
+ **reranker.model** (`MastraLanguageModel`): Language model to use for reranking
41
41
 
42
- **providerOptions?:** (`Record<string, Record<string, any>>`): Provider-specific options for the embedding model (e.g., outputDimensionality). \*\*Important\*\*: Only works with AI SDK EmbeddingModelV2 models. For V1 models, configure options when creating the model itself.
42
+ **reranker.options** (`RerankerOptions`): Options for the reranking process
43
43
 
44
- **vectorStore?:** (`MastraVector | VectorStoreResolver`): Direct vector store instance or a resolver function for dynamic selection. Use a function for multi-tenant applications where the vector store is selected based on request context. When provided, \`vectorStoreName\` becomes optional.
44
+ **reranker.options.weights** (`WeightConfig`): Weights for scoring components (semantic: 0.4, vector: 0.4, position: 0.2)
45
45
 
46
- ### DatabaseConfig
46
+ **reranker.options.topK** (`number`): Number of top results to return
47
47
 
48
- The `DatabaseConfig` type allows you to specify database-specific configurations that are automatically applied to query operations. This enables you to take advantage of unique features and optimizations offered by different vector stores.
48
+ **databaseConfig** (`DatabaseConfig`): Database-specific configuration options for optimizing queries. (Can be set at creation or overridden at runtime.)
49
49
 
50
- **pinecone?:** (`PineconeConfig`): objectnamespace?:stringPinecone namespace for organizing vectorssparseVector?:{ indices: number\[]; values: number\[]; }Sparse vector for hybrid search
50
+ **databaseConfig.pinecone** (`PineconeConfig`): Configuration specific to Pinecone vector store
51
51
 
52
- **pgvector?:** (`PgVectorConfig`): objectminScore?:numberMinimum similarity score threshold for resultsef?:numberHNSW search parameter - controls accuracy vs speed tradeoffprobes?:numberIVFFlat probe parameter - number of cells to visit during search
52
+ **databaseConfig.pinecone.namespace** (`string`): Pinecone namespace for organizing vectors
53
53
 
54
- **chroma?:** (`ChromaConfig`): objectwhere?:Record\<string, any>Metadata filtering conditionswhereDocument?:Record\<string, any>Document content filtering conditions
54
+ **databaseConfig.pinecone.sparseVector** (`{ indices: number[]; values: number[]; }`): Sparse vector for hybrid search
55
55
 
56
- ### RerankConfig
56
+ **databaseConfig.pgvector** (`PgVectorConfig`): Configuration specific to PostgreSQL with pgvector extension
57
57
 
58
- **model:** (`MastraLanguageModel`): Language model to use for reranking
58
+ **databaseConfig.pgvector.minScore** (`number`): Minimum similarity score threshold for results
59
59
 
60
- **options?:** (`RerankerOptions`): objectweights?:WeightConfigWeights for scoring components (semantic: 0.4, vector: 0.4, position: 0.2)topK?:numberNumber of top results to return
60
+ **databaseConfig.pgvector.ef** (`number`): HNSW search parameter - controls accuracy vs speed tradeoff
61
+
62
+ **databaseConfig.pgvector.probes** (`number`): IVFFlat probe parameter - number of cells to visit during search
63
+
64
+ **databaseConfig.chroma** (`ChromaConfig`): Configuration specific to Chroma vector store
65
+
66
+ **databaseConfig.chroma.where** (`Record<string, any>`): Metadata filtering conditions
67
+
68
+ **databaseConfig.chroma.whereDocument** (`Record<string, any>`): Document content filtering conditions
69
+
70
+ **providerOptions** (`Record<string, Record<string, any>>`): Provider-specific options for the embedding model (e.g., `outputDimensionality`). **Important**: Only works with AI SDK EmbeddingModelV2 models. For V1 models, configure options when creating the model itself.
71
+
72
+ **vectorStore** (`MastraVector | VectorStoreResolver`): Direct vector store instance or a resolver function for dynamic selection. Use a function for multi-tenant applications where the vector store is selected based on request context. When provided, `vectorStoreName` becomes optional.
61
73
 
62
74
  ## Returns
63
75
 
64
76
  The tool returns an object with:
65
77
 
66
- **relevantContext:** (`string`): Combined text from the most relevant document chunks
78
+ **relevantContext** (`string`): Combined text from the most relevant document chunks
67
79
 
68
- **sources:** (`QueryResult[]`): Array of full retrieval result objects. Each object contains all information needed to reference the original document, chunk, and similarity score.
80
+ **sources** (`QueryResult[]`): Array of full retrieval result objects. Each object contains all information needed to reference the original document, chunk, and similarity score.
69
81
 
70
- ### QueryResult object structure
82
+ ### `QueryResult` object structure
71
83
 
72
84
  ```typescript
73
85
  {
@@ -79,7 +91,7 @@ The tool returns an object with:
79
91
  }
80
92
  ```
81
93
 
82
- ## Default Tool Description
94
+ ## Default tool description
83
95
 
84
96
  The default description focuses on:
85
97
 
@@ -87,11 +99,11 @@ The default description focuses on:
87
99
  - Answering user questions
88
100
  - Retrieving factual content
89
101
 
90
- ## Result Handling
102
+ ## Result handling
91
103
 
92
104
  The tool determines the number of results to return based on the user's query, with a default of 10 results. This can be adjusted based on the query requirements.
93
105
 
94
- ## Example with Filters
106
+ ## Example with filters
95
107
 
96
108
  ```typescript
97
109
  const queryTool = createVectorQueryTool({
@@ -122,7 +134,7 @@ For detailed filter syntax and store-specific capabilities, see the [Metadata Fi
122
134
 
123
135
  For an example of how agent-driven filtering works, see the [Agent-Driven Metadata Filtering](https://github.com/mastra-ai/mastra/tree/main/examples/basics/rag/filter-rag) example.
124
136
 
125
- ## Example with Reranking
137
+ ## Example with reranking
126
138
 
127
139
  ```typescript
128
140
  const queryTool = createVectorQueryTool({
@@ -130,7 +142,7 @@ const queryTool = createVectorQueryTool({
130
142
  indexName: 'documentation',
131
143
  model: new ModelRouterEmbeddingModel('openai/text-embedding-3-small'),
132
144
  reranker: {
133
- model: 'openai/gpt-5.1',
145
+ model: 'openai/gpt-5.4',
134
146
  options: {
135
147
  weights: {
136
148
  semantic: 0.5, // Semantic relevance weight
@@ -152,7 +164,7 @@ Reranking improves result quality by combining:
152
164
 
153
165
  The reranker processes the initial vector search results and returns a reordered list optimized for relevance.
154
166
 
155
- ## Example with Custom Description
167
+ ## Example with custom description
156
168
 
157
169
  ```typescript
158
170
  const queryTool = createVectorQueryTool({
@@ -166,7 +178,7 @@ const queryTool = createVectorQueryTool({
166
178
 
167
179
  This example shows how to customize the tool description for a specific use case while maintaining its core purpose of information retrieval.
168
180
 
169
- ## Database-Specific Configuration Examples
181
+ ## Database-specific configuration examples
170
182
 
171
183
  The `databaseConfig` parameter allows you to leverage unique features and optimizations specific to each vector database. These configurations are automatically applied during query execution.
172
184
 
@@ -323,7 +335,7 @@ This approach allows you to:
323
335
  - Adjust performance parameters based on load
324
336
  - Apply different filtering strategies per request
325
337
 
326
- ## Example: Using Request Context
338
+ ## Example: Using request context
327
339
 
328
340
  ```typescript
329
341
  const queryTool = createVectorQueryTool({
@@ -362,7 +374,7 @@ For more information on request context, please see:
362
374
  - [Agent Request Context](https://mastra.ai/docs/server/request-context)
363
375
  - [Request Context](https://mastra.ai/docs/server/request-context)
364
376
 
365
- ## Usage Without a Mastra Server
377
+ ## Usage without a Mastra server
366
378
 
367
379
  The tool can be used by itself to retrieve documents matching a query:
368
380
 
@@ -389,7 +401,7 @@ const queryResult = await vectorQueryTool.execute({ queryText: 'foo', topK: 1 },
389
401
  console.log(queryResult.sources)
390
402
  ```
391
403
 
392
- ## Dynamic Vector Store for Multi-Tenant Applications
404
+ ## Dynamic vector store for multi-tenant applications
393
405
 
394
406
  For multi-tenant applications where each tenant has isolated data (e.g., separate PostgreSQL schemas), you can pass a resolver function instead of a static vector store instance. The function receives the request context and can return the appropriate vector store for the current tenant:
395
407
 
@@ -445,7 +457,7 @@ This pattern is similar to how `Agent.memory` supports dynamic configuration and
445
457
  - **Database isolation**: Route to different database instances per tenant
446
458
  - **Dynamic configuration**: Adjust vector store settings based on request context
447
459
 
448
- ## Tool Details
460
+ ## Tool details
449
461
 
450
462
  The tool is created with:
451
463
 
@@ -1 +1 @@
1
- {"version":3,"file":"validation.d.ts","sourceRoot":"","sources":["../../src/document/validation.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAsH7C,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,GAAG,IAAI,CAsB9E"}
1
+ {"version":3,"file":"validation.d.ts","sourceRoot":"","sources":["../../src/document/validation.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAsH7C,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,GAAG,IAAI,CAwB9E"}
package/dist/index.cjs CHANGED
@@ -735,7 +735,7 @@ var customAlphabet = (alphabet, defaultSize = 21) => {
735
735
  };
736
736
  };
737
737
 
738
- // ../../node_modules/.pnpm/@ai-sdk+provider-utils@2.2.8_zod@3.25.76/node_modules/@ai-sdk/provider-utils/dist/index.mjs
738
+ // ../../node_modules/.pnpm/@ai-sdk+provider-utils@2.2.8_zod@4.3.6/node_modules/@ai-sdk/provider-utils/dist/index.mjs
739
739
  var import_secure_json_parse = __toESM(require_secure_json_parse());
740
740
  function combineHeaders(...headers) {
741
741
  return headers.reduce(
@@ -6644,12 +6644,13 @@ function validateChunkParams(strategy, params) {
6644
6644
  }
6645
6645
  const result = schema.safeParse(params);
6646
6646
  if (!result.success) {
6647
- const unrecognizedError = result.error.errors.find((e) => e.code === "unrecognized_keys");
6647
+ const issues = result.error.issues;
6648
+ const unrecognizedError = issues.find((e) => e.code === "unrecognized_keys");
6648
6649
  if (unrecognizedError && "keys" in unrecognizedError) {
6649
6650
  const keys = unrecognizedError.keys.join(", ");
6650
6651
  throw new Error(`Invalid parameters for ${strategy} strategy: '${keys}' not supported`);
6651
6652
  }
6652
- const errorMessage = result.error.errors.map((e) => `${e.path.length > 0 ? e.path.join(".") : "parameter"}: ${e.message}`).join(", ");
6653
+ const errorMessage = issues.map((e) => `${e.path.length > 0 ? e.path.join(".") : "parameter"}: ${e.message}`).join(", ");
6653
6654
  throw new Error(`Invalid parameters for ${strategy} strategy: ${errorMessage}`);
6654
6655
  }
6655
6656
  }
@@ -7696,7 +7697,7 @@ var createGraphRAGTool = (options) => {
7696
7697
  const vectorStore = await resolveVectorStore(options, { requestContext, mastra, vectorStoreName });
7697
7698
  if (!vectorStore) {
7698
7699
  if (logger) {
7699
- logger.error(`Vector store '${vectorStoreName}' not found`);
7700
+ logger.error("Vector store not found", { vectorStore: vectorStoreName });
7700
7701
  }
7701
7702
  return { relevantContext: [], sources: [] };
7702
7703
  }
@@ -7801,7 +7802,7 @@ var createVectorQueryTool = (options) => {
7801
7802
  const vectorStore = await resolveVectorStore(options, { requestContext, mastra, vectorStoreName });
7802
7803
  if (!vectorStore) {
7803
7804
  if (logger) {
7804
- logger.error(`Vector store '${vectorStoreName}' not found`);
7805
+ logger.error("Vector store not found", { vectorStore: vectorStoreName });
7805
7806
  }
7806
7807
  return { relevantContext: [], sources: [] };
7807
7808
  }