@mastra/rag 2.1.2-alpha.0 → 2.1.3-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/CHANGELOG.md +20 -0
  2. package/LICENSE.md +15 -0
  3. package/dist/docs/SKILL.md +3 -3
  4. package/dist/docs/assets/SOURCE_MAP.json +1 -1
  5. package/dist/docs/references/docs-rag-chunking-and-embedding.md +5 -5
  6. package/dist/docs/references/docs-rag-graph-rag.md +2 -2
  7. package/dist/docs/references/docs-rag-overview.md +2 -2
  8. package/dist/docs/references/docs-rag-retrieval.md +16 -16
  9. package/dist/docs/references/reference-rag-chunk.md +40 -40
  10. package/dist/docs/references/reference-rag-database-config.md +19 -15
  11. package/dist/docs/references/reference-rag-document.md +13 -13
  12. package/dist/docs/references/reference-rag-extract-params.md +31 -31
  13. package/dist/docs/references/reference-rag-graph-rag.md +16 -16
  14. package/dist/docs/references/reference-rag-rerank.md +28 -20
  15. package/dist/docs/references/reference-rag-rerankWithScorer.md +27 -19
  16. package/dist/docs/references/reference-tools-document-chunker-tool.md +11 -11
  17. package/dist/docs/references/reference-tools-graph-rag-tool.md +23 -25
  18. package/dist/docs/references/reference-tools-vector-query-tool.md +47 -35
  19. package/dist/document/validation.d.ts.map +1 -1
  20. package/dist/index.cjs +6 -5
  21. package/dist/index.cjs.map +1 -1
  22. package/dist/index.js +6 -5
  23. package/dist/index.js.map +1 -1
  24. package/dist/tools/document-chunker.d.ts +1 -3
  25. package/dist/tools/document-chunker.d.ts.map +1 -1
  26. package/dist/tools/graph-rag.d.ts +5 -19
  27. package/dist/tools/graph-rag.d.ts.map +1 -1
  28. package/dist/tools/vector-query.d.ts +5 -19
  29. package/dist/tools/vector-query.d.ts.map +1 -1
  30. package/dist/utils/tool-schemas.d.ts +9 -47
  31. package/dist/utils/tool-schemas.d.ts.map +1 -1
  32. package/package.json +9 -9
package/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # @mastra/rag
2
2
 
3
+ ## 2.1.3-alpha.0
4
+
5
+ ### Patch Changes
6
+
7
+ - Standardized all logger calls across the codebase to use static string messages with structured data objects. Dynamic values are now passed as key-value pairs in the second argument instead of being interpolated into template literal strings. This improves log filterability and searchability in observability storage. ([#14899](https://github.com/mastra-ai/mastra/pull/14899))
8
+
9
+ Removed ~150 redundant or noisy log calls including duplicate error logging after trackException and verbose in-memory storage CRUD traces.
10
+
11
+ - Updated dependencies [[`cbeec24`](https://github.com/mastra-ai/mastra/commit/cbeec24b3c97a1a296e7e461e66cc7f7d215dc50), [`cee146b`](https://github.com/mastra-ai/mastra/commit/cee146b5d858212e1df2b2730fc36d3ceda0e08d), [`aa0aeff`](https://github.com/mastra-ai/mastra/commit/aa0aeffa11efbef5e219fbd97bf43d263cfe3afe), [`2bcec65`](https://github.com/mastra-ai/mastra/commit/2bcec652d62b07eab15e9eb9822f70184526eede), [`ad9bded`](https://github.com/mastra-ai/mastra/commit/ad9bdedf86a824801f49928a8d40f6e31ff5450f), [`cbeec24`](https://github.com/mastra-ai/mastra/commit/cbeec24b3c97a1a296e7e461e66cc7f7d215dc50), [`208c0bb`](https://github.com/mastra-ai/mastra/commit/208c0bbacbf5a1da6318f2a0e0c544390e542ddc), [`f566ee7`](https://github.com/mastra-ai/mastra/commit/f566ee7d53a3da33a01103e2a5ac2070ddefe6b0)]:
12
+ - @mastra/core@1.20.0-alpha.0
13
+
14
+ ## 2.1.2
15
+
16
+ ### Patch Changes
17
+
18
+ - Improved token-based chunking performance in `token` and `semantic-markdown` strategies. Markdown knowledge bases now chunk significantly faster with lower tokenization overhead. ([#13495](https://github.com/mastra-ai/mastra/pull/13495))
19
+
20
+ - Updated dependencies [[`df170fd`](https://github.com/mastra-ai/mastra/commit/df170fd139b55f845bfd2de8488b16435bd3d0da), [`ae55343`](https://github.com/mastra-ai/mastra/commit/ae5534397fc006fd6eef3e4f80c235bcdc9289ef), [`c290cec`](https://github.com/mastra-ai/mastra/commit/c290cec5bf9107225de42942b56b487107aa9dce), [`f03e794`](https://github.com/mastra-ai/mastra/commit/f03e794630f812b56e95aad54f7b1993dc003add), [`aa4a5ae`](https://github.com/mastra-ai/mastra/commit/aa4a5aedb80d8d6837bab8cbb2e301215d1ba3e9), [`de3f584`](https://github.com/mastra-ai/mastra/commit/de3f58408752a8d80a295275c7f23fc306cf7f4f), [`d3fb010`](https://github.com/mastra-ai/mastra/commit/d3fb010c98f575f1c0614452667396e2653815f6), [`702ee1c`](https://github.com/mastra-ai/mastra/commit/702ee1c41be67cc532b4dbe89bcb62143508f6f0), [`f495051`](https://github.com/mastra-ai/mastra/commit/f495051eb6496a720f637fc85b6d69941c12554c), [`e622f1d`](https://github.com/mastra-ai/mastra/commit/e622f1d3ab346a8e6aca6d1fe2eac99bd961e50b), [`861f111`](https://github.com/mastra-ai/mastra/commit/861f11189211b20ddb70d8df81a6b901fc78d11e), [`00f43e8`](https://github.com/mastra-ai/mastra/commit/00f43e8e97a80c82b27d5bd30494f10a715a1df9), [`1b6f651`](https://github.com/mastra-ai/mastra/commit/1b6f65127d4a0d6c38d0a1055cb84527db529d6b), [`96a1702`](https://github.com/mastra-ai/mastra/commit/96a1702ce362c50dda20c8b4a228b4ad1a36a17a), [`cb9f921`](https://github.com/mastra-ai/mastra/commit/cb9f921320913975657abb1404855d8c510f7ac5), [`114e7c1`](https://github.com/mastra-ai/mastra/commit/114e7c146ac682925f0fb37376c1be70e5d6e6e5), [`1b6f651`](https://github.com/mastra-ai/mastra/commit/1b6f65127d4a0d6c38d0a1055cb84527db529d6b), [`72df4a8`](https://github.com/mastra-ai/mastra/commit/72df4a8f9bf1a20cfd3d9006a4fdb597ad56d10a)]:
21
+ - @mastra/core@1.8.0
22
+
3
23
  ## 2.1.2-alpha.0
4
24
 
5
25
  ### Patch Changes
package/LICENSE.md CHANGED
@@ -1,3 +1,18 @@
1
+ Portions of this software are licensed as follows:
2
+
3
+ - All content that resides under any directory named "ee/" within this
4
+ repository, including but not limited to:
5
+ - `packages/core/src/auth/ee/`
6
+ - `packages/server/src/server/auth/ee/`
7
+ is licensed under the license defined in `ee/LICENSE`.
8
+
9
+ - All third-party components incorporated into the Mastra Software are
10
+ licensed under the original license provided by the owner of the
11
+ applicable component.
12
+
13
+ - Content outside of the above-mentioned directories or restrictions is
14
+ available under the "Apache License 2.0" as defined below.
15
+
1
16
  # Apache License 2.0
2
17
 
3
18
  Copyright (c) 2025 Kepler Software, Inc.
@@ -3,7 +3,7 @@ name: mastra-rag
3
3
  description: Documentation for @mastra/rag. Use when working with @mastra/rag APIs, configuration, or implementation.
4
4
  metadata:
5
5
  package: "@mastra/rag"
6
- version: "2.1.2-alpha.0"
6
+ version: "2.1.3-alpha.0"
7
7
  ---
8
8
 
9
9
  ## When to use
@@ -16,10 +16,10 @@ Read the individual reference documents for detailed explanations and code examp
16
16
 
17
17
  ### Docs
18
18
 
19
- - [Chunking and Embedding Documents](references/docs-rag-chunking-and-embedding.md) - Guide on chunking and embedding documents in Mastra for efficient processing and retrieval.
19
+ - [Chunking and embedding documents](references/docs-rag-chunking-and-embedding.md) - Guide on chunking and embedding documents in Mastra for efficient processing and retrieval.
20
20
  - [GraphRAG](references/docs-rag-graph-rag.md) - Guide on graph-based retrieval in Mastra's RAG systems for documents with complex relationships.
21
21
  - [RAG (Retrieval-Augmented Generation) in Mastra](references/docs-rag-overview.md) - Overview of Retrieval-Augmented Generation (RAG) in Mastra, detailing its capabilities for enhancing LLM outputs with relevant context.
22
- - [Retrieval, Semantic Search, Reranking](references/docs-rag-retrieval.md) - Guide on retrieval processes in Mastra's RAG systems, including semantic search, filtering, and re-ranking.
22
+ - [Retrieval, semantic search, reranking](references/docs-rag-retrieval.md) - Guide on retrieval processes in Mastra's RAG systems, including semantic search, filtering, and re-ranking.
23
23
 
24
24
  ### Reference
25
25
 
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.1.2-alpha.0",
2
+ "version": "2.1.3-alpha.0",
3
3
  "package": "@mastra/rag",
4
4
  "exports": {},
5
5
  "modules": {}
@@ -1,4 +1,4 @@
1
- # Chunking and Embedding Documents
1
+ # Chunking and embedding documents
2
2
 
3
3
  Before processing, create a MDocument instance from your content. You can initialize it from various formats:
4
4
 
@@ -9,7 +9,7 @@ const docFromMarkdown = MDocument.fromMarkdown('# Your Markdown content...')
9
9
  const docFromJSON = MDocument.fromJSON(`{ "key": "value" }`)
10
10
  ```
11
11
 
12
- ## Step 1: Document Processing
12
+ ## Document processing
13
13
 
14
14
  Use `chunk` to split documents into manageable pieces. Mastra supports multiple chunking strategies optimized for different document types:
15
15
 
@@ -65,7 +65,7 @@ const chunks = await doc.chunk({
65
65
 
66
66
  We go deeper into chunking strategies in our [`chunk()` reference documentation](https://mastra.ai/reference/rag/chunk).
67
67
 
68
- ## Step 2: Embedding Generation
68
+ ## Embedding generation
69
69
 
70
70
  Transform chunks into embeddings using your preferred provider. Mastra supports embedding models through the model router.
71
71
 
@@ -123,9 +123,9 @@ const { embeddings } = await embedMany({
123
123
  })
124
124
  ```
125
125
 
126
- > **Vector Database Compatibility:** When storing embeddings, the vector database index must be configured to match the output size of your embedding model. If the dimensions do not match, you may get errors or data corruption.
126
+ > **Vector Database Compatibility:** When storing embeddings, the vector database index must be configured to match the output size of your embedding model. If the dimensions don't match, you may get errors or data corruption.
127
127
 
128
- ## Example: Complete Pipeline
128
+ ## Example: Complete pipeline
129
129
 
130
130
  Here's an example showing document processing and embedding generation with both providers:
131
131
 
@@ -75,7 +75,7 @@ const ragAgent = new Agent({
75
75
  instructions: `You are a helpful assistant that answers questions based on the provided context.
76
76
  When answering questions, use the graph query tool to find relevant information and relationships.
77
77
  Base your answers on the context provided by the tool, and clearly state if the context doesn't contain enough information.`,
78
- model: 'openai/gpt-5.1',
78
+ model: 'openai/gpt-5.4',
79
79
  tools: {
80
80
  graphQueryTool,
81
81
  },
@@ -197,7 +197,7 @@ const agent = new Agent({
197
197
  name: 'RAG Agent',
198
198
  instructions: `Use vector search for simple fact-finding queries.
199
199
  Use graph search when you need to understand relationships or find connected information.`,
200
- model: 'openai/gpt-5.1',
200
+ model: 'openai/gpt-5.4',
201
201
  tools: {
202
202
  vectorQueryTool,
203
203
  graphQueryTool,
@@ -59,11 +59,11 @@ console.log('Similar chunks:', results)
59
59
 
60
60
  This example shows the essentials: initialize a document, create chunks, generate embeddings, store them, and query for similar content.
61
61
 
62
- ## Document Processing
62
+ ## Document processing
63
63
 
64
64
  The basic building block of RAG is document processing. Documents can be chunked using various strategies (recursive, sliding window, etc.) and enriched with metadata. See the [chunking and embedding doc](https://mastra.ai/docs/rag/chunking-and-embedding).
65
65
 
66
- ## Vector Storage
66
+ ## Vector storage
67
67
 
68
68
  Mastra supports multiple vector stores for embedding persistence and similarity search, including pgvector, Pinecone, Qdrant, and MongoDB. See the [vector database doc](https://mastra.ai/docs/rag/vector-databases).
69
69
 
@@ -1,10 +1,10 @@
1
- # Retrieval in RAG Systems
1
+ # Retrieval in RAG systems
2
2
 
3
3
  After storing embeddings, you need to retrieve relevant chunks to answer user queries.
4
4
 
5
5
  Mastra provides flexible retrieval options with support for semantic search, filtering, and re-ranking.
6
6
 
7
- ## How Retrieval Works
7
+ ## How retrieval works
8
8
 
9
9
  1. The user's query is converted to an embedding using the same model used for document embeddings
10
10
  2. This embedding is compared to stored embeddings using vector similarity
@@ -14,7 +14,7 @@ Mastra provides flexible retrieval options with support for semantic search, fil
14
14
  - Re-ranked for better relevance
15
15
  - Processed through a knowledge graph
16
16
 
17
- ## Basic Retrieval
17
+ ## Basic retrieval
18
18
 
19
19
  The simplest approach is direct semantic search. This method uses vector similarity to find chunks that are semantically similar to the query:
20
20
 
@@ -63,7 +63,7 @@ Results include both the text content and a similarity score:
63
63
  ]
64
64
  ```
65
65
 
66
- ## Advanced Retrieval options
66
+ ## Advanced retrieval options
67
67
 
68
68
  ### Metadata Filtering
69
69
 
@@ -272,7 +272,7 @@ import { PGVECTOR_PROMPT } from '@mastra/pg'
272
272
  export const ragAgent = new Agent({
273
273
  id: 'rag-agent',
274
274
  name: 'RAG Agent',
275
- model: 'openai/gpt-5.1',
275
+ model: 'openai/gpt-5.4',
276
276
  instructions: `
277
277
  Process queries using the provided context. Structure responses to be concise and relevant.
278
278
  ${PGVECTOR_PROMPT}
@@ -289,7 +289,7 @@ import { PINECONE_PROMPT } from '@mastra/pinecone'
289
289
  export const ragAgent = new Agent({
290
290
  id: 'rag-agent',
291
291
  name: 'RAG Agent',
292
- model: 'openai/gpt-5.1',
292
+ model: 'openai/gpt-5.4',
293
293
  instructions: `
294
294
  Process queries using the provided context. Structure responses to be concise and relevant.
295
295
  ${PINECONE_PROMPT}
@@ -306,7 +306,7 @@ import { QDRANT_PROMPT } from '@mastra/qdrant'
306
306
  export const ragAgent = new Agent({
307
307
  id: 'rag-agent',
308
308
  name: 'RAG Agent',
309
- model: 'openai/gpt-5.1',
309
+ model: 'openai/gpt-5.4',
310
310
  instructions: `
311
311
  Process queries using the provided context. Structure responses to be concise and relevant.
312
312
  ${QDRANT_PROMPT}
@@ -323,7 +323,7 @@ import { CHROMA_PROMPT } from '@mastra/chroma'
323
323
  export const ragAgent = new Agent({
324
324
  id: 'rag-agent',
325
325
  name: 'RAG Agent',
326
- model: 'openai/gpt-5.1',
326
+ model: 'openai/gpt-5.4',
327
327
  instructions: `
328
328
  Process queries using the provided context. Structure responses to be concise and relevant.
329
329
  ${CHROMA_PROMPT}
@@ -340,7 +340,7 @@ import { ASTRA_PROMPT } from '@mastra/astra'
340
340
  export const ragAgent = new Agent({
341
341
  id: 'rag-agent',
342
342
  name: 'RAG Agent',
343
- model: 'openai/gpt-5.1',
343
+ model: 'openai/gpt-5.4',
344
344
  instructions: `
345
345
  Process queries using the provided context. Structure responses to be concise and relevant.
346
346
  ${ASTRA_PROMPT}
@@ -357,7 +357,7 @@ import { LIBSQL_PROMPT } from '@mastra/libsql'
357
357
  export const ragAgent = new Agent({
358
358
  id: 'rag-agent',
359
359
  name: 'RAG Agent',
360
- model: 'openai/gpt-5.1',
360
+ model: 'openai/gpt-5.4',
361
361
  instructions: `
362
362
  Process queries using the provided context. Structure responses to be concise and relevant.
363
363
  ${LIBSQL_PROMPT}
@@ -374,7 +374,7 @@ import { UPSTASH_PROMPT } from '@mastra/upstash'
374
374
  export const ragAgent = new Agent({
375
375
  id: 'rag-agent',
376
376
  name: 'RAG Agent',
377
- model: 'openai/gpt-5.1',
377
+ model: 'openai/gpt-5.4',
378
378
  instructions: `
379
379
  Process queries using the provided context. Structure responses to be concise and relevant.
380
380
  ${UPSTASH_PROMPT}
@@ -391,7 +391,7 @@ import { VECTORIZE_PROMPT } from '@mastra/vectorize'
391
391
  export const ragAgent = new Agent({
392
392
  id: 'rag-agent',
393
393
  name: 'RAG Agent',
394
- model: 'openai/gpt-5.1',
394
+ model: 'openai/gpt-5.4',
395
395
  instructions: `
396
396
  Process queries using the provided context. Structure responses to be concise and relevant.
397
397
  ${VECTORIZE_PROMPT}
@@ -408,7 +408,7 @@ import { MONGODB_PROMPT } from '@mastra/mongodb'
408
408
  export const ragAgent = new Agent({
409
409
  id: 'rag-agent',
410
410
  name: 'RAG Agent',
411
- model: 'openai/gpt-5.1',
411
+ model: 'openai/gpt-5.4',
412
412
  instructions: `
413
413
  Process queries using the provided context. Structure responses to be concise and relevant.
414
414
  ${MONGODB_PROMPT}
@@ -425,7 +425,7 @@ import { OPENSEARCH_PROMPT } from '@mastra/opensearch'
425
425
  export const ragAgent = new Agent({
426
426
  id: 'rag-agent',
427
427
  name: 'RAG Agent',
428
- model: 'openai/gpt-5.1',
428
+ model: 'openai/gpt-5.4',
429
429
  instructions: `
430
430
  Process queries using the provided context. Structure responses to be concise and relevant.
431
431
  ${OPENSEARCH_PROMPT}
@@ -442,7 +442,7 @@ import { S3VECTORS_PROMPT } from '@mastra/s3vectors'
442
442
  export const ragAgent = new Agent({
443
443
  id: 'rag-agent',
444
444
  name: 'RAG Agent',
445
- model: 'openai/gpt-5.1',
445
+ model: 'openai/gpt-5.4',
446
446
  instructions: `
447
447
  Process queries using the provided context. Structure responses to be concise and relevant.
448
448
  ${S3VECTORS_PROMPT}
@@ -472,7 +472,7 @@ const initialResults = await pgVector.query({
472
472
  })
473
473
 
474
474
  // Create a relevance scorer
475
- const relevanceProvider = new MastraAgentRelevanceScorer('relevance-scorer', 'openai/gpt-5.1')
475
+ const relevanceProvider = new MastraAgentRelevanceScorer('relevance-scorer', 'openai/gpt-5.4')
476
476
 
477
477
  // Re-rank the results
478
478
  const rerankedResults = await rerank({
@@ -39,25 +39,25 @@ const chunksWithMetadata = await doc.chunk({
39
39
 
40
40
  The following parameters are available for all chunking strategies. **Important:** Each strategy will only utilize a subset of these parameters relevant to its specific use case.
41
41
 
42
- **strategy?:** (`'recursive' | 'character' | 'token' | 'markdown' | 'semantic-markdown' | 'html' | 'json' | 'latex' | 'sentence'`): The chunking strategy to use. If not specified, defaults based on document type. Depending on the chunking strategy, there are additional optionals. Defaults: .md files → 'markdown', .html/.htm → 'html', .json → 'json', .tex → 'latex', others → 'recursive'
42
+ **strategy** (`'recursive' | 'character' | 'token' | 'markdown' | 'semantic-markdown' | 'html' | 'json' | 'latex' | 'sentence'`): The chunking strategy to use. If not specified, defaults based on document type. Depending on the chunking strategy, there are additional optionals. Defaults: .md files → 'markdown', .html/.htm → 'html', .json → 'json', .tex → 'latex', others → 'recursive'
43
43
 
44
- **maxSize?:** (`number`): Maximum size of each chunk. \*\*Note:\*\* Some strategy configurations (markdown with headers, HTML with headers) ignore this parameter. (Default: `4000`)
44
+ **maxSize** (`number`): Maximum size of each chunk. \*\*Note:\*\* Some strategy configurations (markdown with headers, HTML with headers) ignore this parameter. (Default: `4000`)
45
45
 
46
- **overlap?:** (`number`): Number of characters/tokens that overlap between chunks. (Default: `50`)
46
+ **overlap** (`number`): Number of characters/tokens that overlap between chunks. (Default: `50`)
47
47
 
48
- **lengthFunction?:** (`(text: string) => number`): Function to calculate text length. Defaults to character count.
48
+ **lengthFunction** (`(text: string) => number`): Function to calculate text length. Defaults to character count.
49
49
 
50
- **separatorPosition?:** (`'start' | 'end'`): Where to position the separator in chunks. 'start' attaches to beginning of next chunk, 'end' attaches to end of current chunk. If not specified, separators are discarded.
50
+ **separatorPosition** (`'start' | 'end'`): Where to position the separator in chunks. 'start' attaches to beginning of next chunk, 'end' attaches to end of current chunk. If not specified, separators are discarded.
51
51
 
52
- **addStartIndex?:** (`boolean`): Whether to add start index metadata to chunks. (Default: `false`)
52
+ **addStartIndex** (`boolean`): Whether to add start index metadata to chunks. (Default: `false`)
53
53
 
54
- **stripWhitespace?:** (`boolean`): Whether to strip whitespace from chunks. (Default: `true`)
54
+ **stripWhitespace** (`boolean`): Whether to strip whitespace from chunks. (Default: `true`)
55
55
 
56
- **extract?:** (`ExtractParams`): Metadata extraction configuration.
56
+ **extract** (`ExtractParams`): Metadata extraction configuration.
57
57
 
58
58
  See [ExtractParams reference](https://mastra.ai/reference/rag/extract-params) for details on the `extract` parameter.
59
59
 
60
- ## Strategy-Specific Options
60
+ ## Strategy-specific options
61
61
 
62
62
  Strategy-specific options are passed as top-level parameters alongside the strategy parameter. For example:
63
63
 
@@ -126,89 +126,89 @@ The options documented below are passed directly at the top level of the configu
126
126
 
127
127
  ### Character
128
128
 
129
- **separators?:** (`string[]`): Array of separators to try in order of preference. The strategy will attempt to split on the first separator, then fall back to subsequent ones.
129
+ **separators** (`string[]`): Array of separators to try in order of preference. The strategy will attempt to split on the first separator, then fall back to subsequent ones.
130
130
 
131
- **isSeparatorRegex?:** (`boolean`): Whether the separator is a regex pattern (Default: `false`)
131
+ **isSeparatorRegex** (`boolean`): Whether the separator is a regex pattern (Default: `false`)
132
132
 
133
133
  ### Recursive
134
134
 
135
- **separators?:** (`string[]`): Array of separators to try in order of preference. The strategy will attempt to split on the first separator, then fall back to subsequent ones.
135
+ **separators** (`string[]`): Array of separators to try in order of preference. The strategy will attempt to split on the first separator, then fall back to subsequent ones.
136
136
 
137
- **isSeparatorRegex?:** (`boolean`): Whether the separators are regex patterns (Default: `false`)
137
+ **isSeparatorRegex** (`boolean`): Whether the separators are regex patterns (Default: `false`)
138
138
 
139
- **language?:** (`Language`): Programming or markup language for language-specific splitting behavior. See Language enum for supported values.
139
+ **language** (`Language`): Programming or markup language for language-specific splitting behavior. See Language enum for supported values.
140
140
 
141
141
  ### Sentence
142
142
 
143
- **maxSize:** (`number`): Maximum size of each chunk (required for sentence strategy)
143
+ **maxSize** (`number`): Maximum size of each chunk (required for sentence strategy)
144
144
 
145
- **minSize?:** (`number`): Minimum size of each chunk. Chunks smaller than this will be merged with adjacent chunks when possible. (Default: `50`)
145
+ **minSize** (`number`): Minimum size of each chunk. Chunks smaller than this will be merged with adjacent chunks when possible. (Default: `50`)
146
146
 
147
- **targetSize?:** (`number`): Preferred target size for chunks. Defaults to 80% of maxSize. The strategy will try to create chunks close to this size.
147
+ **targetSize** (`number`): Preferred target size for chunks. Defaults to 80% of maxSize. The strategy will try to create chunks close to this size.
148
148
 
149
- **sentenceEnders?:** (`string[]`): Array of characters that mark sentence endings for splitting boundaries. (Default: `['.', '!', '?']`)
149
+ **sentenceEnders** (`string[]`): Array of characters that mark sentence endings for splitting boundaries. (Default: `['.', '!', '?']`)
150
150
 
151
- **fallbackToWords?:** (`boolean`): Whether to fall back to word-level splitting for sentences that exceed maxSize. (Default: `true`)
151
+ **fallbackToWords** (`boolean`): Whether to fall back to word-level splitting for sentences that exceed maxSize. (Default: `true`)
152
152
 
153
- **fallbackToCharacters?:** (`boolean`): Whether to fall back to character-level splitting for words that exceed maxSize. Only applies if fallbackToWords is enabled. (Default: `true`)
153
+ **fallbackToCharacters** (`boolean`): Whether to fall back to character-level splitting for words that exceed maxSize. Only applies if fallbackToWords is enabled. (Default: `true`)
154
154
 
155
155
  ### HTML
156
156
 
157
- **headers:** (`Array<[string, string]>`): Array of \[selector, metadata key] pairs for header-based splitting
157
+ **headers** (`Array<[string, string]>`): Array of \[selector, metadata key] pairs for header-based splitting
158
158
 
159
- **sections:** (`Array<[string, string]>`): Array of \[selector, metadata key] pairs for section-based splitting
159
+ **sections** (`Array<[string, string]>`): Array of \[selector, metadata key] pairs for section-based splitting
160
160
 
161
- **returnEachLine?:** (`boolean`): Whether to return each line as a separate chunk
161
+ **returnEachLine** (`boolean`): Whether to return each line as a separate chunk
162
162
 
163
163
  **Important:** When using the HTML strategy, all general options are ignored. Use `headers` for header-based splitting or `sections` for section-based splitting. If used together, `sections` will be ignored.
164
164
 
165
165
  ### Markdown
166
166
 
167
- **headers?:** (`Array<[string, string]>`): Array of \[header level, metadata key] pairs
167
+ **headers** (`Array<[string, string]>`): Array of \[header level, metadata key] pairs
168
168
 
169
- **stripHeaders?:** (`boolean`): Whether to remove headers from the output
169
+ **stripHeaders** (`boolean`): Whether to remove headers from the output
170
170
 
171
- **returnEachLine?:** (`boolean`): Whether to return each line as a separate chunk
171
+ **returnEachLine** (`boolean`): Whether to return each line as a separate chunk
172
172
 
173
173
  **Important:** When using the `headers` option, the markdown strategy ignores all general options and content is split based on the markdown header structure. To use size-based chunking with markdown, omit the `headers` parameter.
174
174
 
175
175
  ### Semantic Markdown
176
176
 
177
- **joinThreshold?:** (`number`): Maximum token count for merging related sections. Sections exceeding this limit individually are left intact, but smaller sections are merged with siblings or parents if the combined size stays under this threshold. (Default: `500`)
177
+ **joinThreshold** (`number`): Maximum token count for merging related sections. Sections exceeding this limit individually are left intact, but smaller sections are merged with siblings or parents if the combined size stays under this threshold. (Default: `500`)
178
178
 
179
- **modelName?:** (`string`): Name of the model for tokenization. If provided, the model's underlying tokenization \`encodingName\` will be used.
179
+ **modelName** (`string`): Name of the model for tokenization. If provided, the model's underlying tokenization \`encodingName\` will be used.
180
180
 
181
- **encodingName?:** (`string`): Name of the token encoding to use. Derived from \`modelName\` if available. (Default: `cl100k_base`)
181
+ **encodingName** (`string`): Name of the token encoding to use. Derived from \`modelName\` if available. (Default: `cl100k_base`)
182
182
 
183
- **allowedSpecial?:** (`Set<string> | 'all'`): Set of special tokens allowed during tokenization, or 'all' to allow all special tokens
183
+ **allowedSpecial** (`Set<string> | 'all'`): Set of special tokens allowed during tokenization, or 'all' to allow all special tokens
184
184
 
185
- **disallowedSpecial?:** (`Set<string> | 'all'`): Set of special tokens to disallow during tokenization, or 'all' to disallow all special tokens (Default: `all`)
185
+ **disallowedSpecial** (`Set<string> | 'all'`): Set of special tokens to disallow during tokenization, or 'all' to disallow all special tokens (Default: `all`)
186
186
 
187
187
  ### Token
188
188
 
189
- **encodingName?:** (`string`): Name of the token encoding to use
189
+ **encodingName** (`string`): Name of the token encoding to use
190
190
 
191
- **modelName?:** (`string`): Name of the model for tokenization
191
+ **modelName** (`string`): Name of the model for tokenization
192
192
 
193
- **allowedSpecial?:** (`Set<string> | 'all'`): Set of special tokens allowed during tokenization, or 'all' to allow all special tokens
193
+ **allowedSpecial** (`Set<string> | 'all'`): Set of special tokens allowed during tokenization, or 'all' to allow all special tokens
194
194
 
195
- **disallowedSpecial?:** (`Set<string> | 'all'`): Set of special tokens to disallow during tokenization, or 'all' to disallow all special tokens
195
+ **disallowedSpecial** (`Set<string> | 'all'`): Set of special tokens to disallow during tokenization, or 'all' to disallow all special tokens
196
196
 
197
197
  ### JSON
198
198
 
199
- **maxSize:** (`number`): Maximum size of each chunk
199
+ **maxSize** (`number`): Maximum size of each chunk
200
200
 
201
- **minSize?:** (`number`): Minimum size of each chunk
201
+ **minSize** (`number`): Minimum size of each chunk
202
202
 
203
- **ensureAscii?:** (`boolean`): Whether to ensure ASCII encoding
203
+ **ensureAscii** (`boolean`): Whether to ensure ASCII encoding
204
204
 
205
- **convertLists?:** (`boolean`): Whether to convert lists in the JSON
205
+ **convertLists** (`boolean`): Whether to convert lists in the JSON
206
206
 
207
207
  ### Latex
208
208
 
209
209
  The Latex strategy uses only the general chunking options listed above. It provides LaTeX-aware splitting optimized for mathematical and academic documents.
210
210
 
211
- ## Return Value
211
+ ## Return value
212
212
 
213
213
  Returns a `MDocument` instance containing the chunked documents. Each chunk includes:
214
214
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  The `DatabaseConfig` type allows you to specify database-specific configurations when using vector query tools. These configurations enable you to leverage unique features and optimizations offered by different vector stores.
4
4
 
5
- ## Type Definition
5
+ ## Type definition
6
6
 
7
7
  ```typescript
8
8
  export type DatabaseConfig = {
@@ -13,15 +13,19 @@ export type DatabaseConfig = {
13
13
  }
14
14
  ```
15
15
 
16
- ## Database-Specific Types
16
+ ## Database-specific types
17
17
 
18
- ### PineconeConfig
18
+ ### `PineconeConfig`
19
19
 
20
20
  Configuration options specific to Pinecone vector store.
21
21
 
22
- **namespace?:** (`string`): Pinecone namespace for organizing and isolating vectors within the same index. Useful for multi-tenancy or environment separation.
22
+ **namespace** (`string`): Pinecone namespace for organizing and isolating vectors within the same index. Useful for multi-tenancy or environment separation.
23
23
 
24
- **sparseVector?:** (`{ indices: number[]; values: number[]; }`): objectindices:number\[]Array of indices for sparse vector componentsvalues:number\[]Array of values corresponding to the indices
24
+ **sparseVector** (`{ indices: number[]; values: number[]; }`): Sparse vector for hybrid search combining dense and sparse embeddings. Enables better search quality for keyword-based queries. The indices and values arrays must be the same length.
25
+
26
+ **sparseVector.indices** (`number[]`): Array of indices for sparse vector components
27
+
28
+ **sparseVector.values** (`number[]`): Array of values corresponding to the indices
25
29
 
26
30
  **Use Cases:**
27
31
 
@@ -29,15 +33,15 @@ Configuration options specific to Pinecone vector store.
29
33
  - Environment isolation (dev/staging/prod namespaces)
30
34
  - Hybrid search combining semantic and keyword matching
31
35
 
32
- ### PgVectorConfig
36
+ ### `PgVectorConfig`
33
37
 
34
38
  Configuration options specific to PostgreSQL with pgvector extension.
35
39
 
36
- **minScore?:** (`number`): Minimum similarity score threshold for results. Only vectors with similarity scores above this value will be returned.
40
+ **minScore** (`number`): Minimum similarity score threshold for results. Only vectors with similarity scores above this value will be returned.
37
41
 
38
- **ef?:** (`number`): HNSW search parameter that controls the size of the dynamic candidate list during search. Higher values improve accuracy at the cost of speed. Typically set between topK and 200.
42
+ **ef** (`number`): HNSW search parameter that controls the size of the dynamic candidate list during search. Higher values improve accuracy at the cost of speed. Typically set between topK and 200.
39
43
 
40
- **probes?:** (`number`): IVFFlat probe parameter that specifies the number of index cells to visit during search. Higher values improve recall at the cost of speed.
44
+ **probes** (`number`): IVFFlat probe parameter that specifies the number of index cells to visit during search. Higher values improve recall at the cost of speed.
41
45
 
42
46
  **Performance Guidelines:**
43
47
 
@@ -51,13 +55,13 @@ Configuration options specific to PostgreSQL with pgvector extension.
51
55
  - Quality filtering to remove irrelevant results
52
56
  - Fine-tuning search accuracy vs speed tradeoffs
53
57
 
54
- ### ChromaConfig
58
+ ### `ChromaConfig`
55
59
 
56
60
  Configuration options specific to Chroma vector store.
57
61
 
58
- **where?:** (`Record<string, any>`): Metadata filtering conditions using MongoDB-style query syntax. Filters results based on metadata fields.
62
+ **where** (`Record<string, any>`): Metadata filtering conditions using MongoDB-style query syntax. Filters results based on metadata fields.
59
63
 
60
- **whereDocument?:** (`Record<string, any>`): Document content filtering conditions. Allows filtering based on the actual document text content.
64
+ **whereDocument** (`Record<string, any>`): Document content filtering conditions. Allows filtering based on the actual document text content.
61
65
 
62
66
  **Filter Syntax Examples:**
63
67
 
@@ -84,7 +88,7 @@ whereDocument: { "$contains": "API documentation" }
84
88
  - Content-based document filtering
85
89
  - Complex query combinations
86
90
 
87
- ## Usage Examples
91
+ ## Usage examples
88
92
 
89
93
  **Basic Usage**:
90
94
 
@@ -229,7 +233,7 @@ const vectorTool = createVectorQueryTool({
229
233
  })
230
234
  ```
231
235
 
232
- ## Best Practices
236
+ ## Best practices
233
237
 
234
238
  1. **Environment Configuration**: Use different namespaces or configurations for different environments
235
239
  2. **Performance Tuning**: Start with default values and adjust based on your specific needs
@@ -237,7 +241,7 @@ const vectorTool = createVectorQueryTool({
237
241
  4. **Runtime Flexibility**: Override configurations at runtime for dynamic scenarios
238
242
  5. **Documentation**: Document your specific configuration choices for team members
239
243
 
240
- ## Migration Guide
244
+ ## Migration guide
241
245
 
242
246
  Existing vector query tools continue to work without changes. To add database configurations:
243
247
 
@@ -4,13 +4,13 @@ The MDocument class processes documents for RAG applications. The main methods a
4
4
 
5
5
  ## Constructor
6
6
 
7
- **docs:** (`Array<{ text: string, metadata?: Record<string, any> }>`): Array of document chunks with their text content and optional metadata
7
+ **docs** (`Array<{ text: string, metadata?: Record<string, any> }>`): Array of document chunks with their text content and optional metadata
8
8
 
9
- **type:** (`'text' | 'html' | 'markdown' | 'json' | 'latex'`): Type of document content
9
+ **type** (`'text' | 'html' | 'markdown' | 'json' | 'latex'`): Type of document content
10
10
 
11
- ## Static Methods
11
+ ## Static methods
12
12
 
13
- ### fromText()
13
+ ### `fromText()`
14
14
 
15
15
  Creates a document from plain text content.
16
16
 
@@ -18,7 +18,7 @@ Creates a document from plain text content.
18
18
  static fromText(text: string, metadata?: Record<string, any>): MDocument
19
19
  ```
20
20
 
21
- ### fromHTML()
21
+ ### `fromHTML()`
22
22
 
23
23
  Creates a document from HTML content.
24
24
 
@@ -26,7 +26,7 @@ Creates a document from HTML content.
26
26
  static fromHTML(html: string, metadata?: Record<string, any>): MDocument
27
27
  ```
28
28
 
29
- ### fromMarkdown()
29
+ ### `fromMarkdown()`
30
30
 
31
31
  Creates a document from Markdown content.
32
32
 
@@ -34,7 +34,7 @@ Creates a document from Markdown content.
34
34
  static fromMarkdown(markdown: string, metadata?: Record<string, any>): MDocument
35
35
  ```
36
36
 
37
- ### fromJSON()
37
+ ### `fromJSON()`
38
38
 
39
39
  Creates a document from JSON content.
40
40
 
@@ -42,9 +42,9 @@ Creates a document from JSON content.
42
42
  static fromJSON(json: string, metadata?: Record<string, any>): MDocument
43
43
  ```
44
44
 
45
- ## Instance Methods
45
+ ## Instance methods
46
46
 
47
- ### chunk()
47
+ ### `chunk()`
48
48
 
49
49
  Splits document into chunks and optionally extracts metadata.
50
50
 
@@ -54,7 +54,7 @@ async chunk(params?: ChunkParams): Promise<Chunk[]>
54
54
 
55
55
  See [chunk() reference](https://mastra.ai/reference/rag/chunk) for detailed options.
56
56
 
57
- ### getDocs()
57
+ ### `getDocs()`
58
58
 
59
59
  Returns array of processed document chunks.
60
60
 
@@ -62,7 +62,7 @@ Returns array of processed document chunks.
62
62
  getDocs(): Chunk[]
63
63
  ```
64
64
 
65
- ### getText()
65
+ ### `getText()`
66
66
 
67
67
  Returns array of text strings from chunks.
68
68
 
@@ -70,7 +70,7 @@ Returns array of text strings from chunks.
70
70
  getText(): string[]
71
71
  ```
72
72
 
73
- ### getMetadata()
73
+ ### `getMetadata()`
74
74
 
75
75
  Returns array of metadata objects from chunks.
76
76
 
@@ -78,7 +78,7 @@ Returns array of metadata objects from chunks.
78
78
  getMetadata(): Record<string, any>[]
79
79
  ```
80
80
 
81
- ### extractMetadata()
81
+ ### `extractMetadata()`
82
82
 
83
83
  Extracts metadata using specified extractors. See [ExtractParams reference](https://mastra.ai/reference/rag/extract-params) for details.
84
84