@mastra/mcp-docs-server 0.0.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40mastra%2Fastra.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fchroma.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fcomposio.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fgithub.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Floggers.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +11 -0
- package/.docs/organized/changelogs/%40mastra%2Fmcp.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Frag.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fragie.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-azure.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-deepgram.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-elevenlabs.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-google.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-ibm.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-murf.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-openai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-playai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-replicate.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fspeech-speechify.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fstabilityai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +59 -0
- package/.docs/organized/changelogs/%40mastra%2Fupstash.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +24 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +302 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +12 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +302 -0
- package/.docs/organized/changelogs/create-mastra.md +302 -0
- package/.docs/organized/changelogs/mastra.md +302 -0
- package/.docs/organized/code-examples/agent.md +385 -0
- package/.docs/organized/code-examples/ai-sdk-useChat.md +377 -0
- package/.docs/organized/code-examples/assistant-ui.md +37 -0
- package/.docs/organized/code-examples/bird-checker-with-express.md +235 -0
- package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +360 -0
- package/.docs/organized/code-examples/bird-checker-with-nextjs.md +250 -0
- package/.docs/organized/code-examples/crypto-chatbot.md +96 -0
- package/.docs/organized/code-examples/fireworks-r1.md +159 -0
- package/.docs/organized/code-examples/integrations.md +184 -0
- package/.docs/organized/code-examples/mcp-configuration.md +341 -0
- package/.docs/organized/code-examples/memory-todo-agent.md +161 -0
- package/.docs/organized/code-examples/memory-with-context.md +167 -0
- package/.docs/organized/code-examples/memory-with-libsql.md +204 -0
- package/.docs/organized/code-examples/memory-with-pg.md +224 -0
- package/.docs/organized/code-examples/memory-with-upstash.md +268 -0
- package/.docs/organized/code-examples/quick-start.md +127 -0
- package/.docs/organized/code-examples/stock-price-tool.md +124 -0
- package/.docs/organized/code-examples/weather-agent.md +352 -0
- package/.docs/organized/code-examples/workflow-ai-recruiter.md +159 -0
- package/.docs/organized/code-examples/workflow-with-inline-steps.md +111 -0
- package/.docs/organized/code-examples/workflow-with-memory.md +393 -0
- package/.docs/organized/code-examples/workflow-with-separate-steps.md +131 -0
- package/.docs/raw/agents/00-overview.mdx +185 -0
- package/.docs/raw/agents/01-agent-memory.mdx +610 -0
- package/.docs/raw/agents/02-adding-tools.mdx +224 -0
- package/.docs/raw/agents/03-adding-voice.mdx +170 -0
- package/.docs/raw/deployment/deployment.mdx +156 -0
- package/.docs/raw/deployment/logging-and-tracing.mdx +242 -0
- package/.docs/raw/deployment/server.mdx +114 -0
- package/.docs/raw/evals/00-overview.mdx +106 -0
- package/.docs/raw/evals/01-supported-evals.mdx +31 -0
- package/.docs/raw/evals/02-custom-eval.mdx +187 -0
- package/.docs/raw/faq/index.mdx +63 -0
- package/.docs/raw/frameworks/01-next-js.mdx +238 -0
- package/.docs/raw/frameworks/02-ai-sdk.mdx +218 -0
- package/.docs/raw/getting-started/installation.mdx +436 -0
- package/.docs/raw/getting-started/project-structure.mdx +80 -0
- package/.docs/raw/guides/01-chef-michel.mdx +242 -0
- package/.docs/raw/guides/02-stock-agent.mdx +182 -0
- package/.docs/raw/guides/03-recruiter.mdx +187 -0
- package/.docs/raw/index.mdx +22 -0
- package/.docs/raw/local-dev/creating-projects.mdx +74 -0
- package/.docs/raw/local-dev/integrations.mdx +127 -0
- package/.docs/raw/local-dev/mastra-dev.mdx +65 -0
- package/.docs/raw/rag/chunking-and-embedding.mdx +128 -0
- package/.docs/raw/rag/overview.mdx +85 -0
- package/.docs/raw/rag/retrieval.mdx +362 -0
- package/.docs/raw/rag/vector-databases.mdx +271 -0
- package/.docs/raw/reference/agents/createTool.mdx +190 -0
- package/.docs/raw/reference/agents/generate.mdx +327 -0
- package/.docs/raw/reference/agents/getAgent.mdx +54 -0
- package/.docs/raw/reference/agents/stream.mdx +361 -0
- package/.docs/raw/reference/cli/build.mdx +48 -0
- package/.docs/raw/reference/cli/deploy.mdx +22 -0
- package/.docs/raw/reference/cli/dev.mdx +97 -0
- package/.docs/raw/reference/cli/init.mdx +43 -0
- package/.docs/raw/reference/client-js/agents.mdx +90 -0
- package/.docs/raw/reference/client-js/error-handling.mdx +38 -0
- package/.docs/raw/reference/client-js/index.mdx +127 -0
- package/.docs/raw/reference/client-js/logs.mdx +24 -0
- package/.docs/raw/reference/client-js/memory.mdx +94 -0
- package/.docs/raw/reference/client-js/telemetry.mdx +20 -0
- package/.docs/raw/reference/client-js/tools.mdx +44 -0
- package/.docs/raw/reference/client-js/vectors.mdx +79 -0
- package/.docs/raw/reference/client-js/workflows.mdx +137 -0
- package/.docs/raw/reference/core/mastra-class.mdx +232 -0
- package/.docs/raw/reference/deployer/cloudflare.mdx +176 -0
- package/.docs/raw/reference/deployer/deployer.mdx +159 -0
- package/.docs/raw/reference/deployer/netlify.mdx +88 -0
- package/.docs/raw/reference/deployer/vercel.mdx +97 -0
- package/.docs/raw/reference/evals/answer-relevancy.mdx +186 -0
- package/.docs/raw/reference/evals/bias.mdx +186 -0
- package/.docs/raw/reference/evals/completeness.mdx +174 -0
- package/.docs/raw/reference/evals/content-similarity.mdx +183 -0
- package/.docs/raw/reference/evals/context-position.mdx +190 -0
- package/.docs/raw/reference/evals/context-precision.mdx +189 -0
- package/.docs/raw/reference/evals/context-relevancy.mdx +188 -0
- package/.docs/raw/reference/evals/contextual-recall.mdx +191 -0
- package/.docs/raw/reference/evals/faithfulness.mdx +193 -0
- package/.docs/raw/reference/evals/hallucination.mdx +219 -0
- package/.docs/raw/reference/evals/keyword-coverage.mdx +176 -0
- package/.docs/raw/reference/evals/prompt-alignment.mdx +238 -0
- package/.docs/raw/reference/evals/summarization.mdx +205 -0
- package/.docs/raw/reference/evals/textual-difference.mdx +161 -0
- package/.docs/raw/reference/evals/tone-consistency.mdx +181 -0
- package/.docs/raw/reference/evals/toxicity.mdx +165 -0
- package/.docs/raw/reference/index.mdx +8 -0
- package/.docs/raw/reference/memory/Memory.mdx +186 -0
- package/.docs/raw/reference/memory/createThread.mdx +93 -0
- package/.docs/raw/reference/memory/getThreadById.mdx +43 -0
- package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +45 -0
- package/.docs/raw/reference/memory/query.mdx +164 -0
- package/.docs/raw/reference/observability/create-logger.mdx +106 -0
- package/.docs/raw/reference/observability/logger.mdx +55 -0
- package/.docs/raw/reference/observability/otel-config.mdx +120 -0
- package/.docs/raw/reference/observability/providers/braintrust.mdx +40 -0
- package/.docs/raw/reference/observability/providers/index.mdx +15 -0
- package/.docs/raw/reference/observability/providers/laminar.mdx +41 -0
- package/.docs/raw/reference/observability/providers/langfuse.mdx +51 -0
- package/.docs/raw/reference/observability/providers/langsmith.mdx +46 -0
- package/.docs/raw/reference/observability/providers/langwatch.mdx +45 -0
- package/.docs/raw/reference/observability/providers/new-relic.mdx +40 -0
- package/.docs/raw/reference/observability/providers/signoz.mdx +40 -0
- package/.docs/raw/reference/observability/providers/traceloop.mdx +40 -0
- package/.docs/raw/reference/rag/astra.mdx +258 -0
- package/.docs/raw/reference/rag/chroma.mdx +281 -0
- package/.docs/raw/reference/rag/chunk.mdx +237 -0
- package/.docs/raw/reference/rag/document.mdx +129 -0
- package/.docs/raw/reference/rag/embeddings.mdx +160 -0
- package/.docs/raw/reference/rag/extract-params.mdx +72 -0
- package/.docs/raw/reference/rag/graph-rag.mdx +182 -0
- package/.docs/raw/reference/rag/libsql.mdx +357 -0
- package/.docs/raw/reference/rag/metadata-filters.mdx +298 -0
- package/.docs/raw/reference/rag/pg.mdx +477 -0
- package/.docs/raw/reference/rag/pinecone.mdx +249 -0
- package/.docs/raw/reference/rag/qdrant.mdx +236 -0
- package/.docs/raw/reference/rag/rerank.mdx +212 -0
- package/.docs/raw/reference/rag/turbopuffer.mdx +249 -0
- package/.docs/raw/reference/rag/upstash.mdx +198 -0
- package/.docs/raw/reference/rag/vectorize.mdx +253 -0
- package/.docs/raw/reference/storage/libsql.mdx +74 -0
- package/.docs/raw/reference/storage/postgresql.mdx +48 -0
- package/.docs/raw/reference/storage/upstash.mdx +86 -0
- package/.docs/raw/reference/tools/client.mdx +180 -0
- package/.docs/raw/reference/tools/document-chunker-tool.mdx +141 -0
- package/.docs/raw/reference/tools/graph-rag-tool.mdx +154 -0
- package/.docs/raw/reference/tools/mcp-configuration.mdx +206 -0
- package/.docs/raw/reference/tools/vector-query-tool.mdx +212 -0
- package/.docs/raw/reference/voice/composite-voice.mdx +140 -0
- package/.docs/raw/reference/voice/deepgram.mdx +164 -0
- package/.docs/raw/reference/voice/elevenlabs.mdx +216 -0
- package/.docs/raw/reference/voice/google.mdx +198 -0
- package/.docs/raw/reference/voice/mastra-voice.mdx +394 -0
- package/.docs/raw/reference/voice/murf.mdx +251 -0
- package/.docs/raw/reference/voice/openai-realtime.mdx +431 -0
- package/.docs/raw/reference/voice/openai.mdx +168 -0
- package/.docs/raw/reference/voice/playai.mdx +159 -0
- package/.docs/raw/reference/voice/speechify.mdx +145 -0
- package/.docs/raw/reference/workflows/after.mdx +88 -0
- package/.docs/raw/reference/workflows/commit.mdx +37 -0
- package/.docs/raw/reference/workflows/createRun.mdx +77 -0
- package/.docs/raw/reference/workflows/else.mdx +72 -0
- package/.docs/raw/reference/workflows/execute.mdx +110 -0
- package/.docs/raw/reference/workflows/if.mdx +107 -0
- package/.docs/raw/reference/workflows/resume.mdx +155 -0
- package/.docs/raw/reference/workflows/start.mdx +84 -0
- package/.docs/raw/reference/workflows/step-class.mdx +100 -0
- package/.docs/raw/reference/workflows/step-condition.mdx +134 -0
- package/.docs/raw/reference/workflows/step-function.mdx +92 -0
- package/.docs/raw/reference/workflows/step-options.mdx +69 -0
- package/.docs/raw/reference/workflows/suspend.mdx +80 -0
- package/.docs/raw/reference/workflows/then.mdx +74 -0
- package/.docs/raw/reference/workflows/until.mdx +165 -0
- package/.docs/raw/reference/workflows/watch.mdx +118 -0
- package/.docs/raw/reference/workflows/while.mdx +168 -0
- package/.docs/raw/reference/workflows/workflow.mdx +233 -0
- package/.docs/raw/workflows/00-overview.mdx +168 -0
- package/.docs/raw/workflows/control-flow.mdx +712 -0
- package/.docs/raw/workflows/dynamic-workflows.mdx +232 -0
- package/.docs/raw/workflows/steps.mdx +98 -0
- package/.docs/raw/workflows/suspend-and-resume.mdx +196 -0
- package/.docs/raw/workflows/variables.mdx +248 -0
- package/LICENSE +44 -0
- package/README.md +129 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +19 -0
- package/dist/prepare-docs/code-examples.d.ts +4 -0
- package/dist/prepare-docs/code-examples.js +91 -0
- package/dist/prepare-docs/copy-raw.d.ts +1 -0
- package/dist/prepare-docs/copy-raw.js +41 -0
- package/dist/prepare-docs/index.d.ts +1 -0
- package/dist/prepare-docs/index.js +8 -0
- package/dist/prepare-docs/package-changes.d.ts +4 -0
- package/dist/prepare-docs/package-changes.js +92 -0
- package/dist/prepare-docs/prepare.d.ts +1 -0
- package/dist/prepare-docs/prepare.js +13 -0
- package/dist/sse.d.ts +1 -0
- package/dist/sse.js +9 -0
- package/dist/stdio.d.ts +1 -0
- package/dist/stdio.js +8 -0
- package/dist/tools/__tests__/blog.test.d.ts +1 -0
- package/dist/tools/__tests__/blog.test.js +48 -0
- package/dist/tools/__tests__/changes.test.d.ts +1 -0
- package/dist/tools/__tests__/changes.test.js +36 -0
- package/dist/tools/__tests__/docs.test.d.ts +1 -0
- package/dist/tools/__tests__/docs.test.js +46 -0
- package/dist/tools/__tests__/examples.test.d.ts +1 -0
- package/dist/tools/__tests__/examples.test.js +52 -0
- package/dist/tools/blog.d.ts +15 -0
- package/dist/tools/blog.js +73 -0
- package/dist/tools/changes.d.ts +11 -0
- package/dist/tools/changes.js +69 -0
- package/dist/tools/docs.d.ts +11 -0
- package/dist/tools/docs.js +176 -0
- package/dist/tools/examples.d.ts +11 -0
- package/dist/tools/examples.js +61 -0
- package/dist/utils.d.ts +6 -0
- package/dist/utils.js +9 -0
- package/package.json +66 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: .chunk() | Document Processing | RAG | Mastra Docs"
|
|
3
|
+
description: Documentation for the chunk function in Mastra, which splits documents into smaller segments using various strategies.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Reference: .chunk()
|
|
7
|
+
|
|
8
|
+
The `.chunk()` function splits documents into smaller segments using various strategies and options.
|
|
9
|
+
|
|
10
|
+
## Example
|
|
11
|
+
|
|
12
|
+
```typescript
|
|
13
|
+
import { MDocument } from '@mastra/rag';
|
|
14
|
+
|
|
15
|
+
const doc = MDocument.fromMarkdown(`
|
|
16
|
+
# Introduction
|
|
17
|
+
This is a sample document that we want to split into chunks.
|
|
18
|
+
|
|
19
|
+
## Section 1
|
|
20
|
+
Here is the first section with some content.
|
|
21
|
+
|
|
22
|
+
## Section 2
|
|
23
|
+
Here is another section with different content.
|
|
24
|
+
`);
|
|
25
|
+
|
|
26
|
+
// Basic chunking with defaults
|
|
27
|
+
const chunks = await doc.chunk();
|
|
28
|
+
|
|
29
|
+
// Markdown-specific chunking with header extraction
|
|
30
|
+
const chunksWithMetadata = await doc.chunk({
|
|
31
|
+
strategy: 'markdown',
|
|
32
|
+
headers: [['#', 'title'], ['##', 'section']],
|
|
33
|
+
extract: {
|
|
34
|
+
fields: [
|
|
35
|
+
{ name: 'summary', description: 'A brief summary of the chunk content' },
|
|
36
|
+
{ name: 'keywords', description: 'Key terms found in the chunk' }
|
|
37
|
+
]
|
|
38
|
+
}
|
|
39
|
+
});
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Parameters
|
|
43
|
+
|
|
44
|
+
<PropertiesTable
|
|
45
|
+
content={[
|
|
46
|
+
{
|
|
47
|
+
name: "strategy",
|
|
48
|
+
type: "'recursive' | 'character' | 'token' | 'markdown' | 'html' | 'json' | 'latex'",
|
|
49
|
+
isOptional: true,
|
|
50
|
+
description:
|
|
51
|
+
"The chunking strategy to use. If not specified, defaults based on document type. Depending on the chunking strategy, there are additional options. Defaults: .md files → 'markdown', .html/.htm → 'html', .json → 'json', .tex → 'latex', others → 'recursive'",
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
name: "size",
|
|
55
|
+
type: "number",
|
|
56
|
+
isOptional: true,
|
|
57
|
+
defaultValue: "512",
|
|
58
|
+
description: "Maximum size of each chunk",
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
name: "overlap",
|
|
62
|
+
type: "number",
|
|
63
|
+
isOptional: true,
|
|
64
|
+
defaultValue: "50",
|
|
65
|
+
description: "Number of characters/tokens that overlap between chunks.",
|
|
66
|
+
},
|
|
67
|
+
{
|
|
68
|
+
name: "separator",
|
|
69
|
+
type: "string",
|
|
70
|
+
isOptional: true,
|
|
71
|
+
defaultValue: "\\n\\n",
|
|
72
|
+
description: "Character(s) to split on. Defaults to double newline for text content.",
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
name: "isSeparatorRegex",
|
|
76
|
+
type: "boolean",
|
|
77
|
+
isOptional: true,
|
|
78
|
+
defaultValue: "false",
|
|
79
|
+
description: "Whether the separator is a regex pattern",
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
name: "keepSeparator",
|
|
83
|
+
type: "'start' | 'end'",
|
|
84
|
+
isOptional: true,
|
|
85
|
+
description:
|
|
86
|
+
"Whether to keep the separator at the start or end of chunks",
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
name: "extract",
|
|
90
|
+
type: "ExtractParams",
|
|
91
|
+
isOptional: true,
|
|
92
|
+
description: "Metadata extraction configuration. See [ExtractParams reference](./extract-params) for details.",
|
|
93
|
+
},
|
|
94
|
+
]}
|
|
95
|
+
/>
|
|
96
|
+
|
|
97
|
+
## Strategy-Specific Options
|
|
98
|
+
|
|
99
|
+
Strategy-specific options are passed as top-level parameters alongside the strategy parameter. For example:
|
|
100
|
+
|
|
101
|
+
```typescript showLineNumbers copy
|
|
102
|
+
// HTML strategy example
|
|
103
|
+
const chunks = await doc.chunk({
|
|
104
|
+
strategy: 'html',
|
|
105
|
+
headers: [['h1', 'title'], ['h2', 'subtitle']], // HTML-specific option
|
|
106
|
+
sections: [['div.content', 'main']], // HTML-specific option
|
|
107
|
+
size: 500 // general option
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
// Markdown strategy example
|
|
111
|
+
const chunks = await doc.chunk({
|
|
112
|
+
strategy: 'markdown',
|
|
113
|
+
headers: [['#', 'title'], ['##', 'section']], // Markdown-specific option
|
|
114
|
+
stripHeaders: true, // Markdown-specific option
|
|
115
|
+
overlap: 50 // general option
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
// Token strategy example
|
|
119
|
+
const chunks = await doc.chunk({
|
|
120
|
+
strategy: 'token',
|
|
121
|
+
encodingName: 'gpt2', // Token-specific option
|
|
122
|
+
modelName: 'gpt-3.5-turbo', // Token-specific option
|
|
123
|
+
size: 1000 // general option
|
|
124
|
+
});
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
The options documented below are passed directly at the top level of the configuration object, not nested within a separate options object.
|
|
128
|
+
|
|
129
|
+
### HTML
|
|
130
|
+
|
|
131
|
+
<PropertiesTable
|
|
132
|
+
content={[
|
|
133
|
+
{
|
|
134
|
+
name: "headers",
|
|
135
|
+
type: "Array<[string, string]>",
|
|
136
|
+
description:
|
|
137
|
+
"Array of [selector, metadata key] pairs for header-based splitting",
|
|
138
|
+
},
|
|
139
|
+
{
|
|
140
|
+
name: "sections",
|
|
141
|
+
type: "Array<[string, string]>",
|
|
142
|
+
description:
|
|
143
|
+
"Array of [selector, metadata key] pairs for section-based splitting",
|
|
144
|
+
},
|
|
145
|
+
{
|
|
146
|
+
name: "returnEachLine",
|
|
147
|
+
type: "boolean",
|
|
148
|
+
isOptional: true,
|
|
149
|
+
description: "Whether to return each line as a separate chunk",
|
|
150
|
+
},
|
|
151
|
+
]}
|
|
152
|
+
/>
|
|
153
|
+
|
|
154
|
+
### Markdown
|
|
155
|
+
|
|
156
|
+
<PropertiesTable
|
|
157
|
+
content={[
|
|
158
|
+
{
|
|
159
|
+
name: "headers",
|
|
160
|
+
type: "Array<[string, string]>",
|
|
161
|
+
description: "Array of [header level, metadata key] pairs",
|
|
162
|
+
},
|
|
163
|
+
{
|
|
164
|
+
name: "stripHeaders",
|
|
165
|
+
type: "boolean",
|
|
166
|
+
isOptional: true,
|
|
167
|
+
description: "Whether to remove headers from the output",
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
name: "returnEachLine",
|
|
171
|
+
type: "boolean",
|
|
172
|
+
isOptional: true,
|
|
173
|
+
description: "Whether to return each line as a separate chunk",
|
|
174
|
+
},
|
|
175
|
+
]}
|
|
176
|
+
/>
|
|
177
|
+
|
|
178
|
+
### Token
|
|
179
|
+
|
|
180
|
+
<PropertiesTable
|
|
181
|
+
content={[
|
|
182
|
+
{
|
|
183
|
+
name: "encodingName",
|
|
184
|
+
type: "string",
|
|
185
|
+
isOptional: true,
|
|
186
|
+
description: "Name of the token encoding to use",
|
|
187
|
+
},
|
|
188
|
+
{
|
|
189
|
+
name: "modelName",
|
|
190
|
+
type: "string",
|
|
191
|
+
isOptional: true,
|
|
192
|
+
description: "Name of the model for tokenization",
|
|
193
|
+
},
|
|
194
|
+
]}
|
|
195
|
+
/>
|
|
196
|
+
|
|
197
|
+
### JSON
|
|
198
|
+
|
|
199
|
+
<PropertiesTable
|
|
200
|
+
content={[
|
|
201
|
+
{
|
|
202
|
+
name: "maxSize",
|
|
203
|
+
type: "number",
|
|
204
|
+
description: "Maximum size of each chunk",
|
|
205
|
+
},
|
|
206
|
+
{
|
|
207
|
+
name: "minSize",
|
|
208
|
+
type: "number",
|
|
209
|
+
isOptional: true,
|
|
210
|
+
description: "Minimum size of each chunk",
|
|
211
|
+
},
|
|
212
|
+
{
|
|
213
|
+
name: "ensureAscii",
|
|
214
|
+
type: "boolean",
|
|
215
|
+
isOptional: true,
|
|
216
|
+
description: "Whether to ensure ASCII encoding",
|
|
217
|
+
},
|
|
218
|
+
{
|
|
219
|
+
name: "convertLists",
|
|
220
|
+
type: "boolean",
|
|
221
|
+
isOptional: true,
|
|
222
|
+
description: "Whether to convert lists in the JSON",
|
|
223
|
+
},
|
|
224
|
+
]}
|
|
225
|
+
/>
|
|
226
|
+
|
|
227
|
+
## Return Value
|
|
228
|
+
|
|
229
|
+
Returns a promise that resolves to the array of chunks produced from the document. Each chunk includes:
|
|
230
|
+
|
|
231
|
+
```typescript
|
|
232
|
+
interface DocumentNode {
|
|
233
|
+
text: string;
|
|
234
|
+
metadata: Record<string, any>;
|
|
235
|
+
embedding?: number[];
|
|
236
|
+
}
|
|
237
|
+
```
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: MDocument | Document Processing | RAG | Mastra Docs"
|
|
3
|
+
description: Documentation for the MDocument class in Mastra, which handles document processing and chunking.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# MDocument
|
|
7
|
+
|
|
8
|
+
The MDocument class processes documents for RAG applications. The main methods are `.chunk()` and `.extractMetadata()`.
|
|
9
|
+
|
|
10
|
+
## Constructor
|
|
11
|
+
|
|
12
|
+
<PropertiesTable
|
|
13
|
+
content={[
|
|
14
|
+
{
|
|
15
|
+
name: "docs",
|
|
16
|
+
type: "Array<{ text: string, metadata?: Record<string, any> }>",
|
|
17
|
+
description: "Array of document chunks with their text content and optional metadata",
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
name: "type",
|
|
21
|
+
type: "'text' | 'html' | 'markdown' | 'json' | 'latex'",
|
|
22
|
+
description: "Type of document content",
|
|
23
|
+
}
|
|
24
|
+
]}
|
|
25
|
+
/>
|
|
26
|
+
|
|
27
|
+
## Static Methods
|
|
28
|
+
|
|
29
|
+
### fromText()
|
|
30
|
+
|
|
31
|
+
Creates a document from plain text content.
|
|
32
|
+
|
|
33
|
+
```typescript
|
|
34
|
+
static fromText(text: string, metadata?: Record<string, any>): MDocument
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### fromHTML()
|
|
38
|
+
|
|
39
|
+
Creates a document from HTML content.
|
|
40
|
+
|
|
41
|
+
```typescript
|
|
42
|
+
static fromHTML(html: string, metadata?: Record<string, any>): MDocument
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### fromMarkdown()
|
|
46
|
+
|
|
47
|
+
Creates a document from Markdown content.
|
|
48
|
+
|
|
49
|
+
```typescript
|
|
50
|
+
static fromMarkdown(markdown: string, metadata?: Record<string, any>): MDocument
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### fromJSON()
|
|
54
|
+
|
|
55
|
+
Creates a document from JSON content.
|
|
56
|
+
|
|
57
|
+
```typescript
|
|
58
|
+
static fromJSON(json: string, metadata?: Record<string, any>): MDocument
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Instance Methods
|
|
62
|
+
|
|
63
|
+
### chunk()
|
|
64
|
+
|
|
65
|
+
Splits the document into chunks and optionally extracts metadata.
|
|
66
|
+
|
|
67
|
+
```typescript
|
|
68
|
+
async chunk(params?: ChunkParams): Promise<Chunk[]>
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
See [chunk() reference](./chunk) for detailed options.
|
|
72
|
+
|
|
73
|
+
### getDocs()
|
|
74
|
+
|
|
75
|
+
Returns an array of the processed document chunks.
|
|
76
|
+
|
|
77
|
+
```typescript
|
|
78
|
+
getDocs(): Chunk[]
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### getText()
|
|
82
|
+
|
|
83
|
+
Returns an array of the text strings from the chunks.
|
|
84
|
+
|
|
85
|
+
```typescript
|
|
86
|
+
getText(): string[]
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### getMetadata()
|
|
90
|
+
|
|
91
|
+
Returns an array of the metadata objects from the chunks.
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
getMetadata(): Record<string, any>[]
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### extractMetadata()
|
|
98
|
+
|
|
99
|
+
Extracts metadata using specified extractors. See [ExtractParams reference](./extract-params) for details.
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
async extractMetadata(params: ExtractParams): Promise<MDocument>
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Examples
|
|
106
|
+
|
|
107
|
+
```typescript
|
|
108
|
+
import { MDocument } from '@mastra/rag';
|
|
109
|
+
|
|
110
|
+
// Create document from text
|
|
111
|
+
const doc = MDocument.fromText('Your content here');
|
|
112
|
+
|
|
113
|
+
// Split into chunks with metadata extraction
|
|
114
|
+
const chunks = await doc.chunk({
|
|
115
|
+
strategy: 'markdown',
|
|
116
|
+
headers: [['#', 'title'], ['##', 'section']],
|
|
117
|
+
extract: {
|
|
118
|
+
fields: [
|
|
119
|
+
{ name: 'summary', description: 'A brief summary' },
|
|
120
|
+
{ name: 'keywords', description: 'Key terms' }
|
|
121
|
+
]
|
|
122
|
+
}
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
// Get processed chunks
|
|
126
|
+
const docs = doc.getDocs();
|
|
127
|
+
const texts = doc.getText();
|
|
128
|
+
const metadata = doc.getMetadata();
|
|
129
|
+
```
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: embed() | Document Embedding | RAG | Mastra Docs"
|
|
3
|
+
description: Documentation for embedding functionality in Mastra using the AI SDK.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Embed
|
|
7
|
+
|
|
8
|
+
Mastra uses the AI SDK's `embed` and `embedMany` functions to generate vector embeddings for text inputs, enabling similarity search and RAG workflows.
|
|
9
|
+
|
|
10
|
+
## Single Embedding
|
|
11
|
+
|
|
12
|
+
The `embed` function generates a vector embedding for a single text input:
|
|
13
|
+
|
|
14
|
+
```typescript
|
|
15
|
+
import { embed } from 'ai';
|
|
16
|
+
|
|
17
|
+
const result = await embed({
|
|
18
|
+
model: openai.embedding('text-embedding-3-small'),
|
|
19
|
+
value: "Your text to embed",
|
|
20
|
+
maxRetries: 2 // optional, defaults to 2
|
|
21
|
+
});
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### Parameters
|
|
25
|
+
|
|
26
|
+
<PropertiesTable
|
|
27
|
+
content={[
|
|
28
|
+
{
|
|
29
|
+
name: "model",
|
|
30
|
+
type: "EmbeddingModel",
|
|
31
|
+
description: "The embedding model to use (e.g. openai.embedding('text-embedding-3-small'))"
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
name: "value",
|
|
35
|
+
type: "string | Record<string, any>",
|
|
36
|
+
description: "The text content or object to embed"
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
name: "maxRetries",
|
|
40
|
+
type: "number",
|
|
41
|
+
description: "Maximum number of retries per embedding call. Set to 0 to disable retries.",
|
|
42
|
+
isOptional: true,
|
|
43
|
+
defaultValue: "2"
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
name: "abortSignal",
|
|
47
|
+
type: "AbortSignal",
|
|
48
|
+
description: "Optional abort signal to cancel the request",
|
|
49
|
+
isOptional: true
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
name: "headers",
|
|
53
|
+
type: "Record<string, string>",
|
|
54
|
+
description: "Additional HTTP headers for the request (only for HTTP-based providers)",
|
|
55
|
+
isOptional: true
|
|
56
|
+
}
|
|
57
|
+
]}
|
|
58
|
+
/>
|
|
59
|
+
|
|
60
|
+
### Return Value
|
|
61
|
+
|
|
62
|
+
<PropertiesTable
|
|
63
|
+
content={[
|
|
64
|
+
{
|
|
65
|
+
name: "embedding",
|
|
66
|
+
type: "number[]",
|
|
67
|
+
description: "The embedding vector for the input"
|
|
68
|
+
}
|
|
69
|
+
]}
|
|
70
|
+
/>
|
|
71
|
+
|
|
72
|
+
## Multiple Embeddings
|
|
73
|
+
|
|
74
|
+
For embedding multiple texts at once, use the `embedMany` function:
|
|
75
|
+
|
|
76
|
+
```typescript
|
|
77
|
+
import { embedMany } from 'ai';
|
|
78
|
+
|
|
79
|
+
const result = await embedMany({
|
|
80
|
+
model: openai.embedding('text-embedding-3-small'),
|
|
81
|
+
values: ["First text", "Second text", "Third text"],
|
|
82
|
+
maxRetries: 2 // optional, defaults to 2
|
|
83
|
+
});
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Parameters
|
|
87
|
+
|
|
88
|
+
<PropertiesTable
|
|
89
|
+
content={[
|
|
90
|
+
{
|
|
91
|
+
name: "model",
|
|
92
|
+
type: "EmbeddingModel",
|
|
93
|
+
description: "The embedding model to use (e.g. openai.embedding('text-embedding-3-small'))"
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
name: "values",
|
|
97
|
+
type: "string[] | Record<string, any>[]",
|
|
98
|
+
description: "Array of text content or objects to embed"
|
|
99
|
+
},
|
|
100
|
+
{
|
|
101
|
+
name: "maxRetries",
|
|
102
|
+
type: "number",
|
|
103
|
+
description: "Maximum number of retries per embedding call. Set to 0 to disable retries.",
|
|
104
|
+
isOptional: true,
|
|
105
|
+
defaultValue: "2"
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
name: "abortSignal",
|
|
109
|
+
type: "AbortSignal",
|
|
110
|
+
description: "Optional abort signal to cancel the request",
|
|
111
|
+
isOptional: true
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
name: "headers",
|
|
115
|
+
type: "Record<string, string>",
|
|
116
|
+
description: "Additional HTTP headers for the request (only for HTTP-based providers)",
|
|
117
|
+
isOptional: true
|
|
118
|
+
}
|
|
119
|
+
]}
|
|
120
|
+
/>
|
|
121
|
+
|
|
122
|
+
### Return Value
|
|
123
|
+
|
|
124
|
+
<PropertiesTable
|
|
125
|
+
content={[
|
|
126
|
+
{
|
|
127
|
+
name: "embeddings",
|
|
128
|
+
type: "number[][]",
|
|
129
|
+
description: "Array of embedding vectors corresponding to the input values"
|
|
130
|
+
}
|
|
131
|
+
]}
|
|
132
|
+
/>
|
|
133
|
+
|
|
134
|
+
## Example Usage
|
|
135
|
+
|
|
136
|
+
```typescript
|
|
137
|
+
import { embed, embedMany } from 'ai';
|
|
138
|
+
import { openai } from '@ai-sdk/openai';
|
|
139
|
+
|
|
140
|
+
// Single embedding
|
|
141
|
+
const singleResult = await embed({
|
|
142
|
+
model: openai.embedding('text-embedding-3-small'),
|
|
143
|
+
value: "What is the meaning of life?",
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
// Multiple embeddings
|
|
147
|
+
const multipleResult = await embedMany({
|
|
148
|
+
model: openai.embedding('text-embedding-3-small'),
|
|
149
|
+
values: [
|
|
150
|
+
"First question about life",
|
|
151
|
+
"Second question about universe",
|
|
152
|
+
"Third question about everything"
|
|
153
|
+
],
|
|
154
|
+
});
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
For more detailed information about embeddings in the Vercel AI SDK, see:
|
|
158
|
+
- [AI SDK Embeddings Overview](https://sdk.vercel.ai/docs/ai-sdk-core/embeddings)
|
|
159
|
+
- [embed()](https://sdk.vercel.ai/docs/reference/ai-sdk-core/embed)
|
|
160
|
+
- [embedMany()](https://sdk.vercel.ai/docs/reference/ai-sdk-core/embed-many)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: ExtractParams | Document Processing | RAG | Mastra Docs"
|
|
3
|
+
description: Documentation for metadata extraction configuration in Mastra.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# ExtractParams
|
|
7
|
+
|
|
8
|
+
ExtractParams configures metadata extraction from document chunks.
|
|
9
|
+
|
|
10
|
+
## Example
|
|
11
|
+
|
|
12
|
+
## ExtractParams
|
|
13
|
+
|
|
14
|
+
`ExtractParams` configures automatic metadata extraction from chunks using LLM analysis.
|
|
15
|
+
|
|
16
|
+
```typescript showLineNumbers copy
|
|
17
|
+
const doc = MDocument.fromText(text);
|
|
18
|
+
const chunks = await doc.chunk({
|
|
19
|
+
extract: {
|
|
20
|
+
fields: [
|
|
21
|
+
{
|
|
22
|
+
name: 'summary',
|
|
23
|
+
description: 'A 1-2 sentence summary of the main points'
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
name: 'entities',
|
|
27
|
+
description: 'List of companies, people, and locations mentioned'
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
name: 'custom_field',
|
|
31
|
+
description: 'Any other metadata you want to extract, guided by this description'
|
|
32
|
+
}
|
|
33
|
+
],
|
|
34
|
+
model: 'gpt-4o-mini' // Optional: specify a different model
|
|
35
|
+
}
|
|
36
|
+
});
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Parameters
|
|
40
|
+
|
|
41
|
+
<PropertiesTable
|
|
42
|
+
content={[
|
|
43
|
+
{
|
|
44
|
+
name: "fields",
|
|
45
|
+
type: "Array<{ name: string, description: string }>",
|
|
46
|
+
description: "Array of fields to extract from each chunk",
|
|
47
|
+
isOptional: false
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
name: "model",
|
|
51
|
+
type: "string",
|
|
52
|
+
description: "OpenAI model to use for extraction",
|
|
53
|
+
defaultValue: "gpt-3.5-turbo",
|
|
54
|
+
isOptional: true
|
|
55
|
+
}
|
|
56
|
+
]}
|
|
57
|
+
/>
|
|
58
|
+
|
|
59
|
+
## Field Types
|
|
60
|
+
|
|
61
|
+
The fields are flexible - you can define any metadata fields you want to extract. Common field types include:
|
|
62
|
+
|
|
63
|
+
- `summary`: Brief overview of chunk content
|
|
64
|
+
- `keywords`: Key terms or concepts
|
|
65
|
+
- `topics`: Main subjects discussed
|
|
66
|
+
- `entities`: Named entities (people, places, organizations)
|
|
67
|
+
- `sentiment`: Emotional tone
|
|
68
|
+
- `language`: Detected language
|
|
69
|
+
- `timestamp`: Temporal references
|
|
70
|
+
- `categories`: Content classification
|
|
71
|
+
|
|
72
|
+
Example:
|