@mastra/mcp-docs-server 0.0.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. package/.docs/organized/changelogs/%40mastra%2Fastra.md +302 -0
  2. package/.docs/organized/changelogs/%40mastra%2Fchroma.md +302 -0
  3. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +302 -0
  4. package/.docs/organized/changelogs/%40mastra%2Fcomposio.md +302 -0
  5. package/.docs/organized/changelogs/%40mastra%2Fcore.md +302 -0
  6. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +302 -0
  7. package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +302 -0
  8. package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +302 -0
  9. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +302 -0
  10. package/.docs/organized/changelogs/%40mastra%2Fevals.md +302 -0
  11. package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +302 -0
  12. package/.docs/organized/changelogs/%40mastra%2Fgithub.md +302 -0
  13. package/.docs/organized/changelogs/%40mastra%2Floggers.md +302 -0
  14. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +11 -0
  15. package/.docs/organized/changelogs/%40mastra%2Fmcp.md +302 -0
  16. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +302 -0
  17. package/.docs/organized/changelogs/%40mastra%2Fpg.md +302 -0
  18. package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +302 -0
  19. package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +302 -0
  20. package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +302 -0
  21. package/.docs/organized/changelogs/%40mastra%2Frag.md +302 -0
  22. package/.docs/organized/changelogs/%40mastra%2Fragie.md +302 -0
  23. package/.docs/organized/changelogs/%40mastra%2Fspeech-azure.md +302 -0
  24. package/.docs/organized/changelogs/%40mastra%2Fspeech-deepgram.md +302 -0
  25. package/.docs/organized/changelogs/%40mastra%2Fspeech-elevenlabs.md +302 -0
  26. package/.docs/organized/changelogs/%40mastra%2Fspeech-google.md +302 -0
  27. package/.docs/organized/changelogs/%40mastra%2Fspeech-ibm.md +302 -0
  28. package/.docs/organized/changelogs/%40mastra%2Fspeech-murf.md +302 -0
  29. package/.docs/organized/changelogs/%40mastra%2Fspeech-openai.md +302 -0
  30. package/.docs/organized/changelogs/%40mastra%2Fspeech-playai.md +302 -0
  31. package/.docs/organized/changelogs/%40mastra%2Fspeech-replicate.md +302 -0
  32. package/.docs/organized/changelogs/%40mastra%2Fspeech-speechify.md +302 -0
  33. package/.docs/organized/changelogs/%40mastra%2Fstabilityai.md +302 -0
  34. package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +59 -0
  35. package/.docs/organized/changelogs/%40mastra%2Fupstash.md +302 -0
  36. package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +302 -0
  37. package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +302 -0
  38. package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +302 -0
  39. package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +302 -0
  40. package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +302 -0
  41. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +24 -0
  42. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +302 -0
  43. package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +302 -0
  44. package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +12 -0
  45. package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +302 -0
  46. package/.docs/organized/changelogs/create-mastra.md +302 -0
  47. package/.docs/organized/changelogs/mastra.md +302 -0
  48. package/.docs/organized/code-examples/agent.md +385 -0
  49. package/.docs/organized/code-examples/ai-sdk-useChat.md +377 -0
  50. package/.docs/organized/code-examples/assistant-ui.md +37 -0
  51. package/.docs/organized/code-examples/bird-checker-with-express.md +235 -0
  52. package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +360 -0
  53. package/.docs/organized/code-examples/bird-checker-with-nextjs.md +250 -0
  54. package/.docs/organized/code-examples/crypto-chatbot.md +96 -0
  55. package/.docs/organized/code-examples/fireworks-r1.md +159 -0
  56. package/.docs/organized/code-examples/integrations.md +184 -0
  57. package/.docs/organized/code-examples/mcp-configuration.md +341 -0
  58. package/.docs/organized/code-examples/memory-todo-agent.md +161 -0
  59. package/.docs/organized/code-examples/memory-with-context.md +167 -0
  60. package/.docs/organized/code-examples/memory-with-libsql.md +204 -0
  61. package/.docs/organized/code-examples/memory-with-pg.md +224 -0
  62. package/.docs/organized/code-examples/memory-with-upstash.md +268 -0
  63. package/.docs/organized/code-examples/quick-start.md +127 -0
  64. package/.docs/organized/code-examples/stock-price-tool.md +124 -0
  65. package/.docs/organized/code-examples/weather-agent.md +352 -0
  66. package/.docs/organized/code-examples/workflow-ai-recruiter.md +159 -0
  67. package/.docs/organized/code-examples/workflow-with-inline-steps.md +111 -0
  68. package/.docs/organized/code-examples/workflow-with-memory.md +393 -0
  69. package/.docs/organized/code-examples/workflow-with-separate-steps.md +131 -0
  70. package/.docs/raw/agents/00-overview.mdx +185 -0
  71. package/.docs/raw/agents/01-agent-memory.mdx +610 -0
  72. package/.docs/raw/agents/02-adding-tools.mdx +224 -0
  73. package/.docs/raw/agents/03-adding-voice.mdx +170 -0
  74. package/.docs/raw/deployment/deployment.mdx +156 -0
  75. package/.docs/raw/deployment/logging-and-tracing.mdx +242 -0
  76. package/.docs/raw/deployment/server.mdx +114 -0
  77. package/.docs/raw/evals/00-overview.mdx +106 -0
  78. package/.docs/raw/evals/01-supported-evals.mdx +31 -0
  79. package/.docs/raw/evals/02-custom-eval.mdx +187 -0
  80. package/.docs/raw/faq/index.mdx +63 -0
  81. package/.docs/raw/frameworks/01-next-js.mdx +238 -0
  82. package/.docs/raw/frameworks/02-ai-sdk.mdx +218 -0
  83. package/.docs/raw/getting-started/installation.mdx +436 -0
  84. package/.docs/raw/getting-started/project-structure.mdx +80 -0
  85. package/.docs/raw/guides/01-chef-michel.mdx +242 -0
  86. package/.docs/raw/guides/02-stock-agent.mdx +182 -0
  87. package/.docs/raw/guides/03-recruiter.mdx +187 -0
  88. package/.docs/raw/index.mdx +22 -0
  89. package/.docs/raw/local-dev/creating-projects.mdx +74 -0
  90. package/.docs/raw/local-dev/integrations.mdx +127 -0
  91. package/.docs/raw/local-dev/mastra-dev.mdx +65 -0
  92. package/.docs/raw/rag/chunking-and-embedding.mdx +128 -0
  93. package/.docs/raw/rag/overview.mdx +85 -0
  94. package/.docs/raw/rag/retrieval.mdx +362 -0
  95. package/.docs/raw/rag/vector-databases.mdx +271 -0
  96. package/.docs/raw/reference/agents/createTool.mdx +190 -0
  97. package/.docs/raw/reference/agents/generate.mdx +327 -0
  98. package/.docs/raw/reference/agents/getAgent.mdx +54 -0
  99. package/.docs/raw/reference/agents/stream.mdx +361 -0
  100. package/.docs/raw/reference/cli/build.mdx +48 -0
  101. package/.docs/raw/reference/cli/deploy.mdx +22 -0
  102. package/.docs/raw/reference/cli/dev.mdx +97 -0
  103. package/.docs/raw/reference/cli/init.mdx +43 -0
  104. package/.docs/raw/reference/client-js/agents.mdx +90 -0
  105. package/.docs/raw/reference/client-js/error-handling.mdx +38 -0
  106. package/.docs/raw/reference/client-js/index.mdx +127 -0
  107. package/.docs/raw/reference/client-js/logs.mdx +24 -0
  108. package/.docs/raw/reference/client-js/memory.mdx +94 -0
  109. package/.docs/raw/reference/client-js/telemetry.mdx +20 -0
  110. package/.docs/raw/reference/client-js/tools.mdx +44 -0
  111. package/.docs/raw/reference/client-js/vectors.mdx +79 -0
  112. package/.docs/raw/reference/client-js/workflows.mdx +137 -0
  113. package/.docs/raw/reference/core/mastra-class.mdx +232 -0
  114. package/.docs/raw/reference/deployer/cloudflare.mdx +176 -0
  115. package/.docs/raw/reference/deployer/deployer.mdx +159 -0
  116. package/.docs/raw/reference/deployer/netlify.mdx +88 -0
  117. package/.docs/raw/reference/deployer/vercel.mdx +97 -0
  118. package/.docs/raw/reference/evals/answer-relevancy.mdx +186 -0
  119. package/.docs/raw/reference/evals/bias.mdx +186 -0
  120. package/.docs/raw/reference/evals/completeness.mdx +174 -0
  121. package/.docs/raw/reference/evals/content-similarity.mdx +183 -0
  122. package/.docs/raw/reference/evals/context-position.mdx +190 -0
  123. package/.docs/raw/reference/evals/context-precision.mdx +189 -0
  124. package/.docs/raw/reference/evals/context-relevancy.mdx +188 -0
  125. package/.docs/raw/reference/evals/contextual-recall.mdx +191 -0
  126. package/.docs/raw/reference/evals/faithfulness.mdx +193 -0
  127. package/.docs/raw/reference/evals/hallucination.mdx +219 -0
  128. package/.docs/raw/reference/evals/keyword-coverage.mdx +176 -0
  129. package/.docs/raw/reference/evals/prompt-alignment.mdx +238 -0
  130. package/.docs/raw/reference/evals/summarization.mdx +205 -0
  131. package/.docs/raw/reference/evals/textual-difference.mdx +161 -0
  132. package/.docs/raw/reference/evals/tone-consistency.mdx +181 -0
  133. package/.docs/raw/reference/evals/toxicity.mdx +165 -0
  134. package/.docs/raw/reference/index.mdx +8 -0
  135. package/.docs/raw/reference/memory/Memory.mdx +186 -0
  136. package/.docs/raw/reference/memory/createThread.mdx +93 -0
  137. package/.docs/raw/reference/memory/getThreadById.mdx +43 -0
  138. package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +45 -0
  139. package/.docs/raw/reference/memory/query.mdx +164 -0
  140. package/.docs/raw/reference/observability/create-logger.mdx +106 -0
  141. package/.docs/raw/reference/observability/logger.mdx +55 -0
  142. package/.docs/raw/reference/observability/otel-config.mdx +120 -0
  143. package/.docs/raw/reference/observability/providers/braintrust.mdx +40 -0
  144. package/.docs/raw/reference/observability/providers/index.mdx +15 -0
  145. package/.docs/raw/reference/observability/providers/laminar.mdx +41 -0
  146. package/.docs/raw/reference/observability/providers/langfuse.mdx +51 -0
  147. package/.docs/raw/reference/observability/providers/langsmith.mdx +46 -0
  148. package/.docs/raw/reference/observability/providers/langwatch.mdx +45 -0
  149. package/.docs/raw/reference/observability/providers/new-relic.mdx +40 -0
  150. package/.docs/raw/reference/observability/providers/signoz.mdx +40 -0
  151. package/.docs/raw/reference/observability/providers/traceloop.mdx +40 -0
  152. package/.docs/raw/reference/rag/astra.mdx +258 -0
  153. package/.docs/raw/reference/rag/chroma.mdx +281 -0
  154. package/.docs/raw/reference/rag/chunk.mdx +237 -0
  155. package/.docs/raw/reference/rag/document.mdx +129 -0
  156. package/.docs/raw/reference/rag/embeddings.mdx +160 -0
  157. package/.docs/raw/reference/rag/extract-params.mdx +72 -0
  158. package/.docs/raw/reference/rag/graph-rag.mdx +182 -0
  159. package/.docs/raw/reference/rag/libsql.mdx +357 -0
  160. package/.docs/raw/reference/rag/metadata-filters.mdx +298 -0
  161. package/.docs/raw/reference/rag/pg.mdx +477 -0
  162. package/.docs/raw/reference/rag/pinecone.mdx +249 -0
  163. package/.docs/raw/reference/rag/qdrant.mdx +236 -0
  164. package/.docs/raw/reference/rag/rerank.mdx +212 -0
  165. package/.docs/raw/reference/rag/turbopuffer.mdx +249 -0
  166. package/.docs/raw/reference/rag/upstash.mdx +198 -0
  167. package/.docs/raw/reference/rag/vectorize.mdx +253 -0
  168. package/.docs/raw/reference/storage/libsql.mdx +74 -0
  169. package/.docs/raw/reference/storage/postgresql.mdx +48 -0
  170. package/.docs/raw/reference/storage/upstash.mdx +86 -0
  171. package/.docs/raw/reference/tools/client.mdx +180 -0
  172. package/.docs/raw/reference/tools/document-chunker-tool.mdx +141 -0
  173. package/.docs/raw/reference/tools/graph-rag-tool.mdx +154 -0
  174. package/.docs/raw/reference/tools/mcp-configuration.mdx +206 -0
  175. package/.docs/raw/reference/tools/vector-query-tool.mdx +212 -0
  176. package/.docs/raw/reference/voice/composite-voice.mdx +140 -0
  177. package/.docs/raw/reference/voice/deepgram.mdx +164 -0
  178. package/.docs/raw/reference/voice/elevenlabs.mdx +216 -0
  179. package/.docs/raw/reference/voice/google.mdx +198 -0
  180. package/.docs/raw/reference/voice/mastra-voice.mdx +394 -0
  181. package/.docs/raw/reference/voice/murf.mdx +251 -0
  182. package/.docs/raw/reference/voice/openai-realtime.mdx +431 -0
  183. package/.docs/raw/reference/voice/openai.mdx +168 -0
  184. package/.docs/raw/reference/voice/playai.mdx +159 -0
  185. package/.docs/raw/reference/voice/speechify.mdx +145 -0
  186. package/.docs/raw/reference/workflows/after.mdx +88 -0
  187. package/.docs/raw/reference/workflows/commit.mdx +37 -0
  188. package/.docs/raw/reference/workflows/createRun.mdx +77 -0
  189. package/.docs/raw/reference/workflows/else.mdx +72 -0
  190. package/.docs/raw/reference/workflows/execute.mdx +110 -0
  191. package/.docs/raw/reference/workflows/if.mdx +107 -0
  192. package/.docs/raw/reference/workflows/resume.mdx +155 -0
  193. package/.docs/raw/reference/workflows/start.mdx +84 -0
  194. package/.docs/raw/reference/workflows/step-class.mdx +100 -0
  195. package/.docs/raw/reference/workflows/step-condition.mdx +134 -0
  196. package/.docs/raw/reference/workflows/step-function.mdx +92 -0
  197. package/.docs/raw/reference/workflows/step-options.mdx +69 -0
  198. package/.docs/raw/reference/workflows/suspend.mdx +80 -0
  199. package/.docs/raw/reference/workflows/then.mdx +74 -0
  200. package/.docs/raw/reference/workflows/until.mdx +165 -0
  201. package/.docs/raw/reference/workflows/watch.mdx +118 -0
  202. package/.docs/raw/reference/workflows/while.mdx +168 -0
  203. package/.docs/raw/reference/workflows/workflow.mdx +233 -0
  204. package/.docs/raw/workflows/00-overview.mdx +168 -0
  205. package/.docs/raw/workflows/control-flow.mdx +712 -0
  206. package/.docs/raw/workflows/dynamic-workflows.mdx +232 -0
  207. package/.docs/raw/workflows/steps.mdx +98 -0
  208. package/.docs/raw/workflows/suspend-and-resume.mdx +196 -0
  209. package/.docs/raw/workflows/variables.mdx +248 -0
  210. package/LICENSE +44 -0
  211. package/README.md +129 -0
  212. package/dist/index.d.ts +3 -0
  213. package/dist/index.js +19 -0
  214. package/dist/prepare-docs/code-examples.d.ts +4 -0
  215. package/dist/prepare-docs/code-examples.js +91 -0
  216. package/dist/prepare-docs/copy-raw.d.ts +1 -0
  217. package/dist/prepare-docs/copy-raw.js +41 -0
  218. package/dist/prepare-docs/index.d.ts +1 -0
  219. package/dist/prepare-docs/index.js +8 -0
  220. package/dist/prepare-docs/package-changes.d.ts +4 -0
  221. package/dist/prepare-docs/package-changes.js +92 -0
  222. package/dist/prepare-docs/prepare.d.ts +1 -0
  223. package/dist/prepare-docs/prepare.js +13 -0
  224. package/dist/sse.d.ts +1 -0
  225. package/dist/sse.js +9 -0
  226. package/dist/stdio.d.ts +1 -0
  227. package/dist/stdio.js +8 -0
  228. package/dist/tools/__tests__/blog.test.d.ts +1 -0
  229. package/dist/tools/__tests__/blog.test.js +48 -0
  230. package/dist/tools/__tests__/changes.test.d.ts +1 -0
  231. package/dist/tools/__tests__/changes.test.js +36 -0
  232. package/dist/tools/__tests__/docs.test.d.ts +1 -0
  233. package/dist/tools/__tests__/docs.test.js +46 -0
  234. package/dist/tools/__tests__/examples.test.d.ts +1 -0
  235. package/dist/tools/__tests__/examples.test.js +52 -0
  236. package/dist/tools/blog.d.ts +15 -0
  237. package/dist/tools/blog.js +73 -0
  238. package/dist/tools/changes.d.ts +11 -0
  239. package/dist/tools/changes.js +69 -0
  240. package/dist/tools/docs.d.ts +11 -0
  241. package/dist/tools/docs.js +176 -0
  242. package/dist/tools/examples.d.ts +11 -0
  243. package/dist/tools/examples.js +61 -0
  244. package/dist/utils.d.ts +6 -0
  245. package/dist/utils.js +9 -0
  246. package/package.json +66 -0
@@ -0,0 +1,237 @@
1
+ ---
2
+ title: "Reference: .chunk() | Document Processing | RAG | Mastra Docs"
3
+ description: Documentation for the chunk function in Mastra, which splits documents into smaller segments using various strategies.
4
+ ---
5
+
6
+ # Reference: .chunk()
7
+
8
+ The `.chunk()` function splits documents into smaller segments using various strategies and options.
9
+
10
+ ## Example
11
+
12
+ ```typescript
13
+ import { Document } from '@mastra/core';
14
+
15
+ const doc = new Document(`
16
+ # Introduction
17
+ This is a sample document that we want to split into chunks.
18
+
19
+ ## Section 1
20
+ Here is the first section with some content.
21
+
22
+ ## Section 2
23
+ Here is another section with different content.
24
+ `);
25
+
26
+ // Basic chunking with defaults
27
+ const chunks = await doc.chunk();
28
+
29
+ // Markdown-specific chunking with header extraction
30
+ const chunksWithMetadata = await doc.chunk({
31
+ strategy: 'markdown',
32
+ headers: [['#', 'title'], ['##', 'section']],
33
+ extract: {
34
+ fields: [
35
+ { name: 'summary', description: 'A brief summary of the chunk content' },
36
+ { name: 'keywords', description: 'Key terms found in the chunk' }
37
+ ]
38
+ }
39
+ });
40
+ ```
41
+
42
+ ## Parameters
43
+
44
+ <PropertiesTable
45
+ content={[
46
+ {
47
+ name: "strategy",
48
+ type: "'recursive' | 'character' | 'token' | 'markdown' | 'html' | 'json' | 'latex'",
49
+ isOptional: true,
50
+ description:
51
+ "The chunking strategy to use. If not specified, defaults based on document type. Depending on the chunking strategy, there are additional optionals. Defaults: .md files → 'markdown', .html/.htm → 'html', .json → 'json', .tex → 'latex', others → 'recursive'",
52
+ },
53
+ {
54
+ name: "size",
55
+ type: "number",
56
+ isOptional: true,
57
+ defaultValue: "512",
58
+ description: "Maximum size of each chunk",
59
+ },
60
+ {
61
+ name: "overlap",
62
+ type: "number",
63
+ isOptional: true,
64
+ defaultValue: "50",
65
+ description: "Number of characters/tokens that overlap between chunks.",
66
+ },
67
+ {
68
+ name: "separator",
69
+ type: "string",
70
+ isOptional: true,
71
+ defaultValue: "\\n\\n",
72
+ description: "Character(s) to split on. Defaults to double newline for text content.",
73
+ },
74
+ {
75
+ name: "isSeparatorRegex",
76
+ type: "boolean",
77
+ isOptional: true,
78
+ defaultValue: "false",
79
+ description: "Whether the separator is a regex pattern",
80
+ },
81
+ {
82
+ name: "keepSeparator",
83
+ type: "'start' | 'end'",
84
+ isOptional: true,
85
+ description:
86
+ "Whether to keep the separator at the start or end of chunks",
87
+ },
88
+ {
89
+ name: "extract",
90
+ type: "ExtractParams",
91
+ isOptional: true,
92
+ description: "Metadata extraction configuration. See [ExtractParams reference](./extract-params) for details.",
93
+ },
94
+ ]}
95
+ />
96
+
97
+ ## Strategy-Specific Options
98
+
99
+ Strategy-specific options are passed as top-level parameters alongside the strategy parameter. For example:
100
+
101
+ ```typescript showLineNumbers copy
102
+ // HTML strategy example
103
+ const chunks = await doc.chunk({
104
+ strategy: 'html',
105
+ headers: [['h1', 'title'], ['h2', 'subtitle']], // HTML-specific option
106
+ sections: [['div.content', 'main']], // HTML-specific option
107
+ size: 500 // general option
108
+ });
109
+
110
+ // Markdown strategy example
111
+ const chunks = await doc.chunk({
112
+ strategy: 'markdown',
113
+ headers: [['#', 'title'], ['##', 'section']], // Markdown-specific option
114
+ stripHeaders: true, // Markdown-specific option
115
+ overlap: 50 // general option
116
+ });
117
+
118
+ // Token strategy example
119
+ const chunks = await doc.chunk({
120
+ strategy: 'token',
121
+ encodingName: 'gpt2', // Token-specific option
122
+ modelName: 'gpt-3.5-turbo', // Token-specific option
123
+ size: 1000 // general option
124
+ });
125
+ ```
126
+
127
+ The options documented below are passed directly at the top level of the configuration object, not nested within a separate options object.
128
+
129
+ ### HTML
130
+
131
+ <PropertiesTable
132
+ content={[
133
+ {
134
+ name: "headers",
135
+ type: "Array<[string, string]>",
136
+ description:
137
+ "Array of [selector, metadata key] pairs for header-based splitting",
138
+ },
139
+ {
140
+ name: "sections",
141
+ type: "Array<[string, string]>",
142
+ description:
143
+ "Array of [selector, metadata key] pairs for section-based splitting",
144
+ },
145
+ {
146
+ name: "returnEachLine",
147
+ type: "boolean",
148
+ isOptional: true,
149
+ description: "Whether to return each line as a separate chunk",
150
+ },
151
+ ]}
152
+ />
153
+
154
+ ### Markdown
155
+
156
+ <PropertiesTable
157
+ content={[
158
+ {
159
+ name: "headers",
160
+ type: "Array<[string, string]>",
161
+ description: "Array of [header level, metadata key] pairs",
162
+ },
163
+ {
164
+ name: "stripHeaders",
165
+ type: "boolean",
166
+ isOptional: true,
167
+ description: "Whether to remove headers from the output",
168
+ },
169
+ {
170
+ name: "returnEachLine",
171
+ type: "boolean",
172
+ isOptional: true,
173
+ description: "Whether to return each line as a separate chunk",
174
+ },
175
+ ]}
176
+ />
177
+
178
+ ### Token
179
+
180
+ <PropertiesTable
181
+ content={[
182
+ {
183
+ name: "encodingName",
184
+ type: "string",
185
+ isOptional: true,
186
+ description: "Name of the token encoding to use",
187
+ },
188
+ {
189
+ name: "modelName",
190
+ type: "string",
191
+ isOptional: true,
192
+ description: "Name of the model for tokenization",
193
+ },
194
+ ]}
195
+ />
196
+
197
+ ### JSON
198
+
199
+ <PropertiesTable
200
+ content={[
201
+ {
202
+ name: "maxSize",
203
+ type: "number",
204
+ description: "Maximum size of each chunk",
205
+ },
206
+ {
207
+ name: "minSize",
208
+ type: "number",
209
+ isOptional: true,
210
+ description: "Minimum size of each chunk",
211
+ },
212
+ {
213
+ name: "ensureAscii",
214
+ type: "boolean",
215
+ isOptional: true,
216
+ description: "Whether to ensure ASCII encoding",
217
+ },
218
+ {
219
+ name: "convertLists",
220
+ type: "boolean",
221
+ isOptional: true,
222
+ description: "Whether to convert lists in the JSON",
223
+ },
224
+ ]}
225
+ />
226
+
227
+ ## Return Value
228
+
229
+ Returns an `MDocument` instance containing the chunked documents. Each chunk includes:
230
+
231
+ ```typescript
232
+ interface DocumentNode {
233
+ text: string;
234
+ metadata: Record<string, any>;
235
+ embedding?: number[];
236
+ }
237
+ ```
@@ -0,0 +1,129 @@
1
+ ---
2
+ title: "Reference: MDocument | Document Processing | RAG | Mastra Docs"
3
+ description: Documentation for the MDocument class in Mastra, which handles document processing and chunking.
4
+ ---
5
+
6
+ # MDocument
7
+
8
+ The MDocument class processes documents for RAG applications. The main methods are `.chunk()` and `.extractMetadata()`.
9
+
10
+ ## Constructor
11
+
12
+ <PropertiesTable
13
+ content={[
14
+ {
15
+ name: "docs",
16
+ type: "Array<{ text: string, metadata?: Record<string, any> }>",
17
+ description: "Array of document chunks with their text content and optional metadata",
18
+ },
19
+ {
20
+ name: "type",
21
+ type: "'text' | 'html' | 'markdown' | 'json' | 'latex'",
22
+ description: "Type of document content",
23
+ }
24
+ ]}
25
+ />
26
+
27
+ ## Static Methods
28
+
29
+ ### fromText()
30
+
31
+ Creates a document from plain text content.
32
+
33
+ ```typescript
34
+ static fromText(text: string, metadata?: Record<string, any>): MDocument
35
+ ```
36
+
37
+ ### fromHTML()
38
+
39
+ Creates a document from HTML content.
40
+
41
+ ```typescript
42
+ static fromHTML(html: string, metadata?: Record<string, any>): MDocument
43
+ ```
44
+
45
+ ### fromMarkdown()
46
+
47
+ Creates a document from Markdown content.
48
+
49
+ ```typescript
50
+ static fromMarkdown(markdown: string, metadata?: Record<string, any>): MDocument
51
+ ```
52
+
53
+ ### fromJSON()
54
+
55
+ Creates a document from JSON content.
56
+
57
+ ```typescript
58
+ static fromJSON(json: string, metadata?: Record<string, any>): MDocument
59
+ ```
60
+
61
+ ## Instance Methods
62
+
63
+ ### chunk()
64
+
65
+ Splits the document into chunks and optionally extracts metadata.
66
+
67
+ ```typescript
68
+ async chunk(params?: ChunkParams): Promise<Chunk[]>
69
+ ```
70
+
71
+ See [chunk() reference](./chunk) for detailed options.
72
+
73
+ ### getDocs()
74
+
75
+ Returns an array of processed document chunks.
76
+
77
+ ```typescript
78
+ getDocs(): Chunk[]
79
+ ```
80
+
81
+ ### getText()
82
+
83
+ Returns an array of text strings from the chunks.
84
+
85
+ ```typescript
86
+ getText(): string[]
87
+ ```
88
+
89
+ ### getMetadata()
90
+
91
+ Returns an array of metadata objects from the chunks.
92
+
93
+ ```typescript
94
+ getMetadata(): Record<string, any>[]
95
+ ```
96
+
97
+ ### extractMetadata()
98
+
99
+ Extracts metadata using specified extractors. See [ExtractParams reference](./extract-params) for details.
100
+
101
+ ```typescript
102
+ async extractMetadata(params: ExtractParams): Promise<MDocument>
103
+ ```
104
+
105
+ ## Examples
106
+
107
+ ```typescript
108
+ import { MDocument } from '@mastra/rag';
109
+
110
+ // Create document from text
111
+ const doc = MDocument.fromText('Your content here');
112
+
113
+ // Split into chunks with metadata extraction
114
+ const chunks = await doc.chunk({
115
+ strategy: 'markdown',
116
+ headers: [['#', 'title'], ['##', 'section']],
117
+ extract: {
118
+ fields: [
119
+ { name: 'summary', description: 'A brief summary' },
120
+ { name: 'keywords', description: 'Key terms' }
121
+ ]
122
+ }
123
+ });
124
+
125
+ // Get processed chunks
126
+ const docs = doc.getDocs();
127
+ const texts = doc.getText();
128
+ const metadata = doc.getMetadata();
129
+ ```
@@ -0,0 +1,160 @@
1
+ ---
2
+ title: "Reference: embed() | Document Embedding | RAG | Mastra Docs"
3
+ description: Documentation for embedding functionality in Mastra using the AI SDK.
4
+ ---
5
+
6
+ # Embed
7
+
8
+ Mastra uses the AI SDK's `embed` and `embedMany` functions to generate vector embeddings for text inputs, enabling similarity search and RAG workflows.
9
+
10
+ ## Single Embedding
11
+
12
+ The `embed` function generates a vector embedding for a single text input:
13
+
14
+ ```typescript
15
+ import { embed } from 'ai';
16
+
17
+ const result = await embed({
18
+ model: openai.embedding('text-embedding-3-small'),
19
+ value: "Your text to embed",
20
+ maxRetries: 2 // optional, defaults to 2
21
+ });
22
+ ```
23
+
24
+ ### Parameters
25
+
26
+ <PropertiesTable
27
+ content={[
28
+ {
29
+ name: "model",
30
+ type: "EmbeddingModel",
31
+ description: "The embedding model to use (e.g. openai.embedding('text-embedding-3-small'))"
32
+ },
33
+ {
34
+ name: "value",
35
+ type: "string | Record<string, any>",
36
+ description: "The text content or object to embed"
37
+ },
38
+ {
39
+ name: "maxRetries",
40
+ type: "number",
41
+ description: "Maximum number of retries per embedding call. Set to 0 to disable retries.",
42
+ isOptional: true,
43
+ defaultValue: "2"
44
+ },
45
+ {
46
+ name: "abortSignal",
47
+ type: "AbortSignal",
48
+ description: "Optional abort signal to cancel the request",
49
+ isOptional: true
50
+ },
51
+ {
52
+ name: "headers",
53
+ type: "Record<string, string>",
54
+ description: "Additional HTTP headers for the request (only for HTTP-based providers)",
55
+ isOptional: true
56
+ }
57
+ ]}
58
+ />
59
+
60
+ ### Return Value
61
+
62
+ <PropertiesTable
63
+ content={[
64
+ {
65
+ name: "embedding",
66
+ type: "number[]",
67
+ description: "The embedding vector for the input"
68
+ }
69
+ ]}
70
+ />
71
+
72
+ ## Multiple Embeddings
73
+
74
+ For embedding multiple texts at once, use the `embedMany` function:
75
+
76
+ ```typescript
77
+ import { embedMany } from 'ai';
78
+
79
+ const result = await embedMany({
80
+ model: openai.embedding('text-embedding-3-small'),
81
+ values: ["First text", "Second text", "Third text"],
82
+ maxRetries: 2 // optional, defaults to 2
83
+ });
84
+ ```
85
+
86
+ ### Parameters
87
+
88
+ <PropertiesTable
89
+ content={[
90
+ {
91
+ name: "model",
92
+ type: "EmbeddingModel",
93
+ description: "The embedding model to use (e.g. openai.embedding('text-embedding-3-small'))"
94
+ },
95
+ {
96
+ name: "values",
97
+ type: "string[] | Record<string, any>[]",
98
+ description: "Array of text content or objects to embed"
99
+ },
100
+ {
101
+ name: "maxRetries",
102
+ type: "number",
103
+ description: "Maximum number of retries per embedding call. Set to 0 to disable retries.",
104
+ isOptional: true,
105
+ defaultValue: "2"
106
+ },
107
+ {
108
+ name: "abortSignal",
109
+ type: "AbortSignal",
110
+ description: "Optional abort signal to cancel the request",
111
+ isOptional: true
112
+ },
113
+ {
114
+ name: "headers",
115
+ type: "Record<string, string>",
116
+ description: "Additional HTTP headers for the request (only for HTTP-based providers)",
117
+ isOptional: true
118
+ }
119
+ ]}
120
+ />
121
+
122
+ ### Return Value
123
+
124
+ <PropertiesTable
125
+ content={[
126
+ {
127
+ name: "embeddings",
128
+ type: "number[][]",
129
+ description: "Array of embedding vectors corresponding to the input values"
130
+ }
131
+ ]}
132
+ />
133
+
134
+ ## Example Usage
135
+
136
+ ```typescript
137
+ import { embed, embedMany } from 'ai';
138
+ import { openai } from '@ai-sdk/openai';
139
+
140
+ // Single embedding
141
+ const singleResult = await embed({
142
+ model: openai.embedding('text-embedding-3-small'),
143
+ value: "What is the meaning of life?",
144
+ });
145
+
146
+ // Multiple embeddings
147
+ const multipleResult = await embedMany({
148
+ model: openai.embedding('text-embedding-3-small'),
149
+ values: [
150
+ "First question about life",
151
+ "Second question about universe",
152
+ "Third question about everything"
153
+ ],
154
+ });
155
+ ```
156
+
157
+ For more detailed information about embeddings in the Vercel AI SDK, see:
158
+ - [AI SDK Embeddings Overview](https://sdk.vercel.ai/docs/ai-sdk-core/embeddings)
159
+ - [embed()](https://sdk.vercel.ai/docs/reference/ai-sdk-core/embed)
160
+ - [embedMany()](https://sdk.vercel.ai/docs/reference/ai-sdk-core/embed-many)
@@ -0,0 +1,72 @@
1
+ ---
2
+ title: "Reference: ExtractParams | Document Processing | RAG | Mastra Docs"
3
+ description: Documentation for metadata extraction configuration in Mastra.
4
+ ---
5
+
6
+ # ExtractParams
7
+
8
+ ExtractParams configures metadata extraction from document chunks.
9
+
10
+ ## Example
11
+
12
+ ## ExtractParams
13
+
14
+ `ExtractParams` configures automatic metadata extraction from chunks using LLM analysis.
15
+
16
+ ```typescript showLineNumbers copy
17
+ const doc = new Document(text);
18
+ const chunks = await doc.chunk({
19
+ extract: {
20
+ fields: [
21
+ {
22
+ name: 'summary',
23
+ description: 'A 1-2 sentence summary of the main points'
24
+ },
25
+ {
26
+ name: 'entities',
27
+ description: 'List of companies, people, and locations mentioned'
28
+ },
29
+ {
30
+ name: 'custom_field',
31
+ description: 'Any other metadata you want to extract, guided by this description'
32
+ }
33
+ ],
34
+ model: 'gpt-4o-mini' // Optional: specify a different model
35
+ }
36
+ });
37
+ ```
38
+
39
+ ## Parameters
40
+
41
+ <PropertiesTable
42
+ content={[
43
+ {
44
+ name: "fields",
45
+ type: "Array<{ name: string, description: string }>",
46
+ description: "Array of fields to extract from each chunk",
47
+ isOptional: false
48
+ },
49
+ {
50
+ name: "model",
51
+ type: "string",
52
+ description: "OpenAI model to use for extraction",
53
+ defaultValue: "gpt-3.5-turbo",
54
+ isOptional: true
55
+ }
56
+ ]}
57
+ />
58
+
59
+ ## Field Types
60
+
61
+ The fields are flexible - you can define any metadata fields you want to extract. Common field types include:
62
+
63
+ - `summary`: Brief overview of chunk content
64
+ - `keywords`: Key terms or concepts
65
+ - `topics`: Main subjects discussed
66
+ - `entities`: Named entities (people, places, organizations)
67
+ - `sentiment`: Emotional tone
68
+ - `language`: Detected language
69
+ - `timestamp`: Temporal references
70
+ - `categories`: Content classification
71
+
72
+ Example: