@mastra/mcp-docs-server 1.1.16-alpha.9 → 1.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -137,6 +137,42 @@ const memory = new Memory({
137
137
 
138
138
  See [model configuration](https://mastra.ai/reference/memory/observational-memory) for using different models per agent.
139
139
 
140
+ ### Token-tiered model selection
141
+
142
+ You can use `ModelByInputTokens` to specify different Observer or Reflector models based on input token count. OM selects the matching model tier at runtime from the configured `upTo` thresholds.
143
+
144
+ ```typescript
145
+ import { Memory, ModelByInputTokens } from '@mastra/memory'
146
+
147
+ const memory = new Memory({
148
+ options: {
149
+ observationalMemory: {
150
+ observation: {
151
+ model: new ModelByInputTokens({
152
+ upTo: {
153
+ 10_000: 'google/gemini-2.5-flash', // Fast and cheap for small inputs
154
+ 40_000: 'openai/gpt-4o', // Stronger for medium inputs
155
+ 1_000_000: 'openai/gpt-4.5', // Most capable for very large inputs
156
+ },
157
+ }),
158
+ },
159
+ reflection: {
160
+ model: new ModelByInputTokens({
161
+ upTo: {
162
+ 20_000: 'google/gemini-2.5-flash',
163
+ 80_000: 'openai/gpt-4o',
164
+ },
165
+ }),
166
+ },
167
+ },
168
+ },
169
+ })
170
+ ```
171
+
172
+ The `upTo` keys are inclusive upper bounds. OM computes the actual input token count for the Observer or Reflector call, resolves the matching tier directly, and uses that concrete model for the run.
173
+
174
+ If the input exceeds the largest configured threshold, an error is thrown — ensure your thresholds cover the full range of possible input sizes, or use a model with a sufficiently large context window at the highest tier.
175
+
140
176
  ## Scopes
141
177
 
142
178
  ### Thread scope (default)
@@ -1,6 +1,6 @@
1
1
  # ![Vivgrid logo](https://models.dev/logos/vivgrid.svg)Vivgrid
2
2
 
3
- Access 8 Vivgrid models through Mastra's model router. Authentication is handled automatically using the `VIVGRID_API_KEY` environment variable.
3
+ Access 9 Vivgrid models through Mastra's model router. Authentication is handled automatically using the `VIVGRID_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [Vivgrid documentation](https://docs.vivgrid.com/models).
6
6
 
@@ -32,16 +32,17 @@ for await (const chunk of stream) {
32
32
 
33
33
  ## Models
34
34
 
35
- | Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
36
- | -------------------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
37
- | `vivgrid/deepseek-v3.2` | 128K | | | | | | $0.28 | $0.42 |
38
- | `vivgrid/gemini-3-flash-preview` | 1.0M | | | | | | $0.50 | $3 |
39
- | `vivgrid/gemini-3-pro-preview` | 1.0M | | | | | | $2 | $12 |
40
- | `vivgrid/glm-5` | 203K | | | | | | $1 | $3 |
41
- | `vivgrid/gpt-5-mini` | 272K | | | | | | $0.25 | $2 |
42
- | `vivgrid/gpt-5.1-codex` | 400K | | | | | | $1 | $10 |
43
- | `vivgrid/gpt-5.1-codex-max` | 400K | | | | | | $1 | $10 |
44
- | `vivgrid/gpt-5.2-codex` | 400K | | | | | | $2 | $14 |
35
+ | Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
36
+ | --------------------------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
37
+ | `vivgrid/deepseek-v3.2` | 128K | | | | | | $0.28 | $0.42 |
38
+ | `vivgrid/gemini-3.1-flash-lite-preview` | 1.0M | | | | | | $0.25 | $2 |
39
+ | `vivgrid/gemini-3.1-pro-preview` | 1.0M | | | | | | $2 | $12 |
40
+ | `vivgrid/glm-5` | 203K | | | | | | $1 | $3 |
41
+ | `vivgrid/gpt-5-mini` | 272K | | | | | | $0.25 | $2 |
42
+ | `vivgrid/gpt-5.1-codex` | 400K | | | | | | $1 | $10 |
43
+ | `vivgrid/gpt-5.1-codex-max` | 400K | | | | | | $1 | $10 |
44
+ | `vivgrid/gpt-5.2-codex` | 400K | | | | | | $2 | $14 |
45
+ | `vivgrid/gpt-5.4` | 400K | | | | | | $3 | $15 |
45
46
 
46
47
  ## Advanced configuration
47
48
 
@@ -71,7 +72,7 @@ const agent = new Agent({
71
72
  model: ({ requestContext }) => {
72
73
  const useAdvanced = requestContext.task === "complex";
73
74
  return useAdvanced
74
- ? "vivgrid/gpt-5.2-codex"
75
+ ? "vivgrid/gpt-5.4"
75
76
  : "vivgrid/deepseek-v3.2";
76
77
  }
77
78
  });
@@ -1,6 +1,6 @@
1
1
  # ![Vultr logo](https://models.dev/logos/vultr.svg)Vultr
2
2
 
3
- Access 10 Vultr models through Mastra's model router. Authentication is handled automatically using the `VULTR_API_KEY` environment variable.
3
+ Access 9 Vultr models through Mastra's model router. Authentication is handled automatically using the `VULTR_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [Vultr documentation](https://api.vultrinference.com/).
6
6
 
@@ -43,7 +43,6 @@ for await (const chunk of stream) {
43
43
  | `vultr/Llama-3_1-Nemotron-Ultra-253B-v1` | 32K | | | | | | $0.55 | $2 |
44
44
  | `vultr/MiniMax-M2.5` | 196K | | | | | | $0.30 | $1 |
45
45
  | `vultr/NVIDIA-Nemotron-3-Super-120B-A12B-NVFP4` | 260K | | | | | | $0.20 | $0.80 |
46
- | `vultr/Qwen2.5-Coder-32B-Instruct` | 15K | | | | | | $0.20 | $0.60 |
47
46
 
48
47
  ## Advanced configuration
49
48
 
@@ -32,7 +32,7 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
32
32
 
33
33
  **enabled** (`boolean`): Enable or disable Observational Memory. When omitted from a config object, defaults to \`true\`. Only \`enabled: false\` explicitly disables it. (Default: `true`)
34
34
 
35
- **model** (`string | LanguageModel | DynamicModel | ModelWithRetries[]`): Model for both the Observer and Reflector agents. Sets the model for both at once. Cannot be used together with \`observation.model\` or \`reflection.model\` — an error will be thrown if both are set. When using \`observationalMemory: true\`, defaults to \`google/gemini-2.5-flash\`. When passing a config object, this or \`observation.model\`/\`reflection.model\` must be set. Use \`"default"\` to explicitly use the default model (\`google/gemini-2.5-flash\`). (Default: `'google/gemini-2.5-flash' (when using observationalMemory: true)`)
35
+ **model** (`string | LanguageModel | DynamicModel | ModelByInputTokens | ModelWithRetries[]`): Model for both the Observer and Reflector agents. Sets the model for both at once. Cannot be used together with \`observation.model\` or \`reflection.model\` — an error will be thrown if both are set. When using \`observationalMemory: true\`, defaults to \`google/gemini-2.5-flash\`. When passing a config object, this or \`observation.model\`/\`reflection.model\` must be set. Use \`"default"\` to explicitly use the default model (\`google/gemini-2.5-flash\`). (Default: `'google/gemini-2.5-flash' (when using observationalMemory: true)`)
36
36
 
37
37
  **scope** (`'resource' | 'thread'`): Memory scope for observations. \`'thread'\` keeps observations per-thread. \`'resource'\` (experimental) shares observations across all threads for a resource, enabling cross-conversation memory. (Default: `'thread'`)
38
38
 
@@ -42,7 +42,7 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
42
42
 
43
43
  **observation** (`ObservationalMemoryObservationConfig`): Configuration for the observation step. Controls when the Observer agent runs and how it behaves.
44
44
 
45
- **observation.model** (`string | LanguageModel | DynamicModel | ModelWithRetries[]`): Model for the Observer agent. Cannot be set if a top-level \`model\` is also provided. If neither this nor the top-level \`model\` is set, falls back to \`reflection.model\`.
45
+ **observation.model** (`string | LanguageModel | DynamicModel | ModelByInputTokens | ModelWithRetries[]`): Model for the Observer agent. Cannot be set if a top-level \`model\` is also provided. If neither this nor the top-level \`model\` is set, falls back to \`reflection.model\`.
46
46
 
47
47
  **observation.instruction** (`string`): Custom instruction appended to the Observer's system prompt. Use this to customize what the Observer focuses on, such as domain-specific preferences or priorities.
48
48
 
@@ -68,7 +68,7 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
68
68
 
69
69
  **reflection** (`ObservationalMemoryReflectionConfig`): Configuration for the reflection step. Controls when the Reflector agent runs and how it behaves.
70
70
 
71
- **reflection.model** (`string | LanguageModel | DynamicModel | ModelWithRetries[]`): Model for the Reflector agent. Cannot be set if a top-level \`model\` is also provided. If neither this nor the top-level \`model\` is set, falls back to \`observation.model\`.
71
+ **reflection.model** (`string | LanguageModel | DynamicModel | ModelByInputTokens | ModelWithRetries[]`): Model for the Reflector agent. Cannot be set if a top-level \`model\` is also provided. If neither this nor the top-level \`model\` is set, falls back to \`observation.model\`.
72
72
 
73
73
  **reflection.instruction** (`string`): Custom instruction appended to the Reflector's system prompt. Use this to customize how the Reflector consolidates observations, such as prioritizing certain types of information.
74
74
 
@@ -612,6 +612,45 @@ When `retrieval: true` is set with `scope: 'thread'`, OM registers a `recall` to
612
612
 
613
613
  **tokenOffset** (`number`): Approximate number of tokens that were trimmed when \`truncated\` is true.
614
614
 
615
+ ### ModelByInputTokens
616
+
617
+ `ModelByInputTokens` selects a model based on the input token count. It chooses the model for the smallest threshold that covers the actual input size.
618
+
619
+ #### Constructor
620
+
621
+ ```typescript
622
+ new ModelByInputTokens(config)
623
+ ```
624
+
625
+ Where `config` is an object whose `upTo` property maps token thresholds (numbers) to model targets.
626
+
627
+ #### Example
628
+
629
+ ```typescript
630
+ import { ModelByInputTokens } from '@mastra/memory'
631
+
632
+ const selector = new ModelByInputTokens({
633
+ upTo: {
634
+ 10_000: 'google/gemini-2.5-flash', // Fast for small inputs
635
+ 40_000: 'openai/gpt-4o', // Stronger for medium inputs
636
+ 1_000_000: 'openai/gpt-4.5', // Most capable for large inputs
637
+ },
638
+ })
639
+ ```
640
+
641
+ #### Behavior
642
+
643
+ - Thresholds are sorted internally, so the order in the config object does not matter.
644
+ - `inputTokens ≤ smallest threshold` → uses that threshold's model
645
+ - `inputTokens > largest threshold` → `resolve()` throws an error. If this happens during an OM Observer or Reflector run, OM aborts via TripWire, so callers receive an empty `text` result or streamed `tripwire` instead of a normal assistant response.
646
+ - OM computes the input token count for the Observer or Reflector call and resolves the matching model tier directly
647
+
648
+ #### Methods
649
+
650
+ **resolve** (`(inputTokens: number) => MastraModelConfig`): Returns the model for the given input token count. Throws if inputTokens exceeds the largest configured threshold. When this happens during an OM run, callers receive a TripWire/empty-text outcome instead of a normal assistant response.
651
+
652
+ **getThresholds** (`() => number[]`): Returns the configured thresholds in ascending order. Useful for introspection.
653
+
615
654
  ### Related
616
655
 
617
656
  - [Observational Memory](https://mastra.ai/docs/memory/observational-memory)
package/CHANGELOG.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # @mastra/mcp-docs-server
2
2
 
3
+ ## 1.1.16
4
+
5
+ ### Patch Changes
6
+
7
+ - Updated dependencies [[`68ed4e9`](https://github.com/mastra-ai/mastra/commit/68ed4e9f118e8646b60a6112dabe854d0ef53902), [`085c1da`](https://github.com/mastra-ai/mastra/commit/085c1daf71b55a97b8ebad26623089e40055021c), [`be37de4`](https://github.com/mastra-ai/mastra/commit/be37de4391bd1d5486ce38efacbf00ca51637262), [`7dbd611`](https://github.com/mastra-ai/mastra/commit/7dbd611a85cb1e0c0a1581c57564268cb183d86e), [`f14604c`](https://github.com/mastra-ai/mastra/commit/f14604c7ef01ba794e1a8d5c7bae5415852aacec), [`4a75e10`](https://github.com/mastra-ai/mastra/commit/4a75e106bd31c283a1b3fe74c923610dcc46415b), [`f3ce603`](https://github.com/mastra-ai/mastra/commit/f3ce603fd76180f4a5be90b6dc786d389b6b3e98), [`423aa6f`](https://github.com/mastra-ai/mastra/commit/423aa6fd12406de6a1cc6b68e463d30af1d790fb), [`f21c626`](https://github.com/mastra-ai/mastra/commit/f21c6263789903ab9720b4d11373093298e97f15), [`41aee84`](https://github.com/mastra-ai/mastra/commit/41aee84561ceebe28bad1ecba8702d92838f67f0), [`2871451`](https://github.com/mastra-ai/mastra/commit/2871451703829aefa06c4a5d6eca7fd3731222ef), [`085c1da`](https://github.com/mastra-ai/mastra/commit/085c1daf71b55a97b8ebad26623089e40055021c), [`4bb5adc`](https://github.com/mastra-ai/mastra/commit/4bb5adc05c88e3a83fe1ea5ecb9eae6e17313124), [`4bb5adc`](https://github.com/mastra-ai/mastra/commit/4bb5adc05c88e3a83fe1ea5ecb9eae6e17313124), [`e06b520`](https://github.com/mastra-ai/mastra/commit/e06b520bdd5fdef844760c5e692c7852cbc5c240), [`d3930ea`](https://github.com/mastra-ai/mastra/commit/d3930eac51c30b0ecf7eaa54bb9430758b399777), [`dd9c4e0`](https://github.com/mastra-ai/mastra/commit/dd9c4e0a47962f1413e9b72114fcad912e19a0a6)]:
8
+ - @mastra/core@1.16.0
9
+ - @mastra/mcp@1.3.1
10
+
11
+ ## 1.1.16-alpha.11
12
+
13
+ ### Patch Changes
14
+
15
+ - Updated dependencies [[`f21c626`](https://github.com/mastra-ai/mastra/commit/f21c6263789903ab9720b4d11373093298e97f15)]:
16
+ - @mastra/core@1.16.0-alpha.5
17
+
18
+ ## 1.1.16-alpha.10
19
+
20
+ ### Patch Changes
21
+
22
+ - Updated dependencies [[`f14604c`](https://github.com/mastra-ai/mastra/commit/f14604c7ef01ba794e1a8d5c7bae5415852aacec), [`e06b520`](https://github.com/mastra-ai/mastra/commit/e06b520bdd5fdef844760c5e692c7852cbc5c240), [`dd9c4e0`](https://github.com/mastra-ai/mastra/commit/dd9c4e0a47962f1413e9b72114fcad912e19a0a6)]:
23
+ - @mastra/core@1.16.0-alpha.4
24
+
3
25
  ## 1.1.16-alpha.8
4
26
 
5
27
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/mcp-docs-server",
3
- "version": "1.1.16-alpha.9",
3
+ "version": "1.1.16",
4
4
  "description": "MCP server for accessing Mastra.ai documentation, changelogs, and news.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -29,7 +29,7 @@
29
29
  "jsdom": "^26.1.0",
30
30
  "local-pkg": "^1.1.2",
31
31
  "zod": "^4.3.6",
32
- "@mastra/core": "1.16.0-alpha.3",
32
+ "@mastra/core": "1.16.0",
33
33
  "@mastra/mcp": "^1.3.1"
34
34
  },
35
35
  "devDependencies": {
@@ -46,9 +46,9 @@
46
46
  "tsx": "^4.21.0",
47
47
  "typescript": "^5.9.3",
48
48
  "vitest": "4.0.18",
49
- "@internal/lint": "0.0.73",
50
- "@internal/types-builder": "0.0.48",
51
- "@mastra/core": "1.16.0-alpha.3"
49
+ "@internal/lint": "0.0.74",
50
+ "@internal/types-builder": "0.0.49",
51
+ "@mastra/core": "1.16.0"
52
52
  },
53
53
  "homepage": "https://mastra.ai",
54
54
  "repository": {