@mastra/mcp-docs-server 1.1.16-alpha.9 → 1.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -137,6 +137,42 @@ const memory = new Memory({
137
137
 
138
138
  See [model configuration](https://mastra.ai/reference/memory/observational-memory) for using different models per agent.
139
139
 
140
+ ### Token-tiered model selection
141
+
142
+ You can use `ModelByInputTokens` to specify different Observer or Reflector models based on input token count. OM selects the matching model tier at runtime from the configured `upTo` thresholds.
143
+
144
+ ```typescript
145
+ import { Memory, ModelByInputTokens } from '@mastra/memory'
146
+
147
+ const memory = new Memory({
148
+ options: {
149
+ observationalMemory: {
150
+ observation: {
151
+ model: new ModelByInputTokens({
152
+ upTo: {
153
+ 10_000: 'google/gemini-2.5-flash', // Fast and cheap for small inputs
154
+ 40_000: 'openai/gpt-4o', // Stronger for medium inputs
155
+ 1_000_000: 'openai/gpt-4.5', // Most capable for very large inputs
156
+ },
157
+ }),
158
+ },
159
+ reflection: {
160
+ model: new ModelByInputTokens({
161
+ upTo: {
162
+ 20_000: 'google/gemini-2.5-flash',
163
+ 80_000: 'openai/gpt-4o',
164
+ },
165
+ }),
166
+ },
167
+ },
168
+ },
169
+ })
170
+ ```
171
+
172
+ The `upTo` keys are inclusive upper bounds. OM computes the actual input token count for the Observer or Reflector call, resolves the matching tier directly, and uses that concrete model for the run.
173
+
174
+ If the input exceeds the largest configured threshold, an error is thrown — ensure your thresholds cover the full range of possible input sizes, or use a model with a sufficiently large context window at the highest tier.
175
+
140
176
  ## Scopes
141
177
 
142
178
  ### Thread scope (default)
@@ -1,6 +1,6 @@
1
1
  # ![Vivgrid logo](https://models.dev/logos/vivgrid.svg)Vivgrid
2
2
 
3
- Access 8 Vivgrid models through Mastra's model router. Authentication is handled automatically using the `VIVGRID_API_KEY` environment variable.
3
+ Access 9 Vivgrid models through Mastra's model router. Authentication is handled automatically using the `VIVGRID_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [Vivgrid documentation](https://docs.vivgrid.com/models).
6
6
 
@@ -32,16 +32,17 @@ for await (const chunk of stream) {
32
32
 
33
33
  ## Models
34
34
 
35
- | Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
36
- | -------------------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
37
- | `vivgrid/deepseek-v3.2` | 128K | | | | | | $0.28 | $0.42 |
38
- | `vivgrid/gemini-3-flash-preview` | 1.0M | | | | | | $0.50 | $3 |
39
- | `vivgrid/gemini-3-pro-preview` | 1.0M | | | | | | $2 | $12 |
40
- | `vivgrid/glm-5` | 203K | | | | | | $1 | $3 |
41
- | `vivgrid/gpt-5-mini` | 272K | | | | | | $0.25 | $2 |
42
- | `vivgrid/gpt-5.1-codex` | 400K | | | | | | $1 | $10 |
43
- | `vivgrid/gpt-5.1-codex-max` | 400K | | | | | | $1 | $10 |
44
- | `vivgrid/gpt-5.2-codex` | 400K | | | | | | $2 | $14 |
35
+ | Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
36
+ | --------------------------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
37
+ | `vivgrid/deepseek-v3.2` | 128K | | | | | | $0.28 | $0.42 |
38
+ | `vivgrid/gemini-3.1-flash-lite-preview` | 1.0M | | | | | | $0.25 | $2 |
39
+ | `vivgrid/gemini-3.1-pro-preview` | 1.0M | | | | | | $2 | $12 |
40
+ | `vivgrid/glm-5` | 203K | | | | | | $1 | $3 |
41
+ | `vivgrid/gpt-5-mini` | 272K | | | | | | $0.25 | $2 |
42
+ | `vivgrid/gpt-5.1-codex` | 400K | | | | | | $1 | $10 |
43
+ | `vivgrid/gpt-5.1-codex-max` | 400K | | | | | | $1 | $10 |
44
+ | `vivgrid/gpt-5.2-codex` | 400K | | | | | | $2 | $14 |
45
+ | `vivgrid/gpt-5.4` | 400K | | | | | | $3 | $15 |
45
46
 
46
47
  ## Advanced configuration
47
48
 
@@ -71,7 +72,7 @@ const agent = new Agent({
71
72
  model: ({ requestContext }) => {
72
73
  const useAdvanced = requestContext.task === "complex";
73
74
  return useAdvanced
74
- ? "vivgrid/gpt-5.2-codex"
75
+ ? "vivgrid/gpt-5.4"
75
76
  : "vivgrid/deepseek-v3.2";
76
77
  }
77
78
  });
@@ -1,6 +1,6 @@
1
1
  # ![Vultr logo](https://models.dev/logos/vultr.svg)Vultr
2
2
 
3
- Access 10 Vultr models through Mastra's model router. Authentication is handled automatically using the `VULTR_API_KEY` environment variable.
3
+ Access 9 Vultr models through Mastra's model router. Authentication is handled automatically using the `VULTR_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [Vultr documentation](https://api.vultrinference.com/).
6
6
 
@@ -43,7 +43,6 @@ for await (const chunk of stream) {
43
43
  | `vultr/Llama-3_1-Nemotron-Ultra-253B-v1` | 32K | | | | | | $0.55 | $2 |
44
44
  | `vultr/MiniMax-M2.5` | 196K | | | | | | $0.30 | $1 |
45
45
  | `vultr/NVIDIA-Nemotron-3-Super-120B-A12B-NVFP4` | 260K | | | | | | $0.20 | $0.80 |
46
- | `vultr/Qwen2.5-Coder-32B-Instruct` | 15K | | | | | | $0.20 | $0.60 |
47
46
 
48
47
  ## Advanced configuration
49
48
 
@@ -32,7 +32,7 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
32
32
 
33
33
  **enabled** (`boolean`): Enable or disable Observational Memory. When omitted from a config object, defaults to \`true\`. Only \`enabled: false\` explicitly disables it. (Default: `true`)
34
34
 
35
- **model** (`string | LanguageModel | DynamicModel | ModelWithRetries[]`): Model for both the Observer and Reflector agents. Sets the model for both at once. Cannot be used together with \`observation.model\` or \`reflection.model\` — an error will be thrown if both are set. When using \`observationalMemory: true\`, defaults to \`google/gemini-2.5-flash\`. When passing a config object, this or \`observation.model\`/\`reflection.model\` must be set. Use \`"default"\` to explicitly use the default model (\`google/gemini-2.5-flash\`). (Default: `'google/gemini-2.5-flash' (when using observationalMemory: true)`)
35
+ **model** (`string | LanguageModel | DynamicModel | ModelByInputTokens | ModelWithRetries[]`): Model for both the Observer and Reflector agents. Sets the model for both at once. Cannot be used together with \`observation.model\` or \`reflection.model\` — an error will be thrown if both are set. When using \`observationalMemory: true\`, defaults to \`google/gemini-2.5-flash\`. When passing a config object, this or \`observation.model\`/\`reflection.model\` must be set. Use \`"default"\` to explicitly use the default model (\`google/gemini-2.5-flash\`). (Default: `'google/gemini-2.5-flash' (when using observationalMemory: true)`)
36
36
 
37
37
  **scope** (`'resource' | 'thread'`): Memory scope for observations. \`'thread'\` keeps observations per-thread. \`'resource'\` (experimental) shares observations across all threads for a resource, enabling cross-conversation memory. (Default: `'thread'`)
38
38
 
@@ -42,7 +42,7 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
42
42
 
43
43
  **observation** (`ObservationalMemoryObservationConfig`): Configuration for the observation step. Controls when the Observer agent runs and how it behaves.
44
44
 
45
- **observation.model** (`string | LanguageModel | DynamicModel | ModelWithRetries[]`): Model for the Observer agent. Cannot be set if a top-level \`model\` is also provided. If neither this nor the top-level \`model\` is set, falls back to \`reflection.model\`.
45
+ **observation.model** (`string | LanguageModel | DynamicModel | ModelByInputTokens | ModelWithRetries[]`): Model for the Observer agent. Cannot be set if a top-level \`model\` is also provided. If neither this nor the top-level \`model\` is set, falls back to \`reflection.model\`.
46
46
 
47
47
  **observation.instruction** (`string`): Custom instruction appended to the Observer's system prompt. Use this to customize what the Observer focuses on, such as domain-specific preferences or priorities.
48
48
 
@@ -68,7 +68,7 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
68
68
 
69
69
  **reflection** (`ObservationalMemoryReflectionConfig`): Configuration for the reflection step. Controls when the Reflector agent runs and how it behaves.
70
70
 
71
- **reflection.model** (`string | LanguageModel | DynamicModel | ModelWithRetries[]`): Model for the Reflector agent. Cannot be set if a top-level \`model\` is also provided. If neither this nor the top-level \`model\` is set, falls back to \`observation.model\`.
71
+ **reflection.model** (`string | LanguageModel | DynamicModel | ModelByInputTokens | ModelWithRetries[]`): Model for the Reflector agent. Cannot be set if a top-level \`model\` is also provided. If neither this nor the top-level \`model\` is set, falls back to \`observation.model\`.
72
72
 
73
73
  **reflection.instruction** (`string`): Custom instruction appended to the Reflector's system prompt. Use this to customize how the Reflector consolidates observations, such as prioritizing certain types of information.
74
74
 
@@ -612,6 +612,45 @@ When `retrieval: true` is set with `scope: 'thread'`, OM registers a `recall` to
612
612
 
613
613
  **tokenOffset** (`number`): Approximate number of tokens that were trimmed when \`truncated\` is true.
614
614
 
615
+ ### ModelByInputTokens
616
+
617
+ `ModelByInputTokens` selects a model based on the input token count. It chooses the model for the smallest threshold that covers the actual input size.
618
+
619
+ #### Constructor
620
+
621
+ ```typescript
622
+ new ModelByInputTokens(config)
623
+ ```
624
+
625
+ Where `config` is an object whose `upTo` property maps token thresholds (numbers) to model targets.
626
+
627
+ #### Example
628
+
629
+ ```typescript
630
+ import { ModelByInputTokens } from '@mastra/memory'
631
+
632
+ const selector = new ModelByInputTokens({
633
+ upTo: {
634
+ 10_000: 'google/gemini-2.5-flash', // Fast for small inputs
635
+ 40_000: 'openai/gpt-4o', // Stronger for medium inputs
636
+ 1_000_000: 'openai/gpt-4.5', // Most capable for large inputs
637
+ },
638
+ })
639
+ ```
640
+
641
+ #### Behavior
642
+
643
+ - Thresholds are sorted internally, so the order in the config object does not matter.
644
+ - `inputTokens ≤ smallest threshold` → uses that threshold's model
645
+ - `inputTokens > largest threshold` → `resolve()` throws an error. If this happens during an OM Observer or Reflector run, OM aborts via TripWire, so callers receive an empty `text` result or streamed `tripwire` instead of a normal assistant response.
646
+ - OM computes the input token count for the Observer or Reflector call and resolves the matching model tier directly
647
+
648
+ #### Methods
649
+
650
+ **resolve** (`(inputTokens: number) => MastraModelConfig`): Returns the model for the given input token count. Throws if inputTokens exceeds the largest configured threshold. When this happens during an OM run, callers receive a TripWire/empty-text outcome instead of a normal assistant response.
651
+
652
+ **getThresholds** (`() => number[]`): Returns the configured thresholds in ascending order. Useful for introspection.
653
+
615
654
  ### Related
616
655
 
617
656
  - [Observational Memory](https://mastra.ai/docs/memory/observational-memory)
package/CHANGELOG.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # @mastra/mcp-docs-server
2
2
 
3
+ ## 1.1.16
4
+
5
+ ### Patch Changes
6
+
7
+ - Updated dependencies [[`68ed4e9`](https://github.com/mastra-ai/mastra/commit/68ed4e9f118e8646b60a6112dabe854d0ef53902), [`085c1da`](https://github.com/mastra-ai/mastra/commit/085c1daf71b55a97b8ebad26623089e40055021c), [`be37de4`](https://github.com/mastra-ai/mastra/commit/be37de4391bd1d5486ce38efacbf00ca51637262), [`7dbd611`](https://github.com/mastra-ai/mastra/commit/7dbd611a85cb1e0c0a1581c57564268cb183d86e), [`f14604c`](https://github.com/mastra-ai/mastra/commit/f14604c7ef01ba794e1a8d5c7bae5415852aacec), [`4a75e10`](https://github.com/mastra-ai/mastra/commit/4a75e106bd31c283a1b3fe74c923610dcc46415b), [`f3ce603`](https://github.com/mastra-ai/mastra/commit/f3ce603fd76180f4a5be90b6dc786d389b6b3e98), [`423aa6f`](https://github.com/mastra-ai/mastra/commit/423aa6fd12406de6a1cc6b68e463d30af1d790fb), [`f21c626`](https://github.com/mastra-ai/mastra/commit/f21c6263789903ab9720b4d11373093298e97f15), [`41aee84`](https://github.com/mastra-ai/mastra/commit/41aee84561ceebe28bad1ecba8702d92838f67f0), [`2871451`](https://github.com/mastra-ai/mastra/commit/2871451703829aefa06c4a5d6eca7fd3731222ef), [`085c1da`](https://github.com/mastra-ai/mastra/commit/085c1daf71b55a97b8ebad26623089e40055021c), [`4bb5adc`](https://github.com/mastra-ai/mastra/commit/4bb5adc05c88e3a83fe1ea5ecb9eae6e17313124), [`4bb5adc`](https://github.com/mastra-ai/mastra/commit/4bb5adc05c88e3a83fe1ea5ecb9eae6e17313124), [`e06b520`](https://github.com/mastra-ai/mastra/commit/e06b520bdd5fdef844760c5e692c7852cbc5c240), [`d3930ea`](https://github.com/mastra-ai/mastra/commit/d3930eac51c30b0ecf7eaa54bb9430758b399777), [`dd9c4e0`](https://github.com/mastra-ai/mastra/commit/dd9c4e0a47962f1413e9b72114fcad912e19a0a6)]:
8
+ - @mastra/core@1.16.0
9
+ - @mastra/mcp@1.3.1
10
+
11
+ ## 1.1.16-alpha.11
12
+
13
+ ### Patch Changes
14
+
15
+ - Updated dependencies [[`f21c626`](https://github.com/mastra-ai/mastra/commit/f21c6263789903ab9720b4d11373093298e97f15)]:
16
+ - @mastra/core@1.16.0-alpha.5
17
+
18
+ ## 1.1.16-alpha.10
19
+
20
+ ### Patch Changes
21
+
22
+ - Updated dependencies [[`f14604c`](https://github.com/mastra-ai/mastra/commit/f14604c7ef01ba794e1a8d5c7bae5415852aacec), [`e06b520`](https://github.com/mastra-ai/mastra/commit/e06b520bdd5fdef844760c5e692c7852cbc5c240), [`dd9c4e0`](https://github.com/mastra-ai/mastra/commit/dd9c4e0a47962f1413e9b72114fcad912e19a0a6)]:
23
+ - @mastra/core@1.16.0-alpha.4
24
+
3
25
  ## 1.1.16-alpha.8
4
26
 
5
27
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/mcp-docs-server",
3
- "version": "1.1.16-alpha.9",
3
+ "version": "1.1.16",
4
4
  "description": "MCP server for accessing Mastra.ai documentation, changelogs, and news.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -29,7 +29,7 @@
29
29
  "jsdom": "^26.1.0",
30
30
  "local-pkg": "^1.1.2",
31
31
  "zod": "^4.3.6",
32
- "@mastra/core": "1.16.0-alpha.3",
32
+ "@mastra/core": "1.16.0",
33
33
  "@mastra/mcp": "^1.3.1"
34
34
  },
35
35
  "devDependencies": {
@@ -46,9 +46,9 @@
46
46
  "tsx": "^4.21.0",
47
47
  "typescript": "^5.9.3",
48
48
  "vitest": "4.0.18",
49
- "@internal/lint": "0.0.73",
50
- "@internal/types-builder": "0.0.48",
51
- "@mastra/core": "1.16.0-alpha.3"
49
+ "@internal/lint": "0.0.74",
50
+ "@internal/types-builder": "0.0.49",
51
+ "@mastra/core": "1.16.0"
52
52
  },
53
53
  "homepage": "https://mastra.ai",
54
54
  "repository": {