@dbx-tools/appkit-mastra 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,423 @@
1
+ /**
2
+ * Databricks Model Serving resolver for Mastra agents.
3
+ *
4
+ * Each agent step calls {@link buildModel} with the active
5
+ * `RequestContext`. The user stamped by `MastraServer` carries an
6
+ * AppKit `WorkspaceClient`; we ask it for the workspace host and a
7
+ * fresh bearer header, then point Mastra's OpenAI-compatible provider
8
+ * at `/serving-endpoints` on that host.
9
+ *
10
+ * Model id resolution walks three sources before falling back to the
11
+ * hard-coded default, **in this priority order**:
12
+ *
13
+ * 1. Per-request override stashed by the auth middleware under
14
+ * {@link MASTRA_MODEL_OVERRIDE_KEY} (header / query / body).
15
+ * 2. The static `modelId` passed in by the agent / plugin (string
16
+ * sugar on `def.model` or `config.defaultModel`).
17
+ * 3. `DATABRICKS_SERVING_ENDPOINT_NAME` env var.
18
+ * 4. {@link FALLBACK_MODEL_IDS} (first id present in the workspace).
19
+ *
20
+ * Whatever wins is then fuzzy-matched against the live
21
+ * `/serving-endpoints` list ({@link listServingEndpoints}) so loose
22
+ * names like `"claude sonnet"` resolve to the real endpoint name.
23
+ * Fuzzy matching is best-effort: when the workspace client throws
24
+ * (network blip, expired token at cache-fill time) we fall back to
25
+ * the input verbatim and let Databricks return the canonical error.
26
+ */
27
+ import { commonUtils, httpUtils, logUtils, stringUtils, } from "@dbx-tools/appkit-shared";
28
+ import { MASTRA_USER_KEY } from "./config.js";
29
+ import { listServingEndpoints, MASTRA_MODEL_OVERRIDE_KEY, resolveModelId, resolveServingConfig, } from "./serving.js";
/**
 * Capability tiers used to group Databricks Foundation Model API
 * endpoints.
 *
 * - `Thinking`: deepest-reasoning models (Claude Opus, GPT-5.5 Pro,
 *   Gemini Pro, Llama 4 Maverick, ...). Highest cost and latency; keep
 *   for hard multi-step reasoning.
 * - `Balanced`: the cost/latency middle ground for general agent work
 *   (Claude Sonnet, GPT-5.x, Gemini Flash, Llama 3.3 70B). The sensible
 *   default for most agents.
 * - `Fast`: cheap and quick; classification, routing, tool-argument
 *   extraction, simple summarisation (Claude Haiku, GPT-5 mini/nano,
 *   Gemini Flash Lite, GPT-OSS 20B, Llama 3.1 8B).
 *
 * Each member's value is its lowercase slug, which is what appears in
 * cache keys, logs, and serialized configs.
 */
export var ModelTier = {
    Thinking: "thinking",
    Balanced: "balanced",
    Fast: "fast",
};
/**
 * Hand-curated catalogue of Databricks-hosted Foundation Model API
 * endpoint names, keyed first by capability {@link ModelTier} and then
 * by provider bucket. Every inner array is priority-ordered: the most
 * capable model of that provider and tier comes first.
 *
 * Provider buckets:
 *
 * - `claude`: Anthropic Claude family (closed; flagship reasoning).
 * - `gpt`: OpenAI GPT-5 family (closed; "ChatGPT" on Databricks FMAPI).
 * - `gemini`: Google Gemini family (closed; multimodal + web-search).
 * - `openSource`: open-weights models (widest regional / SKU availability).
 *
 * Refresh the list from the Databricks "supported foundation models"
 * doc whenever new endpoints land.
 */
export const MODEL_CATALOG = {
    // Deep-reasoning tier: highest cost and latency.
    [ModelTier.Thinking]: {
        claude: [
            "databricks-claude-opus-4-8",
            "databricks-claude-opus-4-7",
            "databricks-claude-opus-4-6",
            "databricks-claude-opus-4-5",
            "databricks-claude-opus-4-1",
        ],
        gpt: ["databricks-gpt-5-5-pro"],
        gemini: [
            "databricks-gemini-3-1-pro",
            "databricks-gemini-3-pro",
            "databricks-gemini-2-5-pro",
        ],
        openSource: [
            "databricks-llama-4-maverick",
            "databricks-gpt-oss-120b",
            "databricks-meta-llama-3-1-405b-instruct",
        ],
    },
    // General-purpose tier: the default trade-off for most agents.
    [ModelTier.Balanced]: {
        claude: [
            "databricks-claude-sonnet-4-6",
            "databricks-claude-sonnet-4-5",
            "databricks-claude-sonnet-4",
        ],
        gpt: [
            "databricks-gpt-5-5",
            "databricks-gpt-5-4",
            "databricks-gpt-5-2",
            "databricks-gpt-5-1",
            "databricks-gpt-5",
        ],
        gemini: [
            "databricks-gemini-3-5-flash",
            "databricks-gemini-3-flash",
            "databricks-gemini-2-5-flash",
        ],
        openSource: [
            "databricks-meta-llama-3-3-70b-instruct",
            "databricks-qwen3-next-80b-a3b-instruct",
            "databricks-qwen35-122b-a10b",
        ],
    },
    // Cheap and quick tier: classification, routing, extraction.
    [ModelTier.Fast]: {
        claude: ["databricks-claude-haiku-4-5"],
        gpt: [
            "databricks-gpt-5-4-mini",
            "databricks-gpt-5-4-nano",
            "databricks-gpt-5-mini",
            "databricks-gpt-5-nano",
        ],
        gemini: ["databricks-gemini-3-1-flash-lite"],
        openSource: [
            "databricks-gpt-oss-20b",
            "databricks-gemma-3-12b",
            "databricks-meta-llama-3-1-8b-instruct",
        ],
    },
};
/**
 * Round-robin merge of several lists: each pass takes the next element
 * from every list that still has one, in argument order. Lists that
 * run out early are simply skipped on later passes.
 *
 * Used to alternate between provider buckets inside a tier rather than
 * exhausting one vendor before trying the next.
 *
 * Example: `interleave(["a1","a2","a3"], ["b1","b2"])` ->
 * `["a1","b1","a2","b2","a3"]`.
 *
 * @param lists Any number of arrays to merge.
 * @returns A new array; the inputs are not modified.
 */
function interleave(...lists) {
    // Number of passes = length of the longest list (0 when no lists).
    const longest = lists.reduce((len, list) => Math.max(len, list.length), 0);
    const zipped = [];
    for (let round = 0; round < longest; round += 1) {
        for (const list of lists) {
            if (round >= list.length)
                continue;
            zipped.push(list[round]);
        }
    }
    return zipped;
}
/**
 * Priority-ordered model ids for one capability {@link ModelTier}.
 *
 * The closed-weights providers are round-robin interleaved in the
 * order Claude, GPT, Gemini, so a workspace without the top Claude
 * reaches a flagship GPT / Gemini on the very next probe. The
 * open-weights ids are appended afterwards, un-interleaved, as the
 * universal floor (widest regional availability).
 *
 * @param tier Tier whose catalogue entry should be flattened.
 * @returns A new array of endpoint names, best candidate first.
 *
 * @example
 * ```ts
 * mastra({
 *   defaultModelFallbacks: modelsForTier(ModelTier.Fast),
 * });
 * ```
 */
export function modelsForTier(tier) {
    const { claude, gpt, gemini, openSource } = MODEL_CATALOG[tier];
    const closedWeights = interleave(claude, gpt, gemini);
    return [...closedWeights, ...openSource];
}
/**
 * The single highest-priority model id for a {@link ModelTier}, i.e.
 * the first entry of {@link modelsForTier}. Synchronous.
 *
 * The agent-step resolver fuzzy-matches the returned id against the
 * workspace catalogue at call time, so this is usable even when the
 * literal top pick is not deployed.
 *
 * @param tier Tier to pick a model for.
 * @returns The top endpoint name of that tier.
 *
 * @example
 * ```ts
 * const classifier = createAgent({
 *   instructions: "Classify this email",
 *   model: modelForTier(ModelTier.Fast), // cheap, quick
 * });
 *
 * const planner = createAgent({
 *   instructions: "Plan a multi-step migration",
 *   model: modelForTier(ModelTier.Thinking), // deep reasoning
 * });
 * ```
 */
export function modelForTier(tier) {
    const [topPick] = modelsForTier(tier);
    return topPick;
}
/**
 * Last-resort model ids, consulted when no model was named through
 * `config.defaultModel`, a per-agent `model`, or
 * `DATABRICKS_SERVING_ENDPOINT_NAME`.
 *
 * The list is walked in order at resolve time and the first id whose
 * endpoint actually appears in the workspace's `/serving-endpoints`
 * listing wins. Not every region / SKU has every model and the set of
 * Foundation Model APIs changes quickly, so the list runs all the way
 * from the best thinking model down to the smallest commodity Llama.
 *
 * It is the concatenation of the per-tier lists in the order
 * Thinking -> Balanced -> Fast; inside each tier the providers are
 * round-robin-zipped (Claude, GPT, Gemini) followed by the open-weights
 * tail. Replace the whole list via
 * `MastraPluginConfig.defaultModelFallbacks`, e.g. to pin a regulated
 * workspace to an approved subset or to favour a particular tier.
 */
export const FALLBACK_MODEL_IDS = [
    ModelTier.Thinking,
    ModelTier.Balanced,
    ModelTier.Fast,
].flatMap((tier) => modelsForTier(tier));
/**
 * Resolve a `MastraModelConfig` for the current agent step.
 *
 * Runs while `agent.stream` is inside the `asUser(req)` scope so tokens
 * are user-scoped; outside an active user context the workspace client
 * falls back to the service principal.
 *
 * @param config Plugin configuration; `providerId` is read here and the
 *   whole object is forwarded to the model-id resolver.
 * @param requestContext Per-request context. Must hold, under
 *   `MASTRA_USER_KEY`, a user whose `executionContext.client` is the
 *   workspace client.
 * @param overrides Optional static overrides (e.g. `modelId`).
 * @returns `{ providerId, modelId, url, headers }` where `url` is the
 *   workspace's `/serving-endpoints` base and `headers` carries the
 *   freshly generated auth headers.
 * @throws TypeError when the request context has no user / workspace
 *   client. Errors from the client or the resolver propagate unchanged.
 */
export async function buildModel(config, requestContext, overrides = {}) {
    // Memoized installer; `void` marks the intentionally ignored result.
    void setupFetchInterceptor();
    const user = requestContext.get(MASTRA_USER_KEY);
    const client = user?.executionContext?.client;
    if (!client) {
        // Fail with an actionable message instead of the engine's
        // "cannot read properties of undefined".
        throw new TypeError(`buildModel: no user with a workspace client on the request context (key "${String(MASTRA_USER_KEY)}")`);
    }
    const clientConfig = client.config;
    const host = (await clientConfig.getHost()).toString();
    const headers = new Headers();
    await clientConfig.authenticate(headers);
    // The OpenAI Node SDK appends paths like `/chat/completions` to whatever
    // URL we hand it. Drop the trailing slash so the resulting URL stays
    // well-formed (`/serving-endpoints/chat/completions`).
    const url = new URL("/serving-endpoints", host).toString().replace(/\/$/, "");
    const modelId = await pickModelId(config, requestContext, overrides, user, host);
    return {
        providerId: config.providerId ?? "databricks",
        modelId,
        url,
        headers: Object.fromEntries(headers.entries()),
    };
}
/**
 * Walk the resolution ladder and pick a model id.
 *
 * 1. **Explicit ask**: the first non-blank value among the per-request
 *    override (only read when `serving.allowOverride` is set),
 *    `overrides.modelId`, and `DATABRICKS_SERVING_ENDPOINT_NAME`. With
 *    fuzzy matching on, the value is passed through `resolveModelId`
 *    against the live endpoint list so loose names like
 *    `"claude sonnet"` can resolve. With fuzzy matching off it is
 *    returned verbatim without touching the catalogue.
 *
 * 2. **No explicit ask**: walk `serving.fallbacks` and return the first
 *    id present in the workspace listing (see `pickFirstAvailable`).
 *
 * Blank or whitespace-only strings are treated as "not provided". An
 * environment variable exported as `VAR=` reads back as `""`, which
 * would otherwise be sent to Databricks as the model id.
 *
 * This function does not catch errors from `listServingEndpoints`;
 * whatever it throws propagates to the caller.
 *
 * @param config Plugin configuration, forwarded to `resolveServingConfig`.
 * @param requestContext Per-request context holding the optional override.
 * @param overrides Static overrides; only `modelId` is read here.
 * @param user Request user; `user.executionContext.client` lists endpoints.
 * @param host Workspace host, forwarded to `listServingEndpoints`.
 * @returns The selected model id.
 */
async function pickModelId(config, requestContext, overrides, user, host) {
    const serving = resolveServingConfig(config, FALLBACK_MODEL_IDS);
    const override = serving.allowOverride
        ? requestContext.get(MASTRA_MODEL_OVERRIDE_KEY)
        : undefined;
    // null / undefined / blank strings all mean "nothing was asked for".
    const isProvided = (value) => value != null && !(typeof value === "string" && value.trim() === "");
    const explicit = [
        override,
        overrides?.modelId,
        process.env.DATABRICKS_SERVING_ENDPOINT_NAME,
    ].find(isProvided);
    // Cheap exit: when the caller named a specific model and fuzzy
    // matching is off, there's no reason to touch the catalogue at all.
    if (explicit !== undefined && !serving.fuzzy)
        return explicit;
    const endpoints = await listServingEndpoints(user.executionContext.client, host, {
        ttlMs: serving.ttlMs,
    });
    if (explicit === undefined)
        return pickFirstAvailable(serving.fallbacks, endpoints);
    return resolveModelId(explicit, endpoints, { threshold: serving.threshold }).modelId;
}
/**
 * Return the first entry of `fallbacks` whose name matches one of the
 * listed `endpoints`.
 *
 * When nothing matches, the top fallback is returned anyway (or the
 * head of {@link FALLBACK_MODEL_IDS} if `fallbacks` is empty), so
 * callers always receive a value; a workspace lacking every candidate
 * then gets a clean 404 from Databricks instead of a malformed config.
 *
 * @param fallbacks Candidate model ids, best first.
 * @param endpoints Endpoint summaries; only `name` is read.
 * @returns The chosen model id.
 */
function pickFirstAvailable(fallbacks, endpoints) {
    const liveNames = new Set(endpoints.map(({ name }) => name));
    const hit = fallbacks.findIndex((id) => liveNames.has(id));
    if (hit !== -1)
        return fallbacks[hit];
    return fallbacks[0] ?? FALLBACK_MODEL_IDS[0];
}
/** Path prefix that identifies a Databricks Model Serving REST call. */
const SERVING_ENDPOINTS_PATH_PREFIX = "/serving-endpoints/";
/**
 * Install a single shared `globalThis.fetch` wrapper for requests to
 * `/serving-endpoints/...` that carry a string body (in practice the
 * POSTs issued by the model provider; the HTTP method itself is not
 * inspected). Every other request is forwarded untouched.
 *
 * For matching requests the wrapper:
 *
 * 1. Rewrites the outgoing `messages` array to repair Mastra/AI SDK
 *    stream-replay quirks that Databricks-hosted Claude rejects (see
 *    `rewriteServingBody` / `sanitizeServingMessages`).
 * 2. When `MASTRA_DEBUG_LLM` is enabled, dumps the (post-sanitize) JSON
 *    body to stderr so 4xx debugging doesn't have to fight AI SDK's
 *    `[Array]` formatter.
 *
 * `MASTRA_DEBUG_LLM` is read once, at install time. Unset, empty, `0`,
 * `false`, `no` and `off` (case-insensitive) leave the dump disabled;
 * any other value (e.g. `1`) enables it.
 *
 * Wrapped in `commonUtils.memoize` so it can be called from a hot path
 * such as `buildModel`; the intent is that the wrapper is installed at
 * most once per process.
 */
const setupFetchInterceptor = commonUtils.memoize(() => {
    // `Boolean("0")` and `Boolean("false")` are both true, so explicit
    // "off" spellings have to be recognised by value.
    const flag = (process.env.MASTRA_DEBUG_LLM ?? "").trim().toLowerCase();
    const debug = !["", "0", "false", "no", "off"].includes(flag);
    const original = globalThis.fetch.bind(globalThis);
    globalThis.fetch = (async (input, init) => {
        const url = httpUtils.toURL(input);
        if (!url ||
            !url.pathname.startsWith(SERVING_ENDPOINTS_PATH_PREFIX) ||
            typeof init?.body !== "string") {
            return original(input, init);
        }
        const rewritten = rewriteServingBody(init.body);
        if (rewritten !== init.body) {
            // Copy rather than mutate: the caller may reuse its `init`.
            init = { ...init, body: rewritten };
        }
        if (debug) {
            try {
                console.error("[mastra:llm-debug] -> POST", url.toString());
                console.error(JSON.stringify(JSON.parse(rewritten), null, 2));
            }
            catch {
                console.error("[mastra:llm-debug] -> POST", url.toString(), "(non-JSON body)");
            }
        }
        return original(input, init);
    });
});
/**
 * Parse, sanitize, and re-serialize a `/serving-endpoints/...` request
 * body.
 *
 * Returns the original string verbatim when the body is not JSON, is
 * JSON but not an object (e.g. `null` or a number), has no `messages`
 * array, or needed no rewrite. Returning the same string lets the
 * caller detect the pass-through case with `!==` and skip allocating a
 * new `init` object.
 *
 * @param body Raw request body.
 * @returns The re-serialized body when the messages were repaired,
 *   otherwise `body` itself.
 */
function rewriteServingBody(body) {
    let parsed;
    try {
        parsed = JSON.parse(body);
    }
    catch {
        return body;
    }
    // `JSON.parse("null")` yields `null`; reading `.messages` off it would
    // throw from inside the global fetch wrapper.
    if (parsed === null || typeof parsed !== "object")
        return body;
    if (!Array.isArray(parsed.messages))
        return body;
    const changed = sanitizeServingMessages(parsed.messages);
    return changed ? JSON.stringify(parsed) : body;
}
/**
 * Repair a Mastra/AI SDK message replay that Databricks-hosted Claude
 * rejects with `"This model does not support assistant message
 * prefill. The conversation must end with a user message."`.
 *
 * Background: when one assistant turn streams text *and* a
 * `tool_call`, the AI SDK stores two assistant entries (text-only and
 * tool-call-only). On the next step the tool-call entry is replayed
 * before the tool result and the text entry after it, so the
 * conversation ends on an assistant text message, which Anthropic
 * reads as a prefill request.
 *
 * Repair: if the final message is an assistant message without
 * `tool_calls`, and walking back over the contiguous `tool` messages
 * before it lands on an assistant message that has `tool_calls`, the
 * trailing text is folded into that opener's `content` (joined with a
 * blank line) and the trailing message is removed. The result is the
 * canonical OpenAI shape
 * `[..., user, assistant(text + tool_calls), tool(...)]`.
 *
 * @param messages Chat messages; mutated in place.
 * @returns `true` when the array was changed, so the caller knows to
 *   re-serialize; `false` otherwise.
 */
function sanitizeServingMessages(messages) {
    const count = messages.length;
    if (count < 2)
        return false;
    const trailing = messages[count - 1];
    const endsOnPlainAssistant = Boolean(trailing) &&
        trailing.role === "assistant" &&
        !(trailing.tool_calls && trailing.tool_calls.length > 0);
    if (!endsOnPlainAssistant)
        return false;
    // Step back past the run of tool results to reach the message that
    // started this tool sequence.
    let openerIndex = count - 2;
    for (; openerIndex >= 0; openerIndex -= 1) {
        if (messages[openerIndex]?.role !== "tool")
            break;
    }
    if (openerIndex < 0)
        return false;
    const opener = messages[openerIndex];
    const opensToolSequence = Boolean(opener) &&
        opener.role === "assistant" &&
        Boolean(opener.tool_calls) &&
        opener.tool_calls.length !== 0;
    if (!opensToolSequence)
        return false;
    // Opener text first, trailing text second; fragments for which
    // `trimToNull` returns null are left out of the join.
    const fragments = [];
    for (const raw of [opener.content, trailing.content]) {
        const text = stringUtils.trimToNull(raw);
        if (text !== null)
            fragments.push(text);
    }
    opener.content = fragments.join("\n\n");
    messages.pop();
    return true;
}
@@ -0,0 +1,120 @@
1
+ /**
2
+ * AppKit plugin that builds one or more Mastra `Agent` instances and
3
+ * mounts the `@mastra/express` server plus `@mastra/ai-sdk` `chatRoute`
4
+ * handlers. The UI message stream matches what `chatRoute()` emits, so
5
+ * the client can use `useChat()` from `@ai-sdk/react` without custom
6
+ * parsing.
7
+ *
8
+ * - Agents: registered through `config.agents` at plugin creation
9
+ * ({@link MastraAgentDefinition}). Each entry's `tools` field accepts
10
+ * either a plain record or a `(plugins) => tools` callback that gets
11
+ * a typed sibling-plugin index ({@link MastraPlugins}). Omit
12
+ * `config.agents` to get a single built-in `default` analyst.
13
+ * - Model: each agent call resolves a `MastraModelConfig` via
14
+ * {@link buildModel} from `./model.js`. Per-agent `model` overrides
15
+ * (`AgentConfig["model"]` or a `modelId` string) flow through
16
+ * {@link buildAgents}.
17
+ * - Memory / storage: per-agent, built by {@link createMemoryBuilder}
18
+ * from `./memory.js`. Both auto-default to `true` when the
19
+ * `lakebase` plugin is registered (unless the caller passed
20
+ * `false` or a custom config). Storage namespaces per agent via
21
+ * `schemaName: "mastra_<agentId>"`; the vector store is a single
22
+ * shared singleton across every agent.
23
+ * - Server: the Express subapp wiring lives in `./server.js`.
24
+ * - HTTP: AppKit mounts this plugin under `/api/mastra`. `chatRoute`
25
+ * is registered at `/route/chat` (bound to `config.defaultAgent` or
26
+ * the first registered id) and `/route/chat/:agentId`, so the
27
+ * AI SDK transport URL is `/api/mastra/route/chat/<agentId>`.
28
+ */
29
+ import { Plugin, type IAppRouter, type ResourceRequirement } from "@databricks/appkit";
30
+ import type { Agent } from "@mastra/core/agent";
31
+ import { Mastra } from "@mastra/core/mastra";
32
+ import type { MastraPluginConfig } from "./config.js";
33
+ import { MastraServer } from "./server.js";
34
+ import { type ServingEndpointSummary } from "./serving.js";
35
+ /**
36
+ * AppKit plugin (registered name: `mastra`) that hosts Mastra agents
37
+ * with optional Lakebase-backed memory and AI SDK chat routes under
38
+ * the plugin mount (typically `/api/mastra`).
39
+ */
40
+ export declare class MastraPlugin extends Plugin<MastraPluginConfig> {
41
+ static manifest: {
42
+ name: "mastra";
43
+ displayName: string;
44
+ description: string;
45
+ stability: "beta";
46
+ resources: {
47
+ required: never[];
48
+ optional: Omit<ResourceRequirement, "required">[];
49
+ };
50
+ };
51
+ /**
52
+ * Tighten resource requirements based on which features are enabled.
53
+ * AppKit calls this at registration time (config-aware) so disabled
54
+ * features don't surface their resource asks to the host app.
55
+ */
56
+ static getResourceRequirements(config: MastraPluginConfig): ResourceRequirement[];
57
+ private log;
58
+ private built;
59
+ private mastra;
60
+ private mastraApp;
61
+ private mastraServer;
62
+ setup(): Promise<void>;
63
+ /**
64
+ * When the `lakebase` plugin is registered, auto-enable `storage`
65
+ * and `memory` unless the caller opted out explicitly (`false` or a
66
+ * custom config object). Run after `setup:complete` so the lookup
67
+ * is reliable: any plugin that registers itself synchronously is
68
+ * already in the registry by the time this fires.
69
+ */
70
+ private applyLakebaseAutoDefaults;
71
+ exports(): {
72
+ /**
73
+ * Ids of every registered agent in registration order. Matches
74
+ * AppKit `agents.list()` so callers can iterate the registry the
75
+ * same way under both plugins.
76
+ */
77
+ list: () => string[];
78
+ /**
79
+ * Look up a registered agent by id. Returns `null` (not
80
+ * undefined) when unknown so call sites can early-return without
81
+ * a separate `in` check.
82
+ */
83
+ get: (id: string) => Agent | null;
84
+ /**
85
+ * The agent `chatRoute` binds to when the client doesn't name
86
+ * one. Resolves to `config.defaultAgent`, the first registered
87
+ * id, or the built-in `default` fallback.
88
+ */
89
+ getDefault: () => Agent | null;
90
+ /** Underlying Mastra instance for advanced use (custom routes etc.). */
91
+ getMastra: () => Mastra<Record<string, Agent<any, import("@mastra/core/agent").ToolsInput, undefined, unknown>>, Record<string, import("@mastra/core/workflows").AnyWorkflow>, Record<string, import("@mastra/core/vector").MastraVector<any>>, Record<string, import("@mastra/core/tts").MastraTTS>, import("@mastra/core/logger").IMastraLogger, Record<string, import("@mastra/core/mcp").MCPServerBase<any>>, Record<string, import("@mastra/core/evals").MastraScorer<any, any, any, any>>, Record<string, import("@mastra/core/tools").ToolAction<any, any, any, any, any, any, unknown>>, Record<string, import("@mastra/core/processors").Processor<any, unknown>>, Record<string, import("@mastra/core/memory").MastraMemory>, Record<string, import("@mastra/core/channels").ChannelProvider>> | null;
92
+ /** Express subapp Mastra is mounted on; mostly for tests. */
93
+ getMastraServer: () => MastraServer | null;
94
+ /**
95
+ * Fetch the workspace's Model Serving endpoints (cached). Same
96
+ * payload the `GET /models` route returns; surfaced here so
97
+ * other plugins / scripts can introspect the catalogue without
98
+ * an HTTP round-trip. AppKit wraps this with `asUser(req)` for
99
+ * OBO scoping automatically.
100
+ */
101
+ listModels: () => Promise<ServingEndpointSummary[]>;
102
+ /**
103
+ * Force-evict cached endpoint listings via AppKit's
104
+ * `CacheManager`. Useful in tests or right after an admin
105
+ * deploys a new endpoint and doesn't want to wait for the TTL.
106
+ * Returns the underlying `CacheManager.delete`/`clear` promise.
107
+ */
108
+ clearModelsCache: (host?: string) => Promise<void>;
109
+ };
110
+ clientConfig(): Record<string, unknown>;
111
+ injectRoutes(router: IAppRouter): void;
112
+ /**
113
+ * Implementation backing both the `/models` route and the
114
+ * `listModels` export. Runs inside the AppKit user-context proxy so
115
+ * `getExecutionContext()` returns the OBO-scoped client.
116
+ */
117
+ private listModels;
118
+ private buildAgentAndServer;
119
+ }
/**
 * AppKit `ToPlugin` wrapper around {@link MastraPlugin}, typed with
 * `MastraPluginConfig` and the registered name `"mastra"`.
 * NOTE(review): presumably the factory host apps call (`mastra({...})`)
 * when listing plugins — confirm against AppKit's `ToPlugin` contract.
 */
export declare const mastra: import("@databricks/appkit").ToPlugin<typeof MastraPlugin, MastraPluginConfig, "mastra">;