@dbx-tools/appkit-mastra 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/plugin.ts ADDED
@@ -0,0 +1,269 @@
1
+ /**
2
+ * AppKit plugin that builds one or more Mastra `Agent` instances and
3
+ * mounts the `@mastra/express` server plus `@mastra/ai-sdk` `chatRoute`
4
+ * handlers. The UI message stream matches what `chatRoute()` emits, so
5
+ * the client can use `useChat()` from `@ai-sdk/react` without custom
6
+ * parsing.
7
+ *
8
+ * - Agents: registered through `config.agents` at plugin creation
9
+ * ({@link MastraAgentDefinition}). Each entry's `tools` field accepts
10
+ * either a plain record or a `(plugins) => tools` callback that gets
11
+ * a typed sibling-plugin index ({@link MastraPlugins}). Omit
12
+ * `config.agents` to get a single built-in `default` analyst.
13
+ * - Model: each agent call resolves a `MastraModelConfig` via
14
+ * {@link buildModel} from `./model.js`. Per-agent `model` overrides
15
+ * (`AgentConfig["model"]` or a `modelId` string) flow through
16
+ * {@link buildAgents}.
17
+ * - Memory / storage: per-agent, built by {@link createMemoryBuilder}
18
+ * from `./memory.js`. Both auto-default to `true` when the
19
+ * `lakebase` plugin is registered (unless the caller passed
20
+ * `false` or a custom config). Storage namespaces per agent via
21
+ * `schemaName: "mastra_<agentId>"`; the vector store is a single
22
+ * shared singleton across every agent.
23
+ * - Server: the Express subapp wiring lives in `./server.js`.
24
+ * - HTTP: AppKit mounts this plugin under `/api/mastra`. `chatRoute`
25
+ * is registered at `/route/chat` (bound to `config.defaultAgent` or
26
+ * the first registered id) and `/route/chat/:agentId`, so the
27
+ * AI SDK transport URL is `/api/mastra/route/chat/<agentId>`.
28
+ */
29
+
30
+ import {
31
+ genie,
32
+ getExecutionContext,
33
+ lakebase,
34
+ Plugin,
35
+ toPlugin,
36
+ type IAppRouter,
37
+ type PluginManifest,
38
+ type ResourceRequirement,
39
+ } from "@databricks/appkit";
40
+ import { logUtils, pluginUtils } from "@dbx-tools/appkit-shared";
41
+ import { chatRoute } from "@mastra/ai-sdk";
42
+ import type { Agent } from "@mastra/core/agent";
43
+ import { Mastra } from "@mastra/core/mastra";
44
+ import express from "express";
45
+
46
+ import { buildAgents, FALLBACK_AGENT_ID, type BuiltAgents } from "./agents.js";
47
+ import type { MastraClientConfig } from "@dbx-tools/appkit-mastra-shared";
48
+ import type { MastraPluginConfig } from "./config.js";
49
+ import { createMemoryBuilder, needsLakebase } from "./memory.js";
50
+ import { attachRoutePatchMiddleware, MastraServer } from "./server.js";
51
+ import {
52
+ clearServingEndpointsCache,
53
+ listServingEndpoints,
54
+ resolveServingConfig,
55
+ type ServingEndpointSummary,
56
+ } from "./serving.js";
57
+
58
// Manifests of the sibling `genie` and `lakebase` plugins. Their
// `resources.required` lists are re-used by `MastraPlugin` below.
const { manifest: GENIE_MANIFEST } = pluginUtils.data(genie).plugin;
const { manifest: LAKEBASE_MANIFEST } = pluginUtils.data(lakebase).plugin;
60
+
61
/**
 * AppKit plugin registered under the name `mastra`. It builds the
 * configured Mastra agents once every sibling plugin has finished
 * `setup()`, hosts them on a private Express subapp, and forwards
 * requests arriving at the plugin router to that subapp.
 */
export class MastraPlugin extends Plugin<MastraPluginConfig> {
  static manifest = {
    name: "mastra",
    displayName: "Mastra",
    description:
      "Builds a Mastra Agent with user-scoped workspace auth (asUser) " +
      "and optional Postgres-backed Mastra Memory via the `lakebase` plugin.",
    stability: "beta",
    resources: {
      required: [],
      // Everything the sibling plugins require is merely optional here;
      // `getResourceRequirements` promotes what the config really needs.
      optional: [
        ...GENIE_MANIFEST.resources.required,
        ...LAKEBASE_MANIFEST.resources.required,
      ],
    },
  } satisfies PluginManifest<"mastra">;

  /**
   * Config-aware resource requirements: the `lakebase` plugin's
   * required resources are reported (flagged `required: true`) only
   * when `needsLakebase(config)` holds; otherwise the list is empty.
   *
   * @param config - Plugin configuration being registered.
   * @returns The resource requirements implied by `config`.
   */
  static getResourceRequirements(config: MastraPluginConfig): ResourceRequirement[] {
    const activeManifests: PluginManifest<string>[] = needsLakebase(config)
      ? [LAKEBASE_MANIFEST]
      : [];

    const requirements: ResourceRequirement[] = [];
    for (const manifest of activeManifests) {
      const promoted = manifest.resources.required.map(
        (resource) => ({ ...resource, required: true }) as ResourceRequirement,
      );
      requirements.push(...promoted);
    }
    return requirements;
  }

  private log = logUtils.logger(this);
  private built: BuiltAgents | null = null;
  private mastra: Mastra | null = null;
  private mastraApp: express.Express | null = null;
  private mastraServer: MastraServer | null = null;

  /**
   * Defers all real work to the `setup:complete` lifecycle event so
   * sibling plugins (e.g. `lakebase`) have finished their own
   * `setup()` before agents, memory and the server are built.
   */
  override async setup(): Promise<void> {
    const onSetupComplete = async (): Promise<void> => {
      this.applyLakebaseAutoDefaults();
      this.log.info("setup:complete");
      await this.buildAgentAndServer();
    };
    this.context?.onLifecycle("setup:complete", onSetupComplete);
  }

  /**
   * Turns `storage` and `memory` on when a `lakebase` plugin instance
   * is registered and the caller left the respective option
   * `undefined`. Any explicit value (including `false` or a custom
   * config object) is left untouched.
   */
  private applyLakebaseAutoDefaults(): void {
    const lakebaseInstance = pluginUtils.instance(this.context, lakebase);
    if (lakebaseInstance === undefined) {
      return;
    }
    if (this.config.storage === undefined) {
      this.config.storage = true;
    }
    if (this.config.memory === undefined) {
      this.config.memory = true;
    }
  }

  /** Programmatic API surfaced to other plugins and host code. */
  override exports() {
    return {
      /** Ids of all built agents, in the key order of the agent map. */
      list: (): string[] => {
        const agents = this.built?.agents ?? {};
        return Object.keys(agents);
      },
      /** Agent registered under `id`, or `null` when there is none. */
      get: (id: string): Agent | null => {
        const agent = this.built?.agents[id];
        return agent ?? null;
      },
      /**
       * Agent stored under the built `defaultAgentId`, or `null` when
       * agents have not been built yet (or the id is not present).
       */
      getDefault: (): Agent | null => {
        const built = this.built;
        if (!built) return null;
        return built.agents[built.defaultAgentId] ?? null;
      },
      /** Underlying `Mastra` instance (`null` until agents are built). */
      getMastra: () => this.mastra,
      /** The `MastraServer` wrapper (`null` until agents are built). */
      getMastraServer: () => this.mastraServer,
      /**
       * Model Serving endpoints of the current workspace; same data
       * the `GET /models` route returns, without the HTTP round-trip.
       */
      listModels: (): Promise<ServingEndpointSummary[]> => this.listModels(),
      /**
       * Evicts cached endpoint listings: one workspace when `host` is
       * given, otherwise everything (see `clearServingEndpointsCache`).
       */
      clearModelsCache: (host?: string): Promise<void> =>
        clearServingEndpointsCache(host),
    };
  }

  /**
   * Paths and agent ids published to the client. All paths are
   * derived from `/api/<this.name>`. The value is built as a typed
   * `MastraClientConfig` and then widened to the base-class return
   * type.
   */
  override clientConfig(): Record<string, unknown> {
    const basePath = `/api/${this.name}`;
    const chatPath = `${basePath}/route/chat`;
    const agentIds = Object.keys(this.built?.agents ?? {});
    const config: MastraClientConfig = {
      basePath,
      chatPath,
      chatPathTemplate: `${chatPath}/:agentId`,
      modelsPath: `${basePath}/models`,
      defaultAgent: this.built?.defaultAgentId ?? FALLBACK_AGENT_ID,
      agents: agentIds,
    };
    return config as unknown as Record<string, unknown>;
  }

  /**
   * Registers `GET /models` first, then a catch-all that hands every
   * other request to the Mastra subapp (or answers 503 while the
   * subapp has not been built yet).
   */
  override injectRoutes(router: IAppRouter): void {
    // Failures are forwarded to Express through `next`.
    router.get("/models", (req, res, next) => {
      const pending = this.asUser(req).listModels();
      pending.then((endpoints) => res.json({ endpoints })).catch(next);
    });

    router.use("", (req, res, next) => {
      if (this.mastraApp === null) {
        return res.status(503).end();
      }
      // The subapp is invoked through the `asUser(req)` proxy.
      return this.asUser(req).mastraApp!(req, res, next);
    });
  }

  /**
   * Shared implementation of the `/models` route and the `listModels`
   * export. Reads the workspace client from `getExecutionContext()`
   * and lists endpoints through the cache, keyed by workspace host.
   */
  private async listModels(): Promise<ServingEndpointSummary[]> {
    const { client } = getExecutionContext();
    const hostUrl = await client.config.getHost();
    const { ttlMs } = resolveServingConfig(this.config);
    return listServingEndpoints(client, hostUrl.toString(), { ttlMs });
  }

  /**
   * Builds the agents, the `Mastra` instance, the Express subapp and
   * the `MastraServer`, then initialises the server. Fields are
   * assigned as soon as each piece exists.
   */
  private async buildAgentAndServer(): Promise<void> {
    // A memory builder is only created when the config needs Lakebase.
    let memoryBuilder: ReturnType<typeof createMemoryBuilder> | undefined;
    if (needsLakebase(this.config)) {
      memoryBuilder = createMemoryBuilder(this.config, this.context);
    } else {
      memoryBuilder = undefined;
    }

    const built = await buildAgents({
      config: this.config,
      context: this.context,
      memoryBuilder,
      log: this.log,
    });
    this.built = built;

    const mastraInstance = new Mastra({ agents: built.agents });
    this.mastra = mastraInstance;

    const subapp = express();
    this.mastraApp = subapp;
    attachRoutePatchMiddleware(subapp);

    // Chat routes are handed to the `MastraServer` constructor
    // directly: one bound to the default agent, one taking `:agentId`.
    const server = new MastraServer(this.config, {
      app: subapp,
      mastra: mastraInstance,
      prefix: "",
      customApiRoutes: [
        chatRoute({ path: "/route/chat", agent: built.defaultAgentId }),
        chatRoute({ path: "/route/chat/:agentId" }),
      ],
    });
    this.mastraServer = server;
    await server.init();
  }
}

/** Plugin factory produced by `toPlugin` for {@link MastraPlugin}. */
export const mastra = toPlugin(MastraPlugin);
package/src/server.ts ADDED
@@ -0,0 +1,130 @@
1
+ /**
2
+ * Express-layer plumbing for the Mastra plugin: a `MastraServer` that
3
+ * stamps the per-request `RequestContext`, and a route-patch middleware
4
+ * that lets `@mastra/ai-sdk` `chatRoute` work behind an Express mount
5
+ * point.
6
+ */
7
+
8
+ import { getExecutionContext } from "@databricks/appkit";
9
+ import { httpUtils, logUtils, stringUtils } from "@dbx-tools/appkit-shared";
10
+ import {
11
+ MASTRA_RESOURCE_ID_KEY,
12
+ MASTRA_THREAD_ID_KEY,
13
+ type RequestContext,
14
+ } from "@mastra/core/request-context";
15
+ import { MastraServer as MastraServerExpress } from "@mastra/express";
16
+ import type express from "express";
17
+ import { randomUUID } from "node:crypto";
18
+
19
+ import { MASTRA_USER_KEY, type MastraPluginConfig, type User } from "./config.js";
20
+ import {
21
+ extractModelOverride,
22
+ MASTRA_MODEL_OVERRIDE_KEY,
23
+ resolveServingConfig,
24
+ } from "./serving.js";
25
+
26
/**
 * `@mastra/express` subclass that stamps each request's
 * `RequestContext` with the AppKit user, a resource id, a thread id
 * backed by an HTTP-only session cookie, and (when allowed) a
 * per-request model override.
 *
 * The cookie name is produced by `stringUtils.toIdentifierWithOptions`
 * from the parts `appkit`, the plugin name and `sessionId`, joined
 * with `_`.
 */
export class MastraServer extends MastraServerExpress {
  private log: logUtils.Logger;

  /**
   * @param config - Plugin configuration; read on every request for
   *   the cookie name and the model-override settings.
   * @param args - Forwarded unchanged to the `@mastra/express`
   *   `MastraServer` constructor.
   */
  constructor(
    private config: MastraPluginConfig,
    ...args: ConstructorParameters<typeof MastraServerExpress>
  ) {
    super(...args);
    this.log = logUtils.logger(config);
  }

  /**
   * Runs the base auth middleware registration, then appends a
   * middleware that fills in the request context for every request.
   */
  override registerAuthMiddleware(): void {
    super.registerAuthMiddleware();
    this.app.use((req, res, next) => {
      // Identify the caller: an on-behalf-of user when the execution
      // context carries `userId`, otherwise the service user.
      const executionContext = getExecutionContext();
      const user: User = {
        id:
          "userId" in executionContext
            ? executionContext.userId
            : executionContext.serviceUserId,
        executionContext,
      };
      const requestContext = res.locals.requestContext! as RequestContext;
      requestContext.set(MASTRA_USER_KEY, user);

      // Only default the resource id; a value already on the context wins.
      if (!requestContext.get(MASTRA_RESOURCE_ID_KEY)) {
        this.log.debug(`Setting resource id: ${user.id}`);
        requestContext.set(MASTRA_RESOURCE_ID_KEY, user.id);
      }

      // Session cookie -> thread id. `config.name` is an optional
      // override, so fall back to the registered plugin name instead
      // of asserting it is present.
      const cookies = httpUtils.parseCookies(req.headers.cookie);
      const cookieName = stringUtils.toIdentifierWithOptions(
        { delimiter: "_", distinct: true },
        "appkit",
        this.config.name ?? "mastra",
        "sessionId",
      );
      let sessionId = cookies[cookieName];
      if (!sessionId) {
        // First visit: mint a session id and hand it back as a cookie.
        sessionId = randomUUID();
        res.cookie(cookieName, sessionId, {
          httpOnly: true,
          sameSite: "lax",
          secure: req.secure,
          path: "/",
        });
      }
      res.locals.sessionId = sessionId;
      if (!requestContext.get(MASTRA_THREAD_ID_KEY)) {
        this.log.debug(`Setting thread id: ${sessionId}`);
        requestContext.set(MASTRA_THREAD_ID_KEY, sessionId);
      }

      // Per-request model override, honored only while
      // `resolveServingConfig` reports `allowOverride`. The value is
      // extracted from headers, query and body by `extractModelOverride`.
      const serving = resolveServingConfig(this.config);
      if (serving.allowOverride) {
        const override = extractModelOverride({
          headers: req.headers as Record<string, string | string[] | undefined>,
          query: req.query as Record<string, unknown>,
          body: req.body,
        });
        if (override) {
          this.log.debug(`Model override: ${override}`);
          requestContext.set(MASTRA_MODEL_OVERRIDE_KEY, override);
        }
      }
      next();
    });
  }
}
101
+
102
/**
 * Installs a middleware that lets `chatRoute` work when the Mastra
 * Express app is hosted under a parent mount path (e.g. `/api/mastra`).
 *
 * For requests whose mount-relative path is `/route` or starts with
 * `/route/`, `req.originalUrl` is overwritten with the mount-relative
 * URL. The `@mastra/express` custom-route dispatcher is described as
 * forwarding `req.originalUrl`, which otherwise still contains the
 * parent mount prefix and therefore never matches the literal
 * `chatRoute` paths.
 *
 * The replacement is `req.url` rather than `req.path`: both are
 * mount-relative, but `req.path` is the pathname only, so using it
 * would silently drop the query string (e.g. `?model=...`).
 *
 * All other requests pass through untouched.
 *
 * @param app - Express app the middleware is attached to.
 */
export function attachRoutePatchMiddleware(app: express.Express): void {
  app.use((req, _res, next) => {
    const isChat = req.path === "/route" || req.path.startsWith("/route/");
    if (isChat) {
      // Mount-relative URL, query string included.
      req.originalUrl = req.url;
    }
    next();
  });
}
package/src/serving.ts ADDED
@@ -0,0 +1,294 @@
1
+ /**
2
+ * Dynamic model resolution against Databricks Model Serving.
3
+ *
4
+ * Three concerns live here:
5
+ *
6
+ * 1. **Listing** - {@link listServingEndpoints} pulls the workspace's
7
+ * `/serving-endpoints` via the SDK and caches the result per host
8
+ * with a TTL. Concurrent callers share one in-flight promise (the
9
+ * same coalescing pattern as Python's `cachetools-async`).
10
+ * 2. **Fuzzy matching** - {@link resolveModelId} runs the user's input
11
+ * through `fuse.js` extended search so loose tokens like
12
+ * `"claude sonnet"` snap to `databricks-claude-sonnet-4-6` even
13
+ * when typed without the full endpoint name.
14
+ * 3. **Per-request override** - {@link extractModelOverride} pulls a
15
+ * model name from the `X-Mastra-Model` header, `?model=` query
16
+ * string, or `model` body field so the same agent can be exercised
17
+ * against different endpoints without redeploying.
18
+ *
19
+ * `model.ts` glues these together inside the per-step model resolver;
20
+ * `plugin.ts` exposes the cached list at `GET /models`.
21
+ */
22
+
23
+ import { CacheManager, type getExecutionContext } from "@databricks/appkit";
24
+ import { stringUtils } from "@dbx-tools/appkit-shared";
25
+ import Fuse from "fuse.js";
26
+
27
+ import type { ServingEndpointSummary } from "@dbx-tools/appkit-mastra-shared";
28
+ import type { MastraPluginConfig } from "./config.js";
29
+
30
+ export type { ServingEndpointSummary };
31
+
32
/**
 * Workspace client type, taken from the `client` property of whatever
 * AppKit's `getExecutionContext()` returns, so this module never has
 * to name the SDK's own client type.
 */
type WorkspaceClientLike = ReturnType<typeof getExecutionContext>["client"];

/** `RequestContext` key holding the per-request model override. */
export const MASTRA_MODEL_OVERRIDE_KEY = "mastra__model_override";

/** Header name (lower-case) checked for a per-request model override. */
export const MODEL_OVERRIDE_HEADER = "x-mastra-model";

/** Query-string parameter checked for a per-request model override. */
export const MODEL_OVERRIDE_QUERY = "model";

/** Body fields checked for a per-request model override, highest priority first. */
export const MODEL_OVERRIDE_BODY_FIELDS = ["model", "modelId"] as const;

/** Default endpoint-cache TTL: five minutes, expressed in milliseconds. */
const DEFAULT_TTL_MS = 5 * 60 * 1_000;

/** Default Fuse.js threshold; a fuzzy match scoring at or below it is accepted. */
const DEFAULT_FUZZY_THRESHOLD = 0.4;

/** First key part of every cached endpoint listing. */
const CACHE_KEY_NAMESPACE = "mastra:serving-endpoints";

/**
 * Fixed `userKey` passed to the cache for endpoint listings. Because
 * the key is the same for every caller, all users of one workspace
 * host share a single cached listing instead of one per user.
 */
const SHARED_USER_KEY = "mastra-shared";
74
+
75
/**
 * Lists the Model Serving endpoints visible to `client`, going through
 * AppKit's `CacheManager` so repeated calls for the same workspace
 * host reuse one cached listing.
 *
 * The cache entry is keyed by the serving-endpoints namespace plus
 * `host`, and stored under a user key shared by all callers. Errors
 * thrown by the cache or by the underlying fetch are not caught here.
 *
 * @param client - Workspace client used to fetch on a cache miss.
 * @param host - Workspace host; part of the cache key.
 * @param opts.ttlMs - TTL for this call in milliseconds (module
 *   default when omitted). Rounded to whole seconds, minimum one
 *   second, before being handed to the cache.
 * @returns The endpoint summaries for the workspace.
 */
export async function listServingEndpoints(
  client: WorkspaceClientLike,
  host: string,
  opts: { ttlMs?: number } = {},
): Promise<ServingEndpointSummary[]> {
  const requestedTtlMs = opts.ttlMs ?? DEFAULT_TTL_MS;
  const ttl = Math.max(1, Math.round(requestedTtlMs / 1000));

  const cache = CacheManager.getInstanceSync();
  const keyParts = [CACHE_KEY_NAMESPACE, host];
  const loadFresh = () => fetchEndpoints(client);
  return cache.getOrExecute(keyParts, loadFresh, SHARED_USER_KEY, { ttl });
}
110
+
111
/**
 * Drains `client.servingEndpoints.list()` into plain summary objects.
 * Entries without a name are skipped; `task`, `state` (the stringified
 * `state.ready`) and `description` are only included when defined.
 */
async function fetchEndpoints(
  client: WorkspaceClientLike,
): Promise<ServingEndpointSummary[]> {
  const summaries: ServingEndpointSummary[] = [];
  for await (const endpoint of client.servingEndpoints.list()) {
    const name = endpoint.name;
    if (!name) {
      continue;
    }
    const task = endpoint.task;
    const ready = endpoint.state?.ready;
    const description = endpoint.description;
    summaries.push({
      name,
      ...(task !== undefined ? { task } : {}),
      ...(ready !== undefined ? { state: String(ready) } : {}),
      ...(description !== undefined ? { description } : {}),
    });
  }
  return summaries;
}
126
+
127
/**
 * Evicts cached endpoint listings from AppKit's `CacheManager`.
 *
 * @param host - When given (and non-empty), only that workspace's
 *   listing is deleted. Otherwise `clear()` is called on the manager,
 *   which is not limited to this module's entries.
 */
export async function clearServingEndpointsCache(host?: string): Promise<void> {
  const manager = CacheManager.getInstanceSync();
  if (!host) {
    await manager.clear();
    return;
  }
  const entryKey = manager.generateKey([CACHE_KEY_NAMESPACE, host], SHARED_USER_KEY);
  await manager.delete(entryKey);
}
143
+
144
/**
 * Outcome of resolving a user-supplied model name against the
 * endpoint list (see `resolveModelId`).
 */
export interface ResolvedModel {
  /** Name of the matched endpoint, or the caller's input when nothing matched. */
  modelId: string;
  /** `true` for an exact match or a fuzzy match within the threshold. */
  matched: boolean;
  /** Match score (`0` for an exact match); absent when nothing matched. */
  score?: number;
}
156
+
157
/** Tuning knobs for `resolveModelId`. */
export interface ResolveModelOptions {
  /**
   * Threshold handed to Fuse.js and also used as the upper bound on
   * an accepted score. Falls back to the module default (`0.4`).
   */
  threshold?: number;
}
162
+
163
/**
 * Maps a user-supplied model name onto one of the known serving
 * endpoints.
 *
 * Resolution order:
 *
 * 1. An empty endpoint list returns the input untouched (unmatched).
 * 2. A name equal to the input is returned as a match with score `0`.
 * 3. Otherwise the input is tokenized by the shared tokenizer
 *    (lower-cased), the tokens are joined with spaces, and the result
 *    is run through a Fuse.js extended search over endpoint names.
 *    The top hit is accepted when its score does not exceed the
 *    threshold.
 * 4. In every other case the input is returned unchanged and flagged
 *    as unmatched.
 *
 * @param input - Model name as typed by the user.
 * @param endpoints - Candidate endpoints to match against.
 * @param opts - Optional threshold override.
 * @returns The resolved model id plus match metadata.
 */
export function resolveModelId(
  input: string,
  endpoints: readonly ServingEndpointSummary[],
  opts: ResolveModelOptions = {},
): ResolvedModel {
  const unmatched = (): ResolvedModel => ({ modelId: input, matched: false });

  if (endpoints.length === 0) {
    return unmatched();
  }

  // An exact hit makes fuzzy search unnecessary.
  if (endpoints.some((endpoint) => endpoint.name === input)) {
    return { modelId: input, matched: true, score: 0 };
  }

  const threshold = opts.threshold ?? DEFAULT_FUZZY_THRESHOLD;
  const index = new Fuse(endpoints, {
    keys: ["name"],
    threshold,
    ignoreLocation: true,
    includeScore: true,
    useExtendedSearch: true,
    isCaseSensitive: false,
  });

  // Space-separated tokens form the extended-search query.
  const tokens = Array.from(
    stringUtils.tokenizeWithOptions({ lowerCase: true, camelCase: false }, input),
  );
  const query = tokens.join(" ");
  if (!query) {
    return unmatched();
  }

  const [top] = index.search(query);
  const withinThreshold = (top?.score ?? 0) <= threshold;
  if (top?.item.name && withinThreshold) {
    return { modelId: top.item.name, matched: true, score: top.score };
  }
  return unmatched();
}
219
+
220
/**
 * The slice of an HTTP request that `extractModelOverride` looks at.
 * Declared structurally so the helper does not depend on Express
 * types; every part is optional.
 */
export interface ModelOverrideRequest {
  /** Request headers keyed by header name. */
  headers?: Record<string, string | string[] | undefined>;
  /** Parsed query-string parameters. */
  query?: Record<string, unknown> | undefined;
  /** Parsed request body; only inspected when it is an object. */
  body?: unknown;
}
230
+
231
/**
 * Pulls a per-request model override out of a request, checking the
 * sources in priority order:
 *
 * 1. The `X-Mastra-Model` header (matched case-insensitively).
 * 2. The `?model=` query-string parameter.
 * 3. A body field, `model` before `modelId`, when the body is an
 *    object.
 *
 * Each candidate is passed through `stringUtils.firstNonEmpty`; the
 * first one that yields a truthy value wins.
 *
 * @param req - Headers, query and body of the incoming request.
 * @returns The override, or `null` when no source provides one.
 */
export function extractModelOverride(req: ModelOverrideRequest): string | null {
  const headers = req.headers;
  if (headers) {
    // Fast path: header maps with lower-cased keys.
    let rawHeader = headers[MODEL_OVERRIDE_HEADER];
    if (rawHeader === undefined) {
      // Header maps from other adapters may keep the original casing
      // (e.g. `X-Mastra-Model`), so fall back to a case-insensitive scan.
      for (const [key, value] of Object.entries(headers)) {
        if (value !== undefined && key.toLowerCase() === MODEL_OVERRIDE_HEADER) {
          rawHeader = value;
          break;
        }
      }
    }
    const headerVal = stringUtils.firstNonEmpty(rawHeader);
    if (headerVal) return headerVal;
  }
  if (req.query) {
    const queryVal = stringUtils.firstNonEmpty(req.query[MODEL_OVERRIDE_QUERY]);
    if (queryVal) return queryVal;
  }
  if (req.body && typeof req.body === "object") {
    const record = req.body as Record<string, unknown>;
    for (const field of MODEL_OVERRIDE_BODY_FIELDS) {
      const bodyVal = stringUtils.firstNonEmpty(record[field]);
      if (bodyVal) return bodyVal;
    }
  }
  return null;
}
266
+
267
/**
 * Reads the model-serving knobs from the plugin config and fills in
 * defaults, so every caller works from the same resolved values.
 *
 * - `ttlMs` / `threshold` fall back to the module defaults.
 * - `fuzzy` / `allowOverride` are on unless the config sets the
 *   corresponding option to exactly `false`.
 * - `fallbacks` uses `config.defaultModelFallbacks` when present,
 *   otherwise the `defaultFallbacks` argument.
 *
 * @param config - Plugin configuration to read from.
 * @param defaultFallbacks - Fallback model ids used when the config
 *   does not provide its own list.
 * @returns The resolved serving configuration.
 */
export function resolveServingConfig(
  config: MastraPluginConfig,
  defaultFallbacks: readonly string[] = [],
): {
  ttlMs: number;
  threshold: number;
  fuzzy: boolean;
  allowOverride: boolean;
  fallbacks: readonly string[];
} {
  const ttlMs = config.modelCacheTtlMs ?? DEFAULT_TTL_MS;
  const threshold = config.modelFuzzyThreshold ?? DEFAULT_FUZZY_THRESHOLD;
  const fuzzy = !(config.modelFuzzyMatch === false);
  const allowOverride = !(config.modelOverride === false);
  const fallbacks = config.defaultModelFallbacks ?? defaultFallbacks;
  return { ttlMs, threshold, fuzzy, allowOverride, fallbacks };
}