@dbx-tools/appkit-mastra 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +593 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +18 -0
- package/dist/src/agents.d.ts +306 -0
- package/dist/src/agents.js +379 -0
- package/dist/src/config.d.ts +170 -0
- package/dist/src/config.js +12 -0
- package/dist/src/genie.d.ts +109 -0
- package/dist/src/genie.js +271 -0
- package/dist/src/memory.d.ts +79 -0
- package/dist/src/memory.js +197 -0
- package/dist/src/model.d.ts +159 -0
- package/dist/src/model.js +423 -0
- package/dist/src/plugin.d.ts +120 -0
- package/dist/src/plugin.js +235 -0
- package/dist/src/server.d.ts +42 -0
- package/dist/src/server.js +109 -0
- package/dist/src/serving.d.ts +156 -0
- package/dist/src/serving.js +214 -0
- package/index.ts +36 -0
- package/package.json +55 -0
- package/src/agents.ts +675 -0
- package/src/config.ts +179 -0
- package/src/genie.ts +354 -0
- package/src/memory.ts +245 -0
- package/src/model.ts +491 -0
- package/src/plugin.ts +269 -0
- package/src/server.ts +130 -0
- package/src/serving.ts +294 -0
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AppKit plugin that builds one or more Mastra `Agent` instances and
|
|
3
|
+
* mounts the `@mastra/express` server plus `@mastra/ai-sdk` `chatRoute`
|
|
4
|
+
* handlers. The UI message stream matches what `chatRoute()` emits, so
|
|
5
|
+
* the client can use `useChat()` from `@ai-sdk/react` without custom
|
|
6
|
+
* parsing.
|
|
7
|
+
*
|
|
8
|
+
* - Agents: registered through `config.agents` at plugin creation
|
|
9
|
+
* ({@link MastraAgentDefinition}). Each entry's `tools` field accepts
|
|
10
|
+
* either a plain record or a `(plugins) => tools` callback that gets
|
|
11
|
+
* a typed sibling-plugin index ({@link MastraPlugins}). Omit
|
|
12
|
+
* `config.agents` to get a single built-in `default` analyst.
|
|
13
|
+
* - Model: each agent call resolves a `MastraModelConfig` via
|
|
14
|
+
* {@link buildModel} from `./model.js`. Per-agent `model` overrides
|
|
15
|
+
* (`AgentConfig["model"]` or a `modelId` string) flow through
|
|
16
|
+
* {@link buildAgents}.
|
|
17
|
+
* - Memory / storage: per-agent, built by {@link createMemoryBuilder}
|
|
18
|
+
* from `./memory.js`. Both auto-default to `true` when the
|
|
19
|
+
* `lakebase` plugin is registered (unless the caller passed
|
|
20
|
+
* `false` or a custom config). Storage namespaces per agent via
|
|
21
|
+
* `schemaName: "mastra_<agentId>"`; the vector store is a single
|
|
22
|
+
* shared singleton across every agent.
|
|
23
|
+
* - Server: the Express subapp wiring lives in `./server.js`.
|
|
24
|
+
* - HTTP: AppKit mounts this plugin under `/api/mastra`. `chatRoute`
|
|
25
|
+
* is registered at `/route/chat` (bound to `config.defaultAgent` or
|
|
26
|
+
* the first registered id) and `/route/chat/:agentId`, so the
|
|
27
|
+
* AI SDK transport URL is `/api/mastra/route/chat/<agentId>`.
|
|
28
|
+
*/
|
|
29
|
+
import { genie, getExecutionContext, lakebase, Plugin, toPlugin, } from "@databricks/appkit";
|
|
30
|
+
import { logUtils, pluginUtils } from "@dbx-tools/appkit-shared";
|
|
31
|
+
import { chatRoute } from "@mastra/ai-sdk";
|
|
32
|
+
import { Mastra } from "@mastra/core/mastra";
|
|
33
|
+
import express from "express";
|
|
34
|
+
import { buildAgents, FALLBACK_AGENT_ID } from "./agents.js";
|
|
35
|
+
import { createMemoryBuilder, needsLakebase } from "./memory.js";
|
|
36
|
+
import { attachRoutePatchMiddleware, MastraServer } from "./server.js";
|
|
37
|
+
import { clearServingEndpointsCache, listServingEndpoints, resolveServingConfig, } from "./serving.js";
|
|
38
|
+
// Static manifests of the sibling `genie` and `lakebase` plugins. Their
// `resources.required` lists are reused below to describe what this
// plugin may ask the host app for.
const { manifest: GENIE_MANIFEST } = pluginUtils.data(genie).plugin;
const { manifest: LAKEBASE_MANIFEST } = pluginUtils.data(lakebase).plugin;
|
|
40
|
+
/**
 * AppKit plugin (registered name: `mastra`) that hosts Mastra agents
 * with optional Lakebase-backed memory and AI SDK chat routes under
 * the plugin mount (typically `/api/mastra`).
 *
 * Agents, the `Mastra` instance and the Express subapp are only built
 * once the `setup:complete` lifecycle hook fires. Until then the
 * exports report an empty registry and the catch-all route answers
 * `503`.
 */
export class MastraPlugin extends Plugin {
    static manifest = {
        name: "mastra",
        displayName: "Mastra",
        description: "Builds a Mastra Agent with user-scoped workspace auth (asUser) " +
            "and optional Postgres-backed Mastra Memory via the `lakebase` plugin.",
        stability: "beta",
        resources: {
            required: [],
            optional: [
                ...GENIE_MANIFEST.resources.required,
                ...LAKEBASE_MANIFEST.resources.required,
            ],
        },
    };
    /**
     * Tighten resource requirements based on which features are enabled.
     * AppKit calls this at registration time (config-aware) so disabled
     * features don't surface their resource asks to the host app.
     *
     * @param config - Plugin config; only inspected through `needsLakebase`.
     * @returns The Lakebase plugin's required resources, each flagged
     *   `required: true`, when `needsLakebase(config)` holds; otherwise
     *   an empty array.
     */
    static getResourceRequirements(config) {
        const resources = [];
        const enabledManifests = [];
        if (needsLakebase(config)) {
            enabledManifests.push(LAKEBASE_MANIFEST);
        }
        for (const m of enabledManifests) {
            for (const resource of m.resources.required) {
                resources.push({ ...resource, required: true });
            }
        }
        return resources;
    }
    /** Logger bound to this plugin instance. */
    log = logUtils.logger(this);
    /** Result of `buildAgents`; `null` until `buildAgentAndServer` has run. */
    built = null;
    /** `Mastra` instance holding the built agents; `null` until built. */
    mastra = null;
    /** Express subapp the Mastra server is attached to; `null` until built. */
    mastraApp = null;
    /** `MastraServer` adapter wired onto `mastraApp`; `null` until built. */
    mastraServer = null;
    /**
     * Defers the actual build to the `setup:complete` lifecycle event.
     */
    async setup() {
        // Wait until sibling plugins (e.g. `lakebase`) finish `setup()` so
        // the lakebase pool is valid when storage/memory are enabled.
        this.context?.onLifecycle("setup:complete", async () => {
            this.applyLakebaseAutoDefaults();
            this.log.info("setup:complete");
            await this.buildAgentAndServer();
        });
    }
    /**
     * When the `lakebase` plugin is registered, auto-enable `storage`
     * and `memory` unless the caller opted out explicitly (`false` or a
     * custom config object). Run after `setup:complete` so the lookup
     * is reliable: any plugin that registers itself synchronously is
     * already in the registry by the time this fires.
     *
     * Mutates `this.config` in place; only `undefined` values are replaced.
     */
    applyLakebaseAutoDefaults() {
        const hasLakebase = pluginUtils.instance(this.context, lakebase) !== undefined;
        if (!hasLakebase)
            return;
        if (this.config.storage === undefined)
            this.config.storage = true;
        if (this.config.memory === undefined)
            this.config.memory = true;
    }
    /**
     * Public API handed to other plugins and scripts.
     */
    exports() {
        return {
            /**
             * Ids of every registered agent in registration order. Matches
             * AppKit `agents.list()` so callers can iterate the registry the
             * same way under both plugins.
             */
            list: () => Object.keys(this.built?.agents ?? {}),
            /**
             * Look up a registered agent by id. Returns `null` (not
             * undefined) when unknown so call sites can early-return without
             * a separate `in` check.
             */
            get: (id) => {
                const agents = this.built?.agents;
                // Own-property check: a bare `agents[id]` lookup would hand back
                // inherited members (`constructor`, `toString`, ...) for ids
                // that were never registered, disagreeing with `list()`.
                if (!agents || !Object.prototype.hasOwnProperty.call(agents, id))
                    return null;
                return agents[id] ?? null;
            },
            /**
             * The agent `chatRoute` binds to when the client doesn't name
             * one. Resolves to `config.defaultAgent`, the first registered
             * id, or the built-in `default` fallback.
             */
            getDefault: () => (this.built && this.built.agents[this.built.defaultAgentId]) ?? null,
            /** Underlying Mastra instance for advanced use (custom routes etc.). */
            getMastra: () => this.mastra,
            /**
             * The `MastraServer` adapter attached to the Express subapp
             * (`null` until built); mostly for tests.
             */
            getMastraServer: () => this.mastraServer,
            /**
             * Fetch the workspace's Model Serving endpoints (cached). Same
             * payload the `GET /models` route returns; surfaced here so
             * other plugins / scripts can introspect the catalogue without
             * an HTTP round-trip. AppKit wraps this with `asUser(req)` for
             * OBO scoping automatically.
             */
            listModels: () => this.listModels(),
            /**
             * Force-evict cached endpoint listings via AppKit's
             * `CacheManager`. Useful in tests or right after an admin
             * deploys a new endpoint and doesn't want to wait for the TTL.
             * Returns the underlying `CacheManager.delete`/`clear` promise.
             */
            clearModelsCache: (host) => clearServingEndpointsCache(host),
        };
    }
    /**
     * Paths and agent ids published to the client.
     *
     * @returns `basePath`, `chatPath`, `chatPathTemplate`, `modelsPath`,
     *   `defaultAgent` and `agents`. Before the agents are built,
     *   `defaultAgent` is `FALLBACK_AGENT_ID` and `agents` is empty.
     */
    clientConfig() {
        // AppKit mounts every plugin at `/api/<plugin.name>`. `this.name`
        // honors `config.name` overrides, so the published paths stay
        // accurate if someone remounts the plugin under a custom id.
        // Return widens to `Record<string, unknown>` to satisfy the
        // base-class signature; consumers read it through the typed
        // `MastraClientConfig` shape via `usePluginClientConfig<...>(...)`.
        const basePath = `/api/${this.name}`;
        const config = {
            basePath,
            chatPath: `${basePath}/route/chat`,
            chatPathTemplate: `${basePath}/route/chat/:agentId`,
            modelsPath: `${basePath}/models`,
            defaultAgent: this.built?.defaultAgentId ?? FALLBACK_AGENT_ID,
            agents: Object.keys(this.built?.agents ?? {}),
        };
        return config;
    }
    /**
     * Registers `GET /models` and a catch-all that forwards every other
     * request to the Mastra Express subapp.
     *
     * @param router - Router AppKit mounts at this plugin's base path.
     */
    injectRoutes(router) {
        // `GET /models` exposes the cached endpoint list so clients can
        // populate model pickers, validate `?model=` choices, etc. Must
        // be registered before the catch-all that forwards everything to
        // the Mastra subapp. Errors propagate to Express's default error
        // handler via `next(err)` so callers see the real SDK message.
        router.get("/models", (req, res, next) => {
            this.asUser(req)
                .listModels()
                .then((endpoints) => res.json({ endpoints }))
                .catch(next);
        });
        router.use("", (req, res, next) => {
            // The subapp only exists after `setup:complete`; answer 503 until then.
            if (!this.mastraApp)
                return res.status(503).end();
            return this.asUser(req).mastraApp(req, res, next);
        });
    }
    /**
     * Implementation backing both the `/models` route and the
     * `listModels` export. Runs inside the AppKit user-context proxy so
     * `getExecutionContext()` returns the OBO-scoped client.
     *
     * @returns Whatever `listServingEndpoints` resolves to for the
     *   current workspace host, using the TTL from `resolveServingConfig`.
     */
    async listModels() {
        const client = getExecutionContext().client;
        const host = (await client.config.getHost()).toString();
        const serving = resolveServingConfig(this.config);
        return listServingEndpoints(client, host, { ttlMs: serving.ttlMs });
    }
    /**
     * Builds the agents, the `Mastra` instance, the Express subapp and
     * the `MastraServer` adapter, then initializes the adapter. Populates
     * `built`, `mastra`, `mastraApp` and `mastraServer`.
     */
    async buildAgentAndServer() {
        // Per-agent memory factory. The builder resolves the Lakebase pool
        // lazily (on first agent that actually needs storage / vector) and
        // caches both the pool and the shared `PgVector` singleton so
        // registering N agents stays cheap. See `./memory.js`.
        const memoryBuilder = needsLakebase(this.config)
            ? createMemoryBuilder(this.config, this.context)
            : undefined;
        // Build every agent declared in `config.agents` (or the built-in
        // fallback when none are declared). Each agent's `model` resolves
        // workspace URL + bearer at call time so concurrent requests get
        // distinct user identities; the `asUser(req)` scope applied by the
        // catch-all in `injectRoutes` is what lets `getExecutionContext()`
        // return the right user inside the resolver.
        this.built = await buildAgents({
            config: this.config,
            context: this.context,
            memoryBuilder,
            log: this.log,
        });
        // `mastra.server.apiRoutes` is only honored by Mastra's standalone
        // dev server. Since we're hosting Mastra inside our own Express
        // subapp via `@mastra/express`, custom routes must be passed to
        // the `MastraServer` constructor directly.
        this.mastra = new Mastra({ agents: this.built.agents });
        this.mastraApp = express();
        // Must be attached before the server registers its own routes so the
        // URL patch runs first.
        attachRoutePatchMiddleware(this.mastraApp);
        this.mastraServer = new MastraServer(this.config, {
            app: this.mastraApp,
            mastra: this.mastra,
            prefix: "",
            customApiRoutes: [
                chatRoute({ path: "/route/chat", agent: this.built.defaultAgentId }),
                chatRoute({ path: "/route/chat/:agentId" }),
            ],
        });
        await this.mastraServer.init();
    }
}
export const mastra = toPlugin(MastraPlugin);
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Express-layer plumbing for the Mastra plugin: a `MastraServer` that
|
|
3
|
+
* stamps the per-request `RequestContext`, and a route-patch middleware
|
|
4
|
+
* that lets `@mastra/ai-sdk` `chatRoute` work behind an Express mount
|
|
5
|
+
* point.
|
|
6
|
+
*/
|
|
7
|
+
import { MastraServer as MastraServerExpress } from "@mastra/express";
|
|
8
|
+
import type express from "express";
|
|
9
|
+
import { type MastraPluginConfig } from "./config.js";
|
|
10
|
+
/**
 * Subclass of the `@mastra/express` server adapter. Its
 * `registerAuthMiddleware` override adds a middleware that writes the
 * AppKit user, a resource id and a thread id into the per-request
 * `RequestContext`; the thread id comes from an HTTP-only session
 * cookie (`appkit_<plugin-name>_session_id`).
 */
export declare class MastraServer extends MastraServerExpress {
    private config;
    private log;
    /**
     * @param config - Plugin config kept for cookie naming and serving options.
     * @param args - Forwarded unchanged to the `@mastra/express` constructor.
     */
    constructor(config: MastraPluginConfig, ...args: ConstructorParameters<typeof MastraServerExpress>);
    /** Runs the base registration, then appends the context-stamping middleware. */
    registerAuthMiddleware(): void;
}
|
|
21
|
+
/**
 * Works around `@mastra/express`'s custom-route dispatcher so that
 * `chatRoute` keeps working when `MastraServer` lives on an Express
 * subapp mounted under a parent path (e.g. `/api/mastra`).
 *
 * Background:
 *
 *   - The adapter's `registerCustomApiRoutes` matches on `req.path`
 *     (mount-relative, correct) but dispatches to its internal Hono
 *     mini-app using `req.originalUrl`, which still carries the parent
 *     mount prefix. The Hono app only knows the literal `chatRoute`
 *     paths (for example `/route/chat`), so the absolute URL never
 *     matches. For `/route` and `/route/*` this middleware therefore
 *     rewrites `req.originalUrl` to its mount-relative form.
 *
 *   - `memory.resource` must be the authenticated user, not whatever
 *     the client posts. The custom-route forwarder re-serializes
 *     `req.body` into the Request it hands Hono, so a body mutation at
 *     this point would reach `handleChatStream`'s params. That hook is
 *     reserved for future use; `express.json()` runs first so
 *     `req.body` is already parsed.
 *
 * @param app - Express application to install the middleware on.
 */
export declare function attachRoutePatchMiddleware(app: express.Express): void;
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Express-layer plumbing for the Mastra plugin: a `MastraServer` that
|
|
3
|
+
* stamps the per-request `RequestContext`, and a route-patch middleware
|
|
4
|
+
* that lets `@mastra/ai-sdk` `chatRoute` work behind an Express mount
|
|
5
|
+
* point.
|
|
6
|
+
*/
|
|
7
|
+
import { getExecutionContext } from "@databricks/appkit";
|
|
8
|
+
import { httpUtils, logUtils, stringUtils } from "@dbx-tools/appkit-shared";
|
|
9
|
+
import { MASTRA_RESOURCE_ID_KEY, MASTRA_THREAD_ID_KEY, } from "@mastra/core/request-context";
|
|
10
|
+
import { MastraServer as MastraServerExpress } from "@mastra/express";
|
|
11
|
+
import { randomUUID } from "node:crypto";
|
|
12
|
+
import { MASTRA_USER_KEY } from "./config.js";
|
|
13
|
+
import { extractModelOverride, MASTRA_MODEL_OVERRIDE_KEY, resolveServingConfig, } from "./serving.js";
|
|
14
|
+
/**
 * `@mastra/express` subclass whose auth-middleware hook also stamps the
 * per-request `RequestContext` with the AppKit user, a resource id, a
 * thread id taken from an HTTP-only session cookie
 * (`appkit_<plugin-name>_session_id`), and an optional model override.
 */
export class MastraServer extends MastraServerExpress {
    /** Plugin config; read for the cookie name and serving options. */
    config;
    /** Logger obtained from `logUtils.logger(config)`. */
    log;
    /**
     * @param config - Plugin config retained on the instance.
     * @param args - Passed straight through to the base constructor.
     */
    constructor(config, ...args) {
        super(...args);
        this.config = config;
        this.log = logUtils.logger(config);
    }
    /**
     * Runs the base registration, then appends one middleware that fills
     * in the request context before calling `next()`.
     */
    registerAuthMiddleware() {
        super.registerAuthMiddleware();
        this.app.use((req, res, next) => {
            // Identity: prefer the end-user id, else the service identity.
            const executionContext = getExecutionContext();
            const userId = "userId" in executionContext
                ? executionContext.userId
                : executionContext.serviceUserId;
            const user = { id: userId, executionContext };
            const ctx = res.locals.requestContext;
            ctx.set(MASTRA_USER_KEY, user);
            // NOTE(review): an already-present resource id is left untouched.
            // Confirm no client-controlled value can reach this key first.
            const hasResourceId = Boolean(ctx.get(MASTRA_RESOURCE_ID_KEY));
            if (!hasResourceId) {
                this.log.debug(`Setting resource id: ${user.id}`);
                ctx.set(MASTRA_RESOURCE_ID_KEY, user.id);
            }
            // Session cookie: reuse the incoming value or mint a fresh one.
            const jar = httpUtils.parseCookies(req.headers.cookie);
            const cookieName = stringUtils.toIdentifierWithOptions({ delimiter: "_", distinct: true }, "appkit", this.config.name, "sessionId");
            const existingSession = jar[cookieName];
            const sessionId = existingSession || randomUUID();
            if (!existingSession) {
                const cookieOptions = {
                    httpOnly: true,
                    sameSite: "lax",
                    secure: req.secure,
                    path: "/",
                };
                res.cookie(cookieName, sessionId, cookieOptions);
            }
            res.locals.sessionId = sessionId;
            const hasThreadId = Boolean(ctx.get(MASTRA_THREAD_ID_KEY));
            if (!hasThreadId) {
                this.log.debug(`Setting thread id: ${sessionId}`);
                ctx.set(MASTRA_THREAD_ID_KEY, sessionId);
            }
            // Per-request model override: only honored when the plugin
            // opts in (default). Sources, in priority order, are
            // `X-Mastra-Model` header, `?model=` query, and `model` /
            // `modelId` body field; see `serving.ts`.
            const { allowOverride } = resolveServingConfig(this.config);
            const override = allowOverride
                ? extractModelOverride({
                    headers: req.headers,
                    query: req.query,
                    body: req.body,
                })
                : null;
            if (override) {
                this.log.debug(`Model override: ${override}`);
                ctx.set(MASTRA_MODEL_OVERRIDE_KEY, override);
            }
            next();
        });
    }
}
|
|
80
|
+
/**
|
|
81
|
+
* Patches around `@mastra/express`'s custom-route dispatcher so
|
|
82
|
+
* `chatRoute` works when `MastraServer` is hosted on an Express subapp
|
|
83
|
+
* mounted under a parent path (e.g. `/api/mastra`).
|
|
84
|
+
*
|
|
85
|
+
* Two concerns:
|
|
86
|
+
*
|
|
87
|
+
* 1. The adapter's `registerCustomApiRoutes` matches against `req.path`
|
|
88
|
+
* (mount-relative, correct) but dispatches to its internal Hono
|
|
89
|
+
* mini-app using `req.originalUrl`, which still contains the parent
|
|
90
|
+
* mount prefix. The Hono app registers the literal `chatRoute` paths
|
|
91
|
+
* (for example `/route/chat`), so the absolute URL never matches
|
|
92
|
+
* until we overwrite `originalUrl` for `/route` and `/route/*` to
|
|
93
|
+
* the mount-relative path.
|
|
94
|
+
*
|
|
95
|
+
* 2. `memory.resource` must be the authenticated user, not whatever the
|
|
96
|
+
* client posts. The custom-route forwarder re-serializes `req.body`
|
|
97
|
+
* into the Request body it hands Hono, so mutating the parsed body
|
|
98
|
+
* here would propagate into `handleChatStream`'s params (kept for
|
|
99
|
+
* future use; `express.json()` runs first so `req.body` is parsed).
|
|
100
|
+
*/
|
|
101
|
+
export function attachRoutePatchMiddleware(app) {
|
|
102
|
+
app.use((req, _res, next) => {
|
|
103
|
+
const isChat = req.path === "/route" || req.path.startsWith("/route/");
|
|
104
|
+
if (!isChat)
|
|
105
|
+
return next();
|
|
106
|
+
req.originalUrl = req.path;
|
|
107
|
+
next();
|
|
108
|
+
});
|
|
109
|
+
}
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dynamic model resolution against Databricks Model Serving.
|
|
3
|
+
*
|
|
4
|
+
* Three concerns live here:
|
|
5
|
+
*
|
|
6
|
+
* 1. **Listing** - {@link listServingEndpoints} pulls the workspace's
|
|
7
|
+
* `/serving-endpoints` via the SDK and caches the result per host
|
|
8
|
+
* with a TTL. Concurrent callers share one in-flight promise (the
|
|
9
|
+
* same coalescing pattern as Python's `cachetools-async`).
|
|
10
|
+
* 2. **Fuzzy matching** - {@link resolveModelId} runs the user's input
|
|
11
|
+
* through `fuse.js` extended search so loose tokens like
|
|
12
|
+
* `"claude sonnet"` snap to `databricks-claude-sonnet-4-6` even
|
|
13
|
+
* when typed without the full endpoint name.
|
|
14
|
+
* 3. **Per-request override** - {@link extractModelOverride} pulls a
|
|
15
|
+
* model name from the `X-Mastra-Model` header, `?model=` query
|
|
16
|
+
 *    string, or `model` / `modelId` body field so the same agent can be exercised
|
|
17
|
+
* against different endpoints without redeploying.
|
|
18
|
+
*
|
|
19
|
+
* `model.ts` glues these together inside the per-step model resolver;
|
|
20
|
+
* `plugin.ts` exposes the cached list at `GET /models`.
|
|
21
|
+
*/
|
|
22
|
+
import { type getExecutionContext } from "@databricks/appkit";
|
|
23
|
+
import type { ServingEndpointSummary } from "@dbx-tools/appkit-mastra-shared";
|
|
24
|
+
import type { MastraPluginConfig } from "./config.js";
|
|
25
|
+
export type { ServingEndpointSummary };
|
|
26
|
+
/**
 * Shape of the Databricks workspace client as seen by this module. It
 * is read off the `client` property of AppKit's execution context
 * instead of being imported, so there is no direct dependency on
 * `@databricks/sdk-experimental`; that package arrives transitively
 * through `@databricks/appkit`.
 */
type WorkspaceClientLike = ReturnType<typeof getExecutionContext>["client"];
|
|
33
|
+
/**
 * Key in the `RequestContext` where {@link MastraServer} records the
 * model override taken from the request (header / query / body).
 * `model.ts` consults it before the agent / plugin default.
 */
export declare const MASTRA_MODEL_OVERRIDE_KEY = "mastra__model_override";
/** Name of the HTTP header checked for a per-request model override. */
export declare const MODEL_OVERRIDE_HEADER = "x-mastra-model";
/** Name of the query-string parameter checked for a per-request model override. */
export declare const MODEL_OVERRIDE_QUERY = "model";
/** Body field names checked for a per-request model override, highest priority first. */
export declare const MODEL_OVERRIDE_BODY_FIELDS: readonly ["model", "modelId"];
|
|
45
|
+
/**
 * Lists the Model Serving endpoints of the workspace that `client`
 * belongs to, going through AppKit's `CacheManager`.
 *
 * The manager covers what `cachetools.TTLCache` offers plus the extras
 * of `cachetools-async`: per-entry TTL, coalescing of in-flight
 * requests (concurrent callers share one fetch through the manager's
 * internal `inFlightRequests` map), bounded size, telemetry spans
 * (`cache.getOrExecute`), and optional Lakebase persistence so the
 * catalogue survives restarts when the lakebase plugin is wired up.
 *
 * Results are plain {@link ServingEndpointSummary} objects (a stable
 * subset of the SDK type), so a cache hit never exposes stale SDK
 * internals. Failures from `CacheManager` or from the SDK fetch are
 * not swallowed; they reach the caller so the real auth / network
 * problem is visible.
 *
 * @param client - Workspace client used to fetch the endpoints.
 * @param host - Workspace host used as the cache key. Pass the value
 *   resolved from `client.config.getHost()` so multi-host apps share
 *   one entry per workspace.
 * @param opts.ttlMs - Overrides the default TTL for this call only.
 *   Forwarded to `CacheManager` as seconds.
 * @returns The endpoint summaries for that workspace.
 */
export declare function listServingEndpoints(
    client: WorkspaceClientLike,
    host: string,
    opts?: {
        ttlMs?: number;
    },
): Promise<ServingEndpointSummary[]>;
|
|
70
|
+
/**
 * Evicts cached endpoint listings from AppKit's `CacheManager`.
 *
 * @param host - When given, only that workspace's entry is deleted.
 *   When omitted, every entry on the manager is cleared: `CacheManager`
 *   has no namespace-scoped clear, so this is the brute-force path.
 *   That is fine for tests but should be avoided in steady-state code.
 * @returns A promise that settles once the eviction has been issued.
 */
export declare function clearServingEndpointsCache(host?: string): Promise<void>;
|
|
78
|
+
/**
 * Outcome of fuzzy-resolving a user-supplied model name against the
 * live endpoint list.
 */
export interface ResolvedModel {
    /** Endpoint name to use: the match, or the original input on a miss. */
    modelId: string;
    /**
     * `false` when the score exceeded the configured threshold, so the
     * caller falls back to the original input (Databricks will then
     * return a clean 404).
     */
    matched: boolean;
    /** Fuse.js distance: `0` is exact, `1` is no match. */
    score?: number;
}
|
|
90
|
+
/** Tuning knobs for {@link resolveModelId}. */
export interface ResolveModelOptions {
    /** Fuse.js threshold: `0` accepts only exact matches, `1` accepts anything. Default `0.4`. */
    threshold?: number;
}
|
|
95
|
+
/**
 * Maps a user-supplied model name onto the closest configured serving
 * endpoint.
 *
 * Resolution order:
 *
 *   - An exact name match is returned immediately; no fuzzy search runs.
 *   - Otherwise the input is tokenized (dashes, underscores and spaces
 *     act as separators) and passed to Fuse.js extended search, which
 *     AND-s the tokens with fuzzy matching enabled. This is what lets
 *     `"claude sonnet"` find the full endpoint name.
 *   - When the best Fuse score is above `threshold`, the input comes
 *     back unchanged and the upstream call surfaces the 404. Deliberate
 *     model ids (e.g. brand new endpoints) are therefore never silently
 *     rewritten to a similar-looking neighbour.
 *
 * An empty endpoint list short-circuits fuzzy matching and returns the
 * input verbatim; {@link buildModel} relies on this when the workspace
 * client can't be reached at resolve time.
 *
 * @param input - Model name as typed by the user.
 * @param endpoints - Endpoint summaries to match against.
 * @param opts - Optional matching threshold.
 * @returns The resolved id together with match status and score.
 */
export declare function resolveModelId(
    input: string,
    endpoints: readonly ServingEndpointSummary[],
    opts?: ResolveModelOptions,
): ResolvedModel;
|
|
115
|
+
/**
 * The slice of an Express-style request that
 * {@link extractModelOverride} reads. Declared structurally so this
 * module has no dependency on `express` and the helper can be reused
 * from non-Express adapters.
 */
export interface ModelOverrideRequest {
    /** Request headers. */
    headers?: Record<string, string | string[] | undefined>;
    /** Parsed query-string parameters. */
    query?: Record<string, unknown> | undefined;
    /** Parsed request body, if any. */
    body?: unknown;
}
|
|
125
|
+
/**
 * Reads a model override from one HTTP request. Sources are checked
 * from highest to lowest priority:
 *
 *   - the `X-Mastra-Model` header
 *   - the `?model=` query-string parameter
 *   - a body field, `model` first and then `modelId`
 *
 * Body inspection is lenient: any plain object carrying one of the
 * configured keys counts, mirroring how AI SDK chat clients pass
 * arbitrary metadata alongside `messages`.
 *
 * @param req - Headers, query and body of the incoming request.
 * @returns The override, or `null` when none is set, so callers can
 *   write `if (override) ...` without juggling empty strings.
 */
export declare function extractModelOverride(req: ModelOverrideRequest): string | null;
|
|
140
|
+
/**
 * Reads the model-serving knobs off the plugin config and applies
 * defaults. It lives in this module so that `buildModel` and the
 * `/models` route agree on what "enabled" means.
 *
 * `fallbacks` is the priority-ordered list `pickModelId` walks when
 * nothing explicit is set. Its defaults are defined in `model.ts`
 * (`FALLBACK_MODEL_IDS`) and passed in by callers, which avoids a
 * circular import between `serving.ts` and `model.ts`.
 *
 * @param config - Plugin configuration to read from.
 * @param defaultFallbacks - Fallback model ids supplied by the caller.
 * @returns The resolved `ttlMs`, `threshold`, `fuzzy`, `allowOverride`
 *   and `fallbacks` values.
 */
export declare function resolveServingConfig(
    config: MastraPluginConfig,
    defaultFallbacks?: readonly string[],
): {
    ttlMs: number;
    threshold: number;
    fuzzy: boolean;
    allowOverride: boolean;
    fallbacks: readonly string[];
};
|