@dbx-tools/appkit-mastra 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,214 @@
1
+ /**
2
+ * Dynamic model resolution against Databricks Model Serving.
3
+ *
4
+ * Three concerns live here:
5
+ *
6
+ * 1. **Listing** - {@link listServingEndpoints} pulls the workspace's
7
+ * `/serving-endpoints` via the SDK and caches the result per host
8
+ * with a TTL. Concurrent callers share one in-flight promise (the
9
+ * same coalescing pattern as Python's `cachetools-async`).
10
+ * 2. **Fuzzy matching** - {@link resolveModelId} runs the user's input
11
+ * through `fuse.js` extended search so loose tokens like
12
+ * `"claude sonnet"` snap to `databricks-claude-sonnet-4-6` even
13
+ * when typed without the full endpoint name.
14
+ * 3. **Per-request override** - {@link extractModelOverride} pulls a
15
+ * model name from the `X-Mastra-Model` header, `?model=` query
16
+ * string, or `model` body field so the same agent can be exercised
17
+ * against different endpoints without redeploying.
18
+ *
19
+ * `model.ts` glues these together inside the per-step model resolver;
20
+ * `plugin.ts` exposes the cached list at `GET /models`.
21
+ */
22
+ import { CacheManager } from "@databricks/appkit";
23
+ import { stringUtils } from "@dbx-tools/appkit-shared";
24
+ import Fuse from "fuse.js";
/**
 * `RequestContext` key under which {@link MastraServer} stores the
 * per-request model override (taken from header / query / body).
 * `model.ts` reads it before falling back to the agent / plugin default.
 */
export const MASTRA_MODEL_OVERRIDE_KEY = "mastra__model_override";
/** HTTP header (lower-case form) inspected for a per-request model override. */
export const MODEL_OVERRIDE_HEADER = "x-mastra-model";
/** Query string parameter inspected for a per-request model override. */
export const MODEL_OVERRIDE_QUERY = "model";
/**
 * Body fields inspected for a per-request model override, highest
 * priority first.
 *
 * The array is frozen: it is exported and shared by every importer, so
 * an accidental `push` / `sort` / index assignment elsewhere would
 * otherwise silently change which body fields are honoured for all
 * requests. Reading, iterating and spreading work exactly as before.
 */
export const MODEL_OVERRIDE_BODY_FIELDS = Object.freeze(["model", "modelId"]);
/** Default TTL (5 minutes, in milliseconds) for the endpoint cache. Matches the Databricks SDK's session lifetime budget. */
const DEFAULT_TTL_MS = 5 * 60 * 1000;
/**
 * Default Fuse.js score threshold. A fuzzy match is accepted when its
 * score is at or below this value; anything above it is rejected.
 */
const DEFAULT_FUZZY_THRESHOLD = 0.4;
/** First cache key part under which endpoint listings are stored (the workspace host is the second part). */
const CACHE_KEY_NAMESPACE = "mastra:serving-endpoints";
/**
 * Stable `userKey` arg for AppKit's `CacheManager.getOrExecute`.
 * Endpoint visibility is effectively workspace-scoped (the host is
 * part of the cache key), so a single shared key lets every user of
 * the same workspace share one cached fetch and coalesce on the
 * in-flight promise. Permissions can differ in theory, but the
 * Foundation Model API catalogue is the same view for every caller.
 */
const SHARED_USER_KEY = "mastra-shared";
/**
 * Return the Model Serving endpoints of the workspace behind `client`,
 * going through AppKit's `CacheManager` rather than hitting the SDK on
 * every call.
 *
 * The lookup is keyed by `[CACHE_KEY_NAMESPACE, host]` under the shared
 * user key, so all callers for one workspace land on the same entry.
 * On a miss the manager runs the endpoint fetch and stores the result
 * for the requested TTL. Per the original notes, `CacheManager` also
 * supplies in-flight request coalescing, bounded size, telemetry spans
 * and optional Lakebase persistence.
 *
 * The cached values are plain {@link ServingEndpointSummary} objects.
 * Nothing is caught here: failures from the manager or from the SDK
 * fetch reach the caller unchanged, so the real auth / network problem
 * stays visible.
 *
 * @param client - Workspace client whose `servingEndpoints.list()` is
 *   used on a cache miss.
 * @param host - Workspace host used as the cache key. Pass the value
 *   resolved from `client.config.getHost()` so multi-host apps share
 *   one entry per workspace.
 * @param opts.ttlMs - TTL override in milliseconds for this call only.
 *   It is rounded to whole seconds (minimum 1) before being handed to
 *   `CacheManager`.
 * @returns Whatever `CacheManager.getOrExecute` resolves to for this key.
 */
export async function listServingEndpoints(client, host, opts = {}) {
    const requestedTtlMs = opts.ttlMs ?? DEFAULT_TTL_MS;
    // The manager takes seconds; clamp so a tiny TTL never rounds to 0.
    const ttlSeconds = Math.max(1, Math.round(requestedTtlMs / 1000));
    const manager = CacheManager.getInstanceSync();
    const keyParts = [CACHE_KEY_NAMESPACE, host];
    const loadEndpoints = () => fetchEndpoints(client);
    return manager.getOrExecute(keyParts, loadEndpoints, SHARED_USER_KEY, { ttl: ttlSeconds });
}
/**
 * Walk `client.servingEndpoints.list()` and reduce every named endpoint
 * to a plain summary object.
 *
 * Each summary always carries `name`; `task`, `state` and `description`
 * are added only when the endpoint actually has a value for them.
 * Absent means `undefined` *or* `null`, so a `null` coming back from
 * the API never turns into `task: null` or the literal string `"null"`
 * for `state`.
 *
 * @param client - Object exposing `servingEndpoints.list()`, which must
 *   return an (async) iterable of endpoint records.
 * @returns Array of summaries in the order the endpoints were yielded.
 */
async function fetchEndpoints(client) {
    const summaries = [];
    for await (const ep of client.servingEndpoints.list()) {
        // An endpoint without a name cannot be used as a model id.
        if (!ep.name) {
            continue;
        }
        const summary = { name: ep.name };
        if (ep.task != null) {
            summary.task = ep.task;
        }
        const ready = ep.state?.ready;
        if (ready != null) {
            // Stored as a string so the summary stays a plain, serialisable value.
            summary.state = String(ready);
        }
        if (ep.description != null) {
            summary.description = ep.description;
        }
        summaries.push(summary);
    }
    return summaries;
}
/**
 * Evict cached endpoint listings from AppKit's `CacheManager`.
 *
 * - With a truthy `host`, only that workspace's entry is deleted; the
 *   key is rebuilt from the same parts and shared user key used when
 *   the listing was stored.
 * - Without a `host`, `clear()` is called on the manager, which wipes
 *   every entry it holds. `CacheManager` offers no namespace-scoped
 *   clear, so this is the brute-force route: fine for tests, best
 *   avoided in steady-state code.
 *
 * @param host - Optional workspace host whose entry should be removed.
 */
export async function clearServingEndpointsCache(host) {
    const manager = CacheManager.getInstanceSync();
    if (!host) {
        await manager.clear();
        return;
    }
    const entryKey = manager.generateKey([CACHE_KEY_NAMESPACE, host], SHARED_USER_KEY);
    await manager.delete(entryKey);
}
/**
 * Map a user-supplied model name onto the closest known serving
 * endpoint.
 *
 * Resolution order:
 *
 * 1. No endpoints supplied: the input is returned verbatim with
 *    `matched: false`. This is what {@link buildModel} relies on when
 *    the workspace client can't be reached at resolve time.
 * 2. An endpoint whose `name` is strictly equal to the input: returned
 *    immediately with `score: 0`, no fuzzy search involved.
 * 3. Otherwise the input is tokenized with the shared tokenizer
 *    (lower-cased, no camel-case splitting), the tokens are joined
 *    with spaces, and the result is run through Fuse.js extended
 *    search over the endpoint names. In extended search the
 *    space-separated tokens are AND-ed, so `"claude sonnet"` can land
 *    on the full endpoint name.
 * 4. The top hit is accepted only if it has a name and its score is at
 *    or below `threshold`. Anything else returns the input unchanged
 *    so the upstream call can surface the 404; deliberate ids (e.g.
 *    brand new endpoints) are never silently rewritten to a
 *    look-alike neighbour.
 *
 * @param input - Model name as typed by the user.
 * @param endpoints - Candidate endpoints; only `name` is searched.
 * @param opts.threshold - Fuse.js score cut-off; defaults to
 *   `DEFAULT_FUZZY_THRESHOLD`.
 * @returns `{ modelId, matched }`, plus `score` when a match was found.
 */
export function resolveModelId(input, endpoints, opts = {}) {
    // Every "no match" exit hands the caller's input back untouched.
    const passthrough = () => ({ modelId: input, matched: false });
    if (endpoints.length === 0) {
        return passthrough();
    }
    const hasExactName = endpoints.some((ep) => ep.name === input);
    if (hasExactName) {
        return { modelId: input, matched: true, score: 0 };
    }
    const threshold = opts.threshold ?? DEFAULT_FUZZY_THRESHOLD;
    const searcher = new Fuse(endpoints, {
        keys: ["name"],
        threshold,
        ignoreLocation: true,
        includeScore: true,
        useExtendedSearch: true,
        isCaseSensitive: false,
    });
    // Fuse 7.3 has no tokenize hook of its own. The shared tokenizer is
    // used so the splitting rules agree with the rest of the toolkit,
    // and the space-joined tokens become AND-ed extended-search terms.
    const tokens = stringUtils.tokenizeWithOptions({ lowerCase: true, camelCase: false }, input);
    const query = Array.from(tokens).join(" ");
    if (!query) {
        return passthrough();
    }
    const top = searcher.search(query)[0];
    const topName = top?.item.name;
    const topScore = top?.score ?? 0;
    if (topName && topScore <= threshold) {
        return { modelId: topName, matched: true, score: top.score };
    }
    return passthrough();
}
/**
 * Find a per-request model override on an HTTP request.
 *
 * Sources are consulted in this order and the first non-empty value
 * wins:
 *
 * 1. The `X-Mastra-Model` header.
 * 2. The `?model=` query string parameter.
 * 3. A body field: `model` first, then `modelId`.
 *
 * Body inspection is deliberately lenient: any object-typed body with
 * one of the configured keys counts, mirroring how AI SDK chat clients
 * pass arbitrary metadata alongside `messages`.
 *
 * @param req - Request-like object; `headers`, `query` and `body` are
 *   each optional.
 * @returns The override, or `null` when no source supplies one, so
 *   callers can write `if (override) ...` without juggling empty
 *   strings.
 */
export function extractModelOverride(req) {
    // 1. Header: tried under the declared name, then its lower-case form.
    const headerBag = req.headers;
    if (headerBag) {
        const rawHeader = headerBag[MODEL_OVERRIDE_HEADER] ?? headerBag[MODEL_OVERRIDE_HEADER.toLowerCase()];
        const fromHeader = stringUtils.firstNonEmpty(rawHeader);
        if (fromHeader) {
            return fromHeader;
        }
    }
    // 2. Query string.
    if (req.query) {
        const fromQuery = stringUtils.firstNonEmpty(req.query[MODEL_OVERRIDE_QUERY]);
        if (fromQuery) {
            return fromQuery;
        }
    }
    // 3. Body: only object-typed bodies can carry one of the fields.
    if (!req.body || typeof req.body !== "object") {
        return null;
    }
    const payload = req.body;
    for (const fieldName of MODEL_OVERRIDE_BODY_FIELDS) {
        const fromBody = stringUtils.firstNonEmpty(payload[fieldName]);
        if (fromBody) {
            return fromBody;
        }
    }
    return null;
}
/**
 * Derive the effective model-serving settings from the plugin config,
 * filling in defaults for anything left unset. Keeping this in one
 * place means `buildModel` and the `/models` route agree on what
 * "enabled" means.
 *
 * - `ttlMs` / `threshold` fall back to the module defaults when the
 *   config value is `null` or `undefined`.
 * - `fuzzy` / `allowOverride` are on unless the config sets them to
 *   exactly `false`.
 * - `fallbacks` is the priority-ordered list `pickModelId` walks when
 *   nothing explicit is set. The defaults live in `model.ts`
 *   (`FALLBACK_MODEL_IDS`) and are passed in by the caller, which
 *   avoids a circular import between `serving.ts` and `model.ts`.
 *
 * @param config - Plugin config object.
 * @param defaultFallbacks - Fallback model ids used when the config
 *   does not provide `defaultModelFallbacks`.
 * @returns `{ ttlMs, threshold, fuzzy, allowOverride, fallbacks }`.
 */
export function resolveServingConfig(config, defaultFallbacks = []) {
    const {
        modelCacheTtlMs,
        modelFuzzyThreshold,
        modelFuzzyMatch,
        modelOverride,
        defaultModelFallbacks,
    } = config;
    const fuzzy = modelFuzzyMatch !== false;
    const allowOverride = modelOverride !== false;
    return {
        ttlMs: modelCacheTtlMs ?? DEFAULT_TTL_MS,
        threshold: modelFuzzyThreshold ?? DEFAULT_FUZZY_THRESHOLD,
        fuzzy,
        allowOverride,
        fallbacks: defaultModelFallbacks ?? defaultFallbacks,
    };
}
package/index.ts ADDED
@@ -0,0 +1,36 @@
1
+ /**
2
+ * AppKit Mastra integration: {@link MastraPlugin} / {@link mastra},
3
+ * plugin config types, agent registration helpers, Genie tool
4
+ * builders, and dynamic Model Serving endpoint resolution.
5
+ *
6
+ * Client-side consumers should import URL helpers and the
7
+ * {@link MastraClientConfig} type from `@dbx-tools/appkit-mastra-shared`
8
+ * instead - that package is pure (no pg / fastembed / Mastra deps) and
9
+ * is the right surface for browser bundles and `usePluginClientConfig`
10
+ * consumers.
11
+ */
12
+ export * from "./src/plugin.js";
13
+ export * from "@dbx-tools/appkit-mastra-shared";
14
+ export * from "./src/config.js";
15
+ export * from "./src/agents.js";
16
+ export * from "./src/genie.js";
17
+ export {
18
+ clearServingEndpointsCache,
19
+ extractModelOverride,
20
+ listServingEndpoints,
21
+ MASTRA_MODEL_OVERRIDE_KEY,
22
+ MODEL_OVERRIDE_BODY_FIELDS,
23
+ MODEL_OVERRIDE_HEADER,
24
+ MODEL_OVERRIDE_QUERY,
25
+ resolveModelId,
26
+ type ResolvedModel,
27
+ type ResolveModelOptions,
28
+ type ServingEndpointSummary,
29
+ } from "./src/serving.js";
30
+ export {
31
+ FALLBACK_MODEL_IDS,
32
+ MODEL_CATALOG,
33
+ modelForTier,
34
+ modelsForTier,
35
+ ModelTier,
36
+ } from "./src/model.js";
package/package.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "main": "dist/index.js",
3
+ "types": "dist/index.d.ts",
4
+ "exports": {
5
+ ".": {
6
+ "source": "./index.ts",
7
+ "types": "./dist/index.d.ts",
8
+ "default": "./dist/index.js"
9
+ }
10
+ },
11
+ "files": [
12
+ "dist",
13
+ "index.ts",
14
+ "src"
15
+ ],
16
+ "license": "Apache-2.0",
17
+ "homepage": "https://github.com/reggie-db/dbx-tools-appkit#readme",
18
+ "bugs": {
19
+ "url": "https://github.com/reggie-db/dbx-tools-appkit/issues"
20
+ },
21
+ "publishConfig": {
22
+ "registry": "https://registry.npmjs.org/",
23
+ "access": "public"
24
+ },
25
+ "repository": {
26
+ "type": "git",
27
+ "url": "git+https://github.com/reggie-db/dbx-tools-appkit.git",
28
+ "directory": "packages/mastra"
29
+ },
30
+ "name": "@dbx-tools/appkit-mastra",
31
+ "version": "0.1.0",
32
+ "module": "index.ts",
33
+ "type": "module",
34
+ "dependencies": {
35
+ "@dbx-tools/appkit-shared": "workspace:*",
36
+ "@dbx-tools/appkit-mastra-shared": "workspace:*",
37
+ "@mastra/ai-sdk": "^1.3",
38
+ "@mastra/core": "^1.32",
39
+ "@mastra/express": "^1.3",
40
+ "@mastra/fastembed": "^1.0",
41
+ "@mastra/memory": "^1.17",
42
+ "@mastra/pg": "^1.10",
43
+ "fuse.js": "^7.0.0",
44
+ "zod": "^4"
45
+ },
46
+ "peerDependencies": {
47
+ "@databricks/appkit": "^0.35",
48
+ "express": "^5"
49
+ },
50
+ "devDependencies": {
51
+ "@types/express": "^5",
52
+ "@types/pg": "^8",
53
+ "express": "^5"
54
+ }
55
+ }