@dbx-tools/appkit-mastra 0.1.12 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -45
- package/dist/src/agents.d.ts +2 -2
- package/dist/src/agents.js +66 -14
- package/dist/src/chart.d.ts +39 -105
- package/dist/src/chart.js +199 -194
- package/dist/src/config.d.ts +104 -0
- package/dist/src/config.js +43 -0
- package/dist/src/genie.d.ts +170 -107
- package/dist/src/genie.js +1003 -577
- package/dist/src/history.d.ts +31 -3
- package/dist/src/history.js +137 -31
- package/dist/src/memory.d.ts +25 -4
- package/dist/src/memory.js +34 -2
- package/dist/src/model.js +2 -2
- package/dist/src/observability.d.ts +64 -0
- package/dist/src/observability.js +85 -0
- package/dist/src/plugin.js +39 -7
- package/dist/src/processors/strip-stale-charts.js +1 -1
- package/dist/src/server.d.ts +12 -0
- package/dist/src/server.js +38 -2
- package/dist/src/serving.js +1 -1
- package/dist/src/tools/email.js +1 -1
- package/dist/tsconfig.build.tsbuildinfo +1 -1
- package/package.json +21 -16
- package/src/agents.ts +73 -17
- package/src/chart.ts +221 -251
- package/src/config.ts +120 -0
- package/src/genie.ts +1199 -654
- package/src/history.ts +147 -33
- package/src/memory.ts +41 -5
- package/src/model.ts +3 -3
- package/src/observability.ts +116 -0
- package/src/plugin.ts +39 -7
- package/src/processors/strip-stale-charts.ts +1 -1
- package/src/server.ts +49 -2
- package/src/serving.ts +1 -1
- package/src/tools/email.ts +1 -1
package/dist/src/genie.js
CHANGED
|
@@ -1,630 +1,1056 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Genie agent for Mastra.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* parameter.
|
|
4
|
+
* Each configured Genie space exposes a single Mastra tool to the
|
|
5
|
+
* calling agent (`genie` for the `"default"` alias, `genie_<alias>`
|
|
6
|
+
* otherwise). When invoked, the tool runs end-to-end:
|
|
8
7
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
8
|
+
* 1. Pulls the per-request {@link WorkspaceClient} off
|
|
9
|
+
* `ctx.requestContext` (stamped by `MastraServer`) and emits a
|
|
10
|
+
* `started` writer event so the host UI can show progress
|
|
11
|
+
* immediately, before any LLM round-trip.
|
|
12
|
+
* 2. Spins up a per-call inner Mastra `Agent` with three tools:
|
|
13
|
+
* - `ask_genie`: drives one `genieEventChat` turn, fetches
|
|
14
|
+
* the matching statement's rows when the turn ran SQL,
|
|
15
|
+
* and forwards every wire event (status, thinking, sql,
|
|
16
|
+
* rows) through `ctx.writer` for streaming UI updates.
|
|
17
|
+
* - `get_space_description`: cheap title / description /
|
|
18
|
+
* warehouse id lookup for grounding.
|
|
19
|
+
* - `get_space_serialized`: full `GenieSpace` JSON for
|
|
20
|
+
* column-level grounding when the description isn't
|
|
21
|
+
* enough.
|
|
22
|
+
* 3. Runs the inner agent with `structuredOutput` (Mastra's
|
|
23
|
+
* two-pass mode + `jsonPromptInjection`) to coerce the
|
|
24
|
+
* agent's final answer into a tagged
|
|
25
|
+
* `[{type:"text"|"data", ...}]` array. The two-pass design
|
|
26
|
+
* avoids Databricks Model Serving's `response_format` +
|
|
27
|
+
* `tools` collision; prompt injection sidesteps the
|
|
28
|
+
* separate `response_format` + streaming collision in the
|
|
29
|
+
* structuring agent.
|
|
30
|
+
* 4. Charts every `data` item in parallel via
|
|
31
|
+
* {@link runChartPlanner}, maps `text` items to the shared
|
|
32
|
+
* {@link GenieSummaryItem} `string` variant, and returns the
|
|
33
|
+
* hydrated {@link GenieAgentResult}.
|
|
12
34
|
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
* `
|
|
18
|
-
*
|
|
19
|
-
* LLM never sees rows, and charts come from the separate
|
|
20
|
-
* `render_data` tool when the model decides one is useful.
|
|
35
|
+
* The legacy AppKit `genie` plugin (`@databricks/appkit`'s `genie`)
|
|
36
|
+
* is no longer used at runtime. The inner agent talks to Genie
|
|
37
|
+
* directly via `@dbx-tools/genie` (`genieEventChat`) and the
|
|
38
|
+
* workspace `statementExecution.getStatement` API. The plugin's
|
|
39
|
+
* `spaces` config is still honored so existing AppKit-style wiring
|
|
40
|
+
* keeps working without change.
|
|
21
41
|
*/
|
|
22
|
-
import { genie } from "@databricks/appkit";
|
|
23
|
-
import {
|
|
42
|
+
import { CacheManager, genie } from "@databricks/appkit";
|
|
43
|
+
import { ApiError, HttpError, WorkspaceClient } from "@databricks/sdk-experimental";
|
|
44
|
+
import { genieEventChat } from "@dbx-tools/genie";
|
|
45
|
+
import {} from "@dbx-tools/genie-shared";
|
|
46
|
+
import {} from "@dbx-tools/appkit-mastra-shared";
|
|
47
|
+
import { apiUtils, appkitUtils, commonUtils, logUtils, stringUtils, } from "@dbx-tools/shared";
|
|
48
|
+
import { Agent } from "@mastra/core/agent";
|
|
49
|
+
import { MASTRA_THREAD_ID_KEY } from "@mastra/core/request-context";
|
|
24
50
|
import { createTool } from "@mastra/core/tools";
|
|
25
51
|
import { z } from "zod";
|
|
26
|
-
import {
|
|
52
|
+
import { runChartPlanner } from "./chart.js";
|
|
53
|
+
import { MASTRA_USER_KEY } from "./config.js";
|
|
54
|
+
import { buildModel } from "./model.js";
|
|
55
|
+
const log = logUtils.logger("mastra/genie");
|
|
56
|
+
/** Default alias used when a single unnamed Genie space is wired up. */
|
|
57
|
+
export const DEFAULT_GENIE_ALIAS = "default";
|
|
27
58
|
/**
|
|
28
|
-
*
|
|
29
|
-
*
|
|
30
|
-
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
59
|
+
* Cap on the inner agent's tool-loop steps. 5 (Mastra default) is
|
|
60
|
+
* tight - one `get_space_description` + one `ask_genie` per
|
|
61
|
+
* sub-question saturates fast. 16 leaves room for ~10 `ask_genie`
|
|
62
|
+
* rounds plus grounding plus the structuring pass (which runs
|
|
63
|
+
* after the loop and is its own single call).
|
|
33
64
|
*/
|
|
34
|
-
const
|
|
65
|
+
const DEFAULT_MAX_STEPS = 16;
|
|
66
|
+
/* ------------------------- helpers ------------------------- */
|
|
67
|
+
/**
 * Best-effort numeric coercion for Genie's all-strings cells.
 *
 * A cell is converted to a number only when it is a plain decimal
 * literal (optional leading `-`, digits, optional fractional part)
 * AND the conversion is lossless enough to be safe:
 *
 *  - zero-padded values such as `"00501"` stay strings - the padding
 *    marks them as identifiers (zip codes, account numbers), and
 *    `Number()` would silently drop it;
 *  - integer literals outside the safe-integer range stay strings -
 *    `Number()` would silently round wide ids to a different value.
 *
 * `null` and every non-matching value are returned untouched.
 *
 * @param {string | null} cell Raw cell value.
 * @returns {string | number | null} The coerced number, or `cell` as given.
 */
function coerceCell(cell) {
    if (cell === null)
        return null;
    const match = /^-?\d+(\.\d+)?$/.exec(cell);
    if (!match)
        return cell;
    // "007", "-007", "00.5": a padded integer part means "identifier".
    if (/^-?0\d/.test(match[0]))
        return cell;
    const n = Number(match[0]);
    if (!Number.isFinite(n))
        return cell;
    // No fractional group => integer literal; refuse lossy conversions.
    if (match[1] === undefined && !Number.isSafeInteger(n))
        return cell;
    return n;
}
|
|
35
78
|
/**
|
|
36
|
-
*
|
|
37
|
-
*
|
|
38
|
-
*
|
|
39
|
-
* regardless of dataset size). The model uses `chartId` to
|
|
40
|
-
* reference the chart inline via the `[[chart:<chartId>]]` marker.
|
|
79
|
+
* Fetch a single Genie statement's rows via the Statement
|
|
80
|
+
* Execution API and reshape into the shared
|
|
81
|
+
* {@link GenieDatasetData} shape (column array + row records).
|
|
41
82
|
*/
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
is being charted (e.g. "trend of fill_rate over date").
|
|
61
|
-
`),
|
|
62
|
-
rowCount: z.number().describe(stringUtils.toDescription `
|
|
63
|
-
Total rows in this dataset. Mention only if it adds context
|
|
64
|
-
(e.g. "across the last 90 days").
|
|
65
|
-
`),
|
|
66
|
-
sql: z
|
|
67
|
-
.string()
|
|
68
|
-
.optional()
|
|
69
|
-
.describe(stringUtils.toDescription `
|
|
70
|
-
SQL Genie generated and executed. The host UI shows this on
|
|
71
|
-
demand; you do not need to repeat it.
|
|
72
|
-
`),
|
|
73
|
-
});
|
|
83
|
+
/**
 * Load one statement's result via `statementExecution.getStatement`
 * and reshape it into `{ columns, rows, rowCount }`.
 *
 * Column names come from `manifest.schema.columns` (a missing name
 * becomes `""`); each entry of `result.data_array` becomes a record
 * keyed by column name, with every cell passed through `coerceCell`.
 * `rowCount` prefers `manifest.total_row_count` and falls back to the
 * number of rows actually returned.
 *
 * @param client Workspace client exposing `statementExecution.getStatement`.
 * @param statementId Id of the statement to fetch.
 * @param signal Optional abort signal, converted via `apiUtils.toContext`.
 */
async function fetchStatementData(client, statementId, signal) {
    let requestCtx;
    if (signal) {
        requestCtx = apiUtils.toContext(signal);
    }
    const response = await client.statementExecution.getStatement({ statement_id: statementId }, requestCtx);
    const schemaColumns = response.manifest?.schema?.columns ?? [];
    const columns = schemaColumns.map((column) => column.name ?? "");
    const rawRows = response.result?.data_array ?? [];
    const rows = rawRows.map((cells) => {
        const record = {};
        columns.forEach((name, position) => {
            const rawCell = cells[position] ?? null;
            record[name] = coerceCell(rawCell);
        });
        return record;
    });
    const rowCount = response.manifest?.total_row_count ?? rows.length;
    return { columns, rows, rowCount };
}
|
|
74
101
|
/**
|
|
75
|
-
*
|
|
76
|
-
*
|
|
77
|
-
*
|
|
78
|
-
*
|
|
102
|
+
* Resolve the message's representative `statement_id`. Genie
|
|
103
|
+
* returns one statement per turn in practice; we read the
|
|
104
|
+
* (deprecated-but-singular) `message.query_result.statement_id`
|
|
105
|
+
* first and fall back to the first attachment's
|
|
106
|
+
* `query.statement_id`. Returns `undefined` when the turn had no
|
|
107
|
+
* SQL run (pure prose answer).
|
|
79
108
|
*/
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
through to the user (verbatim, or as the basis of your
|
|
93
|
-
reply). Genie may have run multiple SQL queries and tools to
|
|
94
|
-
produce this; the full text is the answer.
|
|
95
|
-
`),
|
|
96
|
-
datasets: z
|
|
97
|
-
.array(datasetSchema)
|
|
98
|
-
.optional()
|
|
99
|
-
.describe(stringUtils.toDescription `
|
|
100
|
-
Datasets Genie produced for this turn (one per executed SQL
|
|
101
|
-
statement). Each entry is metadata only; the rows are
|
|
102
|
-
streamed to the host UI out-of-band. To render any of these
|
|
103
|
-
as a chart inline in your reply, embed
|
|
104
|
-
\`[[chart:<chartId>]]\` where you want the chart to appear.
|
|
105
|
-
Do not paraphrase the rows - the chart is what the user
|
|
106
|
-
should see; your prose should add interpretation
|
|
107
|
-
(highlights, deltas, anomalies) around the chart.
|
|
108
|
-
`),
|
|
109
|
-
suggestedFollowUps: z
|
|
110
|
-
.array(z.string())
|
|
111
|
-
.optional()
|
|
112
|
-
.describe(stringUtils.toDescription `
|
|
113
|
-
Follow-up question suggestions Genie produced. The host UI
|
|
114
|
-
renders these as clickable buttons; you do not need to list
|
|
115
|
-
them in your reply.
|
|
116
|
-
`),
|
|
117
|
-
error: z
|
|
118
|
-
.string()
|
|
119
|
-
.optional()
|
|
120
|
-
.describe(stringUtils.toDescription `
|
|
121
|
-
Genie-side error message if the request failed.
|
|
122
|
-
`),
|
|
123
|
-
});
|
|
124
|
-
const sendMessageSchema = z.object({
|
|
125
|
-
content: z.string().describe(stringUtils.toDescription `
|
|
126
|
-
Natural-language question to send to the Genie space.
|
|
127
|
-
`),
|
|
128
|
-
conversationId: z
|
|
129
|
-
.string()
|
|
130
|
-
.optional()
|
|
131
|
-
.describe(stringUtils.toDescription `
|
|
132
|
-
Optional Genie conversation id to continue an earlier thread.
|
|
133
|
-
Omit on the first call; pass the id returned in the previous
|
|
134
|
-
result's \`conversationId\` to follow up.
|
|
135
|
-
`),
|
|
136
|
-
});
|
|
137
|
-
const getConversationSchema = z.object({
|
|
138
|
-
alias: z.string().describe(stringUtils.toDescription `
|
|
139
|
-
Alias of the Genie space the conversation belongs to (matches
|
|
140
|
-
the key in the genie plugin's \`spaces\` config).
|
|
141
|
-
`),
|
|
142
|
-
conversationId: z.string().describe(stringUtils.toDescription `
|
|
143
|
-
Genie conversation id whose history to fetch.
|
|
144
|
-
`),
|
|
145
|
-
});
|
|
146
|
-
/** Per-attachment shape returned inside a stored Genie message. */
|
|
147
|
-
const genieAttachmentSchema = z.object({
|
|
148
|
-
attachmentId: z.string().optional().describe(stringUtils.toDescription `
|
|
149
|
-
Genie attachment id; internal bookkeeping.
|
|
150
|
-
`),
|
|
151
|
-
query: z
|
|
152
|
-
.object({
|
|
153
|
-
title: z.string().optional().describe(stringUtils.toDescription `
|
|
154
|
-
Genie's title for the SQL, if any.
|
|
155
|
-
`),
|
|
156
|
-
description: z.string().optional().describe(stringUtils.toDescription `
|
|
157
|
-
Genie's prose description of the SQL, if any.
|
|
158
|
-
`),
|
|
159
|
-
query: z.string().optional().describe(stringUtils.toDescription `
|
|
160
|
-
SQL Genie generated and executed.
|
|
161
|
-
`),
|
|
162
|
-
statementId: z.string().optional().describe(stringUtils.toDescription `
|
|
163
|
-
Statement-execution id; internal bookkeeping.
|
|
164
|
-
`),
|
|
165
|
-
})
|
|
166
|
-
.optional()
|
|
167
|
-
.describe(stringUtils.toDescription `
|
|
168
|
-
SQL Genie attached to this message, if it ran any.
|
|
169
|
-
`),
|
|
170
|
-
text: z
|
|
171
|
-
.object({
|
|
172
|
-
content: z.string().optional().describe(stringUtils.toDescription `
|
|
173
|
-
Genie's natural-language answer text for this attachment.
|
|
174
|
-
`),
|
|
175
|
-
})
|
|
176
|
-
.optional()
|
|
177
|
-
.describe(stringUtils.toDescription `
|
|
178
|
-
Per-attachment text content (independent of the message-level
|
|
179
|
-
\`content\` field).
|
|
180
|
-
`),
|
|
181
|
-
suggestedQuestions: z
|
|
182
|
-
.array(z.string())
|
|
183
|
-
.optional()
|
|
184
|
-
.describe(stringUtils.toDescription `
|
|
185
|
-
Follow-up question suggestions Genie generated for this turn.
|
|
186
|
-
`),
|
|
187
|
-
});
|
|
188
|
-
/** Single message inside a Genie conversation history page. */
|
|
189
|
-
const genieMessageSchema = z.object({
|
|
190
|
-
messageId: z.string().describe(stringUtils.toDescription `
|
|
191
|
-
Genie message id; internal bookkeeping.
|
|
192
|
-
`),
|
|
193
|
-
conversationId: z.string().describe(stringUtils.toDescription `
|
|
194
|
-
Conversation id this message belongs to.
|
|
195
|
-
`),
|
|
196
|
-
spaceId: z.string().describe(stringUtils.toDescription `
|
|
197
|
-
Genie space id this message belongs to.
|
|
198
|
-
`),
|
|
199
|
-
status: z.string().describe(stringUtils.toDescription `
|
|
200
|
-
Genie message status (\`COMPLETED\`, \`FAILED\`, etc.).
|
|
201
|
-
`),
|
|
202
|
-
content: z.string().describe(stringUtils.toDescription `
|
|
203
|
-
Outer message-level natural-language content Genie wrote.
|
|
204
|
-
`),
|
|
205
|
-
attachments: z
|
|
206
|
-
.array(genieAttachmentSchema)
|
|
207
|
-
.optional()
|
|
208
|
-
.describe(stringUtils.toDescription `
|
|
209
|
-
Attachments (SQL queries, text blocks, suggested follow-ups)
|
|
210
|
-
Genie produced for this message.
|
|
211
|
-
`),
|
|
212
|
-
error: z.string().optional().describe(stringUtils.toDescription `
|
|
213
|
-
Genie-side error attached to this message, if any.
|
|
214
|
-
`),
|
|
215
|
-
});
|
|
109
|
+
/**
 * Pick the `statement_id` that represents a Genie message.
 *
 * The message-level `query_result.statement_id` wins when present;
 * otherwise the first attachment carrying a `query.statement_id` is
 * used. Returns `undefined` when neither location holds an id.
 *
 * @param message Genie message to inspect.
 * @returns {string | undefined} The statement id, if any.
 */
function extractStatementId(message) {
    const direct = message.query_result?.statement_id;
    if (direct) {
        return direct;
    }
    const attachments = message.attachments ?? [];
    const carrier = attachments.find((attachment) => Boolean(attachment.query?.statement_id));
    return carrier?.query.statement_id;
}
|
|
216
121
|
/**
|
|
217
|
-
*
|
|
218
|
-
*
|
|
219
|
-
*
|
|
122
|
+
* Best-effort `writer.write`. The writer carries the unified flat
|
|
123
|
+
* event vocabulary directly - no translation layer - so
|
|
124
|
+
* subscribers narrow on `event.type` and read fields inline.
|
|
125
|
+
* Failures (downstream stream closed, cancelled request) are
|
|
126
|
+
* swallowed with a `warn` log so an in-flight Genie turn isn't
|
|
127
|
+
* taken down by a navigated-away client.
|
|
220
128
|
*/
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
`),
|
|
234
|
-
});
|
|
129
|
+
/**
 * Forward `chunk` to `writer.write`, never letting a write failure
 * escape. A missing writer is a no-op; a failing write is logged at
 * `warn` level under `"writer:error"` and otherwise ignored.
 *
 * @param writer Optional object exposing `write(chunk)`.
 * @param chunk Event payload to forward as-is.
 */
async function safeWrite(writer, chunk) {
    if (writer) {
        try {
            await writer.write(chunk);
        }
        catch (failure) {
            const error = failure instanceof Error ? failure.message : String(failure);
            log.warn("writer:error", { error });
        }
    }
}
|
|
235
141
|
/**
|
|
236
|
-
*
|
|
237
|
-
*
|
|
238
|
-
*
|
|
239
|
-
*
|
|
240
|
-
*
|
|
142
|
+
* Lowercased placeholder strings we reject at the `ask_genie`
|
|
143
|
+
* boundary so the LLM doesn't spend a Genie round-trip on a
|
|
144
|
+
* non-question. Genie politely answers any of these with "Your
|
|
145
|
+
* request '...' does not relate to..." which is pure UI noise.
|
|
146
|
+
* Kept narrow on purpose - real questions sometimes start with
|
|
147
|
+
* one of these tokens, so we only match the FULL trimmed string.
|
|
241
148
|
*/
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
149
|
+
// Lowercased non-questions rejected before a Genie round-trip is spent.
// Membership is tested against the whole trimmed, lowercased input.
const PLACEHOLDER_QUESTIONS = new Set([
    // explicit "do nothing" markers
    "noop", "no-op", "skip",
    // empty-value spellings
    "none", "n/a", "na", "null", "undefined",
    // filler
    "test", "placeholder",
]);
|
|
161
|
+
/* ----------------------- conversation state ----------------------- */
|
|
162
|
+
/**
|
|
163
|
+
* Estimated Genie conversation lifetime in seconds. Databricks
|
|
164
|
+
* publishes no official TTL on the conversation resource itself;
|
|
165
|
+
* community projects (e.g. the open-source Databricks Genie Bot)
|
|
166
|
+
* converge on 4 hours of inactivity as a safe operating window.
|
|
167
|
+
* Treat this as an estimate that gets *extended on every use* by
|
|
168
|
+
* re-setting the cache entry after each successful turn (sliding
|
|
169
|
+
* TTL via re-`set`). When the estimate ends up wrong (conversation
|
|
170
|
+
* deleted, expired upstream, cross-space referenced), the wrapper
|
|
171
|
+
* catches the SDK's `RESOURCE_DOES_NOT_EXIST`/404 and transparently
|
|
172
|
+
* starts a fresh conversation.
|
|
173
|
+
*/
|
|
174
|
+
const CONVERSATION_TTL_SEC = 4 * 60 * 60;
|
|
175
|
+
/** Cache namespace prefix so coexisting Mastra caches don't collide. */
|
|
176
|
+
const CONVERSATION_CACHE_NAMESPACE = "mastra:genie:conversation";
|
|
177
|
+
/**
 * Name of the request-context slot that holds the active Genie
 * `conversation_id` for `spaceId`.
 *
 * The space id is part of the key, so two spaces used in the same
 * request each get their own slot.
 *
 * @param spaceId Genie space id.
 * @returns {string} `mastra__genie_conversation__<spaceId>`.
 */
const conversationContextKey = (spaceId) => {
    const prefix = "mastra__genie_conversation__";
    return `${prefix}${spaceId}`;
};
|
|
188
|
+
/**
 * Look up the Genie `conversation_id` currently stored on the
 * request context for `spaceId`.
 *
 * @param requestContext Context object exposing `get(key)`.
 * @param spaceId Genie space id the conversation belongs to.
 * @returns Whatever `requestContext.get` holds under the space's key
 *   (`undefined` when nothing has been stored).
 */
function readContextConversationId(requestContext, spaceId) {
    const slot = conversationContextKey(spaceId);
    return requestContext.get(slot);
}
|
|
196
|
+
/**
 * Store `conversationId` on the request context as the active Genie
 * conversation for `spaceId`, replacing any previous value.
 *
 * @param requestContext Context object exposing `set(key, value)`.
 * @param spaceId Genie space id the conversation belongs to.
 * @param conversationId Id to store; stored as-is, including `undefined`.
 */
function writeContextConversationId(requestContext, spaceId, conversationId) {
    const slot = conversationContextKey(spaceId);
    requestContext.set(slot, conversationId);
}
|
|
205
|
+
/* ------------------------- chart inventory ------------------------- */
|
|
247
206
|
/**
|
|
248
|
-
*
|
|
249
|
-
*
|
|
250
|
-
*
|
|
207
|
+
* Per-request {@link RequestContext} key the resolved chart
|
|
208
|
+
* inventory lives under. Keyed by `chartId`, the inventory is a
|
|
209
|
+
* `Map<string, ChartEvent>` carrying the full Echarts spec for
|
|
210
|
+
* every chart minted on this request - the same payload that
|
|
211
|
+
* goes out on the writer stream, kept in-process so output
|
|
212
|
+
* processors and downstream tools can resolve `[[chart:<id>]]`
|
|
213
|
+
* markers without re-running the planner or pulling from the
|
|
214
|
+
* writer stream.
|
|
251
215
|
*
|
|
252
|
-
*
|
|
253
|
-
* `
|
|
254
|
-
*
|
|
255
|
-
* chart-planner's model.
|
|
216
|
+
* Shared across all Genie spaces because chart ids are minted
|
|
217
|
+
* via `commonUtils.shortId()` and are unique within a single
|
|
218
|
+
* request regardless of which space produced them.
|
|
256
219
|
*/
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
220
|
+
const CHART_INVENTORY_CONTEXT_KEY = "mastra__genie_chart_inventory__";
|
|
221
|
+
/**
 * Return the chart inventory `Map` stored on the request context,
 * creating and storing an empty one when the slot does not already
 * hold a `Map`. Repeated calls on the same context hand back the same
 * instance, so callers can mutate it in place.
 *
 * @param requestContext Context object exposing `get` / `set`.
 * @returns {Map} The inventory map for this context.
 */
export function chartInventoryFromContext(requestContext) {
    let inventory = requestContext.get(CHART_INVENTORY_CONTEXT_KEY);
    if (!(inventory instanceof Map)) {
        inventory = new Map();
        requestContext.set(CHART_INVENTORY_CONTEXT_KEY, inventory);
    }
    return inventory;
}
|
|
237
|
+
/**
 * Register `chart` in the request-scoped chart inventory under its
 * `chartId`, so later code handling the same request can look it up
 * by id. Does nothing when no request context is supplied.
 *
 * @param requestContext Optional context object exposing `get` / `set`.
 * @param chart Chart payload; must carry a `chartId`.
 */
function recordChartInContext(requestContext, chart) {
    if (requestContext) {
        const inventory = chartInventoryFromContext(requestContext);
        inventory.set(chart.chartId, chart);
    }
}
|
|
249
|
+
/**
|
|
250
|
+
* `userKey` for `CacheManager.getOrExecute` / `generateKey`. Genie
|
|
251
|
+
* conversations are scoped to a single user + space + thread, and
|
|
252
|
+
* `threadId` is already user-scoped (Mastra mints threads per
|
|
253
|
+
* `resourceId`), so a constant user key here is safe and keeps the
|
|
254
|
+
* cache key short.
|
|
255
|
+
*/
|
|
256
|
+
const CONVERSATION_USER_KEY = "mastra-genie";
|
|
257
|
+
/**
 * Derive the cache key for a `(spaceId, threadId)` pair through
 * `CacheManager.generateKey`, using the conversation namespace and
 * the constant conversation user key.
 *
 * @param spaceId Genie space id.
 * @param threadId Thread id; when falsy no key is produced.
 * @returns The generated key, or `undefined` when `threadId` is missing.
 */
async function conversationCacheKey(spaceId, threadId) {
    if (threadId) {
        const cache = await CacheManager.getInstance();
        const keyParts = [CONVERSATION_CACHE_NAMESPACE, spaceId, threadId];
        return cache.generateKey(keyParts, CONVERSATION_USER_KEY);
    }
    return undefined;
}
|
|
267
|
+
/**
 * Fetch the conversation id stored under `cacheKey`.
 *
 * Never throws: a missing key, a cache miss (nullish value), or a
 * failing cache call all yield `undefined`; failures are logged at
 * `warn` level under `"conversation-cache:read-error"`.
 *
 * @param cacheKey Key to read; falsy skips the lookup.
 * @returns The cached value, or `undefined`.
 */
async function readCachedConversationId(cacheKey) {
    if (cacheKey) {
        try {
            const cached = await CacheManager.getInstanceSync().get(cacheKey);
            return cached ?? undefined;
        }
        catch (failure) {
            const error = failure instanceof Error ? failure.message : String(failure);
            log.warn("conversation-cache:read-error", { error });
        }
    }
    return undefined;
}
|
|
287
|
+
/**
 * Store `conversationId` under `cacheKey` with a TTL of
 * `CONVERSATION_TTL_SEC` seconds. Writing the same key again
 * restarts the TTL, which is how active conversations stay cached.
 *
 * Never throws: nothing is written when either argument is falsy,
 * and cache failures are logged at `warn` level under
 * `"conversation-cache:write-error"`.
 *
 * @param cacheKey Key to write; falsy skips the write.
 * @param conversationId Id to store; falsy skips the write.
 */
async function saveCachedConversationId(cacheKey, conversationId) {
    const nothingToSave = !cacheKey || !conversationId;
    if (nothingToSave) {
        return;
    }
    const options = { ttl: CONVERSATION_TTL_SEC };
    try {
        await CacheManager.getInstanceSync().set(cacheKey, conversationId, options);
    }
    catch (failure) {
        const error = failure instanceof Error ? failure.message : String(failure);
        log.warn("conversation-cache:write-error", { error });
    }
}
|
|
308
|
+
/**
 * Remove the conversation id cached under `cacheKey`; used when a
 * cached id turns out to be stale.
 *
 * Never throws: a falsy key is a no-op, and cache failures are
 * logged at `warn` level under `"conversation-cache:delete-error"`.
 *
 * @param cacheKey Key to delete; falsy skips the delete.
 */
async function evictCachedConversationId(cacheKey) {
    if (cacheKey) {
        try {
            await CacheManager.getInstanceSync().delete(cacheKey);
        }
        catch (failure) {
            const error = failure instanceof Error ? failure.message : String(failure);
            log.warn("conversation-cache:delete-error", { error });
        }
    }
}
|
|
321
|
+
/**
 * Decide whether `err` means "this conversation no longer exists".
 *
 * Checked in order:
 *  1. an `ApiError` with status 404 or error code
 *     `RESOURCE_DOES_NOT_EXIST`;
 *  2. an `HttpError` with code 404;
 *  3. any `Error` whose message contains "does not exist"
 *     (case-insensitive) - a deliberately loose catch-all.
 *
 * @param err Value caught from a Genie call.
 * @returns {boolean} `true` when any of the checks matches.
 */
function isConversationGoneError(err) {
    const typedApiMiss = err instanceof ApiError &&
        (err.statusCode === 404 || err.errorCode === "RESOURCE_DOES_NOT_EXIST");
    if (typedApiMiss) {
        return true;
    }
    const rawHttpMiss = err instanceof HttpError && err.code === 404;
    if (rawHttpMiss) {
        return true;
    }
    return err instanceof Error && /does not exist/i.test(err.message);
}
|
|
342
|
+
/**
 * Build the `ask_genie` tool for one Genie space.
 *
 * Each invocation:
 *  1. rejects empty / placeholder questions before contacting Genie;
 *  2. runs one `genieEventChat` turn, forwarding every event to
 *     `writer` and tracking the conversation id on the request
 *     context;
 *  3. if the turn fails because the conversation id it was seeded
 *     with no longer exists, evicts that id and retries once with a
 *     fresh conversation;
 *  4. refreshes the cached conversation id, then fetches the rows of
 *     the turn's statement (if any) and records non-empty results in
 *     `resultSets` keyed by statement id.
 *
 * @param deps Dependencies: `spaceId`, `client` (workspace client),
 *   `writer` (optional event sink), `signal` (optional abort signal),
 *   `resultSets` (object with `set(statementId, { data, messageId })`),
 *   `cacheKey` (conversation cache key, may be undefined).
 * @returns The tool created by `createTool`.
 */
function buildAskGenieTool(deps) {
    const { spaceId, client, writer, signal, resultSets, cacheKey } = deps;
    return createTool({
        id: "ask_genie",
        description: stringUtils.toDescription `
      Send ONE focused natural-language question to the Genie
      space and wait for the turn to complete. Returns the final
      \`GenieMessage\` plus, when the turn ran SQL, the rows of
      the resulting query as \`query_result_data\`. The
      \`statement_id\` you reference in your final \`data\`
      blocks lives at \`message.query_result.statement_id\` (or
      the first attachment's \`query.statement_id\`). Wire
      events (status, thinking, sql) stream to the user
      automatically. Call multiple times to gather different
      angles before composing the final response.
    `,
        inputSchema: z.object({
            question: z.string().min(1, "question is required"),
        }),
        outputSchema: z.object({
            message: z.custom(),
            query_result_data: z.custom().optional(),
        }),
        execute: async ({ question }, ctxRaw) => {
            const ctx = ctxRaw;
            const requestContext = ctx?.requestContext;
            if (!requestContext) {
                // Mastra always passes a `RequestContext` to tools when the
                // parent agent received one. The outer Genie tool insists on
                // it (it sources the user from there), so this only fires
                // if a misconfigured caller invokes `ask_genie` directly.
                throw new Error("ask_genie: missing requestContext (parent agent must propagate it)");
            }
            // Bounce placeholder / no-op questions BEFORE spending a Genie
            // round-trip on them. The structuring pass occasionally pads
            // out the tool loop with a fake `ask_genie("noop")` call,
            // which Genie answers with "Your request 'noop' does not
            // relate to..." - useless noise that shows up in the UI and
            // eats one of the workspace's 5 questions/minute. Returning
            // a clear error here surfaces the issue to the agent loop so
            // the model corrects course instead of wasting a turn.
            const trimmed = question.trim();
            if (trimmed.length === 0 || PLACEHOLDER_QUESTIONS.has(trimmed.toLowerCase())) {
                throw new Error(`ask_genie: refusing placeholder question "${question}" - ` +
                    `call ask_genie only with a real natural-language question, ` +
                    `or skip the call entirely`);
            }
            // Single turn of `genieEventChat`. Hoisted into a closure so
            // we can re-run it after evicting a stale `conversation_id`
            // without duplicating the event-loop body.
            const runTurn = async () => {
                const seedConversationId = readContextConversationId(requestContext, spaceId);
                let turnResult;
                for await (const event of genieEventChat(spaceId, question, {
                    workspaceClient: client,
                    ...(seedConversationId ? { conversationId: seedConversationId } : {}),
                    ...(signal ? { context: signal } : {}),
                })) {
                    await safeWrite(writer, event);
                    // Wire events come in two flavors: the lifecycle `message`
                    // event embeds the raw `GenieMessage` (read its
                    // `conversation_id`), and the rest carry a flat
                    // `conversation_id` field at the top level. The terminal
                    // `result` event also carries the final `GenieMessage`
                    // inline so we can capture the snapshot without re-reading
                    // a buffered `message` event.
                    const eventConversationId = event.type === "message"
                        ? event.message.conversation_id
                        : event.conversation_id;
                    if (eventConversationId) {
                        writeContextConversationId(requestContext, spaceId, eventConversationId);
                    }
                    if (event.type === "result") {
                        turnResult = event.message;
                    }
                }
                if (!turnResult) {
                    throw new Error("Genie turn ended without a result event");
                }
                return turnResult;
            };
            // Snapshot the seed BEFORE the first attempt. `runTurn` overwrites
            // the context value as events arrive, so reading it inside the
            // `catch` would also pick up an id minted by the failed turn
            // itself - making an unseeded call look seeded and triggering a
            // retry it should never get.
            const seeded = readContextConversationId(requestContext, spaceId);
            let finalMessage;
            try {
                finalMessage = await runTurn();
            }
            catch (err) {
                // The seeded `conversation_id` was rejected by Genie - most
                // commonly because it was deleted upstream, expired past
                // Databricks' (undocumented) lifetime, or was minted in a
                // different space. Drop both the cached id AND the
                // per-request value so the retry calls `startConversation`,
                // and try once more. Only retry when we *had* a seeded id -
                // a fresh call that 404s shouldn't loop.
                if (seeded && isConversationGoneError(err)) {
                    log.warn("conversation-cache:stale, resetting", {
                        spaceId,
                        conversationId: seeded,
                        error: err instanceof Error ? err.message : String(err),
                    });
                    await evictCachedConversationId(cacheKey);
                    writeContextConversationId(requestContext, spaceId, undefined);
                    finalMessage = await runTurn();
                }
                else {
                    throw err;
                }
            }
            // Refresh the cache entry on every successful turn. Re-setting
            // the same key both persists newly-minted ids (cache miss path)
            // and extends the TTL on active conversations (sliding window).
            await saveCachedConversationId(cacheKey, readContextConversationId(requestContext, spaceId));
            const statementId = extractStatementId(finalMessage);
            let queryResultData;
            if (statementId) {
                const data = await fetchStatementData(client, statementId, signal);
                if (data.rowCount > 0) {
                    queryResultData = data;
                    // Stash with this ask's `message_id` so the outer chart
                    // loop can stamp downstream `chart` events with the
                    // same id the wire events carry - keeps the chart in
                    // the same `message_id` pill bucket on the host UI.
                    resultSets.set(statementId, {
                        data,
                        messageId: finalMessage.message_id,
                    });
                }
            }
            return {
                message: finalMessage,
                ...(queryResultData ? { query_result_data: queryResultData } : {}),
            };
        },
    });
}
|
|
477
|
+
function buildSpaceDescriptionTool(deps) {
    const { spaceId, client, signal } = deps;
    // Cheap grounding tool: one `getSpace` call, trimmed down to the
    // handful of fields the inner agent needs to orient itself.
    const describeSpace = async () => {
        const requestCtx = signal ? apiUtils.toContext(signal) : undefined;
        const space = await client.genie.getSpace({ space_id: spaceId }, requestCtx);
        // Only attach optional fields when Genie returned a truthy
        // value, so the result never carries explicit `undefined` keys.
        const result = { spaceId };
        if (space.title) {
            result.title = space.title;
        }
        if (space.description) {
            result.description = space.description;
        }
        if (space.warehouse_id) {
            result.warehouseId = space.warehouse_id;
        }
        return result;
    };
    return createTool({
        id: "get_space_description",
        description: stringUtils.toDescription `
      Return the Genie space's title, description, and warehouse id.
      Cheap. Call once at the start of a turn to ground yourself
      in what data the space covers.
    `,
        inputSchema: z.object({}),
        outputSchema: z.object({
            spaceId: z.string(),
            title: z.string().optional(),
            description: z.string().optional(),
            warehouseId: z.string().optional(),
        }),
        execute: describeSpace,
    });
}
|
|
505
|
+
function buildSpaceSerializedTool(deps) {
    const { spaceId, client, signal } = deps;
    // Heavier sibling of `get_space_description`: hands back the raw
    // `getSpace` response untouched, wrapped under a `space` key.
    const fetchSerializedSpace = async () => {
        const requestCtx = signal ? apiUtils.toContext(signal) : undefined;
        return {
            space: await client.genie.getSpace({ space_id: spaceId }, requestCtx),
        };
    };
    return createTool({
        id: "get_space_serialized",
        description: stringUtils.toDescription `
      Return the full \`GenieSpace\` JSON for this space. Use only
      when you need exact column / table identifiers
      \`get_space_description\` doesn't expose. Larger payload, so
      prefer the description tool when it's enough.
    `,
        inputSchema: z.object({}),
        outputSchema: z.object({ space: z.unknown() }),
        execute: fetchSerializedSpace,
    });
}
|
|
524
|
+
/* --------------------------- inner agent --------------------------- */
|
|
525
|
+
const AGENT_INSTRUCTIONS = stringUtils.toDescription `
|
|
526
|
+
You orchestrate a Databricks Genie space. For every user
|
|
527
|
+
question:
|
|
528
|
+
|
|
529
|
+
1. Optionally call \`get_space_description\` to ground; reach
|
|
530
|
+
for \`get_space_serialized\` only when you need exact
|
|
531
|
+
column / table names the description doesn't expose.
|
|
532
|
+
2. Decompose the question into focused sub-questions (one per
|
|
533
|
+
distinct metric / dimension / time window) and call
|
|
534
|
+
\`ask_genie\` once per sub-question. Two to six calls is
|
|
535
|
+
typical for a non-trivial question; one call is fine when
|
|
536
|
+
the question is genuinely atomic.
|
|
537
|
+
3. Each \`ask_genie\` call returns the terminal
|
|
538
|
+
\`GenieMessage\`. When the turn ran SQL it also returns
|
|
539
|
+
\`query_result_data\` - the actual rows. The matching
|
|
540
|
+
\`statement_id\` is on
|
|
541
|
+
\`message.query_result.statement_id\` (or the first
|
|
542
|
+
attachment's \`query.statement_id\`). You will reference
|
|
543
|
+
that exact id in your final \`data\` blocks.
|
|
544
|
+
4. Produce a final structured summary as an ordered array
|
|
545
|
+
interleaving \`text\` paragraphs with \`data\` blocks.
|
|
546
|
+
INTERLEAVE: prose first, then the \`data\` block it
|
|
547
|
+
interprets, then the next prose / data pair. Never dump
|
|
548
|
+
all data at the end.
|
|
549
|
+
5. For every \`data\` block, supply the exact
|
|
550
|
+
\`statement_id\` you saw on the \`ask_genie\` response. A
|
|
551
|
+
short \`description\` ("compare quarterly revenue across
|
|
552
|
+
regions", "highlight the steep drop after position 5")
|
|
553
|
+
biases the chart-planner's choice of visual. Do NOT pick
|
|
554
|
+
chart types or axis labels - the host wraps each \`data\`
|
|
555
|
+
block in a chart automatically.
|
|
556
|
+
6. Each \`data\` block should be followed by a short
|
|
557
|
+
\`text\` interpretation (deltas, anomalies, takeaways).
|
|
558
|
+
Don't paraphrase numbers the visualization will already
|
|
559
|
+
show. Skip openers / closers. Plain prose, hyphens (not em
|
|
560
|
+
/ en dashes), no emojis.
|
|
561
|
+
`;
|
|
308
562
|
/**
|
|
309
|
-
*
|
|
310
|
-
*
|
|
311
|
-
*
|
|
312
|
-
*
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
563
|
+
* Boundary schema for the inner agent's structured output. Two
|
|
564
|
+
* tagged shapes only - text or data. The wrapper maps these onto
|
|
565
|
+
* the shared {@link GenieSummaryItem} (`string` / `visualize`)
|
|
566
|
+
* after charting; we don't redefine GenieSummaryItem here.
|
|
567
|
+
*/
|
|
568
|
+
const agentSummarySchema = (() => {
    // Prose paragraph emitted by the inner agent.
    const proseItem = z.object({
        type: z.literal("text"),
        text: z.string(),
    });
    // Reference to a result set previously returned by `ask_genie`,
    // identified by its statement id; title / description are optional.
    const datasetItem = z.object({
        type: z.literal("data"),
        statementId: z.string(),
        title: z.string().optional(),
        description: z.string().optional(),
    });
    // The two shapes are told apart by their `type` tag.
    return z.object({
        summary: z.array(z.discriminatedUnion("type", [proseItem, datasetItem])),
    });
})();
|
|
582
|
+
/**
|
|
583
|
+
* Build the calling agent's Genie tool. The returned Mastra tool
|
|
584
|
+
* runs end-to-end on each invocation:
|
|
329
585
|
*
|
|
330
|
-
*
|
|
331
|
-
*
|
|
332
|
-
*
|
|
333
|
-
*
|
|
334
|
-
* the
|
|
335
|
-
*
|
|
336
|
-
*
|
|
337
|
-
*
|
|
586
|
+
* 1. Pull the per-request `WorkspaceClient` off
|
|
587
|
+
* `ctx.requestContext` (stamped by `MastraServer` under
|
|
588
|
+
* {@link MASTRA_USER_KEY}) and emit a `started` writer
|
|
589
|
+
* event so the host UI shows progress immediately.
|
|
590
|
+
* 2. Spin up the inner Mastra agent + three tools, fresh per
|
|
591
|
+
* call so the row cache stays invocation-scoped.
|
|
592
|
+
* 3. Run the agent with `structuredOutput` against
|
|
593
|
+
* {@link agentSummarySchema}. Mastra's two-pass design keeps
|
|
594
|
+
* the inner loop tools-only (no `response_format`), so the
|
|
595
|
+
* Databricks Model Serving `response_format`+`tools`
|
|
596
|
+
* collision never fires.
|
|
597
|
+
* 4. Walk the returned `[text|data][]`, map `text` items to
|
|
598
|
+
* shared `GenieSummaryItem.string`, and chart every `data`
|
|
599
|
+
* item in parallel via {@link runChartPlanner} to a
|
|
600
|
+
* `GenieSummaryItem.visualize`. Items referencing a missing
|
|
601
|
+
* `statementId` are dropped with a warn log; chart-planner
|
|
602
|
+
* failures leave `dataset.chart` unset so the host UI falls
|
|
603
|
+
* back to a table.
|
|
338
604
|
*/
|
|
339
|
-
|
|
340
|
-
const { config,
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
for await (const event of stream) {
|
|
380
|
-
// Per-event raw payload for tuning the pill / answer pipeline
|
|
381
|
-
// against real Genie traffic. At `info` (the default) this is
|
|
382
|
-
// discarded for free; flip `LOG_LEVEL=debug` to see every
|
|
383
|
-
// raw wire event before the switch routes it through writer
|
|
384
|
-
// and DrainResult.
|
|
385
|
-
log.debug("event", { type: event.type, payload: event });
|
|
386
|
-
switch (event.type) {
|
|
387
|
-
case "message_start":
|
|
388
|
-
conversationId = event.conversationId;
|
|
389
|
-
await emit({
|
|
390
|
-
kind: "started",
|
|
391
|
-
conversationId: event.conversationId,
|
|
392
|
-
messageId: event.messageId,
|
|
393
|
-
spaceId: event.spaceId,
|
|
394
|
-
});
|
|
395
|
-
break;
|
|
396
|
-
case "status":
|
|
397
|
-
if (event.status === lastStatus)
|
|
398
|
-
break;
|
|
399
|
-
lastStatus = event.status;
|
|
400
|
-
await emit({
|
|
401
|
-
kind: "status",
|
|
402
|
-
status: event.status,
|
|
403
|
-
label: humanizeGenieStatus(event.status),
|
|
404
|
-
});
|
|
405
|
-
break;
|
|
406
|
-
case "query_result": {
|
|
407
|
-
const columns = (event.data?.manifest?.schema?.columns ?? []).map((c) => c.name);
|
|
408
|
-
const dataArray = (event.data?.result?.data_array ?? []);
|
|
409
|
-
const rows = genieRowsToObjects(columns, dataArray);
|
|
410
|
-
const scratch = getScratch(event.statementId);
|
|
411
|
-
// emitChartWithPlanning emits the dataset event immediately
|
|
412
|
-
// and kicks off the chart-planner agent in the background.
|
|
413
|
-
// It returns the chartId synchronously; the plannerPromise
|
|
414
|
-
// is awaited at end-of-stream so chart work shows up under
|
|
415
|
-
// this tool's trace span.
|
|
416
|
-
const { chartId, plannerPromise } = await emitChartWithPlanning({
|
|
417
|
-
...(writer ? { writer } : {}),
|
|
418
|
-
config,
|
|
419
|
-
...(requestContext ? { requestContext } : {}),
|
|
420
|
-
title: scratch.title ?? `Genie query`,
|
|
421
|
-
...(scratch.description ? { description: scratch.description } : {}),
|
|
422
|
-
data: rows,
|
|
423
|
-
});
|
|
424
|
-
scratch.chartId = chartId;
|
|
425
|
-
scratch.columns = columns;
|
|
426
|
-
scratch.rowCount = rows.length;
|
|
427
|
-
plannerPromises.push(plannerPromise);
|
|
428
|
-
log.debug("query_result", {
|
|
429
|
-
statementId: event.statementId,
|
|
430
|
-
chartId,
|
|
431
|
-
rows: rows.length,
|
|
432
|
-
columns,
|
|
433
|
-
});
|
|
434
|
-
break;
|
|
605
|
+
export function createGenieTool(opts) {
    // Options read from `opts`:
    //   - spaceId:         Genie space this tool targets.
    //   - config:          forwarded to `buildModel` and `runChartPlanner`.
    //   - toolId:          tool id, defaults to "genie".
    //   - toolDescription: LLM-facing description, defaults to the text below.
    //   - maxSteps:        step budget for the inner agent, defaults to
    //                      `DEFAULT_MAX_STEPS`.
    // Returns the tool built by `createTool`. Its `execute` resolves to
    // `{ spaceId, summary, conversationId?, error? }` and throws when the
    // `requestContext` or the user stamped under `MASTRA_USER_KEY` is missing.
    const { spaceId, config, toolId = "genie", toolDescription = stringUtils.toDescription `
    Ask a question about the Databricks Genie space.

    Returns \`{ summary: SummaryItem[] }\` where each item is
    one of:

    - \`{ type: "string", text }\` - prose to weave into your
      reply verbatim or paraphrase.
    - \`{ type: "visualize", statementId, title?, description?,
      dataset: { data: { columns, rows, rowCount },
      chart?: { chartId, chartType } } }\` - a chartable result
      set. When \`dataset.chart\` is present the chart is ALREADY
      rendered and queued for inline display; embed the marker
      \`[[chart:<chartId>]]\` on its own line at the position
      you want it to appear and the host UI drops the rendered
      chart in. Re-use the chartId verbatim - do NOT call
      \`render_data\` for the same dataset (it would render the
      same chart a second time and stall your stream). Only
      fall back to \`render_data\` when \`dataset.chart\` is
      missing (chart-planner failed) AND you genuinely need a
      picture; otherwise present the data inline as prose or a
      short table.
  `, maxSteps = DEFAULT_MAX_STEPS, } = opts;
    return createTool({
        id: toolId,
        description: toolDescription,
        inputSchema: z.object({
            question: z.string().describe(stringUtils.toDescription `
          Natural-language question about the data in this Genie
          space. Phrase it from the user's perspective; the agent
          decomposes it internally.
        `),
        }),
        // No validation predicate is supplied here; the result shape is
        // whatever `execute` below returns.
        outputSchema: z.custom(),
        execute: async (input, ctxRaw) => {
            const ctx = ctxRaw;
            const requestContext = ctx?.requestContext;
            if (!requestContext) {
                throw new Error("genie: missing requestContext (MastraServer must stamp MASTRA_USER_KEY)");
            }
            const user = requestContext.get(MASTRA_USER_KEY);
            if (!user) {
                throw new Error("genie: no user on requestContext (MASTRA_USER_KEY not set)");
            }
            const client = user.executionContext.client;
            const writer = ctx?.writer;
            const signal = ctx?.abortSignal;
            const threadId = requestContext.get(MASTRA_THREAD_ID_KEY);
            // Fire the lifecycle `started` event before any LLM /
            // network round-trip so the host UI can pop a "Thinking..."
            // pill the instant the model decides to delegate. The wire
            // `conversation_id` / `message_id` aren't known yet (no
            // Genie call has been made) and ride as `undefined` -
            // subscribers that need them watch the later
            // `message` / `result` wire events for the real ids.
            const startedEvent = {
                type: "started",
                spaceId,
                content: input.question,
            };
            await safeWrite(writer, startedEvent);
            // statementId -> { data, messageId }; filled by the inner
            // `ask_genie` tool and read back when hydrating `data` items.
            const resultSets = new Map();
            // Seed the active Genie `conversation_id` onto
            // `RequestContext` from AppKit's `CacheManager` when a Mastra
            // `threadId` is present so multi-turn chats reuse the same
            // Genie conversation (and Genie's accumulated context) across
            // separate tool invocations. The same `RequestContext` flows
            // to the inner `ask_genie` tool via Mastra, which reads and
            // updates the same slot as Genie hands out / rotates ids.
            // Cache misses, threads without memory, and unhealthy cache
            // storage all leave the slot unset, which makes `ask_genie`
            // call `startConversation` and mint a fresh id (then cache
            // it).
            const cacheKey = await conversationCacheKey(spaceId, threadId);
            const cachedConversationId = await readCachedConversationId(cacheKey);
            if (cachedConversationId) {
                writeContextConversationId(requestContext, spaceId, cachedConversationId);
            }
            // Optional deps are only attached when present, so the inner
            // tools never see explicit `undefined` keys.
            const innerDeps = {
                spaceId,
                client,
                ...(writer ? { writer } : {}),
                ...(signal ? { signal } : {}),
                resultSets,
                ...(cacheKey ? { cacheKey } : {}),
            };
            const tools = {
                ask_genie: buildAskGenieTool(innerDeps),
                get_space_description: buildSpaceDescriptionTool({
                    spaceId,
                    client,
                    ...(signal ? { signal } : {}),
                }),
                get_space_serialized: buildSpaceSerializedTool({
                    spaceId,
                    client,
                    ...(signal ? { signal } : {}),
                }),
            };
            // Resolve the model config once for this request so we can
            // share it with the structuring pass below. The agent's
            // `model` field accepts a function form for per-request
            // resolution, but `structuredOutput.model` requires a
            // static `MastraModelConfig`, and we need both to be on
            // the same Databricks endpoint with the same OBO-scoped
            // headers. Calling `buildModel` here (inside `execute`)
            // keeps user scoping correct because `requestContext`
            // already reflects the active request's user.
            const resolvedModel = await buildModel(config, requestContext);
            const agent = new Agent({
                id: `genie__${spaceId}`,
                name: `Genie (${spaceId})`,
                description: stringUtils.toDescription `
          Inner orchestrator for the "${spaceId}" Genie space.
          Asks Genie one focused sub-question at a time and
          returns an interleaved [text|data] summary.
        `,
                instructions: AGENT_INSTRUCTIONS,
                model: resolvedModel,
                tools,
            });
            // Mastra's `structuredOutput` operates in one of two modes
            // based on whether `model` is set:
            //   - "direct" (no model): the schema is enforced in the SAME
            //     LLM call as the agent loop, by adding `response_format`
            //     alongside `tools`. Databricks Model Serving rejects that
            //     combination with `INVALID_PARAMETER_VALUE: Cannot specify
            //     both response_format and tools in the same request.`
            //   - "processor" (model passed): the main loop carries tools
            //     and NO `response_format`; a separate, tool-free
            //     structuring agent re-prompts the model with
            //     `response_format` to coerce the agent's final text into
            //     the schema.
            // We use "processor" mode but ALSO set
            // `jsonPromptInjection: true`. Mastra's structuring agent
            // calls `.stream(...)` under the hood, and Databricks Model
            // Serving rejects `response_format` together with streaming
            // (`INVALID_PARAMETER_VALUE: Structured output is not
            // currently supported with streaming.`). Prompt injection
            // sidesteps that by embedding the JSON Schema in the
            // structuring agent's system prompt instead of sending
            // `response_format`. `errorStrategy: "warn"` keeps a
            // structuring failure from escaping as an unhandled
            // promise rejection: it logs and leaves `result.object`
            // undefined, which we surface as a clean error in
            // {@link GenieAgentResult}.
            const agentResult = await agent.generate(input.question, {
                requestContext,
                maxSteps,
                structuredOutput: {
                    schema: agentSummarySchema,
                    model: resolvedModel,
                    jsonPromptInjection: true,
                    errorStrategy: "warn",
                },
                ...(signal ? { abortSignal: signal } : {}),
            });
            const submission = agentResult.object;
            if (!submission) {
                // Structuring failed: return an empty summary with an
                // `error` string instead of throwing.
                const message = "Genie agent returned no structured summary";
                log.warn("agent:no-summary", { spaceId });
                const finalConversationId = readContextConversationId(requestContext, spaceId);
                return {
                    spaceId,
                    summary: [],
                    ...(finalConversationId ? { conversationId: finalConversationId } : {}),
                    error: message,
                };
            }
            // Lifecycle hook: the agent + structuring pass are done.
            // Emit one `summary` event with the structured-item counts
            // so the host UI can transition from "thinking" to
            // "charting" and seed N chart skeletons before the
            // per-chart `chart` events arrive. We can't fire this
            // EARLIER (i.e. when the structuring pass starts) because
            // Mastra runs the inner loop + structuring pass together
            // inside `agent.generate(...)` with no observable boundary
            // between them.
            const textItemCount = submission.summary.filter((i) => i.type === "text").length;
            // The schema allows only `text` and `data`, so everything
            // that is not text is a data item.
            const dataItemCount = submission.summary.length - textItemCount;
            const summaryEvent = {
                type: "summary",
                spaceId,
                items: submission.summary.length,
                textItems: textItemCount,
                dataItems: dataItemCount,
            };
            await safeWrite(writer, summaryEvent);
            // Chart every `data` item in parallel; map `text` items to
            // the shared `string` summary variant verbatim. Missing
            // statement ids are dropped (the agent referenced something
            // that never came back from `ask_genie`), planner failures
            // leave `dataset.chart` unset so the host UI falls back to
            // a table render. Each successfully planned chart pushes a
            // `chart` writer event so the UI can fade in the rendered
            // chart slot the moment its planner returns rather than
            // waiting for the entire batch to finish.
            const hydrated = await Promise.all(submission.summary.map(async (item) => {
                if (item.type === "text") {
                    return { type: "string", text: item.text };
                }
                const entry = resultSets.get(item.statementId);
                if (!entry) {
                    log.warn("data:missing-statement", {
                        statementId: item.statementId,
                    });
                    // `undefined` placeholders are filtered out after
                    // `Promise.all` resolves.
                    return undefined;
                }
                const { data, messageId } = entry;
                // Starts chart-less; upgraded inside the `try` once the
                // planner has produced a chart.
                let dataset = { data };
                try {
                    const planned = await runChartPlanner({
                        config,
                        requestContext,
                        title: item.title ?? "Genie result",
                        ...(item.description ? { description: item.description } : {}),
                        data: data.rows,
                        ...(signal ? { signal } : {}),
                    });
                    const chartId = commonUtils.shortId();
                    // Slim chart reference for the LLM-bound result: just
                    // `chartId` + `chartType`. The full Echarts spec goes
                    // to the UI via the writer event AND into the
                    // request-scoped chart inventory below; the model
                    // only needs the id to place `[[chart:<id>]]`.
                    dataset = {
                        data,
                        chart: {
                            chartId,
                            chartType: planned.chartType,
                        },
                    };
                    const chartEvent = {
                        type: "chart",
                        chartId,
                        statementId: item.statementId,
                        messageId,
                        ...(item.title ? { title: item.title } : {}),
                        ...(item.description ? { description: item.description } : {}),
                        data: data.rows,
                        option: planned.option,
                    };
                    await safeWrite(writer, chartEvent);
                    // Stash the resolved chart on the per-request
                    // `RequestContext` so downstream code in the same
                    // request (output processors, follow-up tool calls,
                    // any post-run hook) can look up the full spec by
                    // `chartId` without re-fetching or re-planning.
                    recordChartInContext(requestContext, chartEvent);
                }
                catch (err) {
                    const errorMessage = err instanceof Error ? err.message : String(err);
                    log.warn("chart:error", {
                        statementId: item.statementId,
                        messageId,
                        error: errorMessage,
                    });
                    // Surface the chart-planner failure as a writer event
                    // stamped with the same `messageId` the rest of this
                    // ask's wire events carry, so the host UI groups the
                    // failure into the same pill bucket and can surface
                    // a "couldn't render chart" note next to the table
                    // fallback instead of silently dropping the chart.
                    const errorEvent = {
                        type: "error",
                        spaceId,
                        messageId,
                        error: `chart-planner: ${errorMessage}`,
                    };
                    await safeWrite(writer, errorEvent);
                }
                return {
                    type: "visualize",
                    statementId: item.statementId,
                    ...(item.title ? { title: item.title } : {}),
                    ...(item.description ? { description: item.description } : {}),
                    dataset,
                };
            }));
            // Drop the placeholders left by items whose statement id was
            // not found in `resultSets`.
            const summary = hydrated.filter((x) => x !== undefined);
            log.info("genie:done", {
                spaceId,
                items: summary.length,
                statementsCharted: summary.filter((s) => s.type === "visualize" && s.dataset.chart).length,
            });
            const finalConversationId = readContextConversationId(requestContext, spaceId);
            return {
                spaceId,
                summary,
                ...(finalConversationId ? { conversationId: finalConversationId } : {}),
            };
        },
    });
}
|
|
911
|
+
/* --------------------- multi-alias surface --------------------- */
|
|
525
912
|
/**
|
|
526
|
-
*
|
|
527
|
-
*
|
|
528
|
-
*
|
|
529
|
-
*
|
|
530
|
-
* through verbatim. `null` becomes `null`.
|
|
913
|
+
* Default tool id for a wired Genie alias. The well-known
|
|
914
|
+
* `default` alias collapses to `genie`; every other alias gets a
|
|
915
|
+
* `genie_` prefix so multi-space registrations stay
|
|
916
|
+
* disambiguated.
|
|
531
917
|
*/
|
|
532
|
-
function
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
918
|
+
export function defaultGenieToolName(alias) {
    // The well-known default alias maps to the bare "genie" id; any
    // other alias is combined with "genie" into an identifier.
    return alias === DEFAULT_GENIE_ALIAS
        ? "genie"
        : stringUtils.toIdentifierWithOptions({ distinct: true }, "genie", alias);
}
|
|
923
|
+
/**
|
|
924
|
+
* Normalize the {@link GenieSpacesConfig} record. Bare-string
|
|
925
|
+
* entries (`{ default: "01ef..." }`) get wrapped as
|
|
926
|
+
* `{ spaceId: "01ef..." }`; object entries pass through unchanged.
|
|
927
|
+
* `undefined` and empty-string values are dropped so callers can
|
|
928
|
+
* pass `process.env.X` directly (matches AppKit `genie()`'s
|
|
929
|
+
* defensive treatment of unset env vars).
|
|
930
|
+
*/
|
|
931
|
+
export function normalizeGenieSpaces(spaces) {
    // Returns a fresh `{ [alias]: { spaceId, ... } }` record. A falsy
    // `spaces` argument yields an empty record.
    if (!spaces)
        return {};
    const out = {};
    for (const [alias, value] of Object.entries(spaces)) {
        // `== null` drops both `undefined` and `null`. A `null` entry
        // (e.g. from JSON config) previously fell through to
        // `value.spaceId` and threw a TypeError.
        if (value == null)
            continue;
        if (typeof value === "string") {
            // Bare space id: wrap it, unless it is the empty string
            // (an unset env var passed straight through).
            if (value)
                out[alias] = { spaceId: value };
            continue;
        }
        // Object entries without a usable `spaceId` are dropped; valid
        // ones pass through by reference, unchanged.
        if (!value.spaceId)
            continue;
        out[alias] = value;
    }
    return out;
}
|
|
544
|
-
/**
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
950
|
+
/**
|
|
951
|
+
* Discover Genie space aliases from every supported source and
|
|
952
|
+
* merge them into a single record. Precedence (highest first):
|
|
953
|
+
*
|
|
954
|
+
* 1. {@link MastraPluginConfig.genieSpaces} on the `mastra(...)`
|
|
955
|
+
* call. Explicit Mastra wiring always wins so users can
|
|
956
|
+
* override AppKit's defaults per-agent.
|
|
957
|
+
* 2. AppKit `genie({ spaces: { ... } })` plugin instance. Lets
|
|
958
|
+
* users keep using the existing AppKit config format
|
|
959
|
+
* (`genie({ spaces: { sales: "...", ops: "..." } })`)
|
|
960
|
+
* without restating the same record on the Mastra plugin.
|
|
961
|
+
* Read off the live plugin instance via a structural cast
|
|
962
|
+
* since `Plugin.config` is TS-protected (not runtime-private).
|
|
963
|
+
* 3. `DATABRICKS_GENIE_SPACE_ID` env var (registered under the
|
|
964
|
+
* well-known `default` alias). Matches the AppKit `genie()`
|
|
965
|
+
* plugin's fallback behavior so a bare `mastra()` + `genie()`
|
|
966
|
+
* pair just works.
|
|
967
|
+
*
|
|
968
|
+
* Aliases collide cleanly: a higher-precedence source's value
|
|
969
|
+
* replaces a lower one's wholesale. Sources that contribute zero
|
|
970
|
+
* aliases (or contribute only `undefined` / empty entries) are
|
|
971
|
+
* silently ignored.
|
|
972
|
+
*/
|
|
973
|
+
export function resolveGenieSpaces(config, context) {
    // Collect the contributing sources lowest-precedence first; the
    // final `Object.assign` lets later layers overwrite earlier ones
    // alias-by-alias.
    const layers = [];
    // Lowest precedence: the env var, registered under the default alias.
    const fromEnv = process.env["DATABRICKS_GENIE_SPACE_ID"];
    if (fromEnv) {
        layers.push({ [DEFAULT_GENIE_ALIAS]: { spaceId: fromEnv } });
    }
    // Middle: `spaces` read off the live AppKit `genie()` plugin
    // instance, when one is registered on `context`.
    const appkitSpaces = appkitUtils.instance(context, genie)?.config?.spaces;
    if (appkitSpaces) {
        layers.push(normalizeGenieSpaces(appkitSpaces));
    }
    // Highest precedence: explicit `genieSpaces` on the Mastra config.
    const explicitSpaces = config.genieSpaces;
    if (explicitSpaces) {
        layers.push(normalizeGenieSpaces(explicitSpaces));
    }
    return Object.assign({}, ...layers);
}
|
|
557
997
|
/**
 * Create one Mastra tool for every configured Genie space.
 *
 * `opts.spaces` is normalized first; each resulting alias then gets
 * a {@link createGenieTool} instance whose id is derived from the
 * alias and whose description names the space (plus its optional
 * hint) so the calling LLM can tell which space covers what data.
 *
 * The result is a record keyed by tool id, suitable for spreading
 * into an `Agent`'s `tools` map or for returning from
 * `plugins.genie?.toolkit()`.
 */
export function buildGenieTools(opts) {
    const spacesByAlias = normalizeGenieSpaces(opts.spaces);
    return Object.entries(spacesByAlias).reduce((toolsById, [alias, space]) => {
        const toolId = defaultGenieToolName(alias);
        // Only mention the hint when the space actually carries one.
        const hintSuffix = space.hint ? ` (${space.hint})` : "";
        // NOTE(review): the template text below is kept flush-left exactly
        // as it appears in this view; `toDescription` presumably normalizes
        // whitespace - confirm before re-indenting.
        const toolDescription = stringUtils.toDescription `
Delegate a natural-language data question to the
Databricks Genie space "${alias}"${hintSuffix}.
Returns an ordered (text | dataset)[] summary the host UI
renders inline; datasets carry the rows and a
pre-rendered Echarts spec when the chart-planner
succeeded. Progress events (status, SQL, row counts,
charts) stream to the UI automatically.
`;
        toolsById[toolId] = createGenieTool({
            spaceId: space.spaceId,
            config: opts.config,
            toolId,
            toolDescription,
        });
        return toolsById;
    }, {});
}
|
|
1030
|
+
/**
 * Adapter that exposes the Genie tools through a plugin-toolkit
 * object, so a `plugins.genie?.toolkit()` lookup inside an agent's
 * `tools(plugins)` callback yields the Genie agent-backed tools
 * rather than throwing on a missing plugin. The returned object
 * mirrors AppKit's `PluginToolkitProvider` shape.
 *
 * The argument passed to `toolkit()` is ignored; every call builds
 * the tools from the `opts` captured here.
 */
export function buildGenieToolkitProvider(opts) {
    const toolkit = (_opts) => buildGenieTools(opts);
    return { toolkit };
}
|
|
591
1043
|
/**
 * Reports whether {@link resolveGenieSpaces} yields at least one
 * Genie space - whether it comes from
 * {@link MastraPluginConfig.genieSpaces}, the AppKit `genie()`
 * plugin instance, or the `DATABRICKS_GENIE_SPACE_ID` env var.
 *
 * Intended as a cheap guard for `resolveProvider`: when nothing is
 * wired, the `plugins.genie` lookup can keep resolving to
 * `undefined`, matching AppKit's absent-plugin semantics.
 */
export function hasAnyGenieSpaces(config, context) {
    const resolvedSpaces = resolveGenieSpaces(config, context);
    return Object.keys(resolvedSpaces).length !== 0;
}
|