@dbx-tools/appkit-mastra 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/genie.ts CHANGED
@@ -11,14 +11,17 @@
11
11
  * upstream change in `@databricks/appkit` flows in automatically.
12
12
  *
13
13
  * As Genie streams its long-running events (`FETCHING_METADATA` →
14
- * `ASKING_AI` → `EXECUTING_QUERY` → `COMPLETED`, plus SQL queries and
15
- * row data in `message_result.attachments` / `query_result`), the tool
16
- * forwards a normalised {@link GenieProgress} discriminated union out
17
- * through `ctx.writer` so the client can render incremental feedback
18
- * (status pill, SQL code block, row count) while the LLM still sees a
19
- * single clean final payload.
14
+ * `ASKING_AI` → `EXECUTING_QUERY` → `COMPLETED`, plus SQL text and
15
+ * follow-ups in `message_result.attachments`), the tool forwards a
16
+ * normalised {@link GenieProgress} discriminated union out through
17
+ * `ctx.writer` so the client can render an incremental loading pill.
18
+ * Row payloads from `query_result` never reach the LLM - they are
19
+ * forwarded to the host UI as `kind: "chart"` writer events, and the
20
+ * model only receives per-dataset metadata in `datasets[]`.
20
21
  */
21
22
 
23
+ import { randomUUID } from "node:crypto";
24
+
22
25
  import { genie } from "@databricks/appkit";
23
26
  import { stringUtils } from "@dbx-tools/appkit-shared";
24
27
  import { createTool } from "@mastra/core/tools";
@@ -42,14 +45,109 @@ export type GenieStreamEvent =
42
45
  /** Conversation history returned by `genie.exports().getConversation`. */
43
46
  export type GenieConversation = Awaited<ReturnType<GenieExports["getConversation"]>>;
44
47
 
45
- type GenieMessage = Extract<GenieStreamEvent, { type: "message_result" }>["message"];
46
- type GenieStatement = Extract<GenieStreamEvent, { type: "query_result" }>["data"];
48
+ /**
49
+ * Per-dataset metadata surfaced to the LLM. The actual rows are
50
+ * dispatched separately as a `kind: "chart"` writer event so the
51
+ * model never has the rows in its context (token cost stays flat
52
+ * regardless of dataset size). The model uses `chartId` to
53
+ * reference the chart inline via the `[[chart:<chartId>]]` marker.
54
+ */
55
+ const datasetSchema = z.object({
56
+ chartId: z.string().describe(stringUtils.toDescription`
57
+ Short id (8 hex chars) for the chart-render slot the host UI
58
+ has staged for this dataset. Embed
59
+ \`[[chart:<chartId>]]\` on its own line in your reply at the
60
+ position you want the chart to appear; the client renders it
61
+ inline. Do not paraphrase the dataset's rows in prose - the
62
+ chart is the rendering.
63
+ `),
64
+ title: z.string().optional().describe(stringUtils.toDescription`
65
+ Genie's own title for the SQL that produced this dataset.
66
+ Useful as a label when you reference the chart in prose.
67
+ `),
68
+ description: z.string().optional().describe(stringUtils.toDescription`
69
+ Genie's prose description of the SQL, if any.
70
+ `),
71
+ columns: z.array(z.string()).describe(stringUtils.toDescription`
72
+ Column names in display order. Use these when describing what
73
+ is being charted (e.g. "trend of fill_rate over date").
74
+ `),
75
+ rowCount: z.number().describe(stringUtils.toDescription`
76
+ Total rows in this dataset. Mention only if it adds context
77
+ (e.g. "across the last 90 days").
78
+ `),
79
+ sql: z
80
+ .string()
81
+ .optional()
82
+ .describe(stringUtils.toDescription`
83
+ SQL Genie generated and executed. The host UI shows this on
84
+ demand; you do not need to repeat it.
85
+ `),
86
+ });
87
+
88
+ /**
89
+ * Top-level output schema returned to the LLM from a Genie tool
90
+ * call. The `datasets` array is intentionally metadata-only - row
91
+ * data rides a writer event the host UI consumes directly and is
92
+ * not in the model's context.
93
+ */
94
+ const genieToolOutputSchema = z.object({
95
+ conversationId: z
96
+ .string()
97
+ .optional()
98
+ .describe(stringUtils.toDescription`
99
+ Pass back on the next call to continue the same Genie thread.
100
+ `),
101
+ genieAnswer: z
102
+ .string()
103
+ .optional()
104
+ .describe(stringUtils.toDescription`
105
+ Genie's natural-language answer to the question. Pass this
106
+ through to the user (verbatim, or as the basis of your
107
+ reply). Genie may have run multiple SQL queries and tools to
108
+ produce this; the full text is the answer.
109
+ `),
110
+ datasets: z
111
+ .array(datasetSchema)
112
+ .optional()
113
+ .describe(stringUtils.toDescription`
114
+ Datasets Genie produced for this turn (one per executed SQL
115
+ statement). Each entry is metadata only; the rows are
116
+ streamed to the host UI out-of-band. To render any of these
117
+ as a chart inline in your reply, embed
118
+ \`[[chart:<chartId>]]\` where you want the chart to appear.
119
+ Do not paraphrase the rows - the chart is what the user
120
+ should see; your prose should add interpretation
121
+ (highlights, deltas, anomalies) around the chart.
122
+ `),
123
+ suggestedFollowUps: z
124
+ .array(z.string())
125
+ .optional()
126
+ .describe(stringUtils.toDescription`
127
+ Follow-up question suggestions Genie produced. The host UI
128
+ renders these as clickable buttons; you do not need to list
129
+ them in your reply.
130
+ `),
131
+ error: z
132
+ .string()
133
+ .optional()
134
+ .describe(stringUtils.toDescription`
135
+ Genie-side error message if the request failed.
136
+ `),
137
+ });
138
+
139
+ type DrainResult = z.infer<typeof genieToolOutputSchema>;
140
+ type DatasetMeta = z.infer<typeof datasetSchema> & { statementId: string };
47
141
 
48
142
  /**
49
- * Normalised progress event surfaced to the UI as a Mastra `tool-output`
50
- * chunk. The discriminator (`kind`) keeps the union open for future
51
- * Genie features (charts, attachments, retries) without forcing the
52
- * client to know any Genie wire format.
143
+ * Normalised progress event surfaced to the UI as a Mastra
144
+ * `tool-output` chunk. Loading pill events (`started`, `status`,
145
+ * `sql`, `suggested`, `error`) are pure UI metadata and never reach
146
+ * the LLM. The `chart` variant carries the rows from a Genie SQL
147
+ * statement so the host UI's `<ChartSlot>` can render them inline
148
+ * via the same path as the `render_data` tool; the LLM still only
149
+ * sees the matching {@link datasetSchema} metadata in
150
+ * `genieAnswer`'s sibling `datasets[]` field.
53
151
  */
54
152
  export type GenieProgress =
55
153
  | { kind: "started"; conversationId: string; messageId: string; spaceId: string }
@@ -61,31 +159,131 @@ export type GenieProgress =
61
159
  description?: string;
62
160
  statementId?: string;
63
161
  }
64
- | { kind: "data"; rowCount: number; columns: string[] }
162
+ | {
163
+ kind: "chart";
164
+ chartId: string;
165
+ title: string;
166
+ description?: string;
167
+ data: Array<Record<string, unknown>>;
168
+ }
65
169
  | { kind: "text"; content: string }
66
170
  | { kind: "suggested"; questions: string[] }
67
171
  | { kind: "error"; error: string };
68
172
 
69
173
  const sendMessageSchema = z.object({
70
- content: z.string().describe("Natural-language question to send to the Genie space."),
174
+ content: z.string().describe(stringUtils.toDescription`
175
+ Natural-language question to send to the Genie space.
176
+ `),
71
177
  conversationId: z
72
178
  .string()
73
179
  .optional()
74
- .describe(
75
- "Optional Genie conversation id to continue an earlier thread. " +
76
- "Omit on the first call; pass the id returned in the previous " +
77
- "result's `conversationId` to follow up.",
78
- ),
180
+ .describe(stringUtils.toDescription`
181
+ Optional Genie conversation id to continue an earlier thread.
182
+ Omit on the first call; pass the id returned in the previous
183
+ result's \`conversationId\` to follow up.
184
+ `),
79
185
  });
80
186
 
81
187
  const getConversationSchema = z.object({
82
- alias: z
83
- .string()
84
- .describe(
85
- "Alias of the Genie space the conversation belongs to (matches the " +
86
- "key in the genie plugin's `spaces` config).",
87
- ),
88
- conversationId: z.string().describe("Genie conversation id whose history to fetch."),
188
+ alias: z.string().describe(stringUtils.toDescription`
189
+ Alias of the Genie space the conversation belongs to (matches
190
+ the key in the genie plugin's \`spaces\` config).
191
+ `),
192
+ conversationId: z.string().describe(stringUtils.toDescription`
193
+ Genie conversation id whose history to fetch.
194
+ `),
195
+ });
196
+
197
+ /** Per-attachment shape returned inside a stored Genie message. */
198
+ const genieAttachmentSchema = z.object({
199
+ attachmentId: z.string().optional().describe(stringUtils.toDescription`
200
+ Genie attachment id; internal bookkeeping.
201
+ `),
202
+ query: z
203
+ .object({
204
+ title: z.string().optional().describe(stringUtils.toDescription`
205
+ Genie's title for the SQL, if any.
206
+ `),
207
+ description: z.string().optional().describe(stringUtils.toDescription`
208
+ Genie's prose description of the SQL, if any.
209
+ `),
210
+ query: z.string().optional().describe(stringUtils.toDescription`
211
+ SQL Genie generated and executed.
212
+ `),
213
+ statementId: z.string().optional().describe(stringUtils.toDescription`
214
+ Statement-execution id; internal bookkeeping.
215
+ `),
216
+ })
217
+ .optional()
218
+ .describe(stringUtils.toDescription`
219
+ SQL Genie attached to this message, if it ran any.
220
+ `),
221
+ text: z
222
+ .object({
223
+ content: z.string().optional().describe(stringUtils.toDescription`
224
+ Genie's natural-language answer text for this attachment.
225
+ `),
226
+ })
227
+ .optional()
228
+ .describe(stringUtils.toDescription`
229
+ Per-attachment text content (independent of the message-level
230
+ \`content\` field).
231
+ `),
232
+ suggestedQuestions: z
233
+ .array(z.string())
234
+ .optional()
235
+ .describe(stringUtils.toDescription`
236
+ Follow-up question suggestions Genie generated for this turn.
237
+ `),
238
+ });
239
+
240
+ /** Single message inside a Genie conversation history page. */
241
+ const genieMessageSchema = z.object({
242
+ messageId: z.string().describe(stringUtils.toDescription`
243
+ Genie message id; internal bookkeeping.
244
+ `),
245
+ conversationId: z.string().describe(stringUtils.toDescription`
246
+ Conversation id this message belongs to.
247
+ `),
248
+ spaceId: z.string().describe(stringUtils.toDescription`
249
+ Genie space id this message belongs to.
250
+ `),
251
+ status: z.string().describe(stringUtils.toDescription`
252
+ Genie message status (\`COMPLETED\`, \`FAILED\`, etc.).
253
+ `),
254
+ content: z.string().describe(stringUtils.toDescription`
255
+ Outer message-level natural-language content Genie wrote.
256
+ `),
257
+ attachments: z
258
+ .array(genieAttachmentSchema)
259
+ .optional()
260
+ .describe(stringUtils.toDescription`
261
+ Attachments (SQL queries, text blocks, suggested follow-ups)
262
+ Genie produced for this message.
263
+ `),
264
+ error: z.string().optional().describe(stringUtils.toDescription`
265
+ Genie-side error attached to this message, if any.
266
+ `),
267
+ });
268
+
269
+ /**
270
+ * Output schema for the `genie_get_conversation` tool. Mirrors
271
+ * AppKit's `GenieConversationHistoryResponse` so the model gets a
272
+ * clear, typed view of prior messages instead of an opaque blob.
273
+ */
274
+ const genieGetConversationOutputSchema = z.object({
275
+ conversationId: z.string().describe(stringUtils.toDescription`
276
+ Conversation id you fetched.
277
+ `),
278
+ spaceId: z.string().describe(stringUtils.toDescription`
279
+ Genie space the conversation belongs to.
280
+ `),
281
+ messages: z.array(genieMessageSchema).describe(stringUtils.toDescription`
282
+ Messages in the conversation, oldest to newest. Each
283
+ \`message.content\` is Genie's natural-language answer for
284
+ that turn; attachments carry the SQL and follow-ups Genie
285
+ produced.
286
+ `),
89
287
  });
90
288
 
91
289
  /**
@@ -116,14 +314,29 @@ export function buildGenieTools(opts: {
116
314
  const id = defaultGenieToolName(alias);
117
315
  tools[id] = createTool({
118
316
  id,
119
- description:
120
- `Ask the Databricks Genie space "${alias}" a natural-language ` +
121
- "question. Genie translates the question to SQL, runs it against " +
122
- "the configured datasets, and returns a written answer plus any " +
123
- "SQL statements it executed. Returns `{ conversationId, content, " +
124
- "queries, ... }`; pass `conversationId` back in to follow up in " +
125
- "the same Genie thread.",
317
+ description: stringUtils.toDescription`
318
+ Ask the Databricks Genie space "${alias}" a single
319
+ natural-language question. Genie translates it to SQL,
320
+ runs the SQL against the configured datasets, and returns
321
+ \`genieAnswer\` (its prose answer) plus \`datasets[]\`
322
+ (one metadata entry per executed query). Each dataset
323
+ carries a short \`chartId\`; embed
324
+ \`[[chart:<chartId>]]\` on its own line in your reply at
325
+ the position where you want that data rendered as an
326
+ inline chart. Do not paraphrase row values - the chart is
327
+ the rendering. Add interpretation around the chart
328
+ (highlights, deltas, anomalies, takeaways) instead of
329
+ repeating numbers.
330
+
331
+ Calling this tool is expensive; issue **one** focused
332
+ question per user turn. If the first answer doesn't fit,
333
+ ask the user a clarifying question rather than
334
+ re-querying with rephrased intent. Prefer aggregated
335
+ questions over raw-row queries (e.g. ask for "monthly
336
+ averages" instead of "all rows" for time-series).
337
+ `,
126
338
  inputSchema: sendMessageSchema,
339
+ outputSchema: genieToolOutputSchema,
127
340
  execute: async ({ content, conversationId }, ctx) => {
128
341
  const stream = opts.exports.sendMessage(alias, content, conversationId, {
129
342
  signal: opts.signal,
@@ -135,11 +348,13 @@ export function buildGenieTools(opts: {
135
348
 
136
349
  tools.genie_get_conversation = createTool({
137
350
  id: "genie_get_conversation",
138
- description:
139
- "Fetch the full message history of a prior Genie conversation by id. " +
140
- "Use when the user references an earlier Genie thread by id, or to " +
141
- "inspect attachments / SQL from previous turns.",
351
+ description: stringUtils.toDescription`
352
+ Fetch the full message history of a prior Genie conversation
353
+ by id. Use when the user references an earlier Genie thread
354
+ by id, or to inspect attachments / SQL from previous turns.
355
+ `,
142
356
  inputSchema: getConversationSchema,
357
+ outputSchema: genieGetConversationOutputSchema,
143
358
  execute: async ({ alias, conversationId }) => {
144
359
  return opts.exports.getConversation(alias, conversationId, opts.signal);
145
360
  },
@@ -149,42 +364,53 @@ export function buildGenieTools(opts: {
149
364
  }
150
365
 
151
366
  /**
152
- * Drain the genie `sendMessage` AsyncGenerator into a flat result the
153
- * agent's calling LLM can reason about. Final assistant text is pulled
154
- * from the last `message_result`; SQL statements are extracted from
155
- * `query_result` events; conversation / message ids are surfaced so the
156
- * caller can pass `conversationId` back into a follow-up tool call.
367
+ * Drain the genie `sendMessage` AsyncGenerator into a flat result
368
+ * the agent's calling LLM can reason about, while forwarding
369
+ * progress and chart events to the host UI.
370
+ *
371
+ * Three streams of output happen in parallel:
157
372
  *
158
- * When a Mastra `writer` is passed (i.e. the tool runs inside an agent
159
- * stream), normalised {@link GenieProgress} events are pushed mid-flight
160
- * so the UI can show status changes, SQL, and row counts as they
161
- * happen instead of staring at a spinner for the full Genie round-trip.
373
+ * 1. {@link GenieProgress} pill events on the writer (`started`,
374
+ * `status`, `sql`, `suggested`, `error`) drive the loading
375
+ * pill in the chat bubble.
376
+ * 2. `kind: "chart"` events on the writer carry the row payload
377
+ * from each Genie SQL statement so the host UI's
378
+ * `<ChartSlot>` can render the chart inline at the marker
379
+ * position the model picked. The data never reaches the LLM.
380
+ * 3. The `DrainResult` returned to the LLM contains
381
+ * Genie's prose answer plus a `datasets[]` array of metadata
382
+ * (chartId, title, columns, rowCount, sql) the model uses to
383
+ * cite charts via `[[chart:<chartId>]]` markers.
384
+ *
385
+ * `query_result` and `message_result` events arrive in either
386
+ * order; we buffer per-statement metadata in
387
+ * {@link DatasetMeta} so each half can fill in the bits it knows
388
+ * about and we emit the chart event once `query_result` lands
389
+ * (with whatever title was already set, falling back to a
390
+ * generic label otherwise).
162
391
  */
163
392
  async function drainGenieStream(
164
393
  stream: AsyncGenerator<GenieStreamEvent>,
165
394
  writer?: ToolStream,
166
- ): Promise<{
167
- conversationId?: string;
168
- messageId?: string;
169
- spaceId?: string;
170
- status?: string;
171
- content?: string;
172
- attachments?: GenieMessage["attachments"];
173
- queries: { attachmentId: string; statementId: string; data: GenieStatement }[];
174
- error?: string;
175
- }> {
395
+ ): Promise<DrainResult> {
176
396
  let conversationId: string | undefined;
177
- let messageId: string | undefined;
178
- let spaceId: string | undefined;
179
- let status: string | undefined;
180
- let content: string | undefined;
181
- let attachments: GenieMessage["attachments"] | undefined;
397
+ let genieAnswer: string | undefined;
398
+ let suggestedFollowUps: string[] | undefined;
182
399
  let error: string | undefined;
183
- const queries: {
184
- attachmentId: string;
185
- statementId: string;
186
- data: GenieStatement;
187
- }[] = [];
400
+ // AppKit's `streamSendMessage` forwards every SDK `onProgress`
401
+ // callback verbatim - the same `EXECUTING_QUERY` can fire several
402
+ // times during a single poll loop. AppKit's other path,
403
+ // `streamGetMessage`, dedupes on the connector side; we mirror that
404
+ // behaviour here so the UI status pill doesn't flicker and we don't
405
+ // burn writer bytes on no-op events.
406
+ let lastStatus: string | undefined;
407
+
408
+ // Per-statement scratch keyed by Genie's `statementId`. Filled in
409
+ // by both `query_result` (rows + columns) and `message_result`
410
+ // (sql + title + description); the LLM-bound `datasets[]` is
411
+ // built from this at end-of-stream, and chart writer events fire
412
+ // when `query_result` lands.
413
+ const datasetsByStatementId = new Map<string, DatasetMeta>();
188
414
 
189
415
  // Best-effort progress emission. Awaited so the underlying agent
190
416
  // stream sees events in order; write failures are swallowed so a
@@ -199,20 +425,26 @@ async function drainGenieStream(
199
425
  };
200
426
 
201
427
  for await (const event of stream) {
428
+ // Uncomment to log every raw Genie wire event before the switch
429
+ // routes it through the writer / DrainResult. Useful when tuning
430
+ // the pill / answer pipeline against real Genie payloads (status
431
+ // codes, attachment shapes, query_result manifests Genie surfaces
432
+ // only on certain question types, etc.).
433
+ // eslint-disable-next-line no-console
434
+ // console.log("[mastra/genie] event", event);
202
435
  switch (event.type) {
203
436
  case "message_start":
204
437
  conversationId = event.conversationId;
205
- messageId = event.messageId;
206
- spaceId = event.spaceId;
207
438
  await emit({
208
439
  kind: "started",
209
- conversationId,
210
- messageId,
211
- spaceId,
440
+ conversationId: event.conversationId,
441
+ messageId: event.messageId,
442
+ spaceId: event.spaceId,
212
443
  });
213
444
  break;
214
445
  case "status":
215
- status = event.status;
446
+ if (event.status === lastStatus) break;
447
+ lastStatus = event.status;
216
448
  await emit({
217
449
  kind: "status",
218
450
  status: event.status,
@@ -220,36 +452,57 @@ async function drainGenieStream(
220
452
  });
221
453
  break;
222
454
  case "query_result": {
223
- queries.push({
224
- attachmentId: event.attachmentId,
225
- statementId: event.statementId,
226
- data: event.data,
227
- });
228
- const rowCount = event.data?.result?.data_array?.length ?? 0;
229
455
  const columns = (event.data?.manifest?.schema?.columns ?? []).map(
230
456
  (c) => c.name,
231
457
  );
232
- await emit({ kind: "data", rowCount, columns });
458
+ const dataArray = (event.data?.result?.data_array ?? []) as Array<
459
+ Array<string | null>
460
+ >;
461
+ const rows = genieRowsToObjects(columns, dataArray);
462
+ const meta = upsertDatasetMeta(datasetsByStatementId, event.statementId, {
463
+ columns,
464
+ rowCount: rows.length,
465
+ });
466
+ await emit({
467
+ kind: "chart",
468
+ chartId: meta.chartId,
469
+ title: meta.title ?? `Genie query`,
470
+ ...(meta.description ? { description: meta.description } : {}),
471
+ data: rows,
472
+ });
233
473
  break;
234
474
  }
235
475
  case "message_result":
236
- content = event.message.content;
237
- attachments = event.message.attachments;
238
- status = event.message.status;
239
- for (const attachment of attachments ?? []) {
240
- if (attachment.query?.query) {
476
+ genieAnswer = event.message.content;
477
+ for (const attachment of event.message.attachments ?? []) {
478
+ const sqlText = attachment.query?.query;
479
+ const stmtId = attachment.query?.statementId;
480
+ if (sqlText && stmtId) {
481
+ upsertDatasetMeta(datasetsByStatementId, stmtId, {
482
+ sql: sqlText,
483
+ ...(attachment.query?.title ? { title: attachment.query.title } : {}),
484
+ ...(attachment.query?.description
485
+ ? { description: attachment.query.description }
486
+ : {}),
487
+ });
488
+ }
489
+ if (sqlText) {
241
490
  await emit({
242
491
  kind: "sql",
243
- sql: attachment.query.query,
244
- title: attachment.query.title,
245
- description: attachment.query.description,
246
- statementId: attachment.query.statementId,
492
+ sql: sqlText,
493
+ title: attachment.query?.title,
494
+ description: attachment.query?.description,
495
+ statementId: stmtId,
247
496
  });
248
497
  }
249
498
  if (attachment.text?.content) {
250
499
  await emit({ kind: "text", content: attachment.text.content });
251
500
  }
252
501
  if (attachment.suggestedQuestions?.length) {
502
+ // Last attachment with suggestions wins (same merge rule
503
+ // the UI uses via `collectSuggestions`); keeping just one
504
+ // copy per turn caps token usage.
505
+ suggestedFollowUps = attachment.suggestedQuestions;
253
506
  await emit({
254
507
  kind: "suggested",
255
508
  questions: attachment.suggestedQuestions,
@@ -266,16 +519,92 @@ async function drainGenieStream(
266
519
  }
267
520
  }
268
521
 
522
+ // Strip statementId / row-only fields when handing the LLM the
523
+ // datasets - the model never references statementId, and the
524
+ // chartId is what the marker uses.
525
+ const datasets: Array<z.infer<typeof datasetSchema>> = [];
526
+ for (const meta of datasetsByStatementId.values()) {
527
+ datasets.push({
528
+ chartId: meta.chartId,
529
+ ...(meta.title ? { title: meta.title } : {}),
530
+ ...(meta.description ? { description: meta.description } : {}),
531
+ columns: meta.columns,
532
+ rowCount: meta.rowCount,
533
+ ...(meta.sql ? { sql: meta.sql } : {}),
534
+ });
535
+ }
536
+
269
537
  return {
270
- conversationId,
271
- messageId,
272
- spaceId,
273
- status,
274
- content,
275
- attachments,
276
- queries,
277
- error,
538
+ ...(conversationId ? { conversationId } : {}),
539
+ ...(genieAnswer ? { genieAnswer } : {}),
540
+ ...(datasets.length > 0 ? { datasets } : {}),
541
+ ...(suggestedFollowUps ? { suggestedFollowUps } : {}),
542
+ ...(error ? { error } : {}),
543
+ };
544
+ }
545
+
546
/**
 * Get-or-create the per-statement scratch entry and merge `patch` into it.
 *
 * Both the `query_result` and `message_result` paths call this with the
 * partial bag of fields they know about; the stored record ends up as the
 * union of everything seen so far for that statement.
 *
 * Merge rules:
 * - `chartId` is minted once (8 hex chars cut from a random UUID) when the
 *   entry is first created and is reused on every later call.
 * - `columns` / `rowCount` take the patch value when present, otherwise the
 *   stored value, otherwise `[]` / `0`.
 * - `title` / `description` / `sql` take the first non-empty value out of
 *   the patch and the stored entry; the key is omitted entirely when
 *   neither has one. An empty-string patch value therefore never erases
 *   text that an earlier call stored.
 *
 * @param store - Scratch map keyed by statement id; mutated in place.
 * @param statementId - Key of the entry to create or update.
 * @param patch - Fields to merge into the entry.
 * @returns The merged record, which is also what is now stored under
 *   `statementId`.
 */
function upsertDatasetMeta(
  store: Map<string, DatasetMeta>,
  statementId: string,
  patch: Partial<Omit<DatasetMeta, "chartId" | "statementId">>,
): DatasetMeta {
  const existing = store.get(statementId);

  // `||` (not `??`) on purpose: an empty string carries no information, so
  // it must fall through to whatever was stored before instead of hiding it.
  const title = patch.title || existing?.title;
  const description = patch.description || existing?.description;
  const sql = patch.sql || existing?.sql;

  const merged: DatasetMeta = {
    chartId: existing?.chartId ?? randomUUID().replace(/-/g, "").slice(0, 8),
    statementId,
    columns: patch.columns ?? existing?.columns ?? [],
    rowCount: patch.rowCount ?? existing?.rowCount ?? 0,
    // Optional keys are only materialised when they hold a value, so the
    // record never carries `title: undefined` style entries.
    ...(title ? { title } : {}),
    ...(description ? { description } : {}),
    ...(sql ? { sql } : {}),
  };
  store.set(statementId, merged);
  return merged;
}
574
+
575
/**
 * Turn Genie's positional `data_array` rows into plain objects keyed by
 * column name.
 *
 * Each output object gets one key per entry in `columns`, assigned in
 * column order. The cell at the matching index is passed through
 * `coerceCell`; when the row has no cell at that index (or the cell is
 * `null`), `null` is what gets passed.
 *
 * @param columns - Column names in display order.
 * @param dataArray - Rows as column-positional `string | null` tuples.
 * @returns One object per input row, in input order.
 */
function genieRowsToObjects(
  columns: ReadonlyArray<string>,
  dataArray: ReadonlyArray<ReadonlyArray<string | null>>,
): Array<Record<string, unknown>> {
  return Array.from(dataArray, (cells) =>
    columns.reduce<Record<string, unknown>>((record, name, position) => {
      // Short rows are padded with `null` rather than `undefined`.
      record[name] = coerceCell(cells[position] ?? null);
      return record;
    }, {}),
  );
}
597
+
598
/**
 * Best-effort numeric coercion for Genie's all-strings cells.
 *
 * - `null` stays `null`.
 * - A canonical decimal literal (optional `-`, integer part that is `0` or
 *   starts with a non-zero digit, optional fraction) becomes a JS number.
 * - Everything else is returned unchanged as a string. That includes
 *   values with trailing text (`12.5px`, `123abc`), zero-padded
 *   identifiers (`00123`, `007`) whose padding would be lost, and whole
 *   numbers outside the safe-integer range that a JS number would round.
 *
 * @param cell - Raw cell value.
 * @returns `null`, a finite number, or the original string.
 */
function coerceCell(cell: string | null): unknown {
  if (cell === null) return null;
  // Anchored so partially numeric text stays a string. Requiring the
  // integer part to be `0` or to start with 1-9 keeps zero-padded codes
  // (zip codes, SKUs, ids) from being collapsed into numbers.
  if (!/^-?(?:0|[1-9]\d*)(?:\.\d+)?$/.test(cell)) return cell;
  const n = Number(cell);
  // Very long digit runs overflow to Infinity; keep the text instead.
  if (!Number.isFinite(n)) return cell;
  // Whole numbers beyond +/-(2^53 - 1), e.g. 64-bit ids, cannot be
  // represented exactly; returning the string avoids silently rounding.
  if (Number.isInteger(n) && !Number.isSafeInteger(n)) return cell;
  return n;
}
280
609
 
281
610
  /**
@@ -349,6 +678,11 @@ function humanizeGenieStatus(status: string): string {
349
678
  case "FAILED":
350
679
  return "Failed";
351
680
  default:
352
- return status.toLowerCase().replace(/_/g, " ");
681
+ return [
682
+ ...stringUtils.tokenizeWithOptions(
683
+ { capitalize: true, lowerCase: true },
684
+ status,
685
+ ),
686
+ ].join(" ");
353
687
  }
354
688
  }