@dbx-tools/genie-shared 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,519 @@
1
+ /**
2
+ * Wire-format zod schemas + types and high-level event vocabulary
3
+ * for `@dbx-tools/genie`.
4
+ *
5
+ * Two related layers live here:
6
+ *
7
+ * 1. Genie wire shapes derived from `@dbx-tools/sdk-shared`'s
8
+ * `dashboards.zod.ts` (which is regenerated from the upstream
9
+ * `@databricks/sdk-experimental` `apis/dashboards/model.d.ts`
10
+ * on every `bun run prebuild`). We extend the SDK schemas
11
+ * where Genie ships fields on the wire that the SDK doesn't
12
+ * currently type:
13
+ *
14
+ * - `GenieMessage.auto_regenerate_count: number` (stamped
15
+ * on every wire message, omitted from the SDK shape).
16
+ * - `GenieQueryAttachment.thoughts: GenieThought[]` (the
17
+ * streamed reasoning payload with `DESCRIPTION`,
18
+ * `DATA_SOURCING`, `STEPS`, and `UNDERSTANDING` thought
19
+ * kinds).
20
+ * - `GenieAttachment.attachment_type: AttachmentType`
21
+ * (a derived discriminator literal so callers can
22
+ * `switch (att.attachment_type)` instead of probing
23
+ * which sub-key is populated; populated by
24
+ * {@link tagAttachment}).
25
+ *
26
+ * 2. Event vocabulary for the high-level `genieEventChat`
27
+ * driver. Each event is a flat `z.object` with a `type`
28
+ * literal discriminator and snake_case payload fields hoisted
29
+ * to the top level (no `payload` wrapper). Events that share
30
+ * the attachment-scoped location use
31
+ * {@link GenieChatLocationSchema} as a base; events that
32
+ * don't carry an `attachment_id` (question, status, result) omit it.
33
+ * {@link GenieChatEventSchema} bundles the variants into one
34
+ * discriminated union.
35
+ *
36
+ * Pure types: no runtime imports beyond zod + the generated
37
+ * sdk-type schemas, no Node-only code, safe for browser bundles.
38
+ */
39
+
40
+ import {
41
+ genieAttachmentSchema as sdkGenieAttachmentSchema,
42
+ genieMessageSchema as sdkGenieMessageSchema,
43
+ genieQueryAttachmentSchema as sdkGenieQueryAttachmentSchema,
44
+ messageStatusSchema,
45
+ } from "@dbx-tools/sdk-shared";
46
+ import { stringUtils } from "@dbx-tools/shared";
47
+ import { z } from "zod";
48
+
49
+ /* ----------------------------- statuses ---------------------------- */
50
+
51
/**
 * Status value of a Genie message, inferred from the SDK's
 * `messageStatusSchema` (for example `SUBMITTED`, `ASKING_AI`, or
 * `COMPLETED`).
 */
export type MessageStatus = z.infer<typeof messageStatusSchema>;
53
+
54
+ /* ----------------------------- thoughts ---------------------------- */
55
+
56
/**
 * Kind tag carried by each entry of a query attachment's
 * `thoughts[]` array (`attachments[i].query.thoughts[]` on the
 * `/messages/{id}` wire). The SDK's `GenieQueryAttachment` does not
 * type this field at v0.17, so the vocabulary is declared here.
 *
 * Kinds observed in production polls:
 *
 *   - `THOUGHT_TYPE_DESCRIPTION`: one-paragraph restatement of the
 *     user's question; the attachment's final `query.description`
 *     carries the same text.
 *   - `THOUGHT_TYPE_DATA_SOURCING`: markdown bullets listing the
 *     fully-qualified `catalog.schema.table` sources Genie chose.
 *   - `THOUGHT_TYPE_STEPS`: the high-level plan written before any
 *     SQL runs, one bullet per step.
 *   - `THOUGHT_TYPE_UNDERSTANDING`: ambiguity / interpretation
 *     notes (e.g. whether "revenue" means gross, net, or
 *     recognized revenue).
 *
 * The trailing `(string & {})` member keeps the union open, so a
 * thought type introduced server-side still type-checks, while the
 * four known literals keep narrowing under `switch`.
 */
type KnownThoughtKind =
  | "DESCRIPTION"
  | "DATA_SOURCING"
  | "STEPS"
  | "UNDERSTANDING";
export type GenieThoughtType =
  | `THOUGHT_TYPE_${KnownThoughtKind}`
  | (string & {});
84
+
85
/**
 * Schema for a single reasoning step attached to a query
 * attachment. `thought_type` is validated only as "is a string" at
 * runtime, so values outside the known {@link GenieThoughtType}
 * literals are accepted; `content` is the step's text.
 */
export const GenieThoughtSchema = z.object({
  thought_type: z.custom<GenieThoughtType>(
    (candidate) => typeof candidate === "string",
  ),
  content: z.string(),
});
export type GenieThought = z.infer<typeof GenieThoughtSchema>;
94
+
95
+ /* ----------------------- attachment discriminator ---------------------- */
96
+
97
/**
 * The payload kinds a `GenieAttachment` is known to carry. Genie
 * populates exactly one of `query` / `text` /
 * `suggested_questions` on any given attachment.
 *
 * {@link AttachmentType} widens the known literals with
 * `(string & {})` so a kind introduced server-side keeps compiling
 * while the three known values still narrow under `switch`.
 *
 * The value is exposed as the optional `attachment_type` field on
 * {@link GenieAttachmentSchema} (filled in by
 * {@link tagAttachment}), letting consumers branch on a literal
 * rather than probing which sub-object key is present. The
 * {@link AttachmentEvent} payload uses the same `attachment_type`
 * name, not a bare `type`, so it cannot collide with the
 * event-union discriminator key.
 */
export const ATTACHMENT_TYPES = [
  "query",
  "text",
  "suggested_questions",
] as const satisfies readonly string[];

/** Union of the literals listed in {@link ATTACHMENT_TYPES}. */
export type KnownAttachmentType = (typeof ATTACHMENT_TYPES)[number];

/** Known attachment kinds plus any other string (forward-compatible). */
export type AttachmentType = KnownAttachmentType | (string & {});
119
+
120
+ /* ------------------- widened wire schemas + types ------------------ */
121
+
122
/**
 * The SDK's `GenieQueryAttachment` schema plus one extra optional
 * field, `thoughts[]`, which Genie sends on the wire but the SDK
 * does not currently type. All remaining fields (`description`,
 * `query`, `statement_id`, `query_result_metadata`, `title`,
 * `parameters`, `last_updated_timestamp`, `id`) come straight from
 * the SDK schema.
 */
export const GenieQueryAttachmentSchema = sdkGenieQueryAttachmentSchema.extend(
  {
    thoughts: z.array(GenieThoughtSchema).optional(),
  },
);
export type GenieQueryAttachment = z.infer<typeof GenieQueryAttachmentSchema>;
134
+
135
/**
 * The SDK's `GenieAttachment` schema with two adjustments:
 *
 *   - `query` is re-declared against the thoughts-aware
 *     {@link GenieQueryAttachmentSchema}.
 *   - `attachment_type` is added as an optional
 *     {@link AttachmentType} discriminator so consumers can
 *     `switch (att.attachment_type)` to find out which sub-object
 *     is populated. Genie never sends it, hence optional; every
 *     attachment that has passed through {@link tagAttachment}
 *     (which includes everything `genieEventChat` emits) has it
 *     set. At runtime any string is accepted.
 *
 * `attachment_id`, `text`, and `suggested_questions` are inherited
 * unchanged. `attachment_id` really is optional on the wire: the
 * first text attachment of a turn (the "main answer text") arrives
 * without an id, and only the follow-up text attachment gets one.
 */
export const GenieAttachmentSchema = sdkGenieAttachmentSchema.extend({
  query: GenieQueryAttachmentSchema.optional(),
  attachment_type: z
    .custom<AttachmentType>((candidate) => typeof candidate === "string")
    .optional(),
});
export type GenieAttachment = z.infer<typeof GenieAttachmentSchema>;
159
+
160
/**
 * The SDK's `GenieMessage` schema with two adjustments:
 *
 *   - `attachments` is re-declared as an array of the local
 *     {@link GenieAttachmentSchema}, which makes
 *     `attachment.query?.thoughts` and the `attachment_type`
 *     discriminator reachable without a cast.
 *   - `auto_regenerate_count` is added: a number present on every
 *     wire message but missing from the SDK type at v0.17.
 *
 * The remaining fields (`id`, `space_id`, `conversation_id`,
 * `user_id`, `created_timestamp`, `last_updated_timestamp`,
 * `status`, `content`, `message_id`, `query_result`, `error`,
 * `feedback`) are inherited from the SDK schema.
 */
export const GenieMessageSchema = sdkGenieMessageSchema.extend({
  attachments: z.array(GenieAttachmentSchema).optional(),
  auto_regenerate_count: z.number().optional(),
});
export type GenieMessage = z.infer<typeof GenieMessageSchema>;
181
+
182
+ /* ------------------------ terminal helpers ------------------------- */
183
+
184
/**
 * Message statuses that end a turn. The polling loop in `chat.ts`
 * stops once the latest message reports one of these.
 */
export const TERMINAL_STATUSES = [
  "COMPLETED",
  "FAILED",
  "CANCELLED",
] as const;

/** Union of the literals listed in {@link TERMINAL_STATUSES}. */
export type TerminalStatus = (typeof TERMINAL_STATUSES)[number];
191
+
192
/**
 * Type guard narrowing `MessageStatus | undefined` to a
 * {@link TerminalStatus}.
 *
 * @param s - Status to test; `undefined` is never terminal.
 * @returns `true` when `s` is one of {@link TERMINAL_STATUSES}.
 */
export function isTerminalStatus(s: MessageStatus | undefined): s is TerminalStatus {
  if (s === undefined) return false;
  return TERMINAL_STATUSES.some((terminal) => terminal === s);
}
196
+
197
/** Hand-written UI labels for the wire statuses we know about. */
const CURATED_STATUS_LABELS: ReadonlyMap<string, string> = new Map([
  ["FETCHING_METADATA", "Fetching metadata"],
  ["ASKING_AI", "Asking Genie"],
  ["EXECUTING_QUERY", "Running SQL query"],
  ["FILTERING_CONTEXT", "Filtering context"],
  ["PENDING_WAREHOUSE", "Waiting for warehouse"],
  ["COMPLETED", "Completed"],
  ["FAILED", "Failed"],
  ["CANCELLED", "Cancelled"],
]);

/**
 * Turn a raw Genie wire status (`FETCHING_METADATA`, `ASKING_AI`,
 * `EXECUTING_QUERY`, ...) into a short label suitable for a UI
 * pill.
 *
 * Statuses with a curated label return it directly. Any other
 * status is passed through `stringUtils.tokenizeWithOptions` and
 * the resulting tokens are joined with single spaces, so a status
 * added server-side still renders without a code change.
 *
 * Has no Node-only dependencies, so it is safe for browser
 * bundles; the Genie agent (server) and any UI subscribed to
 * status events both call it to keep labels in lock-step.
 *
 * @param status - Wire status to label.
 * @returns The curated label, or the tokenized fallback.
 */
export function humanizeStatus(status: MessageStatus): string {
  const curated = CURATED_STATUS_LABELS.get(status);
  if (curated !== undefined) return curated;

  const words = [
    ...stringUtils.tokenizeWithOptions(
      { capitalize: true, lowerCase: true },
      status,
    ),
  ];
  return words.join(" ");
}
235
+
236
+ /* ----------------------- attachment type helper ---------------------- */
237
+
238
/**
 * Work out which {@link AttachmentType} a {@link GenieAttachment}
 * carries.
 *
 * Resolution order:
 *
 *   1. An existing non-empty `attachment_type` (e.g. from an
 *      earlier {@link tagAttachment} pass) is returned as-is, which
 *      makes repeated detection idempotent.
 *   2. The first populated known sub-object: `query`, then `text`,
 *      then `suggested_questions`.
 *   3. Forward-compat fallback: the first other key whose value is
 *      actually populated. The bookkeeping keys `attachment_id` and
 *      `attachment_type` are ignored, and so are keys holding
 *      `undefined` / `null`, so an object such as
 *      `{ query: undefined }` is not misreported as a query.
 *   4. `"unknown"` when nothing is populated.
 *
 * @param att - Attachment to inspect; it is not modified.
 * @returns The detected attachment kind.
 */
export function detectAttachmentType(att: GenieAttachment): AttachmentType {
  if (att.attachment_type) return att.attachment_type;
  if (att.query) return "query";
  if (att.text) return "text";
  if (att.suggested_questions) return "suggested_questions";

  for (const [key, value] of Object.entries(att)) {
    if (key === "attachment_id" || key === "attachment_type") continue;
    // A key that is present but empty says nothing about the payload kind.
    if (value != null) return key;
  }
  return "unknown";
}
259
+
260
/**
 * Ensure an attachment carries an `attachment_type` discriminator.
 *
 * An attachment that already has a non-empty `attachment_type` is
 * returned unchanged (same object). Otherwise a shallow copy is
 * returned with `attachment_type` set from
 * {@link detectAttachmentType}; the input is not mutated. Call it
 * once when an attachment first arrives so later code can
 * `switch (att.attachment_type)`.
 *
 * @param att - Attachment to tag.
 * @returns The tagged attachment.
 */
export function tagAttachment(att: GenieAttachment): GenieAttachment {
  return att.attachment_type
    ? att
    : { ...att, attachment_type: detectAttachmentType(att) };
}
270
+
271
+ /* ------------------------- GenieChat events ------------------------ */
272
+
273
/**
 * Wire location an event was observed at. Attachment-scoped event
 * schemas extend this object so subscribers can route, log, or
 * correlate events without walking the message again.
 *
 *   - `space_id`: Genie space that owns the conversation (the only
 *     required field).
 *   - `conversation_id`: conversation id, once Genie has assigned
 *     one.
 *   - `message_id`: Genie message id of the active turn.
 *   - `attachment_id`: attachment the event originated from. It is
 *     optional because Genie sometimes emits an anonymous
 *     main-answer attachment with no id; such attachments still
 *     produce events, with `attachment_id` left undefined.
 */
export const GenieChatLocationSchema = z.object({
  space_id: z.string(),
  conversation_id: z.string().optional(),
  message_id: z.string().optional(),
  attachment_id: z.string().optional(),
});
export type GenieChatLocation = z.infer<typeof GenieChatLocationSchema>;
294
+
295
/**
 * Lifecycle event announcing the question a turn is asking Genie.
 *
 * Emitted once per `genieEventChat` call, at the moment the
 * underlying `genieChat` loop yields its first `GenieMessage`.
 * `content` is the prompt text Genie echoed back on
 * `message.content`, and `message_id` is the id Genie assigned, so
 * subscribers can group all later events of the turn under one
 * stable key. `conversation_id` is populated on opening and
 * follow-up turns alike (Genie assigns it on `startConversation`).
 *
 * The event is deliberately deferred rather than fired
 * synchronously on entry: waiting for that first round trip
 * (~200ms) means `message_id` is known when the event lands, which
 * lets UIs render question / thinking / query / text as one
 * grouped block per Genie call instead of a flat stream.
 *
 * `attachment_id` is omitted on purpose: a question belongs to the
 * turn, not to an attachment.
 */
export const QuestionEventSchema = GenieChatLocationSchema.omit({
  attachment_id: true,
}).extend({
  type: z.literal("question"),
  content: z.string(),
});
export type QuestionEvent = z.infer<typeof QuestionEventSchema>;
321
+
322
/**
 * Lifecycle event carrying the raw `GenieMessage` snapshot of the
 * active turn; emitted once per poll yield. The complete message,
 * including the widened thought / regen / attachment-type fields,
 * sits inline on `message`: narrow with
 * `event.type === "message"`, then read `event.message`.
 */
export const MessageEventSchema = z.object({
  type: z.literal("message"),
  message: GenieMessageSchema,
});
export type MessageEvent = z.infer<typeof MessageEventSchema>;
334
+
335
/**
 * The top-level `message.status` changed. Emitted for each
 * distinct status observed on the wire, for example
 * `SUBMITTED` -> `FILTERING_CONTEXT` -> `ASKING_AI` ->
 * `PENDING_WAREHOUSE` -> `ASKING_AI` -> `COMPLETED`.
 *
 * `status` is the new value and `previous_status` (optional) the
 * one it replaced. The event is turn-scoped, so `attachment_id` is
 * dropped from the location fields.
 */
export const StatusEventSchema = GenieChatLocationSchema.omit({
  attachment_id: true,
}).extend({
  type: z.literal("status"),
  status: messageStatusSchema,
  previous_status: messageStatusSchema.optional(),
});
export type StatusEvent = z.infer<typeof StatusEventSchema>;
349
+
350
/**
 * A new attachment slot showed up in `message.attachments[]`.
 * Emitted exactly once per attachment, the first time it is seen;
 * attachments are matched by `attachment_id`, or by position when
 * they are anonymous.
 *
 * `index` is the slot's numeric index. The payload kind is
 * reported as `attachment_type` so that the outer `type`
 * discriminator stays unambiguous; at runtime any string is
 * accepted for it.
 */
export const AttachmentEventSchema = GenieChatLocationSchema.extend({
  type: z.literal("attachment"),
  index: z.number(),
  attachment_type: z.custom<AttachmentType>(
    (candidate) => typeof candidate === "string",
  ),
});
export type AttachmentEvent = z.infer<typeof AttachmentEventSchema>;
363
+
364
/**
 * A new reasoning step appeared on a query attachment
 * (`attachments[i].query.thoughts[j]`).
 *
 * Steps are de-duplicated per `(thought_type, content)` pair
 * within an attachment, so subscribers never receive the same
 * thought twice. The comparison is by value, not by position,
 * because Genie sometimes mutates existing thought slots in place.
 *
 * `text` is the thought's text and `thought_type` its kind; at
 * runtime any string is accepted for `thought_type`.
 */
export const ThinkingEventSchema = GenieChatLocationSchema.extend({
  type: z.literal("thinking"),
  text: z.string(),
  thought_type: z.custom<GenieThoughtType>(
    (candidate) => typeof candidate === "string",
  ),
});
export type ThinkingEvent = z.infer<typeof ThinkingEventSchema>;
378
+
379
/**
 * The `content` of a text attachment appeared or changed
 * (`attachments[i].text.content`). Emitted whenever the snapshot
 * value differs from the previous value for the same attachment;
 * `text` is the current content.
 */
export const TextEventSchema = GenieChatLocationSchema.extend({
  type: z.literal("text"),
  text: z.string(),
});
export type TextEvent = z.infer<typeof TextEventSchema>;
389
+
390
/**
 * SQL was finalized on a query attachment
 * (`attachments[i].query.query`). Emitted when the SQL string goes
 * from undefined to defined, and again should Genie rewrite it.
 *
 * `sql` is the statement text. `title` and `description` are
 * copied from the attachment's `query.title` /
 * `query.description` so consumers can label the SQL pill without
 * walking the message again.
 */
export const QueryEventSchema = GenieChatLocationSchema.extend({
  type: z.literal("query"),
  sql: z.string(),
  title: z.string().optional(),
  description: z.string().optional(),
});
export type QueryEvent = z.infer<typeof QueryEventSchema>;
405
+
406
/**
 * The SQL was submitted to a SQL warehouse and received a
 * statement id (`attachments[i].query.statement_id`). Emitted when
 * the statement id goes from undefined to defined, which is the
 * point from which
 * `client.statementExecution.getStatement({ statement_id })`
 * becomes a valid call.
 */
export const StatementEventSchema = GenieChatLocationSchema.extend({
  type: z.literal("statement"),
  statement_id: z.string(),
});
export type StatementEvent = z.infer<typeof StatementEventSchema>;
418
+
419
/**
 * The result row count of a query attachment changed
 * (`attachments[i].query.query_result_metadata.row_count`).
 * Emitted on every change, which covers the initial
 * `undefined -> 0` as well as the later `0 -> N` once the
 * warehouse finishes executing.
 *
 * `row_count` is the new count; `previous_row_count` and
 * `statement_id` are optional.
 */
export const RowsEventSchema = GenieChatLocationSchema.extend({
  type: z.literal("rows"),
  row_count: z.number(),
  previous_row_count: z.number().optional(),
  statement_id: z.string().optional(),
});
export type RowsEvent = z.infer<typeof RowsEventSchema>;
432
+
433
/**
 * Genie produced a list of suggested follow-up questions
 * (`attachments[i].suggested_questions.questions[]`). Emitted once
 * when the list first appears and again should Genie rewrite it;
 * `questions` holds the current list.
 */
export const SuggestedQuestionsEventSchema = GenieChatLocationSchema.extend({
  type: z.literal("suggested_questions"),
  questions: z.array(z.string()),
});
export type SuggestedQuestionsEvent = z.infer<
  typeof SuggestedQuestionsEventSchema
>;
443
+
444
/**
 * The active turn reached a terminal status. Always emitted once
 * per turn, immediately after the terminal `message` event.
 *
 * `status` is restricted to {@link TERMINAL_STATUSES}, and the
 * final `GenieMessage` snapshot rides along on `message` so
 * subscribers need not keep their own copy of the last message.
 * The event is turn-scoped, so `attachment_id` is dropped from the
 * location fields.
 */
export const ResultEventSchema = GenieChatLocationSchema.omit({
  attachment_id: true,
}).extend({
  type: z.literal("result"),
  status: z.enum(TERMINAL_STATUSES),
  message: GenieMessageSchema,
});
export type ResultEvent = z.infer<typeof ResultEventSchema>;
458
+
459
/**
 * Discriminated union of every event `genieEventChat` yields. Each
 * variant is one flat object: `type` is the discriminator and the
 * payload fields sit directly at the top level, with no `payload`
 * wrapper. Consumers narrow on `type` and read fields inline:
 *
 * @example
 * for await (const event of genieEventChat(spaceId, "Top stores?")) {
 *   switch (event.type) {
 *     case "thinking":
 *       console.log(event.thought_type, event.text);
 *       break;
 *     case "result":
 *       console.log("done:", event.status);
 *       break;
 *   }
 * }
 *
 * Stream order per turn:
 *
 *   1. `question`, carrying the prompt this turn sent to Genie.
 *      It is deferred until the first `GenieMessage` is yielded
 *      (not fired synchronously on entry) so that `message_id` is
 *      already populated; see {@link QuestionEventSchema}.
 *   2. `message` for every poll yield (raw `GenieMessage` on
 *      `event.message`).
 *   3. Any derived events the snapshot diff produced (`status`,
 *      `attachment`, `thinking`, `text`, `query`, `statement`,
 *      `rows`, `suggested_questions`), in that fixed order.
 *   4. On the terminal snapshot, a final `result` event.
 *
 * Errors surface by the generator throwing (wrap the `for await`
 * in `try`/`catch`); this union has no `error` variant.
 */
export const GenieChatEventSchema = z.discriminatedUnion("type", [
  QuestionEventSchema,
  MessageEventSchema,
  StatusEventSchema,
  AttachmentEventSchema,
  ThinkingEventSchema,
  TextEventSchema,
  QueryEventSchema,
  StatementEventSchema,
  RowsEventSchema,
  SuggestedQuestionsEventSchema,
  ResultEventSchema,
]);
export type GenieChatEvent = z.infer<typeof GenieChatEventSchema>;
506
+
507
/**
 * Union of the `type` literals used as the discriminator across
 * all {@link GenieChatEvent} variants.
 */
export type GenieChatEventType = GenieChatEvent["type"];
509
+
510
/**
 * Payload fields of the {@link GenieChatEvent} variant selected by
 * `T`, i.e. that variant minus its `type` discriminator. Detectors
 * in `event.ts` return this shape, and the orchestrator adds
 * `type` when it yields the event.
 */
export type GenieChatEventFields<T extends GenieChatEventType> = Omit<
  Extract<GenieChatEvent, { type: T }>,
  "type"
>;