@dbx-tools/genie-shared 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,418 @@
1
+ /**
2
+ * Wire-format zod schemas + types and high-level event vocabulary
3
+ * for `@dbx-tools/genie`.
4
+ *
5
+ * Two related layers live here:
6
+ *
7
+ * 1. Genie wire shapes derived from `@dbx-tools/sdk-shared`'s
8
+ * `dashboards.zod.ts` (which is regenerated from the upstream
9
+ * `@databricks/sdk-experimental` `apis/dashboards/model.d.ts`
10
+ * on every `bun run prebuild`). We extend the SDK schemas
11
+ * where Genie ships fields on the wire that the SDK doesn't
12
+ * currently type:
13
+ *
14
+ * - `GenieMessage.auto_regenerate_count: number` (stamped
15
+ * on every wire message, omitted from the SDK shape).
16
+ * - `GenieQueryAttachment.thoughts: GenieThought[]` (the
17
+ * streamed reasoning payload with `DESCRIPTION`,
18
+ * `DATA_SOURCING`, `STEPS`, and `UNDERSTANDING` thought
19
+ * kinds).
20
+ * - `GenieAttachment.attachment_type: AttachmentType`
21
+ * (a derived discriminator literal so callers can
22
+ * `switch (att.attachment_type)` instead of probing
23
+ * which sub-key is populated; populated by
24
+ * {@link tagAttachment}).
25
+ *
26
+ * 2. Event vocabulary for the high-level `genieEventChat`
27
+ * driver. Each event is a flat `z.object` with a `type`
28
+ * literal discriminator and snake_case payload fields hoisted
29
+ * to the top level (no `payload` wrapper). Events that share
30
+ * the attachment-scoped location use
31
+ * {@link GenieChatLocationSchema} as a base; events that
32
+ * don't carry an `attachment_id` (status, result) omit it.
33
+ * {@link GenieChatEventSchema} bundles the variants into one
34
+ * discriminated union.
35
+ *
36
+ * Pure types: no runtime imports beyond zod + the generated
37
+ * sdk-type schemas, no Node-only code, safe for browser bundles.
38
+ */
39
+ import { genieAttachmentSchema as sdkGenieAttachmentSchema, genieMessageSchema as sdkGenieMessageSchema, genieQueryAttachmentSchema as sdkGenieQueryAttachmentSchema, messageStatusSchema, } from "@dbx-tools/sdk-shared";
40
+ import { stringUtils } from "@dbx-tools/shared";
41
+ import { z } from "zod";
42
const genieThoughtShape = {
  // Any string is accepted so thought kinds added server-side still parse.
  thought_type: z.custom((value) => typeof value === "string"),
  content: z.string(),
};

/**
 * Schema for a single reasoning step carried on a query attachment.
 *
 * - `thought_type`: validated only as "is a string"; the values known
 *   today are listed on {@link GenieThoughtType}.
 * - `content`: the text of the step.
 */
export const GenieThoughtSchema = z.object(genieThoughtShape);
50
+ /* ----------------------- attachment discriminator ---------------------- */
51
/**
 * The attachment payload kinds this package knows about. Genie
 * populates exactly one of `query` / `text` / `suggested_questions`
 * on each attachment, and these literals name which one.
 *
 * The companion {@link AttachmentType} type is documented as open
 * (`(string & {})`), so a payload kind introduced server-side does
 * not break compilation while the three known kinds still narrow
 * under `switch`.
 *
 * The same value is exposed as the optional `attachment_type` field
 * on {@link GenieAttachmentSchema} (filled in by
 * {@link tagAttachment}) so consumers can branch on a literal rather
 * than probing which sub-object key is present. On
 * {@link AttachmentEvent} it is likewise named `attachment_type`
 * (not a bare `type`) to stay clear of the event union's
 * discriminator key.
 */
export const ATTACHMENT_TYPES = ["query", "text", "suggested_questions"];
71
+ /* ------------------- widened wire schemas + types ------------------ */
72
const genieQueryAttachmentExtras = {
  thoughts: z.array(GenieThoughtSchema).optional(),
};

/**
 * The SDK's `GenieQueryAttachment` schema plus an optional
 * `thoughts[]` array, which Genie sends on the wire but the SDK
 * does not currently type.
 *
 * All remaining fields (`description`, `query`, `statement_id`,
 * `query_result_metadata`, `title`, `parameters`,
 * `last_updated_timestamp`, `id`) are inherited unchanged from the
 * SDK schema.
 */
export const GenieQueryAttachmentSchema = sdkGenieQueryAttachmentSchema.extend(
  genieQueryAttachmentExtras,
);
83
const genieAttachmentOverrides = {
  // Swap in the thoughts-aware query schema.
  query: GenieQueryAttachmentSchema.optional(),
  // Derived discriminator; any string is accepted, and it may be absent.
  attachment_type: z.custom((value) => typeof value === "string").optional(),
};

/**
 * The SDK's `GenieAttachment` schema with two changes:
 *
 * - `query` is re-typed to {@link GenieQueryAttachmentSchema} so
 *   `thoughts` is reachable.
 * - `attachment_type` is added as an optional discriminator
 *   ({@link AttachmentType}) so consumers can
 *   `switch (att.attachment_type)` to learn which sub-object is
 *   populated. Genie itself does not send it; it is present on every
 *   attachment that has passed through {@link tagAttachment},
 *   which includes everything `genieEventChat` emits.
 *
 * `attachment_id`, `text`, and `suggested_questions` are inherited
 * as-is. `attachment_id` really is optional on the wire: the first
 * text attachment of a turn (the "main answer text") arrives without
 * an id, and only the follow-up text attachment carries one.
 */
export const GenieAttachmentSchema = sdkGenieAttachmentSchema.extend(
  genieAttachmentOverrides,
);
106
const genieMessageOverrides = {
  attachments: z.array(GenieAttachmentSchema).optional(),
  auto_regenerate_count: z.number().optional(),
};

/**
 * The SDK's `GenieMessage` schema with two changes:
 *
 * - `attachments` is re-typed to the local
 *   {@link GenieAttachmentSchema}, so `attachment.query?.thoughts`
 *   and the `attachment_type` discriminator are reachable without a
 *   cast.
 * - `auto_regenerate_count` is added: a number Genie stamps on wire
 *   messages that the SDK type omits at v0.17. It is declared
 *   optional here.
 *
 * The remaining fields (`id`, `space_id`, `conversation_id`,
 * `user_id`, `created_timestamp`, `last_updated_timestamp`,
 * `status`, `content`, `message_id`, `query_result`, `error`,
 * `feedback`) are inherited unchanged from the SDK schema.
 */
export const GenieMessageSchema = sdkGenieMessageSchema.extend(
  genieMessageOverrides,
);
126
+ /* ------------------------ terminal helpers ------------------------- */
127
/**
 * Genie message statuses that end a turn. Per the original notes,
 * the polling loop in `chat.ts` stops once the latest message
 * carries one of these.
 */
export const TERMINAL_STATUSES = [
  "COMPLETED",
  "FAILED",
  "CANCELLED",
];
133
/**
 * Report whether a message status is terminal, narrowing
 * `MessageStatus | undefined` to a {@link TerminalStatus}.
 *
 * @param s - Status to test; `undefined` is never terminal.
 * @returns `true` when `s` is one of {@link TERMINAL_STATUSES}.
 */
export function isTerminalStatus(s) {
  if (s === undefined) {
    return false;
  }
  return TERMINAL_STATUSES.includes(s);
}
137
// Curated labels for the statuses known today. A Map (rather than a
// plain object) keeps lookups free of inherited keys.
const CURATED_STATUS_LABELS = new Map([
  ["FETCHING_METADATA", "Fetching metadata"],
  ["ASKING_AI", "Asking Genie"],
  ["EXECUTING_QUERY", "Running SQL query"],
  ["FILTERING_CONTEXT", "Filtering context"],
  ["PENDING_WAREHOUSE", "Waiting for warehouse"],
  ["COMPLETED", "Completed"],
  ["FAILED", "Failed"],
  ["CANCELLED", "Cancelled"],
]);

/**
 * Turn a raw Genie wire status (`FETCHING_METADATA`, `ASKING_AI`,
 * `EXECUTING_QUERY`, ...) into a short label suitable for a UI pill.
 *
 * Statuses with a curated label return it directly. Anything else
 * is passed to `stringUtils.tokenizeWithOptions` with
 * `{ capitalize: true, lowerCase: true }` and the resulting tokens
 * are joined with single spaces, so new server-side states still
 * render without a code change.
 *
 * Intended to be shared by the server-side Genie agent and any UI
 * that subscribes to status events so both show the same labels.
 *
 * @param status - Raw status string from the wire.
 * @returns The display label.
 */
export function humanizeStatus(status) {
  const curated = CURATED_STATUS_LABELS.get(status);
  if (curated !== undefined) {
    return curated;
  }
  const words = stringUtils.tokenizeWithOptions(
    { capitalize: true, lowerCase: true },
    status,
  );
  return [...words].join(" ");
}
172
+ /* ----------------------- attachment type helper ---------------------- */
173
/**
 * Work out which {@link AttachmentType} a {@link GenieAttachment}
 * carries.
 *
 * Resolution order:
 *
 * 1. A truthy pre-existing `attachment_type` (e.g. from an earlier
 *    {@link tagAttachment} pass) is returned as-is, which keeps
 *    repeated detection idempotent.
 * 2. The first populated known sub-object: `query`, then `text`,
 *    then `suggested_questions`.
 * 3. Forward-compat fallback: the first own key that is neither
 *    `attachment_id` nor `attachment_type` and whose value is not
 *    `null` / `undefined`. Nullish keys are skipped so an explicitly
 *    empty slot (`{ text: null, chart: {...} }`) does not mask the
 *    payload that is actually present.
 * 4. Otherwise `"unknown"`.
 *
 * @param att - Attachment object to inspect.
 * @returns The detected attachment type string.
 */
export function detectAttachmentType(att) {
  if (att.attachment_type) {
    return att.attachment_type;
  }
  if (att.query) {
    return "query";
  }
  if (att.text) {
    return "text";
  }
  if (att.suggested_questions) {
    return "suggested_questions";
  }
  for (const [key, value] of Object.entries(att)) {
    if (key === "attachment_id" || key === "attachment_type") {
      continue;
    }
    // A key that is present but nullish carries no payload.
    if (value == null) {
      continue;
    }
    return key;
  }
  return "unknown";
}
199
/**
 * Ensure an attachment carries an `attachment_type` discriminator.
 *
 * An attachment that already has a truthy `attachment_type` is
 * returned unchanged (same reference). Otherwise a shallow copy is
 * returned with `attachment_type` set from
 * {@link detectAttachmentType}; the input is not mutated.
 *
 * Consumers that want to `switch (att.attachment_type)` call this
 * once when an attachment first arrives.
 *
 * @param att - Attachment to tag.
 * @returns The tagged attachment.
 */
export function tagAttachment(att) {
  const alreadyTagged = Boolean(att.attachment_type);
  return alreadyTagged
    ? att
    : { ...att, attachment_type: detectAttachmentType(att) };
}
210
+ /* ------------------------- GenieChat events ------------------------ */
211
const genieChatLocationShape = {
  space_id: z.string(),
  conversation_id: z.string().optional(),
  message_id: z.string().optional(),
  attachment_id: z.string().optional(),
};

/**
 * Where on the wire an event was observed. Attachment-scoped event
 * schemas extend this so subscribers can route, log, or correlate
 * events without re-walking the message.
 *
 * - `space_id`: the Genie space the conversation lives in (required).
 * - `conversation_id`: the conversation id, once Genie has assigned
 *   one.
 * - `message_id`: the Genie message id for the active turn.
 * - `attachment_id`: the attachment the event came from. Optional
 *   because Genie sometimes emits an anonymous main-answer
 *   attachment without an id; those still produce events, just with
 *   `attachment_id` undefined.
 */
export const GenieChatLocationSchema = z.object(genieChatLocationShape);
231
// Questions are scoped to a turn, not an attachment, so drop the id.
const questionEventBase = GenieChatLocationSchema.omit({ attachment_id: true });

/**
 * Lifecycle event: the question this turn is asking Genie.
 *
 * Per the original notes, it fires once per `genieEventChat` call,
 * the first time the underlying `genieChat` loop yields a
 * `GenieMessage`. `content` is the prompt text Genie echoed back on
 * `message.content`, and `message_id` is the id Genie assigned, so
 * subscribers can group every later event of the turn under one
 * stable key. `conversation_id` is populated for both opening and
 * follow-up turns (Genie assigns it on `startConversation`).
 *
 * The event is deferred rather than fired synchronously on entry so
 * that `message_id` is available when it lands. That costs one round
 * trip (~200ms) but lets UIs render question / thinking / query /
 * text as one grouped block per Genie call instead of a flat stream.
 *
 * `attachment_id` is deliberately omitted from the location fields.
 */
export const QuestionEventSchema = questionEventBase.extend({
  type: z.literal("question"),
  content: z.string(),
});
256
const messageEventShape = {
  type: z.literal("message"),
  message: GenieMessageSchema,
};

/**
 * Lifecycle event: a raw `GenieMessage` snapshot for the active
 * turn, emitted once per poll yield. The full widened message
 * (thoughts, regenerate count, attachment-type fields) is carried
 * inline on `message`; consumers narrow on
 * `event.type === "message"` and read `event.message`.
 */
export const MessageEventSchema = z.object(messageEventShape);
267
// Status changes belong to the turn as a whole, so no attachment id.
const statusEventBase = GenieChatLocationSchema.omit({ attachment_id: true });

/**
 * The top-level `message.status` changed. Fires for every distinct
 * status seen on the wire, e.g. `SUBMITTED` -> `FILTERING_CONTEXT`
 * -> `ASKING_AI` -> `PENDING_WAREHOUSE` -> `ASKING_AI` ->
 * `COMPLETED`.
 *
 * `status` is the new value; `previous_status` is optional.
 */
export const StatusEventSchema = statusEventBase.extend({
  type: z.literal("status"),
  status: messageStatusSchema,
  previous_status: messageStatusSchema.optional(),
});
280
const attachmentEventFields = {
  type: z.literal("attachment"),
  index: z.number(),
  // Any string is accepted so unmodelled payload kinds still validate.
  attachment_type: z.custom((value) => typeof value === "string"),
};

/**
 * A new attachment slot appeared in `message.attachments[]`. Fires
 * exactly once per attachment, the first time it is seen;
 * attachments are matched by `attachment_id`, or by position when
 * they are anonymous. The slot's payload kind is reported as
 * `attachment_type` so the outer `type` discriminator stays
 * unambiguous.
 */
export const AttachmentEventSchema = GenieChatLocationSchema.extend(
  attachmentEventFields,
);
292
const thinkingEventFields = {
  type: z.literal("thinking"),
  text: z.string(),
  thought_type: z.custom((value) => typeof value === "string"),
};

/**
 * A new reasoning step appeared on a query attachment
 * (`attachments[i].query.thoughts[j]`). Steps are deduplicated per
 * `(thought_type, content)` pair within an attachment so
 * subscribers do not see the same thought twice. The comparison is
 * by value rather than position because Genie sometimes mutates
 * existing thought slots in place.
 */
export const ThinkingEventSchema = GenieChatLocationSchema.extend(
  thinkingEventFields,
);
305
const textEventFields = {
  type: z.literal("text"),
  text: z.string(),
};

/**
 * A text attachment's `content` appeared or changed
 * (`attachments[i].text.content`). Fires whenever the snapshot
 * value differs from the previous one for the same attachment.
 */
export const TextEventSchema = GenieChatLocationSchema.extend(textEventFields);
314
const queryEventFields = {
  type: z.literal("query"),
  sql: z.string(),
  title: z.string().optional(),
  description: z.string().optional(),
};

/**
 * SQL was finalized on a query attachment
 * (`attachments[i].query.query`). Fires once when the SQL string
 * goes from undefined to defined, and again if Genie rewrites it.
 * `title` and `description` are copied from the attachment's
 * `query.title` / `query.description` so consumers can label the
 * SQL pill without re-walking the message.
 */
export const QueryEventSchema = GenieChatLocationSchema.extend(queryEventFields);
328
const statementEventFields = {
  type: z.literal("statement"),
  statement_id: z.string(),
};

/**
 * SQL was submitted to a SQL warehouse and a statement id was
 * assigned (`attachments[i].query.statement_id`). Fires when the
 * statement id goes from undefined to defined, which is the point
 * at which
 * `client.statementExecution.getStatement({ statement_id })`
 * becomes a valid call.
 */
export const StatementEventSchema = GenieChatLocationSchema.extend(
  statementEventFields,
);
339
const rowsEventFields = {
  type: z.literal("rows"),
  row_count: z.number(),
  previous_row_count: z.number().optional(),
  statement_id: z.string().optional(),
};

/**
 * The row count of a query attachment's result changed
 * (`attachments[i].query.query_result_metadata.row_count`). Fires
 * on every change, including the initial `undefined -> 0` and the
 * later `0 -> N` once the warehouse finishes executing.
 */
export const RowsEventSchema = GenieChatLocationSchema.extend(rowsEventFields);
351
const suggestedQuestionsEventFields = {
  type: z.literal("suggested_questions"),
  questions: z.array(z.string()),
};

/**
 * Genie produced a list of follow-up suggested questions
 * (`attachments[i].suggested_questions.questions[]`). Fires once
 * when the list appears, and again if Genie rewrites it.
 */
export const SuggestedQuestionsEventSchema = GenieChatLocationSchema.extend(
  suggestedQuestionsEventFields,
);
360
// The result describes the whole turn, so no attachment id.
const resultEventBase = GenieChatLocationSchema.omit({ attachment_id: true });

/**
 * The active turn reached a terminal status. Always fires once per
 * turn, immediately after the terminal `message` event. `status`
 * is restricted to {@link TERMINAL_STATUSES}, and the final
 * `GenieMessage` snapshot is carried inline on `message` so
 * subscribers need not keep their own copy of the last message.
 */
export const ResultEventSchema = resultEventBase.extend({
  type: z.literal("result"),
  status: z.enum(TERMINAL_STATUSES),
  message: GenieMessageSchema,
});
373
// Every event variant, in the order they are registered on the union.
const genieChatEventVariants = [
  QuestionEventSchema,
  MessageEventSchema,
  StatusEventSchema,
  AttachmentEventSchema,
  ThinkingEventSchema,
  TextEventSchema,
  QueryEventSchema,
  StatementEventSchema,
  RowsEventSchema,
  SuggestedQuestionsEventSchema,
  ResultEventSchema,
];

/**
 * Discriminated union of the events yielded by `genieEventChat`.
 * Every variant is one flat object: `type` is the discriminator
 * and the payload fields sit directly at the top level, with no
 * `payload` wrapper. Consumers narrow on `type` and read fields
 * inline:
 *
 * @example
 * for await (const event of genieEventChat(spaceId, "Top stores?")) {
 *   switch (event.type) {
 *     case "thinking":
 *       console.log(event.thought_type, event.text);
 *       break;
 *     case "result":
 *       console.log("done:", event.status);
 *       break;
 *   }
 * }
 *
 * Stream order per turn:
 *
 *   1. `question`, carrying the prompt this turn sent to Genie.
 *      NOTE(review): this list previously described `question` as
 *      synchronous (before the first SDK call), while the
 *      {@link QuestionEventSchema} docs describe it as deferred
 *      until the first `GenieMessage` is yielded so `message_id`
 *      is available. Confirm against the driver.
 *   2. `message` for every poll yield (the raw `GenieMessage` is on
 *      `event.message`).
 *   3. Any derived events the snapshot diff produced (`status`,
 *      `attachment`, `thinking`, `text`, `query`, `statement`,
 *      `rows`, `suggested_questions`), in that fixed order.
 *   4. On the terminal snapshot, a final `result` event.
 *
 * Errors surface by the generator throwing (wrap the `for await`
 * in `try`/`catch`); there is no `error` variant on this union.
 */
export const GenieChatEventSchema = z.discriminatedUnion(
  "type",
  genieChatEventVariants,
);