@dbx-tools/genie-shared 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts ADDED
@@ -0,0 +1,33 @@
1
+ /**
2
+ * `@dbx-tools/genie-shared`: pure-types + sync-helpers surface of
3
+ * the `@dbx-tools/genie` package. Safe to import from browser
4
+ * bundles (no `node:*`, no `WorkspaceClient`, no I/O).
5
+ *
6
+ * What lives here:
7
+ *
8
+ * - {@link ./src/protocol.js}: wire-format zod schemas + types
9
+ * extending the generated `@dbx-tools/sdk-shared` Genie shapes
10
+ * (`GenieMessageSchema`, `GenieAttachmentSchema`,
11
+ * `GenieQueryAttachmentSchema`, `GenieThoughtSchema`,
12
+ * `messageStatusSchema`, ...) plus the high-level event
13
+ * vocabulary the `genieEventChat` driver emits
14
+ * (`GenieChatEvent`, `GenieChatLocation`, per-variant payload
15
+ * interfaces) and terminal-status / attachment-discriminator
16
+ * helpers (`TERMINAL_STATUSES`, `isTerminalStatus`,
17
+ * `detectAttachmentType`, `tagAttachment`).
18
+ * - {@link ./src/event.js}: pure sync detectors
19
+ * (`detectStatus`, `detectThinking`, `detectAttachmentAdded`,
20
+ * `detectText`, `detectQuery`, `detectStatement`,
21
+ * `detectRows`, `detectSuggestedQuestions`) and the
22
+ * `eventsFromMessage` orchestrator generator. Used by
23
+ * `genieEventChat` server-side; also reusable from the
24
+ * browser when consumers want to derive UI events from
25
+ * `GenieMessage` snapshots themselves.
26
+ *
27
+ * Server-only chat driving (`genieChat`, `genieEventChat`) lives
28
+ * in `@dbx-tools/genie` and pulls these types in. Frontends only
29
+ * need this package.
30
+ */
31
+
32
+ export * from "./src/event.js";
33
+ export * from "./src/protocol.js";
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "main": "./dist/index.js",
3
+ "types": "./dist/index.d.ts",
4
+ "exports": {
5
+ ".": {
6
+ "source": "./index.ts",
7
+ "types": "./dist/index.d.ts",
8
+ "default": "./dist/index.js"
9
+ }
10
+ },
11
+ "name": "@dbx-tools/genie-shared",
12
+ "version": "0.1.18",
13
+ "dependencies": {
14
+ "@dbx-tools/sdk-shared": "0.1.18",
15
+ "@dbx-tools/shared": "0.1.18",
16
+ "zod": "^4.3.6"
17
+ },
18
+ "module": "index.ts",
19
+ "peerDependencies": {
20
+ "@databricks/appkit": "*"
21
+ },
22
+ "type": "module",
23
+ "files": [
24
+ "dist",
25
+ "index*.ts",
26
+ "src"
27
+ ],
28
+ "license": "Apache-2.0",
29
+ "homepage": "https://github.com/reggie-db/dbx-tools-appkit#readme",
30
+ "bugs": {
31
+ "url": "https://github.com/reggie-db/dbx-tools-appkit/issues"
32
+ },
33
+ "repository": {
34
+ "type": "git",
35
+ "url": "git+https://github.com/reggie-db/dbx-tools-appkit.git",
36
+ "directory": "packages/genie-shared"
37
+ }
38
+ }
package/src/event.ts ADDED
@@ -0,0 +1,376 @@
1
+ /**
2
+ * Pure event-detection for `genieEventChat`. Given two
3
+ * `GenieMessage` snapshots (current + prior) and the surrounding
4
+ * `space_id`, derive the semantic deltas (status transitions, new
5
+ * attachments, new thoughts, SQL emission, warehouse submission,
6
+ * row-count progress, follow-up suggestions, text deltas) and
7
+ * yield them as typed {@link GenieChatEvent}s.
8
+ *
9
+ * Architecture:
10
+ *
11
+ * - Each detector is built with {@link eventDetector}, which
12
+ * takes the event name as a literal string and a `detect`
13
+ * callback. TS infers `T extends GenieChatEventType` from the
14
+ * literal, looks up its scope in {@link DetectorScope}, and
15
+ * resolves `detect`'s parameter list (message vs
16
+ * per-attachment) and return type
17
+ * ({@link DetectorResult}<T>) accordingly. Pass an unknown
18
+ * name (`"status2"`) and the call fails to compile; pass a
19
+ * payload shape that doesn't match the named event and the
20
+ * return fails to compile.
21
+ * - {@link eventsFromMessage}: sync generator. Walks the
22
+ * snapshot diff and yields flat `{type, ...fields}` events
23
+ * for every detector that fires, in a stable order (status
24
+ * first, then per-attachment field deltas) so a subscriber
25
+ * that simply logs events as they arrive sees them in a
26
+ * sensible sequence.
27
+ * - Private helpers (`matchPrevAttachment`, `thoughtKey`):
28
+ * diff plumbing shared across detectors.
29
+ *
30
+ * The module is intentionally pure: no `EventEmitter`, no `this`,
31
+ * no I/O, no module-level mutable state. The lifecycle events
32
+ * (`question`, `message`, `result`) are NOT derived here - they
33
+ * belong to the chat lifecycle layer (`chat.ts`, `genieEventChat`)
34
+ * because they track per-yield / per-turn-completion semantics
35
+ * rather than a field-level snapshot diff.
36
+ */
37
+
38
+ import {
39
+ detectAttachmentType,
40
+ type GenieAttachment,
41
+ type GenieChatEvent,
42
+ type GenieChatEventFields,
43
+ type GenieChatEventType,
44
+ type GenieChatLocation,
45
+ type GenieMessage,
46
+ type GenieThought,
47
+ } from "./protocol.js";
48
+
49
+ /* ----------------------------- contract ---------------------------- */
50
+
/** A single value or an array of values of the same kind. */
type OneOrMany<V> = V | V[];

/**
 * Return shape of one detector invocation. A detector may report
 * nothing (`undefined`), exactly one event (a single fields
 * object), or several events of the same type (an array of
 * fields objects).
 *
 * The fields never carry the `type` discriminator themselves;
 * the orchestrator adds it at the moment the event is yielded.
 */
type DetectorResult<T extends GenieChatEventType> =
  | OneOrMany<GenieChatEventFields<T>>
  | undefined;
62
+
/** Events derived from a top-level field diff on `GenieMessage`. */
type MessageScopedEvent = "status";

/** Events derived from a field diff on one `GenieAttachment` slot. */
type AttachmentScopedEvent =
  | "attachment"
  | "thinking"
  | "text"
  | "query"
  | "statement"
  | "rows"
  | "suggested_questions";

/** Events that are not derived via diff; handled by `chat.ts`. */
type LifecycleEvent = "question" | "message" | "result";

/**
 * Lookup table from event name to the part of the wire shape the
 * event is derived from. The scope decides which arguments a
 * detector's `detect` callback receives:
 *
 *  - `"message"`: the event watches `GenieMessage` itself.
 *  - `"attachment"`: the event watches one slot of
 *    `message.attachments[]`.
 *  - `"lifecycle"`: the event (`question`, `message`, `result`)
 *    has no diff signature, so it cannot be built with
 *    {@link eventDetector}.
 */
type DetectorScope = Record<MessageScopedEvent, "message"> &
  Record<AttachmentScopedEvent, "attachment"> &
  Record<LifecycleEvent, "lifecycle">;
87
+
/**
 * Callback shape for message-scoped events: the current snapshot,
 * the prior snapshot (if any) and the owning `space_id`.
 */
type MessageDetectFn<T extends GenieChatEventType> = (
  current: GenieMessage,
  previous: GenieMessage | undefined,
  space_id: string,
) => DetectorResult<T>;

/**
 * Callback shape for attachment-scoped events: the current slot,
 * its prior version (if any), the event location and the slot
 * index.
 */
type AttachmentDetectFn<T extends GenieChatEventType> = (
  current: GenieAttachment,
  previous: GenieAttachment | undefined,
  location: GenieChatLocation,
  index: number,
) => DetectorResult<T>;

/**
 * Signature of the `detect` callback for event type `T`, chosen
 * by looking `T` up in {@link DetectorScope}. Message-scoped
 * events take three arguments, attachment-scoped events take
 * four, and every other scope (the lifecycle events) collapses to
 * `never` because no diff-based detector exists for it.
 */
type DetectFn<T extends GenieChatEventType> = DetectorScope[T] extends "message"
  ? MessageDetectFn<T>
  : DetectorScope[T] extends "attachment"
    ? AttachmentDetectFn<T>
    : never;
109
+
/**
 * One typed detector for a single member of the
 * {@link GenieChatEvent} union.
 *
 *  - `type` is the event name the detector produces.
 *  - `detect` is the diff callback; its parameter list and return
 *    type follow from `T` through {@link DetectFn}.
 */
type EventDetector<T extends GenieChatEventType> = {
  readonly type: T;
  detect: DetectFn<T>;
};
119
+
120
+ /* ----------------------- detector factory ----------------------- */
121
+
/**
 * Factory for an {@link EventDetector}.
 *
 * The event name is given as a string literal so TypeScript can
 * infer `T` from it; `detect` is then checked against the
 * signature {@link DetectFn} selects for that name.
 *
 * What the compiler rejects:
 *
 *  - An unknown event name such as `"status2"`, because it is not
 *    a `GenieChatEventType`.
 *  - A callback with attachment-style arguments for a
 *    message-scoped name like `"status"` (and vice versa).
 *  - A returned fields object that does not match
 *    `DetectorResult<T>` for the named event.
 *  - Any lifecycle name (`"question"`, `"message"`, `"result"`):
 *    `detect` resolves to `never` for those, which is intended
 *    since they have no diff signature and are emitted by
 *    `chat.ts`.
 *
 * @param type Event name the detector produces.
 * @param detect Diff callback matching the scope of `type`.
 * @returns An object pairing `type` with `detect`.
 */
export function eventDetector<T extends GenieChatEventType>(
  type: T,
  detect: DetectFn<T>,
): EventDetector<T> {
  const detector: EventDetector<T> = { type, detect };
  return detector;
}
150
+
151
+ /* ---------------------------- detectors ---------------------------- */
152
+
/**
 * Fires when the message carries a (truthy) `status` that differs
 * from the previous snapshot's `status`. The payload includes the
 * prior status so subscribers can see the transition.
 */
export const detectStatus = eventDetector("status", (current, previous, space_id) => {
  const { status, conversation_id, message_id } = current;
  const previous_status = previous?.status;
  // No status yet, or no transition since the last snapshot.
  if (!status || status === previous_status) return undefined;
  return { status, previous_status, space_id, conversation_id, message_id };
});
164
+
/**
 * Fires for an attachment slot that has no prior version, i.e.
 * the first snapshot in which the slot is seen. Reports the slot
 * index and the attachment type computed by
 * `detectAttachmentType`.
 */
export const detectAttachmentAdded = eventDetector(
  "attachment",
  (current, previous, location, index) =>
    previous
      ? undefined
      : { ...location, index, attachment_type: detectAttachmentType(current) },
);
177
+
/**
 * Emits one `thinking` event for every `(thought_type, content)`
 * pair on the current query attachment that was not present on
 * the previous one.
 *
 * The comparison is by value, not by position: Genie can rewrite
 * thought slots in place (for example re-typing index 0 while
 * re-appending the original at index 1), so a positional or
 * append-only diff would miss re-types and double-count
 * re-orders.
 */
export const detectThinking = eventDetector(
  "thinking",
  (current, previous, location) => {
    const thoughts = current.query?.thoughts ?? [];
    if (thoughts.length === 0) return undefined;

    // Keys of everything already reported on the prior snapshot.
    const known = new Set<string>();
    for (const prior of previous?.query?.thoughts ?? []) {
      known.add(thoughtKey(prior));
    }

    const fresh: GenieChatEventFields<"thinking">[] = [];
    for (const thought of thoughts) {
      const key = thoughtKey(thought);
      if (!known.has(key)) {
        // Recording the key here also collapses duplicates that
        // appear within this one snapshot.
        known.add(key);
        fresh.push({
          ...location,
          text: thought.content,
          thought_type: thought.thought_type,
        });
      }
    }
    return fresh;
  },
);
204
+
/**
 * Fires when a text attachment's `content` is defined and differs
 * from the previous snapshot's `content` (first appearance or any
 * later change).
 */
export const detectText = eventDetector("text", (current, previous, location) => {
  const text = current.text?.content;
  if (text === undefined) return undefined;
  if (text === previous?.text?.content) return undefined;
  return { ...location, text };
});
212
+
/**
 * Fires when the query attachment has non-empty SQL that differs
 * from the previous snapshot's SQL. The current `title` and
 * `description` ride along in the payload.
 */
export const detectQuery = eventDetector("query", (current, previous, location) => {
  const query = current.query;
  const sql = query?.query;
  const changed = sql !== previous?.query?.query;
  if (sql && changed) {
    return {
      ...location,
      sql,
      title: query?.title,
      description: query?.description,
    };
  }
  return undefined;
});
225
+
/**
 * Fires when the query attachment has a non-empty `statement_id`
 * that differs from the one on the previous snapshot.
 */
export const detectStatement = eventDetector(
  "statement",
  (current, previous, location) => {
    const statement_id = current.query?.statement_id;
    if (!statement_id) return undefined;
    const priorId = previous?.query?.statement_id;
    return statement_id === priorId ? undefined : { ...location, statement_id };
  },
);
236
+
/**
 * Fires whenever `query_result_metadata.row_count` is defined and
 * differs from the previous snapshot. That includes the first
 * `undefined -> 0` step as well as a later `0 -> N`. The payload
 * carries the statement id from the current snapshot, falling
 * back to the previous one, so consumers can correlate.
 */
export const detectRows = eventDetector("rows", (current, previous, location) => {
  const row_count = current.query?.query_result_metadata?.row_count;
  const previous_row_count = previous?.query?.query_result_metadata?.row_count;
  if (row_count === undefined || row_count === previous_row_count) {
    return undefined;
  }
  const statement_id = current.query?.statement_id ?? previous?.query?.statement_id;
  return { ...location, row_count, previous_row_count, statement_id };
});
253
+
/**
 * Fires when the attachment carries a non-empty list of suggested
 * follow-up questions that differs from the previous snapshot's
 * list. The lists are compared through their JSON serialisation,
 * so a rewrite that keeps the length but changes the content
 * still counts as a change.
 */
export const detectSuggestedQuestions = eventDetector(
  "suggested_questions",
  (current, previous, location) => {
    const questions = current.suggested_questions?.questions;
    // Missing and empty lists never fire.
    if (!questions?.length) return undefined;
    const before = JSON.stringify(previous?.suggested_questions?.questions);
    const after = JSON.stringify(questions);
    return after === before ? undefined : { ...location, questions };
  },
);
269
+
270
+ /* --------------------------- orchestrator --------------------------- */
271
+
/**
 * Diff `current` against `previous` and yield every event the
 * detectors derive from that diff.
 *
 * Ordering is fixed: the message-scoped status detector runs
 * first, then, for each attachment slot in array order, the
 * attachment detectors run in the sequence attachment, thinking,
 * text, query, statement, rows, suggested_questions. A consumer
 * that just logs events in arrival order therefore sees a
 * sensible sequence.
 *
 * Left to the caller:
 *
 *  - Yielding `{ type: "message", message: current }` before
 *    calling this, once per poll yield.
 *  - Yielding `{ type: "result", ... }` afterwards when
 *    `isTerminalStatus(current.status)` holds; that is a per-turn
 *    lifecycle event, not a per-snapshot field diff.
 *  - Passing `undefined` as `previous` at turn boundaries so
 *    anonymous-attachment state from an earlier turn is not
 *    matched against the new one.
 *
 * This is a synchronous generator (pure CPU work, nothing is
 * awaited); use `yield*` inside an async generator to splice its
 * events into a stream.
 *
 * @param current Latest message snapshot.
 * @param previous Prior snapshot, or `undefined` if there is none.
 * @param space_id Space id stamped onto every event location.
 */
export function* eventsFromMessage(
  current: GenieMessage,
  previous: GenieMessage | undefined,
  space_id: string,
): Generator<GenieChatEvent, void, void> {
  // Adds the detector's `type` to each fields object and yields
  // the result. The cast through `unknown` is required because
  // `{type: T} & GenieChatEventFields<T>` does not narrow back to
  // a member of the discriminated union for a generic `T`; each
  // detector produces correctly shaped fields by construction.
  function* emit<T extends GenieChatEventType>(
    detector: EventDetector<T>,
    result: DetectorResult<T>,
  ): Generator<GenieChatEvent, void, void> {
    if (result === undefined) return;
    // Treat the single-result case as a one-element batch.
    const batch = Array.isArray(result) ? result : [result];
    for (const fields of batch) {
      yield { type: detector.type, ...fields } as unknown as GenieChatEvent;
    }
  }

  // Message scope: evaluated once per snapshot.
  yield* emit(detectStatus, detectStatus.detect(current, previous, space_id));

  // Attachment scope: evaluated once per slot of the current
  // snapshot, each against its matched predecessor (if any).
  const priorAttachments = previous?.attachments ?? [];
  for (const [slot, attachment] of (current.attachments ?? []).entries()) {
    const prior = matchPrevAttachment(attachment, priorAttachments, slot);
    const location: GenieChatLocation = {
      space_id,
      conversation_id: current.conversation_id,
      message_id: current.message_id,
      attachment_id: attachment.attachment_id,
    };

    yield* emit(
      detectAttachmentAdded,
      detectAttachmentAdded.detect(attachment, prior, location, slot),
    );
    yield* emit(detectThinking, detectThinking.detect(attachment, prior, location, slot));
    yield* emit(detectText, detectText.detect(attachment, prior, location, slot));
    yield* emit(detectQuery, detectQuery.detect(attachment, prior, location, slot));
    yield* emit(
      detectStatement,
      detectStatement.detect(attachment, prior, location, slot),
    );
    yield* emit(detectRows, detectRows.detect(attachment, prior, location, slot));
    yield* emit(
      detectSuggestedQuestions,
      detectSuggestedQuestions.detect(attachment, prior, location, slot),
    );
  }
}
350
+
351
+ /* ----------------------------- helpers ----------------------------- */
352
+
/**
 * Locate the earlier version of `curr` inside `prevAtts`.
 *
 *  - When `curr` has an `attachment_id`, the match is the prior
 *    attachment with the same id, wherever it sits in the array
 *    (Genie keeps ids stable across polls).
 *  - When `curr` is anonymous (Genie's main-answer text gets no
 *    id), the match is the prior attachment at the same index,
 *    but only if that one is anonymous too. This stops an
 *    anonymous attachment from binding to an id'd predecessor
 *    that merely occupied the same slot.
 *
 * @param curr Attachment from the current snapshot.
 * @param prevAtts Attachments of the previous snapshot.
 * @param i Index of `curr` in the current snapshot.
 * @returns The matched prior attachment, or `undefined`.
 */
function matchPrevAttachment(
  curr: GenieAttachment,
  prevAtts: GenieAttachment[],
  i: number,
): GenieAttachment | undefined {
  const id = curr.attachment_id;
  if (id) {
    return prevAtts.find((candidate) => candidate.attachment_id === id);
  }
  const sameSlot = prevAtts[i];
  if (!sameSlot || sameSlot.attachment_id) return undefined;
  return sameSlot;
}
372
+
/**
 * Key used by {@link detectThinking} for its value-based set
 * diff. Two thoughts get the same key exactly when their
 * `(thought_type, content)` pairs serialise identically.
 *
 * The pair is JSON-encoded as a two-element array rather than
 * joined with a delimiter: a plain `type|content` join cannot
 * tell `("A|B", "C")` from `("A", "B|C")`, which would make the
 * set diff treat a genuinely new thought as already seen.
 *
 * @param t Thought to derive the key from.
 * @returns A string that is only ever compared for equality.
 */
function thoughtKey(t: GenieThought): string {
  return JSON.stringify([t.thought_type, t.content]);
}