@deepagents/text2sql 0.10.2 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -41
- package/dist/index.d.ts +4 -7
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3321 -2661
- package/dist/index.js.map +4 -4
- package/dist/lib/adapters/adapter.d.ts +13 -1
- package/dist/lib/adapters/adapter.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/abstract.grounding.d.ts +19 -3
- package/dist/lib/adapters/groundings/abstract.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/column-stats.grounding.d.ts +1 -2
- package/dist/lib/adapters/groundings/column-stats.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/column-values.grounding.d.ts +1 -2
- package/dist/lib/adapters/groundings/column-values.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/constraint.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/constraint.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/index.js +1952 -272
- package/dist/lib/adapters/groundings/index.js.map +4 -4
- package/dist/lib/adapters/groundings/indexes.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/indexes.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/info.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/info.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/report.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/report.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/row-count.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/row-count.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/table.grounding.d.ts +3 -3
- package/dist/lib/adapters/groundings/table.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/view.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/view.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/mysql/index.js +2354 -439
- package/dist/lib/adapters/mysql/index.js.map +4 -4
- package/dist/lib/adapters/postgres/index.js +2415 -500
- package/dist/lib/adapters/postgres/index.js.map +4 -4
- package/dist/lib/adapters/spreadsheet/index.js +324 -272
- package/dist/lib/adapters/spreadsheet/index.js.map +4 -4
- package/dist/lib/adapters/sqlite/index.js +2337 -422
- package/dist/lib/adapters/sqlite/index.js.map +4 -4
- package/dist/lib/adapters/sqlserver/index.js +2413 -498
- package/dist/lib/adapters/sqlserver/index.js.map +4 -4
- package/dist/lib/agents/developer.agent.d.ts +33 -23
- package/dist/lib/agents/developer.agent.d.ts.map +1 -1
- package/dist/lib/agents/explainer.agent.d.ts +4 -5
- package/dist/lib/agents/explainer.agent.d.ts.map +1 -1
- package/dist/lib/agents/question.agent.d.ts.map +1 -1
- package/dist/lib/agents/result-tools.d.ts +34 -0
- package/dist/lib/agents/result-tools.d.ts.map +1 -0
- package/dist/lib/agents/sql.agent.d.ts +4 -4
- package/dist/lib/agents/sql.agent.d.ts.map +1 -1
- package/dist/lib/agents/teachables.agent.d.ts +2 -2
- package/dist/lib/agents/teachables.agent.d.ts.map +1 -1
- package/dist/lib/agents/text2sql.agent.d.ts +0 -74
- package/dist/lib/agents/text2sql.agent.d.ts.map +1 -1
- package/dist/lib/checkpoint.d.ts +1 -1
- package/dist/lib/checkpoint.d.ts.map +1 -1
- package/dist/lib/fragments/schema.d.ts +214 -0
- package/dist/lib/fragments/schema.d.ts.map +1 -0
- package/dist/lib/instructions.d.ts +10 -2
- package/dist/lib/instructions.d.ts.map +1 -1
- package/dist/lib/sql.d.ts +14 -104
- package/dist/lib/sql.d.ts.map +1 -1
- package/dist/lib/synthesis/extractors/base-contextual-extractor.d.ts +8 -9
- package/dist/lib/synthesis/extractors/base-contextual-extractor.d.ts.map +1 -1
- package/dist/lib/synthesis/extractors/last-query-extractor.d.ts.map +1 -1
- package/dist/lib/synthesis/extractors/message-extractor.d.ts +1 -2
- package/dist/lib/synthesis/extractors/message-extractor.d.ts.map +1 -1
- package/dist/lib/synthesis/extractors/segmented-context-extractor.d.ts +0 -6
- package/dist/lib/synthesis/extractors/segmented-context-extractor.d.ts.map +1 -1
- package/dist/lib/synthesis/extractors/sql-extractor.d.ts.map +1 -1
- package/dist/lib/synthesis/index.js +2489 -1112
- package/dist/lib/synthesis/index.js.map +4 -4
- package/dist/lib/synthesis/synthesizers/breadth-evolver.d.ts.map +1 -1
- package/dist/lib/synthesis/synthesizers/depth-evolver.d.ts.map +1 -1
- package/dist/lib/synthesis/synthesizers/persona-generator.d.ts +7 -17
- package/dist/lib/synthesis/synthesizers/persona-generator.d.ts.map +1 -1
- package/dist/lib/synthesis/synthesizers/schema-synthesizer.d.ts +2 -2
- package/dist/lib/synthesis/synthesizers/schema-synthesizer.d.ts.map +1 -1
- package/dist/lib/synthesis/synthesizers/teachings-generator.d.ts +8 -20
- package/dist/lib/synthesis/synthesizers/teachings-generator.d.ts.map +1 -1
- package/package.json +9 -14
- package/dist/lib/agents/chat1.agent.d.ts +0 -50
- package/dist/lib/agents/chat1.agent.d.ts.map +0 -1
- package/dist/lib/agents/chat2.agent.d.ts +0 -68
- package/dist/lib/agents/chat2.agent.d.ts.map +0 -1
- package/dist/lib/agents/chat3.agent.d.ts +0 -80
- package/dist/lib/agents/chat3.agent.d.ts.map +0 -1
- package/dist/lib/agents/chat4.agent.d.ts +0 -88
- package/dist/lib/agents/chat4.agent.d.ts.map +0 -1
- package/dist/lib/history/history.d.ts +0 -41
- package/dist/lib/history/history.d.ts.map +0 -1
- package/dist/lib/history/memory.history.d.ts +0 -5
- package/dist/lib/history/memory.history.d.ts.map +0 -1
- package/dist/lib/history/sqlite.history.d.ts +0 -15
- package/dist/lib/history/sqlite.history.d.ts.map +0 -1
- package/dist/lib/instructions.js +0 -415
- package/dist/lib/instructions.js.map +0 -7
- package/dist/lib/memory/memory.prompt.d.ts +0 -3
- package/dist/lib/memory/memory.prompt.d.ts.map +0 -1
- package/dist/lib/memory/memory.store.d.ts +0 -5
- package/dist/lib/memory/memory.store.d.ts.map +0 -1
- package/dist/lib/memory/sqlite.store.d.ts +0 -14
- package/dist/lib/memory/sqlite.store.d.ts.map +0 -1
- package/dist/lib/memory/store.d.ts +0 -40
- package/dist/lib/memory/store.d.ts.map +0 -1
- package/dist/lib/teach/teachables.d.ts +0 -648
- package/dist/lib/teach/teachables.d.ts.map +0 -1
- package/dist/lib/teach/teachings.d.ts +0 -11
- package/dist/lib/teach/teachings.d.ts.map +0 -1
- package/dist/lib/teach/xml.d.ts +0 -6
- package/dist/lib/teach/xml.d.ts.map +0 -1
|
@@ -165,7 +165,7 @@ import {
|
|
|
165
165
|
} from "ai";
|
|
166
166
|
|
|
167
167
|
// packages/text2sql/src/lib/synthesis/extractors/base-contextual-extractor.ts
|
|
168
|
-
import { groq } from "@ai-sdk/groq";
|
|
168
|
+
import { groq as groq2 } from "@ai-sdk/groq";
|
|
169
169
|
import {
|
|
170
170
|
getToolOrDynamicToolName,
|
|
171
171
|
isTextUIPart,
|
|
@@ -173,69 +173,1874 @@ import {
|
|
|
173
173
|
} from "ai";
|
|
174
174
|
import dedent from "dedent";
|
|
175
175
|
import z from "zod";
|
|
176
|
-
import { agent, generate, user } from "@deepagents/agent";
|
|
177
|
-
var contextResolverAgent = agent({
|
|
178
|
-
name: "context_resolver",
|
|
179
|
-
model: groq("openai/gpt-oss-20b"),
|
|
180
|
-
output: z.object({
|
|
181
|
-
question: z.string().describe(
|
|
182
|
-
"A standalone natural language question that the SQL query answers"
|
|
183
|
-
)
|
|
184
|
-
}),
|
|
185
|
-
prompt: (state) => dedent`
|
|
186
|
-
<identity>
|
|
187
|
-
You are an expert at understanding conversational context and generating clear,
|
|
188
|
-
standalone questions from multi-turn conversations.
|
|
189
|
-
</identity>
|
|
190
|
-
|
|
191
|
-
${state?.introspection ? `<schema>
|
|
192
|
-
${state.introspection}
|
|
193
|
-
</schema>` : ""}
|
|
194
|
-
|
|
195
|
-
<conversation>
|
|
196
|
-
${state?.conversation}
|
|
197
|
-
</conversation>
|
|
198
|
-
|
|
199
|
-
<sql>
|
|
200
|
-
${state?.sql}
|
|
201
|
-
</sql>
|
|
202
176
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
177
|
+
// packages/context/dist/index.js
|
|
178
|
+
import { encode } from "gpt-tokenizer";
|
|
179
|
+
import { generateId } from "ai";
|
|
180
|
+
import pluralize from "pluralize";
|
|
181
|
+
import { titlecase } from "stringcase";
|
|
182
|
+
import chalk from "chalk";
|
|
183
|
+
import { defineCommand } from "just-bash";
|
|
184
|
+
import spawn from "nano-spawn";
|
|
185
|
+
import "bash-tool";
|
|
186
|
+
import spawn2 from "nano-spawn";
|
|
187
|
+
import {
|
|
188
|
+
createBashTool
|
|
189
|
+
} from "bash-tool";
|
|
190
|
+
import YAML from "yaml";
|
|
191
|
+
import { DatabaseSync } from "node:sqlite";
|
|
192
|
+
import { groq } from "@ai-sdk/groq";
|
|
193
|
+
import {
|
|
194
|
+
NoSuchToolError,
|
|
195
|
+
Output,
|
|
196
|
+
convertToModelMessages,
|
|
197
|
+
createUIMessageStream,
|
|
198
|
+
generateId as generateId2,
|
|
199
|
+
generateText,
|
|
200
|
+
smoothStream,
|
|
201
|
+
stepCountIs,
|
|
202
|
+
streamText
|
|
203
|
+
} from "ai";
|
|
204
|
+
import chalk2 from "chalk";
|
|
205
|
+
import "zod";
|
|
206
|
+
import "@deepagents/agent";
|
|
207
|
+
var defaultTokenizer = {
|
|
208
|
+
encode(text) {
|
|
209
|
+
return encode(text);
|
|
210
|
+
},
|
|
211
|
+
count(text) {
|
|
212
|
+
return encode(text).length;
|
|
213
|
+
}
|
|
214
|
+
};
|
|
215
|
+
var ModelsRegistry = class {
|
|
216
|
+
#cache = /* @__PURE__ */ new Map();
|
|
217
|
+
#loaded = false;
|
|
218
|
+
#tokenizers = /* @__PURE__ */ new Map();
|
|
219
|
+
#defaultTokenizer = defaultTokenizer;
|
|
220
|
+
/**
|
|
221
|
+
* Load models data from models.dev API
|
|
222
|
+
*/
|
|
223
|
+
async load() {
|
|
224
|
+
if (this.#loaded) return;
|
|
225
|
+
const response = await fetch("https://models.dev/api.json");
|
|
226
|
+
if (!response.ok) {
|
|
227
|
+
throw new Error(`Failed to fetch models: ${response.statusText}`);
|
|
228
|
+
}
|
|
229
|
+
const data = await response.json();
|
|
230
|
+
for (const [providerId, provider] of Object.entries(data)) {
|
|
231
|
+
for (const [modelId, model] of Object.entries(provider.models)) {
|
|
232
|
+
const info = {
|
|
233
|
+
id: model.id,
|
|
234
|
+
name: model.name,
|
|
235
|
+
family: model.family,
|
|
236
|
+
cost: model.cost,
|
|
237
|
+
limit: model.limit,
|
|
238
|
+
provider: providerId
|
|
239
|
+
};
|
|
240
|
+
this.#cache.set(`${providerId}:${modelId}`, info);
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
this.#loaded = true;
|
|
244
|
+
}
|
|
245
|
+
/**
|
|
246
|
+
* Get model info by ID
|
|
247
|
+
* @param modelId - Model ID (e.g., "openai:gpt-4o")
|
|
248
|
+
*/
|
|
249
|
+
get(modelId) {
|
|
250
|
+
return this.#cache.get(modelId);
|
|
251
|
+
}
|
|
252
|
+
/**
|
|
253
|
+
* Check if a model exists in the registry
|
|
254
|
+
*/
|
|
255
|
+
has(modelId) {
|
|
256
|
+
return this.#cache.has(modelId);
|
|
257
|
+
}
|
|
258
|
+
/**
|
|
259
|
+
* List all available model IDs
|
|
260
|
+
*/
|
|
261
|
+
list() {
|
|
262
|
+
return [...this.#cache.keys()];
|
|
263
|
+
}
|
|
264
|
+
/**
|
|
265
|
+
* Register a custom tokenizer for specific model families
|
|
266
|
+
* @param family - Model family name (e.g., "llama", "claude")
|
|
267
|
+
* @param tokenizer - Tokenizer implementation
|
|
268
|
+
*/
|
|
269
|
+
registerTokenizer(family, tokenizer) {
|
|
270
|
+
this.#tokenizers.set(family, tokenizer);
|
|
271
|
+
}
|
|
272
|
+
/**
|
|
273
|
+
* Set the default tokenizer used when no family-specific tokenizer is registered
|
|
274
|
+
*/
|
|
275
|
+
setDefaultTokenizer(tokenizer) {
|
|
276
|
+
this.#defaultTokenizer = tokenizer;
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Get the appropriate tokenizer for a model
|
|
280
|
+
*/
|
|
281
|
+
getTokenizer(modelId) {
|
|
282
|
+
const model = this.get(modelId);
|
|
283
|
+
if (model) {
|
|
284
|
+
const familyTokenizer = this.#tokenizers.get(model.family);
|
|
285
|
+
if (familyTokenizer) {
|
|
286
|
+
return familyTokenizer;
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
return this.#defaultTokenizer;
|
|
290
|
+
}
|
|
291
|
+
/**
|
|
292
|
+
* Estimate token count and cost for given text and model
|
|
293
|
+
* @param modelId - Model ID to use for pricing (e.g., "openai:gpt-4o")
|
|
294
|
+
* @param input - Input text (prompt)
|
|
295
|
+
*/
|
|
296
|
+
estimate(modelId, input) {
|
|
297
|
+
const model = this.get(modelId);
|
|
298
|
+
if (!model) {
|
|
299
|
+
throw new Error(
|
|
300
|
+
`Model "${modelId}" not found. Call load() first or check model ID.`
|
|
301
|
+
);
|
|
302
|
+
}
|
|
303
|
+
const tokenizer = this.getTokenizer(modelId);
|
|
304
|
+
const tokens = tokenizer.count(input);
|
|
305
|
+
const cost = tokens / 1e6 * model.cost.input;
|
|
306
|
+
return {
|
|
307
|
+
model: model.id,
|
|
308
|
+
provider: model.provider,
|
|
309
|
+
tokens,
|
|
310
|
+
cost,
|
|
311
|
+
limits: {
|
|
312
|
+
context: model.limit.context,
|
|
313
|
+
output: model.limit.output,
|
|
314
|
+
exceedsContext: tokens > model.limit.context
|
|
315
|
+
},
|
|
316
|
+
fragments: []
|
|
317
|
+
};
|
|
318
|
+
}
|
|
319
|
+
};
|
|
320
|
+
var _registry = null;
|
|
321
|
+
function getModelsRegistry() {
|
|
322
|
+
if (!_registry) {
|
|
323
|
+
_registry = new ModelsRegistry();
|
|
324
|
+
}
|
|
325
|
+
return _registry;
|
|
326
|
+
}
|
|
327
|
+
function isFragment(data) {
|
|
328
|
+
return typeof data === "object" && data !== null && "name" in data && "data" in data && typeof data.name === "string";
|
|
329
|
+
}
|
|
330
|
+
function isFragmentObject(data) {
|
|
331
|
+
return typeof data === "object" && data !== null && !Array.isArray(data) && !isFragment(data);
|
|
332
|
+
}
|
|
333
|
+
function isMessageFragment(fragment2) {
|
|
334
|
+
return fragment2.type === "message";
|
|
335
|
+
}
|
|
336
|
+
function fragment(name, ...children) {
|
|
337
|
+
return {
|
|
338
|
+
name,
|
|
339
|
+
data: children
|
|
340
|
+
};
|
|
341
|
+
}
|
|
342
|
+
function user(content) {
|
|
343
|
+
const message2 = typeof content === "string" ? {
|
|
344
|
+
id: generateId(),
|
|
345
|
+
role: "user",
|
|
346
|
+
parts: [{ type: "text", text: content }]
|
|
347
|
+
} : content;
|
|
348
|
+
return {
|
|
349
|
+
id: message2.id,
|
|
350
|
+
name: "user",
|
|
351
|
+
data: "content",
|
|
352
|
+
type: "message",
|
|
353
|
+
persist: true,
|
|
354
|
+
codec: {
|
|
355
|
+
decode() {
|
|
356
|
+
return message2;
|
|
357
|
+
},
|
|
358
|
+
encode() {
|
|
359
|
+
return message2;
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
};
|
|
363
|
+
}
|
|
364
|
+
function message(content) {
|
|
365
|
+
const message2 = typeof content === "string" ? {
|
|
366
|
+
id: generateId(),
|
|
367
|
+
role: "user",
|
|
368
|
+
parts: [{ type: "text", text: content }]
|
|
369
|
+
} : content;
|
|
370
|
+
return {
|
|
371
|
+
id: message2.id,
|
|
372
|
+
name: "message",
|
|
373
|
+
data: "content",
|
|
374
|
+
type: "message",
|
|
375
|
+
persist: true,
|
|
376
|
+
codec: {
|
|
377
|
+
decode() {
|
|
378
|
+
return message2;
|
|
379
|
+
},
|
|
380
|
+
encode() {
|
|
381
|
+
return message2;
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
};
|
|
385
|
+
}
|
|
386
|
+
var ContextRenderer = class {
|
|
387
|
+
options;
|
|
388
|
+
constructor(options = {}) {
|
|
389
|
+
this.options = options;
|
|
390
|
+
}
|
|
391
|
+
/**
|
|
392
|
+
* Check if data is a primitive (string, number, boolean).
|
|
393
|
+
*/
|
|
394
|
+
isPrimitive(data) {
|
|
395
|
+
return typeof data === "string" || typeof data === "number" || typeof data === "boolean";
|
|
396
|
+
}
|
|
397
|
+
/**
|
|
398
|
+
* Group fragments by name for groupFragments option.
|
|
399
|
+
*/
|
|
400
|
+
groupByName(fragments) {
|
|
401
|
+
const groups = /* @__PURE__ */ new Map();
|
|
402
|
+
for (const fragment2 of fragments) {
|
|
403
|
+
const existing = groups.get(fragment2.name) ?? [];
|
|
404
|
+
existing.push(fragment2);
|
|
405
|
+
groups.set(fragment2.name, existing);
|
|
406
|
+
}
|
|
407
|
+
return groups;
|
|
408
|
+
}
|
|
409
|
+
/**
|
|
410
|
+
* Remove null/undefined from fragments and fragment data recursively.
|
|
411
|
+
* This protects renderers from nullish values and ensures they are ignored
|
|
412
|
+
* consistently across all output formats.
|
|
413
|
+
*/
|
|
414
|
+
sanitizeFragments(fragments) {
|
|
415
|
+
const sanitized = [];
|
|
416
|
+
for (const fragment2 of fragments) {
|
|
417
|
+
const cleaned = this.sanitizeFragment(fragment2, /* @__PURE__ */ new WeakSet());
|
|
418
|
+
if (cleaned) {
|
|
419
|
+
sanitized.push(cleaned);
|
|
420
|
+
}
|
|
421
|
+
}
|
|
422
|
+
return sanitized;
|
|
423
|
+
}
|
|
424
|
+
sanitizeFragment(fragment2, seen) {
|
|
425
|
+
const data = this.sanitizeData(fragment2.data, seen);
|
|
426
|
+
if (data == null) {
|
|
427
|
+
return null;
|
|
428
|
+
}
|
|
429
|
+
return {
|
|
430
|
+
...fragment2,
|
|
431
|
+
data
|
|
432
|
+
};
|
|
433
|
+
}
|
|
434
|
+
sanitizeData(data, seen) {
|
|
435
|
+
if (data == null) {
|
|
436
|
+
return void 0;
|
|
437
|
+
}
|
|
438
|
+
if (isFragment(data)) {
|
|
439
|
+
return this.sanitizeFragment(data, seen) ?? void 0;
|
|
440
|
+
}
|
|
441
|
+
if (Array.isArray(data)) {
|
|
442
|
+
if (seen.has(data)) {
|
|
443
|
+
return void 0;
|
|
444
|
+
}
|
|
445
|
+
seen.add(data);
|
|
446
|
+
const cleaned = [];
|
|
447
|
+
for (const item of data) {
|
|
448
|
+
const sanitizedItem = this.sanitizeData(item, seen);
|
|
449
|
+
if (sanitizedItem != null) {
|
|
450
|
+
cleaned.push(sanitizedItem);
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
return cleaned;
|
|
454
|
+
}
|
|
455
|
+
if (isFragmentObject(data)) {
|
|
456
|
+
if (seen.has(data)) {
|
|
457
|
+
return void 0;
|
|
458
|
+
}
|
|
459
|
+
seen.add(data);
|
|
460
|
+
const cleaned = {};
|
|
461
|
+
for (const [key, value] of Object.entries(data)) {
|
|
462
|
+
const sanitizedValue = this.sanitizeData(value, seen);
|
|
463
|
+
if (sanitizedValue != null) {
|
|
464
|
+
cleaned[key] = sanitizedValue;
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
return cleaned;
|
|
468
|
+
}
|
|
469
|
+
return data;
|
|
470
|
+
}
|
|
471
|
+
/**
|
|
472
|
+
* Template method - dispatches value to appropriate handler.
|
|
473
|
+
*/
|
|
474
|
+
renderValue(key, value, ctx) {
|
|
475
|
+
if (value == null) {
|
|
476
|
+
return "";
|
|
477
|
+
}
|
|
478
|
+
if (isFragment(value)) {
|
|
479
|
+
return this.renderFragment(value, ctx);
|
|
480
|
+
}
|
|
481
|
+
if (Array.isArray(value)) {
|
|
482
|
+
return this.renderArray(key, value, ctx);
|
|
483
|
+
}
|
|
484
|
+
if (isFragmentObject(value)) {
|
|
485
|
+
return this.renderObject(key, value, ctx);
|
|
486
|
+
}
|
|
487
|
+
return this.renderPrimitive(key, String(value), ctx);
|
|
488
|
+
}
|
|
489
|
+
/**
|
|
490
|
+
* Render all entries of an object.
|
|
491
|
+
*/
|
|
492
|
+
renderEntries(data, ctx) {
|
|
493
|
+
return Object.entries(data).map(([key, value]) => this.renderValue(key, value, ctx)).filter(Boolean);
|
|
494
|
+
}
|
|
495
|
+
};
|
|
496
|
+
var XmlRenderer = class extends ContextRenderer {
|
|
497
|
+
render(fragments) {
|
|
498
|
+
const sanitized = this.sanitizeFragments(fragments);
|
|
499
|
+
return sanitized.map((f) => this.#renderTopLevel(f)).filter(Boolean).join("\n");
|
|
500
|
+
}
|
|
501
|
+
#renderTopLevel(fragment2) {
|
|
502
|
+
if (this.isPrimitive(fragment2.data)) {
|
|
503
|
+
return this.#leafRoot(fragment2.name, String(fragment2.data));
|
|
504
|
+
}
|
|
505
|
+
if (Array.isArray(fragment2.data)) {
|
|
506
|
+
return this.#renderArray(fragment2.name, fragment2.data, 0);
|
|
507
|
+
}
|
|
508
|
+
if (isFragment(fragment2.data)) {
|
|
509
|
+
const child = this.renderFragment(fragment2.data, { depth: 1, path: [] });
|
|
510
|
+
return this.#wrap(fragment2.name, [child]);
|
|
511
|
+
}
|
|
512
|
+
if (isFragmentObject(fragment2.data)) {
|
|
513
|
+
return this.#wrap(
|
|
514
|
+
fragment2.name,
|
|
515
|
+
this.renderEntries(fragment2.data, { depth: 1, path: [] })
|
|
516
|
+
);
|
|
517
|
+
}
|
|
518
|
+
return "";
|
|
519
|
+
}
|
|
520
|
+
#renderArray(name, items, depth) {
|
|
521
|
+
const fragmentItems = items.filter(isFragment);
|
|
522
|
+
const nonFragmentItems = items.filter((item) => !isFragment(item));
|
|
523
|
+
const children = [];
|
|
524
|
+
for (const item of nonFragmentItems) {
|
|
525
|
+
if (item != null) {
|
|
526
|
+
if (isFragmentObject(item)) {
|
|
527
|
+
children.push(
|
|
528
|
+
this.#wrapIndented(
|
|
529
|
+
pluralize.singular(name),
|
|
530
|
+
this.renderEntries(item, { depth: depth + 2, path: [] }),
|
|
531
|
+
depth + 1
|
|
532
|
+
)
|
|
533
|
+
);
|
|
534
|
+
} else {
|
|
535
|
+
children.push(
|
|
536
|
+
this.#leaf(pluralize.singular(name), String(item), depth + 1)
|
|
537
|
+
);
|
|
538
|
+
}
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
if (this.options.groupFragments && fragmentItems.length > 0) {
|
|
542
|
+
const groups = this.groupByName(fragmentItems);
|
|
543
|
+
for (const [groupName, groupFragments] of groups) {
|
|
544
|
+
const groupChildren = groupFragments.map(
|
|
545
|
+
(frag) => this.renderFragment(frag, { depth: depth + 2, path: [] })
|
|
546
|
+
);
|
|
547
|
+
const pluralName = pluralize.plural(groupName);
|
|
548
|
+
children.push(this.#wrapIndented(pluralName, groupChildren, depth + 1));
|
|
549
|
+
}
|
|
550
|
+
} else {
|
|
551
|
+
for (const frag of fragmentItems) {
|
|
552
|
+
children.push(
|
|
553
|
+
this.renderFragment(frag, { depth: depth + 1, path: [] })
|
|
554
|
+
);
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
return this.#wrap(name, children);
|
|
558
|
+
}
|
|
559
|
+
#leafRoot(tag, value) {
|
|
560
|
+
const safe = this.#escape(value);
|
|
561
|
+
if (safe.includes("\n")) {
|
|
562
|
+
return `<${tag}>
|
|
563
|
+
${this.#indent(safe, 2)}
|
|
564
|
+
</${tag}>`;
|
|
565
|
+
}
|
|
566
|
+
return `<${tag}>${safe}</${tag}>`;
|
|
567
|
+
}
|
|
568
|
+
renderFragment(fragment2, ctx) {
|
|
569
|
+
const { name, data } = fragment2;
|
|
570
|
+
if (this.isPrimitive(data)) {
|
|
571
|
+
return this.#leaf(name, String(data), ctx.depth);
|
|
572
|
+
}
|
|
573
|
+
if (isFragment(data)) {
|
|
574
|
+
const child = this.renderFragment(data, { ...ctx, depth: ctx.depth + 1 });
|
|
575
|
+
return this.#wrapIndented(name, [child], ctx.depth);
|
|
576
|
+
}
|
|
577
|
+
if (Array.isArray(data)) {
|
|
578
|
+
return this.#renderArrayIndented(name, data, ctx.depth);
|
|
579
|
+
}
|
|
580
|
+
if (isFragmentObject(data)) {
|
|
581
|
+
const children = this.renderEntries(data, {
|
|
582
|
+
...ctx,
|
|
583
|
+
depth: ctx.depth + 1
|
|
584
|
+
});
|
|
585
|
+
return this.#wrapIndented(name, children, ctx.depth);
|
|
586
|
+
}
|
|
587
|
+
return "";
|
|
588
|
+
}
|
|
589
|
+
#renderArrayIndented(name, items, depth) {
|
|
590
|
+
const fragmentItems = items.filter(isFragment);
|
|
591
|
+
const nonFragmentItems = items.filter((item) => !isFragment(item));
|
|
592
|
+
const children = [];
|
|
593
|
+
for (const item of nonFragmentItems) {
|
|
594
|
+
if (item != null) {
|
|
595
|
+
if (isFragmentObject(item)) {
|
|
596
|
+
children.push(
|
|
597
|
+
this.#wrapIndented(
|
|
598
|
+
pluralize.singular(name),
|
|
599
|
+
this.renderEntries(item, { depth: depth + 2, path: [] }),
|
|
600
|
+
depth + 1
|
|
601
|
+
)
|
|
602
|
+
);
|
|
603
|
+
} else {
|
|
604
|
+
children.push(
|
|
605
|
+
this.#leaf(pluralize.singular(name), String(item), depth + 1)
|
|
606
|
+
);
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
if (this.options.groupFragments && fragmentItems.length > 0) {
|
|
611
|
+
const groups = this.groupByName(fragmentItems);
|
|
612
|
+
for (const [groupName, groupFragments] of groups) {
|
|
613
|
+
const groupChildren = groupFragments.map(
|
|
614
|
+
(frag) => this.renderFragment(frag, { depth: depth + 2, path: [] })
|
|
615
|
+
);
|
|
616
|
+
const pluralName = pluralize.plural(groupName);
|
|
617
|
+
children.push(this.#wrapIndented(pluralName, groupChildren, depth + 1));
|
|
618
|
+
}
|
|
619
|
+
} else {
|
|
620
|
+
for (const frag of fragmentItems) {
|
|
621
|
+
children.push(
|
|
622
|
+
this.renderFragment(frag, { depth: depth + 1, path: [] })
|
|
623
|
+
);
|
|
624
|
+
}
|
|
625
|
+
}
|
|
626
|
+
return this.#wrapIndented(name, children, depth);
|
|
627
|
+
}
|
|
628
|
+
renderPrimitive(key, value, ctx) {
|
|
629
|
+
return this.#leaf(key, value, ctx.depth);
|
|
630
|
+
}
|
|
631
|
+
renderArray(key, items, ctx) {
|
|
632
|
+
if (!items.length) {
|
|
633
|
+
return "";
|
|
634
|
+
}
|
|
635
|
+
const itemTag = pluralize.singular(key);
|
|
636
|
+
const children = items.filter((item) => item != null).map((item) => {
|
|
637
|
+
if (isFragment(item)) {
|
|
638
|
+
return this.renderFragment(item, { ...ctx, depth: ctx.depth + 1 });
|
|
639
|
+
}
|
|
640
|
+
if (isFragmentObject(item)) {
|
|
641
|
+
return this.#wrapIndented(
|
|
642
|
+
itemTag,
|
|
643
|
+
this.renderEntries(item, { ...ctx, depth: ctx.depth + 2 }),
|
|
644
|
+
ctx.depth + 1
|
|
645
|
+
);
|
|
646
|
+
}
|
|
647
|
+
return this.#leaf(itemTag, String(item), ctx.depth + 1);
|
|
648
|
+
});
|
|
649
|
+
return this.#wrapIndented(key, children, ctx.depth);
|
|
650
|
+
}
|
|
651
|
+
renderObject(key, obj, ctx) {
|
|
652
|
+
const children = this.renderEntries(obj, { ...ctx, depth: ctx.depth + 1 });
|
|
653
|
+
return this.#wrapIndented(key, children, ctx.depth);
|
|
654
|
+
}
|
|
655
|
+
#escape(value) {
|
|
656
|
+
if (value == null) {
|
|
657
|
+
return "";
|
|
658
|
+
}
|
|
659
|
+
return value.replaceAll(/&/g, "&").replaceAll(/</g, "<").replaceAll(/>/g, ">").replaceAll(/"/g, """).replaceAll(/'/g, "'");
|
|
660
|
+
}
|
|
661
|
+
#indent(text, spaces) {
|
|
662
|
+
if (!text.trim()) {
|
|
663
|
+
return "";
|
|
664
|
+
}
|
|
665
|
+
const padding = " ".repeat(spaces);
|
|
666
|
+
return text.split("\n").map((line) => line.length ? padding + line : padding).join("\n");
|
|
667
|
+
}
|
|
668
|
+
#leaf(tag, value, depth) {
|
|
669
|
+
const safe = this.#escape(value);
|
|
670
|
+
const pad = " ".repeat(depth);
|
|
671
|
+
if (safe.includes("\n")) {
|
|
672
|
+
return `${pad}<${tag}>
|
|
673
|
+
${this.#indent(safe, (depth + 1) * 2)}
|
|
674
|
+
${pad}</${tag}>`;
|
|
675
|
+
}
|
|
676
|
+
return `${pad}<${tag}>${safe}</${tag}>`;
|
|
677
|
+
}
|
|
678
|
+
#wrap(tag, children) {
|
|
679
|
+
const content = children.filter(Boolean).join("\n");
|
|
680
|
+
if (!content) {
|
|
681
|
+
return "";
|
|
682
|
+
}
|
|
683
|
+
return `<${tag}>
|
|
684
|
+
${content}
|
|
685
|
+
</${tag}>`;
|
|
686
|
+
}
|
|
687
|
+
#wrapIndented(tag, children, depth) {
|
|
688
|
+
const content = children.filter(Boolean).join("\n");
|
|
689
|
+
if (!content) {
|
|
690
|
+
return "";
|
|
691
|
+
}
|
|
692
|
+
const pad = " ".repeat(depth);
|
|
693
|
+
return `${pad}<${tag}>
|
|
694
|
+
${content}
|
|
695
|
+
${pad}</${tag}>`;
|
|
696
|
+
}
|
|
697
|
+
};
|
|
698
|
+
var ContextStore = class {
|
|
699
|
+
};
|
|
700
|
+
var ContextEngine = class {
|
|
701
|
+
/** Non-message fragments (role, hints, etc.) - not persisted in graph */
|
|
702
|
+
#fragments = [];
|
|
703
|
+
/** Pending message fragments to be added to graph */
|
|
704
|
+
#pendingMessages = [];
|
|
705
|
+
#store;
|
|
706
|
+
#chatId;
|
|
707
|
+
#userId;
|
|
708
|
+
#branchName;
|
|
709
|
+
#branch = null;
|
|
710
|
+
#chatData = null;
|
|
711
|
+
#initialized = false;
|
|
712
|
+
constructor(options) {
|
|
713
|
+
if (!options.chatId) {
|
|
714
|
+
throw new Error("chatId is required");
|
|
715
|
+
}
|
|
716
|
+
if (!options.userId) {
|
|
717
|
+
throw new Error("userId is required");
|
|
718
|
+
}
|
|
719
|
+
this.#store = options.store;
|
|
720
|
+
this.#chatId = options.chatId;
|
|
721
|
+
this.#userId = options.userId;
|
|
722
|
+
this.#branchName = "main";
|
|
723
|
+
}
|
|
724
|
+
/**
|
|
725
|
+
* Initialize the chat and branch if they don't exist.
|
|
726
|
+
*/
|
|
727
|
+
async #ensureInitialized() {
|
|
728
|
+
if (this.#initialized) {
|
|
729
|
+
return;
|
|
730
|
+
}
|
|
731
|
+
this.#chatData = await this.#store.upsertChat({
|
|
732
|
+
id: this.#chatId,
|
|
733
|
+
userId: this.#userId
|
|
734
|
+
});
|
|
735
|
+
this.#branch = await this.#store.getActiveBranch(this.#chatId);
|
|
736
|
+
this.#initialized = true;
|
|
737
|
+
}
|
|
738
|
+
/**
|
|
739
|
+
* Create a new branch from a specific message.
|
|
740
|
+
* Shared logic between rewind() and btw().
|
|
741
|
+
*/
|
|
742
|
+
async #createBranchFrom(messageId, switchTo) {
|
|
743
|
+
const branches = await this.#store.listBranches(this.#chatId);
|
|
744
|
+
const samePrefix = branches.filter(
|
|
745
|
+
(b) => b.name === this.#branchName || b.name.startsWith(`${this.#branchName}-v`)
|
|
746
|
+
);
|
|
747
|
+
const newBranchName = `${this.#branchName}-v${samePrefix.length + 1}`;
|
|
748
|
+
const newBranch = {
|
|
749
|
+
id: crypto.randomUUID(),
|
|
750
|
+
chatId: this.#chatId,
|
|
751
|
+
name: newBranchName,
|
|
752
|
+
headMessageId: messageId,
|
|
753
|
+
isActive: false,
|
|
754
|
+
createdAt: Date.now()
|
|
755
|
+
};
|
|
756
|
+
await this.#store.createBranch(newBranch);
|
|
757
|
+
if (switchTo) {
|
|
758
|
+
await this.#store.setActiveBranch(this.#chatId, newBranch.id);
|
|
759
|
+
this.#branch = { ...newBranch, isActive: true };
|
|
760
|
+
this.#branchName = newBranchName;
|
|
761
|
+
this.#pendingMessages = [];
|
|
762
|
+
}
|
|
763
|
+
const chain = await this.#store.getMessageChain(messageId);
|
|
764
|
+
return {
|
|
765
|
+
id: newBranch.id,
|
|
766
|
+
name: newBranch.name,
|
|
767
|
+
headMessageId: newBranch.headMessageId,
|
|
768
|
+
isActive: switchTo,
|
|
769
|
+
messageCount: chain.length,
|
|
770
|
+
createdAt: newBranch.createdAt
|
|
771
|
+
};
|
|
772
|
+
}
|
|
773
|
+
/**
|
|
774
|
+
* Get the current chat ID.
|
|
775
|
+
*/
|
|
776
|
+
get chatId() {
|
|
777
|
+
return this.#chatId;
|
|
778
|
+
}
|
|
779
|
+
/**
|
|
780
|
+
* Get the current branch name.
|
|
781
|
+
*/
|
|
782
|
+
get branch() {
|
|
783
|
+
return this.#branchName;
|
|
784
|
+
}
|
|
785
|
+
/**
|
|
786
|
+
* Get metadata for the current chat.
|
|
787
|
+
* Returns null if the chat hasn't been initialized yet.
|
|
788
|
+
*/
|
|
789
|
+
get chat() {
|
|
790
|
+
if (!this.#chatData) {
|
|
791
|
+
return null;
|
|
792
|
+
}
|
|
793
|
+
return {
|
|
794
|
+
id: this.#chatData.id,
|
|
795
|
+
userId: this.#chatData.userId,
|
|
796
|
+
createdAt: this.#chatData.createdAt,
|
|
797
|
+
updatedAt: this.#chatData.updatedAt,
|
|
798
|
+
title: this.#chatData.title,
|
|
799
|
+
metadata: this.#chatData.metadata
|
|
800
|
+
};
|
|
801
|
+
}
|
|
802
|
+
/**
|
|
803
|
+
* Add fragments to the context.
|
|
804
|
+
*
|
|
805
|
+
* - Message fragments (user/assistant) are queued for persistence
|
|
806
|
+
* - Non-message fragments (role/hint) are kept in memory for system prompt
|
|
807
|
+
*/
|
|
808
|
+
set(...fragments) {
|
|
809
|
+
for (const fragment2 of fragments) {
|
|
810
|
+
if (isMessageFragment(fragment2)) {
|
|
811
|
+
this.#pendingMessages.push(fragment2);
|
|
812
|
+
} else {
|
|
813
|
+
this.#fragments.push(fragment2);
|
|
814
|
+
}
|
|
815
|
+
}
|
|
816
|
+
return this;
|
|
817
|
+
}
|
|
818
|
+
// Unset a fragment by ID (not implemented yet)
|
|
819
|
+
unset(fragmentId) {
|
|
820
|
+
}
|
|
821
|
+
/**
|
|
822
|
+
* Render all fragments using the provided renderer.
|
|
823
|
+
* @internal Use resolve() instead for public API.
|
|
824
|
+
*/
|
|
825
|
+
render(renderer) {
|
|
826
|
+
return renderer.render(this.#fragments);
|
|
827
|
+
}
|
|
828
|
+
/**
|
|
829
|
+
* Resolve context into AI SDK-ready format.
|
|
830
|
+
*
|
|
831
|
+
* - Initializes chat and branch if needed
|
|
832
|
+
* - Loads message history from the graph (walking parent chain)
|
|
833
|
+
* - Separates context fragments for system prompt
|
|
834
|
+
* - Combines with pending messages
|
|
835
|
+
*
|
|
836
|
+
* @example
|
|
837
|
+
* ```ts
|
|
838
|
+
* const context = new ContextEngine({ store, chatId: 'chat-1', userId: 'user-1' })
|
|
839
|
+
* .set(role('You are helpful'), user('Hello'));
|
|
840
|
+
*
|
|
841
|
+
* const { systemPrompt, messages } = await context.resolve();
|
|
842
|
+
* await generateText({ system: systemPrompt, messages });
|
|
843
|
+
* ```
|
|
844
|
+
*/
|
|
845
|
+
async resolve(options) {
|
|
846
|
+
await this.#ensureInitialized();
|
|
847
|
+
const systemPrompt = options.renderer.render(this.#fragments);
|
|
848
|
+
const messages = [];
|
|
849
|
+
if (this.#branch?.headMessageId) {
|
|
850
|
+
const chain = await this.#store.getMessageChain(
|
|
851
|
+
this.#branch.headMessageId
|
|
852
|
+
);
|
|
853
|
+
for (const msg of chain) {
|
|
854
|
+
messages.push(message(msg.data).codec?.decode());
|
|
855
|
+
}
|
|
856
|
+
}
|
|
857
|
+
for (const fragment2 of this.#pendingMessages) {
|
|
858
|
+
const decoded = fragment2.codec.decode();
|
|
859
|
+
messages.push(decoded);
|
|
860
|
+
}
|
|
861
|
+
return { systemPrompt, messages };
|
|
862
|
+
}
|
|
863
|
+
/**
|
|
864
|
+
* Save pending messages to the graph.
|
|
865
|
+
*
|
|
866
|
+
* Each message is added as a node with parentId pointing to the previous message.
|
|
867
|
+
* The branch head is updated to point to the last message.
|
|
868
|
+
*
|
|
869
|
+
* @example
|
|
870
|
+
* ```ts
|
|
871
|
+
* context.set(user('Hello'));
|
|
872
|
+
* // AI responds...
|
|
873
|
+
* context.set(assistant('Hi there!'));
|
|
874
|
+
* await context.save(); // Persist to graph
|
|
875
|
+
* ```
|
|
876
|
+
*/
|
|
877
|
+
async save() {
|
|
878
|
+
await this.#ensureInitialized();
|
|
879
|
+
if (this.#pendingMessages.length === 0) {
|
|
880
|
+
return;
|
|
881
|
+
}
|
|
882
|
+
let parentId = this.#branch.headMessageId;
|
|
883
|
+
const now = Date.now();
|
|
884
|
+
for (const fragment2 of this.#pendingMessages) {
|
|
885
|
+
const messageData = {
|
|
886
|
+
id: fragment2.id ?? crypto.randomUUID(),
|
|
887
|
+
chatId: this.#chatId,
|
|
888
|
+
parentId,
|
|
889
|
+
name: fragment2.name,
|
|
890
|
+
type: fragment2.type,
|
|
891
|
+
data: fragment2.codec.encode(),
|
|
892
|
+
createdAt: now
|
|
893
|
+
};
|
|
894
|
+
await this.#store.addMessage(messageData);
|
|
895
|
+
parentId = messageData.id;
|
|
896
|
+
}
|
|
897
|
+
await this.#store.updateBranchHead(this.#branch.id, parentId);
|
|
898
|
+
this.#branch.headMessageId = parentId;
|
|
899
|
+
this.#pendingMessages = [];
|
|
900
|
+
}
|
|
901
|
+
/**
|
|
902
|
+
* Estimate token count and cost for the full context.
|
|
903
|
+
*
|
|
904
|
+
* Includes:
|
|
905
|
+
* - System prompt fragments (role, hints, etc.)
|
|
906
|
+
* - Persisted chat messages (from store)
|
|
907
|
+
* - Pending messages (not yet saved)
|
|
908
|
+
*
|
|
909
|
+
* @param modelId - Model ID (e.g., "openai:gpt-4o", "anthropic:claude-3-5-sonnet")
|
|
910
|
+
* @param options - Optional settings
|
|
911
|
+
* @returns Estimate result with token counts, costs, and per-fragment breakdown
|
|
912
|
+
*/
|
|
913
|
+
async estimate(modelId, options = {}) {
|
|
914
|
+
await this.#ensureInitialized();
|
|
915
|
+
const renderer = options.renderer ?? new XmlRenderer();
|
|
916
|
+
const registry = getModelsRegistry();
|
|
917
|
+
await registry.load();
|
|
918
|
+
const model = registry.get(modelId);
|
|
919
|
+
if (!model) {
|
|
920
|
+
throw new Error(
|
|
921
|
+
`Model "${modelId}" not found. Call load() first or check model ID.`
|
|
922
|
+
);
|
|
923
|
+
}
|
|
924
|
+
const tokenizer = registry.getTokenizer(modelId);
|
|
925
|
+
const fragmentEstimates = [];
|
|
926
|
+
for (const fragment2 of this.#fragments) {
|
|
927
|
+
const rendered = renderer.render([fragment2]);
|
|
928
|
+
const tokens = tokenizer.count(rendered);
|
|
929
|
+
const cost = tokens / 1e6 * model.cost.input;
|
|
930
|
+
fragmentEstimates.push({
|
|
931
|
+
id: fragment2.id,
|
|
932
|
+
name: fragment2.name,
|
|
933
|
+
tokens,
|
|
934
|
+
cost
|
|
935
|
+
});
|
|
936
|
+
}
|
|
937
|
+
if (this.#branch?.headMessageId) {
|
|
938
|
+
const chain = await this.#store.getMessageChain(
|
|
939
|
+
this.#branch.headMessageId
|
|
940
|
+
);
|
|
941
|
+
for (const msg of chain) {
|
|
942
|
+
const content = String(msg.data);
|
|
943
|
+
const tokens = tokenizer.count(content);
|
|
944
|
+
const cost = tokens / 1e6 * model.cost.input;
|
|
945
|
+
fragmentEstimates.push({
|
|
946
|
+
name: msg.name,
|
|
947
|
+
id: msg.id,
|
|
948
|
+
tokens,
|
|
949
|
+
cost
|
|
950
|
+
});
|
|
951
|
+
}
|
|
952
|
+
}
|
|
953
|
+
for (const fragment2 of this.#pendingMessages) {
|
|
954
|
+
const content = String(fragment2.data);
|
|
955
|
+
const tokens = tokenizer.count(content);
|
|
956
|
+
const cost = tokens / 1e6 * model.cost.input;
|
|
957
|
+
fragmentEstimates.push({
|
|
958
|
+
name: fragment2.name,
|
|
959
|
+
id: fragment2.id,
|
|
960
|
+
tokens,
|
|
961
|
+
cost
|
|
962
|
+
});
|
|
963
|
+
}
|
|
964
|
+
const totalTokens = fragmentEstimates.reduce((sum, f) => sum + f.tokens, 0);
|
|
965
|
+
const totalCost = fragmentEstimates.reduce((sum, f) => sum + f.cost, 0);
|
|
966
|
+
return {
|
|
967
|
+
model: model.id,
|
|
968
|
+
provider: model.provider,
|
|
969
|
+
tokens: totalTokens,
|
|
970
|
+
cost: totalCost,
|
|
971
|
+
limits: {
|
|
972
|
+
context: model.limit.context,
|
|
973
|
+
output: model.limit.output,
|
|
974
|
+
exceedsContext: totalTokens > model.limit.context
|
|
975
|
+
},
|
|
976
|
+
fragments: fragmentEstimates
|
|
977
|
+
};
|
|
978
|
+
}
|
|
979
|
+
/**
|
|
980
|
+
* Rewind to a specific message by ID.
|
|
981
|
+
*
|
|
982
|
+
* Creates a new branch from that message, preserving the original branch.
|
|
983
|
+
* The new branch becomes active.
|
|
984
|
+
*
|
|
985
|
+
* @param messageId - The message ID to rewind to
|
|
986
|
+
* @returns The new branch info
|
|
987
|
+
*
|
|
988
|
+
* @example
|
|
989
|
+
* ```ts
|
|
990
|
+
* context.set(user('What is 2 + 2?', { id: 'q1' }));
|
|
991
|
+
* context.set(assistant('The answer is 5.', { id: 'wrong' })); // Oops!
|
|
992
|
+
* await context.save();
|
|
993
|
+
*
|
|
994
|
+
* // Rewind to the question, creates new branch
|
|
995
|
+
* const newBranch = await context.rewind('q1');
|
|
996
|
+
*
|
|
997
|
+
* // Now add correct answer on new branch
|
|
998
|
+
* context.set(assistant('The answer is 4.'));
|
|
999
|
+
* await context.save();
|
|
1000
|
+
* ```
|
|
1001
|
+
*/
|
|
1002
|
+
async rewind(messageId) {
|
|
1003
|
+
await this.#ensureInitialized();
|
|
1004
|
+
const message2 = await this.#store.getMessage(messageId);
|
|
1005
|
+
if (!message2) {
|
|
1006
|
+
throw new Error(`Message "${messageId}" not found`);
|
|
1007
|
+
}
|
|
1008
|
+
if (message2.chatId !== this.#chatId) {
|
|
1009
|
+
throw new Error(`Message "${messageId}" belongs to a different chat`);
|
|
1010
|
+
}
|
|
1011
|
+
return this.#createBranchFrom(messageId, true);
|
|
1012
|
+
}
|
|
1013
|
+
/**
|
|
1014
|
+
* Create a checkpoint at the current position.
|
|
1015
|
+
*
|
|
1016
|
+
* A checkpoint is a named pointer to the current branch head.
|
|
1017
|
+
* Use restore() to return to this point later.
|
|
1018
|
+
*
|
|
1019
|
+
* @param name - Name for the checkpoint
|
|
1020
|
+
* @returns The checkpoint info
|
|
1021
|
+
*
|
|
1022
|
+
* @example
|
|
1023
|
+
* ```ts
|
|
1024
|
+
* context.set(user('I want to learn a new skill.'));
|
|
1025
|
+
* context.set(assistant('Would you like coding or cooking?'));
|
|
1026
|
+
* await context.save();
|
|
1027
|
+
*
|
|
1028
|
+
* // Save checkpoint before user's choice
|
|
1029
|
+
* const cp = await context.checkpoint('before-choice');
|
|
1030
|
+
* ```
|
|
1031
|
+
*/
|
|
1032
|
+
async checkpoint(name) {
|
|
1033
|
+
await this.#ensureInitialized();
|
|
1034
|
+
if (!this.#branch?.headMessageId) {
|
|
1035
|
+
throw new Error("Cannot create checkpoint: no messages in conversation");
|
|
1036
|
+
}
|
|
1037
|
+
const checkpoint = {
|
|
1038
|
+
id: crypto.randomUUID(),
|
|
1039
|
+
chatId: this.#chatId,
|
|
1040
|
+
name,
|
|
1041
|
+
messageId: this.#branch.headMessageId,
|
|
1042
|
+
createdAt: Date.now()
|
|
1043
|
+
};
|
|
1044
|
+
await this.#store.createCheckpoint(checkpoint);
|
|
1045
|
+
return {
|
|
1046
|
+
id: checkpoint.id,
|
|
1047
|
+
name: checkpoint.name,
|
|
1048
|
+
messageId: checkpoint.messageId,
|
|
1049
|
+
createdAt: checkpoint.createdAt
|
|
1050
|
+
};
|
|
1051
|
+
}
|
|
1052
|
+
/**
|
|
1053
|
+
* Restore to a checkpoint by creating a new branch from that point.
|
|
1054
|
+
*
|
|
1055
|
+
* @param name - Name of the checkpoint to restore
|
|
1056
|
+
* @returns The new branch info
|
|
1057
|
+
*
|
|
1058
|
+
* @example
|
|
1059
|
+
* ```ts
|
|
1060
|
+
* // User chose cooking, but wants to try coding path
|
|
1061
|
+
* await context.restore('before-choice');
|
|
1062
|
+
*
|
|
1063
|
+
* context.set(user('I want to learn coding.'));
|
|
1064
|
+
* context.set(assistant('Python is a great starting language!'));
|
|
1065
|
+
* await context.save();
|
|
1066
|
+
* ```
|
|
1067
|
+
*/
|
|
1068
|
+
async restore(name) {
|
|
1069
|
+
await this.#ensureInitialized();
|
|
1070
|
+
const checkpoint = await this.#store.getCheckpoint(this.#chatId, name);
|
|
1071
|
+
if (!checkpoint) {
|
|
1072
|
+
throw new Error(
|
|
1073
|
+
`Checkpoint "${name}" not found in chat "${this.#chatId}"`
|
|
1074
|
+
);
|
|
1075
|
+
}
|
|
1076
|
+
return this.rewind(checkpoint.messageId);
|
|
1077
|
+
}
|
|
1078
|
+
/**
|
|
1079
|
+
* Switch to a different branch by name.
|
|
1080
|
+
*
|
|
1081
|
+
* @param name - Branch name to switch to
|
|
1082
|
+
*
|
|
1083
|
+
* @example
|
|
1084
|
+
* ```ts
|
|
1085
|
+
* // List branches (via store)
|
|
1086
|
+
* const branches = await store.listBranches(context.chatId);
|
|
1087
|
+
* console.log(branches); // [{name: 'main', ...}, {name: 'main-v2', ...}]
|
|
1088
|
+
*
|
|
1089
|
+
* // Switch to original branch
|
|
1090
|
+
* await context.switchBranch('main');
|
|
1091
|
+
* ```
|
|
1092
|
+
*/
|
|
1093
|
+
async switchBranch(name) {
|
|
1094
|
+
await this.#ensureInitialized();
|
|
1095
|
+
const branch = await this.#store.getBranch(this.#chatId, name);
|
|
1096
|
+
if (!branch) {
|
|
1097
|
+
throw new Error(`Branch "${name}" not found in chat "${this.#chatId}"`);
|
|
1098
|
+
}
|
|
1099
|
+
await this.#store.setActiveBranch(this.#chatId, branch.id);
|
|
1100
|
+
this.#branch = { ...branch, isActive: true };
|
|
1101
|
+
this.#branchName = name;
|
|
1102
|
+
this.#pendingMessages = [];
|
|
1103
|
+
}
|
|
1104
|
+
/**
|
|
1105
|
+
* Create a parallel branch from the current position ("by the way").
|
|
1106
|
+
*
|
|
1107
|
+
* Use this when you want to fork the conversation without leaving
|
|
1108
|
+
* the current branch. Common use case: user wants to ask another
|
|
1109
|
+
* question while waiting for the model to respond.
|
|
1110
|
+
*
|
|
1111
|
+
* Unlike rewind(), this method:
|
|
1112
|
+
* - Uses the current HEAD (no messageId needed)
|
|
1113
|
+
* - Does NOT switch to the new branch
|
|
1114
|
+
* - Keeps pending messages intact
|
|
1115
|
+
*
|
|
1116
|
+
* @returns The new branch info (does not switch to it)
|
|
1117
|
+
* @throws Error if no messages exist in the conversation
|
|
1118
|
+
*
|
|
1119
|
+
* @example
|
|
1120
|
+
* ```ts
|
|
1121
|
+
* // User asked a question, model is generating...
|
|
1122
|
+
* context.set(user('What is the weather?'));
|
|
1123
|
+
* await context.save();
|
|
1124
|
+
*
|
|
1125
|
+
* // User wants to ask something else without waiting
|
|
1126
|
+
* const newBranch = await context.btw();
|
|
1127
|
+
* // newBranch = { name: 'main-v2', ... }
|
|
1128
|
+
*
|
|
1129
|
+
* // Later, switch to the new branch and add the question
|
|
1130
|
+
* await context.switchBranch(newBranch.name);
|
|
1131
|
+
* context.set(user('Also, what time is it?'));
|
|
1132
|
+
* await context.save();
|
|
1133
|
+
* ```
|
|
1134
|
+
*/
|
|
1135
|
+
async btw() {
|
|
1136
|
+
await this.#ensureInitialized();
|
|
1137
|
+
if (!this.#branch?.headMessageId) {
|
|
1138
|
+
throw new Error("Cannot create btw branch: no messages in conversation");
|
|
1139
|
+
}
|
|
1140
|
+
return this.#createBranchFrom(this.#branch.headMessageId, false);
|
|
1141
|
+
}
|
|
1142
|
+
/**
|
|
1143
|
+
* Update metadata for the current chat.
|
|
1144
|
+
*
|
|
1145
|
+
* @param updates - Partial metadata to merge (title, metadata)
|
|
1146
|
+
*
|
|
1147
|
+
* @example
|
|
1148
|
+
* ```ts
|
|
1149
|
+
* await context.updateChat({
|
|
1150
|
+
* title: 'Coding Help Session',
|
|
1151
|
+
* metadata: { tags: ['python', 'debugging'] }
|
|
1152
|
+
* });
|
|
1153
|
+
* ```
|
|
1154
|
+
*/
|
|
1155
|
+
async updateChat(updates) {
|
|
1156
|
+
await this.#ensureInitialized();
|
|
1157
|
+
const storeUpdates = {};
|
|
1158
|
+
if (updates.title !== void 0) {
|
|
1159
|
+
storeUpdates.title = updates.title;
|
|
1160
|
+
}
|
|
1161
|
+
if (updates.metadata !== void 0) {
|
|
1162
|
+
storeUpdates.metadata = {
|
|
1163
|
+
...this.#chatData?.metadata,
|
|
1164
|
+
...updates.metadata
|
|
1165
|
+
};
|
|
1166
|
+
}
|
|
1167
|
+
this.#chatData = await this.#store.updateChat(this.#chatId, storeUpdates);
|
|
1168
|
+
}
|
|
1169
|
+
/**
|
|
1170
|
+
* Consolidate context fragments (no-op for now).
|
|
1171
|
+
*
|
|
1172
|
+
* This is a placeholder for future functionality that merges context fragments
|
|
1173
|
+
* using specific rules. Currently, it does nothing.
|
|
1174
|
+
*
|
|
1175
|
+
* @experimental
|
|
1176
|
+
*/
|
|
1177
|
+
consolidate() {
|
|
1178
|
+
return void 0;
|
|
1179
|
+
}
|
|
1180
|
+
/**
|
|
1181
|
+
* Inspect the full context state for debugging.
|
|
1182
|
+
* Returns a JSON-serializable object with context information.
|
|
1183
|
+
*
|
|
1184
|
+
* @param options - Inspection options (modelId and renderer required)
|
|
1185
|
+
* @returns Complete inspection data including estimates, rendered output, fragments, and graph
|
|
1186
|
+
*
|
|
1187
|
+
* @example
|
|
1188
|
+
* ```ts
|
|
1189
|
+
* const inspection = await context.inspect({
|
|
1190
|
+
* modelId: 'openai:gpt-4o',
|
|
1191
|
+
* renderer: new XmlRenderer(),
|
|
1192
|
+
* });
|
|
1193
|
+
* console.log(JSON.stringify(inspection, null, 2));
|
|
1194
|
+
*
|
|
1195
|
+
* // Or write to file for analysis
|
|
1196
|
+
* await fs.writeFile('context-debug.json', JSON.stringify(inspection, null, 2));
|
|
1197
|
+
* ```
|
|
1198
|
+
*/
|
|
1199
|
+
async inspect(options) {
|
|
1200
|
+
await this.#ensureInitialized();
|
|
1201
|
+
const { renderer } = options;
|
|
1202
|
+
const estimateResult = await this.estimate(options.modelId, { renderer });
|
|
1203
|
+
const rendered = renderer.render(this.#fragments);
|
|
1204
|
+
const persistedMessages = [];
|
|
1205
|
+
if (this.#branch?.headMessageId) {
|
|
1206
|
+
const chain = await this.#store.getMessageChain(
|
|
1207
|
+
this.#branch.headMessageId
|
|
1208
|
+
);
|
|
1209
|
+
persistedMessages.push(...chain);
|
|
1210
|
+
}
|
|
1211
|
+
const graph = await this.#store.getGraph(this.#chatId);
|
|
1212
|
+
return {
|
|
1213
|
+
estimate: estimateResult,
|
|
1214
|
+
rendered,
|
|
1215
|
+
fragments: {
|
|
1216
|
+
context: [...this.#fragments],
|
|
1217
|
+
pending: [...this.#pendingMessages],
|
|
1218
|
+
persisted: persistedMessages
|
|
1219
|
+
},
|
|
1220
|
+
graph,
|
|
1221
|
+
meta: {
|
|
1222
|
+
chatId: this.#chatId,
|
|
1223
|
+
branch: this.#branchName,
|
|
1224
|
+
timestamp: Date.now()
|
|
1225
|
+
}
|
|
1226
|
+
};
|
|
1227
|
+
}
|
|
1228
|
+
};
|
|
1229
|
+
/**
 * Build a "term" fragment pairing a name with its definition.
 *
 * @param name - The term being defined
 * @param definition - What the term means
 * @returns `{ name: "term", data: { name, definition } }`
 */
function term(name, definition) {
  const data = { name, definition };
  return { name: "term", data };
}
|
|
1235
|
+
/**
 * Build a "hint" fragment carrying the given text as its data.
 *
 * @param text - Hint content
 * @returns `{ name: "hint", data: text }`
 */
function hint(text) {
  const fragment = { name: "hint" };
  fragment.data = text;
  return fragment;
}
|
|
1241
|
+
/**
 * Build a "guardrail" fragment.
 *
 * `reason` and `action` are only included in the data when truthy.
 *
 * @param input - `{ rule, reason?, action? }`
 * @returns `{ name: "guardrail", data }`
 */
function guardrail(input) {
  const data = { rule: input.rule };
  if (input.reason) {
    data.reason = input.reason;
  }
  if (input.action) {
    data.action = input.action;
  }
  return { name: "guardrail", data };
}
|
|
1251
|
+
/**
 * Build an "explain" fragment.
 *
 * `therefore` is only included in the data when truthy.
 *
 * @param input - `{ concept, explanation, therefore? }`
 * @returns `{ name: "explain", data }`
 */
function explain(input) {
  const data = {
    concept: input.concept,
    explanation: input.explanation
  };
  if (input.therefore) {
    data.therefore = input.therefore;
  }
  return { name: "explain", data };
}
|
|
1261
|
+
/**
 * Build an "example" fragment from a question/answer pair.
 *
 * `note` is only included in the data when truthy.
 *
 * @param input - `{ question, answer, note? }`
 * @returns `{ name: "example", data }`
 */
function example(input) {
  const data = {
    question: input.question,
    answer: input.answer
  };
  if (input.note) {
    data.note = input.note;
  }
  return { name: "example", data };
}
|
|
1271
|
+
/**
 * Build a "clarification" fragment.
 *
 * All three fields are copied as given, including when undefined.
 *
 * @param input - `{ when, ask, reason }`
 * @returns `{ name: "clarification", data: { when, ask, reason } }`
 */
function clarification(input) {
  const { when, ask, reason } = input;
  return {
    name: "clarification",
    data: { when, ask, reason }
  };
}
|
|
1281
|
+
/**
 * Build a "workflow" fragment.
 *
 * `triggers` is only included when it has a truthy length; `notes` is only
 * included when truthy.
 *
 * @param input - `{ task, steps, triggers?, notes? }`
 * @returns `{ name: "workflow", data }`
 */
function workflow(input) {
  const data = {
    task: input.task,
    steps: input.steps
  };
  if (input.triggers?.length) {
    data.triggers = input.triggers;
  }
  if (input.notes) {
    data.notes = input.notes;
  }
  return { name: "workflow", data };
}
|
|
1292
|
+
/**
 * Build a "quirk" fragment describing an issue and its workaround.
 *
 * @param input - `{ issue, workaround }`
 * @returns `{ name: "quirk", data: { issue, workaround } }`
 */
function quirk(input) {
  const { issue, workaround } = input;
  return {
    name: "quirk",
    data: { issue, workaround }
  };
}
|
|
1301
|
+
/**
 * Build a "styleGuide" fragment.
 *
 * `never` and `always` are only included in the data when truthy.
 *
 * @param input - `{ prefer, never?, always? }`
 * @returns `{ name: "styleGuide", data }`
 */
function styleGuide(input) {
  const data = { prefer: input.prefer };
  if (input.never) {
    data.never = input.never;
  }
  if (input.always) {
    data.always = input.always;
  }
  return { name: "styleGuide", data };
}
|
|
1311
|
+
/**
 * Build an "analogy" fragment relating several concepts.
 *
 * `insight`, `therefore` and `pitfall` are only included when truthy.
 *
 * @param input - `{ concepts, relationship, insight?, therefore?, pitfall? }`
 * @returns `{ name: "analogy", data }`
 */
function analogy(input) {
  const data = {
    concepts: input.concepts,
    relationship: input.relationship
  };
  if (input.insight) {
    data.insight = input.insight;
  }
  if (input.therefore) {
    data.therefore = input.therefore;
  }
  if (input.pitfall) {
    data.pitfall = input.pitfall;
  }
  return { name: "analogy", data };
}
|
|
1323
|
+
/**
 * Build a "persona" fragment.
 *
 * `role`, `objective` and `tone` are only included in the data when truthy.
 *
 * @param input - `{ name, role?, objective?, tone? }`
 * @returns `{ name: "persona", data }`
 */
function persona(input) {
  const data = { name: input.name };
  if (input.role) {
    data.role = input.role;
  }
  if (input.objective) {
    data.objective = input.objective;
  }
  if (input.tone) {
    data.tone = input.tone;
  }
  return { name: "persona", data };
}
|
|
1334
|
+
var STORE_DDL = `
|
|
1335
|
+
-- Chats table
|
|
1336
|
+
-- createdAt/updatedAt: DEFAULT for insert, inline SET for updates
|
|
1337
|
+
CREATE TABLE IF NOT EXISTS chats (
|
|
1338
|
+
id TEXT PRIMARY KEY,
|
|
1339
|
+
userId TEXT NOT NULL,
|
|
1340
|
+
title TEXT,
|
|
1341
|
+
metadata TEXT,
|
|
1342
|
+
createdAt INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
|
|
1343
|
+
updatedAt INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000)
|
|
1344
|
+
);
|
|
1345
|
+
|
|
1346
|
+
CREATE INDEX IF NOT EXISTS idx_chats_updatedAt ON chats(updatedAt);
|
|
1347
|
+
CREATE INDEX IF NOT EXISTS idx_chats_userId ON chats(userId);
|
|
1348
|
+
|
|
1349
|
+
-- Messages table (nodes in the DAG)
|
|
1350
|
+
CREATE TABLE IF NOT EXISTS messages (
|
|
1351
|
+
id TEXT PRIMARY KEY,
|
|
1352
|
+
chatId TEXT NOT NULL,
|
|
1353
|
+
parentId TEXT,
|
|
1354
|
+
name TEXT NOT NULL,
|
|
1355
|
+
type TEXT,
|
|
1356
|
+
data TEXT NOT NULL,
|
|
1357
|
+
createdAt INTEGER NOT NULL,
|
|
1358
|
+
FOREIGN KEY (chatId) REFERENCES chats(id) ON DELETE CASCADE,
|
|
1359
|
+
FOREIGN KEY (parentId) REFERENCES messages(id)
|
|
1360
|
+
);
|
|
1361
|
+
|
|
1362
|
+
CREATE INDEX IF NOT EXISTS idx_messages_chatId ON messages(chatId);
|
|
1363
|
+
CREATE INDEX IF NOT EXISTS idx_messages_parentId ON messages(parentId);
|
|
1364
|
+
|
|
1365
|
+
-- Branches table (pointers to head messages)
|
|
1366
|
+
CREATE TABLE IF NOT EXISTS branches (
|
|
1367
|
+
id TEXT PRIMARY KEY,
|
|
1368
|
+
chatId TEXT NOT NULL,
|
|
1369
|
+
name TEXT NOT NULL,
|
|
1370
|
+
headMessageId TEXT,
|
|
1371
|
+
isActive INTEGER NOT NULL DEFAULT 0,
|
|
1372
|
+
createdAt INTEGER NOT NULL,
|
|
1373
|
+
FOREIGN KEY (chatId) REFERENCES chats(id) ON DELETE CASCADE,
|
|
1374
|
+
FOREIGN KEY (headMessageId) REFERENCES messages(id),
|
|
1375
|
+
UNIQUE(chatId, name)
|
|
1376
|
+
);
|
|
1377
|
+
|
|
1378
|
+
CREATE INDEX IF NOT EXISTS idx_branches_chatId ON branches(chatId);
|
|
1379
|
+
|
|
1380
|
+
-- Checkpoints table (pointers to message nodes)
|
|
1381
|
+
CREATE TABLE IF NOT EXISTS checkpoints (
|
|
1382
|
+
id TEXT PRIMARY KEY,
|
|
1383
|
+
chatId TEXT NOT NULL,
|
|
1384
|
+
name TEXT NOT NULL,
|
|
1385
|
+
messageId TEXT NOT NULL,
|
|
1386
|
+
createdAt INTEGER NOT NULL,
|
|
1387
|
+
FOREIGN KEY (chatId) REFERENCES chats(id) ON DELETE CASCADE,
|
|
1388
|
+
FOREIGN KEY (messageId) REFERENCES messages(id),
|
|
1389
|
+
UNIQUE(chatId, name)
|
|
1390
|
+
);
|
|
1391
|
+
|
|
1392
|
+
CREATE INDEX IF NOT EXISTS idx_checkpoints_chatId ON checkpoints(chatId);
|
|
1393
|
+
|
|
1394
|
+
-- FTS5 virtual table for full-text search
|
|
1395
|
+
-- messageId/chatId/name are UNINDEXED (stored but not searchable, used for filtering/joining)
|
|
1396
|
+
-- Only 'content' is indexed for full-text search
|
|
1397
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
|
|
1398
|
+
messageId UNINDEXED,
|
|
1399
|
+
chatId UNINDEXED,
|
|
1400
|
+
name UNINDEXED,
|
|
1401
|
+
content,
|
|
1402
|
+
tokenize='porter unicode61'
|
|
1403
|
+
);
|
|
1404
|
+
`;
|
|
1405
|
+
var SqliteContextStore = class extends ContextStore {
|
|
1406
|
+
#db;
|
|
1407
|
+
constructor(path3) {
|
|
1408
|
+
super();
|
|
1409
|
+
this.#db = new DatabaseSync(path3);
|
|
1410
|
+
this.#db.exec("PRAGMA foreign_keys = ON");
|
|
1411
|
+
this.#db.exec(STORE_DDL);
|
|
1412
|
+
}
|
|
1413
|
+
/**
|
|
1414
|
+
* Execute a function within a transaction.
|
|
1415
|
+
* Automatically commits on success or rolls back on error.
|
|
1416
|
+
*/
|
|
1417
|
+
#useTransaction(fn) {
|
|
1418
|
+
this.#db.exec("BEGIN TRANSACTION");
|
|
1419
|
+
try {
|
|
1420
|
+
const result = fn();
|
|
1421
|
+
this.#db.exec("COMMIT");
|
|
1422
|
+
return result;
|
|
1423
|
+
} catch (error) {
|
|
1424
|
+
this.#db.exec("ROLLBACK");
|
|
1425
|
+
throw error;
|
|
1426
|
+
}
|
|
1427
|
+
}
|
|
1428
|
+
// ==========================================================================
|
|
1429
|
+
// Chat Operations
|
|
1430
|
+
// ==========================================================================
|
|
1431
|
+
async createChat(chat) {
|
|
1432
|
+
this.#useTransaction(() => {
|
|
1433
|
+
this.#db.prepare(
|
|
1434
|
+
`INSERT INTO chats (id, userId, title, metadata)
|
|
1435
|
+
VALUES (?, ?, ?, ?)`
|
|
1436
|
+
).run(
|
|
1437
|
+
chat.id,
|
|
1438
|
+
chat.userId,
|
|
1439
|
+
chat.title ?? null,
|
|
1440
|
+
chat.metadata ? JSON.stringify(chat.metadata) : null
|
|
1441
|
+
);
|
|
1442
|
+
this.#db.prepare(
|
|
1443
|
+
`INSERT INTO branches (id, chatId, name, headMessageId, isActive, createdAt)
|
|
1444
|
+
VALUES (?, ?, 'main', NULL, 1, ?)`
|
|
1445
|
+
).run(crypto.randomUUID(), chat.id, Date.now());
|
|
1446
|
+
});
|
|
1447
|
+
}
|
|
1448
|
+
async upsertChat(chat) {
|
|
1449
|
+
return this.#useTransaction(() => {
|
|
1450
|
+
const row = this.#db.prepare(
|
|
1451
|
+
`INSERT INTO chats (id, userId, title, metadata)
|
|
1452
|
+
VALUES (?, ?, ?, ?)
|
|
1453
|
+
ON CONFLICT(id) DO UPDATE SET id = excluded.id
|
|
1454
|
+
RETURNING *`
|
|
1455
|
+
).get(
|
|
1456
|
+
chat.id,
|
|
1457
|
+
chat.userId,
|
|
1458
|
+
chat.title ?? null,
|
|
1459
|
+
chat.metadata ? JSON.stringify(chat.metadata) : null
|
|
1460
|
+
);
|
|
1461
|
+
this.#db.prepare(
|
|
1462
|
+
`INSERT OR IGNORE INTO branches (id, chatId, name, headMessageId, isActive, createdAt)
|
|
1463
|
+
VALUES (?, ?, 'main', NULL, 1, ?)`
|
|
1464
|
+
).run(crypto.randomUUID(), chat.id, Date.now());
|
|
1465
|
+
return {
|
|
1466
|
+
id: row.id,
|
|
1467
|
+
userId: row.userId,
|
|
1468
|
+
title: row.title ?? void 0,
|
|
1469
|
+
metadata: row.metadata ? JSON.parse(row.metadata) : void 0,
|
|
1470
|
+
createdAt: row.createdAt,
|
|
1471
|
+
updatedAt: row.updatedAt
|
|
1472
|
+
};
|
|
1473
|
+
});
|
|
1474
|
+
}
|
|
1475
|
+
async getChat(chatId) {
|
|
1476
|
+
const row = this.#db.prepare("SELECT * FROM chats WHERE id = ?").get(chatId);
|
|
1477
|
+
if (!row) {
|
|
1478
|
+
return void 0;
|
|
1479
|
+
}
|
|
1480
|
+
return {
|
|
1481
|
+
id: row.id,
|
|
1482
|
+
userId: row.userId,
|
|
1483
|
+
title: row.title ?? void 0,
|
|
1484
|
+
metadata: row.metadata ? JSON.parse(row.metadata) : void 0,
|
|
1485
|
+
createdAt: row.createdAt,
|
|
1486
|
+
updatedAt: row.updatedAt
|
|
1487
|
+
};
|
|
1488
|
+
}
|
|
1489
|
+
async updateChat(chatId, updates) {
|
|
1490
|
+
const setClauses = ["updatedAt = strftime('%s', 'now') * 1000"];
|
|
1491
|
+
const params = [];
|
|
1492
|
+
if (updates.title !== void 0) {
|
|
1493
|
+
setClauses.push("title = ?");
|
|
1494
|
+
params.push(updates.title ?? null);
|
|
1495
|
+
}
|
|
1496
|
+
if (updates.metadata !== void 0) {
|
|
1497
|
+
setClauses.push("metadata = ?");
|
|
1498
|
+
params.push(JSON.stringify(updates.metadata));
|
|
1499
|
+
}
|
|
1500
|
+
params.push(chatId);
|
|
1501
|
+
const row = this.#db.prepare(
|
|
1502
|
+
`UPDATE chats SET ${setClauses.join(", ")} WHERE id = ? RETURNING *`
|
|
1503
|
+
).get(...params);
|
|
1504
|
+
return {
|
|
1505
|
+
id: row.id,
|
|
1506
|
+
userId: row.userId,
|
|
1507
|
+
title: row.title ?? void 0,
|
|
1508
|
+
metadata: row.metadata ? JSON.parse(row.metadata) : void 0,
|
|
1509
|
+
createdAt: row.createdAt,
|
|
1510
|
+
updatedAt: row.updatedAt
|
|
1511
|
+
};
|
|
1512
|
+
}
|
|
1513
|
+
async listChats(options) {
|
|
1514
|
+
const params = [];
|
|
1515
|
+
let whereClause = "";
|
|
1516
|
+
let limitClause = "";
|
|
1517
|
+
if (options?.userId) {
|
|
1518
|
+
whereClause = "WHERE c.userId = ?";
|
|
1519
|
+
params.push(options.userId);
|
|
1520
|
+
}
|
|
1521
|
+
if (options?.limit !== void 0) {
|
|
1522
|
+
limitClause = " LIMIT ?";
|
|
1523
|
+
params.push(options.limit);
|
|
1524
|
+
if (options.offset !== void 0) {
|
|
1525
|
+
limitClause += " OFFSET ?";
|
|
1526
|
+
params.push(options.offset);
|
|
1527
|
+
}
|
|
1528
|
+
}
|
|
1529
|
+
const rows = this.#db.prepare(
|
|
1530
|
+
`SELECT
|
|
1531
|
+
c.id,
|
|
1532
|
+
c.userId,
|
|
1533
|
+
c.title,
|
|
1534
|
+
c.createdAt,
|
|
1535
|
+
c.updatedAt,
|
|
1536
|
+
COUNT(DISTINCT m.id) as messageCount,
|
|
1537
|
+
COUNT(DISTINCT b.id) as branchCount
|
|
1538
|
+
FROM chats c
|
|
1539
|
+
LEFT JOIN messages m ON m.chatId = c.id
|
|
1540
|
+
LEFT JOIN branches b ON b.chatId = c.id
|
|
1541
|
+
${whereClause}
|
|
1542
|
+
GROUP BY c.id
|
|
1543
|
+
ORDER BY c.updatedAt DESC${limitClause}`
|
|
1544
|
+
).all(...params);
|
|
1545
|
+
return rows.map((row) => ({
|
|
1546
|
+
id: row.id,
|
|
1547
|
+
userId: row.userId,
|
|
1548
|
+
title: row.title ?? void 0,
|
|
1549
|
+
messageCount: row.messageCount,
|
|
1550
|
+
branchCount: row.branchCount,
|
|
1551
|
+
createdAt: row.createdAt,
|
|
1552
|
+
updatedAt: row.updatedAt
|
|
1553
|
+
}));
|
|
1554
|
+
}
|
|
1555
|
+
async deleteChat(chatId, options) {
|
|
1556
|
+
return this.#useTransaction(() => {
|
|
1557
|
+
const messageIds = this.#db.prepare("SELECT id FROM messages WHERE chatId = ?").all(chatId);
|
|
1558
|
+
let sql = "DELETE FROM chats WHERE id = ?";
|
|
1559
|
+
const params = [chatId];
|
|
1560
|
+
if (options?.userId !== void 0) {
|
|
1561
|
+
sql += " AND userId = ?";
|
|
1562
|
+
params.push(options.userId);
|
|
1563
|
+
}
|
|
1564
|
+
const result = this.#db.prepare(sql).run(...params);
|
|
1565
|
+
if (result.changes > 0 && messageIds.length > 0) {
|
|
1566
|
+
const placeholders = messageIds.map(() => "?").join(", ");
|
|
1567
|
+
this.#db.prepare(
|
|
1568
|
+
`DELETE FROM messages_fts WHERE messageId IN (${placeholders})`
|
|
1569
|
+
).run(...messageIds.map((m) => m.id));
|
|
1570
|
+
}
|
|
1571
|
+
return result.changes > 0;
|
|
1572
|
+
});
|
|
1573
|
+
}
|
|
1574
|
+
// ==========================================================================
|
|
1575
|
+
// Message Operations (Graph Nodes)
|
|
1576
|
+
// ==========================================================================
|
|
1577
|
+
async addMessage(message2) {
|
|
1578
|
+
this.#db.prepare(
|
|
1579
|
+
`INSERT INTO messages (id, chatId, parentId, name, type, data, createdAt)
|
|
1580
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
1581
|
+
ON CONFLICT(id) DO UPDATE SET
|
|
1582
|
+
parentId = excluded.parentId,
|
|
1583
|
+
name = excluded.name,
|
|
1584
|
+
type = excluded.type,
|
|
1585
|
+
data = excluded.data`
|
|
1586
|
+
).run(
|
|
1587
|
+
message2.id,
|
|
1588
|
+
message2.chatId,
|
|
1589
|
+
message2.parentId,
|
|
1590
|
+
message2.name,
|
|
1591
|
+
message2.type ?? null,
|
|
1592
|
+
JSON.stringify(message2.data),
|
|
1593
|
+
message2.createdAt
|
|
1594
|
+
);
|
|
1595
|
+
const content = typeof message2.data === "string" ? message2.data : JSON.stringify(message2.data);
|
|
1596
|
+
this.#db.prepare(`DELETE FROM messages_fts WHERE messageId = ?`).run(message2.id);
|
|
1597
|
+
this.#db.prepare(
|
|
1598
|
+
`INSERT INTO messages_fts(messageId, chatId, name, content)
|
|
1599
|
+
VALUES (?, ?, ?, ?)`
|
|
1600
|
+
).run(message2.id, message2.chatId, message2.name, content);
|
|
1601
|
+
}
|
|
1602
|
+
async getMessage(messageId) {
|
|
1603
|
+
const row = this.#db.prepare("SELECT * FROM messages WHERE id = ?").get(messageId);
|
|
1604
|
+
if (!row) {
|
|
1605
|
+
return void 0;
|
|
1606
|
+
}
|
|
1607
|
+
return {
|
|
1608
|
+
id: row.id,
|
|
1609
|
+
chatId: row.chatId,
|
|
1610
|
+
parentId: row.parentId,
|
|
1611
|
+
name: row.name,
|
|
1612
|
+
type: row.type ?? void 0,
|
|
1613
|
+
data: JSON.parse(row.data),
|
|
1614
|
+
createdAt: row.createdAt
|
|
1615
|
+
};
|
|
1616
|
+
}
|
|
1617
|
+
async getMessageChain(headId) {
|
|
1618
|
+
const rows = this.#db.prepare(
|
|
1619
|
+
`WITH RECURSIVE chain AS (
|
|
1620
|
+
SELECT *, 0 as depth FROM messages WHERE id = ?
|
|
1621
|
+
UNION ALL
|
|
1622
|
+
SELECT m.*, c.depth + 1 FROM messages m
|
|
1623
|
+
INNER JOIN chain c ON m.id = c.parentId
|
|
1624
|
+
)
|
|
1625
|
+
SELECT * FROM chain
|
|
1626
|
+
ORDER BY depth DESC`
|
|
1627
|
+
).all(headId);
|
|
1628
|
+
return rows.map((row) => ({
|
|
1629
|
+
id: row.id,
|
|
1630
|
+
chatId: row.chatId,
|
|
1631
|
+
parentId: row.parentId,
|
|
1632
|
+
name: row.name,
|
|
1633
|
+
type: row.type ?? void 0,
|
|
1634
|
+
data: JSON.parse(row.data),
|
|
1635
|
+
createdAt: row.createdAt
|
|
1636
|
+
}));
|
|
1637
|
+
}
|
|
1638
|
+
async hasChildren(messageId) {
|
|
1639
|
+
const row = this.#db.prepare(
|
|
1640
|
+
"SELECT EXISTS(SELECT 1 FROM messages WHERE parentId = ?) as hasChildren"
|
|
1641
|
+
).get(messageId);
|
|
1642
|
+
return row.hasChildren === 1;
|
|
1643
|
+
}
|
|
1644
|
+
async getMessages(chatId) {
|
|
1645
|
+
const chat = await this.getChat(chatId);
|
|
1646
|
+
if (!chat) {
|
|
1647
|
+
throw new Error(`Chat "${chatId}" not found`);
|
|
1648
|
+
}
|
|
1649
|
+
const activeBranch = await this.getActiveBranch(chatId);
|
|
1650
|
+
if (!activeBranch?.headMessageId) {
|
|
1651
|
+
return [];
|
|
1652
|
+
}
|
|
1653
|
+
return this.getMessageChain(activeBranch.headMessageId);
|
|
1654
|
+
}
|
|
1655
|
+
// ==========================================================================
|
|
1656
|
+
// Branch Operations
|
|
1657
|
+
// ==========================================================================
|
|
1658
|
+
async createBranch(branch) {
|
|
1659
|
+
this.#db.prepare(
|
|
1660
|
+
`INSERT INTO branches (id, chatId, name, headMessageId, isActive, createdAt)
|
|
1661
|
+
VALUES (?, ?, ?, ?, ?, ?)`
|
|
1662
|
+
).run(
|
|
1663
|
+
branch.id,
|
|
1664
|
+
branch.chatId,
|
|
1665
|
+
branch.name,
|
|
1666
|
+
branch.headMessageId,
|
|
1667
|
+
branch.isActive ? 1 : 0,
|
|
1668
|
+
branch.createdAt
|
|
1669
|
+
);
|
|
1670
|
+
}
|
|
1671
|
+
async getBranch(chatId, name) {
|
|
1672
|
+
const row = this.#db.prepare("SELECT * FROM branches WHERE chatId = ? AND name = ?").get(chatId, name);
|
|
1673
|
+
if (!row) {
|
|
1674
|
+
return void 0;
|
|
1675
|
+
}
|
|
1676
|
+
return {
|
|
1677
|
+
id: row.id,
|
|
1678
|
+
chatId: row.chatId,
|
|
1679
|
+
name: row.name,
|
|
1680
|
+
headMessageId: row.headMessageId,
|
|
1681
|
+
isActive: row.isActive === 1,
|
|
1682
|
+
createdAt: row.createdAt
|
|
1683
|
+
};
|
|
1684
|
+
}
|
|
1685
|
+
async getActiveBranch(chatId) {
|
|
1686
|
+
const row = this.#db.prepare("SELECT * FROM branches WHERE chatId = ? AND isActive = 1").get(chatId);
|
|
1687
|
+
if (!row) {
|
|
1688
|
+
return void 0;
|
|
1689
|
+
}
|
|
1690
|
+
return {
|
|
1691
|
+
id: row.id,
|
|
1692
|
+
chatId: row.chatId,
|
|
1693
|
+
name: row.name,
|
|
1694
|
+
headMessageId: row.headMessageId,
|
|
1695
|
+
isActive: true,
|
|
1696
|
+
createdAt: row.createdAt
|
|
1697
|
+
};
|
|
1698
|
+
}
|
|
1699
|
+
async setActiveBranch(chatId, branchId) {
|
|
1700
|
+
this.#db.prepare("UPDATE branches SET isActive = 0 WHERE chatId = ?").run(chatId);
|
|
1701
|
+
this.#db.prepare("UPDATE branches SET isActive = 1 WHERE id = ?").run(branchId);
|
|
1702
|
+
}
|
|
1703
|
+
async updateBranchHead(branchId, messageId) {
|
|
1704
|
+
this.#db.prepare("UPDATE branches SET headMessageId = ? WHERE id = ?").run(messageId, branchId);
|
|
1705
|
+
}
|
|
1706
|
+
async listBranches(chatId) {
|
|
1707
|
+
const branches = this.#db.prepare(
|
|
1708
|
+
`SELECT
|
|
1709
|
+
b.id,
|
|
1710
|
+
b.name,
|
|
1711
|
+
b.headMessageId,
|
|
1712
|
+
b.isActive,
|
|
1713
|
+
b.createdAt
|
|
1714
|
+
FROM branches b
|
|
1715
|
+
WHERE b.chatId = ?
|
|
1716
|
+
ORDER BY b.createdAt ASC`
|
|
1717
|
+
).all(chatId);
|
|
1718
|
+
const result = [];
|
|
1719
|
+
for (const branch of branches) {
|
|
1720
|
+
let messageCount = 0;
|
|
1721
|
+
if (branch.headMessageId) {
|
|
1722
|
+
const countRow = this.#db.prepare(
|
|
1723
|
+
`WITH RECURSIVE chain AS (
|
|
1724
|
+
SELECT id, parentId FROM messages WHERE id = ?
|
|
1725
|
+
UNION ALL
|
|
1726
|
+
SELECT m.id, m.parentId FROM messages m
|
|
1727
|
+
INNER JOIN chain c ON m.id = c.parentId
|
|
1728
|
+
)
|
|
1729
|
+
SELECT COUNT(*) as count FROM chain`
|
|
1730
|
+
).get(branch.headMessageId);
|
|
1731
|
+
messageCount = countRow.count;
|
|
1732
|
+
}
|
|
1733
|
+
result.push({
|
|
1734
|
+
id: branch.id,
|
|
1735
|
+
name: branch.name,
|
|
1736
|
+
headMessageId: branch.headMessageId,
|
|
1737
|
+
isActive: branch.isActive === 1,
|
|
1738
|
+
messageCount,
|
|
1739
|
+
createdAt: branch.createdAt
|
|
1740
|
+
});
|
|
1741
|
+
}
|
|
1742
|
+
return result;
|
|
1743
|
+
}
|
|
1744
|
+
// ==========================================================================
|
|
1745
|
+
// Checkpoint Operations
|
|
1746
|
+
// ==========================================================================
|
|
1747
|
+
async createCheckpoint(checkpoint) {
|
|
1748
|
+
this.#db.prepare(
|
|
1749
|
+
`INSERT INTO checkpoints (id, chatId, name, messageId, createdAt)
|
|
1750
|
+
VALUES (?, ?, ?, ?, ?)
|
|
1751
|
+
ON CONFLICT(chatId, name) DO UPDATE SET
|
|
1752
|
+
messageId = excluded.messageId,
|
|
1753
|
+
createdAt = excluded.createdAt`
|
|
1754
|
+
).run(
|
|
1755
|
+
checkpoint.id,
|
|
1756
|
+
checkpoint.chatId,
|
|
1757
|
+
checkpoint.name,
|
|
1758
|
+
checkpoint.messageId,
|
|
1759
|
+
checkpoint.createdAt
|
|
1760
|
+
);
|
|
1761
|
+
}
|
|
1762
|
+
async getCheckpoint(chatId, name) {
|
|
1763
|
+
const row = this.#db.prepare("SELECT * FROM checkpoints WHERE chatId = ? AND name = ?").get(chatId, name);
|
|
1764
|
+
if (!row) {
|
|
1765
|
+
return void 0;
|
|
1766
|
+
}
|
|
1767
|
+
return {
|
|
1768
|
+
id: row.id,
|
|
1769
|
+
chatId: row.chatId,
|
|
1770
|
+
name: row.name,
|
|
1771
|
+
messageId: row.messageId,
|
|
1772
|
+
createdAt: row.createdAt
|
|
1773
|
+
};
|
|
1774
|
+
}
|
|
1775
|
+
async listCheckpoints(chatId) {
|
|
1776
|
+
const rows = this.#db.prepare(
|
|
1777
|
+
`SELECT id, name, messageId, createdAt
|
|
1778
|
+
FROM checkpoints
|
|
1779
|
+
WHERE chatId = ?
|
|
1780
|
+
ORDER BY createdAt DESC`
|
|
1781
|
+
).all(chatId);
|
|
1782
|
+
return rows.map((row) => ({
|
|
1783
|
+
id: row.id,
|
|
1784
|
+
name: row.name,
|
|
1785
|
+
messageId: row.messageId,
|
|
1786
|
+
createdAt: row.createdAt
|
|
1787
|
+
}));
|
|
1788
|
+
}
|
|
1789
|
+
async deleteCheckpoint(chatId, name) {
|
|
1790
|
+
this.#db.prepare("DELETE FROM checkpoints WHERE chatId = ? AND name = ?").run(chatId, name);
|
|
1791
|
+
}
|
|
1792
|
+
// ==========================================================================
|
|
1793
|
+
// Search Operations
|
|
1794
|
+
// ==========================================================================
|
|
1795
|
+
async searchMessages(chatId, query, options) {
|
|
1796
|
+
const limit = options?.limit ?? 20;
|
|
1797
|
+
const roles = options?.roles;
|
|
1798
|
+
let sql = `
|
|
1799
|
+
SELECT
|
|
1800
|
+
m.id,
|
|
1801
|
+
m.chatId,
|
|
1802
|
+
m.parentId,
|
|
1803
|
+
m.name,
|
|
1804
|
+
m.type,
|
|
1805
|
+
m.data,
|
|
1806
|
+
m.createdAt,
|
|
1807
|
+
fts.rank,
|
|
1808
|
+
snippet(messages_fts, 3, '<mark>', '</mark>', '...', 32) as snippet
|
|
1809
|
+
FROM messages_fts fts
|
|
1810
|
+
JOIN messages m ON m.id = fts.messageId
|
|
1811
|
+
WHERE messages_fts MATCH ?
|
|
1812
|
+
AND fts.chatId = ?
|
|
1813
|
+
`;
|
|
1814
|
+
const params = [query, chatId];
|
|
1815
|
+
if (roles && roles.length > 0) {
|
|
1816
|
+
const placeholders = roles.map(() => "?").join(", ");
|
|
1817
|
+
sql += ` AND fts.name IN (${placeholders})`;
|
|
1818
|
+
params.push(...roles);
|
|
1819
|
+
}
|
|
1820
|
+
sql += " ORDER BY fts.rank LIMIT ?";
|
|
1821
|
+
params.push(limit);
|
|
1822
|
+
const rows = this.#db.prepare(sql).all(...params);
|
|
1823
|
+
return rows.map((row) => ({
|
|
1824
|
+
message: {
|
|
1825
|
+
id: row.id,
|
|
1826
|
+
chatId: row.chatId,
|
|
1827
|
+
parentId: row.parentId,
|
|
1828
|
+
name: row.name,
|
|
1829
|
+
type: row.type ?? void 0,
|
|
1830
|
+
data: JSON.parse(row.data),
|
|
1831
|
+
createdAt: row.createdAt
|
|
1832
|
+
},
|
|
1833
|
+
rank: row.rank,
|
|
1834
|
+
snippet: row.snippet
|
|
1835
|
+
}));
|
|
1836
|
+
}
|
|
1837
|
+
// ==========================================================================
|
|
1838
|
+
// Visualization Operations
|
|
1839
|
+
// ==========================================================================
|
|
1840
|
+
async getGraph(chatId) {
|
|
1841
|
+
const messageRows = this.#db.prepare(
|
|
1842
|
+
`SELECT id, parentId, name, data, createdAt
|
|
1843
|
+
FROM messages
|
|
1844
|
+
WHERE chatId = ?
|
|
1845
|
+
ORDER BY createdAt ASC`
|
|
1846
|
+
).all(chatId);
|
|
1847
|
+
const nodes = messageRows.map((row) => {
|
|
1848
|
+
const data = JSON.parse(row.data);
|
|
1849
|
+
const content = typeof data === "string" ? data : JSON.stringify(data);
|
|
1850
|
+
return {
|
|
1851
|
+
id: row.id,
|
|
1852
|
+
parentId: row.parentId,
|
|
1853
|
+
role: row.name,
|
|
1854
|
+
content: content.length > 50 ? content.slice(0, 50) + "..." : content,
|
|
1855
|
+
createdAt: row.createdAt
|
|
1856
|
+
};
|
|
1857
|
+
});
|
|
1858
|
+
const branchRows = this.#db.prepare(
|
|
1859
|
+
`SELECT name, headMessageId, isActive
|
|
1860
|
+
FROM branches
|
|
1861
|
+
WHERE chatId = ?
|
|
1862
|
+
ORDER BY createdAt ASC`
|
|
1863
|
+
).all(chatId);
|
|
1864
|
+
const branches = branchRows.map((row) => ({
|
|
1865
|
+
name: row.name,
|
|
1866
|
+
headMessageId: row.headMessageId,
|
|
1867
|
+
isActive: row.isActive === 1
|
|
1868
|
+
}));
|
|
1869
|
+
const checkpointRows = this.#db.prepare(
|
|
1870
|
+
`SELECT name, messageId
|
|
1871
|
+
FROM checkpoints
|
|
1872
|
+
WHERE chatId = ?
|
|
1873
|
+
ORDER BY createdAt ASC`
|
|
1874
|
+
).all(chatId);
|
|
1875
|
+
const checkpoints = checkpointRows.map((row) => ({
|
|
1876
|
+
name: row.name,
|
|
1877
|
+
messageId: row.messageId
|
|
1878
|
+
}));
|
|
1879
|
+
return {
|
|
1880
|
+
chatId,
|
|
1881
|
+
nodes,
|
|
1882
|
+
branches,
|
|
1883
|
+
checkpoints
|
|
1884
|
+
};
|
|
1885
|
+
}
|
|
1886
|
+
};
|
|
1887
|
+
/**
 * SqliteContextStore variant that always passes ":memory:" to the parent
 * constructor.
 */
var InMemoryContextStore = class extends SqliteContextStore {
  constructor() {
    const inMemoryPath = ":memory:";
    super(inMemoryPath);
  }
};
|
|
1892
|
+
/**
 * Builds a pair of runners that resolve `options.context` with an
 * XmlRenderer and ask the model for an object matching `options.schema`.
 *
 * @param options - Provides `context`, `model`, `schema` and optionally
 *   `tools` and `providerOptions`
 * @returns `{ generate, stream }`:
 *   - `generate(contextVariables, config)` awaits `generateText` (step limit
 *     25) and returns its `output`
 *   - `stream(contextVariables, config)` returns the `streamText` result
 *     (step limit 50), using `config.transform` or `smoothStream()`
 *   Both reject when `options.context` or `options.model` is missing.
 */
function structuredOutput(options) {
  // Shared preamble: validate the options, then resolve messages and system prompt.
  const resolvePrompt = async () => {
    if (!options.context) {
      throw new Error(`structuredOutput is missing a context.`);
    }
    if (!options.model) {
      throw new Error(`structuredOutput is missing a model.`);
    }
    return options.context.resolve({ renderer: new XmlRenderer() });
  };
  const generate = async (contextVariables, config) => {
    const { messages, systemPrompt } = await resolvePrompt();
    const { output } = await generateText({
      abortSignal: config?.abortSignal,
      providerOptions: options.providerOptions,
      model: options.model,
      system: systemPrompt,
      messages: await convertToModelMessages(messages),
      stopWhen: stepCountIs(25),
      experimental_repairToolCall: repairToolCall,
      experimental_context: contextVariables,
      output: Output.object({ schema: options.schema }),
      tools: options.tools
    });
    return output;
  };
  const stream = async (contextVariables, config) => {
    const { messages, systemPrompt } = await resolvePrompt();
    return streamText({
      abortSignal: config?.abortSignal,
      providerOptions: options.providerOptions,
      model: options.model,
      system: systemPrompt,
      experimental_repairToolCall: repairToolCall,
      messages: await convertToModelMessages(messages),
      stopWhen: stepCountIs(50),
      experimental_transform: config?.transform ?? smoothStream(),
      experimental_context: contextVariables,
      output: Output.object({ schema: options.schema }),
      tools: options.tools
    });
  };
  return { generate, stream };
}
|
|
1944
|
+
/**
 * Repair hook handed to the model calls as `experimental_repairToolCall`.
 * Logs the failing tool name and error name, then asks a secondary model to
 * rewrite the tool inputs so they match the tool's input schema.
 *
 * @param toolCall - The failed call; `toolName` and `input` are read
 * @param tools - Map from tool name to tool definition
 * @param inputSchema - Function returning the schema for `toolCall`
 * @param error - The error that triggered the repair
 * @returns `null` when the error is a NoSuchToolError; otherwise a copy of
 *   `toolCall` whose `input` is the JSON string of the repaired object
 */
var repairToolCall = async ({
  toolCall,
  tools,
  inputSchema,
  error
}) => {
  console.log(
    `Debug: ${chalk2.yellow("RepairingToolCall")}: ${toolCall.toolName}`,
    error.name
  );
  if (NoSuchToolError.isInstance(error)) {
    return null;
  }
  const tool = tools[toolCall.toolName];
  // `inputSchema` may hand back a promise (TODO confirm against the installed
  // SDK version). Stringifying a pending promise yields "{}", so await it
  // first; awaiting a plain value is a no-op.
  const expectedSchema = await inputSchema(toolCall);
  const { output } = await generateText({
    model: groq("openai/gpt-oss-20b"),
    output: Output.object({ schema: tool.inputSchema }),
    prompt: [
      `The model tried to call the tool "${toolCall.toolName}" with the following inputs:`,
      JSON.stringify(toolCall.input),
      `The tool accepts the following schema:`,
      JSON.stringify(expectedSchema),
      "Please fix the inputs."
    ].join("\n")
  });
  return { ...toolCall, input: JSON.stringify(output) };
};
|
|
216
1971
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
1972
|
+
// packages/text2sql/src/lib/synthesis/extractors/base-contextual-extractor.ts
|
|
1973
|
+
// Shape of the context resolver's structured answer: one `question` string.
var contextResolverSchema = z.object({
  question: z.string().describe("A standalone natural language question that the SQL query answers")
});
|
|
223
|
-
function
|
|
224
|
-
const
|
|
1978
|
+
/**
 * Asks the model for a standalone question describing a SQL query in the
 * light of the conversation that led to it.
 *
 * A fresh ContextEngine backed by an InMemoryContextStore is created per
 * call and filled with a persona, the optional schema, the conversation,
 * the SQL, task instructions, examples and a final user message.
 *
 * @param params - `conversation` and `sql` are always added as fragments;
 *   `introspection` is added as `database_schema` only when truthy
 * @returns Result of `structuredOutput(...).generate()` for
 *   `contextResolverSchema`
 */
async function resolveContext(params) {
  const engine = new ContextEngine({
    store: new InMemoryContextStore(),
    chatId: `context-resolver-${crypto.randomUUID()}`,
    userId: "system"
  });
  const fragments = [
    persona({
      name: "context_resolver",
      role: "You are an expert at understanding conversational context and generating clear, standalone questions from multi-turn conversations.",
      objective: "Transform context-dependent messages into standalone questions that fully capture user intent"
    })
  ];
  if (params.introspection) {
    fragments.push(fragment("database_schema", params.introspection));
  }
  fragments.push(
    fragment("conversation", params.conversation),
    fragment("sql", params.sql),
    fragment(
      "task",
      dedent`
Given the conversation above and the SQL query that was executed,
generate a single, standalone natural language question that:
1. Fully captures the user's intent without needing prior context
2. Uses natural business language (not SQL terminology)
3. Could be asked by someone who hasn't seen the conversation
4. Accurately represents what the SQL query answers
`
    ),
    fragment(
      "examples",
      dedent`
Conversation: "Show me customers" → "Filter to NY" → "Sort by revenue"
SQL: SELECT * FROM customers WHERE region = 'NY' ORDER BY revenue DESC
Question: "Show me customers in the NY region sorted by revenue"

Conversation: "What were sales last month?" → "Break it down by category"
SQL: SELECT category, SUM(amount) FROM sales WHERE date >= '2024-11-01' GROUP BY category
Question: "What were sales by category for last month?"
`
    ),
    user("Generate a standalone question for this SQL query.")
  );
  engine.set(...fragments);
  const resolver = structuredOutput({
    model: groq2("openai/gpt-oss-20b"),
    context: engine,
    schema: contextResolverSchema
  });
  return resolver.generate();
}
|
|
2025
|
+
/**
 * Concatenates the text of every part accepted by `isTextUIPart`.
 * @param message2 - Message whose `parts` array is scanned
 * @returns The part texts joined with single spaces, trimmed at both ends
 */
function getMessageText(message2) {
  const pieces = [];
  for (const part of message2.parts.filter(isTextUIPart)) {
    pieces.push(part.text);
  }
  return pieces.join(" ").trim();
}
|
|
227
2029
|
/**
 * Renders conversation entries as a numbered list, one entry per line.
 * @param messages - Entries to number, starting at 1
 * @returns Lines of the form `[n] entry` joined with "\n"
 */
function formatConversation(messages) {
  const numbered = messages.map((entry, index) => {
    const position = index + 1;
    return `[${position}] ${entry}`;
  });
  return numbered.join("\n");
}
|
|
230
2032
|
var BaseContextualExtractor = class extends PairProducer {
|
|
2033
|
+
context = [];
|
|
2034
|
+
results = [];
|
|
2035
|
+
messages;
|
|
2036
|
+
adapter;
|
|
2037
|
+
options;
|
|
231
2038
|
constructor(messages, adapter, options = {}) {
|
|
232
2039
|
super();
|
|
233
2040
|
this.messages = messages;
|
|
234
2041
|
this.adapter = adapter;
|
|
235
2042
|
this.options = options;
|
|
236
2043
|
}
|
|
237
|
-
context = [];
|
|
238
|
-
results = [];
|
|
239
2044
|
/**
|
|
240
2045
|
* Template method - defines the extraction algorithm skeleton.
|
|
241
2046
|
* Subclasses customize behavior via hooks, not by overriding this method.
|
|
@@ -248,31 +2053,31 @@ var BaseContextualExtractor = class extends PairProducer {
|
|
|
248
2053
|
if (this.results.length === 0) {
|
|
249
2054
|
return;
|
|
250
2055
|
}
|
|
251
|
-
const introspection =
|
|
2056
|
+
const introspection = "";
|
|
252
2057
|
yield* this.resolveQuestions(introspection);
|
|
253
2058
|
}
|
|
254
2059
|
/**
|
|
255
2060
|
* Core extraction loop - iterates through messages and calls hooks.
|
|
256
2061
|
*/
|
|
257
2062
|
async extractSqlsWithContext(toolName, includeFailures) {
|
|
258
|
-
for (const
|
|
259
|
-
if (
|
|
260
|
-
const text = getMessageText(
|
|
2063
|
+
for (const message2 of this.messages) {
|
|
2064
|
+
if (message2.role === "user") {
|
|
2065
|
+
const text = getMessageText(message2);
|
|
261
2066
|
if (text) {
|
|
262
2067
|
await this.onUserMessage(text);
|
|
263
2068
|
}
|
|
264
2069
|
continue;
|
|
265
2070
|
}
|
|
266
|
-
if (
|
|
267
|
-
await this.extractFromAssistant(
|
|
2071
|
+
if (message2.role === "assistant") {
|
|
2072
|
+
await this.extractFromAssistant(message2, toolName, includeFailures);
|
|
268
2073
|
}
|
|
269
2074
|
}
|
|
270
2075
|
}
|
|
271
2076
|
/**
|
|
272
2077
|
* Extract SQL from assistant message parts.
|
|
273
2078
|
*/
|
|
274
|
-
async extractFromAssistant(
|
|
275
|
-
for (const part of
|
|
2079
|
+
async extractFromAssistant(message2, toolName, includeFailures) {
|
|
2080
|
+
for (const part of message2.parts) {
|
|
276
2081
|
if (!isToolOrDynamicToolUIPart(part)) {
|
|
277
2082
|
continue;
|
|
278
2083
|
}
|
|
@@ -301,7 +2106,7 @@ var BaseContextualExtractor = class extends PairProducer {
|
|
|
301
2106
|
conversationContext: snapshot
|
|
302
2107
|
});
|
|
303
2108
|
}
|
|
304
|
-
const assistantText = getMessageText(
|
|
2109
|
+
const assistantText = getMessageText(message2);
|
|
305
2110
|
if (assistantText) {
|
|
306
2111
|
this.context.push(`Assistant: ${assistantText}`);
|
|
307
2112
|
}
|
|
@@ -311,18 +2116,14 @@ var BaseContextualExtractor = class extends PairProducer {
|
|
|
311
2116
|
*/
|
|
312
2117
|
async *resolveQuestions(introspection) {
|
|
313
2118
|
for (const item of this.results) {
|
|
314
|
-
const
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
sql: item.sql,
|
|
320
|
-
introspection
|
|
321
|
-
}
|
|
322
|
-
);
|
|
2119
|
+
const output = await resolveContext({
|
|
2120
|
+
conversation: formatConversation(item.conversationContext),
|
|
2121
|
+
sql: item.sql,
|
|
2122
|
+
introspection
|
|
2123
|
+
});
|
|
323
2124
|
yield [
|
|
324
2125
|
{
|
|
325
|
-
question:
|
|
2126
|
+
question: output.question,
|
|
326
2127
|
sql: item.sql,
|
|
327
2128
|
context: item.conversationContext,
|
|
328
2129
|
success: item.success
|
|
@@ -334,29 +2135,31 @@ var BaseContextualExtractor = class extends PairProducer {
|
|
|
334
2135
|
|
|
335
2136
|
// packages/text2sql/src/lib/synthesis/extractors/message-extractor.ts
|
|
336
2137
|
var MessageExtractor = class extends PairProducer {
|
|
2138
|
+
#messages;
|
|
2139
|
+
#options;
|
|
337
2140
|
/**
|
|
338
2141
|
* @param messages - Chat history to extract pairs from
|
|
339
2142
|
* @param options - Extraction configuration
|
|
340
2143
|
*/
|
|
341
2144
|
constructor(messages, options = {}) {
|
|
342
2145
|
super();
|
|
343
|
-
this
|
|
344
|
-
this
|
|
2146
|
+
this.#messages = messages;
|
|
2147
|
+
this.#options = options;
|
|
345
2148
|
}
|
|
346
2149
|
/**
|
|
347
2150
|
* Extracts question-SQL pairs by parsing tool calls and pairing with user messages.
|
|
348
2151
|
* @returns Pairs extracted from db_query tool invocations
|
|
349
2152
|
*/
|
|
350
2153
|
async *produce() {
|
|
351
|
-
const { includeFailures = false, toolName = "db_query" } = this
|
|
2154
|
+
const { includeFailures = false, toolName = "db_query" } = this.#options;
|
|
352
2155
|
let lastUserMessage = null;
|
|
353
|
-
for (const
|
|
354
|
-
if (
|
|
355
|
-
lastUserMessage =
|
|
2156
|
+
for (const message2 of this.#messages) {
|
|
2157
|
+
if (message2.role === "user") {
|
|
2158
|
+
lastUserMessage = message2;
|
|
356
2159
|
continue;
|
|
357
2160
|
}
|
|
358
|
-
if (
|
|
359
|
-
for (const part of
|
|
2161
|
+
if (message2.role === "assistant" && lastUserMessage) {
|
|
2162
|
+
for (const part of message2.parts) {
|
|
360
2163
|
if (!isToolOrDynamicToolUIPart2(part)) {
|
|
361
2164
|
continue;
|
|
362
2165
|
}
|
|
@@ -393,50 +2196,11 @@ var MessageExtractor = class extends PairProducer {
|
|
|
393
2196
|
};
|
|
394
2197
|
|
|
395
2198
|
// packages/text2sql/src/lib/synthesis/extractors/sql-extractor.ts
|
|
396
|
-
import { groq as
|
|
2199
|
+
import { groq as groq3 } from "@ai-sdk/groq";
|
|
397
2200
|
import dedent2 from "dedent";
|
|
398
2201
|
import z2 from "zod";
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
name: "sql_to_question",
|
|
402
|
-
model: groq2("llama-3.3-70b-versatile"),
|
|
403
|
-
output: z2.object({
|
|
404
|
-
question: z2.string().describe("A natural language question that the SQL query answers")
|
|
405
|
-
}),
|
|
406
|
-
prompt: (state) => dedent2`
|
|
407
|
-
<identity>
|
|
408
|
-
You are an expert at understanding SQL queries and generating clear,
|
|
409
|
-
natural language questions that describe what the query retrieves.
|
|
410
|
-
</identity>
|
|
411
|
-
|
|
412
|
-
<schema>
|
|
413
|
-
${state?.introspection}
|
|
414
|
-
</schema>
|
|
415
|
-
|
|
416
|
-
<sql>
|
|
417
|
-
${state?.sql}
|
|
418
|
-
</sql>
|
|
419
|
-
|
|
420
|
-
<task>
|
|
421
|
-
Given the database schema and the SQL query above, generate a single
|
|
422
|
-
natural language question that:
|
|
423
|
-
1. Accurately describes what information the query retrieves
|
|
424
|
-
2. Uses natural business language (not SQL terminology)
|
|
425
|
-
3. Could be asked by a non-technical user
|
|
426
|
-
4. Is concise but complete
|
|
427
|
-
</task>
|
|
428
|
-
|
|
429
|
-
<examples>
|
|
430
|
-
SQL: SELECT COUNT(*) FROM customers WHERE region = 'NY'
|
|
431
|
-
Question: "How many customers do we have in New York?"
|
|
432
|
-
|
|
433
|
-
SQL: SELECT product_name, SUM(quantity) as total FROM orders GROUP BY product_name ORDER BY total DESC LIMIT 10
|
|
434
|
-
Question: "What are our top 10 products by quantity sold?"
|
|
435
|
-
|
|
436
|
-
SQL: SELECT c.name, COUNT(o.id) FROM customers c LEFT JOIN orders o ON c.id = o.customer_id GROUP BY c.id HAVING COUNT(o.id) = 0
|
|
437
|
-
Question: "Which customers have never placed an order?"
|
|
438
|
-
</examples>
|
|
439
|
-
`
|
|
2202
|
+
// Shape of the SQL-to-question answer: one `question` string.
var outputSchema = z2.object({
  question: z2
    .string()
    .describe("A natural language question that the SQL query answers")
});
|
|
441
2205
|
var SqlExtractor = class extends PairProducer {
|
|
442
2206
|
#sqls;
|
|
@@ -459,7 +2223,7 @@ var SqlExtractor = class extends PairProducer {
|
|
|
459
2223
|
*/
|
|
460
2224
|
async *produce() {
|
|
461
2225
|
const { validateSql = true, skipInvalid = false } = this.#options;
|
|
462
|
-
const introspection =
|
|
2226
|
+
const introspection = "";
|
|
463
2227
|
for (const sql of this.#sqls) {
|
|
464
2228
|
let isValid = true;
|
|
465
2229
|
if (validateSql) {
|
|
@@ -469,17 +2233,54 @@ var SqlExtractor = class extends PairProducer {
|
|
|
469
2233
|
continue;
|
|
470
2234
|
}
|
|
471
2235
|
}
|
|
472
|
-
const
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
2236
|
+
const context = new ContextEngine({
|
|
2237
|
+
store: new InMemoryContextStore(),
|
|
2238
|
+
chatId: `sql-to-question-${crypto.randomUUID()}`,
|
|
2239
|
+
userId: "system"
|
|
2240
|
+
});
|
|
2241
|
+
context.set(
|
|
2242
|
+
persona({
|
|
2243
|
+
name: "sql_to_question",
|
|
2244
|
+
role: "You are an expert at understanding SQL queries and generating clear, natural language questions that describe what the query retrieves.",
|
|
2245
|
+
objective: "Generate clear, natural language questions that describe what SQL queries retrieve"
|
|
2246
|
+
}),
|
|
2247
|
+
fragment("database_schema", introspection),
|
|
2248
|
+
fragment("sql", sql),
|
|
2249
|
+
fragment(
|
|
2250
|
+
"task",
|
|
2251
|
+
dedent2`
|
|
2252
|
+
Given the database schema and the SQL query above, generate a single
|
|
2253
|
+
natural language question that:
|
|
2254
|
+
1. Accurately describes what information the query retrieves
|
|
2255
|
+
2. Uses natural business language (not SQL terminology)
|
|
2256
|
+
3. Could be asked by a non-technical user
|
|
2257
|
+
4. Is concise but complete
|
|
2258
|
+
`
|
|
2259
|
+
),
|
|
2260
|
+
fragment(
|
|
2261
|
+
"examples",
|
|
2262
|
+
dedent2`
|
|
2263
|
+
SQL: SELECT COUNT(*) FROM customers WHERE region = 'NY'
|
|
2264
|
+
Question: "How many customers do we have in New York?"
|
|
2265
|
+
|
|
2266
|
+
SQL: SELECT product_name, SUM(quantity) as total FROM orders GROUP BY product_name ORDER BY total DESC LIMIT 10
|
|
2267
|
+
Question: "What are our top 10 products by quantity sold?"
|
|
2268
|
+
|
|
2269
|
+
SQL: SELECT c.name, COUNT(o.id) FROM customers c LEFT JOIN orders o ON c.id = o.customer_id GROUP BY c.id HAVING COUNT(o.id) = 0
|
|
2270
|
+
Question: "Which customers have never placed an order?"
|
|
2271
|
+
`
|
|
2272
|
+
),
|
|
2273
|
+
user("Generate a natural language question for this SQL query.")
|
|
479
2274
|
);
|
|
2275
|
+
const sqlToQuestionOutput = structuredOutput({
|
|
2276
|
+
model: groq3("openai/gpt-oss-20b"),
|
|
2277
|
+
context,
|
|
2278
|
+
schema: outputSchema
|
|
2279
|
+
});
|
|
2280
|
+
const output = await sqlToQuestionOutput.generate();
|
|
480
2281
|
yield [
|
|
481
2282
|
{
|
|
482
|
-
question:
|
|
2283
|
+
question: output.question,
|
|
483
2284
|
sql,
|
|
484
2285
|
success: isValid
|
|
485
2286
|
}
|
|
@@ -532,58 +2333,67 @@ var WindowedContextExtractor = class extends BaseContextualExtractor {
|
|
|
532
2333
|
};
|
|
533
2334
|
|
|
534
2335
|
// packages/text2sql/src/lib/synthesis/extractors/segmented-context-extractor.ts
|
|
535
|
-
import { groq as
|
|
2336
|
+
import { groq as groq4 } from "@ai-sdk/groq";
|
|
536
2337
|
import dedent3 from "dedent";
|
|
537
2338
|
import z3 from "zod";
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
model: groq3("openai/gpt-oss-20b"),
|
|
542
|
-
output: z3.object({
|
|
543
|
-
isTopicChange: z3.boolean().describe("Whether the new message represents a topic change"),
|
|
544
|
-
reason: z3.string().describe("Brief explanation for the decision")
|
|
545
|
-
}),
|
|
546
|
-
prompt: (state) => dedent3`
|
|
547
|
-
<identity>
|
|
548
|
-
You are an expert at understanding conversational flow and detecting topic changes.
|
|
549
|
-
</identity>
|
|
550
|
-
|
|
551
|
-
<conversation_context>
|
|
552
|
-
${state?.context || "(no prior context)"}
|
|
553
|
-
</conversation_context>
|
|
554
|
-
|
|
555
|
-
<new_message>
|
|
556
|
-
${state?.newMessage}
|
|
557
|
-
</new_message>
|
|
558
|
-
|
|
559
|
-
<task>
|
|
560
|
-
Determine if the new message represents a significant topic change from the
|
|
561
|
-
prior conversation context. A topic change occurs when:
|
|
562
|
-
1. The user asks about a completely different entity/table/domain
|
|
563
|
-
2. The user starts a new analytical question unrelated to prior discussion
|
|
564
|
-
3. There's a clear shift in what data or metrics are being discussed
|
|
565
|
-
|
|
566
|
-
NOT a topic change:
|
|
567
|
-
- Follow-up questions refining the same query ("filter by...", "sort by...")
|
|
568
|
-
- Questions about the same entities with different conditions
|
|
569
|
-
- Requests for more details on the same topic
|
|
570
|
-
</task>
|
|
571
|
-
|
|
572
|
-
<examples>
|
|
573
|
-
Context: "Show me customers in NY" → "Sort by revenue"
|
|
574
|
-
New: "Filter to those with orders over $1000"
|
|
575
|
-
Decision: NOT a topic change (still refining customer query)
|
|
576
|
-
|
|
577
|
-
Context: "Show me customers in NY" → "Sort by revenue"
|
|
578
|
-
New: "What were our total sales last quarter?"
|
|
579
|
-
Decision: Topic change (shifted from customers to sales metrics)
|
|
580
|
-
|
|
581
|
-
Context: "List all products"
|
|
582
|
-
New: "How many orders did we have last month?"
|
|
583
|
-
Decision: Topic change (products → orders/sales)
|
|
584
|
-
</examples>
|
|
585
|
-
`
|
|
2339
|
+
// Shape of the topic-change verdict: a boolean flag plus a short reason.
var topicChangeSchema = z3.object({
  isTopicChange: z3
    .boolean()
    .describe("Whether the new message represents a topic change"),
  reason: z3
    .string()
    .describe("Brief explanation for the decision")
});
|
|
2343
|
+
/**
 * Asks the model whether a new user message starts a different topic than
 * the prior conversation.
 *
 * A fresh ContextEngine backed by an InMemoryContextStore is created per
 * call and filled with a persona, the prior context, the new message, task
 * instructions, examples and a final user message.
 *
 * @param params - `context` is the prior conversation text (replaced by
 *   "(no prior context)" when falsy); `newMessage` is the message to classify
 * @returns Result of `structuredOutput(...).generate()` for
 *   `topicChangeSchema`
 */
async function detectTopicChange(params) {
  const engine = new ContextEngine({
    store: new InMemoryContextStore(),
    chatId: `topic-change-${crypto.randomUUID()}`,
    userId: "system"
  });
  const fragments = [
    persona({
      name: "topic_change_detector",
      role: "You are an expert at understanding conversational flow and detecting topic changes.",
      objective: "Detect significant topic changes in database conversations"
    }),
    fragment("conversation_context", params.context || "(no prior context)"),
    fragment("new_message", params.newMessage),
    fragment(
      "task",
      dedent3`
Determine if the new message represents a significant topic change from the
prior conversation context. A topic change occurs when:
1. The user asks about a completely different entity/table/domain
2. The user starts a new analytical question unrelated to prior discussion
3. There's a clear shift in what data or metrics are being discussed

NOT a topic change:
- Follow-up questions refining the same query ("filter by...", "sort by...")
- Questions about the same entities with different conditions
- Requests for more details on the same topic
`
    ),
    fragment(
      "examples",
      dedent3`
Context: "Show me customers in NY" → "Sort by revenue"
New: "Filter to those with orders over $1000"
Decision: NOT a topic change (still refining customer query)

Context: "Show me customers in NY" → "Sort by revenue"
New: "What were our total sales last quarter?"
Decision: Topic change (shifted from customers to sales metrics)

Context: "List all products"
New: "How many orders did we have last month?"
Decision: Topic change (products → orders/sales)
`
    ),
    user("Determine if this is a topic change.")
  ];
  engine.set(...fragments);
  const detector = structuredOutput({
    model: groq4("openai/gpt-oss-20b"),
    context: engine,
    schema: topicChangeSchema
  });
  return detector.generate();
}
|
|
587
2397
|
var SegmentedContextExtractor = class extends BaseContextualExtractor {
|
|
588
2398
|
constructor(messages, adapter, options = {}) {
|
|
589
2399
|
super(messages, adapter, options);
|
|
@@ -598,7 +2408,10 @@ var SegmentedContextExtractor = class extends BaseContextualExtractor {
|
|
|
598
2408
|
async onUserMessage(text) {
|
|
599
2409
|
if (this.context.length >= 2) {
|
|
600
2410
|
const contextSnapshot = [...this.context];
|
|
601
|
-
const isTopicChange = await
|
|
2411
|
+
const { isTopicChange } = await detectTopicChange({
|
|
2412
|
+
context: formatConversation(contextSnapshot),
|
|
2413
|
+
newMessage: text
|
|
2414
|
+
});
|
|
602
2415
|
if (isTopicChange) {
|
|
603
2416
|
const resolved = await this.resolveToStandalone(text, contextSnapshot);
|
|
604
2417
|
this.context = [`User: ${resolved}`];
|
|
@@ -613,22 +2426,6 @@ var SegmentedContextExtractor = class extends BaseContextualExtractor {
|
|
|
613
2426
|
getContextSnapshot() {
|
|
614
2427
|
return [...this.context];
|
|
615
2428
|
}
|
|
616
|
-
/**
|
|
617
|
-
* Detect if a new message represents a topic change using LLM.
|
|
618
|
-
* @param newMessage - The new user message to check
|
|
619
|
-
* @param contextSnapshot - Snapshot of context captured before this async call
|
|
620
|
-
*/
|
|
621
|
-
async detectTopicChange(newMessage, contextSnapshot) {
|
|
622
|
-
const { experimental_output } = await generate3(
|
|
623
|
-
topicChangeAgent,
|
|
624
|
-
[user3("Determine if this is a topic change.")],
|
|
625
|
-
{
|
|
626
|
-
context: formatConversation(contextSnapshot),
|
|
627
|
-
newMessage
|
|
628
|
-
}
|
|
629
|
-
);
|
|
630
|
-
return experimental_output.isTopicChange;
|
|
631
|
-
}
|
|
632
2429
|
/**
|
|
633
2430
|
* Resolve a context-dependent message into a standalone question.
|
|
634
2431
|
* Called when topic change is detected to preserve the meaning of
|
|
@@ -637,21 +2434,16 @@ var SegmentedContextExtractor = class extends BaseContextualExtractor {
|
|
|
637
2434
|
* @param contextSnapshot - Snapshot of context captured before this async call
|
|
638
2435
|
*/
|
|
639
2436
|
async resolveToStandalone(text, contextSnapshot) {
|
|
640
|
-
const
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
// No SQL yet, just resolving the question
|
|
647
|
-
}
|
|
648
|
-
);
|
|
649
|
-
return experimental_output.question;
|
|
2437
|
+
const output = await resolveContext({
|
|
2438
|
+
conversation: formatConversation([...contextSnapshot, `User: ${text}`]),
|
|
2439
|
+
sql: ""
|
|
2440
|
+
// No SQL yet, just resolving the question
|
|
2441
|
+
});
|
|
2442
|
+
return output.question;
|
|
650
2443
|
}
|
|
651
2444
|
};
|
|
652
2445
|
|
|
653
2446
|
// packages/text2sql/src/lib/synthesis/extractors/last-query-extractor.ts
|
|
654
|
-
import { generate as generate4, user as user4 } from "@deepagents/agent";
|
|
655
2447
|
var LastQueryExtractor = class extends BaseContextualExtractor {
|
|
656
2448
|
constructor(messages, adapter, options = {}) {
|
|
657
2449
|
super(messages, adapter, options);
|
|
@@ -676,18 +2468,14 @@ var LastQueryExtractor = class extends BaseContextualExtractor {
|
|
|
676
2468
|
return;
|
|
677
2469
|
}
|
|
678
2470
|
const last = this.results.at(-1);
|
|
679
|
-
const
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
sql: last.sql,
|
|
685
|
-
introspection
|
|
686
|
-
}
|
|
687
|
-
);
|
|
2471
|
+
const output = await resolveContext({
|
|
2472
|
+
conversation: formatConversation(last.conversationContext),
|
|
2473
|
+
sql: last.sql,
|
|
2474
|
+
introspection
|
|
2475
|
+
});
|
|
688
2476
|
yield [
|
|
689
2477
|
{
|
|
690
|
-
question:
|
|
2478
|
+
question: output.question,
|
|
691
2479
|
sql: last.sql,
|
|
692
2480
|
context: last.conversationContext,
|
|
693
2481
|
success: last.success
|
|
@@ -700,11 +2488,10 @@ var LastQueryExtractor = class extends BaseContextualExtractor {
|
|
|
700
2488
|
import pLimit from "p-limit";
|
|
701
2489
|
|
|
702
2490
|
// packages/text2sql/src/lib/agents/question.agent.ts
|
|
703
|
-
import { groq as
|
|
704
|
-
import { defaultSettingsMiddleware, wrapLanguageModel } from "ai";
|
|
2491
|
+
import { groq as groq5 } from "@ai-sdk/groq";
|
|
705
2492
|
import dedent4 from "dedent";
|
|
706
2493
|
import z4 from "zod";
|
|
707
|
-
import
|
|
2494
|
+
import "@deepagents/agent";
|
|
708
2495
|
var complexityInstructions = {
|
|
709
2496
|
simple: dedent4`
|
|
710
2497
|
Generate simple questions that require:
|
|
@@ -724,506 +2511,105 @@ var complexityInstructions = {
|
|
|
724
2511
|
Examples: "What are the top 5 customers by total orders?", "Which products have never been ordered?"
|
|
725
2512
|
`,
|
|
726
2513
|
complex: dedent4`
|
|
727
|
-
Generate complex questions that require:
|
|
728
|
-
- Multiple JOINs (3+ tables)
|
|
729
|
-
- Nested subqueries or CTEs
|
|
730
|
-
- Complex aggregations with multiple GROUP BY columns
|
|
731
|
-
- CASE expressions
|
|
732
|
-
- Date/time calculations
|
|
733
|
-
Examples: "What is the month-over-month growth rate?", "Which customers have increased spending compared to last year?"
|
|
734
|
-
`,
|
|
735
|
-
"high complex": dedent4`
|
|
736
|
-
Generate highly complex questions that require advanced SQL features:
|
|
737
|
-
- Window functions (ROW_NUMBER, RANK, DENSE_RANK)
|
|
738
|
-
- LAG, LEAD for comparisons
|
|
739
|
-
- Running totals (SUM OVER)
|
|
740
|
-
- Moving averages
|
|
741
|
-
- PARTITION BY clauses
|
|
742
|
-
- Complex CTEs with multiple levels
|
|
743
|
-
Examples: "What is the running total of sales per month?", "Rank customers by their purchase frequency within each region"
|
|
744
|
-
`
|
|
745
|
-
};
|
|
746
|
-
var
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
return dedent4`
|
|
762
|
-
<identity>
|
|
763
|
-
You are a synthetic data generator specializing in creating realistic natural language questions
|
|
764
|
-
that users might ask about a database. You understand database schemas and can generate diverse,
|
|
765
|
-
practical questions that would require SQL queries to answer.
|
|
766
|
-
</identity>
|
|
767
|
-
|
|
768
|
-
${state?.introspection || ""}
|
|
769
|
-
|
|
770
|
-
<complexity level="${complexity}">
|
|
771
|
-
${complexityInstructions[complexity]}
|
|
772
|
-
</complexity>
|
|
773
|
-
|
|
774
|
-
<task>
|
|
775
|
-
Generate exactly ${count} natural language questions at the "${complexity}" complexity level.
|
|
776
|
-
The questions should:
|
|
777
|
-
1. Match the complexity requirements above
|
|
778
|
-
2. Use natural business language, not technical SQL terms
|
|
779
|
-
3. Be realistic questions a non-technical user would actually ask
|
|
780
|
-
4. Cover different tables and relationships when possible
|
|
781
|
-
</task>
|
|
782
|
-
|
|
783
|
-
<guardrails>
|
|
784
|
-
- Questions MUST ONLY reference tables and columns that exist in the schema above
|
|
785
|
-
- Before generating each question, verify that ALL entities (tables, columns, relationships) you reference are explicitly listed in the schema
|
|
786
|
-
- DO NOT invent or assume tables/columns that aren't explicitly shown in the schema
|
|
787
|
-
- Use natural language without SQL keywords like SELECT, WHERE, etc.
|
|
788
|
-
- All questions must match the specified complexity level
|
|
789
|
-
</guardrails>
|
|
790
|
-
`;
|
|
791
|
-
}
|
|
792
|
-
});
|
|
793
|
-
async function generateQuestions(params) {
|
|
794
|
-
const { introspection, complexity, count, prompt, model } = params;
|
|
795
|
-
const agentInstance = model ? questionGeneratorAgent.clone({ model }) : questionGeneratorAgent;
|
|
796
|
-
const userPrompt = prompt ?? `Generate ${count} questions at ${complexity} complexity given db schema.`;
|
|
797
|
-
const { experimental_output } = await generate5(
|
|
798
|
-
agentInstance,
|
|
799
|
-
[user5(userPrompt)],
|
|
800
|
-
{
|
|
801
|
-
introspection,
|
|
802
|
-
complexity,
|
|
803
|
-
count
|
|
804
|
-
}
|
|
805
|
-
);
|
|
806
|
-
return { questions: experimental_output.questions };
|
|
807
|
-
}
|
|
808
|
-
|
|
809
|
-
// packages/text2sql/src/lib/agents/sql.agent.ts
|
|
810
|
-
import { groq as groq5 } from "@ai-sdk/groq";
|
|
811
|
-
import {
|
|
812
|
-
APICallError,
|
|
813
|
-
JSONParseError,
|
|
814
|
-
NoContentGeneratedError,
|
|
815
|
-
NoObjectGeneratedError,
|
|
816
|
-
NoOutputGeneratedError,
|
|
817
|
-
TypeValidationError,
|
|
818
|
-
defaultSettingsMiddleware as defaultSettingsMiddleware2,
|
|
819
|
-
wrapLanguageModel as wrapLanguageModel2
|
|
820
|
-
} from "ai";
|
|
821
|
-
import { Console } from "node:console";
|
|
822
|
-
import { createWriteStream } from "node:fs";
|
|
823
|
-
import pRetry from "p-retry";
|
|
824
|
-
import z5 from "zod";
|
|
825
|
-
import {
|
|
826
|
-
agent as agent5,
|
|
827
|
-
generate as generate6,
|
|
828
|
-
toOutput,
|
|
829
|
-
user as user6
|
|
830
|
-
} from "@deepagents/agent";
|
|
831
|
-
|
|
832
|
-
// packages/text2sql/src/lib/teach/xml.ts
|
|
833
|
-
function wrapBlock(tag, children) {
|
|
834
|
-
const content = children.filter((child) => Boolean(child)).join("\n");
|
|
835
|
-
if (!content) {
|
|
836
|
-
return "";
|
|
837
|
-
}
|
|
838
|
-
return `<${tag}>
|
|
839
|
-
${indentBlock(content, 2)}
|
|
840
|
-
</${tag}>`;
|
|
841
|
-
}
|
|
842
|
-
function list(tag, values, childTag) {
|
|
843
|
-
if (!values.length) {
|
|
844
|
-
return "";
|
|
845
|
-
}
|
|
846
|
-
const children = values.map((value) => leaf(childTag, value)).join("\n");
|
|
847
|
-
return `<${tag}>
|
|
848
|
-
${indentBlock(children, 2)}
|
|
849
|
-
</${tag}>`;
|
|
850
|
-
}
|
|
851
|
-
function leaf(tag, value) {
|
|
852
|
-
const safe = escapeXml(value);
|
|
853
|
-
if (safe.includes("\n")) {
|
|
854
|
-
return `<${tag}>
|
|
855
|
-
${indentBlock(safe, 2)}
|
|
856
|
-
</${tag}>`;
|
|
857
|
-
}
|
|
858
|
-
return `<${tag}>${safe}</${tag}>`;
|
|
859
|
-
}
|
|
860
|
-
function indentBlock(text, spaces) {
|
|
861
|
-
if (!text.trim()) {
|
|
862
|
-
return "";
|
|
863
|
-
}
|
|
864
|
-
const padding = " ".repeat(spaces);
|
|
865
|
-
return text.split("\n").map((line) => line.length ? padding + line : padding).join("\n");
|
|
866
|
-
}
|
|
867
|
-
function escapeXml(value) {
|
|
868
|
-
if (value == null) {
|
|
869
|
-
return "";
|
|
870
|
-
}
|
|
871
|
-
return value.replaceAll(/&/g, "&").replaceAll(/</g, "<").replaceAll(/>/g, ">").replaceAll(/"/g, """).replaceAll(/'/g, "'");
|
|
872
|
-
}
|
|
873
|
-
|
|
874
|
-
// packages/text2sql/src/lib/teach/teachables.ts
|
|
875
|
-
function term(name, definition) {
|
|
876
|
-
return {
|
|
877
|
-
type: "term",
|
|
878
|
-
encode: () => ({ type: "term", name, definition }),
|
|
879
|
-
decode: () => wrapBlock("term", [leaf("name", name), leaf("definition", definition)])
|
|
880
|
-
};
|
|
881
|
-
}
|
|
882
|
-
function hint(text) {
|
|
883
|
-
return {
|
|
884
|
-
type: "hint",
|
|
885
|
-
encode: () => ({ type: "hint", text }),
|
|
886
|
-
decode: () => leaf("hint", text)
|
|
887
|
-
};
|
|
888
|
-
}
|
|
889
|
-
function guardrail(input) {
|
|
890
|
-
const { rule, reason, action } = input;
|
|
891
|
-
return {
|
|
892
|
-
type: "guardrail",
|
|
893
|
-
encode: () => ({ type: "guardrail", rule, reason, action }),
|
|
894
|
-
decode: () => wrapBlock("guardrail", [
|
|
895
|
-
leaf("rule", rule),
|
|
896
|
-
reason ? leaf("reason", reason) : "",
|
|
897
|
-
action ? leaf("action", action) : ""
|
|
898
|
-
])
|
|
899
|
-
};
|
|
900
|
-
}
|
|
901
|
-
function explain(input) {
|
|
902
|
-
const { concept, explanation, therefore } = input;
|
|
903
|
-
return {
|
|
904
|
-
type: "explain",
|
|
905
|
-
encode: () => ({ type: "explain", concept, explanation, therefore }),
|
|
906
|
-
decode: () => wrapBlock("explanation", [
|
|
907
|
-
leaf("concept", concept),
|
|
908
|
-
leaf("details", explanation),
|
|
909
|
-
therefore ? leaf("therefore", therefore) : ""
|
|
910
|
-
])
|
|
911
|
-
};
|
|
912
|
-
}
|
|
913
|
-
function example(input) {
|
|
914
|
-
const { question, answer, note } = input;
|
|
915
|
-
return {
|
|
916
|
-
type: "example",
|
|
917
|
-
encode: () => ({ type: "example", question, answer, note }),
|
|
918
|
-
decode: () => wrapBlock("example", [
|
|
919
|
-
leaf("question", question),
|
|
920
|
-
leaf("answer", answer),
|
|
921
|
-
note ? leaf("note", note) : ""
|
|
922
|
-
])
|
|
923
|
-
};
|
|
924
|
-
}
|
|
925
|
-
function clarification(input) {
|
|
926
|
-
const { when, ask, reason } = input;
|
|
927
|
-
return {
|
|
928
|
-
type: "clarification",
|
|
929
|
-
encode: () => ({ type: "clarification", when, ask, reason }),
|
|
930
|
-
decode: () => wrapBlock("clarification", [
|
|
931
|
-
leaf("when", when),
|
|
932
|
-
leaf("ask", ask),
|
|
933
|
-
leaf("reason", reason)
|
|
934
|
-
])
|
|
935
|
-
};
|
|
936
|
-
}
|
|
937
|
-
function workflow(input) {
|
|
938
|
-
const { task, steps, triggers, notes } = input;
|
|
939
|
-
return {
|
|
940
|
-
type: "workflow",
|
|
941
|
-
encode: () => ({ type: "workflow", task, steps, triggers, notes }),
|
|
942
|
-
decode: () => wrapBlock("workflow", [
|
|
943
|
-
leaf("task", task),
|
|
944
|
-
triggers?.length ? list("triggers", triggers, "trigger") : "",
|
|
945
|
-
list("steps", steps, "step"),
|
|
946
|
-
notes ? leaf("notes", notes) : ""
|
|
947
|
-
])
|
|
948
|
-
};
|
|
949
|
-
}
|
|
950
|
-
function quirk(input) {
|
|
951
|
-
const { issue, workaround } = input;
|
|
952
|
-
return {
|
|
953
|
-
type: "quirk",
|
|
954
|
-
encode: () => ({ type: "quirk", issue, workaround }),
|
|
955
|
-
decode: () => wrapBlock("quirk", [
|
|
956
|
-
leaf("issue", issue),
|
|
957
|
-
leaf("workaround", workaround)
|
|
958
|
-
])
|
|
959
|
-
};
|
|
960
|
-
}
|
|
961
|
-
function styleGuide(input) {
|
|
962
|
-
const { prefer, never, always } = input;
|
|
963
|
-
return {
|
|
964
|
-
type: "styleGuide",
|
|
965
|
-
encode: () => ({ type: "styleGuide", prefer, never, always }),
|
|
966
|
-
decode: () => wrapBlock("style_guide", [
|
|
967
|
-
leaf("prefer", prefer),
|
|
968
|
-
always ? leaf("always", always) : "",
|
|
969
|
-
never ? leaf("never", never) : ""
|
|
970
|
-
])
|
|
971
|
-
};
|
|
972
|
-
}
|
|
973
|
-
function analogy(input) {
|
|
974
|
-
const { concept, relationship, insight, therefore, pitfall } = input;
|
|
975
|
-
return {
|
|
976
|
-
type: "analogy",
|
|
977
|
-
encode: () => ({
|
|
978
|
-
type: "analogy",
|
|
979
|
-
concept,
|
|
980
|
-
relationship,
|
|
981
|
-
insight,
|
|
982
|
-
therefore,
|
|
983
|
-
pitfall
|
|
2514
|
+
Generate complex questions that require:
|
|
2515
|
+
- Multiple JOINs (3+ tables)
|
|
2516
|
+
- Nested subqueries or CTEs
|
|
2517
|
+
- Complex aggregations with multiple GROUP BY columns
|
|
2518
|
+
- CASE expressions
|
|
2519
|
+
- Date/time calculations
|
|
2520
|
+
Examples: "What is the month-over-month growth rate?", "Which customers have increased spending compared to last year?"
|
|
2521
|
+
`,
|
|
2522
|
+
"high complex": dedent4`
|
|
2523
|
+
Generate highly complex questions that require advanced SQL features:
|
|
2524
|
+
- Window functions (ROW_NUMBER, RANK, DENSE_RANK)
|
|
2525
|
+
- LAG, LEAD for comparisons
|
|
2526
|
+
- Running totals (SUM OVER)
|
|
2527
|
+
- Moving averages
|
|
2528
|
+
- PARTITION BY clauses
|
|
2529
|
+
- Complex CTEs with multiple levels
|
|
2530
|
+
Examples: "What is the running total of sales per month?", "Rank customers by their purchase frequency within each region"
|
|
2531
|
+
`
|
|
2532
|
+
};
|
|
2533
|
+
var outputSchema2 = z4.object({
|
|
2534
|
+
questions: z4.array(z4.string().describe("A natural language question about the data")).min(1).describe("List of natural language questions a user might ask")
|
|
2535
|
+
});
|
|
2536
|
+
async function generateQuestions(params) {
|
|
2537
|
+
const { introspection, complexity, count, prompt, model } = params;
|
|
2538
|
+
const context = new ContextEngine({
|
|
2539
|
+
store: new InMemoryContextStore(),
|
|
2540
|
+
chatId: `question-gen-${crypto.randomUUID()}`,
|
|
2541
|
+
userId: "system"
|
|
2542
|
+
});
|
|
2543
|
+
context.set(
|
|
2544
|
+
persona({
|
|
2545
|
+
name: "question_generator",
|
|
2546
|
+
role: "You are a synthetic data generator specializing in creating realistic natural language questions that users might ask about a database.",
|
|
2547
|
+
objective: "Generate diverse, realistic natural language questions that match the specified complexity level"
|
|
984
2548
|
}),
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
2549
|
+
fragment("database_schema", introspection || ""),
|
|
2550
|
+
fragment(
|
|
2551
|
+
"complexity",
|
|
2552
|
+
{ level: complexity },
|
|
2553
|
+
complexityInstructions[complexity]
|
|
2554
|
+
),
|
|
2555
|
+
fragment(
|
|
2556
|
+
"task",
|
|
2557
|
+
dedent4`
|
|
2558
|
+
Generate exactly ${count} natural language questions at the "${complexity}" complexity level.
|
|
2559
|
+
The questions should:
|
|
2560
|
+
1. Match the complexity requirements above
|
|
2561
|
+
2. Use natural business language, not technical SQL terms
|
|
2562
|
+
3. Be realistic questions a non-technical user would actually ask
|
|
2563
|
+
4. Cover different tables and relationships when possible
|
|
2564
|
+
`
|
|
2565
|
+
),
|
|
2566
|
+
guardrail({
|
|
2567
|
+
rule: "Questions MUST ONLY reference tables and columns that exist in the schema above"
|
|
2568
|
+
}),
|
|
2569
|
+
guardrail({
|
|
2570
|
+
rule: "Before generating each question, verify that ALL entities (tables, columns, relationships) you reference are explicitly listed in the schema"
|
|
2571
|
+
}),
|
|
2572
|
+
guardrail({
|
|
2573
|
+
rule: "DO NOT invent or assume tables/columns that are not explicitly shown in the schema"
|
|
2574
|
+
}),
|
|
2575
|
+
guardrail({
|
|
2576
|
+
rule: "Use natural language without SQL keywords like SELECT, WHERE, etc."
|
|
2577
|
+
}),
|
|
2578
|
+
guardrail({
|
|
2579
|
+
rule: "All questions must match the specified complexity level"
|
|
2580
|
+
}),
|
|
2581
|
+
user(
|
|
2582
|
+
prompt ?? `Generate ${count} questions at ${complexity} complexity given db schema.`
|
|
1003
2583
|
)
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
type: "identity",
|
|
1010
|
-
encode: () => ({ type: "identity", name, role }),
|
|
1011
|
-
decode: () => wrapBlock("identity", [
|
|
1012
|
-
name ? leaf("name", name) : "",
|
|
1013
|
-
role ? leaf("role", role) : ""
|
|
1014
|
-
])
|
|
1015
|
-
};
|
|
1016
|
-
}
|
|
1017
|
-
function persona(input) {
|
|
1018
|
-
const { name, role, tone } = input;
|
|
1019
|
-
return {
|
|
1020
|
-
type: "persona",
|
|
1021
|
-
encode: () => ({ type: "persona", name, role, tone: tone ?? "" }),
|
|
1022
|
-
decode: () => wrapBlock("persona", [
|
|
1023
|
-
leaf("name", name),
|
|
1024
|
-
leaf("role", role),
|
|
1025
|
-
tone ? leaf("tone", tone) : ""
|
|
1026
|
-
])
|
|
1027
|
-
};
|
|
1028
|
-
}
|
|
1029
|
-
function alias(termName, meaning) {
|
|
1030
|
-
return {
|
|
1031
|
-
type: "alias",
|
|
1032
|
-
encode: () => ({ type: "alias", term: termName, meaning }),
|
|
1033
|
-
decode: () => wrapBlock("alias", [leaf("term", termName), leaf("meaning", meaning)])
|
|
1034
|
-
};
|
|
1035
|
-
}
|
|
1036
|
-
function preference(aspect, value) {
|
|
1037
|
-
return {
|
|
1038
|
-
type: "preference",
|
|
1039
|
-
encode: () => ({ type: "preference", aspect, value }),
|
|
1040
|
-
decode: () => wrapBlock("preference", [leaf("aspect", aspect), leaf("value", value)])
|
|
1041
|
-
};
|
|
1042
|
-
}
|
|
1043
|
-
function context(description) {
|
|
1044
|
-
return {
|
|
1045
|
-
type: "context",
|
|
1046
|
-
encode: () => ({ type: "context", description }),
|
|
1047
|
-
decode: () => leaf("context", description)
|
|
1048
|
-
};
|
|
1049
|
-
}
|
|
1050
|
-
function correction(subject, clarification2) {
|
|
1051
|
-
return {
|
|
1052
|
-
type: "correction",
|
|
1053
|
-
encode: () => ({ type: "correction", subject, clarification: clarification2 }),
|
|
1054
|
-
decode: () => wrapBlock("correction", [
|
|
1055
|
-
leaf("subject", subject),
|
|
1056
|
-
leaf("clarification", clarification2)
|
|
1057
|
-
])
|
|
1058
|
-
};
|
|
1059
|
-
}
|
|
1060
|
-
function toInstructions(tag, ...teachables) {
|
|
1061
|
-
if (!teachables.length) {
|
|
1062
|
-
return "";
|
|
1063
|
-
}
|
|
1064
|
-
const grouped = /* @__PURE__ */ new Map();
|
|
1065
|
-
for (const teachable of teachables) {
|
|
1066
|
-
const existing = grouped.get(teachable.type) ?? [];
|
|
1067
|
-
existing.push(teachable);
|
|
1068
|
-
grouped.set(teachable.type, existing);
|
|
1069
|
-
}
|
|
1070
|
-
const definedTypes = new Set(SECTION_ORDER.map((s) => s.type));
|
|
1071
|
-
const sections = SECTION_ORDER.map(({ type, tag: tag2 }) => {
|
|
1072
|
-
const items = grouped.get(type);
|
|
1073
|
-
if (!items?.length) {
|
|
1074
|
-
return "";
|
|
1075
|
-
}
|
|
1076
|
-
const renderedItems = items.map((item) => item.decode().trim()).filter(Boolean).map((item) => indentBlock(item, 2)).join("\n");
|
|
1077
|
-
if (!renderedItems.length) {
|
|
1078
|
-
return "";
|
|
1079
|
-
}
|
|
1080
|
-
return `<${tag2}>
|
|
1081
|
-
${renderedItems}
|
|
1082
|
-
</${tag2}>`;
|
|
1083
|
-
}).filter((section) => Boolean(section));
|
|
1084
|
-
for (const [type, items] of grouped) {
|
|
1085
|
-
if (definedTypes.has(type)) {
|
|
1086
|
-
continue;
|
|
1087
|
-
}
|
|
1088
|
-
const renderedItems = items.map((item) => item.decode().trim()).filter(Boolean).map((item) => indentBlock(item, 2)).join("\n");
|
|
1089
|
-
if (renderedItems.length) {
|
|
1090
|
-
sections.push(renderedItems);
|
|
1091
|
-
}
|
|
1092
|
-
}
|
|
1093
|
-
if (!sections.length) {
|
|
1094
|
-
return "";
|
|
1095
|
-
}
|
|
1096
|
-
const content = indentBlock(sections.join("\n"), 2);
|
|
1097
|
-
return `<${tag}>
|
|
1098
|
-
${content}
|
|
1099
|
-
</${tag}>`;
|
|
1100
|
-
}
|
|
1101
|
-
var SECTION_ORDER = [
|
|
1102
|
-
// User context (render first - most important for personalization)
|
|
1103
|
-
{ type: "identity", tag: "identity" },
|
|
1104
|
-
{ type: "persona", tag: "persona" },
|
|
1105
|
-
{ type: "context", tag: "user_context" },
|
|
1106
|
-
{ type: "preference", tag: "user_preferences" },
|
|
1107
|
-
{ type: "alias", tag: "user_vocabulary" },
|
|
1108
|
-
{ type: "correction", tag: "user_corrections" },
|
|
1109
|
-
// Domain knowledge
|
|
1110
|
-
{ type: "guardrail", tag: "guardrails" },
|
|
1111
|
-
{ type: "styleGuide", tag: "style_guides" },
|
|
1112
|
-
{ type: "hint", tag: "hints" },
|
|
1113
|
-
{ type: "clarification", tag: "clarifications" },
|
|
1114
|
-
{ type: "workflow", tag: "workflows" },
|
|
1115
|
-
{ type: "quirk", tag: "quirks" },
|
|
1116
|
-
{ type: "term", tag: "terminology" },
|
|
1117
|
-
{ type: "explain", tag: "explanations" },
|
|
1118
|
-
{ type: "analogy", tag: "analogies" },
|
|
1119
|
-
{ type: "glossary", tag: "glossary" },
|
|
1120
|
-
{ type: "example", tag: "examples" }
|
|
1121
|
-
];
|
|
1122
|
-
function toTeachables(generated) {
|
|
1123
|
-
return generated.map((item) => {
|
|
1124
|
-
switch (item.type) {
|
|
1125
|
-
case "persona":
|
|
1126
|
-
return persona({ name: item.name, role: item.role, tone: item.tone });
|
|
1127
|
-
case "term":
|
|
1128
|
-
return term(item.name, item.definition);
|
|
1129
|
-
case "hint":
|
|
1130
|
-
return hint(item.text);
|
|
1131
|
-
case "guardrail":
|
|
1132
|
-
return guardrail({
|
|
1133
|
-
rule: item.rule,
|
|
1134
|
-
reason: item.reason,
|
|
1135
|
-
action: item.action
|
|
1136
|
-
});
|
|
1137
|
-
case "explain":
|
|
1138
|
-
return explain({
|
|
1139
|
-
concept: item.concept,
|
|
1140
|
-
explanation: item.explanation,
|
|
1141
|
-
therefore: item.therefore
|
|
1142
|
-
});
|
|
1143
|
-
case "example":
|
|
1144
|
-
return example({
|
|
1145
|
-
question: item.question,
|
|
1146
|
-
answer: item.answer,
|
|
1147
|
-
note: item.note
|
|
1148
|
-
});
|
|
1149
|
-
case "clarification":
|
|
1150
|
-
return clarification({
|
|
1151
|
-
when: item.when,
|
|
1152
|
-
ask: item.ask,
|
|
1153
|
-
reason: item.reason
|
|
1154
|
-
});
|
|
1155
|
-
case "workflow":
|
|
1156
|
-
return workflow({
|
|
1157
|
-
task: item.task,
|
|
1158
|
-
steps: item.steps,
|
|
1159
|
-
triggers: item.triggers,
|
|
1160
|
-
notes: item.notes
|
|
1161
|
-
});
|
|
1162
|
-
case "quirk":
|
|
1163
|
-
return quirk({
|
|
1164
|
-
issue: item.issue,
|
|
1165
|
-
workaround: item.workaround
|
|
1166
|
-
});
|
|
1167
|
-
case "styleGuide":
|
|
1168
|
-
return styleGuide({
|
|
1169
|
-
prefer: item.prefer,
|
|
1170
|
-
never: item.never,
|
|
1171
|
-
always: item.always
|
|
1172
|
-
});
|
|
1173
|
-
case "analogy":
|
|
1174
|
-
return analogy({
|
|
1175
|
-
concept: item.concept,
|
|
1176
|
-
relationship: item.relationship,
|
|
1177
|
-
insight: item.insight,
|
|
1178
|
-
therefore: item.therefore,
|
|
1179
|
-
pitfall: item.pitfall
|
|
1180
|
-
});
|
|
1181
|
-
case "glossary":
|
|
1182
|
-
return glossary(item.entries);
|
|
1183
|
-
// User-specific teachable types
|
|
1184
|
-
case "identity":
|
|
1185
|
-
return identity({ name: item.name, role: item.role });
|
|
1186
|
-
case "alias":
|
|
1187
|
-
return alias(item.term, item.meaning);
|
|
1188
|
-
case "preference":
|
|
1189
|
-
return preference(item.aspect, item.value);
|
|
1190
|
-
case "context":
|
|
1191
|
-
return context(item.description);
|
|
1192
|
-
case "correction":
|
|
1193
|
-
return correction(item.subject, item.clarification);
|
|
1194
|
-
}
|
|
2584
|
+
);
|
|
2585
|
+
const questionOutput = structuredOutput({
|
|
2586
|
+
model: model ?? groq5("openai/gpt-oss-20b"),
|
|
2587
|
+
context,
|
|
2588
|
+
schema: outputSchema2
|
|
1195
2589
|
});
|
|
2590
|
+
return questionOutput.generate();
|
|
1196
2591
|
}
|
|
1197
2592
|
|
|
1198
2593
|
// packages/text2sql/src/lib/agents/sql.agent.ts
|
|
2594
|
+
import { groq as groq6 } from "@ai-sdk/groq";
|
|
2595
|
+
import {
|
|
2596
|
+
APICallError,
|
|
2597
|
+
JSONParseError,
|
|
2598
|
+
NoContentGeneratedError,
|
|
2599
|
+
NoObjectGeneratedError,
|
|
2600
|
+
NoOutputGeneratedError,
|
|
2601
|
+
TypeValidationError
|
|
2602
|
+
} from "ai";
|
|
2603
|
+
import { Console } from "node:console";
|
|
2604
|
+
import { createWriteStream } from "node:fs";
|
|
2605
|
+
import pRetry from "p-retry";
|
|
2606
|
+
import z5 from "zod";
|
|
2607
|
+
import "@deepagents/agent";
|
|
1199
2608
|
var logger = new Console({
|
|
1200
2609
|
stdout: createWriteStream("./sql-agent.log", { flags: "a" }),
|
|
1201
2610
|
stderr: createWriteStream("./sql-agent-error.log", { flags: "a" }),
|
|
1202
2611
|
inspectOptions: { depth: null }
|
|
1203
2612
|
});
|
|
1204
|
-
var RETRY_TEMPERATURES = [0, 0.2, 0.3];
|
|
1205
|
-
var sqlQueryAgent = agent5({
|
|
1206
|
-
name: "text2sql",
|
|
1207
|
-
model: groq5("openai/gpt-oss-20b"),
|
|
1208
|
-
logging: process.env.AGENT_LOGGING === "true",
|
|
1209
|
-
output: z5.union([
|
|
1210
|
-
z5.object({
|
|
1211
|
-
sql: z5.string().describe("The SQL query that answers the question"),
|
|
1212
|
-
reasoning: z5.string().optional().describe("The reasoning steps taken to generate the SQL")
|
|
1213
|
-
}),
|
|
1214
|
-
z5.object({
|
|
1215
|
-
error: z5.string().describe(
|
|
1216
|
-
"Error message explaining why the question cannot be answered with the given schema"
|
|
1217
|
-
)
|
|
1218
|
-
})
|
|
1219
|
-
]),
|
|
1220
|
-
prompt: (state) => {
|
|
1221
|
-
return `
|
|
1222
|
-
${state?.teachings || ""}
|
|
1223
|
-
${state?.introspection || ""}
|
|
1224
|
-
`;
|
|
1225
|
-
}
|
|
1226
|
-
});
|
|
1227
2613
|
function extractSql(output) {
|
|
1228
2614
|
const match = output.match(/```sql\n?([\s\S]*?)```/);
|
|
1229
2615
|
return match ? match[1].trim() : output.trim();
|
|
@@ -1231,8 +2617,8 @@ function extractSql(output) {
|
|
|
1231
2617
|
var marker = Symbol("SQLValidationError");
|
|
1232
2618
|
var SQLValidationError = class _SQLValidationError extends Error {
|
|
1233
2619
|
[marker];
|
|
1234
|
-
constructor(
|
|
1235
|
-
super(
|
|
2620
|
+
constructor(message2) {
|
|
2621
|
+
super(message2);
|
|
1236
2622
|
this.name = "SQLValidationError";
|
|
1237
2623
|
this[marker] = true;
|
|
1238
2624
|
}
|
|
@@ -1241,8 +2627,8 @@ var SQLValidationError = class _SQLValidationError extends Error {
|
|
|
1241
2627
|
}
|
|
1242
2628
|
};
|
|
1243
2629
|
var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
|
|
1244
|
-
constructor(
|
|
1245
|
-
super(
|
|
2630
|
+
constructor(message2) {
|
|
2631
|
+
super(message2);
|
|
1246
2632
|
this.name = "UnanswerableSQLError";
|
|
1247
2633
|
}
|
|
1248
2634
|
static isInstance(error) {
|
|
@@ -1253,36 +2639,46 @@ async function toSql(options) {
|
|
|
1253
2639
|
const { maxRetries = 3 } = options;
|
|
1254
2640
|
return withRetry(
|
|
1255
2641
|
async (attemptNumber, errors, attempts) => {
|
|
1256
|
-
const
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
settings: {
|
|
1261
|
-
temperature: RETRY_TEMPERATURES[attemptNumber - 1] ?? 0.3,
|
|
1262
|
-
topP: 1
|
|
1263
|
-
}
|
|
1264
|
-
})
|
|
1265
|
-
})
|
|
2642
|
+
const context = new ContextEngine({
|
|
2643
|
+
store: new InMemoryContextStore(),
|
|
2644
|
+
chatId: `sql-gen-${crypto.randomUUID()}`,
|
|
2645
|
+
userId: "system"
|
|
1266
2646
|
});
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
introspection: options.introspection,
|
|
1276
|
-
teachings: toInstructions(
|
|
1277
|
-
"instructions",
|
|
1278
|
-
persona({
|
|
1279
|
-
name: "Freya",
|
|
1280
|
-
role: "You are an expert SQL query generator. You translate natural language questions into precise, efficient SQL queries based on the provided database schema."
|
|
1281
|
-
}),
|
|
1282
|
-
...options.instructions
|
|
1283
|
-
)
|
|
1284
|
-
})
|
|
2647
|
+
context.set(
|
|
2648
|
+
persona({
|
|
2649
|
+
name: "Freya",
|
|
2650
|
+
role: "You are an expert SQL query generator. You translate natural language questions into precise, efficient SQL queries based on the provided database schema.",
|
|
2651
|
+
objective: "Translate natural language questions into precise, efficient SQL queries"
|
|
2652
|
+
}),
|
|
2653
|
+
...options.instructions,
|
|
2654
|
+
...options.schemaFragments
|
|
1285
2655
|
);
|
|
2656
|
+
if (errors.length) {
|
|
2657
|
+
context.set(
|
|
2658
|
+
user(options.input),
|
|
2659
|
+
user(
|
|
2660
|
+
`<validation_error>Your previous SQL query had the following error: ${errors.at(-1)?.message}. Please fix the query.</validation_error>`
|
|
2661
|
+
)
|
|
2662
|
+
);
|
|
2663
|
+
} else {
|
|
2664
|
+
context.set(user(options.input));
|
|
2665
|
+
}
|
|
2666
|
+
const sqlOutput = structuredOutput({
|
|
2667
|
+
model: options.model ?? groq6("openai/gpt-oss-20b"),
|
|
2668
|
+
context,
|
|
2669
|
+
schema: z5.union([
|
|
2670
|
+
z5.object({
|
|
2671
|
+
sql: z5.string().describe("The SQL query that answers the question"),
|
|
2672
|
+
reasoning: z5.string().optional().describe("The reasoning steps taken to generate the SQL")
|
|
2673
|
+
}),
|
|
2674
|
+
z5.object({
|
|
2675
|
+
error: z5.string().describe(
|
|
2676
|
+
"Error message explaining why the question cannot be answered with the given schema"
|
|
2677
|
+
)
|
|
2678
|
+
})
|
|
2679
|
+
])
|
|
2680
|
+
});
|
|
2681
|
+
const output = await sqlOutput.generate();
|
|
1286
2682
|
if ("error" in output) {
|
|
1287
2683
|
throw new UnanswerableSQLError(output.error);
|
|
1288
2684
|
}
|
|
@@ -1321,35 +2717,35 @@ async function withRetry(computation, options = { retries: 3 }) {
|
|
|
1321
2717
|
},
|
|
1322
2718
|
{
|
|
1323
2719
|
retries: options.retries,
|
|
1324
|
-
shouldRetry: (
|
|
1325
|
-
if (UnanswerableSQLError.isInstance(
|
|
2720
|
+
shouldRetry: (context) => {
|
|
2721
|
+
if (UnanswerableSQLError.isInstance(context.error)) {
|
|
1326
2722
|
return false;
|
|
1327
2723
|
}
|
|
1328
|
-
if (SQLValidationError.isInstance(
|
|
2724
|
+
if (SQLValidationError.isInstance(context.error)) {
|
|
1329
2725
|
return true;
|
|
1330
2726
|
}
|
|
1331
2727
|
console.log({
|
|
1332
2728
|
NoObjectGeneratedError: NoObjectGeneratedError.isInstance(
|
|
1333
|
-
|
|
2729
|
+
context.error
|
|
1334
2730
|
),
|
|
1335
2731
|
NoOutputGeneratedError: NoOutputGeneratedError.isInstance(
|
|
1336
|
-
|
|
2732
|
+
context.error
|
|
1337
2733
|
),
|
|
1338
|
-
APICallError: APICallError.isInstance(
|
|
1339
|
-
JSONParseError: JSONParseError.isInstance(
|
|
1340
|
-
TypeValidationError: TypeValidationError.isInstance(
|
|
2734
|
+
APICallError: APICallError.isInstance(context.error),
|
|
2735
|
+
JSONParseError: JSONParseError.isInstance(context.error),
|
|
2736
|
+
TypeValidationError: TypeValidationError.isInstance(context.error),
|
|
1341
2737
|
NoContentGeneratedError: NoContentGeneratedError.isInstance(
|
|
1342
|
-
|
|
2738
|
+
context.error
|
|
1343
2739
|
)
|
|
1344
2740
|
});
|
|
1345
|
-
return APICallError.isInstance(
|
|
2741
|
+
return APICallError.isInstance(context.error) || JSONParseError.isInstance(context.error) || TypeValidationError.isInstance(context.error) || NoObjectGeneratedError.isInstance(context.error) || NoOutputGeneratedError.isInstance(context.error) || NoContentGeneratedError.isInstance(context.error);
|
|
1346
2742
|
},
|
|
1347
|
-
onFailedAttempt(
|
|
1348
|
-
logger.error(`toSQL`,
|
|
2743
|
+
onFailedAttempt(context) {
|
|
2744
|
+
logger.error(`toSQL`, context.error);
|
|
1349
2745
|
console.log(
|
|
1350
|
-
`Attempt ${
|
|
2746
|
+
`Attempt ${context.attemptNumber} failed. There are ${context.retriesLeft} retries left.`
|
|
1351
2747
|
);
|
|
1352
|
-
errors.push(
|
|
2748
|
+
errors.push(context.error);
|
|
1353
2749
|
}
|
|
1354
2750
|
}
|
|
1355
2751
|
);
|
|
@@ -1379,7 +2775,7 @@ var SchemaSynthesizer = class extends PairProducer {
|
|
|
1379
2775
|
* @returns Generated pairs from all combinations
|
|
1380
2776
|
*/
|
|
1381
2777
|
async *produce() {
|
|
1382
|
-
const introspection =
|
|
2778
|
+
const introspection = "";
|
|
1383
2779
|
const combinations = this.#personas.flatMap(
|
|
1384
2780
|
(persona2) => this.#complexities.map((complexity) => ({ persona: persona2, complexity }))
|
|
1385
2781
|
);
|
|
@@ -1421,7 +2817,8 @@ Generate ${this.options.count} questions at ${complexity} complexity.` : void 0;
|
|
|
1421
2817
|
return await toSql({
|
|
1422
2818
|
input: question,
|
|
1423
2819
|
adapter: this.adapter,
|
|
1424
|
-
|
|
2820
|
+
schemaFragments: [],
|
|
2821
|
+
// Placeholder - needs to pass actual fragments
|
|
1425
2822
|
instructions: this.options.teachings ?? [],
|
|
1426
2823
|
model: this.options.model
|
|
1427
2824
|
});
|
|
@@ -1450,17 +2847,11 @@ Generate ${this.options.count} questions at ${complexity} complexity.` : void 0;
|
|
|
1450
2847
|
};
|
|
1451
2848
|
|
|
1452
2849
|
// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
|
|
1453
|
-
import { groq as
|
|
1454
|
-
import { defaultSettingsMiddleware as defaultSettingsMiddleware3, wrapLanguageModel as wrapLanguageModel3 } from "ai";
|
|
2850
|
+
import { groq as groq7 } from "@ai-sdk/groq";
|
|
1455
2851
|
import dedent5 from "dedent";
|
|
1456
2852
|
import pLimit2 from "p-limit";
|
|
1457
2853
|
import z6 from "zod";
|
|
1458
|
-
import
|
|
1459
|
-
agent as agent6,
|
|
1460
|
-
generate as generate7,
|
|
1461
|
-
toOutput as toOutput2,
|
|
1462
|
-
user as user7
|
|
1463
|
-
} from "@deepagents/agent";
|
|
2854
|
+
import "@deepagents/agent";
|
|
1464
2855
|
|
|
1465
2856
|
// packages/text2sql/src/lib/synthesis/synthesizers/styles.ts
|
|
1466
2857
|
var ALL_STYLES = [
|
|
@@ -1496,63 +2887,53 @@ var styleInstructions = {
|
|
|
1496
2887
|
};
|
|
1497
2888
|
|
|
1498
2889
|
// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
|
|
1499
|
-
var
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
)
|
|
1514
|
-
}),
|
|
1515
|
-
prompt: (state) => {
|
|
1516
|
-
const personaInstruction = state?.persona ? dedent5`
|
|
1517
|
-
<persona role="${state.persona.role}">
|
|
1518
|
-
${state.persona.perspective}
|
|
2890
|
+
var paraphraserOutputSchema = z6.object({
|
|
2891
|
+
paraphrases: z6.array(
|
|
2892
|
+
z6.string().describe("A paraphrased version of the original question")
|
|
2893
|
+
).min(1).describe("List of paraphrased questions that would produce the same SQL")
|
|
2894
|
+
});
|
|
2895
|
+
async function paraphraseQuestion(params) {
|
|
2896
|
+
const context = new ContextEngine({
|
|
2897
|
+
store: new InMemoryContextStore(),
|
|
2898
|
+
chatId: `paraphraser-${crypto.randomUUID()}`,
|
|
2899
|
+
userId: "system"
|
|
2900
|
+
});
|
|
2901
|
+
const personaInstruction = params.persona ? dedent5`
|
|
2902
|
+
<persona role="${params.persona.role}">
|
|
2903
|
+
${params.persona.perspective}
|
|
1519
2904
|
|
|
1520
2905
|
Paraphrase the question as this persona would naturally ask it.
|
|
1521
2906
|
Use their vocabulary, priorities, and framing style.
|
|
1522
2907
|
</persona>
|
|
1523
2908
|
` : "";
|
|
1524
|
-
|
|
2909
|
+
const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ? dedent5`
|
|
1525
2910
|
<communication_styles>
|
|
1526
|
-
Generate paraphrases using these communication styles: ${
|
|
2911
|
+
Generate paraphrases using these communication styles: ${params.persona.styles.join(", ")}
|
|
1527
2912
|
|
|
1528
2913
|
Style definitions:
|
|
1529
|
-
${
|
|
2914
|
+
${params.persona.styles.map((s) => `- ${s}: ${styleInstructions[s]}`).join("\n")}
|
|
1530
2915
|
|
|
1531
2916
|
Distribute paraphrases across these styles for variety.
|
|
1532
2917
|
</communication_styles>
|
|
1533
2918
|
` : "";
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
${styleInstruction}
|
|
1553
|
-
|
|
1554
|
-
<task>
|
|
1555
|
-
Generate exactly ${state?.count} paraphrased versions of the original question.
|
|
2919
|
+
context.set(
|
|
2920
|
+
persona({
|
|
2921
|
+
name: "question_paraphraser",
|
|
2922
|
+
role: "You are a linguistic expert specializing in paraphrasing database questions. Your task is to generate alternative phrasings of questions that preserve the exact same semantic meaning - they must all produce the identical SQL query.",
|
|
2923
|
+
objective: "Generate paraphrased versions of questions that preserve exact semantic meaning and produce identical SQL"
|
|
2924
|
+
}),
|
|
2925
|
+
fragment("original_question", params.question),
|
|
2926
|
+
fragment(
|
|
2927
|
+
"reference_sql",
|
|
2928
|
+
params.sql,
|
|
2929
|
+
"This SQL shows what the question is really asking - all paraphrases must ask for exactly this"
|
|
2930
|
+
),
|
|
2931
|
+
...personaInstruction ? [fragment("persona", personaInstruction)] : [],
|
|
2932
|
+
...styleInstruction ? [fragment("communication_styles", styleInstruction)] : [],
|
|
2933
|
+
fragment(
|
|
2934
|
+
"task",
|
|
2935
|
+
dedent5`
|
|
2936
|
+
Generate exactly ${params.count} paraphrased versions of the original question.
|
|
1556
2937
|
|
|
1557
2938
|
Requirements:
|
|
1558
2939
|
1. Each paraphrase must be semantically equivalent - it should produce the EXACT same SQL
|
|
@@ -1560,18 +2941,30 @@ var paraphraserAgent = agent6({
|
|
|
1560
2941
|
3. Use natural language without SQL keywords (SELECT, WHERE, JOIN, etc.)
|
|
1561
2942
|
4. Keep paraphrases realistic - how actual users would ask
|
|
1562
2943
|
5. Do not add or remove any conditions, filters, or requirements from the original
|
|
1563
|
-
${
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
2944
|
+
${params.persona?.styles?.length ? "6. Apply the specified communication styles to create diverse phrasings" : ""}
|
|
2945
|
+
`
|
|
2946
|
+
),
|
|
2947
|
+
guardrail({ rule: "NEVER change what data is being requested" }),
|
|
2948
|
+
guardrail({
|
|
2949
|
+
rule: "NEVER add filters, aggregations, or conditions not in the original"
|
|
2950
|
+
}),
|
|
2951
|
+
guardrail({
|
|
2952
|
+
rule: "NEVER remove any specificity from the original question"
|
|
2953
|
+
}),
|
|
2954
|
+
guardrail({
|
|
2955
|
+
rule: "All paraphrases must be answerable by the exact same SQL query"
|
|
2956
|
+
}),
|
|
2957
|
+
user(
|
|
2958
|
+
`Paraphrase this question ${params.count} times: "${params.question}"`
|
|
2959
|
+
)
|
|
2960
|
+
);
|
|
2961
|
+
const paraphraserOutput = structuredOutput({
|
|
2962
|
+
model: params.model ?? groq7("openai/gpt-oss-20b"),
|
|
2963
|
+
context,
|
|
2964
|
+
schema: paraphraserOutputSchema
|
|
2965
|
+
});
|
|
2966
|
+
return paraphraserOutput.generate();
|
|
2967
|
+
}
|
|
1575
2968
|
var BreadthEvolver = class extends PairProducer {
|
|
1576
2969
|
/**
|
|
1577
2970
|
* @param source - Source pairs or producer to evolve
|
|
@@ -1592,23 +2985,14 @@ var BreadthEvolver = class extends PairProducer {
|
|
|
1592
2985
|
for await (const chunk of this.from(this.source)) {
|
|
1593
2986
|
const tasks = chunk.map(
|
|
1594
2987
|
(pair) => this.#limit(async () => {
|
|
1595
|
-
const
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
{
|
|
1604
|
-
question: pair.question,
|
|
1605
|
-
sql: pair.sql,
|
|
1606
|
-
count: this.options.count,
|
|
1607
|
-
persona: this.options.persona
|
|
1608
|
-
}
|
|
1609
|
-
)
|
|
1610
|
-
);
|
|
1611
|
-
return paraphrases.map((paraphrase) => ({
|
|
2988
|
+
const result = await paraphraseQuestion({
|
|
2989
|
+
question: pair.question,
|
|
2990
|
+
sql: pair.sql,
|
|
2991
|
+
count: this.options.count,
|
|
2992
|
+
persona: this.options.persona,
|
|
2993
|
+
model: this.options.model
|
|
2994
|
+
});
|
|
2995
|
+
return result.paraphrases.map((paraphrase) => ({
|
|
1612
2996
|
question: paraphrase,
|
|
1613
2997
|
sql: pair.sql,
|
|
1614
2998
|
context: pair.context,
|
|
@@ -1623,18 +3007,13 @@ var BreadthEvolver = class extends PairProducer {
|
|
|
1623
3007
|
};
|
|
1624
3008
|
|
|
1625
3009
|
// packages/text2sql/src/lib/synthesis/synthesizers/depth-evolver.ts
|
|
1626
|
-
import { groq as
|
|
1627
|
-
import {
|
|
1628
|
-
NoObjectGeneratedError as NoObjectGeneratedError2,
|
|
1629
|
-
NoOutputGeneratedError as NoOutputGeneratedError2,
|
|
1630
|
-
defaultSettingsMiddleware as defaultSettingsMiddleware4,
|
|
1631
|
-
wrapLanguageModel as wrapLanguageModel4
|
|
1632
|
-
} from "ai";
|
|
3010
|
+
import { groq as groq8 } from "@ai-sdk/groq";
|
|
3011
|
+
import { NoObjectGeneratedError as NoObjectGeneratedError2, NoOutputGeneratedError as NoOutputGeneratedError2 } from "ai";
|
|
1633
3012
|
import dedent6 from "dedent";
|
|
1634
3013
|
import pLimit3 from "p-limit";
|
|
1635
3014
|
import pRetry2 from "p-retry";
|
|
1636
3015
|
import z7 from "zod";
|
|
1637
|
-
import
|
|
3016
|
+
import "@deepagents/agent";
|
|
1638
3017
|
var techniqueInstructions = {
|
|
1639
3018
|
"add-aggregation": dedent6`
|
|
1640
3019
|
Add aggregation requirements to the question.
|
|
@@ -1677,44 +3056,37 @@ var techniqueInstructions = {
|
|
|
1677
3056
|
- "Get costs" → "What would be the impact of a 10% discount on profit margins?"
|
|
1678
3057
|
`
|
|
1679
3058
|
};
|
|
1680
|
-
var
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
}
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
<technique name="${state?.technique}">
|
|
1713
|
-
${state?.techniqueInstruction}
|
|
1714
|
-
</technique>
|
|
1715
|
-
|
|
1716
|
-
<task>
|
|
1717
|
-
Evolve the original question using the "${state?.technique}" technique.
|
|
3059
|
+
var evolverOutputSchema = z7.object({
|
|
3060
|
+
evolvedQuestion: z7.string().describe("The evolved, more complex version of the original question")
|
|
3061
|
+
});
|
|
3062
|
+
async function evolveQuestion(params) {
|
|
3063
|
+
const context = new ContextEngine({
|
|
3064
|
+
store: new InMemoryContextStore(),
|
|
3065
|
+
chatId: `evolver-${crypto.randomUUID()}`,
|
|
3066
|
+
userId: "system"
|
|
3067
|
+
});
|
|
3068
|
+
context.set(
|
|
3069
|
+
persona({
|
|
3070
|
+
name: "question_evolver",
|
|
3071
|
+
role: "You are an expert at evolving simple database questions into more complex ones. Your task is to take a basic question and transform it into a more sophisticated version that requires advanced SQL techniques to answer.",
|
|
3072
|
+
objective: "Transform simple questions into complex versions requiring advanced SQL techniques"
|
|
3073
|
+
}),
|
|
3074
|
+
fragment("original_question", params.question),
|
|
3075
|
+
fragment(
|
|
3076
|
+
"original_sql",
|
|
3077
|
+
params.sql,
|
|
3078
|
+
"(This shows what the original question required)"
|
|
3079
|
+
),
|
|
3080
|
+
fragment("database_schema", params.schema),
|
|
3081
|
+
fragment(
|
|
3082
|
+
"technique",
|
|
3083
|
+
{ name: params.technique },
|
|
3084
|
+
params.techniqueInstruction
|
|
3085
|
+
),
|
|
3086
|
+
fragment(
|
|
3087
|
+
"task",
|
|
3088
|
+
dedent6`
|
|
3089
|
+
Evolve the original question using the "${params.technique}" technique.
|
|
1718
3090
|
|
|
1719
3091
|
Requirements:
|
|
1720
3092
|
1. The evolved question must be MORE COMPLEX than the original
|
|
@@ -1723,17 +3095,29 @@ var questionEvolverAgent = agent7({
|
|
|
1723
3095
|
4. Use natural language - no SQL keywords
|
|
1724
3096
|
5. Keep the question realistic and practical
|
|
1725
3097
|
6. The evolved question should build upon the original topic/domain
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
})
|
|
3098
|
+
`
|
|
3099
|
+
),
|
|
3100
|
+
guardrail({
|
|
3101
|
+
rule: "The evolved question MUST require more complex SQL than the original"
|
|
3102
|
+
}),
|
|
3103
|
+
guardrail({
|
|
3104
|
+
rule: "Do not ask for data that does not exist in the schema"
|
|
3105
|
+
}),
|
|
3106
|
+
guardrail({
|
|
3107
|
+
rule: "Keep the question grounded in the same domain as the original"
|
|
3108
|
+
}),
|
|
3109
|
+
guardrail({ rule: "Make sure the question is clear and unambiguous" }),
|
|
3110
|
+
user(
|
|
3111
|
+
`Evolve this question using "${params.technique}": "${params.question}"`
|
|
3112
|
+
)
|
|
3113
|
+
);
|
|
3114
|
+
const evolverOutput = structuredOutput({
|
|
3115
|
+
model: params.model ?? groq8("openai/gpt-oss-20b"),
|
|
3116
|
+
context,
|
|
3117
|
+
schema: evolverOutputSchema
|
|
3118
|
+
});
|
|
3119
|
+
return evolverOutput.generate();
|
|
3120
|
+
}
|
|
1737
3121
|
var ALL_TECHNIQUES = [
|
|
1738
3122
|
"add-aggregation",
|
|
1739
3123
|
"add-filter",
|
|
@@ -1760,7 +3144,7 @@ var DepthEvolver = class extends PairProducer {
|
|
|
1760
3144
|
* Removes batch barrier - no longer waits for all evolutions before yielding.
|
|
1761
3145
|
*/
|
|
1762
3146
|
async *produce() {
|
|
1763
|
-
const introspection =
|
|
3147
|
+
const introspection = "";
|
|
1764
3148
|
const count = this.options?.count ?? 1;
|
|
1765
3149
|
const techniques = this.options?.techniques ?? ALL_TECHNIQUES;
|
|
1766
3150
|
let pairIndex = 0;
|
|
@@ -1779,27 +3163,23 @@ var DepthEvolver = class extends PairProducer {
|
|
|
1779
3163
|
}
|
|
1780
3164
|
}
|
|
1781
3165
|
async #processTask(pair, technique, introspection) {
|
|
1782
|
-
const
|
|
1783
|
-
() =>
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
schema: introspection,
|
|
1792
|
-
technique,
|
|
1793
|
-
techniqueInstruction: techniqueInstructions[technique]
|
|
1794
|
-
}
|
|
1795
|
-
)
|
|
3166
|
+
const output = await withRetry2(
|
|
3167
|
+
() => evolveQuestion({
|
|
3168
|
+
question: pair.question,
|
|
3169
|
+
sql: pair.sql,
|
|
3170
|
+
schema: introspection,
|
|
3171
|
+
technique,
|
|
3172
|
+
techniqueInstruction: techniqueInstructions[technique],
|
|
3173
|
+
model: this.options?.model
|
|
3174
|
+
})
|
|
1796
3175
|
);
|
|
1797
|
-
const evolvedQuestion =
|
|
3176
|
+
const evolvedQuestion = output.evolvedQuestion;
|
|
1798
3177
|
try {
|
|
1799
3178
|
const sqlResult = await toSql({
|
|
1800
3179
|
input: evolvedQuestion,
|
|
1801
3180
|
adapter: this.adapter,
|
|
1802
|
-
|
|
3181
|
+
schemaFragments: [],
|
|
3182
|
+
// Placeholder - needs to pass actual fragments
|
|
1803
3183
|
instructions: [],
|
|
1804
3184
|
model: this.options?.model
|
|
1805
3185
|
});
|
|
@@ -1828,68 +3208,64 @@ var DepthEvolver = class extends PairProducer {
|
|
|
1828
3208
|
async function withRetry2(computation) {
|
|
1829
3209
|
return pRetry2(computation, {
|
|
1830
3210
|
retries: 3,
|
|
1831
|
-
shouldRetry: (
|
|
3211
|
+
shouldRetry: (context) => {
|
|
1832
3212
|
console.log({
|
|
1833
3213
|
NoObjectGeneratedError: NoObjectGeneratedError2.isInstance(
|
|
1834
|
-
|
|
3214
|
+
context.error
|
|
1835
3215
|
),
|
|
1836
3216
|
NoOutputGeneratedError: NoOutputGeneratedError2.isInstance(
|
|
1837
|
-
|
|
3217
|
+
context.error
|
|
1838
3218
|
)
|
|
1839
3219
|
});
|
|
1840
|
-
return NoObjectGeneratedError2.isInstance(
|
|
3220
|
+
return NoObjectGeneratedError2.isInstance(context.error) || NoOutputGeneratedError2.isInstance(context.error);
|
|
1841
3221
|
},
|
|
1842
|
-
onFailedAttempt(
|
|
3222
|
+
onFailedAttempt(context) {
|
|
1843
3223
|
console.log(
|
|
1844
|
-
`Attempt ${
|
|
3224
|
+
`Attempt ${context.attemptNumber} failed. There are ${context.retriesLeft} retries left.`
|
|
1845
3225
|
);
|
|
1846
|
-
console.dir(
|
|
3226
|
+
console.dir(context.error, { depth: null });
|
|
1847
3227
|
}
|
|
1848
3228
|
});
|
|
1849
3229
|
}
|
|
1850
3230
|
|
|
1851
3231
|
// packages/text2sql/src/lib/synthesis/synthesizers/persona-generator.ts
|
|
1852
|
-
import { groq as
|
|
1853
|
-
import { defaultSettingsMiddleware as defaultSettingsMiddleware5, wrapLanguageModel as wrapLanguageModel5 } from "ai";
|
|
3232
|
+
import { groq as groq9 } from "@ai-sdk/groq";
|
|
1854
3233
|
import dedent7 from "dedent";
|
|
1855
3234
|
import z8 from "zod";
|
|
1856
|
-
import
|
|
1857
|
-
var
|
|
1858
|
-
|
|
1859
|
-
|
|
1860
|
-
|
|
1861
|
-
|
|
1862
|
-
|
|
3235
|
+
import "@deepagents/agent";
|
|
3236
|
+
var outputSchema3 = z8.object({
|
|
3237
|
+
personas: z8.array(
|
|
3238
|
+
z8.object({
|
|
3239
|
+
role: z8.string().describe("The job title or role of this persona"),
|
|
3240
|
+
perspective: z8.string().describe(
|
|
3241
|
+
"Rich description of what this persona cares about when querying the database"
|
|
3242
|
+
),
|
|
3243
|
+
styles: z8.array(z8.enum(ALL_STYLES)).min(1).max(3).describe(
|
|
3244
|
+
"Typical communication styles for this persona (1-3 styles)"
|
|
3245
|
+
)
|
|
1863
3246
|
})
|
|
1864
|
-
|
|
1865
|
-
|
|
1866
|
-
|
|
1867
|
-
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
3247
|
+
).min(1).describe("List of personas who would query this database")
|
|
3248
|
+
});
|
|
3249
|
+
async function generatePersonas(schemaFragments, options) {
|
|
3250
|
+
const schema = new XmlRenderer().render(schemaFragments);
|
|
3251
|
+
const count = options?.count ?? 5;
|
|
3252
|
+
const context = new ContextEngine({
|
|
3253
|
+
store: new InMemoryContextStore(),
|
|
3254
|
+
chatId: `persona-gen-${crypto.randomUUID()}`,
|
|
3255
|
+
userId: "system"
|
|
3256
|
+
});
|
|
3257
|
+
context.set(
|
|
3258
|
+
persona({
|
|
3259
|
+
name: "persona_generator",
|
|
3260
|
+
role: "You are an expert at understanding database schemas and inferring who would use them.",
|
|
3261
|
+
objective: "Generate realistic personas representing users who would query this database"
|
|
3262
|
+
}),
|
|
3263
|
+
fragment("database_schema", schema),
|
|
3264
|
+
fragment(
|
|
3265
|
+
"task",
|
|
3266
|
+
dedent7`
|
|
3267
|
+
Analyze the database schema and generate realistic personas representing
|
|
1884
3268
|
the different types of users who would query this database.
|
|
1885
|
-
</identity>
|
|
1886
|
-
|
|
1887
|
-
<database_schema>
|
|
1888
|
-
${state?.schema}
|
|
1889
|
-
</database_schema>
|
|
1890
|
-
|
|
1891
|
-
<task>
|
|
1892
|
-
Generate exactly ${state?.count} distinct personas who would query this database.
|
|
1893
3269
|
|
|
1894
3270
|
For each persona, provide:
|
|
1895
3271
|
1. **role**: Their job title or role (e.g., "Financial Analyst", "Customer Support Rep")
|
|
@@ -1916,9 +3292,11 @@ var personaGeneratorAgent = agent8({
|
|
|
1916
3292
|
- Perspectives should be detailed enough to guide question paraphrasing
|
|
1917
3293
|
- Cover different levels of technical expertise (some technical, some business-focused)
|
|
1918
3294
|
- Styles should match how this persona would naturally communicate
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
3295
|
+
`
|
|
3296
|
+
),
|
|
3297
|
+
fragment(
|
|
3298
|
+
"example",
|
|
3299
|
+
dedent7`
|
|
1922
3300
|
For an e-commerce schema with orders, customers, products tables:
|
|
1923
3301
|
|
|
1924
3302
|
{
|
|
@@ -1932,53 +3310,36 @@ var personaGeneratorAgent = agent8({
|
|
|
1932
3310
|
"perspective": "As inventory manager, I care about:\\n- Current stock levels and reorder points\\n- Product availability across warehouses\\n- Slow-moving inventory identification\\n- Supplier lead times and pending orders\\n- I need accurate counts, often aggregated by location",
|
|
1933
3311
|
"styles": ["formal", "interrogative"]
|
|
1934
3312
|
}
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
|
|
1958
|
-
async generate() {
|
|
1959
|
-
const schema = await this.adapter.introspect();
|
|
1960
|
-
const count = this.options?.count ?? 5;
|
|
1961
|
-
const { experimental_output } = await generate9(
|
|
1962
|
-
personaGeneratorAgent.clone({
|
|
1963
|
-
model: this.options?.model
|
|
1964
|
-
}),
|
|
1965
|
-
[user9(`Generate ${count} personas for this database schema.`)],
|
|
1966
|
-
{
|
|
1967
|
-
schema,
|
|
1968
|
-
count
|
|
1969
|
-
}
|
|
1970
|
-
);
|
|
1971
|
-
return experimental_output.personas;
|
|
1972
|
-
}
|
|
1973
|
-
};
|
|
3313
|
+
`
|
|
3314
|
+
),
|
|
3315
|
+
guardrail({
|
|
3316
|
+
rule: "Only generate personas relevant to the actual schema provided"
|
|
3317
|
+
}),
|
|
3318
|
+
guardrail({
|
|
3319
|
+
rule: "Do not invent tables or data that do not exist in the schema"
|
|
3320
|
+
}),
|
|
3321
|
+
guardrail({
|
|
3322
|
+
rule: "Ensure perspectives are specific to the domain, not generic"
|
|
3323
|
+
}),
|
|
3324
|
+
user(
|
|
3325
|
+
`Generate exactly ${count} distinct personas who would query this database.`
|
|
3326
|
+
)
|
|
3327
|
+
);
|
|
3328
|
+
const personaOutput = structuredOutput({
|
|
3329
|
+
model: options?.model ?? groq9("openai/gpt-oss-20b"),
|
|
3330
|
+
context,
|
|
3331
|
+
schema: outputSchema3
|
|
3332
|
+
});
|
|
3333
|
+
const output = await personaOutput.generate();
|
|
3334
|
+
return output.personas;
|
|
3335
|
+
}
|
|
1974
3336
|
|
|
1975
3337
|
// packages/text2sql/src/lib/agents/teachables.agent.ts
|
|
1976
|
-
import { groq as
|
|
1977
|
-
import { defaultSettingsMiddleware as defaultSettingsMiddleware6, wrapLanguageModel as wrapLanguageModel6 } from "ai";
|
|
3338
|
+
import { groq as groq10 } from "@ai-sdk/groq";
|
|
1978
3339
|
import dedent8 from "dedent";
|
|
1979
3340
|
import z9 from "zod";
|
|
1980
|
-
import
|
|
1981
|
-
var
|
|
3341
|
+
import "@deepagents/agent";
|
|
3342
|
+
var outputSchema4 = z9.object({
|
|
1982
3343
|
terms: z9.array(z9.object({ name: z9.string(), definition: z9.string() })).optional().describe("Domain terminology definitions"),
|
|
1983
3344
|
hints: z9.array(z9.object({ text: z9.string() })).optional().describe("Helpful hints for SQL generation"),
|
|
1984
3345
|
guardrails: z9.array(
|
|
@@ -2021,7 +3382,7 @@ var outputSchema = z9.object({
|
|
|
2021
3382
|
).optional().describe("SQL style preferences"),
|
|
2022
3383
|
analogies: z9.array(
|
|
2023
3384
|
z9.object({
|
|
2024
|
-
|
|
3385
|
+
concepts: z9.array(z9.string()).min(2),
|
|
2025
3386
|
relationship: z9.string(),
|
|
2026
3387
|
insight: z9.string().optional(),
|
|
2027
3388
|
therefore: z9.string().optional(),
|
|
@@ -2029,122 +3390,138 @@ var outputSchema = z9.object({
|
|
|
2029
3390
|
})
|
|
2030
3391
|
).optional().describe("Concept analogies")
|
|
2031
3392
|
});
|
|
2032
|
-
var teachablesAuthorAgent = agent9({
|
|
2033
|
-
name: "teachables-author",
|
|
2034
|
-
model: wrapLanguageModel6({
|
|
2035
|
-
model: groq9("openai/gpt-oss-20b"),
|
|
2036
|
-
middleware: defaultSettingsMiddleware6({
|
|
2037
|
-
settings: { temperature: 0.4, topP: 0.95 }
|
|
2038
|
-
})
|
|
2039
|
-
}),
|
|
2040
|
-
output: outputSchema,
|
|
2041
|
-
prompt: (state) => dedent8`
|
|
2042
|
-
<identity>
|
|
2043
|
-
You design "teachables" for a Text2SQL system. Teachables become structured XML instructions.
|
|
2044
|
-
Choose only high-impact items that improve accuracy, safety, or clarity for this database.
|
|
2045
|
-
</identity>
|
|
2046
|
-
|
|
2047
|
-
<database_schema>
|
|
2048
|
-
${state?.schema}
|
|
2049
|
-
</database_schema>
|
|
2050
|
-
|
|
2051
|
-
${state?.context ? `<additional_context>${state.context}</additional_context>` : ""}
|
|
2052
|
-
|
|
2053
|
-
<output_structure>
|
|
2054
|
-
Output a JSON object with these optional arrays (include only relevant ones):
|
|
2055
|
-
- terms: [{ name: string, definition: string }] - Domain terminology
|
|
2056
|
-
- hints: [{ text: string }] - Helpful SQL generation hints
|
|
2057
|
-
- guardrails: [{ rule: string, reason?: string, action?: string }] - Safety constraints
|
|
2058
|
-
- explains: [{ concept: string, explanation: string, therefore?: string }] - Concept explanations
|
|
2059
|
-
- examples: [{ question: string, answer: string, note?: string }] - Q&A examples
|
|
2060
|
-
- clarifications: [{ when: string, ask: string, reason: string }] - Clarification triggers
|
|
2061
|
-
- workflows: [{ task: string, steps: string[], triggers?: string[], notes?: string }] - Multi-step tasks
|
|
2062
|
-
- quirks: [{ issue: string, workaround: string }] - Known issues
|
|
2063
|
-
- styleGuides: [{ prefer: string, never?: string, always?: string }] - SQL style rules
|
|
2064
|
-
- analogies: [{ concept: string[], relationship: string, insight?: string, therefore?: string, pitfall?: string }]
|
|
2065
|
-
</output_structure>
|
|
2066
|
-
|
|
2067
|
-
<instructions>
|
|
2068
|
-
1. Analyze the schema to infer domain, relationships, and sensitive columns.
|
|
2069
|
-
2. Generate 3-10 teachables total across all categories, prioritizing:
|
|
2070
|
-
- guardrails for PII columns (email, ssn, phone, etc)
|
|
2071
|
-
- hints for status/enum columns
|
|
2072
|
-
- clarifications for ambiguous terms
|
|
2073
|
-
3. Ground everything in the schema - do not invent tables/columns.
|
|
2074
|
-
4. Only include categories that are relevant to this schema.
|
|
2075
|
-
</instructions>
|
|
2076
|
-
`
|
|
2077
|
-
});
|
|
2078
3393
|
async function toTeachings(input, options) {
|
|
2079
|
-
const
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
3394
|
+
const context = new ContextEngine({
|
|
3395
|
+
store: new InMemoryContextStore(),
|
|
3396
|
+
chatId: `teachables-gen-${crypto.randomUUID()}`,
|
|
3397
|
+
userId: "system"
|
|
3398
|
+
});
|
|
3399
|
+
context.set(
|
|
3400
|
+
persona({
|
|
3401
|
+
name: "teachables-author",
|
|
3402
|
+
role: 'You design "fragments" for a Text2SQL system. Fragments become structured XML instructions.',
|
|
3403
|
+
objective: "Choose only high-impact items that improve accuracy, safety, or clarity for this database"
|
|
3404
|
+
}),
|
|
3405
|
+
fragment("database_schema", input.schema),
|
|
3406
|
+
...input.context ? [fragment("additional_context", input.context)] : [],
|
|
3407
|
+
fragment(
|
|
3408
|
+
"output_structure",
|
|
3409
|
+
dedent8`
|
|
3410
|
+
Output a JSON object with these optional arrays (include only relevant ones):
|
|
3411
|
+
- terms: [{ name: string, definition: string }] - Domain terminology
|
|
3412
|
+
- hints: [{ text: string }] - Helpful SQL generation hints
|
|
3413
|
+
- guardrails: [{ rule: string, reason?: string, action?: string }] - Safety constraints
|
|
3414
|
+
- explains: [{ concept: string, explanation: string, therefore?: string }] - Concept explanations
|
|
3415
|
+
- examples: [{ question: string, answer: string, note?: string }] - Q&A examples
|
|
3416
|
+
- clarifications: [{ when: string, ask: string, reason: string }] - Clarification triggers
|
|
3417
|
+
- workflows: [{ task: string, steps: string[], triggers?: string[], notes?: string }] - Multi-step tasks
|
|
3418
|
+
- quirks: [{ issue: string, workaround: string }] - Known issues
|
|
3419
|
+
- styleGuides: [{ prefer: string, never?: string, always?: string }] - SQL style rules
|
|
3420
|
+
- analogies: [{ concepts: string[], relationship: string, insight?: string, therefore?: string, pitfall?: string }]
|
|
3421
|
+
`
|
|
3422
|
+
),
|
|
3423
|
+
fragment(
|
|
3424
|
+
"task",
|
|
3425
|
+
dedent8`
|
|
3426
|
+
1. Analyze the schema to infer domain, relationships, and sensitive columns.
|
|
3427
|
+
2. Generate 3-10 fragments total across all categories, prioritizing:
|
|
3428
|
+
- guardrails for PII columns (email, ssn, phone, etc)
|
|
3429
|
+
- hints for status/enum columns
|
|
3430
|
+
- clarifications for ambiguous terms
|
|
3431
|
+
3. Ground everything in the schema - do not invent tables/columns.
|
|
3432
|
+
4. Only include categories that are relevant to this schema.
|
|
3433
|
+
`
|
|
3434
|
+
),
|
|
3435
|
+
user(
|
|
3436
|
+
`Analyze this database schema and generate fragments that will help an AI generate accurate SQL queries.`
|
|
3437
|
+
)
|
|
3438
|
+
);
|
|
3439
|
+
const teachablesOutput = structuredOutput({
|
|
3440
|
+
model: options?.model ?? groq10("openai/gpt-oss-20b"),
|
|
3441
|
+
context,
|
|
3442
|
+
schema: outputSchema4
|
|
3443
|
+
});
|
|
3444
|
+
const result = await teachablesOutput.generate();
|
|
3445
|
+
const fragments = [];
|
|
3446
|
+
result.terms?.forEach((t) => fragments.push(term(t.name, t.definition)));
|
|
3447
|
+
result.hints?.forEach((h) => fragments.push(hint(h.text)));
|
|
3448
|
+
result.guardrails?.forEach(
|
|
3449
|
+
(g) => fragments.push(
|
|
3450
|
+
guardrail({ rule: g.rule, reason: g.reason, action: g.action })
|
|
3451
|
+
)
|
|
3452
|
+
);
|
|
3453
|
+
result.explains?.forEach(
|
|
3454
|
+
(e) => fragments.push(
|
|
3455
|
+
explain({
|
|
3456
|
+
concept: e.concept,
|
|
3457
|
+
explanation: e.explanation,
|
|
3458
|
+
therefore: e.therefore
|
|
3459
|
+
})
|
|
3460
|
+
)
|
|
3461
|
+
);
|
|
3462
|
+
result.examples?.forEach(
|
|
3463
|
+
(e) => fragments.push(
|
|
3464
|
+
example({ question: e.question, answer: e.answer, note: e.note })
|
|
3465
|
+
)
|
|
3466
|
+
);
|
|
3467
|
+
result.clarifications?.forEach(
|
|
3468
|
+
(c) => fragments.push(
|
|
3469
|
+
clarification({ when: c.when, ask: c.ask, reason: c.reason })
|
|
3470
|
+
)
|
|
3471
|
+
);
|
|
3472
|
+
result.workflows?.forEach(
|
|
3473
|
+
(w) => fragments.push(
|
|
3474
|
+
workflow({
|
|
3475
|
+
task: w.task,
|
|
3476
|
+
steps: w.steps,
|
|
3477
|
+
triggers: w.triggers,
|
|
3478
|
+
notes: w.notes
|
|
3479
|
+
})
|
|
3480
|
+
)
|
|
3481
|
+
);
|
|
3482
|
+
result.quirks?.forEach(
|
|
3483
|
+
(q) => fragments.push(quirk({ issue: q.issue, workaround: q.workaround }))
|
|
3484
|
+
);
|
|
3485
|
+
result.styleGuides?.forEach(
|
|
3486
|
+
(s) => fragments.push(
|
|
3487
|
+
styleGuide({ prefer: s.prefer, never: s.never, always: s.always })
|
|
3488
|
+
)
|
|
3489
|
+
);
|
|
3490
|
+
result.analogies?.forEach(
|
|
3491
|
+
(a) => fragments.push(
|
|
3492
|
+
analogy({
|
|
3493
|
+
concepts: a.concepts,
|
|
3494
|
+
relationship: a.relationship,
|
|
3495
|
+
insight: a.insight,
|
|
3496
|
+
therefore: a.therefore,
|
|
3497
|
+
pitfall: a.pitfall
|
|
3498
|
+
})
|
|
3499
|
+
)
|
|
2087
3500
|
);
|
|
2088
|
-
|
|
2089
|
-
...result.terms?.map((t) => ({ type: "term", ...t })) ?? [],
|
|
2090
|
-
...result.hints?.map((h) => ({ type: "hint", ...h })) ?? [],
|
|
2091
|
-
...result.guardrails?.map((g) => ({ type: "guardrail", ...g })) ?? [],
|
|
2092
|
-
...result.explains?.map((e) => ({ type: "explain", ...e })) ?? [],
|
|
2093
|
-
...result.examples?.map((e) => ({ type: "example", ...e })) ?? [],
|
|
2094
|
-
...result.clarifications?.map((c) => ({
|
|
2095
|
-
type: "clarification",
|
|
2096
|
-
...c
|
|
2097
|
-
})) ?? [],
|
|
2098
|
-
...result.workflows?.map((w) => ({ type: "workflow", ...w })) ?? [],
|
|
2099
|
-
...result.quirks?.map((q) => ({ type: "quirk", ...q })) ?? [],
|
|
2100
|
-
...result.styleGuides?.map((s) => ({
|
|
2101
|
-
type: "styleGuide",
|
|
2102
|
-
...s
|
|
2103
|
-
})) ?? [],
|
|
2104
|
-
...result.analogies?.map((a) => ({ type: "analogy", ...a })) ?? []
|
|
2105
|
-
];
|
|
2106
|
-
return toTeachables(generated);
|
|
3501
|
+
return fragments;
|
|
2107
3502
|
}
|
|
2108
3503
|
|
|
2109
3504
|
// packages/text2sql/src/lib/synthesis/synthesizers/teachings-generator.ts
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2114
|
-
|
|
2115
|
-
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
|
|
2119
|
-
|
|
2120
|
-
|
|
2121
|
-
|
|
2122
|
-
|
|
2123
|
-
|
|
2124
|
-
|
|
2125
|
-
async generate(maxRetries = 3) {
|
|
2126
|
-
const schema = await this.adapter.introspect();
|
|
2127
|
-
let lastError;
|
|
2128
|
-
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
2129
|
-
try {
|
|
2130
|
-
return await toTeachings(
|
|
2131
|
-
{
|
|
2132
|
-
schema,
|
|
2133
|
-
context: this.options?.context
|
|
2134
|
-
},
|
|
2135
|
-
{ model: this.options?.model }
|
|
2136
|
-
);
|
|
2137
|
-
} catch (error) {
|
|
2138
|
-
lastError = error;
|
|
2139
|
-
const isRetryable = lastError.message.includes("parse") || lastError.message.includes("schema") || lastError.message.includes("No object generated") || lastError.name.includes("AI_");
|
|
2140
|
-
if (!isRetryable) {
|
|
2141
|
-
throw lastError;
|
|
2142
|
-
}
|
|
3505
|
+
async function generateTeachings(schemaFragments, options) {
|
|
3506
|
+
const schema = new XmlRenderer().render(schemaFragments);
|
|
3507
|
+
const maxRetries = options?.maxRetries ?? 3;
|
|
3508
|
+
let lastError;
|
|
3509
|
+
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
3510
|
+
try {
|
|
3511
|
+
return await toTeachings(
|
|
3512
|
+
{ schema, context: options?.context },
|
|
3513
|
+
{ model: options?.model }
|
|
3514
|
+
);
|
|
3515
|
+
} catch (error) {
|
|
3516
|
+
lastError = error;
|
|
3517
|
+
const isRetryable = lastError.message.includes("parse") || lastError.message.includes("schema") || lastError.message.includes("No object generated") || lastError.name.includes("AI_");
|
|
3518
|
+
if (!isRetryable) {
|
|
3519
|
+
throw lastError;
|
|
2143
3520
|
}
|
|
2144
3521
|
}
|
|
2145
|
-
throw lastError;
|
|
2146
3522
|
}
|
|
2147
|
-
|
|
3523
|
+
throw lastError;
|
|
3524
|
+
}
|
|
2148
3525
|
export {
|
|
2149
3526
|
ALL_STYLES,
|
|
2150
3527
|
BaseContextualExtractor,
|
|
@@ -2156,16 +3533,16 @@ export {
|
|
|
2156
3533
|
LastQueryExtractor,
|
|
2157
3534
|
MessageExtractor,
|
|
2158
3535
|
PairProducer,
|
|
2159
|
-
PersonaGenerator,
|
|
2160
3536
|
SchemaSynthesizer,
|
|
2161
3537
|
SegmentedContextExtractor,
|
|
2162
3538
|
SqlExtractor,
|
|
2163
|
-
TeachingsGenerator,
|
|
2164
3539
|
ValidatedProducer,
|
|
2165
3540
|
WindowedContextExtractor,
|
|
2166
|
-
contextResolverAgent,
|
|
2167
3541
|
formatConversation,
|
|
3542
|
+
generatePersonas,
|
|
3543
|
+
generateTeachings,
|
|
2168
3544
|
getMessageText,
|
|
3545
|
+
resolveContext,
|
|
2169
3546
|
styleInstructions,
|
|
2170
3547
|
toPairs
|
|
2171
3548
|
};
|