@deepagents/text2sql 0.3.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/README.md +167 -0
  2. package/dist/finetune/convert-to-gguf.d.ts +18 -0
  3. package/dist/finetune/convert-to-gguf.d.ts.map +1 -0
  4. package/dist/finetune/run-finetune.d.ts +23 -0
  5. package/dist/finetune/run-finetune.d.ts.map +1 -0
  6. package/dist/finetune/run-mlx.d.ts +22 -0
  7. package/dist/finetune/run-mlx.d.ts.map +1 -0
  8. package/dist/index.d.ts +3 -0
  9. package/dist/index.d.ts.map +1 -1
  10. package/dist/index.js +1793 -279
  11. package/dist/index.js.map +4 -4
  12. package/dist/lib/adapters/adapter.d.ts +3 -3
  13. package/dist/lib/adapters/adapter.d.ts.map +1 -1
  14. package/dist/lib/adapters/{grounding.ticket.d.ts → groundings/abstract.grounding.d.ts} +2 -2
  15. package/dist/lib/adapters/groundings/abstract.grounding.d.ts.map +1 -0
  16. package/dist/lib/adapters/groundings/column-stats.grounding.d.ts +1 -1
  17. package/dist/lib/adapters/groundings/column-stats.grounding.d.ts.map +1 -1
  18. package/dist/lib/adapters/groundings/column-values.grounding.d.ts +76 -0
  19. package/dist/lib/adapters/groundings/column-values.grounding.d.ts.map +1 -0
  20. package/dist/lib/adapters/groundings/constraint.grounding.d.ts +1 -1
  21. package/dist/lib/adapters/groundings/constraint.grounding.d.ts.map +1 -1
  22. package/dist/lib/adapters/groundings/context.d.ts +1 -1
  23. package/dist/lib/adapters/groundings/context.d.ts.map +1 -1
  24. package/dist/lib/adapters/groundings/{grounding.d.ts → index.d.ts} +8 -5
  25. package/dist/lib/adapters/groundings/index.d.ts.map +1 -0
  26. package/dist/lib/adapters/groundings/{grounding.js → index.js} +411 -206
  27. package/dist/lib/adapters/groundings/index.js.map +7 -0
  28. package/dist/lib/adapters/groundings/indexes.grounding.d.ts +1 -1
  29. package/dist/lib/adapters/groundings/indexes.grounding.d.ts.map +1 -1
  30. package/dist/lib/adapters/groundings/info.grounding.d.ts +1 -1
  31. package/dist/lib/adapters/groundings/info.grounding.d.ts.map +1 -1
  32. package/dist/lib/adapters/groundings/report.grounding.d.ts +1 -1
  33. package/dist/lib/adapters/groundings/report.grounding.d.ts.map +1 -1
  34. package/dist/lib/adapters/groundings/row-count.grounding.d.ts +1 -1
  35. package/dist/lib/adapters/groundings/row-count.grounding.d.ts.map +1 -1
  36. package/dist/lib/adapters/groundings/table.grounding.d.ts +1 -1
  37. package/dist/lib/adapters/groundings/table.grounding.d.ts.map +1 -1
  38. package/dist/lib/adapters/groundings/view.grounding.d.ts +1 -1
  39. package/dist/lib/adapters/groundings/view.grounding.d.ts.map +1 -1
  40. package/dist/lib/adapters/postgres/column-stats.postgres.grounding.d.ts.map +1 -1
  41. package/dist/lib/adapters/postgres/column-values.postgres.grounding.d.ts +17 -0
  42. package/dist/lib/adapters/postgres/column-values.postgres.grounding.d.ts.map +1 -0
  43. package/dist/lib/adapters/postgres/index.d.ts +4 -4
  44. package/dist/lib/adapters/postgres/index.d.ts.map +1 -1
  45. package/dist/lib/adapters/postgres/index.js +233 -33
  46. package/dist/lib/adapters/postgres/index.js.map +4 -4
  47. package/dist/lib/adapters/sqlite/column-values.sqlite.grounding.d.ts +17 -0
  48. package/dist/lib/adapters/sqlite/column-values.sqlite.grounding.d.ts.map +1 -0
  49. package/dist/lib/adapters/sqlite/constraint.sqlite.grounding.d.ts.map +1 -1
  50. package/dist/lib/adapters/sqlite/index.d.ts +4 -4
  51. package/dist/lib/adapters/sqlite/index.d.ts.map +1 -1
  52. package/dist/lib/adapters/sqlite/index.js +214 -46
  53. package/dist/lib/adapters/sqlite/index.js.map +4 -4
  54. package/dist/lib/adapters/sqlserver/column-values.sqlserver.grounding.d.ts +17 -0
  55. package/dist/lib/adapters/sqlserver/column-values.sqlserver.grounding.d.ts.map +1 -0
  56. package/dist/lib/adapters/sqlserver/index.d.ts +4 -4
  57. package/dist/lib/adapters/sqlserver/index.d.ts.map +1 -1
  58. package/dist/lib/adapters/sqlserver/index.js +179 -32
  59. package/dist/lib/adapters/sqlserver/index.js.map +4 -4
  60. package/dist/lib/agents/chat1.agent.d.ts +50 -0
  61. package/dist/lib/agents/chat1.agent.d.ts.map +1 -0
  62. package/dist/lib/agents/chat2.agent.d.ts +68 -0
  63. package/dist/lib/agents/chat2.agent.d.ts.map +1 -0
  64. package/dist/lib/agents/chat3.agent.d.ts +80 -0
  65. package/dist/lib/agents/chat3.agent.d.ts.map +1 -0
  66. package/dist/lib/agents/chat4.agent.d.ts +88 -0
  67. package/dist/lib/agents/chat4.agent.d.ts.map +1 -0
  68. package/dist/lib/agents/question.agent.d.ts +23 -0
  69. package/dist/lib/agents/question.agent.d.ts.map +1 -0
  70. package/dist/lib/agents/sql.agent.d.ts +62 -0
  71. package/dist/lib/agents/sql.agent.d.ts.map +1 -0
  72. package/dist/lib/agents/teachables.agent.d.ts +8 -9
  73. package/dist/lib/agents/teachables.agent.d.ts.map +1 -1
  74. package/dist/lib/agents/text2sql.agent.d.ts +0 -1
  75. package/dist/lib/agents/text2sql.agent.d.ts.map +1 -1
  76. package/dist/lib/checkpoint.d.ts +99 -0
  77. package/dist/lib/checkpoint.d.ts.map +1 -0
  78. package/dist/lib/instructions.js +50 -21
  79. package/dist/lib/instructions.js.map +2 -2
  80. package/dist/lib/sql.d.ts +83 -3
  81. package/dist/lib/sql.d.ts.map +1 -1
  82. package/dist/lib/syntheize.d.ts +2 -0
  83. package/dist/lib/syntheize.d.ts.map +1 -0
  84. package/dist/lib/synthesis/decorators/deduplicated-producer.d.ts +26 -0
  85. package/dist/lib/synthesis/decorators/deduplicated-producer.d.ts.map +1 -0
  86. package/dist/lib/synthesis/decorators/filtered-producer.d.ts +26 -0
  87. package/dist/lib/synthesis/decorators/filtered-producer.d.ts.map +1 -0
  88. package/dist/lib/synthesis/decorators/index.d.ts +7 -0
  89. package/dist/lib/synthesis/decorators/index.d.ts.map +1 -0
  90. package/dist/lib/synthesis/decorators/validated-producer.d.ts +33 -0
  91. package/dist/lib/synthesis/decorators/validated-producer.d.ts.map +1 -0
  92. package/dist/lib/synthesis/extractors/base-contextual-extractor.d.ts +76 -0
  93. package/dist/lib/synthesis/extractors/base-contextual-extractor.d.ts.map +1 -0
  94. package/dist/lib/synthesis/extractors/full-context-extractor.d.ts +25 -0
  95. package/dist/lib/synthesis/extractors/full-context-extractor.d.ts.map +1 -0
  96. package/dist/lib/synthesis/extractors/index.d.ts +8 -0
  97. package/dist/lib/synthesis/extractors/index.d.ts.map +1 -0
  98. package/dist/lib/synthesis/extractors/last-query-extractor.d.ts +30 -0
  99. package/dist/lib/synthesis/extractors/last-query-extractor.d.ts.map +1 -0
  100. package/dist/lib/synthesis/extractors/message-extractor.d.ts +27 -0
  101. package/dist/lib/synthesis/extractors/message-extractor.d.ts.map +1 -0
  102. package/dist/lib/synthesis/extractors/segmented-context-extractor.d.ts +48 -0
  103. package/dist/lib/synthesis/extractors/segmented-context-extractor.d.ts.map +1 -0
  104. package/dist/lib/synthesis/extractors/sql-extractor.d.ts +27 -0
  105. package/dist/lib/synthesis/extractors/sql-extractor.d.ts.map +1 -0
  106. package/dist/lib/synthesis/extractors/windowed-context-extractor.d.ts +30 -0
  107. package/dist/lib/synthesis/extractors/windowed-context-extractor.d.ts.map +1 -0
  108. package/dist/lib/synthesis/index.d.ts +6 -0
  109. package/dist/lib/synthesis/index.d.ts.map +1 -0
  110. package/dist/lib/synthesis/index.js +2069 -0
  111. package/dist/lib/synthesis/index.js.map +7 -0
  112. package/dist/lib/synthesis/synthesizers/breadth-evolver.d.ts +34 -0
  113. package/dist/lib/synthesis/synthesizers/breadth-evolver.d.ts.map +1 -0
  114. package/dist/lib/synthesis/synthesizers/depth-evolver.d.ts +41 -0
  115. package/dist/lib/synthesis/synthesizers/depth-evolver.d.ts.map +1 -0
  116. package/dist/lib/synthesis/synthesizers/index.d.ts +7 -0
  117. package/dist/lib/synthesis/synthesizers/index.d.ts.map +1 -0
  118. package/dist/lib/synthesis/synthesizers/persona-generator.d.ts +34 -0
  119. package/dist/lib/synthesis/synthesizers/persona-generator.d.ts.map +1 -0
  120. package/dist/lib/synthesis/synthesizers/schema-synthesizer.d.ts +39 -0
  121. package/dist/lib/synthesis/synthesizers/schema-synthesizer.d.ts.map +1 -0
  122. package/dist/lib/synthesis/synthesizers/styles.d.ts +8 -0
  123. package/dist/lib/synthesis/synthesizers/styles.d.ts.map +1 -0
  124. package/dist/lib/synthesis/synthesizers/teachings-generator.d.ts +32 -0
  125. package/dist/lib/synthesis/synthesizers/teachings-generator.d.ts.map +1 -0
  126. package/dist/lib/synthesis/types.d.ts +26 -0
  127. package/dist/lib/synthesis/types.d.ts.map +1 -0
  128. package/dist/lib/teach/teachables.d.ts +18 -3
  129. package/dist/lib/teach/teachables.d.ts.map +1 -1
  130. package/dist/lib/teach/teachings.d.ts +9 -2
  131. package/dist/lib/teach/teachings.d.ts.map +1 -1
  132. package/package.json +32 -15
  133. package/dist/lib/adapters/grounding.ticket.d.ts.map +0 -1
  134. package/dist/lib/adapters/groundings/grounding.d.ts.map +0 -1
  135. package/dist/lib/adapters/groundings/grounding.js.map +0 -7
  136. package/dist/lib/adapters/groundings/low-cardinality.grounding.d.ts +0 -35
  137. package/dist/lib/adapters/groundings/low-cardinality.grounding.d.ts.map +0 -1
  138. package/dist/lib/adapters/postgres/low-cardinality.postgres.grounding.d.ts +0 -14
  139. package/dist/lib/adapters/postgres/low-cardinality.postgres.grounding.d.ts.map +0 -1
  140. package/dist/lib/adapters/sqlite/low-cardinality.sqlite.grounding.d.ts +0 -14
  141. package/dist/lib/adapters/sqlite/low-cardinality.sqlite.grounding.d.ts.map +0 -1
  142. package/dist/lib/adapters/sqlserver/low-cardinality.sqlserver.grounding.d.ts +0 -14
  143. package/dist/lib/adapters/sqlserver/low-cardinality.sqlserver.grounding.d.ts.map +0 -1
@@ -0,0 +1,2069 @@
1
+ // packages/text2sql/src/lib/synthesis/types.ts
2
/**
 * Base class for objects that emit question/SQL pairs in chunks.
 * `toPairs()` hands the instance to the module-level `toPairs` helper, which
 * calls `produce()` on it, so concrete producers must supply `produce()`.
 */
var PairProducer = class {
  /**
   * Turns a source into an async iterable of pair chunks.
   * @param producer - An array of pairs, or an object exposing `produce()`
   * @returns For arrays, a generator that yields the array once as a single chunk;
   *          otherwise whatever `producer.produce()` returns
   */
  from(producer) {
    if (!Array.isArray(producer)) {
      return producer.produce();
    }
    async function* emitOnce(chunk) {
      yield chunk;
    }
    return emitOnce(producer);
  }
  /**
   * Drains this producer through the module-level `toPairs` helper.
   * @returns Promise of all pairs produced by this instance
   */
  toPairs() {
    return toPairs(this);
  }
};
12
/**
 * Collects every chunk emitted by a producer into one flat array.
 * @param producer - Object whose `produce()` returns an async iterable of pair chunks
 * @returns All pairs in emission order
 */
async function toPairs(producer) {
  const pairs = [];
  for await (const chunk of producer.produce()) {
    // Append element by element: `push(...chunk)` passes every element as a
    // call argument and throws a RangeError once a chunk is large enough.
    for (const pair of chunk) {
      pairs.push(pair);
    }
  }
  return pairs;
}
19
+
20
+ // packages/text2sql/src/lib/synthesis/decorators/filtered-producer.ts
21
/**
 * Decorator that forwards only the pairs matching the configured criteria.
 */
var FilteredProducer = class extends PairProducer {
  /**
   * @param producer - Source producer to filter
   * @param options - Filter configuration (`successOnly`, `tables`, `filter`)
   */
  constructor(producer, options = {}) {
    super();
    this.producer = producer;
    this.options = options;
  }
  /**
   * Produces pairs filtered by success status, table usage, and custom predicates.
   *
   * - Unsuccessful pairs are dropped unless `options.successOnly === false`.
   * - When `options.tables` is non-empty, a pair is kept only if its SQL contains
   *   at least one of the names as a case-insensitive substring.
   * - When `options.filter` is set, it must return a truthy value for the pair.
   *
   * Chunks that end up empty are not emitted.
   * @returns Pairs matching all configured filter criteria
   */
  async *produce() {
    for await (const chunk of this.producer.produce()) {
      // Lower-case the table names once per chunk rather than once per pair.
      const { tables } = this.options;
      const wantedTables = tables?.length
        ? tables.map((name) => name.toLowerCase())
        : null;
      const filtered = chunk.filter((pair) => {
        if (this.options.successOnly !== false && !pair.success) {
          return false;
        }
        if (wantedTables) {
          const sqlLower = pair.sql.toLowerCase();
          if (!wantedTables.some((name) => sqlLower.includes(name))) {
            return false;
          }
        }
        if (this.options.filter && !this.options.filter(pair)) {
          return false;
        }
        return true;
      });
      if (filtered.length) {
        yield filtered;
      }
    }
  }
};
59
+
60
+ // packages/text2sql/src/lib/synthesis/decorators/deduplicated-producer.ts
61
/**
 * Decorator that drops pairs already seen earlier in the stream.
 */
var DeduplicatedProducer = class extends PairProducer {
  /**
   * @param producer - Source producer to deduplicate
   * @param options - Deduplication configuration (`strategy`, default "exact")
   */
  constructor(producer, options = {}) {
    super();
    this.producer = producer;
    this.options = options;
  }
  /**
   * Produces pairs with duplicates removed based on the configured strategy.
   * The seen-set spans all chunks of one `produce()` run; chunks left empty
   * after deduplication are not emitted.
   * @returns Unique pairs after deduplication
   */
  async *produce() {
    const { strategy = "exact" } = this.options;
    // Builds the identity key; only the fields a strategy needs are read.
    const keyOf = (pair) => {
      if (strategy === "sql-only") {
        return this.normalizeSQL(pair.sql);
      }
      if (strategy === "question-only") {
        return pair.question.toLowerCase().trim();
      }
      // "exact" and any unrecognised strategy: question and SQL together.
      return `${pair.question.toLowerCase().trim()}|||${this.normalizeSQL(pair.sql)}`;
    };
    const seenKeys = /* @__PURE__ */ new Set();
    for await (const batch of this.producer.produce()) {
      const fresh = [];
      for (const pair of batch) {
        const key = keyOf(pair);
        if (seenKeys.has(key)) {
          continue;
        }
        seenKeys.add(key);
        fresh.push(pair);
      }
      if (fresh.length) {
        yield fresh;
      }
    }
  }
  /**
   * Lower-cases the SQL, collapses whitespace runs to one space, and trims.
   * @param sql - SQL text to normalise
   * @returns Normalised SQL used as a comparison key
   */
  normalizeSQL(sql) {
    const lowered = sql.toLowerCase();
    const collapsed = lowered.replace(/\s+/g, " ");
    return collapsed.trim();
  }
};
107
+
108
+ // packages/text2sql/src/lib/synthesis/decorators/validated-producer.ts
109
/**
 * Decorator that runs each pair's SQL through the adapter's validator and,
 * optionally, executes it to record a row count.
 */
var ValidatedProducer = class extends PairProducer {
  /**
   * @param producer - Source producer to validate
   * @param adapter - Database adapter for SQL validation
   * @param options - Validation configuration (`removeInvalid`, `execute`)
   */
  constructor(producer, adapter, options = {}) {
    super();
    this.producer = producer;
    this.adapter = adapter;
    this.options = options;
  }
  /**
   * Produces pairs with SQL validation applied, optionally executing queries.
   * Invalid pairs are emitted with `success: false` and `error`, or dropped
   * when `options.removeInvalid` is set. Valid pairs get `success: true` and
   * `rowCount`, which stays undefined unless execution returned an array.
   * @returns Validated pairs with error/rowCount metadata attached
   */
  async *produce() {
    for await (const batch of this.producer.produce()) {
      const checked = [];
      for (const pair of batch) {
        const error = await this.adapter.validate(pair.sql);
        if (error) {
          if (!this.options.removeInvalid) {
            checked.push({ ...pair, success: false, error });
          }
        } else {
          let rowCount;
          if (this.options.execute) {
            try {
              const rows = await this.adapter.execute(pair.sql);
              if (Array.isArray(rows)) {
                rowCount = rows.length;
              }
            } catch {
              // Best-effort: an execution failure leaves rowCount undefined
              // and the pair is still reported as valid.
            }
          }
          checked.push({ ...pair, success: true, rowCount });
        }
      }
      if (checked.length) {
        yield checked;
      }
    }
  }
};
160
+
161
+ // packages/text2sql/src/lib/synthesis/extractors/message-extractor.ts
162
+ import {
163
+ getToolOrDynamicToolName as getToolOrDynamicToolName2,
164
+ isToolOrDynamicToolUIPart as isToolOrDynamicToolUIPart2
165
+ } from "ai";
166
+
167
+ // packages/text2sql/src/lib/synthesis/extractors/base-contextual-extractor.ts
168
+ import { groq } from "@ai-sdk/groq";
169
+ import {
170
+ getToolOrDynamicToolName,
171
+ isTextUIPart,
172
+ isToolOrDynamicToolUIPart
173
+ } from "ai";
174
+ import dedent from "dedent";
175
+ import z from "zod";
176
+ import { agent, generate, user } from "@deepagents/agent";
177
// contextResolverAgent: agent definition named "context_resolver".
// - model: obtained via groq("openai/gpt-oss-20b").
// - output: a zod object with a single string field, `question`.
// - prompt: built from optional `state` fields. `state.introspection` is wrapped
//   in a <schema> section only when truthy; `state.conversation` and `state.sql`
//   are interpolated directly into their sections.
// NOTE(review): presumably a missing `conversation` or `sql` renders as the text
// "undefined" in the prompt; confirm against dedent's interpolation behavior.
+ var contextResolverAgent = agent({
178
+ name: "context_resolver",
179
+ model: groq("openai/gpt-oss-20b"),
180
+ output: z.object({
181
+ question: z.string().describe(
182
+ "A standalone natural language question that the SQL query answers"
183
+ )
184
+ }),
185
+ prompt: (state) => dedent`
186
+ <identity>
187
+ You are an expert at understanding conversational context and generating clear,
188
+ standalone questions from multi-turn conversations.
189
+ </identity>
190
+
191
+ ${state?.introspection ? `<schema>
192
+ ${state.introspection}
193
+ </schema>` : ""}
194
+
195
+ <conversation>
196
+ ${state?.conversation}
197
+ </conversation>
198
+
199
+ <sql>
200
+ ${state?.sql}
201
+ </sql>
202
+
203
+ <task>
204
+ Given the conversation above and the SQL query that was executed,
205
+ generate a single, standalone natural language question that:
206
+ 1. Fully captures the user's intent without needing prior context
207
+ 2. Uses natural business language (not SQL terminology)
208
+ 3. Could be asked by someone who hasn't seen the conversation
209
+ 4. Accurately represents what the SQL query answers
210
+ </task>
211
+
212
+ <examples>
213
+ Conversation: "Show me customers" → "Filter to NY" → "Sort by revenue"
214
+ SQL: SELECT * FROM customers WHERE region = 'NY' ORDER BY revenue DESC
215
+ Question: "Show me customers in the NY region sorted by revenue"
216
+
217
+ Conversation: "What were sales last month?" → "Break it down by category"
218
+ SQL: SELECT category, SUM(amount) FROM sales WHERE date >= '2024-11-01' GROUP BY category
219
+ Question: "What were sales by category for last month?"
220
+ </examples>
221
+ `
222
+ });
223
/**
 * Joins the text of every text part of a message with single spaces.
 * @param message - Message object exposing a `parts` array
 * @returns The trimmed, space-joined text (empty string when there is none)
 */
function getMessageText(message) {
  const pieces = [];
  for (const part of message.parts) {
    if (isTextUIPart(part)) {
      pieces.push(part.text);
    }
  }
  return pieces.join(" ").trim();
}
227
/**
 * Renders conversation entries as a numbered list, one entry per line.
 * @param messages - Conversation entries, in order
 * @returns Lines of the form "[n] entry" (1-based) joined with newlines
 */
function formatConversation(messages) {
  const numbered = messages.map((entry, index) => {
    const position = index + 1;
    return `[${position}] ${entry}`;
  });
  return numbered.join("\n");
}
230
/**
 * Skeleton for extractors that pair each SQL tool call with the conversation
 * that led up to it. The class calls `this.onUserMessage(text)` and
 * `this.getContextSnapshot()` but does not define them, so subclasses must.
 */
var BaseContextualExtractor = class extends PairProducer {
  // Running conversation log, as "User: ..." / "Assistant: ..." lines.
  context = [];
  // Collected { sql, success, conversationContext } records awaiting resolution.
  results = [];
  /**
   * @param messages - Chat history to scan
   * @param adapter - Database adapter; `introspect()` is called on it
   * @param options - Extraction configuration (`includeFailures`, `toolName`)
   */
  constructor(messages, adapter, options = {}) {
    super();
    this.messages = messages;
    this.adapter = adapter;
    this.options = options;
  }
  /**
   * Template method - defines the extraction algorithm skeleton.
   * Resets state, collects SQL with context, then (only if something was
   * collected) introspects the schema and resolves questions.
   * Subclasses customize behavior via hooks, not by overriding this method.
   */
  async *produce() {
    this.context = [];
    this.results = [];
    const {
      includeFailures: keepFailures = false,
      toolName: targetTool = "db_query"
    } = this.options;
    await this.extractSqlsWithContext(targetTool, keepFailures);
    if (this.results.length > 0) {
      const introspection = await this.adapter.introspect();
      yield* this.resolveQuestions(introspection);
    }
  }
  /**
   * Core extraction loop - iterates through messages and calls hooks.
   * User messages with non-empty text go to `onUserMessage`; assistant
   * messages go to `extractFromAssistant`; other roles are ignored.
   */
  async extractSqlsWithContext(toolName, includeFailures) {
    for (const message of this.messages) {
      switch (message.role) {
        case "user": {
          const text = getMessageText(message);
          if (text) {
            await this.onUserMessage(text);
          }
          break;
        }
        case "assistant":
          await this.extractFromAssistant(message, toolName, includeFailures);
          break;
        default:
          break;
      }
    }
  }
  /**
   * Extract SQL from assistant message parts.
   * A part is recorded when it is a call to `toolName` with an `sql` input,
   * its state is "output-available" (or "output-error" when failures are
   * included), and the current context snapshot is not empty. The assistant's
   * own text is appended to the context after its parts were examined.
   */
  async extractFromAssistant(message, toolName, includeFailures) {
    for (const part of message.parts) {
      const isTargetCall = isToolOrDynamicToolUIPart(part) && getToolOrDynamicToolName(part) === toolName;
      if (!isTargetCall) {
        continue;
      }
      const toolInput = "input" in part ? part.input : void 0;
      if (!toolInput?.sql) {
        continue;
      }
      const success = part.state === "output-available";
      const failed = part.state === "output-error";
      const wanted = success || (failed && includeFailures);
      if (!wanted) {
        continue;
      }
      const snapshot = this.getContextSnapshot();
      if (snapshot.length === 0) {
        continue;
      }
      this.results.push({
        sql: toolInput.sql,
        success,
        conversationContext: snapshot
      });
    }
    const assistantText = getMessageText(message);
    if (assistantText) {
      this.context.push(`Assistant: ${assistantText}`);
    }
  }
  /**
   * Resolve extracted SQL contexts into standalone questions using LLM.
   * Results are processed one at a time; each yields a single-pair chunk.
   */
  async *resolveQuestions(introspection) {
    for (const { sql, success, conversationContext } of this.results) {
      const state = {
        conversation: formatConversation(conversationContext),
        sql,
        introspection
      };
      const { experimental_output } = await generate(
        contextResolverAgent,
        [user("Generate a standalone question for this SQL query.")],
        state
      );
      yield [
        {
          question: experimental_output.question,
          sql,
          context: conversationContext,
          success
        }
      ];
    }
  }
};
334
+
335
+ // packages/text2sql/src/lib/synthesis/extractors/message-extractor.ts
336
/**
 * Extractor that pairs each SQL tool call with the text of the most recent
 * user message; no LLM is involved.
 */
var MessageExtractor = class extends PairProducer {
  /**
   * @param messages - Chat history to extract pairs from
   * @param options - Extraction configuration (`includeFailures`, `toolName`)
   */
  constructor(messages, options = {}) {
    super();
    this.messages = messages;
    this.options = options;
  }
  /**
   * Extracts question-SQL pairs by parsing tool calls and pairing with user messages.
   * Assistant messages seen before any user message are ignored. Each match
   * is yielded as its own single-pair chunk.
   * @returns Pairs extracted from db_query tool invocations
   */
  async *produce() {
    const {
      includeFailures: keepFailures = false,
      toolName: targetTool = "db_query"
    } = this.options;
    let latestUser = null;
    for (const message of this.messages) {
      if (message.role === "user") {
        latestUser = message;
        continue;
      }
      if (message.role !== "assistant" || !latestUser) {
        continue;
      }
      for (const part of message.parts) {
        const isTargetCall = isToolOrDynamicToolUIPart2(part) && getToolOrDynamicToolName2(part) === targetTool;
        if (!isTargetCall) {
          continue;
        }
        const toolInput = "input" in part ? part.input : void 0;
        if (!toolInput?.sql) {
          continue;
        }
        const success = part.state === "output-available";
        const failed = part.state === "output-error";
        // Keep successes always, failures only on request, nothing else.
        if (!(success || (failed && keepFailures))) {
          continue;
        }
        const question = getMessageText(latestUser);
        if (!question) {
          continue;
        }
        yield [{ question, sql: toolInput.sql, success }];
      }
    }
  }
};
394
+
395
+ // packages/text2sql/src/lib/synthesis/extractors/sql-extractor.ts
396
+ import { groq as groq2 } from "@ai-sdk/groq";
397
+ import dedent2 from "dedent";
398
+ import z2 from "zod";
399
+ import { agent as agent2, generate as generate2, user as user2 } from "@deepagents/agent";
400
// sqlToQuestionAgent: agent definition named "sql_to_question".
// - model: obtained via groq2("llama-3.3-70b-versatile").
// - output: a zod object with a single string field, `question`.
// - prompt: interpolates `state.introspection` into the <schema> section and
//   `state.sql` into the <sql> section; both are read with optional chaining.
// NOTE(review): unlike contextResolverAgent, the <schema> section is always
// emitted here, even when `state.introspection` is missing.
+ var sqlToQuestionAgent = agent2({
401
+ name: "sql_to_question",
402
+ model: groq2("llama-3.3-70b-versatile"),
403
+ output: z2.object({
404
+ question: z2.string().describe("A natural language question that the SQL query answers")
405
+ }),
406
+ prompt: (state) => dedent2`
407
+ <identity>
408
+ You are an expert at understanding SQL queries and generating clear,
409
+ natural language questions that describe what the query retrieves.
410
+ </identity>
411
+
412
+ <schema>
413
+ ${state?.introspection}
414
+ </schema>
415
+
416
+ <sql>
417
+ ${state?.sql}
418
+ </sql>
419
+
420
+ <task>
421
+ Given the database schema and the SQL query above, generate a single
422
+ natural language question that:
423
+ 1. Accurately describes what information the query retrieves
424
+ 2. Uses natural business language (not SQL terminology)
425
+ 3. Could be asked by a non-technical user
426
+ 4. Is concise but complete
427
+ </task>
428
+
429
+ <examples>
430
+ SQL: SELECT COUNT(*) FROM customers WHERE region = 'NY'
431
+ Question: "How many customers do we have in New York?"
432
+
433
+ SQL: SELECT product_name, SUM(quantity) as total FROM orders GROUP BY product_name ORDER BY total DESC LIMIT 10
434
+ Question: "What are our top 10 products by quantity sold?"
435
+
436
+ SQL: SELECT c.name, COUNT(o.id) FROM customers c LEFT JOIN orders o ON c.id = o.customer_id GROUP BY c.id HAVING COUNT(o.id) = 0
437
+ Question: "Which customers have never placed an order?"
438
+ </examples>
439
+ `
440
+ });
441
/**
 * Extractor that starts from raw SQL and asks an LLM for a matching question.
 */
var SqlExtractor = class extends PairProducer {
  #sqls;
  #adapter;
  #options;
  /**
   * @param sql - SQL query or queries to generate questions for
   * @param adapter - Database adapter for validation and schema introspection
   * @param options - Extraction configuration (`validateSql`, `skipInvalid`)
   */
  constructor(sql, adapter, options = {}) {
    super();
    this.#sqls = Array.isArray(sql) ? sql : [sql];
    this.#adapter = adapter;
    this.#options = options;
  }
  /**
   * Generates natural language questions for each SQL query using an LLM.
   * The schema is introspected once up front. With `validateSql` (default
   * true) each query is validated first; invalid queries are skipped when
   * `skipInvalid` is set, otherwise emitted with `success: false`.
   * @returns Pairs with generated questions and original SQL
   */
  async *produce() {
    const { validateSql = true, skipInvalid = false } = this.#options;
    const introspection = await this.#adapter.introspect();
    for (const sql of this.#sqls) {
      let isValid = true;
      if (validateSql) {
        const error = await this.#adapter.validate(sql);
        // A null or undefined validation result means "no error".
        isValid = error == null;
      }
      if (!isValid && skipInvalid) {
        continue;
      }
      const state = { sql, introspection };
      const { experimental_output } = await generate2(
        sqlToQuestionAgent,
        [user2("Generate a natural language question for this SQL query.")],
        state
      );
      yield [{ question: experimental_output.question, sql, success: isValid }];
    }
  }
};
490
+
491
+ // packages/text2sql/src/lib/synthesis/extractors/full-context-extractor.ts
492
/**
 * Contextual extractor whose snapshot is the entire conversation so far.
 */
var FullContextExtractor = class extends BaseContextualExtractor {
  /**
   * @param messages - Chat history to scan
   * @param adapter - Database adapter passed through to the base class
   * @param options - Extraction configuration passed through to the base class
   */
  constructor(messages, adapter, options = {}) {
    super(messages, adapter, options);
  }
  /**
   * Record the user message in the context log; nothing is ever discarded.
   */
  async onUserMessage(text) {
    const entry = `User: ${text}`;
    this.context.push(entry);
  }
  /**
   * Hand back a copy of everything accumulated so far.
   */
  getContextSnapshot() {
    return Array.from(this.context);
  }
};
509
+
510
+ // packages/text2sql/src/lib/synthesis/extractors/windowed-context-extractor.ts
511
/**
 * Contextual extractor whose snapshot is limited to the most recent
 * `windowSize` context entries.
 */
var WindowedContextExtractor = class extends BaseContextualExtractor {
  windowSize;
  /**
   * @param messages - Chat history to scan
   * @param adapter - Database adapter passed through to the base class
   * @param options - Extraction configuration; `options.windowSize` is read here,
   *                  so `options` itself is required
   */
  constructor(messages, adapter, options) {
    super(messages, adapter, options);
    this.windowSize = options.windowSize;
  }
  /**
   * Add user message to context (keeps all, windowing happens on snapshot).
   */
  async onUserMessage(text) {
    this.context.push(`User: ${text}`);
  }
  /**
   * Return only the last N messages based on window size.
   * @returns A new array holding at most `windowSize` trailing context entries
   */
  getContextSnapshot() {
    // A zero or negative window holds nothing. Without this guard `slice(-0)`
    // is `slice(0)` and returns the whole context, and a negative size would
    // drop leading entries instead of limiting the tail.
    if (this.windowSize <= 0) {
      return [];
    }
    // `slice` with a negative start already returns a full copy when the
    // window is at least as long as the context.
    return this.context.slice(-this.windowSize);
  }
};
533
+
534
+ // packages/text2sql/src/lib/synthesis/extractors/segmented-context-extractor.ts
535
+ import { groq as groq3 } from "@ai-sdk/groq";
536
+ import dedent3 from "dedent";
537
+ import z3 from "zod";
538
+ import { agent as agent3, generate as generate3, user as user3 } from "@deepagents/agent";
539
// topicChangeAgent: agent definition named "topic_change_detector".
// - model: obtained via groq3("openai/gpt-oss-20b").
// - output: a zod object with a boolean `isTopicChange` and a string `reason`.
// - prompt: interpolates `state.context` (falling back to the literal text
//   "(no prior context)" when falsy) and `state.newMessage`.
+ var topicChangeAgent = agent3({
540
+ name: "topic_change_detector",
541
+ model: groq3("openai/gpt-oss-20b"),
542
+ output: z3.object({
543
+ isTopicChange: z3.boolean().describe("Whether the new message represents a topic change"),
544
+ reason: z3.string().describe("Brief explanation for the decision")
545
+ }),
546
+ prompt: (state) => dedent3`
547
+ <identity>
548
+ You are an expert at understanding conversational flow and detecting topic changes.
549
+ </identity>
550
+
551
+ <conversation_context>
552
+ ${state?.context || "(no prior context)"}
553
+ </conversation_context>
554
+
555
+ <new_message>
556
+ ${state?.newMessage}
557
+ </new_message>
558
+
559
+ <task>
560
+ Determine if the new message represents a significant topic change from the
561
+ prior conversation context. A topic change occurs when:
562
+ 1. The user asks about a completely different entity/table/domain
563
+ 2. The user starts a new analytical question unrelated to prior discussion
564
+ 3. There's a clear shift in what data or metrics are being discussed
565
+
566
+ NOT a topic change:
567
+ - Follow-up questions refining the same query ("filter by...", "sort by...")
568
+ - Questions about the same entities with different conditions
569
+ - Requests for more details on the same topic
570
+ </task>
571
+
572
+ <examples>
573
+ Context: "Show me customers in NY" → "Sort by revenue"
574
+ New: "Filter to those with orders over $1000"
575
+ Decision: NOT a topic change (still refining customer query)
576
+
577
+ Context: "Show me customers in NY" → "Sort by revenue"
578
+ New: "What were our total sales last quarter?"
579
+ Decision: Topic change (shifted from customers to sales metrics)
580
+
581
+ Context: "List all products"
582
+ New: "How many orders did we have last month?"
583
+ Decision: Topic change (products → orders/sales)
584
+ </examples>
585
+ `
586
+ });
587
/**
 * Contextual extractor that starts a fresh context segment whenever an LLM
 * judges a user message to be a change of topic.
 */
var SegmentedContextExtractor = class extends BaseContextualExtractor {
  /**
   * @param messages - Chat history to scan
   * @param adapter - Database adapter passed through to the base class
   * @param options - Extraction configuration passed through to the base class
   */
  constructor(messages, adapter, options = {}) {
    super(messages, adapter, options);
  }
  /**
   * Handle user message with topic change detection.
   * Detection only runs once the context holds at least two entries. On a
   * topic change the message is rewritten to standalone form and becomes the
   * sole entry of the new context; otherwise it is appended as-is.
   *
   * Note: the context is copied before the async LLM calls so both calls see
   * the same prior conversation even if `this.context` changes meanwhile.
   */
  async onUserMessage(text) {
    const hasEnoughHistory = this.context.length >= 2;
    if (hasEnoughHistory) {
      const prior = [...this.context];
      const changed = await this.detectTopicChange(text, prior);
      if (changed) {
        const standalone = await this.resolveToStandalone(text, prior);
        this.context = [`User: ${standalone}`];
        return;
      }
    }
    this.context.push(`User: ${text}`);
  }
  /**
   * Return a copy of the context in the current topic segment.
   */
  getContextSnapshot() {
    return Array.from(this.context);
  }
  /**
   * Detect if a new message represents a topic change using LLM.
   * @param newMessage - The new user message to check
   * @param contextSnapshot - Snapshot of context captured before this async call
   * @returns The agent's `isTopicChange` verdict
   */
  async detectTopicChange(newMessage, contextSnapshot) {
    const state = {
      context: formatConversation(contextSnapshot),
      newMessage
    };
    const reply = await generate3(
      topicChangeAgent,
      [user3("Determine if this is a topic change.")],
      state
    );
    return reply.experimental_output.isTopicChange;
  }
  /**
   * Resolve a context-dependent message into a standalone question.
   * Called when topic change is detected to preserve the meaning of
   * the triggering message before context is reset.
   * @param text - The user message to resolve
   * @param contextSnapshot - Snapshot of context captured before this async call
   * @returns The agent's standalone `question`
   */
  async resolveToStandalone(text, contextSnapshot) {
    const transcript = [...contextSnapshot, `User: ${text}`];
    const state = {
      conversation: formatConversation(transcript),
      // No SQL yet, just resolving the question
      sql: ""
    };
    const reply = await generate3(
      contextResolverAgent,
      [user3("Generate a standalone question for this message.")],
      state
    );
    return reply.experimental_output.question;
  }
};
652
+
653
+ // packages/text2sql/src/lib/synthesis/extractors/last-query-extractor.ts
654
+ import { generate as generate4, user as user4 } from "@deepagents/agent";
655
/**
 * Contextual extractor that keeps the full conversation as context but
 * resolves a question for the final collected query only.
 */
var LastQueryExtractor = class extends BaseContextualExtractor {
  /**
   * @param messages - Chat history to scan
   * @param adapter - Database adapter passed through to the base class
   * @param options - Extraction configuration passed through to the base class
   */
  constructor(messages, adapter, options = {}) {
    super(messages, adapter, options);
  }
  /**
   * Record the user message in the context log; nothing is ever discarded.
   */
  async onUserMessage(text) {
    const entry = `User: ${text}`;
    this.context.push(entry);
  }
  /**
   * Hand back a copy of everything accumulated so far.
   */
  getContextSnapshot() {
    return Array.from(this.context);
  }
  /**
   * Override to only resolve the LAST query instead of all queries.
   * Yields nothing when no query was collected, otherwise one single-pair chunk.
   */
  async *resolveQuestions(introspection) {
    if (this.results.length === 0) {
      return;
    }
    const { sql, success, conversationContext } = this.results.at(-1);
    const state = {
      conversation: formatConversation(conversationContext),
      sql,
      introspection
    };
    const { experimental_output } = await generate4(
      contextResolverAgent,
      [user4("Generate a standalone question for this SQL query.")],
      state
    );
    yield [
      {
        question: experimental_output.question,
        sql,
        context: conversationContext,
        success
      }
    ];
  }
};
698
+
699
+ // packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
700
+ import pLimit from "p-limit";
701
+
702
+ // packages/text2sql/src/lib/agents/question.agent.ts
703
+ import { groq as groq4 } from "@ai-sdk/groq";
704
+ import { defaultSettingsMiddleware, wrapLanguageModel } from "ai";
705
+ import dedent4 from "dedent";
706
+ import z4 from "zod";
707
+ import { agent as agent4, generate as generate5, user as user5 } from "@deepagents/agent";
708
/**
 * Prompt fragments keyed by complexity level ("low", "medium", "hard", "window").
 * The question generator's prompt interpolates the entry for the selected
 * level inside its <complexity> tag.
 */
+ var complexityInstructions = {
709
// Single-table questions: simple filters and basic aggregates, no joins.
+ low: dedent4`
710
+ Generate simple questions that require:
711
+ - Basic SELECT with single table
712
+ - Simple WHERE clauses with one condition
713
+ - COUNT(*) or basic aggregations
714
+ - No joins required
715
+ Examples: "How many customers do we have?", "List all products", "What is the total revenue?"
716
+ `,
717
// Questions needing a few joins, grouping, ordering or basic subqueries.
+ medium: dedent4`
718
+ Generate moderate questions that require:
719
+ - JOINs between 2-3 tables
720
+ - Multiple WHERE conditions (AND/OR)
721
+ - GROUP BY with HAVING clauses
722
+ - ORDER BY with LIMIT
723
+ - Basic subqueries
724
+ Examples: "What are the top 5 customers by total orders?", "Which products have never been ordered?"
725
+ `,
726
// Questions needing many joins, nested queries/CTEs, CASE or date arithmetic.
+ hard: dedent4`
727
+ Generate complex questions that require:
728
+ - Multiple JOINs (3+ tables)
729
+ - Nested subqueries or CTEs
730
+ - Complex aggregations with multiple GROUP BY columns
731
+ - CASE expressions
732
+ - Date/time calculations
733
+ Examples: "What is the month-over-month growth rate?", "Which customers have increased spending compared to last year?"
734
+ `,
735
// Questions that specifically call for window functions.
+ window: dedent4`
736
+ Generate advanced questions that require window functions:
737
+ - ROW_NUMBER, RANK, DENSE_RANK
738
+ - LAG, LEAD for comparisons
739
+ - Running totals (SUM OVER)
740
+ - Moving averages
741
+ - PARTITION BY clauses
742
+ Examples: "What is the running total of sales per month?", "Rank customers by their purchase frequency within each region"
743
+ `
744
+ };
745
/**
 * Agent definition named "question_generator".
 * Its structured output is an object holding a non-empty `questions` array of
 * strings. The prompt is built from the call state: `count`, `complexity`
 * (falls back to "medium") and `introspection` (falls back to an empty string).
 */
+ var questionGeneratorAgent = agent4({
746
+ name: "question_generator",
747
// Default model wrapped so that temperature 0.8 / topP 0.95 are applied as default settings.
+ model: wrapLanguageModel({
748
+ model: groq4("openai/gpt-oss-20b"),
749
+ middleware: defaultSettingsMiddleware({
750
+ settings: { temperature: 0.8, topP: 0.95 }
751
+ })
752
+ }),
753
+ handoffDescription: "Generates natural language questions that users might ask about the database schema.",
754
+ output: z4.object({
755
+ questions: z4.array(z4.string().describe("A natural language question about the data")).min(1).describe("List of natural language questions a user might ask")
756
+ }),
757
+ prompt: (state) => {
758
// NOTE(review): `count` has no fallback, so the prompt reads "exactly undefined" when state.count is missing.
+ const count = state?.count;
759
+ const complexity = state?.complexity ?? "medium";
760
+ return dedent4`
761
+ <identity>
762
+ You are a synthetic data generator specializing in creating realistic natural language questions
763
+ that users might ask about a database. You understand database schemas and can generate diverse,
764
+ practical questions that would require SQL queries to answer.
765
+ </identity>
766
+
767
+ ${state?.introspection || ""}
768
+
769
+ <complexity level="${complexity}">
770
+ ${complexityInstructions[complexity]}
771
+ </complexity>
772
+
773
+ <task>
774
+ Generate exactly ${count} natural language questions at the "${complexity}" complexity level.
775
+ The questions should:
776
+ 1. Match the complexity requirements above
777
+ 2. Use natural business language, not technical SQL terms
778
+ 3. Be realistic questions a non-technical user would actually ask
779
+ 4. Cover different tables and relationships when possible
780
+ </task>
781
+
782
+ <guardrails>
783
+ - Questions MUST ONLY reference tables and columns that exist in the schema above
784
+ - Before generating each question, verify that ALL entities (tables, columns, relationships) you reference are explicitly listed in the schema
785
+ - DO NOT invent or assume tables/columns that aren't explicitly shown in the schema
786
+ - Use natural language without SQL keywords like SELECT, WHERE, etc.
787
+ - All questions must match the specified complexity level
788
+ </guardrails>
789
+ `;
790
+ }
791
+ });
792
/**
 * Asks the question generator agent for natural-language questions.
 * @param params - `introspection`, `complexity` and `count` are passed to the
 *   agent as state; `prompt` overrides the default user message; `model`, when
 *   truthy, is applied to a clone of the agent instead of the shared instance.
 * @returns An object with the generated `questions` array
 */
async function generateQuestions(params) {
  const { introspection, complexity, count, prompt, model } = params;
  let agentInstance = questionGeneratorAgent;
  if (model) {
    agentInstance = questionGeneratorAgent.clone({ model });
  }
  const userPrompt = prompt ?? `Generate ${count} questions at ${complexity} complexity given db schema.`;
  const state = { introspection, complexity, count };
  const response = await generate5(agentInstance, [user5(userPrompt)], state);
  return { questions: response.experimental_output.questions };
}
+
804
+ // packages/text2sql/src/lib/agents/sql.agent.ts
805
+ import { groq as groq5 } from "@ai-sdk/groq";
806
+ import { defaultSettingsMiddleware as defaultSettingsMiddleware2, wrapLanguageModel as wrapLanguageModel2 } from "ai";
807
+ import z5 from "zod";
808
+ import { agent as agent5, generate as generate6, user as user6 } from "@deepagents/agent";
809
+
810
+ // packages/text2sql/src/lib/teach/xml.ts
811
/**
 * Wraps the truthy entries of `children` in an opening/closing `tag` pair,
 * one child per line, with the content passed through indentBlock(…, 2).
 * Returns an empty string when nothing remains after dropping falsy children.
 */
function wrapBlock(tag, children) {
  const content = children.filter(Boolean).join("\n");
  if (content === "") {
    return "";
  }
  return [`<${tag}>`, indentBlock(content, 2), `</${tag}>`].join("\n");
}
/**
 * Renders `values` as `childTag` leaf elements nested inside a `tag` element.
 * Returns an empty string when `values` has no length.
 */
function list(tag, values, childTag) {
  if (!values.length) {
    return "";
  }
  const items = values.map((value) => leaf(childTag, value));
  const body = indentBlock(items.join("\n"), 2);
  return [`<${tag}>`, body, `</${tag}>`].join("\n");
}
/**
 * Renders a single element whose text is the XML-escaped `value`.
 * Single-line text stays inline; text containing a newline is placed on its
 * own indented lines between the opening and closing tags.
 */
function leaf(tag, value) {
  const safe = escapeXml(value);
  const isMultiline = safe.includes("\n");
  const body = isMultiline ? `\n${indentBlock(safe, 2)}\n` : safe;
  return `<${tag}>${body}</${tag}>`;
}
/**
 * Prefixes every line of `text` with `spaces` space characters.
 * Whitespace-only input yields an empty string. Empty lines inside the text
 * still receive the padding.
 */
function indentBlock(text, spaces) {
  if (text.trim() === "") {
    return "";
  }
  const padding = " ".repeat(spaces);
  const indented = [];
  for (const line of text.split("\n")) {
    indented.push(padding + line);
  }
  return indented.join("\n");
}
/**
 * Escapes the five XML special characters (&, <, >, ", ') in `value`.
 * @param value - Text to escape. `null`/`undefined` produce an empty string;
 *   any other non-string (number, boolean, …) is converted with `String()`
 *   first instead of throwing on the missing `replaceAll` method.
 * @returns The escaped text
 */
function escapeXml(value) {
  if (value == null) {
    return "";
  }
  // "&" must be replaced first so the entities added below are not re-escaped.
  return String(value)
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll('"', "&quot;")
    .replaceAll("'", "&apos;");
}
+
852
+ // packages/text2sql/src/lib/teach/teachables.ts
853
/**
 * Teachable of type "term": a name with its definition.
 * `encode` returns the plain data record; `decode` renders a <term> block.
 */
function term(name, definition) {
  const encode = () => ({ type: "term", name, definition });
  const decode = () => {
    const parts = [leaf("name", name), leaf("definition", definition)];
    return wrapBlock("term", parts);
  };
  return { type: "term", encode, decode };
}
/**
 * Teachable of type "hint": a single piece of text rendered as a <hint> leaf.
 */
function hint(text) {
  const encode = () => ({ type: "hint", text });
  const decode = () => leaf("hint", text);
  return { type: "hint", encode, decode };
}
/**
 * Teachable of type "guardrail": a rule with optional reason and action.
 * The optional parts are rendered only when truthy.
 */
function guardrail(input) {
  const { rule, reason, action } = input;
  const encode = () => ({ type: "guardrail", rule, reason, action });
  const decode = () => {
    const reasonLeaf = reason ? leaf("reason", reason) : "";
    const actionLeaf = action ? leaf("action", action) : "";
    return wrapBlock("guardrail", [leaf("rule", rule), reasonLeaf, actionLeaf]);
  };
  return { type: "guardrail", encode, decode };
}
/**
 * Teachable of type "explain": a concept, its explanation (rendered as
 * <details>) and an optional "therefore" conclusion, inside <explanation>.
 */
function explain(input) {
  const { concept, explanation, therefore } = input;
  const encode = () => ({ type: "explain", concept, explanation, therefore });
  const decode = () => {
    const conclusion = therefore ? leaf("therefore", therefore) : "";
    return wrapBlock("explanation", [
      leaf("concept", concept),
      leaf("details", explanation),
      conclusion
    ]);
  };
  return { type: "explain", encode, decode };
}
/**
 * Teachable of type "example": a question, its answer and an optional note.
 */
function example(input) {
  const { question, answer, note } = input;
  const encode = () => ({ type: "example", question, answer, note });
  const decode = () => {
    const noteLeaf = note ? leaf("note", note) : "";
    return wrapBlock("example", [
      leaf("question", question),
      leaf("answer", answer),
      noteLeaf
    ]);
  };
  return { type: "example", encode, decode };
}
/**
 * Teachable of type "clarification": when to ask, what to ask, and why.
 * All three fields are always rendered.
 */
function clarification(input) {
  const { when, ask, reason } = input;
  const encode = () => ({ type: "clarification", when, ask, reason });
  const decode = () => {
    const parts = [leaf("when", when), leaf("ask", ask), leaf("reason", reason)];
    return wrapBlock("clarification", parts);
  };
  return { type: "clarification", encode, decode };
}
/**
 * Teachable of type "workflow": a task with its steps, plus optional triggers
 * and notes. Rendered order is task, triggers, steps, notes.
 */
function workflow(input) {
  const { task, steps, triggers, notes } = input;
  const encode = () => ({ type: "workflow", task, steps, triggers, notes });
  const decode = () => {
    const taskLeaf = leaf("task", task);
    const triggerList = triggers?.length ? list("triggers", triggers, "trigger") : "";
    const stepList = list("steps", steps, "step");
    const notesLeaf = notes ? leaf("notes", notes) : "";
    return wrapBlock("workflow", [taskLeaf, triggerList, stepList, notesLeaf]);
  };
  return { type: "workflow", encode, decode };
}
/**
 * Teachable of type "quirk": an issue paired with its workaround.
 */
function quirk(input) {
  const { issue, workaround } = input;
  const encode = () => ({ type: "quirk", issue, workaround });
  const decode = () => {
    const parts = [leaf("issue", issue), leaf("workaround", workaround)];
    return wrapBlock("quirk", parts);
  };
  return { type: "quirk", encode, decode };
}
/**
 * Teachable of type "styleGuide": a preferred style with optional "always"
 * and "never" rules, rendered in that order inside <style_guide>.
 */
function styleGuide(input) {
  const { prefer, never, always } = input;
  const encode = () => ({ type: "styleGuide", prefer, never, always });
  const decode = () => {
    const preferLeaf = leaf("prefer", prefer);
    const alwaysLeaf = always ? leaf("always", always) : "";
    const neverLeaf = never ? leaf("never", never) : "";
    return wrapBlock("style_guide", [preferLeaf, alwaysLeaf, neverLeaf]);
  };
  return { type: "styleGuide", encode, decode };
}
/**
 * Teachable of type "analogy": a list of concepts and their relationship,
 * with optional insight, "therefore" and pitfall parts.
 */
function analogy(input) {
  const { concept, relationship, insight, therefore, pitfall } = input;
  const encode = () => ({
    type: "analogy",
    concept,
    relationship,
    insight,
    therefore,
    pitfall
  });
  const decode = () => {
    const conceptList = list("concepts", concept, "concept");
    const relationshipLeaf = leaf("relationship", relationship);
    const insightLeaf = insight ? leaf("insight", insight) : "";
    const thereforeLeaf = therefore ? leaf("therefore", therefore) : "";
    const pitfallLeaf = pitfall ? leaf("pitfall", pitfall) : "";
    return wrapBlock("analogy", [
      conceptList,
      relationshipLeaf,
      insightLeaf,
      thereforeLeaf,
      pitfallLeaf
    ]);
  };
  return { type: "analogy", encode, decode };
}
/**
 * Teachable of type "glossary": each own enumerable key/value of `entries`
 * becomes an <entry> holding a <term> and its <sql>.
 */
function glossary(entries) {
  const encode = () => ({ type: "glossary", entries });
  const renderEntry = ([name, sql]) => {
    return wrapBlock("entry", [leaf("term", name), leaf("sql", sql)]);
  };
  const decode = () => {
    const rows = Object.entries(entries).map((pair) => renderEntry(pair));
    return wrapBlock("glossary", rows);
  };
  return { type: "glossary", encode, decode };
}
/**
 * Teachable of type "identity": optional name and role, each rendered only
 * when truthy.
 */
function identity(input) {
  const { name, role } = input;
  const encode = () => ({ type: "identity", name, role });
  const decode = () => {
    const nameLeaf = name ? leaf("name", name) : "";
    const roleLeaf = role ? leaf("role", role) : "";
    return wrapBlock("identity", [nameLeaf, roleLeaf]);
  };
  return { type: "identity", encode, decode };
}
/**
 * Teachable of type "persona": name and role plus an optional tone.
 * The encoded record stores a missing tone as an empty string.
 */
function persona(input) {
  const { name, role, tone } = input;
  const encode = () => ({ type: "persona", name, role, tone: tone ?? "" });
  const decode = () => {
    const toneLeaf = tone ? leaf("tone", tone) : "";
    return wrapBlock("persona", [leaf("name", name), leaf("role", role), toneLeaf]);
  };
  return { type: "persona", encode, decode };
}
/**
 * Teachable of type "alias": a user term mapped to its meaning.
 */
function alias(termName, meaning) {
  const encode = () => ({ type: "alias", term: termName, meaning });
  const decode = () => {
    const parts = [leaf("term", termName), leaf("meaning", meaning)];
    return wrapBlock("alias", parts);
  };
  return { type: "alias", encode, decode };
}
/**
 * Teachable of type "preference": an aspect with the preferred value.
 */
function preference(aspect, value) {
  const encode = () => ({ type: "preference", aspect, value });
  const decode = () => {
    const parts = [leaf("aspect", aspect), leaf("value", value)];
    return wrapBlock("preference", parts);
  };
  return { type: "preference", encode, decode };
}
/**
 * Teachable of type "context": a description rendered as a <context> leaf.
 */
function context(description) {
  const encode = () => ({ type: "context", description });
  const decode = () => leaf("context", description);
  return { type: "context", encode, decode };
}
/**
 * Teachable of type "correction": a subject and the clarification for it.
 */
function correction(subject, clarification2) {
  const encode = () => ({ type: "correction", subject, clarification: clarification2 });
  const decode = () => {
    const parts = [leaf("subject", subject), leaf("clarification", clarification2)];
    return wrapBlock("correction", parts);
  };
  return { type: "correction", encode, decode };
}
/**
 * Renders teachables as one XML-like block wrapped in `tag`.
 * Teachables are grouped by `type`. Types listed in SECTION_ORDER are emitted
 * first, in that order, each wrapped in its section tag. Remaining types
 * follow in first-seen order without a section wrapper. Items whose decoded
 * text is blank are dropped.
 * @param tag - Name of the outer element
 * @param teachables - Teachables exposing `type` and `decode()`
 * @returns The rendered block, or an empty string when nothing renders
 */
function toInstructions(tag, ...teachables) {
  if (teachables.length === 0) {
    return "";
  }
  const byType = new Map();
  for (const teachable of teachables) {
    const bucket = byType.get(teachable.type);
    if (bucket) {
      bucket.push(teachable);
    } else {
      byType.set(teachable.type, [teachable]);
    }
  }
  // Decode, drop blanks, indent by two, and join one item per line.
  const renderItems = (items) => items
    .map((item) => item.decode().trim())
    .filter(Boolean)
    .map((text) => indentBlock(text, 2))
    .join("\n");
  const orderedTypes = new Set();
  const sections = [];
  for (const { type, tag: sectionTag } of SECTION_ORDER) {
    orderedTypes.add(type);
    const items = byType.get(type);
    if (!items?.length) {
      continue;
    }
    const body = renderItems(items);
    if (body.length) {
      sections.push(`<${sectionTag}>\n${body}\n</${sectionTag}>`);
    }
  }
  for (const [type, items] of byType) {
    if (orderedTypes.has(type)) {
      continue;
    }
    const body = renderItems(items);
    if (body.length) {
      sections.push(body);
    }
  }
  if (sections.length === 0) {
    return "";
  }
  return `<${tag}>\n${indentBlock(sections.join("\n"), 2)}\n</${tag}>`;
}
/**
 * Rendering order of teachable types and the section tag each is wrapped in.
 * Written as [type, tag] rows and expanded to `{ type, tag }` records.
 */
var SECTION_ORDER = [
  // User context (rendered first - most important for personalization)
  ["identity", "identity"],
  ["persona", "persona"],
  ["context", "user_context"],
  ["preference", "user_preferences"],
  ["alias", "user_vocabulary"],
  ["correction", "user_corrections"],
  // Domain knowledge
  ["guardrail", "guardrails"],
  ["styleGuide", "style_guides"],
  ["hint", "hints"],
  ["clarification", "clarifications"],
  ["workflow", "workflows"],
  ["quirk", "quirks"],
  ["term", "terminology"],
  ["explain", "explanations"],
  ["analogy", "analogies"],
  ["glossary", "glossary"],
  ["example", "examples"]
].map(([type, tag]) => ({ type, tag }));
/**
 * Converts plain generated records into teachable objects by dispatching on
 * each record's `type`. Records with an unrecognised type map to `undefined`.
 * @param generated - Array of records carrying a `type` discriminator
 * @returns Array of teachables, one per input record
 */
function toTeachables(generated) {
  const builders = new Map([
    ["persona", (item) => persona({ name: item.name, role: item.role, tone: item.tone })],
    ["term", (item) => term(item.name, item.definition)],
    ["hint", (item) => hint(item.text)],
    ["guardrail", (item) => guardrail({
      rule: item.rule,
      reason: item.reason,
      action: item.action
    })],
    ["explain", (item) => explain({
      concept: item.concept,
      explanation: item.explanation,
      therefore: item.therefore
    })],
    ["example", (item) => example({
      question: item.question,
      answer: item.answer,
      note: item.note
    })],
    ["clarification", (item) => clarification({
      when: item.when,
      ask: item.ask,
      reason: item.reason
    })],
    ["workflow", (item) => workflow({
      task: item.task,
      steps: item.steps,
      triggers: item.triggers,
      notes: item.notes
    })],
    ["quirk", (item) => quirk({ issue: item.issue, workaround: item.workaround })],
    ["styleGuide", (item) => styleGuide({
      prefer: item.prefer,
      never: item.never,
      always: item.always
    })],
    ["analogy", (item) => analogy({
      concept: item.concept,
      relationship: item.relationship,
      insight: item.insight,
      therefore: item.therefore,
      pitfall: item.pitfall
    })],
    ["glossary", (item) => glossary(item.entries)],
    // User-specific teachable types
    ["identity", (item) => identity({ name: item.name, role: item.role })],
    ["alias", (item) => alias(item.term, item.meaning)],
    ["preference", (item) => preference(item.aspect, item.value)],
    ["context", (item) => context(item.description)],
    ["correction", (item) => correction(item.subject, item.clarification)]
  ]);
  return generated.map((item) => {
    const build = builders.get(item.type);
    return build ? build(item) : undefined;
  });
}
+
1176
+ // packages/text2sql/src/lib/agents/sql.agent.ts
1177
+ var RETRY_TEMPERATURES = [0, 0.2, 0.3];
1178
/**
 * Agent definition named "text2sql".
 * Its structured output is either `{ sql, reasoning? }` or `{ error }`.
 * The prompt consists of the `teachings` and `introspection` values from the
 * call state, each falling back to an empty string.
 */
+ var sqlQueryAgent = agent5({
1179
+ name: "text2sql",
1180
+ model: groq5("openai/gpt-oss-20b"),
1181
// Logging is on only when the AGENT_LOGGING environment variable is exactly "true".
+ logging: process.env.AGENT_LOGGING === "true",
1182
+ output: z5.union([
1183
+ z5.object({
1184
+ sql: z5.string().describe("The SQL query that answers the question"),
1185
+ reasoning: z5.string().optional().describe("The reasoning steps taken to generate the SQL")
1186
+ }),
1187
+ z5.object({
1188
+ error: z5.string().describe(
1189
+ "Error message explaining why the question cannot be answered with the given schema"
1190
+ )
1191
+ })
1192
+ ]),
1193
+ prompt: (state) => {
1194
+ return `
1195
+ ${state?.teachings || ""}
1196
+ ${state?.introspection || ""}
1197
+ `;
1198
+ }
1199
+ });
1200
/**
 * Pulls the SQL text out of a model response.
 * When the response contains a fenced code block, the content of the first
 * fence is returned. The fence may be tagged `sql` in any letter case or carry
 * no tag at all. Without a fence the whole response is returned.
 * @param output - Raw text returned by the model
 * @returns The trimmed SQL text
 */
function extractSql(output) {
  // Optional case-insensitive "sql" tag, optional trailing blanks and newline,
  // then a lazy capture up to the closing fence.
  const fenced = output.match(/```(?:sql)?[ \t]*\r?\n?([\s\S]*?)```/i);
  return fenced ? fenced[1].trim() : output.trim();
}
/**
 * Runs one SQL generation attempt with the text2sql agent.
 * The agent is cloned with `model` wrapped so that `temperature` and topP 1
 * are applied as default settings. When `previousError` is truthy, a second
 * user message carrying that error is sent after the question.
 * @param params - `input`, `model`, `temperature`, `introspection`,
 *   `instructions` (teachables appended after the built-in persona) and
 *   `previousError`
 * @returns `{ success: true, sql }`, or `{ success: false, error }` with
 *   `isUnanswerable: true` when the agent itself reported an error
 * @throws Re-throws any error that is not a recognised schema-validation failure
 */
async function generateSql(params) {
  const { input, model, temperature, introspection, instructions, previousError } = params;
  const tunedModel = wrapLanguageModel2({
    model,
    middleware: defaultSettingsMiddleware2({
      settings: { temperature, topP: 1 }
    })
  });
  const agentInstance = sqlQueryAgent.clone({ model: tunedModel });
  const messages = [user6(input)];
  if (previousError) {
    messages.push(
      user6(
        `<validation_error>Your previous SQL query had the following error: ${previousError}. Please fix the query.</validation_error>`
      )
    );
  }
  try {
    const teachings = toInstructions(
      "instructions",
      persona({
        name: "Freya",
        role: "You are an expert SQL query generator. You translate natural language questions into precise, efficient SQL queries based on the provided database schema."
      }),
      ...instructions
    );
    const response = await generate6(agentInstance, messages, { teachings, introspection });
    const output = response.experimental_output;
    if ("error" in output) {
      return { success: false, error: output.error, isUnanswerable: true };
    }
    return { success: true, sql: extractSql(output.sql) };
  } catch (error) {
    // Only failures recognised by these message fragments are turned into a result.
    const schemaFailureMarkers = ["Failed to validate JSON", "response did not match schema"];
    const isSchemaFailure = error instanceof Error && schemaFailureMarkers.some((marker) => error.message.includes(marker));
    if (!isSchemaFailure) {
      throw error;
    }
    return {
      success: false,
      error: `Schema validation failed: ${error.message}`
    };
  }
}
/**
 * Holder object through which generateAndValidate reaches generateSql.
 */
var sqlGenerators = {
  generateSql: generateSql
};
/**
 * Generates SQL once and checks it with the adapter.
 * @param options - Reads `input`, `model` (falls back to the text2sql agent's
 *   model), `introspection`, `instructions` and `adapter`
 * @param temperature - Sampling temperature for this attempt
 * @param previousError - Error from the prior attempt, if any
 * @returns `{ ok: true, sql }` when generation succeeded and
 *   `adapter.validate` returned nothing truthy; otherwise `{ ok: false, error }`,
 *   with `isUnanswerable` copied from the generation result when generation failed
 */
async function generateAndValidate(options, temperature, previousError) {
  const request = {
    input: options.input,
    model: options.model ?? sqlQueryAgent.model,
    temperature,
    introspection: options.introspection,
    instructions: options.instructions,
    previousError
  };
  const generated = await sqlGenerators.generateSql(request);
  if (generated.success) {
    const validationError = await options.adapter.validate(generated.sql);
    return validationError ? { ok: false, error: validationError } : { ok: true, sql: generated.sql };
  }
  return {
    ok: false,
    error: generated.error,
    isUnanswerable: generated.isUnanswerable
  };
}
/**
 * Generates validated SQL, retrying up to `options.maxRetries` times
 * (default 3). Each attempt takes its temperature from RETRY_TEMPERATURES
 * (0.3 once the table is exhausted) and receives the previous attempt's error.
 * @param options - Generation options; see generateAndValidate
 * @returns `{ sql, attempts, errors }`. On success `errors` holds the earlier
 *   failures or is undefined when there were none. An unanswerable question
 *   stops immediately with an empty `sql`. When every attempt fails, `sql` is
 *   empty and `errors` lists all failures.
 */
async function toSql(options) {
  const { maxRetries = 3 } = options;
  const errors = [];
  let attempt = 1;
  while (attempt <= maxRetries) {
    const temperature = RETRY_TEMPERATURES[attempt - 1] ?? 0.3;
    const outcome = await generateAndValidate(options, temperature, errors.at(-1));
    if (outcome.ok) {
      const earlierErrors = errors.length ? errors : undefined;
      return { sql: outcome.sql, attempts: attempt, errors: earlierErrors };
    }
    if (outcome.isUnanswerable) {
      return { sql: "", attempts: attempt, errors: [outcome.error] };
    }
    errors.push(outcome.error);
    attempt += 1;
  }
  return { sql: "", attempts: maxRetries, errors };
}
+
1307
+ // packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
1308
/**
 * Produces question/SQL pairs from a database schema by generating questions
 * for every persona × complexity combination and converting each to SQL.
 */
var SchemaSynthesizer = class extends PairProducer {
  /**
   * @param adapter - Database adapter used for `introspect()` and, through
   *   toSql, for SQL validation
   * @param options - Reads `count`, `complexity` (single value or array,
   *   default "medium"), `personas` (default: one run without persona),
   *   `concurrency` (default 5), `teachings` and `model`
   */
  constructor(adapter, options) {
    super();
    this.adapter = adapter;
    this.options = options;
    const { complexity } = this.options;
    this.#complexities = Array.isArray(complexity) ? complexity : [complexity ?? "medium"];
    this.#personas = this.options.personas ?? [void 0];
    this.#limit = pLimit(this.options.concurrency ?? 5);
  }
  #complexities = [];
  #personas = [];
  #limit;
  /**
   * Generates question-SQL pairs for all persona × complexity combinations.
   * Combinations are processed one after another. Within a combination the
   * question-to-SQL conversions run in parallel, bounded by the concurrency
   * limit. One batch is yielded per combination that produced any pairs.
   * @returns Generated pairs from all combinations
   */
  async *produce() {
    const introspection = await this.adapter.introspect();
    const combinations = this.#personas.flatMap(
      (persona2) => this.#complexities.map((complexity) => ({ persona: persona2, complexity }))
    );
    for (const { persona: persona2, complexity } of combinations) {
      const pairs = await this.#processCombination(
        introspection,
        persona2,
        complexity
      );
      if (pairs.length) {
        yield pairs;
      }
    }
  }
  /**
   * Processes a single persona × complexity combination by generating
   * questions and converting each one to SQL in parallel.
   */
  async #processCombination(introspection, persona2, complexity) {
    // A custom prompt is only built when a persona is supplied; otherwise
    // generateQuestions falls back to its own default user prompt.
    const personaContext = persona2 ? `As ${persona2.role}, ${persona2.perspective}\n\nGenerate questions this persona would ask.` : void 0;
    const prompt = personaContext ? `${personaContext}\n\nGenerate ${this.options.count} questions at ${complexity} complexity.` : void 0;
    const { questions } = await this.#limit(
      () => generateQuestions({
        introspection,
        complexity,
        count: this.options.count,
        prompt,
        model: this.options.model
      })
    );
    const pairs = await Promise.all(
      questions.map(async (question) => {
        const result = await this.#limit(
          () => toSql({
            input: question,
            adapter: this.adapter,
            introspection,
            instructions: this.options.teachings ?? [],
            model: this.options.model
          })
        );
        return {
          question,
          sql: result.sql,
          // toSql also reports the errors of earlier attempts when a later
          // attempt validated, so `errors` cannot tell success from failure.
          // Every failure path returns an empty `sql`, so test that instead.
          success: Boolean(result.sql)
        };
      })
    );
    return pairs;
  }
};
+
1388
+ // packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
1389
+ import { groq as groq6 } from "@ai-sdk/groq";
1390
+ import { defaultSettingsMiddleware as defaultSettingsMiddleware3, wrapLanguageModel as wrapLanguageModel3 } from "ai";
1391
+ import dedent5 from "dedent";
1392
+ import pLimit2 from "p-limit";
1393
+ import z6 from "zod";
1394
+ import { agent as agent6, generate as generate7, user as user7 } from "@deepagents/agent";
1395
+
1396
+ // packages/text2sql/src/lib/synthesis/synthesizers/styles.ts
1397
/**
 * One-line phrasing guidance for each supported communication style.
 * Key order here defines the order of ALL_STYLES below.
 */
var styleInstructions = Object.fromEntries([
  ["formal", "Use professional business language, complete sentences, no slang"],
  ["colloquial", "Use casual everyday speech, contractions, informal tone"],
  ["imperative", 'Phrase as commands: "Show me...", "Get...", "List..."'],
  ["interrogative", 'Phrase as questions: "What is...", "How many...", "Which..."'],
  ["descriptive", "Use detailed, verbose phrasing with extra context"],
  ["concise", "Use minimal words, telegram-style brevity"],
  ["vague", "Be intentionally ambiguous, use hedging language"],
  ["metaphorical", "Use figurative language, analogies, creative phrasing"],
  ["conversational", "Chat-like tone, as if talking to a colleague"]
]);
/**
 * Names of every supported communication style, in declaration order.
 */
var ALL_STYLES = Object.keys(styleInstructions);
+
1420
+ // packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
1421
/**
 * Agent definition named "question_paraphraser".
 * Its structured output is an object holding a non-empty `paraphrases` array
 * of strings. The prompt is built from the call state: `question`, `sql`,
 * `count` and an optional `persona` (`role`, `perspective`, `styles`).
 */
+ var paraphraserAgent = agent6({
1422
+ name: "question_paraphraser",
1423
// Default model wrapped so that temperature 0.9 / topP 0.95 / frequencyPenalty 0.2 are applied as default settings.
+ model: wrapLanguageModel3({
1424
+ model: groq6("openai/gpt-oss-20b"),
1425
+ middleware: defaultSettingsMiddleware3({
1426
+ settings: { temperature: 0.9, topP: 0.95, frequencyPenalty: 0.2 }
1427
+ })
1428
+ }),
1429
// Logging is on only when the AGENT_LOGGING environment variable is exactly "true".
+ logging: process.env.AGENT_LOGGING === "true",
1430
+ output: z6.object({
1431
+ paraphrases: z6.array(
1432
+ z6.string().describe("A paraphrased version of the original question")
1433
+ ).min(1).describe(
1434
+ "List of paraphrased questions that would produce the same SQL"
1435
+ )
1436
+ }),
1437
+ prompt: (state) => {
1438
// Persona section: included only when state.persona is set, otherwise an empty string.
+ const personaInstruction = state?.persona ? dedent5`
1439
+ <persona role="${state.persona.role}">
1440
+ ${state.persona.perspective}
1441
+
1442
+ Paraphrase the question as this persona would naturally ask it.
1443
+ Use their vocabulary, priorities, and framing style.
1444
+ </persona>
1445
+ ` : "";
1446
// Style section: included only when the persona lists at least one style; each style is expanded via styleInstructions.
+ const styleInstruction = state?.persona?.styles && state.persona.styles.length > 0 ? dedent5`
1447
+ <communication_styles>
1448
+ Generate paraphrases using these communication styles: ${state.persona.styles.join(", ")}
1449
+
1450
+ Style definitions:
1451
+ ${state.persona.styles.map((s) => `- ${s}: ${styleInstructions[s]}`).join("\n")}
1452
+
1453
+ Distribute paraphrases across these styles for variety.
1454
+ </communication_styles>
1455
+ ` : "";
1456
+ return dedent5`
1457
+ <identity>
1458
+ You are a linguistic expert specializing in paraphrasing database questions.
1459
+ Your task is to generate alternative phrasings of questions that preserve
1460
+ the exact same semantic meaning - they must all produce the identical SQL query.
1461
+ </identity>
1462
+
1463
+ <original_question>
1464
+ ${state?.question}
1465
+ </original_question>
1466
+
1467
+ <reference_sql>
1468
+ ${state?.sql}
1469
+ (This SQL shows what the question is really asking - all paraphrases must ask for exactly this)
1470
+ </reference_sql>
1471
+
1472
+ ${personaInstruction}
1473
+
1474
+ ${styleInstruction}
1475
+
1476
+ <task>
1477
+ Generate exactly ${state?.count} paraphrased versions of the original question.
1478
+
1479
+ Requirements:
1480
+ 1. Each paraphrase must be semantically equivalent - it should produce the EXACT same SQL
1481
+ 2. Vary the sentence structure, word choice, and phrasing style
1482
+ 3. Use natural language without SQL keywords (SELECT, WHERE, JOIN, etc.)
1483
+ 4. Keep paraphrases realistic - how actual users would ask
1484
+ 5. Do not add or remove any conditions, filters, or requirements from the original
1485
+ ${state?.persona?.styles?.length ? "6. Apply the specified communication styles to create diverse phrasings" : ""}
1486
+ </task>
1487
+
1488
+ <guardrails>
1489
+ - NEVER change what data is being requested
1490
+ - NEVER add filters, aggregations, or conditions not in the original
1491
+ - NEVER remove any specificity from the original question
1492
+ - All paraphrases must be answerable by the exact same SQL query
1493
+ </guardrails>
1494
+ `;
1495
+ }
1496
+ });
1497
/**
 * Expands a stream of question/SQL pairs by paraphrasing each question while
 * keeping its SQL, context and success flag unchanged.
 */
var BreadthEvolver = class extends PairProducer {
  /**
   * @param source - Source pairs or producer to evolve
   * @param options - Reads `count` (paraphrases per question), `persona`,
   *   `model` and `concurrency` (default 4)
   */
  constructor(source, options) {
    super();
    this.source = source;
    this.options = options;
    this.#limit = pLimit2(this.options.concurrency ?? 4);
  }
  #limit;
  /**
   * Paraphrases every pair of each incoming chunk concurrently, bounded by
   * the concurrency limit, and yields one flattened batch per chunk once all
   * of its pairs are done.
   */
  async *produce() {
    for await (const chunk of this.from(this.source)) {
      const tasks = chunk.map(
        (pair) => this.#limit(async () => {
          // Override the model only when one was configured, matching
          // generateQuestions; passing `model: undefined` to clone() could
          // otherwise replace the agent's default model.
          const { model } = this.options;
          const agentInstance = model ? paraphraserAgent.clone({ model }) : paraphraserAgent;
          const { experimental_output } = await generate7(
            agentInstance,
            [
              user7(
                `Paraphrase this question ${this.options.count} times: "${pair.question}"`
              )
            ],
            {
              question: pair.question,
              sql: pair.sql,
              count: this.options.count,
              persona: this.options.persona
            }
          );
          return experimental_output.paraphrases.map((paraphrase) => ({
            question: paraphrase,
            sql: pair.sql,
            context: pair.context,
            success: pair.success
          }));
        })
      );
      const results = await Promise.all(tasks);
      yield results.flat();
    }
  }
};
+
1545
+ // packages/text2sql/src/lib/synthesis/synthesizers/depth-evolver.ts
1546
+ import { groq as groq7 } from "@ai-sdk/groq";
1547
+ import {
1548
+ NoObjectGeneratedError,
1549
+ NoOutputGeneratedError,
1550
+ defaultSettingsMiddleware as defaultSettingsMiddleware4,
1551
+ wrapLanguageModel as wrapLanguageModel4
1552
+ } from "ai";
1553
+ import dedent6 from "dedent";
1554
+ import pLimit3 from "p-limit";
1555
+ import pRetry from "p-retry";
1556
+ import z7 from "zod";
1557
+ import { agent as agent7, generate as generate8, user as user8 } from "@deepagents/agent";
1558
/**
 * Prompt fragments keyed by question-evolution technique name.
 * NOTE(review): presumably passed to the question evolver agent as
 * `techniqueInstruction` - confirm against the caller.
 */
+ var techniqueInstructions = {
1559
// Turns a plain listing question into one that needs grouping/aggregate functions.
+ "add-aggregation": dedent6`
1560
+ Add aggregation requirements to the question.
1561
+ Transform it to require GROUP BY, COUNT, SUM, AVG, MIN, MAX, or similar operations.
1562
+ Examples:
1563
+ - "Show orders" → "Show total order count by customer"
1564
+ - "List products" → "What is the average price per category?"
1565
+ - "Get employees" → "How many employees are in each department?"
1566
+ `,
1567
// Adds specific filtering conditions (WHERE clauses).
+ "add-filter": dedent6`
1568
+ Add filtering conditions to the question.
1569
+ Transform it to require WHERE clauses with specific conditions.
1570
+ Examples:
1571
+ - "Show orders" → "Show orders from the last 30 days"
1572
+ - "List customers" → "List customers who have made more than 5 purchases"
1573
+ - "Get products" → "Get products with price above $100"
1574
+ `,
1575
// Requires data from related tables (JOINs).
+ "add-join": dedent6`
1576
+ Add requirements that need data from related tables.
1577
+ Transform it to require JOIN operations between multiple tables.
1578
+ Examples:
1579
+ - "Show orders" → "Show orders with customer names and addresses"
1580
+ - "List products" → "List products with their supplier information"
1581
+ - "Get employees" → "Get employees with their department and manager names"
1582
+ `,
1583
// Requires comparisons, deduction or derived calculations across steps.
+ "add-reasoning": dedent6`
1584
+ Add multi-step reasoning requirements.
1585
+ Transform it to require logical deduction, comparisons, or derived calculations.
1586
+ Examples:
1587
+ - "Show orders" → "Which customers have orders above the average order value?"
1588
+ - "List products" → "Which products are underperforming compared to their category average?"
1589
+ - "Get revenue" → "Which month had the highest growth compared to the previous month?"
1590
+ `,
1591
// Introduces a what-if scenario that needs calculations or projections.
+ hypothetical: dedent6`
1592
+ Add a hypothetical or speculative scenario.
1593
+ Transform it to require applying calculations or projections.
1594
+ Examples:
1595
+ - "Show revenue" → "What would revenue be if we increased all prices by 15%?"
1596
+ - "List inventory" → "How many days of stock remain at current sales rate?"
1597
+ - "Get costs" → "What would be the impact of a 10% discount on profit margins?"
1598
+ `
1599
+ };
1600
/**
 * Agent that rewrites a simple database question into a more complex one.
 *
 * The prompt is built from the call state: the original question and SQL, the
 * database schema, the technique name, and that technique's instruction text.
 * The structured output is `{ evolvedQuestion: string }`.
 *
 * Template lines are kept flush-left: their leading whitespace is part of the
 * string that `dedent6` receives.
 */
var questionEvolverAgent = agent7({
  name: "question_evolver",
  model: wrapLanguageModel4({
    model: groq7("openai/gpt-oss-20b"),
    middleware: defaultSettingsMiddleware4({
      settings: { temperature: 0.7, topP: 0.95 }
    })
  }),
  output: z7.object({
    evolvedQuestion: z7.string().describe("The evolved, more complex version of the original question")
  }),
  prompt: (state) => {
    // A missing state renders every placeholder as "undefined", same as `state?.x`.
    const { question, sql, schema, technique, techniqueInstruction } = state ?? {};
    return dedent6`
<identity>
You are an expert at evolving simple database questions into more complex ones.
Your task is to take a basic question and transform it into a more sophisticated
version that requires advanced SQL techniques to answer.
</identity>

<original_question>
${question}
</original_question>

<original_sql>
${sql}
(This shows what the original question required)
</original_sql>

<database_schema>
${schema}
</database_schema>

<technique name="${technique}">
${techniqueInstruction}
</technique>

<task>
Evolve the original question using the "${technique}" technique.

Requirements:
1. The evolved question must be MORE COMPLEX than the original
2. Apply the specific technique described above
3. The evolved question must be answerable using the provided schema
4. Use natural language - no SQL keywords
5. Keep the question realistic and practical
6. The evolved question should build upon the original topic/domain
</task>

<guardrails>
- The evolved question MUST require more complex SQL than the original
- Do not ask for data that doesn't exist in the schema
- Keep the question grounded in the same domain as the original
- Make sure the question is clear and unambiguous
</guardrails>
`;
  }
});
1657
// Every supported evolution technique, in the order the instruction table
// declares them: add-aggregation, add-filter, add-join, add-reasoning,
// hypothetical. Used as the default rotation when no techniques are requested.
var ALL_TECHNIQUES = Object.keys(techniqueInstructions);
1664
/**
 * Producer that evolves each source question/SQL pair into more complex
 * variants: an agent rewrites the question using a chosen technique, then SQL
 * is generated for the rewritten question.
 */
var DepthEvolver = class extends PairProducer {
  /**
   * @param source - Source pairs or producer to evolve
   * @param adapter - Database adapter for SQL generation
   * @param options - Evolution options including techniques, count, and concurrency
   */
  constructor(source, adapter, options) {
    super();
    this.source = source;
    this.adapter = adapter;
    this.options = options;
    // One limiter per instance, shared by every task started in produce().
    this.#limit = pLimit3(this.options?.concurrency ?? 4);
  }
  #limit;
  /**
   * Yields one array of evolved pairs per source pair.
   *
   * For each source pair, `count` evolution tasks (default 1) are started
   * through the concurrency limiter and awaited together; their results are
   * yielded as a single array before the next source pair is started.
   *
   * Technique selection:
   * - when `options.techniques` is a non-empty list, task `i` of every pair
   *   uses `techniques[i % techniques.length]`;
   * - otherwise all techniques are rotated across pairs, so consecutive pairs
   *   continue where the previous one stopped.
   *
   * If any task of a pair rejects, the `Promise.all` rejection propagates out
   * of the generator.
   */
  async *produce() {
    const introspection = await this.adapter.introspect();
    const count = this.options?.count ?? 1;
    const requested = this.options?.techniques;
    // An empty list would turn every lookup into `list[NaN]` (i % 0), i.e. an
    // `undefined` technique in the prompt, so treat it as "not provided".
    const hasRequested = (requested?.length ?? 0) > 0;
    const techniques = hasRequested ? requested : ALL_TECHNIQUES;
    let pairIndex = 0;
    for await (const chunk of this.from(this.source)) {
      for (const pair of chunk) {
        const tasks = Array.from({ length: count }, (_, i) => {
          const slot = hasRequested ? i : pairIndex * count + i;
          const technique = techniques[slot % techniques.length];
          return this.#limit(
            () => this.#processTask(pair, technique, introspection)
          );
        });
        const results = await Promise.all(tasks);
        yield results;
        pairIndex++;
      }
    }
  }
  /**
   * Evolves a single pair with a single technique.
   *
   * First asks the evolver agent (cloned with `options.model`) for the evolved
   * question, going through `withRetry`; then generates SQL for that question
   * with `toSql`.
   *
   * @param pair - Source pair; `question`, `sql` and `context` are read
   * @param technique - Key into `techniqueInstructions`
   * @param introspection - Result of `adapter.introspect()`, passed to the prompt as the schema
   * @returns `{ question, sql, context, success }` where `success` is true
   *   when `toSql` reported no errors
   */
  async #processTask(pair, technique, introspection) {
    const { experimental_output } = await withRetry(
      () => generate8(
        questionEvolverAgent.clone({
          model: this.options?.model
        }),
        [user8(`Evolve this question using "${technique}": "${pair.question}"`)],
        {
          question: pair.question,
          sql: pair.sql,
          schema: introspection,
          technique,
          techniqueInstruction: techniqueInstructions[technique]
        }
      )
    );
    const evolvedQuestion = experimental_output.evolvedQuestion;
    const sqlResult = await toSql({
      input: evolvedQuestion,
      adapter: this.adapter,
      introspection,
      instructions: [],
      model: this.options?.model
    });
    return {
      question: evolvedQuestion,
      sql: sqlResult.sql,
      // The evolved pair keeps the context of the pair it was derived from.
      context: pair.context,
      success: !sqlResult.errors || sqlResult.errors.length === 0
    };
  }
};
1733
/**
 * Runs `computation` through p-retry with up to 3 retries.
 *
 * Only failures recognised as NoObjectGeneratedError or NoOutputGeneratedError
 * are retried. Each failed attempt is logged together with its error.
 *
 * @param computation - Function producing the value (or a promise of it)
 * @returns The promise returned by p-retry
 */
async function withRetry(computation) {
  const isGenerationFailure = (error) =>
    NoObjectGeneratedError.isInstance(error) || NoOutputGeneratedError.isInstance(error);
  return pRetry(computation, {
    retries: 3,
    shouldRetry: ({ error }) => isGenerationFailure(error),
    onFailedAttempt({ attemptNumber, retriesLeft, error }) {
      console.log(
        `Attempt ${attemptNumber} failed. There are ${retriesLeft} retries left.`
      );
      console.error(error);
    }
  });
}
1747
+
1748
+ // packages/text2sql/src/lib/synthesis/synthesizers/persona-generator.ts
1749
+ import { groq as groq8 } from "@ai-sdk/groq";
1750
+ import { defaultSettingsMiddleware as defaultSettingsMiddleware5, wrapLanguageModel as wrapLanguageModel5 } from "ai";
1751
+ import dedent7 from "dedent";
1752
+ import z8 from "zod";
1753
+ import { agent as agent8, generate as generate9, user as user9 } from "@deepagents/agent";
1754
/**
 * Agent that proposes user personas for a database schema.
 *
 * The prompt is built from `state.schema` and `state.count`. The structured
 * output is `{ personas: [{ role, perspective, styles }] }` with at least one
 * persona and 1-3 styles per persona, each drawn from ALL_STYLES.
 *
 * Template lines are kept flush-left: their leading whitespace is part of the
 * string that `dedent7` receives.
 */
var personaGeneratorAgent = agent8({
  name: "persona_generator",
  model: wrapLanguageModel5({
    model: groq8("openai/gpt-oss-20b"),
    middleware: defaultSettingsMiddleware5({
      settings: { temperature: 0.8, topP: 0.95, presencePenalty: 0.2 }
    })
  }),
  // Logging is opt-in through the AGENT_LOGGING environment variable.
  logging: process.env.AGENT_LOGGING === "true",
  output: z8.object({
    personas: z8
      .array(
        z8.object({
          role: z8.string().describe("The job title or role of this persona"),
          perspective: z8.string().describe(
            "Rich description of what this persona cares about when querying the database"
          ),
          styles: z8
            .array(z8.enum(ALL_STYLES))
            .min(1)
            .max(3)
            .describe("Typical communication styles for this persona (1-3 styles)")
        })
      )
      .min(1)
      .describe("List of personas who would query this database")
  }),
  prompt: (state) => {
    // A missing state renders the placeholders as "undefined", same as `state?.x`.
    const { schema, count } = state ?? {};
    return dedent7`
<identity>
You are an expert at understanding database schemas and inferring who would use them.
Your task is to analyze a database schema and generate realistic personas representing
the different types of users who would query this database.
</identity>

<database_schema>
${schema}
</database_schema>

<task>
Generate exactly ${count} distinct personas who would query this database.

For each persona, provide:
1. **role**: Their job title or role (e.g., "Financial Analyst", "Customer Support Rep")
2. **perspective**: A rich description of what they care about, including:
- What questions they typically ask
- What metrics/data points matter to them
- How they prefer data formatted or presented
- Their priorities (speed vs accuracy, detail vs summary)
- Domain-specific concerns relevant to their role
3. **styles**: 1-3 communication styles typical for this persona. Choose from:
- formal: Professional business language, complete sentences
- colloquial: Casual everyday speech, contractions
- imperative: Commands like "Show me...", "Get...", "List..."
- interrogative: Questions like "What is...", "How many..."
- descriptive: Verbose, detailed phrasing
- concise: Brief, minimal words
- vague: Ambiguous, hedging language
- metaphorical: Figurative language, analogies
- conversational: Chat-like, casual tone

Requirements:
- Personas should be realistic for the given schema
- Each persona should have distinct concerns and priorities
- Perspectives should be detailed enough to guide question paraphrasing
- Cover different levels of technical expertise (some technical, some business-focused)
- Styles should match how this persona would naturally communicate
</task>

<example>
For an e-commerce schema with orders, customers, products tables:

{
"role": "Customer Support Rep",
"perspective": "As customer support, I care about:\\n- Quick lookups by order ID or customer email\\n- Order status and shipping tracking\\n- Return and refund history\\n- Customer contact details and order history\\n- I need fast answers, not complex analysis",
"styles": ["imperative", "concise"]
}

{
"role": "Inventory Manager",
"perspective": "As inventory manager, I care about:\\n- Current stock levels and reorder points\\n- Product availability across warehouses\\n- Slow-moving inventory identification\\n- Supplier lead times and pending orders\\n- I need accurate counts, often aggregated by location",
"styles": ["formal", "interrogative"]
}
</example>

<guardrails>
- Only generate personas relevant to the actual schema provided
- Do not invent tables or data that don't exist in the schema
- Ensure perspectives are specific to the domain, not generic
</guardrails>
`;
  }
});
1842
/**
 * Generates user personas for the database behind an adapter.
 */
var PersonaGenerator = class {
  /**
   * @param adapter - Database adapter for schema introspection
   * @param options - Generation options including count and model
   */
  constructor(adapter, options) {
    this.adapter = adapter;
    this.options = options;
  }
  /**
   * Introspects the schema and asks the persona agent for `options.count`
   * personas (5 when not set).
   * @returns The `personas` array from the agent's structured output
   */
  async generate() {
    const schema = await this.adapter.introspect();
    const count = this.options?.count ?? 5;
    // Clone the shared agent so a caller-supplied model applies to this run only.
    const agentForRun = personaGeneratorAgent.clone({
      model: this.options?.model
    });
    const messages = [user9(`Generate ${count} personas for this database schema.`)];
    const generation = await generate9(agentForRun, messages, { schema, count });
    return generation.experimental_output.personas;
  }
};
1871
+
1872
+ // packages/text2sql/src/lib/agents/teachables.agent.ts
1873
+ import { groq as groq9 } from "@ai-sdk/groq";
1874
+ import { defaultSettingsMiddleware as defaultSettingsMiddleware6, wrapLanguageModel as wrapLanguageModel6 } from "ai";
1875
+ import dedent8 from "dedent";
1876
+ import z9 from "zod";
1877
+ import { agent as agent9, generate as generate10, user as user10 } from "@deepagents/agent";
1878
/**
 * Structured-output schema for the teachables author agent.
 *
 * Every top-level field is an optional array of objects with a description;
 * the helpers below only factor out that repeated shape. Each helper call
 * creates a fresh zod schema.
 */
var outputSchema = (() => {
  const str = () => z9.string();
  const optStr = () => z9.string().optional();
  // Optional, described array of objects with the given shape.
  const optionalList = (shape, description) =>
    z9.array(z9.object(shape)).optional().describe(description);

  return z9.object({
    terms: optionalList(
      { name: str(), definition: str() },
      "Domain terminology definitions"
    ),
    hints: optionalList({ text: str() }, "Helpful hints for SQL generation"),
    guardrails: optionalList(
      { rule: str(), reason: optStr(), action: optStr() },
      "Safety rules and constraints"
    ),
    explains: optionalList(
      { concept: str(), explanation: str(), therefore: optStr() },
      "Concept explanations"
    ),
    examples: optionalList(
      { question: str(), answer: str(), note: optStr() },
      "Example question-answer pairs"
    ),
    clarifications: optionalList(
      { when: str(), ask: str(), reason: str() },
      "When to ask for clarification"
    ),
    workflows: optionalList(
      {
        task: str(),
        // A workflow needs at least one step.
        steps: z9.array(z9.string()).min(1),
        triggers: z9.array(z9.string()).optional(),
        notes: optStr()
      },
      "Multi-step workflows"
    ),
    quirks: optionalList(
      { issue: str(), workaround: str() },
      "Known issues and workarounds"
    ),
    styleGuides: optionalList(
      { prefer: str(), never: optStr(), always: optStr() },
      "SQL style preferences"
    ),
    analogies: optionalList(
      {
        // An analogy relates at least two concepts.
        concept: z9.array(z9.string()).min(2),
        relationship: str(),
        insight: optStr(),
        therefore: optStr(),
        pitfall: optStr()
      },
      "Concept analogies"
    )
  });
})();
1929
/**
 * Agent that drafts "teachables" (terms, hints, guardrails, ...) for a schema.
 *
 * The prompt is built from `state.schema` and, when truthy, `state.context`,
 * which is wrapped in an <additional_context> tag. Output follows
 * `outputSchema`.
 *
 * Template lines are kept flush-left: their leading whitespace is part of the
 * string that `dedent8` receives.
 */
var teachablesAuthorAgent = agent9({
  name: "teachables-author",
  model: wrapLanguageModel6({
    model: groq9("openai/gpt-oss-20b"),
    middleware: defaultSettingsMiddleware6({
      settings: { temperature: 0.4, topP: 0.95 }
    })
  }),
  output: outputSchema,
  prompt: (state) => {
    const { schema, context } = state ?? {};
    // Rendered as an empty string when no extra context was supplied.
    const additionalContext = context ? `<additional_context>${context}</additional_context>` : "";
    return dedent8`
<identity>
You design "teachables" for a Text2SQL system. Teachables become structured XML instructions.
Choose only high-impact items that improve accuracy, safety, or clarity for this database.
</identity>

<database_schema>
${schema}
</database_schema>

${additionalContext}

<output_structure>
Output a JSON object with these optional arrays (include only relevant ones):
- terms: [{ name: string, definition: string }] - Domain terminology
- hints: [{ text: string }] - Helpful SQL generation hints
- guardrails: [{ rule: string, reason?: string, action?: string }] - Safety constraints
- explains: [{ concept: string, explanation: string, therefore?: string }] - Concept explanations
- examples: [{ question: string, answer: string, note?: string }] - Q&A examples
- clarifications: [{ when: string, ask: string, reason: string }] - Clarification triggers
- workflows: [{ task: string, steps: string[], triggers?: string[], notes?: string }] - Multi-step tasks
- quirks: [{ issue: string, workaround: string }] - Known issues
- styleGuides: [{ prefer: string, never?: string, always?: string }] - SQL style rules
- analogies: [{ concept: string[], relationship: string, insight?: string, therefore?: string, pitfall?: string }]
</output_structure>

<instructions>
1. Analyze the schema to infer domain, relationships, and sensitive columns.
2. Generate 3-10 teachables total across all categories, prioritizing:
- guardrails for PII columns (email, ssn, phone, etc)
- hints for status/enum columns
- clarifications for ambiguous terms
3. Ground everything in the schema - do not invent tables/columns.
4. Only include categories that are relevant to this schema.
</instructions>
`;
  }
});
1975
/**
 * Asks the teachables author agent for teachings and converts them with
 * `toTeachables`.
 *
 * The agent returns one optional array per category; each item is tagged with
 * its category's `type` and the categories are concatenated in a fixed order.
 *
 * @param input - State for the agent prompt (`schema` and optional `context`)
 * @param options - Optional settings; `model` is applied to the agent clone
 * @returns Whatever `toTeachables` returns for the tagged items
 */
async function toTeachings(input, options) {
  // [field on the agent output, `type` tag given to each of its items]
  const categories = [
    ["terms", "term"],
    ["hints", "hint"],
    ["guardrails", "guardrail"],
    ["explains", "explain"],
    ["examples", "example"],
    ["clarifications", "clarification"],
    ["workflows", "workflow"],
    ["quirks", "quirk"],
    ["styleGuides", "styleGuide"],
    ["analogies", "analogy"]
  ];
  const prompt = user10(
    `Analyze this database schema and generate teachings that will help an AI generate accurate SQL queries.`
  );
  const { experimental_output: result } = await generate10(
    teachablesAuthorAgent.clone({ model: options?.model }),
    [prompt],
    input
  );
  // Missing categories contribute nothing; item fields are spread after `type`.
  const generated = categories.flatMap(
    ([field, type]) => (result[field] ?? []).map((item) => ({ type, ...item }))
  );
  return toTeachables(generated);
}
2005
+
2006
+ // packages/text2sql/src/lib/synthesis/synthesizers/teachings-generator.ts
2007
/**
 * Generates teachings for the database behind an adapter, retrying when the
 * generation step fails in a way that looks transient.
 */
var TeachingsGenerator = class {
  /**
   * @param adapter - Database adapter for schema introspection
   * @param options - Generation options including context and model
   */
  constructor(adapter, options) {
    this.adapter = adapter;
    this.options = options;
  }
  /**
   * Generates teachings by introspecting the schema once and calling
   * `toTeachings`, repeating the call while it fails with a retryable error.
   *
   * @param maxRetries - Total number of attempts. Values below 1 (or values
   *   that are not numbers) are treated as 1, so at least one attempt is
   *   always made.
   * @returns The value returned by `toTeachings`
   * @throws The original thrown value when it is not retryable, or the last
   *   retryable error once all attempts are used up
   */
  async generate(maxRetries = 3) {
    const schema = await this.adapter.introspect();
    // Without the floor of 1 the loop could be skipped entirely and the
    // method would end up throwing `undefined`.
    const attempts = Math.max(1, Number(maxRetries) || 0);
    let lastError;
    for (let attempt = 0; attempt < attempts; attempt++) {
      try {
        return await toTeachings(
          {
            schema,
            context: this.options?.context
          },
          { model: this.options?.model }
        );
      } catch (error) {
        lastError = error;
        if (!this.#isRetryable(error)) {
          throw error;
        }
      }
    }
    throw lastError;
  }
  /**
   * Decides whether a failure is worth another attempt, based on the error's
   * message and name. Thrown values without a string `message`/`name` are
   * never retryable, so they are rethrown as-is instead of causing a
   * TypeError inside the catch block.
   */
  #isRetryable(error) {
    const message = typeof error?.message === "string" ? error.message : "";
    const name = typeof error?.name === "string" ? error.name : "";
    return message.includes("parse") || message.includes("schema") || message.includes("No object generated") || name.includes("AI_");
  }
};
2045
+ export {
2046
+ ALL_STYLES,
2047
+ BaseContextualExtractor,
2048
+ BreadthEvolver,
2049
+ DeduplicatedProducer,
2050
+ DepthEvolver,
2051
+ FilteredProducer,
2052
+ FullContextExtractor,
2053
+ LastQueryExtractor,
2054
+ MessageExtractor,
2055
+ PairProducer,
2056
+ PersonaGenerator,
2057
+ SchemaSynthesizer,
2058
+ SegmentedContextExtractor,
2059
+ SqlExtractor,
2060
+ TeachingsGenerator,
2061
+ ValidatedProducer,
2062
+ WindowedContextExtractor,
2063
+ contextResolverAgent,
2064
+ formatConversation,
2065
+ getMessageText,
2066
+ styleInstructions,
2067
+ toPairs
2068
+ };
2069
+ //# sourceMappingURL=index.js.map