@deepagents/text2sql 0.3.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +167 -0
- package/dist/finetune/convert-to-gguf.d.ts +18 -0
- package/dist/finetune/convert-to-gguf.d.ts.map +1 -0
- package/dist/finetune/run-finetune.d.ts +23 -0
- package/dist/finetune/run-finetune.d.ts.map +1 -0
- package/dist/finetune/run-mlx.d.ts +22 -0
- package/dist/finetune/run-mlx.d.ts.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2500 -318
- package/dist/index.js.map +4 -4
- package/dist/lib/adapters/adapter.d.ts +3 -3
- package/dist/lib/adapters/adapter.d.ts.map +1 -1
- package/dist/lib/adapters/{grounding.ticket.d.ts → groundings/abstract.grounding.d.ts} +2 -2
- package/dist/lib/adapters/groundings/abstract.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/groundings/column-stats.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/column-stats.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/column-values.grounding.d.ts +76 -0
- package/dist/lib/adapters/groundings/column-values.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/groundings/constraint.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/constraint.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/context.d.ts +1 -1
- package/dist/lib/adapters/groundings/context.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/{grounding.d.ts → index.d.ts} +8 -5
- package/dist/lib/adapters/groundings/index.d.ts.map +1 -0
- package/dist/lib/adapters/groundings/{grounding.js → index.js} +411 -206
- package/dist/lib/adapters/groundings/index.js.map +7 -0
- package/dist/lib/adapters/groundings/indexes.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/indexes.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/info.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/info.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/report.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/report.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/row-count.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/row-count.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/table.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/table.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/view.grounding.d.ts +1 -1
- package/dist/lib/adapters/groundings/view.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/mysql/column-stats.mysql.grounding.d.ts +14 -0
- package/dist/lib/adapters/mysql/column-stats.mysql.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/mysql/column-values.mysql.grounding.d.ts +22 -0
- package/dist/lib/adapters/mysql/column-values.mysql.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/mysql/constraint.mysql.grounding.d.ts +13 -0
- package/dist/lib/adapters/mysql/constraint.mysql.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/mysql/index.d.ts +44 -0
- package/dist/lib/adapters/mysql/index.d.ts.map +1 -0
- package/dist/lib/adapters/mysql/indexes.mysql.grounding.d.ts +13 -0
- package/dist/lib/adapters/mysql/indexes.mysql.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/mysql/info.mysql.grounding.d.ts +13 -0
- package/dist/lib/adapters/mysql/info.mysql.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/mysql/mysql.d.ts +33 -0
- package/dist/lib/adapters/mysql/mysql.d.ts.map +1 -0
- package/dist/lib/adapters/mysql/row-count.mysql.grounding.d.ts +13 -0
- package/dist/lib/adapters/mysql/row-count.mysql.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/mysql/table.mysql.grounding.d.ts +21 -0
- package/dist/lib/adapters/mysql/table.mysql.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/mysql/view.mysql.grounding.d.ts +18 -0
- package/dist/lib/adapters/mysql/view.mysql.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/postgres/column-stats.postgres.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/postgres/column-values.postgres.grounding.d.ts +17 -0
- package/dist/lib/adapters/postgres/column-values.postgres.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/postgres/index.d.ts +4 -4
- package/dist/lib/adapters/postgres/index.d.ts.map +1 -1
- package/dist/lib/adapters/postgres/index.js +233 -33
- package/dist/lib/adapters/postgres/index.js.map +4 -4
- package/dist/lib/adapters/sqlite/column-values.sqlite.grounding.d.ts +17 -0
- package/dist/lib/adapters/sqlite/column-values.sqlite.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/sqlite/constraint.sqlite.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/sqlite/index.d.ts +4 -4
- package/dist/lib/adapters/sqlite/index.d.ts.map +1 -1
- package/dist/lib/adapters/sqlite/index.js +214 -46
- package/dist/lib/adapters/sqlite/index.js.map +4 -4
- package/dist/lib/adapters/sqlserver/column-values.sqlserver.grounding.d.ts +17 -0
- package/dist/lib/adapters/sqlserver/column-values.sqlserver.grounding.d.ts.map +1 -0
- package/dist/lib/adapters/sqlserver/index.d.ts +4 -4
- package/dist/lib/adapters/sqlserver/index.d.ts.map +1 -1
- package/dist/lib/adapters/sqlserver/index.js +179 -32
- package/dist/lib/adapters/sqlserver/index.js.map +4 -4
- package/dist/lib/agents/bi.agent.d.ts +14 -0
- package/dist/lib/agents/bi.agent.d.ts.map +1 -0
- package/dist/lib/agents/chat1.agent.d.ts +50 -0
- package/dist/lib/agents/chat1.agent.d.ts.map +1 -0
- package/dist/lib/agents/chat2.agent.d.ts +68 -0
- package/dist/lib/agents/chat2.agent.d.ts.map +1 -0
- package/dist/lib/agents/chat3.agent.d.ts +80 -0
- package/dist/lib/agents/chat3.agent.d.ts.map +1 -0
- package/dist/lib/agents/chat4.agent.d.ts +88 -0
- package/dist/lib/agents/chat4.agent.d.ts.map +1 -0
- package/dist/lib/agents/developer.agent.d.ts +31 -0
- package/dist/lib/agents/developer.agent.d.ts.map +1 -0
- package/dist/lib/agents/question.agent.d.ts +23 -0
- package/dist/lib/agents/question.agent.d.ts.map +1 -0
- package/dist/lib/agents/sql.agent.d.ts +44 -0
- package/dist/lib/agents/sql.agent.d.ts.map +1 -0
- package/dist/lib/agents/teachables.agent.d.ts +8 -9
- package/dist/lib/agents/teachables.agent.d.ts.map +1 -1
- package/dist/lib/agents/text2sql.agent.d.ts +0 -1
- package/dist/lib/agents/text2sql.agent.d.ts.map +1 -1
- package/dist/lib/checkpoint.d.ts +99 -0
- package/dist/lib/checkpoint.d.ts.map +1 -0
- package/dist/lib/instructions.js +50 -21
- package/dist/lib/instructions.js.map +2 -2
- package/dist/lib/sql.d.ts +125 -3
- package/dist/lib/sql.d.ts.map +1 -1
- package/dist/lib/syntheize.d.ts +2 -0
- package/dist/lib/syntheize.d.ts.map +1 -0
- package/dist/lib/synthesis/decorators/deduplicated-producer.d.ts +26 -0
- package/dist/lib/synthesis/decorators/deduplicated-producer.d.ts.map +1 -0
- package/dist/lib/synthesis/decorators/filtered-producer.d.ts +26 -0
- package/dist/lib/synthesis/decorators/filtered-producer.d.ts.map +1 -0
- package/dist/lib/synthesis/decorators/index.d.ts +7 -0
- package/dist/lib/synthesis/decorators/index.d.ts.map +1 -0
- package/dist/lib/synthesis/decorators/validated-producer.d.ts +33 -0
- package/dist/lib/synthesis/decorators/validated-producer.d.ts.map +1 -0
- package/dist/lib/synthesis/extractors/base-contextual-extractor.d.ts +76 -0
- package/dist/lib/synthesis/extractors/base-contextual-extractor.d.ts.map +1 -0
- package/dist/lib/synthesis/extractors/full-context-extractor.d.ts +25 -0
- package/dist/lib/synthesis/extractors/full-context-extractor.d.ts.map +1 -0
- package/dist/lib/synthesis/extractors/index.d.ts +8 -0
- package/dist/lib/synthesis/extractors/index.d.ts.map +1 -0
- package/dist/lib/synthesis/extractors/last-query-extractor.d.ts +30 -0
- package/dist/lib/synthesis/extractors/last-query-extractor.d.ts.map +1 -0
- package/dist/lib/synthesis/extractors/message-extractor.d.ts +27 -0
- package/dist/lib/synthesis/extractors/message-extractor.d.ts.map +1 -0
- package/dist/lib/synthesis/extractors/segmented-context-extractor.d.ts +48 -0
- package/dist/lib/synthesis/extractors/segmented-context-extractor.d.ts.map +1 -0
- package/dist/lib/synthesis/extractors/sql-extractor.d.ts +27 -0
- package/dist/lib/synthesis/extractors/sql-extractor.d.ts.map +1 -0
- package/dist/lib/synthesis/extractors/windowed-context-extractor.d.ts +30 -0
- package/dist/lib/synthesis/extractors/windowed-context-extractor.d.ts.map +1 -0
- package/dist/lib/synthesis/index.d.ts +6 -0
- package/dist/lib/synthesis/index.d.ts.map +1 -0
- package/dist/lib/synthesis/index.js +2172 -0
- package/dist/lib/synthesis/index.js.map +7 -0
- package/dist/lib/synthesis/synthesizers/breadth-evolver.d.ts +34 -0
- package/dist/lib/synthesis/synthesizers/breadth-evolver.d.ts.map +1 -0
- package/dist/lib/synthesis/synthesizers/depth-evolver.d.ts +41 -0
- package/dist/lib/synthesis/synthesizers/depth-evolver.d.ts.map +1 -0
- package/dist/lib/synthesis/synthesizers/index.d.ts +7 -0
- package/dist/lib/synthesis/synthesizers/index.d.ts.map +1 -0
- package/dist/lib/synthesis/synthesizers/persona-generator.d.ts +34 -0
- package/dist/lib/synthesis/synthesizers/persona-generator.d.ts.map +1 -0
- package/dist/lib/synthesis/synthesizers/schema-synthesizer.d.ts +39 -0
- package/dist/lib/synthesis/synthesizers/schema-synthesizer.d.ts.map +1 -0
- package/dist/lib/synthesis/synthesizers/styles.d.ts +8 -0
- package/dist/lib/synthesis/synthesizers/styles.d.ts.map +1 -0
- package/dist/lib/synthesis/synthesizers/teachings-generator.d.ts +32 -0
- package/dist/lib/synthesis/synthesizers/teachings-generator.d.ts.map +1 -0
- package/dist/lib/synthesis/types.d.ts +26 -0
- package/dist/lib/synthesis/types.d.ts.map +1 -0
- package/dist/lib/teach/teachables.d.ts +18 -3
- package/dist/lib/teach/teachables.d.ts.map +1 -1
- package/dist/lib/teach/teachings.d.ts +9 -2
- package/dist/lib/teach/teachings.d.ts.map +1 -1
- package/package.json +38 -15
- package/dist/lib/adapters/grounding.ticket.d.ts.map +0 -1
- package/dist/lib/adapters/groundings/grounding.d.ts.map +0 -1
- package/dist/lib/adapters/groundings/grounding.js.map +0 -7
- package/dist/lib/adapters/groundings/low-cardinality.grounding.d.ts +0 -35
- package/dist/lib/adapters/groundings/low-cardinality.grounding.d.ts.map +0 -1
- package/dist/lib/adapters/postgres/low-cardinality.postgres.grounding.d.ts +0 -14
- package/dist/lib/adapters/postgres/low-cardinality.postgres.grounding.d.ts.map +0 -1
- package/dist/lib/adapters/sqlite/low-cardinality.sqlite.grounding.d.ts +0 -14
- package/dist/lib/adapters/sqlite/low-cardinality.sqlite.grounding.d.ts.map +0 -1
- package/dist/lib/adapters/sqlserver/low-cardinality.sqlserver.grounding.d.ts +0 -14
- package/dist/lib/adapters/sqlserver/low-cardinality.sqlserver.grounding.d.ts.map +0 -1
|
@@ -0,0 +1,2172 @@
|
|
|
1
|
+
// packages/text2sql/src/lib/synthesis/types.ts
|
|
2
|
+
var PairProducer = class {
  /**
   * Normalize either a plain array of pairs or another producer into an
   * async iterable of chunks. An array becomes a one-chunk async generator;
   * a producer is delegated to its own produce() stream.
   */
  from(producer) {
    if (Array.isArray(producer)) {
      return (async function* (chunk) {
        yield chunk;
      })(producer);
    }
    return producer.produce();
  }
  /** Drain this producer's chunks into a single flat array of pairs. */
  toPairs() {
    return toPairs(this);
  }
};
|
|
12
|
+
/**
 * Collect every chunk emitted by a producer into one flat array.
 * @param producer - Object exposing an async-generator produce() of pair chunks
 * @returns All pairs in emission order
 */
async function toPairs(producer) {
  const collected = [];
  for await (const chunk of producer.produce()) {
    for (const pair of chunk) {
      collected.push(pair);
    }
  }
  return collected;
}
|
|
19
|
+
|
|
20
|
+
// packages/text2sql/src/lib/synthesis/decorators/filtered-producer.ts
|
|
21
|
+
var FilteredProducer = class extends PairProducer {
  /**
   * @param producer - Source producer to filter
   * @param options - Filter configuration
   */
  constructor(producer, options = {}) {
    super();
    this.producer = producer;
    this.options = options;
  }
  /**
   * Produces pairs filtered by success status, table usage, and custom predicates.
   * Chunks that become empty after filtering are dropped, not yielded.
   * @returns Pairs matching all configured filter criteria
   */
  async *produce() {
    const keep = (pair) => {
      // Failed pairs are dropped unless successOnly was explicitly set to false.
      if (this.options.successOnly !== false && !pair.success) {
        return false;
      }
      const tables = this.options.tables;
      if (tables?.length) {
        // Cheap case-insensitive substring match — not a real SQL parse.
        const sql = pair.sql.toLowerCase();
        if (!tables.some((name) => sql.includes(name.toLowerCase()))) {
          return false;
        }
      }
      const predicate = this.options.filter;
      return !predicate || Boolean(predicate(pair));
    };
    for await (const batch of this.producer.produce()) {
      const kept = batch.filter(keep);
      if (kept.length > 0) {
        yield kept;
      }
    }
  }
};
|
|
59
|
+
|
|
60
|
+
// packages/text2sql/src/lib/synthesis/decorators/deduplicated-producer.ts
|
|
61
|
+
var DeduplicatedProducer = class extends PairProducer {
  /**
   * @param producer - Source producer to deduplicate
   * @param options - Deduplication configuration
   */
  constructor(producer, options = {}) {
    super();
    this.producer = producer;
    this.options = options;
  }
  /**
   * Produces pairs with duplicates removed based on the configured strategy
   * ("sql-only", "question-only", or the default "exact" combining both).
   * The seen-set spans all chunks, so deduplication is stream-wide.
   * @returns Unique pairs after deduplication
   */
  async *produce() {
    const { strategy = "exact" } = this.options;
    const seen = new Set();
    for await (const batch of this.producer.produce()) {
      const fresh = batch.filter((pair) => {
        const key = this.#keyFor(pair, strategy);
        if (seen.has(key)) {
          return false;
        }
        seen.add(key);
        return true;
      });
      if (fresh.length > 0) {
        yield fresh;
      }
    }
  }
  /** Build the dedupe key for a pair under the given strategy. */
  #keyFor(pair, strategy) {
    if (strategy === "sql-only") {
      return this.normalizeSQL(pair.sql);
    }
    if (strategy === "question-only") {
      return pair.question.toLowerCase().trim();
    }
    // "exact" and any unrecognized strategy combine question and SQL.
    return `${pair.question.toLowerCase().trim()}|||${this.normalizeSQL(pair.sql)}`;
  }
  /** Lowercase and collapse whitespace so formatting differences don't defeat dedupe. */
  normalizeSQL(sql) {
    return sql.toLowerCase().replace(/\s+/g, " ").trim();
  }
};
|
|
107
|
+
|
|
108
|
+
// packages/text2sql/src/lib/synthesis/decorators/validated-producer.ts
|
|
109
|
+
var ValidatedProducer = class extends PairProducer {
  /**
   * @param producer - Source producer to validate
   * @param adapter - Database adapter for SQL validation
   * @param options - Validation configuration
   */
  constructor(producer, adapter, options = {}) {
    super();
    this.producer = producer;
    this.adapter = adapter;
    this.options = options;
  }
  /**
   * Produces pairs with SQL validation applied, optionally executing queries.
   * Invalid pairs are dropped entirely when `removeInvalid` is set, otherwise
   * emitted with `success: false` and the validation error attached.
   * @returns Validated pairs with error/rowCount metadata attached
   */
  async *produce() {
    for await (const batch of this.producer.produce()) {
      const checked = [];
      for (const pair of batch) {
        const error = await this.adapter.validate(pair.sql);
        if (error) {
          if (!this.options.removeInvalid) {
            checked.push({ ...pair, success: false, error });
          }
        } else {
          checked.push({ ...pair, success: true, rowCount: await this.#rowCount(pair.sql) });
        }
      }
      if (checked.length > 0) {
        yield checked;
      }
    }
  }
  /**
   * Execute the query (only when options.execute is set) and report a row
   * count for array results. Execution failures are swallowed deliberately —
   * validation already passed, so the pair is still emitted, just without a
   * row count.
   */
  async #rowCount(sql) {
    if (!this.options.execute) {
      return void 0;
    }
    try {
      const result = await this.adapter.execute(sql);
      return Array.isArray(result) ? result.length : void 0;
    } catch {
      return void 0;
    }
  }
};
|
|
160
|
+
|
|
161
|
+
// packages/text2sql/src/lib/synthesis/extractors/message-extractor.ts
|
|
162
|
+
import {
|
|
163
|
+
getToolOrDynamicToolName as getToolOrDynamicToolName2,
|
|
164
|
+
isToolOrDynamicToolUIPart as isToolOrDynamicToolUIPart2
|
|
165
|
+
} from "ai";
|
|
166
|
+
|
|
167
|
+
// packages/text2sql/src/lib/synthesis/extractors/base-contextual-extractor.ts
|
|
168
|
+
import { groq } from "@ai-sdk/groq";
|
|
169
|
+
import {
|
|
170
|
+
getToolOrDynamicToolName,
|
|
171
|
+
isTextUIPart,
|
|
172
|
+
isToolOrDynamicToolUIPart
|
|
173
|
+
} from "ai";
|
|
174
|
+
import dedent from "dedent";
|
|
175
|
+
import z from "zod";
|
|
176
|
+
import { agent, generate, user } from "@deepagents/agent";
|
|
177
|
+
// LLM agent that rewrites a multi-turn conversation plus its executed SQL into
// a single standalone natural-language question. The zod output schema means
// `experimental_output.question` is always a plain string.
// NOTE(review): model id "openai/gpt-oss-20b" routed through Groq — confirm availability.
var contextResolverAgent = agent({
  name: "context_resolver",
  model: groq("openai/gpt-oss-20b"),
  output: z.object({
    question: z.string().describe(
      "A standalone natural language question that the SQL query answers"
    )
  }),
  // Prompt is rebuilt on every call from agent state: optional schema
  // introspection, the formatted conversation transcript, and the SQL.
  prompt: (state) => dedent`
    <identity>
    You are an expert at understanding conversational context and generating clear,
    standalone questions from multi-turn conversations.
    </identity>

    ${state?.introspection ? `<schema>
    ${state.introspection}
    </schema>` : ""}

    <conversation>
    ${state?.conversation}
    </conversation>

    <sql>
    ${state?.sql}
    </sql>

    <task>
    Given the conversation above and the SQL query that was executed,
    generate a single, standalone natural language question that:
    1. Fully captures the user's intent without needing prior context
    2. Uses natural business language (not SQL terminology)
    3. Could be asked by someone who hasn't seen the conversation
    4. Accurately represents what the SQL query answers
    </task>

    <examples>
    Conversation: "Show me customers" → "Filter to NY" → "Sort by revenue"
    SQL: SELECT * FROM customers WHERE region = 'NY' ORDER BY revenue DESC
    Question: "Show me customers in the NY region sorted by revenue"

    Conversation: "What were sales last month?" → "Break it down by category"
    SQL: SELECT category, SUM(amount) FROM sales WHERE date >= '2024-11-01' GROUP BY category
    Question: "What were sales by category for last month?"
    </examples>
  `
});
|
|
223
|
+
/**
 * Concatenate all text parts of a UI message into one trimmed string.
 * Non-text parts (tool calls, etc.) are ignored.
 */
function getMessageText(message) {
  const pieces = [];
  for (const part of message.parts) {
    if (isTextUIPart(part)) {
      pieces.push(part.text);
    }
  }
  return pieces.join(" ").trim();
}
|
|
227
|
+
/**
 * Render transcript lines as a numbered, newline-joined conversation,
 * e.g. "[1] User: hi\n[2] Assistant: hello".
 */
function formatConversation(messages) {
  const numbered = messages.map((text, index) => `[${index + 1}] ${text}`);
  return numbered.join("\n");
}
|
|
230
|
+
// Template-method base class for extractors that pair executed SQL with
// conversational context. Subclasses implement two hooks:
//   onUserMessage(text)    - how user turns are accumulated into `context`
//   getContextSnapshot()   - which slice of context accompanies each SQL
// Not safe for concurrent produce() calls: `context` and `results` are
// instance fields that produce() resets at the start of each run.
var BaseContextualExtractor = class extends PairProducer {
  constructor(messages, adapter, options = {}) {
    super();
    this.messages = messages;
    this.adapter = adapter;
    this.options = options;
  }
  // Running transcript lines ("User: ..." / "Assistant: ...") for the current pass.
  context = [];
  // Extracted { sql, success, conversationContext } entries awaiting resolution.
  results = [];
  /**
   * Template method - defines the extraction algorithm skeleton.
   * Subclasses customize behavior via hooks, not by overriding this method.
   * Phase 1 mines the chat history for SQL tool calls with context; phase 2
   * asks the LLM to turn each into a standalone question, yielding one
   * single-pair chunk per extracted SQL.
   */
  async *produce() {
    this.context = [];
    this.results = [];
    const { includeFailures = false, toolName = "db_query" } = this.options;
    await this.extractSqlsWithContext(toolName, includeFailures);
    if (this.results.length === 0) {
      // Nothing extracted — skip the adapter introspection entirely.
      return;
    }
    const introspection = await this.adapter.introspect();
    yield* this.resolveQuestions(introspection);
  }
  /**
   * Core extraction loop - iterates through messages and calls hooks.
   * User turns with text go through the onUserMessage hook; assistant turns
   * are mined for matching tool calls.
   */
  async extractSqlsWithContext(toolName, includeFailures) {
    for (const message of this.messages) {
      if (message.role === "user") {
        const text = getMessageText(message);
        if (text) {
          await this.onUserMessage(text);
        }
        continue;
      }
      if (message.role === "assistant") {
        await this.extractFromAssistant(message, toolName, includeFailures);
      }
    }
  }
  /**
   * Extract SQL from assistant message parts.
   * Only tool parts matching `toolName` with a non-empty input.sql and a
   * terminal state ("output-available" or "output-error") are considered.
   * The assistant's own text is appended to context AFTER extraction, so a
   * snapshot never contains the reply that carries the SQL itself.
   */
  async extractFromAssistant(message, toolName, includeFailures) {
    for (const part of message.parts) {
      if (!isToolOrDynamicToolUIPart(part)) {
        continue;
      }
      if (getToolOrDynamicToolName(part) !== toolName) {
        continue;
      }
      const toolInput = "input" in part ? part.input : void 0;
      if (!toolInput?.sql) {
        continue;
      }
      const success = part.state === "output-available";
      const failed = part.state === "output-error";
      if (failed && !includeFailures) {
        continue;
      }
      if (!success && !failed) {
        // Neither terminal state — the tool call is still streaming/pending.
        continue;
      }
      const snapshot = this.getContextSnapshot();
      if (snapshot.length === 0) {
        // No surrounding conversation to resolve a question from.
        continue;
      }
      this.results.push({
        sql: toolInput.sql,
        success,
        conversationContext: snapshot
      });
    }
    const assistantText = getMessageText(message);
    if (assistantText) {
      this.context.push(`Assistant: ${assistantText}`);
    }
  }
  /**
   * Resolve extracted SQL contexts into standalone questions using LLM.
   * One sequential generate() call per extracted item; each result is
   * yielded immediately as a single-pair chunk.
   */
  async *resolveQuestions(introspection) {
    for (const item of this.results) {
      const { experimental_output } = await generate(
        contextResolverAgent,
        [user("Generate a standalone question for this SQL query.")],
        {
          conversation: formatConversation(item.conversationContext),
          sql: item.sql,
          introspection
        }
      );
      yield [
        {
          question: experimental_output.question,
          sql: item.sql,
          context: item.conversationContext,
          success: item.success
        }
      ];
    }
  }
};
|
|
334
|
+
|
|
335
|
+
// packages/text2sql/src/lib/synthesis/extractors/message-extractor.ts
|
|
336
|
+
var MessageExtractor = class extends PairProducer {
  /**
   * @param messages - Chat history to extract pairs from
   * @param options - Extraction configuration
   */
  constructor(messages, options = {}) {
    super();
    this.messages = messages;
    this.options = options;
  }
  /**
   * Extracts question-SQL pairs by pairing each matching tool call on an
   * assistant message with the most recent preceding user message.
   * @returns Pairs extracted from db_query tool invocations
   */
  async *produce() {
    const { includeFailures = false, toolName = "db_query" } = this.options;
    let pendingUser = null;
    for (const message of this.messages) {
      if (message.role === "user") {
        pendingUser = message;
      } else if (message.role === "assistant" && pendingUser) {
        yield* this.#pairsFromAssistant(message, pendingUser, toolName, includeFailures);
      }
    }
  }
  /** Walk one assistant message's parts and emit a one-pair chunk per matching tool call. */
  async *#pairsFromAssistant(message, userMessage, toolName, includeFailures) {
    for (const part of message.parts) {
      if (!isToolOrDynamicToolUIPart2(part)) {
        continue;
      }
      if (getToolOrDynamicToolName2(part) !== toolName) {
        continue;
      }
      const input = "input" in part ? part.input : void 0;
      if (!input?.sql) {
        continue;
      }
      const succeeded = part.state === "output-available";
      const errored = part.state === "output-error";
      // Skip calls that are still streaming, and errors unless requested.
      if (!succeeded && !errored) {
        continue;
      }
      if (errored && !includeFailures) {
        continue;
      }
      const question = getMessageText(userMessage);
      if (!question) {
        continue;
      }
      yield [{ question, sql: input.sql, success: succeeded }];
    }
  }
};
|
|
394
|
+
|
|
395
|
+
// packages/text2sql/src/lib/synthesis/extractors/sql-extractor.ts
|
|
396
|
+
import { groq as groq2 } from "@ai-sdk/groq";
|
|
397
|
+
import dedent2 from "dedent";
|
|
398
|
+
import z2 from "zod";
|
|
399
|
+
import { agent as agent2, generate as generate2, user as user2 } from "@deepagents/agent";
|
|
400
|
+
// LLM agent that describes a SQL query as a natural-language business
// question. The zod output schema guarantees `experimental_output.question`
// is a plain string.
// NOTE(review): uses a different model ("llama-3.3-70b-versatile") than the
// context resolver agent — presumably intentional; confirm.
var sqlToQuestionAgent = agent2({
  name: "sql_to_question",
  model: groq2("llama-3.3-70b-versatile"),
  output: z2.object({
    question: z2.string().describe("A natural language question that the SQL query answers")
  }),
  // Prompt rebuilt per call from agent state: schema introspection + the SQL.
  prompt: (state) => dedent2`
    <identity>
    You are an expert at understanding SQL queries and generating clear,
    natural language questions that describe what the query retrieves.
    </identity>

    <schema>
    ${state?.introspection}
    </schema>

    <sql>
    ${state?.sql}
    </sql>

    <task>
    Given the database schema and the SQL query above, generate a single
    natural language question that:
    1. Accurately describes what information the query retrieves
    2. Uses natural business language (not SQL terminology)
    3. Could be asked by a non-technical user
    4. Is concise but complete
    </task>

    <examples>
    SQL: SELECT COUNT(*) FROM customers WHERE region = 'NY'
    Question: "How many customers do we have in New York?"

    SQL: SELECT product_name, SUM(quantity) as total FROM orders GROUP BY product_name ORDER BY total DESC LIMIT 10
    Question: "What are our top 10 products by quantity sold?"

    SQL: SELECT c.name, COUNT(o.id) FROM customers c LEFT JOIN orders o ON c.id = o.customer_id GROUP BY c.id HAVING COUNT(o.id) = 0
    Question: "Which customers have never placed an order?"
    </examples>
  `
});
|
|
441
|
+
// Generates a natural-language question for each given SQL query via an LLM,
// optionally validating the SQL through the database adapter first.
var SqlExtractor = class extends PairProducer {
  // Private state: queries to describe, DB adapter, extraction options.
  #sqls;
  #adapter;
  #options;
  /**
   * @param sql - SQL query or queries to generate questions for
   * @param adapter - Database adapter for validation and schema introspection
   * @param options - Extraction configuration
   */
  constructor(sql, adapter, options = {}) {
    super();
    // Accept a single query or an array; normalize to an array.
    this.#sqls = Array.isArray(sql) ? sql : [sql];
    this.#adapter = adapter;
    this.#options = options;
  }
  /**
   * Generates natural language questions for each SQL query using an LLM.
   * Schema introspection is fetched once and reused for every query. When
   * `validateSql` (default true) is enabled, invalid queries are skipped
   * entirely if `skipInvalid` is set, otherwise emitted with success:false.
   * @returns Pairs with generated questions and original SQL
   */
  async *produce() {
    const { validateSql = true, skipInvalid = false } = this.#options;
    const introspection = await this.#adapter.introspect();
    for (const sql of this.#sqls) {
      let isValid = true;
      if (validateSql) {
        const error = await this.#adapter.validate(sql);
        // Adapter contract appears to be: nullish result means "valid" — TODO confirm.
        isValid = error === void 0 || error === null;
        if (!isValid && skipInvalid) {
          continue;
        }
      }
      // One sequential LLM call per query; emitted as a single-pair chunk.
      const { experimental_output } = await generate2(
        sqlToQuestionAgent,
        [user2("Generate a natural language question for this SQL query.")],
        {
          sql,
          introspection
        }
      );
      yield [
        {
          question: experimental_output.question,
          sql,
          success: isValid
        }
      ];
    }
  }
};
|
|
490
|
+
|
|
491
|
+
// packages/text2sql/src/lib/synthesis/extractors/full-context-extractor.ts
|
|
492
|
+
var FullContextExtractor = class extends BaseContextualExtractor {
  constructor(messages, adapter, options = {}) {
    super(messages, adapter, options);
  }
  /**
   * Record a user turn; the full-history strategy keeps every message.
   */
  async onUserMessage(text) {
    this.context.push(`User: ${text}`);
  }
  /**
   * Snapshot is a shallow copy of the entire accumulated context.
   */
  getContextSnapshot() {
    return this.context.slice();
  }
};
|
|
509
|
+
|
|
510
|
+
// packages/text2sql/src/lib/synthesis/extractors/windowed-context-extractor.ts
|
|
511
|
+
var WindowedContextExtractor = class extends BaseContextualExtractor {
  // Maximum number of trailing context lines to include in a snapshot.
  windowSize;
  constructor(messages, adapter, options) {
    super(messages, adapter, options);
    this.windowSize = options.windowSize;
  }
  /**
   * Record a user turn; the full history is kept and windowing is applied
   * only when a snapshot is taken.
   */
  async onUserMessage(text) {
    this.context.push(`User: ${text}`);
  }
  /**
   * Return only the last `windowSize` context lines as a fresh array.
   * Array#slice clamps a negative start at 0, so when the context is shorter
   * than the window this yields a copy of the whole array — the same result
   * as the explicit length check it replaces.
   */
  getContextSnapshot() {
    return this.context.slice(-this.windowSize);
  }
};
|
|
533
|
+
|
|
534
|
+
// packages/text2sql/src/lib/synthesis/extractors/segmented-context-extractor.ts
|
|
535
|
+
import { groq as groq3 } from "@ai-sdk/groq";
|
|
536
|
+
import dedent3 from "dedent";
|
|
537
|
+
import z3 from "zod";
|
|
538
|
+
import { agent as agent3, generate as generate3, user as user3 } from "@deepagents/agent";
|
|
539
|
+
// LLM agent that decides whether a new user message changes the topic of the
// ongoing conversation. Structured zod output yields a boolean decision plus
// a short rationale string.
var topicChangeAgent = agent3({
  name: "topic_change_detector",
  model: groq3("openai/gpt-oss-20b"),
  output: z3.object({
    isTopicChange: z3.boolean().describe("Whether the new message represents a topic change"),
    reason: z3.string().describe("Brief explanation for the decision")
  }),
  // Prompt rebuilt per call from agent state: the prior context (if any) and
  // the incoming message.
  prompt: (state) => dedent3`
    <identity>
    You are an expert at understanding conversational flow and detecting topic changes.
    </identity>

    <conversation_context>
    ${state?.context || "(no prior context)"}
    </conversation_context>

    <new_message>
    ${state?.newMessage}
    </new_message>

    <task>
    Determine if the new message represents a significant topic change from the
    prior conversation context. A topic change occurs when:
    1. The user asks about a completely different entity/table/domain
    2. The user starts a new analytical question unrelated to prior discussion
    3. There's a clear shift in what data or metrics are being discussed

    NOT a topic change:
    - Follow-up questions refining the same query ("filter by...", "sort by...")
    - Questions about the same entities with different conditions
    - Requests for more details on the same topic
    </task>

    <examples>
    Context: "Show me customers in NY" → "Sort by revenue"
    New: "Filter to those with orders over $1000"
    Decision: NOT a topic change (still refining customer query)

    Context: "Show me customers in NY" → "Sort by revenue"
    New: "What were our total sales last quarter?"
    Decision: Topic change (shifted from customers to sales metrics)

    Context: "List all products"
    New: "How many orders did we have last month?"
    Decision: Topic change (products → orders/sales)
    </examples>
  `
});
|
|
587
|
+
// Context extractor that resets its running conversation whenever the LLM
// judges a new user message to be a topic change. Before resetting, the
// triggering message is rewritten into standalone form so its meaning
// survives the reset.
var SegmentedContextExtractor = class extends BaseContextualExtractor {
  constructor(messages, adapter, options = {}) {
    super(messages, adapter, options);
  }
  /**
   * Record a user message, starting a fresh segment on topic change.
   * A snapshot of the context is taken before any await so concurrent
   * mutation of `this.context` cannot skew the async LLM calls.
   */
  async onUserMessage(text) {
    if (this.context.length < 2) {
      this.context.push(`User: ${text}`);
      return;
    }
    const snapshot = [...this.context];
    const changed = await this.detectTopicChange(text, snapshot);
    if (!changed) {
      this.context.push(`User: ${text}`);
      return;
    }
    const resolved = await this.resolveToStandalone(text, snapshot);
    this.context = [`User: ${resolved}`];
  }
  /** Copy of all context lines in the current topic segment. */
  getContextSnapshot() {
    return [...this.context];
  }
  /**
   * Ask the topic-change agent whether `newMessage` starts a new topic.
   * @param newMessage - The new user message to check.
   * @param contextSnapshot - Context captured before this async call.
   * @returns boolean flag from the agent's structured output.
   */
  async detectTopicChange(newMessage, contextSnapshot) {
    const { experimental_output } = await generate3(
      topicChangeAgent,
      [user3("Determine if this is a topic change.")],
      {
        context: formatConversation(contextSnapshot),
        newMessage
      }
    );
    return experimental_output.isTopicChange;
  }
  /**
   * Rewrite a context-dependent message into a standalone question so it can
   * seed the next segment after the context is cleared.
   * @param text - The user message to resolve.
   * @param contextSnapshot - Context captured before this async call.
   */
  async resolveToStandalone(text, contextSnapshot) {
    const { experimental_output } = await generate3(
      contextResolverAgent,
      [user3("Generate a standalone question for this message.")],
      {
        conversation: formatConversation([...contextSnapshot, `User: ${text}`]),
        // No SQL yet; we are only resolving the question text.
        sql: ""
      }
    );
    return experimental_output.question;
  }
};
|
|
652
|
+
|
|
653
|
+
// packages/text2sql/src/lib/synthesis/extractors/last-query-extractor.ts
|
|
654
|
+
import { generate as generate4, user as user4 } from "@deepagents/agent";
|
|
655
|
+
// Context extractor that keeps the whole conversation but only resolves the
// most recent captured query into a question/SQL training pair.
var LastQueryExtractor = class extends BaseContextualExtractor {
  constructor(messages, adapter, options = {}) {
    super(messages, adapter, options);
  }
  /** Append the user message; nothing is ever evicted. */
  async onUserMessage(text) {
    this.context.push(`User: ${text}`);
  }
  /** Copy of every context line accumulated so far. */
  getContextSnapshot() {
    return [...this.context];
  }
  /**
   * Resolve only the LAST captured result into a standalone question,
   * instead of resolving every query like the base implementation.
   */
  async *resolveQuestions(introspection) {
    if (this.results.length === 0) {
      return;
    }
    const latest = this.results.at(-1);
    const { experimental_output } = await generate4(
      contextResolverAgent,
      [user4("Generate a standalone question for this SQL query.")],
      {
        conversation: formatConversation(latest.conversationContext),
        sql: latest.sql,
        introspection
      }
    );
    const pair = {
      question: experimental_output.question,
      sql: latest.sql,
      context: latest.conversationContext,
      success: latest.success
    };
    yield [pair];
  }
};
|
|
698
|
+
|
|
699
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
|
|
700
|
+
import pLimit from "p-limit";
|
|
701
|
+
|
|
702
|
+
// packages/text2sql/src/lib/agents/question.agent.ts
|
|
703
|
+
import { groq as groq4 } from "@ai-sdk/groq";
|
|
704
|
+
import { defaultSettingsMiddleware, wrapLanguageModel } from "ai";
|
|
705
|
+
import dedent4 from "dedent";
|
|
706
|
+
import z4 from "zod";
|
|
707
|
+
import { agent as agent4, generate as generate5, user as user5 } from "@deepagents/agent";
|
|
708
|
+
// Per-complexity instruction fragments interpolated into the question
// generator prompt (see questionGeneratorAgent). Keys are the complexity
// levels accepted by generateQuestions; dedent4 strips common indentation
// and surrounding newlines from each template.
var complexityInstructions = {
  // Single-table lookups: no joins, one-condition filters, basic aggregates.
  simple: dedent4`
Generate simple questions that require:
- Basic SELECT with single table
- Simple WHERE clauses with one condition
- COUNT(*) or basic aggregations
- No joins required
Examples: "How many customers do we have?", "List all products", "What is the total revenue?"
`,
  // 2-3 table joins, compound filters, grouping, ordering, simple subqueries.
  moderate: dedent4`
Generate moderate questions that require:
- JOINs between 2-3 tables
- Multiple WHERE conditions (AND/OR)
- GROUP BY with HAVING clauses
- ORDER BY with LIMIT
- Basic subqueries
Examples: "What are the top 5 customers by total orders?", "Which products have never been ordered?"
`,
  // Multi-join analytics with CTEs, CASE and date arithmetic.
  complex: dedent4`
Generate complex questions that require:
- Multiple JOINs (3+ tables)
- Nested subqueries or CTEs
- Complex aggregations with multiple GROUP BY columns
- CASE expressions
- Date/time calculations
Examples: "What is the month-over-month growth rate?", "Which customers have increased spending compared to last year?"
`,
  // Window-function-level queries (note the space in the key: "high complex").
  "high complex": dedent4`
Generate highly complex questions that require advanced SQL features:
- Window functions (ROW_NUMBER, RANK, DENSE_RANK)
- LAG, LEAD for comparisons
- Running totals (SUM OVER)
- Moving averages
- PARTITION BY clauses
- Complex CTEs with multiple levels
Examples: "What is the running total of sales per month?", "Rank customers by their purchase frequency within each region"
`
};
|
|
746
|
+
// Agent that produces synthetic natural-language questions about a database
// schema. Prompt is rebuilt per call from state: schema introspection, the
// matching complexityInstructions fragment, and the requested count.
var questionGeneratorAgent = agent4({
  name: "question_generator",
  model: wrapLanguageModel({
    model: groq4("openai/gpt-oss-20b"),
    middleware: defaultSettingsMiddleware({
      // Deliberately high temperature/topP to favor varied question wording.
      settings: { temperature: 0.8, topP: 0.95 }
    })
  }),
  handoffDescription: "Generates natural language questions that users might ask about the database schema.",
  // Structured output: a non-empty list of question strings.
  output: z4.object({
    questions: z4.array(z4.string().describe("A natural language question about the data")).min(1).describe("List of natural language questions a user might ask")
  }),
  prompt: (state) => {
    const count = state?.count;
    // Defaults to "moderate" when the caller does not specify a complexity.
    const complexity = state?.complexity ?? "moderate";
    return dedent4`
<identity>
You are a synthetic data generator specializing in creating realistic natural language questions
that users might ask about a database. You understand database schemas and can generate diverse,
practical questions that would require SQL queries to answer.
</identity>

${state?.introspection || ""}

<complexity level="${complexity}">
${complexityInstructions[complexity]}
</complexity>

<task>
Generate exactly ${count} natural language questions at the "${complexity}" complexity level.
The questions should:
1. Match the complexity requirements above
2. Use natural business language, not technical SQL terms
3. Be realistic questions a non-technical user would actually ask
4. Cover different tables and relationships when possible
</task>

<guardrails>
- Questions MUST ONLY reference tables and columns that exist in the schema above
- Before generating each question, verify that ALL entities (tables, columns, relationships) you reference are explicitly listed in the schema
- DO NOT invent or assume tables/columns that aren't explicitly shown in the schema
- Use natural language without SQL keywords like SELECT, WHERE, etc.
- All questions must match the specified complexity level
</guardrails>
`;
  }
});
|
|
793
|
+
/**
 * Generate natural-language questions for a schema at a given complexity.
 *
 * @param params.introspection - Rendered schema introspection text.
 * @param params.complexity - Complexity level key (e.g. "moderate").
 * @param params.count - Number of questions to request.
 * @param params.prompt - Optional override for the user prompt.
 * @param params.model - Optional model override; the agent is cloned when set.
 * @returns {{ questions: string[] }} questions from the agent's structured output.
 */
async function generateQuestions({ introspection, complexity, count, prompt, model }) {
  // Clone only when a custom model is supplied; otherwise reuse the shared agent.
  const runner = model ? questionGeneratorAgent.clone({ model }) : questionGeneratorAgent;
  const message = prompt ?? `Generate ${count} questions at ${complexity} complexity given db schema.`;
  const { experimental_output } = await generate5(runner, [user5(message)], {
    introspection,
    complexity,
    count
  });
  return { questions: experimental_output.questions };
}
|
|
808
|
+
|
|
809
|
+
// packages/text2sql/src/lib/agents/sql.agent.ts
|
|
810
|
+
import { groq as groq5 } from "@ai-sdk/groq";
|
|
811
|
+
import {
|
|
812
|
+
APICallError,
|
|
813
|
+
JSONParseError,
|
|
814
|
+
NoContentGeneratedError,
|
|
815
|
+
NoObjectGeneratedError,
|
|
816
|
+
NoOutputGeneratedError,
|
|
817
|
+
TypeValidationError,
|
|
818
|
+
defaultSettingsMiddleware as defaultSettingsMiddleware2,
|
|
819
|
+
wrapLanguageModel as wrapLanguageModel2
|
|
820
|
+
} from "ai";
|
|
821
|
+
import { Console } from "node:console";
|
|
822
|
+
import { createWriteStream } from "node:fs";
|
|
823
|
+
import pRetry from "p-retry";
|
|
824
|
+
import z5 from "zod";
|
|
825
|
+
import {
|
|
826
|
+
agent as agent5,
|
|
827
|
+
generate as generate6,
|
|
828
|
+
toOutput,
|
|
829
|
+
user as user6
|
|
830
|
+
} from "@deepagents/agent";
|
|
831
|
+
|
|
832
|
+
// packages/text2sql/src/lib/teach/xml.ts
|
|
833
|
+
// XML prompt-rendering helpers (teach/xml.ts).

/**
 * Wrap non-empty children in <tag>…</tag>, one child per line, indented two
 * spaces. Falsy/empty children are dropped; returns "" when nothing remains.
 */
function wrapBlock(tag, children) {
  const content = children.filter((child) => Boolean(child)).join("\n");
  if (!content) {
    return "";
  }
  return `<${tag}>
${indentBlock(content, 2)}
</${tag}>`;
}
/**
 * Render an array of scalar values as <tag><childTag>v</childTag>…</tag>.
 * Returns "" for an empty array.
 */
function list(tag, values, childTag) {
  if (!values.length) {
    return "";
  }
  const children = values.map((value) => leaf(childTag, value)).join("\n");
  return `<${tag}>
${indentBlock(children, 2)}
</${tag}>`;
}
/**
 * Render a single escaped value as <tag>value</tag>; multi-line values are
 * placed on their own indented lines inside the element.
 */
function leaf(tag, value) {
  const safe = escapeXml(value);
  if (safe.includes("\n")) {
    return `<${tag}>
${indentBlock(safe, 2)}
</${tag}>`;
  }
  return `<${tag}>${safe}</${tag}>`;
}
/**
 * Indent every line of `text` by `spaces` spaces (empty lines get the
 * padding alone). Returns "" for whitespace-only input.
 */
function indentBlock(text, spaces) {
  if (!text.trim()) {
    return "";
  }
  const padding = " ".repeat(spaces);
  return text.split("\n").map((line) => line.length ? padding + line : padding).join("\n");
}
/**
 * Escape XML special characters so arbitrary text is safe inside an element.
 * FIX: the previous chain replaced each character with itself (e.g. "&" -> "&")
 * — a no-op that left markup injectable (and the quote branch was not valid
 * JS) — it now emits the proper entities. "&" must be escaped first so later
 * entities are not double-escaped. null/undefined render as ""; other
 * non-string values are coerced with String().
 */
function escapeXml(value) {
  if (value == null) {
    return "";
  }
  return String(value)
    .replaceAll(/&/g, "&amp;")
    .replaceAll(/</g, "&lt;")
    .replaceAll(/>/g, "&gt;")
    .replaceAll(/"/g, "&quot;")
    .replaceAll(/'/g, "&#39;");
}
|
|
873
|
+
|
|
874
|
+
// packages/text2sql/src/lib/teach/teachables.ts
|
|
875
|
+
// Teachable factories. Each returns { type, encode, decode }: encode() yields
// a JSON-serializable record, decode() renders the XML prompt fragment.

/** A domain term and its definition. */
function term(name, definition) {
  const encode = () => ({ type: "term", name, definition });
  const decode = () =>
    wrapBlock("term", [leaf("name", name), leaf("definition", definition)]);
  return { type: "term", encode, decode };
}
/** A free-form hint line. */
function hint(text) {
  const encode = () => ({ type: "hint", text });
  const decode = () => leaf("hint", text);
  return { type: "hint", encode, decode };
}
/** A hard rule, optionally with its rationale and the action to take. */
function guardrail({ rule, reason, action }) {
  const encode = () => ({ type: "guardrail", rule, reason, action });
  const decode = () =>
    wrapBlock("guardrail", [
      leaf("rule", rule),
      reason ? leaf("reason", reason) : "",
      action ? leaf("action", action) : ""
    ]);
  return { type: "guardrail", encode, decode };
}
/** A concept explanation; decode() renders details under <explanation>. */
function explain({ concept, explanation, therefore }) {
  const encode = () => ({ type: "explain", concept, explanation, therefore });
  const decode = () =>
    wrapBlock("explanation", [
      leaf("concept", concept),
      leaf("details", explanation),
      therefore ? leaf("therefore", therefore) : ""
    ]);
  return { type: "explain", encode, decode };
}
/** A worked question/answer pair with an optional note. */
function example({ question, answer, note }) {
  const encode = () => ({ type: "example", question, answer, note });
  const decode = () =>
    wrapBlock("example", [
      leaf("question", question),
      leaf("answer", answer),
      note ? leaf("note", note) : ""
    ]);
  return { type: "example", encode, decode };
}
/** When to ask the user a clarifying question, and why. */
function clarification({ when, ask, reason }) {
  const encode = () => ({ type: "clarification", when, ask, reason });
  const decode = () =>
    wrapBlock("clarification", [
      leaf("when", when),
      leaf("ask", ask),
      leaf("reason", reason)
    ]);
  return { type: "clarification", encode, decode };
}
|
|
937
|
+
/** A multi-step task recipe with optional triggers and notes. */
function workflow({ task, steps, triggers, notes }) {
  const encode = () => ({ type: "workflow", task, steps, triggers, notes });
  const decode = () =>
    wrapBlock("workflow", [
      leaf("task", task),
      triggers?.length ? list("triggers", triggers, "trigger") : "",
      list("steps", steps, "step"),
      notes ? leaf("notes", notes) : ""
    ]);
  return { type: "workflow", encode, decode };
}
/** A known data/schema quirk and how to work around it. */
function quirk({ issue, workaround }) {
  const encode = () => ({ type: "quirk", issue, workaround });
  const decode = () =>
    wrapBlock("quirk", [leaf("issue", issue), leaf("workaround", workaround)]);
  return { type: "quirk", encode, decode };
}
/** SQL style rules: a preference plus optional always/never constraints. */
function styleGuide({ prefer, never, always }) {
  const encode = () => ({ type: "styleGuide", prefer, never, always });
  const decode = () =>
    wrapBlock("style_guide", [
      leaf("prefer", prefer),
      always ? leaf("always", always) : "",
      never ? leaf("never", never) : ""
    ]);
  return { type: "styleGuide", encode, decode };
}
/** An analogy relating concepts, with optional insight/therefore/pitfall. */
function analogy({ concept, relationship, insight, therefore, pitfall }) {
  const encode = () => ({
    type: "analogy",
    concept,
    relationship,
    insight,
    therefore,
    pitfall
  });
  const decode = () =>
    wrapBlock("analogy", [
      list("concepts", concept, "concept"),
      leaf("relationship", relationship),
      insight ? leaf("insight", insight) : "",
      therefore ? leaf("therefore", therefore) : "",
      pitfall ? leaf("pitfall", pitfall) : ""
    ]);
  return { type: "analogy", encode, decode };
}
/** A term -> SQL snippet dictionary rendered as <glossary><entry>…. */
function glossary(entries) {
  const encode = () => ({ type: "glossary", entries });
  const decode = () =>
    wrapBlock(
      "glossary",
      Object.entries(entries).map(([name, sql]) =>
        wrapBlock("entry", [leaf("term", name), leaf("sql", sql)])
      )
    );
  return { type: "glossary", encode, decode };
}
|
|
1006
|
+
/** User identity (name/role); both parts optional in the rendered form. */
function identity({ name, role }) {
  const encode = () => ({ type: "identity", name, role });
  const decode = () =>
    wrapBlock("identity", [
      name ? leaf("name", name) : "",
      role ? leaf("role", role) : ""
    ]);
  return { type: "identity", encode, decode };
}
/** Assistant persona; tone is normalized to "" in the encoded form. */
function persona({ name, role, tone }) {
  const encode = () => ({ type: "persona", name, role, tone: tone ?? "" });
  const decode = () =>
    wrapBlock("persona", [
      leaf("name", name),
      leaf("role", role),
      tone ? leaf("tone", tone) : ""
    ]);
  return { type: "persona", encode, decode };
}
/** A user-specific vocabulary alias mapped to its meaning. */
function alias(termName, meaning) {
  const encode = () => ({ type: "alias", term: termName, meaning });
  const decode = () =>
    wrapBlock("alias", [leaf("term", termName), leaf("meaning", meaning)]);
  return { type: "alias", encode, decode };
}
/** A user preference for some aspect of the answers. */
function preference(aspect, value) {
  const encode = () => ({ type: "preference", aspect, value });
  const decode = () =>
    wrapBlock("preference", [leaf("aspect", aspect), leaf("value", value)]);
  return { type: "preference", encode, decode };
}
/** Free-form user/business context rendered as a single <context> leaf. */
function context(description) {
  const encode = () => ({ type: "context", description });
  const decode = () => leaf("context", description);
  return { type: "context", encode, decode };
}
/** A recorded correction: the subject plus its clarified meaning. */
function correction(subject, clarifiedText) {
  const encode = () => ({ type: "correction", subject, clarification: clarifiedText });
  const decode = () =>
    wrapBlock("correction", [
      leaf("subject", subject),
      leaf("clarification", clarifiedText)
    ]);
  return { type: "correction", encode, decode };
}
|
|
1060
|
+
/**
 * Render a set of teachables as one XML instruction block.
 *
 * Teachables are grouped by `type`. Types listed in SECTION_ORDER render
 * first, in that fixed order, each wrapped in its section tag; any type NOT
 * in SECTION_ORDER is appended afterwards, unwrapped, in grouping (insertion)
 * order. Returns "" when nothing produces content.
 *
 * @param tag - Outermost wrapping element name (e.g. "instructions").
 * @param teachables - Objects exposing `type` and `decode(): string`.
 * @returns XML string, or "" when empty.
 */
function toInstructions(tag, ...teachables) {
  if (!teachables.length) {
    return "";
  }
  // Bucket teachables by type so sections come out in a stable order.
  const grouped = /* @__PURE__ */ new Map();
  for (const teachable of teachables) {
    const existing = grouped.get(teachable.type) ?? [];
    existing.push(teachable);
    grouped.set(teachable.type, existing);
  }
  const definedTypes = new Set(SECTION_ORDER.map((s) => s.type));
  const sections = SECTION_ORDER.map(({ type, tag: tag2 }) => {
    const items = grouped.get(type);
    if (!items?.length) {
      return "";
    }
    // decode() may legitimately return "" (empty wrapBlock); drop those.
    const renderedItems = items.map((item) => item.decode().trim()).filter(Boolean).map((item) => indentBlock(item, 2)).join("\n");
    if (!renderedItems.length) {
      return "";
    }
    return `<${tag2}>
${renderedItems}
</${tag2}>`;
  }).filter((section) => Boolean(section));
  // Types without a declared section are appended last, without a wrapper tag.
  for (const [type, items] of grouped) {
    if (definedTypes.has(type)) {
      continue;
    }
    const renderedItems = items.map((item) => item.decode().trim()).filter(Boolean).map((item) => indentBlock(item, 2)).join("\n");
    if (renderedItems.length) {
      sections.push(renderedItems);
    }
  }
  if (!sections.length) {
    return "";
  }
  const content = indentBlock(sections.join("\n"), 2);
  return `<${tag}>
${content}
</${tag}>`;
}
|
|
1101
|
+
// Render order for toInstructions(): user-specific context first, then domain
// knowledge. `type` matches each teachable's `type` field; `tag` is the XML
// element the section is wrapped in.
var SECTION_ORDER = [
  // User context (render first - most important for personalization)
  { type: "identity", tag: "identity" },
  { type: "persona", tag: "persona" },
  { type: "context", tag: "user_context" },
  { type: "preference", tag: "user_preferences" },
  { type: "alias", tag: "user_vocabulary" },
  { type: "correction", tag: "user_corrections" },
  // Domain knowledge
  { type: "guardrail", tag: "guardrails" },
  { type: "styleGuide", tag: "style_guides" },
  { type: "hint", tag: "hints" },
  { type: "clarification", tag: "clarifications" },
  { type: "workflow", tag: "workflows" },
  { type: "quirk", tag: "quirks" },
  { type: "term", tag: "terminology" },
  { type: "explain", tag: "explanations" },
  { type: "analogy", tag: "analogies" },
  { type: "glossary", tag: "glossary" },
  { type: "example", tag: "examples" }
];
|
|
1122
|
+
/**
 * Rehydrate plain (encoded) teachable records into Teachable objects.
 *
 * Inverse of each factory's encode(): dispatches on `item.type` and calls the
 * matching factory with the record's fields.
 * NOTE(review): an unknown `type` falls through the switch and maps to
 * `undefined` in the result array — presumably upstream validation prevents
 * that; confirm before relying on the output being dense.
 */
function toTeachables(generated) {
  return generated.map((item) => {
    switch (item.type) {
      case "persona":
        return persona({ name: item.name, role: item.role, tone: item.tone });
      case "term":
        return term(item.name, item.definition);
      case "hint":
        return hint(item.text);
      case "guardrail":
        return guardrail({
          rule: item.rule,
          reason: item.reason,
          action: item.action
        });
      case "explain":
        return explain({
          concept: item.concept,
          explanation: item.explanation,
          therefore: item.therefore
        });
      case "example":
        return example({
          question: item.question,
          answer: item.answer,
          note: item.note
        });
      case "clarification":
        return clarification({
          when: item.when,
          ask: item.ask,
          reason: item.reason
        });
      case "workflow":
        return workflow({
          task: item.task,
          steps: item.steps,
          triggers: item.triggers,
          notes: item.notes
        });
      case "quirk":
        return quirk({
          issue: item.issue,
          workaround: item.workaround
        });
      case "styleGuide":
        return styleGuide({
          prefer: item.prefer,
          never: item.never,
          always: item.always
        });
      case "analogy":
        return analogy({
          concept: item.concept,
          relationship: item.relationship,
          insight: item.insight,
          therefore: item.therefore,
          pitfall: item.pitfall
        });
      case "glossary":
        return glossary(item.entries);
      // User-specific teachable types
      case "identity":
        return identity({ name: item.name, role: item.role });
      case "alias":
        return alias(item.term, item.meaning);
      case "preference":
        return preference(item.aspect, item.value);
      case "context":
        return context(item.description);
      case "correction":
        return correction(item.subject, item.clarification);
    }
  });
}
|
|
1197
|
+
|
|
1198
|
+
// packages/text2sql/src/lib/agents/sql.agent.ts
|
|
1199
|
+
// File-backed console for SQL-agent diagnostics. Streams open in append mode
// ("a" flag) so restarts keep earlier logs; depth: null inspects objects fully.
// NOTE(review): paths are relative to the process CWD — confirm that is intended.
var logger = new Console({
  stdout: createWriteStream("./sql-agent.log", { flags: "a" }),
  stderr: createWriteStream("./sql-agent-error.log", { flags: "a" }),
  inspectOptions: { depth: null }
});
// Per-attempt sampling temperatures used by toSql (attempt N uses index N-1;
// attempts beyond the list fall back to 0.3).
var RETRY_TEMPERATURES = [0, 0.2, 0.3];
|
|
1205
|
+
// Text-to-SQL agent. Output is a union: either { sql, reasoning? } on success
// or { error } when the question cannot be answered from the schema (the
// latter is surfaced as UnanswerableSQLError by toSql).
var sqlQueryAgent = agent5({
  name: "text2sql",
  model: groq5("openai/gpt-oss-20b"),
  // Opt-in verbose agent logging via AGENT_LOGGING=true.
  logging: process.env.AGENT_LOGGING === "true",
  output: z5.union([
    z5.object({
      sql: z5.string().describe("The SQL query that answers the question"),
      reasoning: z5.string().optional().describe("The reasoning steps taken to generate the SQL")
    }),
    z5.object({
      error: z5.string().describe(
        "Error message explaining why the question cannot be answered with the given schema"
      )
    })
  ]),
  // System prompt is simply caller-provided teachings + schema introspection.
  prompt: (state) => {
    return `
${state?.teachings || ""}
${state?.introspection || ""}
`;
  }
});
|
|
1227
|
+
/**
 * Extract the SQL text from a model response: pull the body out of the first
 * fenced ```sql block when present, otherwise treat the whole payload as SQL.
 * The result is trimmed either way.
 */
function extractSql(output) {
  const fenced = /```sql\n?([\s\S]*?)```/.exec(output);
  if (fenced) {
    return fenced[1].trim();
  }
  return output.trim();
}
|
|
1231
|
+
// Private brand for SQLValidationError instances; checked alongside
// instanceof in isInstance.
var marker = Symbol("SQLValidationError");
// Thrown when the adapter rejects generated SQL; toSql retries these with
// the validation message fed back to the model.
var SQLValidationError = class _SQLValidationError extends Error {
  [marker];
  constructor(message) {
    super(message);
    this.name = "SQLValidationError";
    this[marker] = true;
  }
  /** True only for objects that pass both the instanceof and brand checks. */
  static isInstance(error) {
    if (!(error instanceof _SQLValidationError)) {
      return false;
    }
    return error[marker] === true;
  }
};
|
|
1243
|
+
// Thrown when the model reports the question cannot be answered from the
// given schema; withRetry treats this as terminal (never retried).
var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
  // Override Error.name for clearer logs and error reports.
  name = "UnanswerableSQLError";
  constructor(message) {
    super(message);
  }
  /** Type guard for errors produced by this class. */
  static isInstance(error) {
    return error instanceof _UnanswerableSQLError;
  }
};
|
|
1252
|
+
/**
 * Translate a natural-language question into adapter-validated SQL, retrying
 * on validation and transient model failures.
 *
 * Each attempt clones the agent with an attempt-specific temperature
 * (RETRY_TEMPERATURES). When a previous attempt failed, the last error is fed
 * back to the model inside a <validation_error> message.
 *
 * @param options.input - The user question.
 * @param options.adapter - Must expose validate(sql) returning an error string or falsy.
 * @param options.instructions - Extra teachables appended after the built-in persona.
 * @param options.introspection - Rendered schema text for the prompt.
 * @param options.model - Optional model override.
 * @param options.maxRetries - Total attempts, default 3.
 * @returns {{ attempts, sql, errors? }} errors lists formatted per-attempt failures.
 * @throws UnanswerableSQLError when the model returns an { error } output (not retried).
 */
async function toSql(options) {
  const { maxRetries = 3 } = options;
  return withRetry(
    async (attemptNumber, errors, attempts) => {
      // Escalate temperature across attempts: 0, 0.2, 0.3, then stay at 0.3.
      const agentInstance = sqlQueryAgent.clone({
        model: wrapLanguageModel2({
          model: options.model ?? sqlQueryAgent.model,
          middleware: defaultSettingsMiddleware2({
            settings: {
              temperature: RETRY_TEMPERATURES[attemptNumber - 1] ?? 0.3,
              topP: 1
            }
          })
        })
      });
      // On retries, include the most recent failure so the model can correct it.
      const messages = errors.length ? [
        user6(options.input),
        user6(
          `<validation_error>Your previous SQL query had the following error: ${errors.at(-1)?.message}. Please fix the query.</validation_error>`
        )
      ] : [user6(options.input)];
      const output = await toOutput(
        generate6(agentInstance, messages, {
          introspection: options.introspection,
          // Built-in persona first, then caller-supplied teachables.
          teachings: toInstructions(
            "instructions",
            persona({
              name: "Freya",
              role: "You are an expert SQL query generator. You translate natural language questions into precise, efficient SQL queries based on the provided database schema."
            }),
            ...options.instructions
          )
        })
      );
      // The agent's union output: { error } means unanswerable -> terminal.
      if ("error" in output) {
        throw new UnanswerableSQLError(output.error);
      }
      const sql = extractSql(output.sql);
      // Adapter validation returns a message string on failure, falsy on success.
      const validationError = await options.adapter.validate(sql);
      if (validationError) {
        throw new SQLValidationError(validationError);
      }
      return {
        attempts,
        sql,
        errors: errors.length ? errors.map(formatErrorMessage) : void 0
      };
    },
    // pRetry counts retries AFTER the first attempt.
    { retries: maxRetries - 1 }
  );
}
|
|
1303
|
+
/**
 * Normalize a retry-loop error into a human-readable message string.
 * API-call failures caused by JSON schema validation get a clarifying prefix,
 * SQL validation failures are labeled as such, and anything else passes
 * through unchanged.
 */
function formatErrorMessage(error) {
  const { message } = error;
  if (APICallError.isInstance(error)) {
    return message.startsWith("Failed to validate JSON")
      ? `Schema validation failed: ${message}`
      : message;
  }
  if (SQLValidationError.isInstance(error)) {
    return `SQL Validation Error: ${message}`;
  }
  return message;
}
|
|
1315
|
+
/**
 * Run `computation` under pRetry, accumulating each failure so later attempts
 * can see every previous error.
 *
 * Retry policy (see shouldRetry):
 *  - UnanswerableSQLError: terminal, never retried.
 *  - SQLValidationError: always retried (error is fed back as feedback).
 *  - AI-SDK transport/parse errors (APICallError, JSONParseError,
 *    TypeValidationError, No*GeneratedError): retried.
 *  - Anything else: not retried.
 *
 * @param computation - Receives (attemptNumber, errorsSoFar, attemptCount).
 * @param options.retries - Retries allowed AFTER the first attempt (default 3).
 */
async function withRetry(computation, options = { retries: 3 }) {
  const errors = [];
  // Separate counter returned to the caller via the computation's 3rd arg.
  let attempts = 0;
  return pRetry(
    (attemptNumber) => {
      return computation(attemptNumber, errors, ++attempts);
    },
    {
      retries: options.retries,
      shouldRetry: (context2) => {
        if (UnanswerableSQLError.isInstance(context2.error)) {
          return false;
        }
        if (SQLValidationError.isInstance(context2.error)) {
          return true;
        }
        // NOTE(review): stdout dump of the error classification — looks like
        // leftover debug logging; consider routing through `logger` instead.
        console.log({
          NoObjectGeneratedError: NoObjectGeneratedError.isInstance(
            context2.error
          ),
          NoOutputGeneratedError: NoOutputGeneratedError.isInstance(
            context2.error
          ),
          APICallError: APICallError.isInstance(context2.error),
          JSONParseError: JSONParseError.isInstance(context2.error),
          TypeValidationError: TypeValidationError.isInstance(context2.error),
          NoContentGeneratedError: NoContentGeneratedError.isInstance(
            context2.error
          )
        });
        return APICallError.isInstance(context2.error) || JSONParseError.isInstance(context2.error) || TypeValidationError.isInstance(context2.error) || NoObjectGeneratedError.isInstance(context2.error) || NoOutputGeneratedError.isInstance(context2.error) || NoContentGeneratedError.isInstance(context2.error);
      },
      onFailedAttempt(context2) {
        // Persist the failure, then record it so the next attempt sees it.
        logger.error(`toSQL`, context2.error);
        console.log(
          `Attempt ${context2.attemptNumber} failed. There are ${context2.retriesLeft} retries left.`
        );
        errors.push(context2.error);
      }
    }
  );
}
|
|
1357
|
+
|
|
1358
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
|
|
1359
|
+
var SchemaSynthesizer = class extends PairProducer {
  // Shared concurrency limiter for question generation and SQL conversion.
  #limit;
  // Complexity levels to synthesize at (normalized to an array in the constructor).
  #complexities = [];
  // Personas to generate for; [undefined] means "no persona context".
  #personas = [];
  /**
   * @param adapter - Database adapter for schema introspection and SQL validation
   * @param options - Synthesis configuration including count, complexity, and concurrency
   */
  constructor(adapter, options) {
    super();
    this.adapter = adapter;
    this.options = options;
    const { complexity, personas, concurrency } = this.options;
    this.#complexities = Array.isArray(complexity) ? complexity : [complexity ?? "moderate"];
    this.#personas = personas ?? [void 0];
    this.#limit = pLimit(concurrency ?? 5);
  }
  /**
   * Generates question-SQL pairs by iterating through all persona × complexity combinations.
   * Uses parallel processing bounded by the configured concurrency limit.
   * Yields results as each combination completes (streaming pattern).
   * @returns Generated pairs from all combinations
   */
  async *produce() {
    const introspection = await this.adapter.introspect();
    // Persona-major, complexity-minor order — same order flatMap produced.
    for (const persona of this.#personas) {
      for (const complexity of this.#complexities) {
        const pairs = await this.#processCombination(
          introspection,
          persona,
          complexity
        );
        if (pairs.length) {
          yield pairs;
        }
      }
    }
  }
  /**
   * Processes a single persona × complexity combination by generating questions
   * and converting each to SQL in parallel.
   */
  async #processCombination(introspection, persona, complexity) {
    // Prompt is only supplied when a persona is present (otherwise undefined,
    // letting generateQuestions fall back to its default prompt).
    let prompt;
    if (persona) {
      prompt = `As ${persona.role}, ${persona.perspective}

Generate questions this persona would ask.

Generate ${this.options.count} questions at ${complexity} complexity.`;
    }
    const { questions } = await this.#limit(() =>
      generateQuestions({
        introspection,
        complexity,
        count: this.options.count,
        prompt,
        model: this.options.model
      })
    );
    // Convert every question concurrently; each conversion is itself limited.
    return Promise.all(
      questions.map(async (question) => {
        const result = await this.#limit(() =>
          this.#toSqlSafe(question, introspection)
        );
        return {
          question,
          sql: result.sql,
          success: !result.errors || result.errors.length === 0
        };
      })
    );
  }
  /**
   * Runs toSql for one question. Unanswerable questions are converted to an
   * error-carrying result instead of throwing; all other errors propagate.
   */
  async #toSqlSafe(question, introspection) {
    try {
      return await toSql({
        input: question,
        adapter: this.adapter,
        introspection,
        instructions: this.options.teachings ?? [],
        model: this.options.model
      });
    } catch (error) {
      if (!UnanswerableSQLError.isInstance(error)) {
        throw error;
      }
      return {
        attempts: 0,
        sql: "",
        errors: [
          `Cannot answer the question ${question} because ${error.message}`
        ]
      };
    }
  }
};
|
|
1451
|
+
|
|
1452
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
|
|
1453
|
+
import { groq as groq6 } from "@ai-sdk/groq";
|
|
1454
|
+
import { defaultSettingsMiddleware as defaultSettingsMiddleware3, wrapLanguageModel as wrapLanguageModel3 } from "ai";
|
|
1455
|
+
import dedent5 from "dedent";
|
|
1456
|
+
import pLimit2 from "p-limit";
|
|
1457
|
+
import z6 from "zod";
|
|
1458
|
+
import {
|
|
1459
|
+
agent as agent6,
|
|
1460
|
+
generate as generate7,
|
|
1461
|
+
toOutput as toOutput2,
|
|
1462
|
+
user as user7
|
|
1463
|
+
} from "@deepagents/agent";
|
|
1464
|
+
|
|
1465
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/styles.ts
|
|
1466
|
+
// All communication styles a synthetic question can be phrased in.
// Keep in sync with `styleInstructions` below, which defines each style,
// and with the persona generator's z.enum(ALL_STYLES) output constraint.
var ALL_STYLES = [
  "formal",
  // Professional business language
  "colloquial",
  // Casual everyday speech
  "imperative",
  // Commands: "Show me...", "Get..."
  "interrogative",
  // Questions: "What is...", "How many..."
  "descriptive",
  // Verbose, detailed
  "concise",
  // Brief, minimal
  "vague",
  // Ambiguous, hedging
  "metaphorical",
  // Figurative language
  "conversational"
  // Chat-like
];
|
|
1486
|
+
// Prompt snippet for each communication style; interpolated into the
// paraphraser agent's <communication_styles> section. Keys must cover ALL_STYLES.
var styleInstructions = {
  formal: "Use professional business language, complete sentences, no slang",
  colloquial: "Use casual everyday speech, contractions, informal tone",
  imperative: 'Phrase as commands: "Show me...", "Get...", "List..."',
  interrogative: 'Phrase as questions: "What is...", "How many...", "Which..."',
  descriptive: "Use detailed, verbose phrasing with extra context",
  concise: "Use minimal words, telegram-style brevity",
  vague: "Be intentionally ambiguous, use hedging language",
  metaphorical: "Use figurative language, analogies, creative phrasing",
  conversational: "Chat-like tone, as if talking to a colleague"
};
|
|
1497
|
+
|
|
1498
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
|
|
1499
|
+
// Agent that rewrites a question into semantically-equivalent paraphrases
// (all must map to the identical SQL). Used by BreadthEvolver.
var paraphraserAgent = agent6({
  name: "question_paraphraser",
  model: wrapLanguageModel3({
    model: groq6("openai/gpt-oss-20b"),
    middleware: defaultSettingsMiddleware3({
      // High temperature/topP for lexical variety; frequencyPenalty discourages
      // repeating the same wording across paraphrases.
      settings: { temperature: 0.9, topP: 0.95, frequencyPenalty: 0.2 }
    })
  }),
  logging: process.env.AGENT_LOGGING === "true",
  // Structured output: at least one paraphrase string.
  output: z6.object({
    paraphrases: z6.array(
      z6.string().describe("A paraphrased version of the original question")
    ).min(1).describe(
      "List of paraphrased questions that would produce the same SQL"
    )
  }),
  // Prompt is rebuilt from state each call; persona/style sections are included
  // only when the caller supplies a persona (and styles) in state.
  prompt: (state) => {
    const personaInstruction = state?.persona ? dedent5`
      <persona role="${state.persona.role}">
      ${state.persona.perspective}

      Paraphrase the question as this persona would naturally ask it.
      Use their vocabulary, priorities, and framing style.
      </persona>
    ` : "";
    const styleInstruction = state?.persona?.styles && state.persona.styles.length > 0 ? dedent5`
      <communication_styles>
      Generate paraphrases using these communication styles: ${state.persona.styles.join(", ")}

      Style definitions:
      ${state.persona.styles.map((s) => `- ${s}: ${styleInstructions[s]}`).join("\n")}

      Distribute paraphrases across these styles for variety.
      </communication_styles>
    ` : "";
    return dedent5`
      <identity>
      You are a linguistic expert specializing in paraphrasing database questions.
      Your task is to generate alternative phrasings of questions that preserve
      the exact same semantic meaning - they must all produce the identical SQL query.
      </identity>

      <original_question>
      ${state?.question}
      </original_question>

      <reference_sql>
      ${state?.sql}
      (This SQL shows what the question is really asking - all paraphrases must ask for exactly this)
      </reference_sql>

      ${personaInstruction}

      ${styleInstruction}

      <task>
      Generate exactly ${state?.count} paraphrased versions of the original question.

      Requirements:
      1. Each paraphrase must be semantically equivalent - it should produce the EXACT same SQL
      2. Vary the sentence structure, word choice, and phrasing style
      3. Use natural language without SQL keywords (SELECT, WHERE, JOIN, etc.)
      4. Keep paraphrases realistic - how actual users would ask
      5. Do not add or remove any conditions, filters, or requirements from the original
      ${state?.persona?.styles?.length ? "6. Apply the specified communication styles to create diverse phrasings" : ""}
      </task>

      <guardrails>
      - NEVER change what data is being requested
      - NEVER add filters, aggregations, or conditions not in the original
      - NEVER remove any specificity from the original question
      - All paraphrases must be answerable by the exact same SQL query
      </guardrails>
    `;
  }
});
|
|
1575
|
+
var BreadthEvolver = class extends PairProducer {
  // Concurrency limiter for paraphrase-generation calls.
  #limit;
  /**
   * @param source - Source pairs or producer to evolve
   * @param options - Evolution options including count, persona, and concurrency
   */
  constructor(source, options) {
    super();
    this.source = source;
    this.options = options;
    this.#limit = pLimit2(this.options.concurrency ?? 4);
  }
  /**
   * Batch pairs within each chunk for concurrent processing.
   * Uses pLimit for concurrency control, yields results per pair after chunk completes.
   */
  async *produce() {
    for await (const chunk of this.from(this.source)) {
      const expanded = await Promise.all(
        chunk.map((pair) => this.#limit(() => this.#paraphrase(pair)))
      );
      yield expanded.flat();
    }
  }
  /**
   * Generates `count` paraphrases for one pair; every paraphrase inherits the
   * original pair's SQL, context, and success flag.
   */
  async #paraphrase(pair) {
    const { paraphrases } = await toOutput2(
      generate7(
        paraphraserAgent.clone({ model: this.options.model }),
        [
          user7(
            `Paraphrase this question ${this.options.count} times: "${pair.question}"`
          )
        ],
        {
          question: pair.question,
          sql: pair.sql,
          count: this.options.count,
          persona: this.options.persona
        }
      )
    );
    return paraphrases.map((question) => ({
      question,
      sql: pair.sql,
      context: pair.context,
      success: pair.success
    }));
  }
};
|
|
1624
|
+
|
|
1625
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/depth-evolver.ts
|
|
1626
|
+
import { groq as groq7 } from "@ai-sdk/groq";
|
|
1627
|
+
import {
|
|
1628
|
+
NoObjectGeneratedError as NoObjectGeneratedError2,
|
|
1629
|
+
NoOutputGeneratedError as NoOutputGeneratedError2,
|
|
1630
|
+
defaultSettingsMiddleware as defaultSettingsMiddleware4,
|
|
1631
|
+
wrapLanguageModel as wrapLanguageModel4
|
|
1632
|
+
} from "ai";
|
|
1633
|
+
import dedent6 from "dedent";
|
|
1634
|
+
import pLimit3 from "p-limit";
|
|
1635
|
+
import pRetry2 from "p-retry";
|
|
1636
|
+
import z7 from "zod";
|
|
1637
|
+
import { agent as agent7, generate as generate8, user as user8 } from "@deepagents/agent";
|
|
1638
|
+
// Per-technique prompt snippets for DepthEvolver's question-evolver agent.
// Keys must match the entries of ALL_TECHNIQUES below.
var techniqueInstructions = {
  "add-aggregation": dedent6`
    Add aggregation requirements to the question.
    Transform it to require GROUP BY, COUNT, SUM, AVG, MIN, MAX, or similar operations.
    Examples:
    - "Show orders" → "Show total order count by customer"
    - "List products" → "What is the average price per category?"
    - "Get employees" → "How many employees are in each department?"
  `,
  "add-filter": dedent6`
    Add filtering conditions to the question.
    Transform it to require WHERE clauses with specific conditions.
    Examples:
    - "Show orders" → "Show orders from the last 30 days"
    - "List customers" → "List customers who have made more than 5 purchases"
    - "Get products" → "Get products with price above $100"
  `,
  "add-join": dedent6`
    Add requirements that need data from related tables.
    Transform it to require JOIN operations between multiple tables.
    Examples:
    - "Show orders" → "Show orders with customer names and addresses"
    - "List products" → "List products with their supplier information"
    - "Get employees" → "Get employees with their department and manager names"
  `,
  "add-reasoning": dedent6`
    Add multi-step reasoning requirements.
    Transform it to require logical deduction, comparisons, or derived calculations.
    Examples:
    - "Show orders" → "Which customers have orders above the average order value?"
    - "List products" → "Which products are underperforming compared to their category average?"
    - "Get revenue" → "Which month had the highest growth compared to the previous month?"
  `,
  hypothetical: dedent6`
    Add a hypothetical or speculative scenario.
    Transform it to require applying calculations or projections.
    Examples:
    - "Show revenue" → "What would revenue be if we increased all prices by 15%?"
    - "List inventory" → "How many days of stock remain at current sales rate?"
    - "Get costs" → "What would be the impact of a 10% discount on profit margins?"
  `
};
|
|
1680
|
+
// Agent that evolves a simple question into a more complex one using a named
// technique (see techniqueInstructions). Used by DepthEvolver.
var questionEvolverAgent = agent7({
  name: "question_evolver",
  model: wrapLanguageModel4({
    model: groq7("openai/gpt-oss-20b"),
    middleware: defaultSettingsMiddleware4({
      // Moderate temperature: creative evolutions, but grounded in the schema.
      settings: { temperature: 0.7, topP: 0.95 }
    })
  }),
  // Structured output: the single evolved question.
  output: z7.object({
    evolvedQuestion: z7.string().describe("The evolved, more complex version of the original question")
  }),
  // Prompt interpolates the original pair, schema, and technique from state.
  prompt: (state) => {
    return dedent6`
      <identity>
      You are an expert at evolving simple database questions into more complex ones.
      Your task is to take a basic question and transform it into a more sophisticated
      version that requires advanced SQL techniques to answer.
      </identity>

      <original_question>
      ${state?.question}
      </original_question>

      <original_sql>
      ${state?.sql}
      (This shows what the original question required)
      </original_sql>

      <database_schema>
      ${state?.schema}
      </database_schema>

      <technique name="${state?.technique}">
      ${state?.techniqueInstruction}
      </technique>

      <task>
      Evolve the original question using the "${state?.technique}" technique.

      Requirements:
      1. The evolved question must be MORE COMPLEX than the original
      2. Apply the specific technique described above
      3. The evolved question must be answerable using the provided schema
      4. Use natural language - no SQL keywords
      5. Keep the question realistic and practical
      6. The evolved question should build upon the original topic/domain
      </task>

      <guardrails>
      - The evolved question MUST require more complex SQL than the original
      - Do not ask for data that doesn't exist in the schema
      - Keep the question grounded in the same domain as the original
      - Make sure the question is clear and unambiguous
      </guardrails>
    `;
  }
});
|
|
1737
|
+
// Default set of depth-evolution techniques; each entry must have a matching
// key in techniqueInstructions above.
var ALL_TECHNIQUES = [
  "add-aggregation",
  "add-filter",
  "add-join",
  "add-reasoning",
  "hypothetical"
];
|
|
1744
|
+
var DepthEvolver = class extends PairProducer {
  /**
   * @param source - Source pairs or producer to evolve
   * @param adapter - Database adapter for SQL generation
   * @param options - Evolution options including techniques, count, and concurrency
   */
  constructor(source, adapter, options) {
    super();
    this.source = source;
    this.adapter = adapter;
    this.options = options;
    // Bounds concurrent evolve+toSql tasks (default 4).
    this.#limit = pLimit3(this.options?.concurrency ?? 4);
  }
  // Concurrency limiter shared by all evolution tasks.
  #limit;
  /**
   * Yields evolved pairs as each completes (streaming pattern).
   * Removes batch barrier - no longer waits for all evolutions before yielding.
   */
  async *produce() {
    const introspection = await this.adapter.introspect();
    const count = this.options?.count ?? 1;
    const techniques = this.options?.techniques ?? ALL_TECHNIQUES;
    let pairIndex = 0;
    for await (const chunk of this.from(this.source)) {
      for (const pair of chunk) {
        const tasks = Array.from({ length: count }, (_, i) => {
          // Explicit techniques: cycle within each pair (i % len).
          // Default techniques: offset by pairIndex*count so consecutive pairs
          // continue the round-robin instead of restarting at the first technique.
          const technique = this.options?.techniques ? techniques[i % techniques.length] : techniques[(pairIndex * count + i) % techniques.length];
          return this.#limit(
            () => this.#processTask(pair, technique, introspection)
          );
        });
        const results = await Promise.all(tasks);
        // NOTE(review): results for one pair are awaited as a batch before
        // yielding, so streaming here is per-pair, not per-task as the JSDoc
        // above suggests - confirm intended granularity.
        yield results;
        pairIndex++;
      }
    }
  }
  /**
   * Evolves one pair with one technique (with retry on structured-output
   * failures), then regenerates SQL for the evolved question.
   * Unanswerable evolved questions become failed pairs instead of throwing.
   */
  async #processTask(pair, technique, introspection) {
    const { experimental_output } = await withRetry2(
      () => generate8(
        questionEvolverAgent.clone({
          model: this.options?.model
        }),
        [user8(`Evolve this question using "${technique}": "${pair.question}"`)],
        {
          question: pair.question,
          sql: pair.sql,
          schema: introspection,
          technique,
          techniqueInstruction: techniqueInstructions[technique]
        }
      )
    );
    const evolvedQuestion = experimental_output.evolvedQuestion;
    try {
      // Fresh SQL generation (no teachings) for the evolved question.
      const sqlResult = await toSql({
        input: evolvedQuestion,
        adapter: this.adapter,
        introspection,
        instructions: [],
        model: this.options?.model
      });
      return {
        question: evolvedQuestion,
        sql: sqlResult.sql,
        context: pair.context,
        success: !sqlResult.errors || sqlResult.errors.length === 0
      };
    } catch (error) {
      if (UnanswerableSQLError.isInstance(error)) {
        // Keep the pair in the stream as a failure so callers can filter it.
        return {
          question: evolvedQuestion,
          sql: "",
          context: pair.context,
          success: false,
          errors: [
            `Cannot answer the question ${evolvedQuestion} because ${error.message}`
          ]
        };
      }
      throw error;
    }
  }
};
|
|
1828
|
+
/**
 * Wraps `computation` with up to 3 retries, retrying ONLY on AI SDK
 * structured-output failures (NoObjectGeneratedError / NoOutputGeneratedError);
 * any other error propagates immediately via p-retry's shouldRetry contract.
 * @param computation - Promise-returning function to retry
 * @returns The fulfilled value of `computation`
 */
async function withRetry2(computation) {
  return pRetry2(computation, {
    retries: 3,
    shouldRetry: (context2) => {
      // Evaluate each predicate exactly once and reuse the result for both the
      // diagnostic log and the retry decision (previously each isInstance check
      // ran twice per failed attempt).
      const flags = {
        NoObjectGeneratedError: NoObjectGeneratedError2.isInstance(
          context2.error
        ),
        NoOutputGeneratedError: NoOutputGeneratedError2.isInstance(
          context2.error
        )
      };
      console.log(flags);
      return flags.NoObjectGeneratedError || flags.NoOutputGeneratedError;
    },
    onFailedAttempt(context2) {
      console.log(
        `Attempt ${context2.attemptNumber} failed. There are ${context2.retriesLeft} retries left.`
      );
      // Full dump (depth: null) so nested SDK error causes are visible.
      console.dir(context2.error, { depth: null });
    }
  });
}
|
|
1850
|
+
|
|
1851
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/persona-generator.ts
|
|
1852
|
+
import { groq as groq8 } from "@ai-sdk/groq";
|
|
1853
|
+
import { defaultSettingsMiddleware as defaultSettingsMiddleware5, wrapLanguageModel as wrapLanguageModel5 } from "ai";
|
|
1854
|
+
import dedent7 from "dedent";
|
|
1855
|
+
import z8 from "zod";
|
|
1856
|
+
import { agent as agent8, generate as generate9, user as user9 } from "@deepagents/agent";
|
|
1857
|
+
// Agent that invents realistic user personas for a database schema.
// Used by PersonaGenerator; output feeds the paraphraser's persona sections.
var personaGeneratorAgent = agent8({
  name: "persona_generator",
  model: wrapLanguageModel5({
    model: groq8("openai/gpt-oss-20b"),
    middleware: defaultSettingsMiddleware5({
      // Higher temperature + presencePenalty push for distinct, non-repetitive personas.
      settings: { temperature: 0.8, topP: 0.95, presencePenalty: 0.2 }
    })
  }),
  logging: process.env.AGENT_LOGGING === "true",
  // Structured output: at least one persona, each with role, perspective,
  // and 1-3 communication styles drawn from ALL_STYLES.
  output: z8.object({
    personas: z8.array(
      z8.object({
        role: z8.string().describe("The job title or role of this persona"),
        perspective: z8.string().describe(
          "Rich description of what this persona cares about when querying the database"
        ),
        styles: z8.array(z8.enum(ALL_STYLES)).min(1).max(3).describe(
          "Typical communication styles for this persona (1-3 styles)"
        )
      })
    ).min(1).describe("List of personas who would query this database")
  }),
  // Prompt interpolates the introspected schema and requested count from state.
  prompt: (state) => {
    return dedent7`
      <identity>
      You are an expert at understanding database schemas and inferring who would use them.
      Your task is to analyze a database schema and generate realistic personas representing
      the different types of users who would query this database.
      </identity>

      <database_schema>
      ${state?.schema}
      </database_schema>

      <task>
      Generate exactly ${state?.count} distinct personas who would query this database.

      For each persona, provide:
      1. **role**: Their job title or role (e.g., "Financial Analyst", "Customer Support Rep")
      2. **perspective**: A rich description of what they care about, including:
      - What questions they typically ask
      - What metrics/data points matter to them
      - How they prefer data formatted or presented
      - Their priorities (speed vs accuracy, detail vs summary)
      - Domain-specific concerns relevant to their role
      3. **styles**: 1-3 communication styles typical for this persona. Choose from:
      - formal: Professional business language, complete sentences
      - colloquial: Casual everyday speech, contractions
      - imperative: Commands like "Show me...", "Get...", "List..."
      - interrogative: Questions like "What is...", "How many..."
      - descriptive: Verbose, detailed phrasing
      - concise: Brief, minimal words
      - vague: Ambiguous, hedging language
      - metaphorical: Figurative language, analogies
      - conversational: Chat-like, casual tone

      Requirements:
      - Personas should be realistic for the given schema
      - Each persona should have distinct concerns and priorities
      - Perspectives should be detailed enough to guide question paraphrasing
      - Cover different levels of technical expertise (some technical, some business-focused)
      - Styles should match how this persona would naturally communicate
      </task>

      <example>
      For an e-commerce schema with orders, customers, products tables:

      {
      "role": "Customer Support Rep",
      "perspective": "As customer support, I care about:\\n- Quick lookups by order ID or customer email\\n- Order status and shipping tracking\\n- Return and refund history\\n- Customer contact details and order history\\n- I need fast answers, not complex analysis",
      "styles": ["imperative", "concise"]
      }

      {
      "role": "Inventory Manager",
      "perspective": "As inventory manager, I care about:\\n- Current stock levels and reorder points\\n- Product availability across warehouses\\n- Slow-moving inventory identification\\n- Supplier lead times and pending orders\\n- I need accurate counts, often aggregated by location",
      "styles": ["formal", "interrogative"]
      }
      </example>

      <guardrails>
      - Only generate personas relevant to the actual schema provided
      - Do not invent tables or data that don't exist in the schema
      - Ensure perspectives are specific to the domain, not generic
      </guardrails>
    `;
  }
});
|
|
1945
|
+
var PersonaGenerator = class {
  /**
   * @param adapter - Database adapter for schema introspection
   * @param options - Generation options including count and model
   */
  constructor(adapter, options) {
    this.adapter = adapter;
    this.options = options;
  }
  /**
   * Generates personas by analyzing the database schema to infer user types.
   * @returns Array of personas with roles and perspectives
   */
  async generate() {
    const schema = await this.adapter.introspect();
    const count = this.options?.count ?? 5;
    // Clone the shared agent so a caller-supplied model can override the default.
    const generator = personaGeneratorAgent.clone({
      model: this.options?.model
    });
    const messages = [
      user9(`Generate ${count} personas for this database schema.`)
    ];
    const result = await generate9(generator, messages, { schema, count });
    return result.experimental_output.personas;
  }
};
|
|
1974
|
+
|
|
1975
|
+
// packages/text2sql/src/lib/agents/teachables.agent.ts
|
|
1976
|
+
import { groq as groq9 } from "@ai-sdk/groq";
|
|
1977
|
+
import { defaultSettingsMiddleware as defaultSettingsMiddleware6, wrapLanguageModel as wrapLanguageModel6 } from "ai";
|
|
1978
|
+
import dedent8 from "dedent";
|
|
1979
|
+
import z9 from "zod";
|
|
1980
|
+
import { agent as agent9, generate as generate10, user as user10 } from "@deepagents/agent";
|
|
1981
|
+
// Zod schema for the teachables author's structured output. Every category is
// optional; only the categories relevant to the analyzed schema are populated.
// Category keys here must match the mapping in toTeachings below.
var outputSchema = z9.object({
  terms: z9.array(z9.object({ name: z9.string(), definition: z9.string() })).optional().describe("Domain terminology definitions"),
  hints: z9.array(z9.object({ text: z9.string() })).optional().describe("Helpful hints for SQL generation"),
  guardrails: z9.array(
    z9.object({
      rule: z9.string(),
      reason: z9.string().optional(),
      action: z9.string().optional()
    })
  ).optional().describe("Safety rules and constraints"),
  explains: z9.array(
    z9.object({
      concept: z9.string(),
      explanation: z9.string(),
      therefore: z9.string().optional()
    })
  ).optional().describe("Concept explanations"),
  examples: z9.array(
    z9.object({
      question: z9.string(),
      answer: z9.string(),
      note: z9.string().optional()
    })
  ).optional().describe("Example question-answer pairs"),
  clarifications: z9.array(z9.object({ when: z9.string(), ask: z9.string(), reason: z9.string() })).optional().describe("When to ask for clarification"),
  workflows: z9.array(
    z9.object({
      task: z9.string(),
      // A workflow must have at least one step.
      steps: z9.array(z9.string()).min(1),
      triggers: z9.array(z9.string()).optional(),
      notes: z9.string().optional()
    })
  ).optional().describe("Multi-step workflows"),
  quirks: z9.array(z9.object({ issue: z9.string(), workaround: z9.string() })).optional().describe("Known issues and workarounds"),
  styleGuides: z9.array(
    z9.object({
      prefer: z9.string(),
      never: z9.string().optional(),
      always: z9.string().optional()
    })
  ).optional().describe("SQL style preferences"),
  analogies: z9.array(
    z9.object({
      // An analogy relates at least two concepts.
      concept: z9.array(z9.string()).min(2),
      relationship: z9.string(),
      insight: z9.string().optional(),
      therefore: z9.string().optional(),
      pitfall: z9.string().optional()
    })
  ).optional().describe("Concept analogies")
});
|
|
2032
|
+
// Agent that authors "teachables" (structured SQL-generation guidance) from a
// database schema. Output shape is constrained by outputSchema above.
var teachablesAuthorAgent = agent9({
  name: "teachables-author",
  model: wrapLanguageModel6({
    model: groq9("openai/gpt-oss-20b"),
    middleware: defaultSettingsMiddleware6({
      // Low temperature favors grounded, conservative output.
      settings: { temperature: 0.4, topP: 0.95 }
    })
  }),
  output: outputSchema,
  // Prompt interpolates the schema and optional extra context from state.
  prompt: (state) => dedent8`
    <identity>
    You design "teachables" for a Text2SQL system. Teachables become structured XML instructions.
    Choose only high-impact items that improve accuracy, safety, or clarity for this database.
    </identity>

    <database_schema>
    ${state?.schema}
    </database_schema>

    ${state?.context ? `<additional_context>${state.context}</additional_context>` : ""}

    <output_structure>
    Output a JSON object with these optional arrays (include only relevant ones):
    - terms: [{ name: string, definition: string }] - Domain terminology
    - hints: [{ text: string }] - Helpful SQL generation hints
    - guardrails: [{ rule: string, reason?: string, action?: string }] - Safety constraints
    - explains: [{ concept: string, explanation: string, therefore?: string }] - Concept explanations
    - examples: [{ question: string, answer: string, note?: string }] - Q&A examples
    - clarifications: [{ when: string, ask: string, reason: string }] - Clarification triggers
    - workflows: [{ task: string, steps: string[], triggers?: string[], notes?: string }] - Multi-step tasks
    - quirks: [{ issue: string, workaround: string }] - Known issues
    - styleGuides: [{ prefer: string, never?: string, always?: string }] - SQL style rules
    - analogies: [{ concept: string[], relationship: string, insight?: string, therefore?: string, pitfall?: string }]
    </output_structure>

    <instructions>
    1. Analyze the schema to infer domain, relationships, and sensitive columns.
    2. Generate 3-10 teachables total across all categories, prioritizing:
    - guardrails for PII columns (email, ssn, phone, etc)
    - hints for status/enum columns
    - clarifications for ambiguous terms
    3. Ground everything in the schema - do not invent tables/columns.
    4. Only include categories that are relevant to this schema.
    </instructions>
  `
});
|
|
2078
|
+
/**
 * Asks the teachables author agent to analyze a schema, then flattens its
 * categorized output into a single tagged list and converts it to teachables.
 * @param input - Generation state (schema and optional context) passed to the agent
 * @param options - Optional model override
 * @returns The teachables built from every generated item
 */
async function toTeachings(input, options) {
  const { experimental_output: result } = await generate10(
    teachablesAuthorAgent.clone({ model: options?.model }),
    [
      user10(
        `Analyze this database schema and generate teachings that will help an AI generate accurate SQL queries.`
      )
    ],
    input
  );
  // Output category -> teachable `type` tag. Order matters: items are emitted
  // category by category in this sequence.
  const categories = [
    ["terms", "term"],
    ["hints", "hint"],
    ["guardrails", "guardrail"],
    ["explains", "explain"],
    ["examples", "example"],
    ["clarifications", "clarification"],
    ["workflows", "workflow"],
    ["quirks", "quirk"],
    ["styleGuides", "styleGuide"],
    ["analogies", "analogy"]
  ];
  const generated = categories.flatMap(
    ([key, type]) => (result[key] ?? []).map((item) => ({ type, ...item }))
  );
  return toTeachables(generated);
}
|
|
2108
|
+
|
|
2109
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/teachings-generator.ts
|
|
2110
|
+
var TeachingsGenerator = class {
  /**
   * @param adapter - Database adapter for schema introspection
   * @param options - Generation options including context and model
   */
  constructor(adapter, options) {
    this.adapter = adapter;
    this.options = options;
  }
  /**
   * Generates domain-specific teachings by analyzing the database schema.
   * Retries on transient generation errors up to maxRetries attempts.
   * @param maxRetries - Maximum retry attempts for transient failures
   * @returns Array of teachings including vocabulary, patterns, and guardrails
   * @throws The last non-retryable error, or the last error after retries are exhausted
   */
  async generate(maxRetries = 3) {
    // Introspect once; only the LLM generation step is retried.
    const schema = await this.adapter.introspect();
    let lastError;
    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        return await toTeachings(
          {
            schema,
            context: this.options?.context
          },
          { model: this.options?.model }
        );
      } catch (error) {
        lastError = error;
        // Coerce via String(...?.prop ?? "") so a thrown non-Error value
        // (string, undefined, plain object) can't make this check itself
        // throw a TypeError and mask the original failure.
        const message = String(error?.message ?? "");
        const name = String(error?.name ?? "");
        // Retry only transient generation/validation failures (parse or
        // schema mismatches, empty object output, AI SDK errors).
        const isRetryable = message.includes("parse") || message.includes("schema") || message.includes("No object generated") || name.includes("AI_");
        if (!isRetryable) {
          throw lastError;
        }
      }
    }
    throw lastError;
  }
};
|
|
2148
|
+
// Public API surface of this generated bundle (synthesis extractors,
// evolvers, producers, generators, and shared helpers).
export {
  ALL_STYLES,
  BaseContextualExtractor,
  BreadthEvolver,
  DeduplicatedProducer,
  DepthEvolver,
  FilteredProducer,
  FullContextExtractor,
  LastQueryExtractor,
  MessageExtractor,
  PairProducer,
  PersonaGenerator,
  SchemaSynthesizer,
  SegmentedContextExtractor,
  SqlExtractor,
  TeachingsGenerator,
  ValidatedProducer,
  WindowedContextExtractor,
  contextResolverAgent,
  formatConversation,
  getMessageText,
  styleInstructions,
  toPairs
};
|
|
2172
|
+
//# sourceMappingURL=index.js.map
|