@deepagents/text2sql 0.25.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.d.ts +0 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +212 -285
- package/dist/index.js.map +4 -4
- package/dist/lib/adapters/adapter.d.ts +6 -0
- package/dist/lib/adapters/adapter.d.ts.map +1 -1
- package/dist/lib/adapters/bigquery/index.js +18 -1
- package/dist/lib/adapters/bigquery/index.js.map +2 -2
- package/dist/lib/adapters/groundings/abstract.grounding.d.ts +2 -1
- package/dist/lib/adapters/groundings/abstract.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/index.js.map +2 -2
- package/dist/lib/adapters/mysql/index.js +18 -1
- package/dist/lib/adapters/mysql/index.js.map +2 -2
- package/dist/lib/adapters/postgres/index.js +18 -1
- package/dist/lib/adapters/postgres/index.js.map +2 -2
- package/dist/lib/adapters/runtime-scope.d.ts +14 -0
- package/dist/lib/adapters/runtime-scope.d.ts.map +1 -0
- package/dist/lib/adapters/spreadsheet/index.js +18 -1
- package/dist/lib/adapters/spreadsheet/index.js.map +2 -2
- package/dist/lib/adapters/sqlite/index.js +18 -1
- package/dist/lib/adapters/sqlite/index.js.map +2 -2
- package/dist/lib/adapters/sqlserver/index.js +18 -1
- package/dist/lib/adapters/sqlserver/index.js.map +2 -2
- package/dist/lib/agents/exceptions.d.ts +22 -0
- package/dist/lib/agents/exceptions.d.ts.map +1 -1
- package/dist/lib/agents/result-tools.d.ts.map +1 -1
- package/dist/lib/fragments/schema.d.ts +2 -1
- package/dist/lib/fragments/schema.d.ts.map +1 -1
- package/dist/lib/instructions.d.ts +1 -9
- package/dist/lib/instructions.d.ts.map +1 -1
- package/dist/lib/sql.d.ts +0 -3
- package/dist/lib/sql.d.ts.map +1 -1
- package/dist/lib/synthesis/index.js +428 -621
- package/dist/lib/synthesis/index.js.map +4 -4
- package/dist/lib/synthesis/synthesizers/index.d.ts +1 -2
- package/dist/lib/synthesis/synthesizers/index.d.ts.map +1 -1
- package/package.json +7 -6
- package/dist/lib/agents/developer.agent.d.ts +0 -41
- package/dist/lib/agents/developer.agent.d.ts.map +0 -1
- package/dist/lib/agents/teachables.agent.d.ts +0 -10
- package/dist/lib/agents/teachables.agent.d.ts.map +0 -1
- package/dist/lib/synthesis/synthesizers/teachings-generator.d.ts +0 -20
- package/dist/lib/synthesis/synthesizers/teachings-generator.d.ts.map +0 -1
package/dist/index.js
CHANGED
|
@@ -8,7 +8,8 @@ function dialectInfo(input) {
|
|
|
8
8
|
data: {
|
|
9
9
|
dialect: input.dialect,
|
|
10
10
|
...input.version && { version: input.version },
|
|
11
|
-
...input.database && { database: input.database }
|
|
11
|
+
...input.database && { database: input.database },
|
|
12
|
+
...input.details && { details: input.details }
|
|
12
13
|
}
|
|
13
14
|
};
|
|
14
15
|
}
|
|
@@ -121,14 +122,30 @@ var Adapter = class {
|
|
|
121
122
|
}
|
|
122
123
|
return this.#toSchemaFragments(ctx);
|
|
123
124
|
}
|
|
125
|
+
/**
|
|
126
|
+
* Resolve the allowed entity names (tables + views) from grounding config.
|
|
127
|
+
* Runs all configured groundings and returns the resolved set of names.
|
|
128
|
+
* Results are NOT cached — call once and store the result.
|
|
129
|
+
*/
|
|
130
|
+
async resolveAllowedEntities() {
|
|
131
|
+
const ctx = createGroundingContext();
|
|
132
|
+
for (const fn of this.grounding) {
|
|
133
|
+
const grounding = fn(this);
|
|
134
|
+
await grounding.execute(ctx);
|
|
135
|
+
}
|
|
136
|
+
return [
|
|
137
|
+
...ctx.tables.map((t) => t.name),
|
|
138
|
+
...ctx.views.map((v) => v.name)
|
|
139
|
+
];
|
|
140
|
+
}
|
|
124
141
|
/**
|
|
125
142
|
* Convert complete grounding context to schema fragments.
|
|
126
143
|
* Called after all groundings have populated ctx with data.
|
|
127
144
|
*/
|
|
128
145
|
#toSchemaFragments(ctx) {
|
|
129
|
-
const
|
|
146
|
+
const fragments = [];
|
|
130
147
|
if (ctx.info) {
|
|
131
|
-
|
|
148
|
+
fragments.push(
|
|
132
149
|
dialectInfo({
|
|
133
150
|
dialect: ctx.info.dialect,
|
|
134
151
|
version: ctx.info.version,
|
|
@@ -137,23 +154,23 @@ var Adapter = class {
|
|
|
137
154
|
);
|
|
138
155
|
}
|
|
139
156
|
for (const t of ctx.tables) {
|
|
140
|
-
|
|
157
|
+
fragments.push(this.#tableToFragment(t));
|
|
141
158
|
}
|
|
142
159
|
for (const v of ctx.views) {
|
|
143
|
-
|
|
160
|
+
fragments.push(this.#viewToFragment(v));
|
|
144
161
|
}
|
|
145
162
|
const tableMap = new Map(ctx.tables.map((t) => [t.name, t]));
|
|
146
163
|
for (const rel of ctx.relationships) {
|
|
147
164
|
const sourceTable = tableMap.get(rel.table);
|
|
148
165
|
const targetTable = tableMap.get(rel.referenced_table);
|
|
149
|
-
|
|
166
|
+
fragments.push(
|
|
150
167
|
this.#relationshipToFragment(rel, sourceTable, targetTable)
|
|
151
168
|
);
|
|
152
169
|
}
|
|
153
170
|
if (ctx.report) {
|
|
154
|
-
|
|
171
|
+
fragments.push({ name: "businessContext", data: ctx.report });
|
|
155
172
|
}
|
|
156
|
-
return
|
|
173
|
+
return fragments;
|
|
157
174
|
}
|
|
158
175
|
/**
|
|
159
176
|
* Convert a Table to a table fragment with nested column, index, and constraint fragments.
|
|
@@ -396,125 +413,10 @@ function getTablesWithRelated(allTables, relationships, filter) {
|
|
|
396
413
|
return Array.from(result);
|
|
397
414
|
}
|
|
398
415
|
|
|
399
|
-
// packages/text2sql/src/lib/agents/developer.agent.ts
|
|
400
|
-
import { tool } from "ai";
|
|
401
|
-
import dedent from "dedent";
|
|
402
|
-
import z2 from "zod";
|
|
403
|
-
import { toState } from "@deepagents/agent";
|
|
404
|
-
import { hint, persona as persona2 } from "@deepagents/context";
|
|
405
|
-
|
|
406
|
-
// packages/text2sql/src/lib/agents/explainer.agent.ts
|
|
407
|
-
import { groq } from "@ai-sdk/groq";
|
|
408
|
-
import z from "zod";
|
|
409
|
-
import {
|
|
410
|
-
ContextEngine,
|
|
411
|
-
InMemoryContextStore,
|
|
412
|
-
fragment,
|
|
413
|
-
persona,
|
|
414
|
-
structuredOutput,
|
|
415
|
-
user
|
|
416
|
-
} from "@deepagents/context";
|
|
417
|
-
var outputSchema = z.object({
|
|
418
|
-
explanation: z.string().describe("The explanation of the SQL query.")
|
|
419
|
-
});
|
|
420
|
-
async function explainSql(sql) {
|
|
421
|
-
const context = new ContextEngine({
|
|
422
|
-
store: new InMemoryContextStore(),
|
|
423
|
-
chatId: `explainer-${crypto.randomUUID()}`,
|
|
424
|
-
userId: "system"
|
|
425
|
-
});
|
|
426
|
-
context.set(
|
|
427
|
-
persona({
|
|
428
|
-
name: "explainer",
|
|
429
|
-
role: "You are an expert SQL tutor.",
|
|
430
|
-
objective: "Explain SQL queries in plain English that non-technical users understand"
|
|
431
|
-
}),
|
|
432
|
-
fragment("sql", sql),
|
|
433
|
-
fragment("task", "Focus on the intent and logic, not the syntax."),
|
|
434
|
-
user("Explain this SQL query in plain English to a non-technical user.")
|
|
435
|
-
);
|
|
436
|
-
const explainerOutput = structuredOutput({
|
|
437
|
-
model: groq("openai/gpt-oss-20b"),
|
|
438
|
-
context,
|
|
439
|
-
schema: outputSchema
|
|
440
|
-
});
|
|
441
|
-
return explainerOutput.generate();
|
|
442
|
-
}
|
|
443
|
-
|
|
444
|
-
// packages/text2sql/src/lib/agents/developer.agent.ts
|
|
445
|
-
var tools = {
|
|
446
|
-
/**
|
|
447
|
-
* Validate SQL query syntax before execution.
|
|
448
|
-
*/
|
|
449
|
-
validate_query: tool({
|
|
450
|
-
description: `Validate SQL query syntax before execution. Use this to check if your SQL is valid before running db_query. This helps catch errors early and allows you to correct the query if needed.`,
|
|
451
|
-
inputSchema: z2.object({
|
|
452
|
-
sql: z2.string().describe("The SQL query to validate.")
|
|
453
|
-
}),
|
|
454
|
-
execute: async ({ sql }, options) => {
|
|
455
|
-
const state = toState(options);
|
|
456
|
-
const result = await state.adapter.validate(sql);
|
|
457
|
-
if (typeof result === "string") {
|
|
458
|
-
return `Validation Error: ${result}`;
|
|
459
|
-
}
|
|
460
|
-
return "Query is valid.";
|
|
461
|
-
}
|
|
462
|
-
}),
|
|
463
|
-
/**
|
|
464
|
-
* Execute SQL query against the database.
|
|
465
|
-
*/
|
|
466
|
-
db_query: tool({
|
|
467
|
-
description: `Internal tool to fetch data from the store's database. Write a SQL query to retrieve the information needed to answer the user's question. The results will be returned as data that you can then present to the user in natural language.`,
|
|
468
|
-
inputSchema: z2.object({
|
|
469
|
-
reasoning: z2.string().describe(
|
|
470
|
-
"Your reasoning for why this SQL query is relevant to the user request."
|
|
471
|
-
),
|
|
472
|
-
sql: z2.string().min(1, { message: "SQL query cannot be empty." }).refine(
|
|
473
|
-
(sql) => sql.trim().toUpperCase().startsWith("SELECT") || sql.trim().toUpperCase().startsWith("WITH"),
|
|
474
|
-
{
|
|
475
|
-
message: "Only read-only SELECT or WITH queries are allowed."
|
|
476
|
-
}
|
|
477
|
-
).describe("The SQL query to execute against the database.")
|
|
478
|
-
}),
|
|
479
|
-
execute: ({ sql }, options) => {
|
|
480
|
-
const state = toState(options);
|
|
481
|
-
return state.adapter.execute(sql);
|
|
482
|
-
}
|
|
483
|
-
}),
|
|
484
|
-
/**
|
|
485
|
-
* Get plain-English explanation of a SQL query.
|
|
486
|
-
*/
|
|
487
|
-
explain_sql: tool({
|
|
488
|
-
description: dedent`
|
|
489
|
-
Get a plain-English explanation of a SQL query.
|
|
490
|
-
Use this to help the user understand what a query does.
|
|
491
|
-
|
|
492
|
-
The explanation focuses on intent and logic, not syntax.
|
|
493
|
-
`,
|
|
494
|
-
inputSchema: z2.object({
|
|
495
|
-
sql: z2.string().min(1).describe("The SQL query to explain")
|
|
496
|
-
}),
|
|
497
|
-
execute: async ({ sql }) => {
|
|
498
|
-
return explainSql(sql);
|
|
499
|
-
}
|
|
500
|
-
})
|
|
501
|
-
};
|
|
502
|
-
var fragments = [
|
|
503
|
-
persona2({
|
|
504
|
-
name: "developer_assistant",
|
|
505
|
-
role: "You are an expert SQL developer assistant helping power users build and refine queries.",
|
|
506
|
-
objective: "Help power users build and refine SQL queries with precision and clarity"
|
|
507
|
-
}),
|
|
508
|
-
hint("Be transparent: show the SQL you generate before explaining it"),
|
|
509
|
-
hint("Be precise: provide exact column names and table references"),
|
|
510
|
-
hint("Suggest refinements and alternatives when appropriate"),
|
|
511
|
-
hint("Support both natural language questions AND raw SQL input"),
|
|
512
|
-
hint("When validating user SQL, explain any errors clearly")
|
|
513
|
-
];
|
|
514
|
-
|
|
515
416
|
// packages/text2sql/src/lib/agents/exceptions.ts
|
|
516
417
|
var sqlValidationMarker = Symbol("SQLValidationError");
|
|
517
418
|
var unanswerableSqlMarker = Symbol("UnanswerableSQLError");
|
|
419
|
+
var sqlScopeMarker = Symbol("SQLScopeError");
|
|
518
420
|
var SQLValidationError = class _SQLValidationError extends Error {
|
|
519
421
|
[sqlValidationMarker];
|
|
520
422
|
constructor(message) {
|
|
@@ -537,9 +439,24 @@ var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
|
|
|
537
439
|
return error instanceof _UnanswerableSQLError && error[unanswerableSqlMarker] === true;
|
|
538
440
|
}
|
|
539
441
|
};
|
|
442
|
+
var SQLScopeError = class _SQLScopeError extends Error {
|
|
443
|
+
[sqlScopeMarker];
|
|
444
|
+
payload;
|
|
445
|
+
errorType;
|
|
446
|
+
constructor(payload) {
|
|
447
|
+
super(JSON.stringify(payload));
|
|
448
|
+
this.name = "SQLScopeError";
|
|
449
|
+
this.payload = payload;
|
|
450
|
+
this.errorType = payload.error_type;
|
|
451
|
+
this[sqlScopeMarker] = true;
|
|
452
|
+
}
|
|
453
|
+
static isInstance(error) {
|
|
454
|
+
return error instanceof _SQLScopeError && error[sqlScopeMarker] === true;
|
|
455
|
+
}
|
|
456
|
+
};
|
|
540
457
|
|
|
541
458
|
// packages/text2sql/src/lib/agents/result-tools.ts
|
|
542
|
-
import { tool
|
|
459
|
+
import { tool } from "ai";
|
|
543
460
|
import { createBashTool } from "bash-tool";
|
|
544
461
|
import chalk from "chalk";
|
|
545
462
|
import {
|
|
@@ -552,7 +469,7 @@ import {
|
|
|
552
469
|
import { AsyncLocalStorage } from "node:async_hooks";
|
|
553
470
|
import * as path from "node:path";
|
|
554
471
|
import { v7 } from "uuid";
|
|
555
|
-
import
|
|
472
|
+
import z from "zod";
|
|
556
473
|
function createCommand(name, subcommands) {
|
|
557
474
|
const usageLines = Object.entries(subcommands).map(([, def]) => ` ${name} ${def.usage.padEnd(30)} ${def.description}`).join("\n");
|
|
558
475
|
return defineCommand(name, async (args, ctx) => {
|
|
@@ -578,12 +495,17 @@ function validateReadOnly(query) {
|
|
|
578
495
|
}
|
|
579
496
|
return { valid: true };
|
|
580
497
|
}
|
|
498
|
+
var SQL_VALIDATE_REMINDER = "Always run `sql validate` before `sql run` to catch syntax errors early.";
|
|
581
499
|
function createSqlCommand(adapter, metaStore) {
|
|
582
500
|
return createCommand("sql", {
|
|
583
501
|
run: {
|
|
584
502
|
usage: 'run "SELECT ..."',
|
|
585
503
|
description: "Execute query and store results",
|
|
586
504
|
handler: async (args, ctx) => {
|
|
505
|
+
const store = metaStore.getStore();
|
|
506
|
+
if (store) {
|
|
507
|
+
store.value = { ...store.value, reminder: SQL_VALIDATE_REMINDER };
|
|
508
|
+
}
|
|
587
509
|
const rawQuery = args.join(" ").trim().replace(/\\n/g, "\n").replace(/\\t/g, " ");
|
|
588
510
|
if (!rawQuery) {
|
|
589
511
|
return {
|
|
@@ -601,8 +523,9 @@ function createSqlCommand(adapter, metaStore) {
|
|
|
601
523
|
};
|
|
602
524
|
}
|
|
603
525
|
const query = adapter.format(rawQuery);
|
|
604
|
-
|
|
605
|
-
|
|
526
|
+
if (store) {
|
|
527
|
+
store.value = { ...store.value, formattedSql: query };
|
|
528
|
+
}
|
|
606
529
|
const syntaxError = await adapter.validate(query);
|
|
607
530
|
if (syntaxError) {
|
|
608
531
|
return {
|
|
@@ -660,7 +583,7 @@ function createSqlCommand(adapter, metaStore) {
|
|
|
660
583
|
}
|
|
661
584
|
const query = adapter.format(rawQuery);
|
|
662
585
|
const store = metaStore.getStore();
|
|
663
|
-
if (store) store.value = { formattedSql: query };
|
|
586
|
+
if (store) store.value = { ...store.value, formattedSql: query };
|
|
664
587
|
const syntaxError = await adapter.validate(query);
|
|
665
588
|
if (syntaxError) {
|
|
666
589
|
return {
|
|
@@ -1358,7 +1281,7 @@ async function createResultTools(options) {
|
|
|
1358
1281
|
customCommands: [sqlCommand],
|
|
1359
1282
|
fs: filesystem
|
|
1360
1283
|
});
|
|
1361
|
-
const { sandbox, tools
|
|
1284
|
+
const { sandbox, tools } = await createBashTool({
|
|
1362
1285
|
sandbox: bashInstance,
|
|
1363
1286
|
destination: "/",
|
|
1364
1287
|
extraInstructions: 'Every bash tool call must include a brief non-empty "reasoning" input explaining why the command is needed.',
|
|
@@ -1383,14 +1306,14 @@ async function createResultTools(options) {
|
|
|
1383
1306
|
return sandbox.executeCommand(command);
|
|
1384
1307
|
}
|
|
1385
1308
|
};
|
|
1386
|
-
const bash =
|
|
1387
|
-
...
|
|
1388
|
-
inputSchema:
|
|
1389
|
-
command:
|
|
1390
|
-
reasoning:
|
|
1309
|
+
const bash = tool({
|
|
1310
|
+
...tools.bash,
|
|
1311
|
+
inputSchema: z.object({
|
|
1312
|
+
command: z.string().describe("The bash command to execute"),
|
|
1313
|
+
reasoning: z.string().trim().describe("Brief reason for executing this command")
|
|
1391
1314
|
}),
|
|
1392
1315
|
execute: async ({ command }, execOptions) => {
|
|
1393
|
-
const execute =
|
|
1316
|
+
const execute = tools.bash.execute;
|
|
1394
1317
|
if (!execute) {
|
|
1395
1318
|
throw new Error("bash tool execution is not available");
|
|
1396
1319
|
}
|
|
@@ -1400,8 +1323,10 @@ async function createResultTools(options) {
|
|
|
1400
1323
|
}
|
|
1401
1324
|
return metaStore.run({}, async () => {
|
|
1402
1325
|
const result = await execute({ command }, execOptions);
|
|
1403
|
-
const
|
|
1404
|
-
|
|
1326
|
+
const storeValue = metaStore.getStore()?.value;
|
|
1327
|
+
if (!storeValue) return result;
|
|
1328
|
+
const { reminder, ...meta } = storeValue;
|
|
1329
|
+
return { ...result, meta, reminder };
|
|
1405
1330
|
});
|
|
1406
1331
|
},
|
|
1407
1332
|
toModelOutput: ({ output }) => {
|
|
@@ -1415,14 +1340,14 @@ async function createResultTools(options) {
|
|
|
1415
1340
|
return {
|
|
1416
1341
|
sandbox: guardedSandbox,
|
|
1417
1342
|
tools: {
|
|
1418
|
-
...
|
|
1343
|
+
...tools,
|
|
1419
1344
|
bash
|
|
1420
1345
|
}
|
|
1421
1346
|
};
|
|
1422
1347
|
}
|
|
1423
1348
|
|
|
1424
1349
|
// packages/text2sql/src/lib/agents/sql.agent.ts
|
|
1425
|
-
import { groq
|
|
1350
|
+
import { groq } from "@ai-sdk/groq";
|
|
1426
1351
|
import {
|
|
1427
1352
|
APICallError,
|
|
1428
1353
|
JSONParseError,
|
|
@@ -1433,35 +1358,35 @@ import {
|
|
|
1433
1358
|
defaultSettingsMiddleware,
|
|
1434
1359
|
wrapLanguageModel
|
|
1435
1360
|
} from "ai";
|
|
1436
|
-
import
|
|
1361
|
+
import dedent from "dedent";
|
|
1437
1362
|
import pRetry from "p-retry";
|
|
1438
|
-
import
|
|
1363
|
+
import z2 from "zod";
|
|
1439
1364
|
import "@deepagents/agent";
|
|
1440
1365
|
import {
|
|
1441
|
-
ContextEngine
|
|
1442
|
-
InMemoryContextStore
|
|
1366
|
+
ContextEngine,
|
|
1367
|
+
InMemoryContextStore,
|
|
1443
1368
|
example,
|
|
1444
|
-
fragment
|
|
1369
|
+
fragment,
|
|
1445
1370
|
guardrail,
|
|
1446
|
-
hint
|
|
1447
|
-
persona
|
|
1371
|
+
hint,
|
|
1372
|
+
persona,
|
|
1448
1373
|
policy,
|
|
1449
|
-
structuredOutput
|
|
1450
|
-
user
|
|
1374
|
+
structuredOutput,
|
|
1375
|
+
user,
|
|
1451
1376
|
workflow
|
|
1452
1377
|
} from "@deepagents/context";
|
|
1453
1378
|
var RETRY_TEMPERATURES = [0, 0.2, 0.3];
|
|
1454
1379
|
var SQL_AGENT_ROLE = "Expert SQL query generator.";
|
|
1455
1380
|
var SQL_AGENT_OBJECTIVE = "Generate precise SQL grounded in provided schema.";
|
|
1456
1381
|
var SQL_AGENT_POLICIES = [
|
|
1457
|
-
|
|
1382
|
+
fragment(
|
|
1458
1383
|
"schema_mapping",
|
|
1459
1384
|
policy({
|
|
1460
1385
|
rule: "Translate natural language into precise SQL grounded in available schema entities."
|
|
1461
1386
|
}),
|
|
1462
|
-
|
|
1387
|
+
hint("Preserve schema spelling exactly, including typos in column names.")
|
|
1463
1388
|
),
|
|
1464
|
-
|
|
1389
|
+
fragment(
|
|
1465
1390
|
"projection_minimality",
|
|
1466
1391
|
policy({
|
|
1467
1392
|
rule: "Return only columns requested by the question; do not add helper columns unless explicitly requested."
|
|
@@ -1472,17 +1397,17 @@ var SQL_AGENT_POLICIES = [
|
|
|
1472
1397
|
policy({
|
|
1473
1398
|
rule: "Prefer selecting schema columns directly without derived expressions when direct selection answers the request."
|
|
1474
1399
|
}),
|
|
1475
|
-
|
|
1400
|
+
hint(
|
|
1476
1401
|
"Do not include ORDER BY, GROUP BY, or JOIN helper columns in SELECT output unless the question explicitly asks for them."
|
|
1477
1402
|
),
|
|
1478
1403
|
policy({
|
|
1479
1404
|
rule: "Use DISTINCT only when uniqueness is explicitly requested (for example distinct/unique/different/no duplicates)."
|
|
1480
1405
|
}),
|
|
1481
|
-
|
|
1406
|
+
hint(
|
|
1482
1407
|
'Do not infer DISTINCT from generic wording such as "some", plural nouns, or entity-set phrasing; for transactional/attendance-style tables, default to raw rows unless uniqueness is explicitly requested.'
|
|
1483
1408
|
)
|
|
1484
1409
|
),
|
|
1485
|
-
|
|
1410
|
+
fragment(
|
|
1486
1411
|
"date_transform_safety",
|
|
1487
1412
|
policy({
|
|
1488
1413
|
rule: "Do not assume VARCHAR/TEXT values are parseable dates. Avoid date extraction functions on text columns by default."
|
|
@@ -1490,14 +1415,14 @@ var SQL_AGENT_POLICIES = [
|
|
|
1490
1415
|
policy({
|
|
1491
1416
|
rule: "Use date-part extraction only when both conditions hold: the question explicitly asks for transformation and schema values require transformation to produce that unit."
|
|
1492
1417
|
}),
|
|
1493
|
-
|
|
1418
|
+
hint(
|
|
1494
1419
|
"Do not apply SUBSTR, STRFTIME, DATE_PART, YEAR, or similar extraction functions unless the question explicitly asks for transformation and schema values require it."
|
|
1495
1420
|
),
|
|
1496
|
-
|
|
1421
|
+
hint(
|
|
1497
1422
|
"If a column already represents the requested concept (for example a stored year-like value), use the column as-is."
|
|
1498
1423
|
)
|
|
1499
1424
|
),
|
|
1500
|
-
|
|
1425
|
+
fragment(
|
|
1501
1426
|
"sql_minimality",
|
|
1502
1427
|
guardrail({
|
|
1503
1428
|
rule: "Never hallucinate tables or columns.",
|
|
@@ -1510,7 +1435,7 @@ var SQL_AGENT_POLICIES = [
|
|
|
1510
1435
|
action: "Do not add date parsing, substring extraction, or derived columns unless explicitly required by the question or schema."
|
|
1511
1436
|
})
|
|
1512
1437
|
),
|
|
1513
|
-
|
|
1438
|
+
fragment(
|
|
1514
1439
|
"preflight_checklist",
|
|
1515
1440
|
workflow({
|
|
1516
1441
|
task: "Final SQL preflight before returning output",
|
|
@@ -1523,7 +1448,7 @@ var SQL_AGENT_POLICIES = [
|
|
|
1523
1448
|
]
|
|
1524
1449
|
})
|
|
1525
1450
|
),
|
|
1526
|
-
|
|
1451
|
+
fragment(
|
|
1527
1452
|
"set_semantics",
|
|
1528
1453
|
policy({
|
|
1529
1454
|
rule: "For questions asking where both condition A and condition B hold over an attribute, compute the intersection of qualifying sets for that attribute."
|
|
@@ -1531,28 +1456,28 @@ var SQL_AGENT_POLICIES = [
|
|
|
1531
1456
|
policy({
|
|
1532
1457
|
rule: "Do not force the same entity instance to satisfy both conditions unless the question explicitly requests the same person/row/entity."
|
|
1533
1458
|
}),
|
|
1534
|
-
|
|
1459
|
+
hint(
|
|
1535
1460
|
"Prefer INTERSECT (or logically equivalent set-based shape) over requiring the same physical row/entity to satisfy both conditions unless explicitly requested."
|
|
1536
1461
|
),
|
|
1537
|
-
|
|
1462
|
+
hint(
|
|
1538
1463
|
"When two conditions describe different row groups whose shared attribute is requested, build each group separately and intersect the attribute values."
|
|
1539
1464
|
),
|
|
1540
|
-
|
|
1465
|
+
hint(
|
|
1541
1466
|
"Do not collapse cross-group conditions into a single-row AND predicate when the intent is shared values across groups."
|
|
1542
1467
|
),
|
|
1543
1468
|
policy({
|
|
1544
1469
|
rule: "If two predicates on the same field cannot both be true for one row, do not combine them with AND; use set operations across separate filtered subsets when shared values are requested."
|
|
1545
1470
|
})
|
|
1546
1471
|
),
|
|
1547
|
-
|
|
1472
|
+
fragment(
|
|
1548
1473
|
"predicate_column_alignment",
|
|
1549
1474
|
policy({
|
|
1550
1475
|
rule: "Match literal values to semantically compatible columns. Do not compare descriptive names to identifier columns."
|
|
1551
1476
|
}),
|
|
1552
|
-
|
|
1477
|
+
hint(
|
|
1553
1478
|
"When a filter value is a descriptive label (for example a department name), join through the lookup table and filter on its name/title column, not on *_id columns."
|
|
1554
1479
|
),
|
|
1555
|
-
|
|
1480
|
+
hint(
|
|
1556
1481
|
"When relation roles are explicit in wording (for example host/home/source/destination), prefer foreign keys with matching role qualifiers over generic similarly named columns."
|
|
1557
1482
|
),
|
|
1558
1483
|
policy({
|
|
@@ -1561,7 +1486,7 @@ var SQL_AGENT_POLICIES = [
|
|
|
1561
1486
|
policy({
|
|
1562
1487
|
rule: "For hosting/held semantics, prefer host_* relationship columns when available over generic *_id alternatives."
|
|
1563
1488
|
}),
|
|
1564
|
-
|
|
1489
|
+
hint(
|
|
1565
1490
|
'Interpret wording like "held/hosted a competition or event" as a hosting relationship and map to host_* foreign keys when present.'
|
|
1566
1491
|
),
|
|
1567
1492
|
policy({
|
|
@@ -1574,7 +1499,7 @@ var SQL_AGENT_POLICIES = [
|
|
|
1574
1499
|
rule: "When filtering by a descriptive label value and a related table exposes a corresponding *_name or title column, join to that table and filter on the descriptive column."
|
|
1575
1500
|
})
|
|
1576
1501
|
),
|
|
1577
|
-
|
|
1502
|
+
fragment(
|
|
1578
1503
|
"ordering_semantics",
|
|
1579
1504
|
policy({
|
|
1580
1505
|
rule: "Respect explicit sort direction terms. If direction is not specified, use ascending order unless a superlative intent (most/least/highest/lowest) implies direction."
|
|
@@ -1588,23 +1513,23 @@ var SQL_AGENT_POLICIES = [
|
|
|
1588
1513
|
policy({
|
|
1589
1514
|
rule: 'For "most common/frequent <attribute>" requests, return the attribute value(s) only; use counts only for ordering/filtering unless the question explicitly asks to return counts.'
|
|
1590
1515
|
}),
|
|
1591
|
-
|
|
1516
|
+
hint(
|
|
1592
1517
|
'Use DESC with LIMIT 1 for "most/highest/largest"; use ASC with LIMIT 1 for "least/lowest/smallest".'
|
|
1593
1518
|
)
|
|
1594
1519
|
),
|
|
1595
|
-
|
|
1520
|
+
fragment(
|
|
1596
1521
|
"negative_membership_queries",
|
|
1597
1522
|
policy({
|
|
1598
1523
|
rule: "For requests asking entities that did not participate/host/appear in related records, prefer NOT IN or NOT EXISTS against the related foreign-key set."
|
|
1599
1524
|
}),
|
|
1600
|
-
|
|
1525
|
+
hint(
|
|
1601
1526
|
"Map role-bearing relationship columns carefully (for example host_* foreign keys for hosting relationships) instead of generic IDs when role wording is explicit."
|
|
1602
1527
|
),
|
|
1603
|
-
|
|
1528
|
+
hint(
|
|
1604
1529
|
'For "never had/never exceeded" conditions over history tables, exclude entities via NOT IN/NOT EXISTS against the disqualifying entity-id set (often built with GROUP BY/HAVING MAX(...)).'
|
|
1605
1530
|
)
|
|
1606
1531
|
),
|
|
1607
|
-
|
|
1532
|
+
fragment(
|
|
1608
1533
|
"join_completeness",
|
|
1609
1534
|
policy({
|
|
1610
1535
|
rule: "Preserve entity-restricting joins implied by the question. Do not widen results by querying only a broader attribute table when a subset entity table is available."
|
|
@@ -1612,17 +1537,17 @@ var SQL_AGENT_POLICIES = [
|
|
|
1612
1537
|
policy({
|
|
1613
1538
|
rule: "If an entity term in the question maps to a table, keep that table in query scope and join to attribute tables rather than dropping the entity table."
|
|
1614
1539
|
}),
|
|
1615
|
-
|
|
1540
|
+
hint(
|
|
1616
1541
|
"If the question targets a specific entity group, include that entity table and its join conditions even when selected columns come from a related table."
|
|
1617
1542
|
),
|
|
1618
|
-
|
|
1543
|
+
hint(
|
|
1619
1544
|
"When the question names an entity type and a relation table links to that entity via *_id, include the entity table in scope instead of counting only relation rows."
|
|
1620
1545
|
),
|
|
1621
|
-
|
|
1546
|
+
hint(
|
|
1622
1547
|
"Prefer INNER JOIN by default; use LEFT JOIN only when the question explicitly requests including unmatched rows or zero-related entities."
|
|
1623
1548
|
)
|
|
1624
1549
|
),
|
|
1625
|
-
|
|
1550
|
+
fragment(
|
|
1626
1551
|
"aggregation_exactness",
|
|
1627
1552
|
policy({
|
|
1628
1553
|
rule: "Preserve requested aggregation semantics exactly: use COUNT(*) by default for total rows, use COUNT(DISTINCT ...) only when uniqueness is explicitly requested, and group by stable entity keys when computing per-entity aggregates."
|
|
@@ -1630,11 +1555,11 @@ var SQL_AGENT_POLICIES = [
|
|
|
1630
1555
|
policy({
|
|
1631
1556
|
rule: "For questions asking which entity has lowest/highest average of a metric, compute AVG(metric) per entity (GROUP BY entity) and rank those aggregates."
|
|
1632
1557
|
}),
|
|
1633
|
-
|
|
1558
|
+
hint(
|
|
1634
1559
|
'For "how many <entities>" questions over relation records, default to COUNT(*) on qualifying rows unless explicit uniqueness language is present.'
|
|
1635
1560
|
)
|
|
1636
1561
|
),
|
|
1637
|
-
|
|
1562
|
+
fragment(
|
|
1638
1563
|
"query_shape_examples",
|
|
1639
1564
|
example({
|
|
1640
1565
|
question: "List categories ordered by how many records belong to each category.",
|
|
@@ -1662,13 +1587,13 @@ async function toSql(options) {
|
|
|
1662
1587
|
const { maxRetries = 3 } = options;
|
|
1663
1588
|
return withRetry(
|
|
1664
1589
|
async (attemptNumber, errors, attempts) => {
|
|
1665
|
-
const context = new
|
|
1666
|
-
store: new
|
|
1590
|
+
const context = new ContextEngine({
|
|
1591
|
+
store: new InMemoryContextStore(),
|
|
1667
1592
|
chatId: `sql-gen-${crypto.randomUUID()}`,
|
|
1668
1593
|
userId: "system"
|
|
1669
1594
|
});
|
|
1670
1595
|
context.set(
|
|
1671
|
-
|
|
1596
|
+
persona({
|
|
1672
1597
|
name: "Freya",
|
|
1673
1598
|
role: SQL_AGENT_ROLE,
|
|
1674
1599
|
objective: SQL_AGENT_OBJECTIVE
|
|
@@ -1681,21 +1606,21 @@ async function toSql(options) {
|
|
|
1681
1606
|
if (errors.length) {
|
|
1682
1607
|
const lastError = errors.at(-1);
|
|
1683
1608
|
context.set(
|
|
1684
|
-
|
|
1609
|
+
user(dedent`
|
|
1685
1610
|
Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
|
|
1686
1611
|
Given the question, the evidence and the database schema, return the SQL script that addresses the question.
|
|
1687
1612
|
|
|
1688
1613
|
Question: ${options.input}
|
|
1689
1614
|
`),
|
|
1690
|
-
UnanswerableSQLError.isInstance(lastError) ?
|
|
1615
|
+
UnanswerableSQLError.isInstance(lastError) ? user(
|
|
1691
1616
|
`<retry_instruction>Your previous response marked the task as unanswerable. Re-evaluate using best-effort schema mapping. If the core intent is answerable with existing tables/columns, return SQL. Return error only when required core intent cannot be mapped without inventing schema elements.</retry_instruction>`
|
|
1692
|
-
) :
|
|
1617
|
+
) : user(
|
|
1693
1618
|
`<validation_error>Your previous SQL query had the following error: ${lastError?.message}. Please fix the query.</validation_error>`
|
|
1694
1619
|
)
|
|
1695
1620
|
);
|
|
1696
1621
|
} else {
|
|
1697
1622
|
context.set(
|
|
1698
|
-
|
|
1623
|
+
user(dedent`
|
|
1699
1624
|
Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
|
|
1700
1625
|
Given the question, the evidence and the database schema, return the SQL script that addresses the question.
|
|
1701
1626
|
|
|
@@ -1704,22 +1629,22 @@ Question: ${options.input}
|
|
|
1704
1629
|
);
|
|
1705
1630
|
}
|
|
1706
1631
|
const temperature = RETRY_TEMPERATURES[attemptNumber - 1] ?? RETRY_TEMPERATURES[RETRY_TEMPERATURES.length - 1];
|
|
1707
|
-
const baseModel = options.model ??
|
|
1632
|
+
const baseModel = options.model ?? groq("openai/gpt-oss-20b");
|
|
1708
1633
|
const model = wrapLanguageModel({
|
|
1709
1634
|
model: baseModel,
|
|
1710
1635
|
middleware: defaultSettingsMiddleware({ settings: { temperature } })
|
|
1711
1636
|
});
|
|
1712
|
-
const sqlOutput =
|
|
1637
|
+
const sqlOutput = structuredOutput({
|
|
1713
1638
|
model,
|
|
1714
1639
|
context,
|
|
1715
|
-
schema:
|
|
1716
|
-
result:
|
|
1717
|
-
|
|
1718
|
-
sql:
|
|
1719
|
-
reasoning:
|
|
1640
|
+
schema: z2.object({
|
|
1641
|
+
result: z2.union([
|
|
1642
|
+
z2.object({
|
|
1643
|
+
sql: z2.string().describe("The SQL query that answers the question"),
|
|
1644
|
+
reasoning: z2.string().describe("The reasoning steps taken to generate the SQL")
|
|
1720
1645
|
}),
|
|
1721
|
-
|
|
1722
|
-
error:
|
|
1646
|
+
z2.object({
|
|
1647
|
+
error: z2.string().describe(
|
|
1723
1648
|
"Error message explaining why the question cannot be answered with the given schema"
|
|
1724
1649
|
)
|
|
1725
1650
|
})
|
|
@@ -1741,18 +1666,18 @@ Question: ${options.input}
|
|
|
1741
1666
|
};
|
|
1742
1667
|
if ("error" in output) {
|
|
1743
1668
|
context.set(
|
|
1744
|
-
|
|
1669
|
+
user(
|
|
1745
1670
|
"<best_effort_fallback>Do not return unanswerable. Produce the best valid SQL query that answers the core intent using only available schema entities.</best_effort_fallback>"
|
|
1746
1671
|
)
|
|
1747
1672
|
);
|
|
1748
|
-
const forcedSqlOutput =
|
|
1673
|
+
const forcedSqlOutput = structuredOutput({
|
|
1749
1674
|
model,
|
|
1750
1675
|
context,
|
|
1751
|
-
schema:
|
|
1752
|
-
sql:
|
|
1676
|
+
schema: z2.object({
|
|
1677
|
+
sql: z2.string().describe(
|
|
1753
1678
|
"Best-effort SQL query that answers the core intent using only available schema entities."
|
|
1754
1679
|
),
|
|
1755
|
-
reasoning:
|
|
1680
|
+
reasoning: z2.string().describe("Reasoning steps for best-effort schema mapping.")
|
|
1756
1681
|
})
|
|
1757
1682
|
});
|
|
1758
1683
|
try {
|
|
@@ -1835,24 +1760,24 @@ async function withRetry(computation, options = { retries: 3 }) {
|
|
|
1835
1760
|
}
|
|
1836
1761
|
|
|
1837
1762
|
// packages/text2sql/src/lib/agents/suggestions.agents.ts
|
|
1838
|
-
import { groq as
|
|
1839
|
-
import
|
|
1840
|
-
import
|
|
1763
|
+
import { groq as groq2 } from "@ai-sdk/groq";
|
|
1764
|
+
import dedent2 from "dedent";
|
|
1765
|
+
import z3 from "zod";
|
|
1841
1766
|
import { agent, thirdPersonPrompt } from "@deepagents/agent";
|
|
1842
1767
|
var suggestionsAgent = agent({
|
|
1843
1768
|
name: "text2sql-suggestions",
|
|
1844
|
-
model:
|
|
1845
|
-
output:
|
|
1846
|
-
suggestions:
|
|
1847
|
-
|
|
1848
|
-
question:
|
|
1849
|
-
sql:
|
|
1850
|
-
businessValue:
|
|
1769
|
+
model: groq2("openai/gpt-oss-20b"),
|
|
1770
|
+
output: z3.object({
|
|
1771
|
+
suggestions: z3.array(
|
|
1772
|
+
z3.object({
|
|
1773
|
+
question: z3.string().describe("A complex, high-impact business question."),
|
|
1774
|
+
sql: z3.string().describe("The SQL statement needed to answer the question."),
|
|
1775
|
+
businessValue: z3.string().describe("Why the question matters to stakeholders.")
|
|
1851
1776
|
})
|
|
1852
1777
|
).min(1).max(5).describe("A set of up to two advanced question + SQL pairs.")
|
|
1853
1778
|
}),
|
|
1854
1779
|
prompt: (state) => {
|
|
1855
|
-
return
|
|
1780
|
+
return dedent2`
|
|
1856
1781
|
${thirdPersonPrompt()}
|
|
1857
1782
|
|
|
1858
1783
|
<identity>
|
|
@@ -4818,9 +4743,9 @@ import {
|
|
|
4818
4743
|
clarification,
|
|
4819
4744
|
example as example2,
|
|
4820
4745
|
explain,
|
|
4821
|
-
fragment as
|
|
4746
|
+
fragment as fragment2,
|
|
4822
4747
|
guardrail as guardrail2,
|
|
4823
|
-
hint as
|
|
4748
|
+
hint as hint2,
|
|
4824
4749
|
policy as policy2,
|
|
4825
4750
|
principle,
|
|
4826
4751
|
quirk,
|
|
@@ -4833,9 +4758,9 @@ function reasoningFramework() {
|
|
|
4833
4758
|
role(
|
|
4834
4759
|
"You are a very strong reasoner and planner. Use these critical instructions to structure your plans, thoughts, and responses."
|
|
4835
4760
|
),
|
|
4836
|
-
|
|
4837
|
-
"
|
|
4838
|
-
|
|
4761
|
+
fragment2(
|
|
4762
|
+
"meta_cognitive_reasoning_framework",
|
|
4763
|
+
hint2(
|
|
4839
4764
|
"Before taking any action (either tool calls *or* responses to the user), you must proactively, methodically, and independently plan and reason about:"
|
|
4840
4765
|
),
|
|
4841
4766
|
// 1) Logical dependencies and constraints
|
|
@@ -4944,13 +4869,12 @@ function reasoningFramework() {
|
|
|
4944
4869
|
)
|
|
4945
4870
|
];
|
|
4946
4871
|
}
|
|
4947
|
-
function guidelines(
|
|
4948
|
-
const { date = "strict" } = options;
|
|
4872
|
+
function guidelines() {
|
|
4949
4873
|
const baseTeachings = [
|
|
4950
4874
|
// Include the meta-cognitive reasoning framework
|
|
4951
4875
|
...reasoningFramework(),
|
|
4952
4876
|
// Prerequisite policies (must do X before Y)
|
|
4953
|
-
|
|
4877
|
+
fragment2(
|
|
4954
4878
|
"prerequisite_policies",
|
|
4955
4879
|
policy2({
|
|
4956
4880
|
rule: "YOU MUST inspect schema structure and available tables",
|
|
@@ -4960,7 +4884,7 @@ function guidelines(options = {}) {
|
|
|
4960
4884
|
policy2({
|
|
4961
4885
|
rule: "YOU MUST resolve ambiguous business terms with the user",
|
|
4962
4886
|
before: "making ANY assumptions about terminology meaning",
|
|
4963
|
-
reason: "NEVER guess domain-specific language
|
|
4887
|
+
reason: "NEVER guess domain-specific language, instead ask for clarification"
|
|
4964
4888
|
}),
|
|
4965
4889
|
policy2({
|
|
4966
4890
|
rule: "YOU MUST validate SQL syntax",
|
|
@@ -4974,8 +4898,8 @@ function guidelines(options = {}) {
|
|
|
4974
4898
|
})
|
|
4975
4899
|
),
|
|
4976
4900
|
// Few-shot: Applying reasoning principles
|
|
4977
|
-
|
|
4978
|
-
"
|
|
4901
|
+
fragment2(
|
|
4902
|
+
"reasoning_examples",
|
|
4979
4903
|
example2({
|
|
4980
4904
|
question: "Show me sales last month",
|
|
4981
4905
|
answer: `Applying Principle 1 (Logical dependencies):
|
|
@@ -5014,17 +4938,21 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
5014
4938
|
})
|
|
5015
4939
|
),
|
|
5016
4940
|
// Schema adherence - consolidated into clear rules
|
|
5017
|
-
|
|
4941
|
+
fragment2(
|
|
5018
4942
|
"schema_adherence",
|
|
5019
|
-
|
|
5020
|
-
"Use only tables and columns
|
|
5021
|
-
|
|
5022
|
-
|
|
5023
|
-
|
|
5024
|
-
|
|
4943
|
+
guardrail2({
|
|
4944
|
+
rule: "Use only tables and columns that exist in the schema.",
|
|
4945
|
+
reason: "Inventing tables or columns produces invalid SQL and breaks schema grounding.",
|
|
4946
|
+
action: "If the user requests unspecified fields, use SELECT *. When showing related items, include IDs and requested details."
|
|
4947
|
+
}),
|
|
4948
|
+
explain({
|
|
4949
|
+
concept: "query intent words",
|
|
4950
|
+
explanation: '"Show" asks for listing rows, while "count" or "total" asks for aggregation.',
|
|
4951
|
+
therefore: 'Use listing queries for "show" requests, aggregate queries for "count" or "total", and use canonical schema values verbatim in filters.'
|
|
4952
|
+
})
|
|
5025
4953
|
),
|
|
5026
|
-
|
|
5027
|
-
"
|
|
4954
|
+
fragment2(
|
|
4955
|
+
"column_statistics",
|
|
5028
4956
|
explain({
|
|
5029
4957
|
concept: "nDistinct in column stats",
|
|
5030
4958
|
explanation: "Positive values are the estimated count of distinct values. Negative values represent the fraction of unique rows (e.g., -1 means all rows are unique, -0.5 means 50% unique)",
|
|
@@ -5035,18 +4963,18 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
5035
4963
|
explanation: "Measures how closely the physical row order matches the logical sort order of the column. Values near 1 or -1 mean the data is well-ordered; near 0 means scattered",
|
|
5036
4964
|
therefore: "High correlation means range queries (BETWEEN, >, <) on that column benefit from index scans. Low correlation means the index is less effective for ranges"
|
|
5037
4965
|
}),
|
|
5038
|
-
|
|
4966
|
+
hint2(
|
|
5039
4967
|
"When min/max stats are available, use them to validate filter values. If a user asks for values outside the known range, warn them the query may return no results."
|
|
5040
4968
|
)
|
|
5041
4969
|
),
|
|
5042
4970
|
// Joins - use relationship metadata
|
|
5043
|
-
|
|
4971
|
+
hint2(
|
|
5044
4972
|
"Use JOINs based on schema relationships. Favor PK/indexed columns; follow relationship metadata for direction and cardinality."
|
|
5045
4973
|
),
|
|
5046
4974
|
// Aggregations - explain the concepts
|
|
5047
|
-
|
|
5048
|
-
"
|
|
5049
|
-
|
|
4975
|
+
fragment2(
|
|
4976
|
+
"aggregations",
|
|
4977
|
+
hint2(
|
|
5050
4978
|
"Apply COUNT, SUM, AVG when the question implies summarization. Use window functions for ranking, running totals, or row comparisons."
|
|
5051
4979
|
),
|
|
5052
4980
|
explain({
|
|
@@ -5056,8 +4984,8 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
5056
4984
|
})
|
|
5057
4985
|
),
|
|
5058
4986
|
// Query semantics - explain concepts and document quirks
|
|
5059
|
-
|
|
5060
|
-
"
|
|
4987
|
+
fragment2(
|
|
4988
|
+
"query_interpretation",
|
|
5061
4989
|
explain({
|
|
5062
4990
|
concept: "threshold language",
|
|
5063
4991
|
explanation: 'Words like "reach", "hit", "exceed" with a value imply a threshold being met or passed',
|
|
@@ -5071,7 +4999,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
5071
4999
|
issue: "NULL values behave unexpectedly in comparisons and aggregations",
|
|
5072
5000
|
workaround: "Use IS NULL, IS NOT NULL, or COALESCE() to handle NULLs explicitly"
|
|
5073
5001
|
}),
|
|
5074
|
-
|
|
5002
|
+
hint2(
|
|
5075
5003
|
"Always include mentioned filters from joined tables in WHERE conditions."
|
|
5076
5004
|
)
|
|
5077
5005
|
),
|
|
@@ -5084,8 +5012,8 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
5084
5012
|
prefer: "Concise, business-friendly summaries with key comparisons and helpful follow-ups."
|
|
5085
5013
|
}),
|
|
5086
5014
|
// Safety guardrails - consolidated
|
|
5087
|
-
|
|
5088
|
-
"
|
|
5015
|
+
fragment2(
|
|
5016
|
+
"query_safety",
|
|
5089
5017
|
guardrail2({
|
|
5090
5018
|
rule: "Generate only valid, executable SELECT/WITH statements.",
|
|
5091
5019
|
reason: "Read-only access prevents data modification.",
|
|
@@ -5100,19 +5028,9 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
5100
5028
|
rule: "Preserve query semantics.",
|
|
5101
5029
|
reason: "Arbitrary modifications change results.",
|
|
5102
5030
|
action: 'Only add LIMIT for explicit "top N" requests. Add ORDER BY for deterministic results.'
|
|
5103
|
-
}),
|
|
5104
|
-
guardrail2({
|
|
5105
|
-
rule: "Seek clarification for genuine ambiguity.",
|
|
5106
|
-
reason: "Prevents incorrect assumptions.",
|
|
5107
|
-
action: "Ask a focused question before guessing."
|
|
5108
5031
|
})
|
|
5109
5032
|
),
|
|
5110
|
-
|
|
5111
|
-
when: "Ambiguous ranking language (top, best, active) without a metric.",
|
|
5112
|
-
ask: "Clarify the ranking metric or definition.",
|
|
5113
|
-
reason: "Ensures correct aggregation and ordering."
|
|
5114
|
-
}),
|
|
5115
|
-
hint3(
|
|
5033
|
+
hint2(
|
|
5116
5034
|
'Use sample cell values from schema hints to match exact casing and format in WHERE conditions (e.g., "Male" vs "male" vs "M").'
|
|
5117
5035
|
),
|
|
5118
5036
|
workflow2({
|
|
@@ -5170,8 +5088,8 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
5170
5088
|
],
|
|
5171
5089
|
notes: "If reference is ambiguous, ask which previous result or entity the user means."
|
|
5172
5090
|
}),
|
|
5173
|
-
|
|
5174
|
-
"
|
|
5091
|
+
fragment2(
|
|
5092
|
+
"bash_tool_usage",
|
|
5175
5093
|
workflow2({
|
|
5176
5094
|
task: "Query execution",
|
|
5177
5095
|
steps: [
|
|
@@ -5181,35 +5099,44 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
5181
5099
|
"For large results, slice first: cat <path> | jq '.[:10]'"
|
|
5182
5100
|
]
|
|
5183
5101
|
}),
|
|
5184
|
-
|
|
5185
|
-
|
|
5186
|
-
|
|
5187
|
-
|
|
5188
|
-
|
|
5189
|
-
|
|
5190
|
-
|
|
5191
|
-
|
|
5192
|
-
|
|
5193
|
-
|
|
5194
|
-
|
|
5195
|
-
|
|
5196
|
-
|
|
5197
|
-
|
|
5198
|
-
|
|
5199
|
-
|
|
5102
|
+
guardrail2({
|
|
5103
|
+
rule: "Do not attempt SQL access through non-bash tools.",
|
|
5104
|
+
reason: "SQL access is only available through the virtual bash environment.",
|
|
5105
|
+
action: 'Use "sql run" and "sql validate" through bash.'
|
|
5106
|
+
}),
|
|
5107
|
+
explain({
|
|
5108
|
+
concept: "sql command output format",
|
|
5109
|
+
explanation: "The sql command returns a file path, comma-separated column names, and a row count.",
|
|
5110
|
+
therefore: "Use the returned column names to build precise jq queries against the output file."
|
|
5111
|
+
}),
|
|
5112
|
+
quirk({
|
|
5113
|
+
issue: "This is a virtual bash environment, so you cannot access underlying SQL files directly.",
|
|
5114
|
+
workaround: "Treat the returned result path as the artifact to inspect, rather than trying to access SQL files themselves."
|
|
5115
|
+
}),
|
|
5116
|
+
quirk({
|
|
5117
|
+
issue: "If a query fails, the sql command reports the error on stderr.",
|
|
5118
|
+
workaround: "Read stderr first and classify the failure before retrying or changing the query."
|
|
5119
|
+
})
|
|
5120
|
+
),
|
|
5121
|
+
fragment2(
|
|
5122
|
+
"clarifications",
|
|
5123
|
+
guardrail2({
|
|
5124
|
+
rule: "Do not invent an answer when the available schema, results, or user request are insufficient to determine it.",
|
|
5125
|
+
reason: "Prevents hallucinations and improves trustworthiness.",
|
|
5126
|
+
action: "State that you do not have enough information to determine the answer and ask a focused clarification question."
|
|
5127
|
+
}),
|
|
5128
|
+
clarification({
|
|
5129
|
+
when: "Ambiguous ranking language (top, best, active) without a metric.",
|
|
5130
|
+
ask: "Clarify the ranking metric or definition.",
|
|
5131
|
+
reason: "Ensures correct aggregation and ordering."
|
|
5132
|
+
}),
|
|
5200
5133
|
clarification({
|
|
5201
5134
|
when: "The request targets time-based data without a date range.",
|
|
5202
5135
|
ask: "Confirm the intended timeframe (e.g., last 30/90 days, YTD, specific year).",
|
|
5203
5136
|
reason: "Prevents large scans and irrelevant results."
|
|
5204
5137
|
})
|
|
5205
|
-
)
|
|
5206
|
-
|
|
5207
|
-
baseTeachings.push(
|
|
5208
|
-
hint3(
|
|
5209
|
-
'When a month, day, or time period is mentioned without a year (e.g., "in August", "on Monday"), assume ALL occurrences of that period in the data. Do not ask for year clarification.'
|
|
5210
|
-
)
|
|
5211
|
-
);
|
|
5212
|
-
}
|
|
5138
|
+
)
|
|
5139
|
+
];
|
|
5213
5140
|
return baseTeachings;
|
|
5214
5141
|
}
|
|
5215
5142
|
|
|
@@ -5235,7 +5162,6 @@ var Text2Sql = class {
|
|
|
5235
5162
|
#config;
|
|
5236
5163
|
constructor(config) {
|
|
5237
5164
|
this.#config = {
|
|
5238
|
-
teachingsOptions: config.teachingsOptions,
|
|
5239
5165
|
adapter: config.adapter,
|
|
5240
5166
|
context: config.context,
|
|
5241
5167
|
tools: config.tools ?? {},
|
|
@@ -5266,9 +5192,9 @@ var Text2Sql = class {
|
|
|
5266
5192
|
if (cached) {
|
|
5267
5193
|
return cached;
|
|
5268
5194
|
}
|
|
5269
|
-
const
|
|
5270
|
-
await this.#config.introspection.write(
|
|
5271
|
-
return
|
|
5195
|
+
const fragments = await this.#config.adapter.introspect();
|
|
5196
|
+
await this.#config.introspection.write(fragments);
|
|
5197
|
+
return fragments;
|
|
5272
5198
|
}
|
|
5273
5199
|
/**
|
|
5274
5200
|
* Generate training data pairs using a producer factory.
|
|
@@ -5299,7 +5225,7 @@ var Text2Sql = class {
|
|
|
5299
5225
|
}
|
|
5300
5226
|
const trackedFs = new TrackedFs(this.#config.filesystem);
|
|
5301
5227
|
const context = this.#config.context(
|
|
5302
|
-
...guidelines(
|
|
5228
|
+
...guidelines(),
|
|
5303
5229
|
...await this.index()
|
|
5304
5230
|
);
|
|
5305
5231
|
const lastItem = messages[messages.length - 1];
|
|
@@ -5317,7 +5243,7 @@ var Text2Sql = class {
|
|
|
5317
5243
|
}
|
|
5318
5244
|
const uiMessages = messages.map(chatMessageToUIMessage);
|
|
5319
5245
|
const { mounts: skillMounts } = context.getSkillMounts();
|
|
5320
|
-
const { tools
|
|
5246
|
+
const { tools } = await createResultTools({
|
|
5321
5247
|
adapter: this.#config.adapter,
|
|
5322
5248
|
skillMounts,
|
|
5323
5249
|
filesystem: trackedFs
|
|
@@ -5327,7 +5253,7 @@ var Text2Sql = class {
|
|
|
5327
5253
|
model: this.#config.model,
|
|
5328
5254
|
context,
|
|
5329
5255
|
tools: {
|
|
5330
|
-
...
|
|
5256
|
+
...tools,
|
|
5331
5257
|
...this.#config.tools
|
|
5332
5258
|
},
|
|
5333
5259
|
guardrails: [errorRecoveryGuardrail],
|
|
@@ -5410,6 +5336,7 @@ export {
|
|
|
5410
5336
|
MssqlFs,
|
|
5411
5337
|
Point,
|
|
5412
5338
|
PostgresFs,
|
|
5339
|
+
SQLScopeError,
|
|
5413
5340
|
SQLValidationError,
|
|
5414
5341
|
ScopedFs,
|
|
5415
5342
|
SqliteFs,
|