@deepagents/text2sql 0.19.0 → 0.20.0

This diff compares the contents of two publicly available versions of a package, both of which have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  export * from './lib/adapters/adapter.ts';
2
2
  export * from './lib/agents/developer.agent.ts';
3
+ export * from './lib/agents/exceptions.ts';
3
4
  export * from './lib/agents/result-tools.ts';
4
5
  export * from './lib/agents/sql.agent.ts';
5
6
  export * from './lib/agents/suggestions.agents.ts';
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,2BAA2B,CAAC;AAC1C,cAAc,iCAAiC,CAAC;AAChD,cAAc,8BAA8B,CAAC;AAC7C,cAAc,2BAA2B,CAAC;AAC1C,cAAc,oCAAoC,CAAC;AACnD,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,mBAAmB,CAAC;AAClC,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,2BAA2B,CAAC;AAC1C,cAAc,iCAAiC,CAAC;AAChD,cAAc,4BAA4B,CAAC;AAC3C,cAAc,8BAA8B,CAAC;AAC7C,cAAc,2BAA2B,CAAC;AAC1C,cAAc,oCAAoC,CAAC;AACnD,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,mBAAmB,CAAC;AAClC,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC"}
package/dist/index.js CHANGED
@@ -511,6 +511,32 @@ var fragments = [
511
511
  hint("When validating user SQL, explain any errors clearly")
512
512
  ];
513
513
 
514
+ // packages/text2sql/src/lib/agents/exceptions.ts
515
// Brand symbols for the text2sql error classes.
// They are taken from the global symbol registry (Symbol.for) rather than
// created with Symbol(), so every loaded copy of this module resolves to the
// same symbol and can recognise errors created by another copy.
var sqlValidationMarker = Symbol.for("@deepagents/text2sql.SQLValidationError");
var unanswerableSqlMarker = Symbol.for("@deepagents/text2sql.UnanswerableSQLError");

/**
 * Error describing SQL that did not pass validation.
 * In this bundle it is thrown when `adapter.validate(sql)` returns a
 * validation message; that message becomes the error message.
 */
var SQLValidationError = class _SQLValidationError extends Error {
  /**
   * @param message2 Human-readable validation failure text.
   */
  constructor(message2) {
    super(message2);
    this.name = "SQLValidationError";
    // Brand the instance so isInstance() does not depend on class identity.
    this[sqlValidationMarker] = true;
  }
  /**
   * Type guard for SQLValidationError.
   * Checks the brand instead of `instanceof`, so it also accepts errors
   * produced by a different copy of this package and never throws on
   * null, undefined or primitive input.
   *
   * @param error Any caught value.
   * @returns `true` only when `error` is an object carrying the brand.
   */
  static isInstance(error) {
    return error != null && typeof error === "object" && error[sqlValidationMarker] === true;
  }
};

/**
 * Error describing a question for which no SQL could be produced.
 * In this bundle it is thrown with the model-reported `error` text.
 */
var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
  /**
   * @param message2 Explanation of why the question could not be answered.
   */
  constructor(message2) {
    super(message2);
    this.name = "UnanswerableSQLError";
    // Brand the instance so isInstance() does not depend on class identity.
    this[unanswerableSqlMarker] = true;
  }
  /**
   * Type guard for UnanswerableSQLError (brand based, null-safe).
   *
   * @param error Any caught value.
   * @returns `true` only when `error` is an object carrying the brand.
   */
  static isInstance(error) {
    return error != null && typeof error === "object" && error[unanswerableSqlMarker] === true;
  }
};
539
+
514
540
  // packages/text2sql/src/lib/agents/result-tools.ts
515
541
  import { tool as tool2 } from "ai";
516
542
  import { createBashTool } from "bash-tool";
@@ -1040,42 +1066,120 @@ import {
1040
1066
  defaultSettingsMiddleware,
1041
1067
  wrapLanguageModel
1042
1068
  } from "ai";
1069
+ import dedent2 from "dedent";
1043
1070
  import pRetry from "p-retry";
1044
1071
  import z4 from "zod";
1045
1072
  import "@deepagents/agent";
1046
1073
  import {
1047
1074
  ContextEngine as ContextEngine2,
1048
1075
  InMemoryContextStore as InMemoryContextStore2,
1076
+ fragment as fragment2,
1049
1077
  persona as persona3,
1078
+ policy,
1050
1079
  structuredOutput as structuredOutput2,
1051
1080
  user as user2
1052
1081
  } from "@deepagents/context";
1053
1082
  var RETRY_TEMPERATURES = [0, 0.2, 0.3];
1083
+ var SQL_AGENT_ROLE = "Expert SQL query generator.";
1084
+ var SQL_AGENT_OBJECTIVE = "Generate precise SQL grounded in provided schema.";
1085
+ var SQL_AGENT_POLICIES = [
1086
+ fragment2(
1087
+ "schema_mapping",
1088
+ policy({
1089
+ rule: "Translate natural language into precise SQL grounded in available schema entities."
1090
+ })
1091
+ // policy({
1092
+ // rule: 'Before returning an error, perform a schema-grounded self-check: identify core intent, draft best-effort SQL, then verify it uses only existing tables/columns.',
1093
+ // }),
1094
+ // policy({
1095
+ // rule: 'Return unanswerable only if that self-check confirms no valid SQL can express the required intent without inventing schema elements.',
1096
+ // }),
1097
+ // policy({
1098
+ // rule: 'Prefer a best-effort valid SQL query when entities can be reasonably inferred from table or column names.',
1099
+ // }),
1100
+ // policy({
1101
+ // rule: 'Use lexical normalization (singular/plural, paraphrases, role synonyms, and minor wording differences) to align question terms with schema names.',
1102
+ // }),
1103
+ // policy({
1104
+ // rule: 'Decompose noun phrases into core entity and qualifiers, and map the core entity first.',
1105
+ // }),
1106
+ // policy({
1107
+ // rule: 'Do not require every descriptive word to map to a separate schema field when the core entity match is unambiguous.',
1108
+ // }),
1109
+ // policy({
1110
+ // rule: 'For phrases like "X of Y", treat Y as contextual (non-blocking) when Y has no mapped schema field and the question does not ask to filter/group/select by Y explicitly.',
1111
+ // }),
1112
+ // policy({
1113
+ // rule: 'Treat unmatched qualifiers as blockers only when they are restrictive constraints (specific values, comparisons, or conditions that change row eligibility).',
1114
+ // }),
1115
+ // hint('Preserve schema spelling exactly, including typos in column names.'),
1116
+ )
1117
+ // fragment(
1118
+ // 'unanswerable_gate',
1119
+ // workflow({
1120
+ // task: 'Unanswerable decision',
1121
+ // steps: [
1122
+ // 'Identify the core intent (metric/projection and required filters).',
1123
+ // 'Attempt schema-grounded mapping for the core intent before considering error.',
1124
+ // 'If a valid SELECT can answer the core intent without inventing schema entities, return SQL.',
1125
+ // 'Return unanswerable only when required information cannot be mapped to any available table or column.',
1126
+ // ],
1127
+ // }),
1128
+ // policy({
1129
+ // rule: 'Do not reject a question as unanswerable when requested information can be derived by filtering, joining, grouping, counting, set operations, or sorting on available columns.',
1130
+ // }),
1131
+ // ),
1132
+ // fragment(
1133
+ // 'query_shape_preferences',
1134
+ // hint(
1135
+ // 'Prefer explicit INNER JOINs over LEFT JOINs unless the question requires unmatched rows.',
1136
+ // ),
1137
+ // hint(
1138
+ // 'Prefer direct joins over dropping join constraints or using weaker alternatives.',
1139
+ // ),
1140
+ // hint('Use DISTINCT only when uniqueness is explicitly requested.'),
1141
+ // hint(
1142
+ // 'For superlatives over grouped entities (most/least/highest/lowest by group), prefer GROUP BY with ORDER BY aggregate and LIMIT 1.',
1143
+ // ),
1144
+ // hint(
1145
+ // 'For average/count conditions per entity, prefer GROUP BY with HAVING aggregate predicates over row-level WHERE predicates.',
1146
+ // ),
1147
+ // hint(
1148
+ // 'For "both" conditions across two criteria, prefer INTERSECT when selecting shared values.',
1149
+ // ),
1150
+ // hint(
1151
+ // 'For "A or B" retrieval across criteria, prefer UNION when combining two qualifying sets.',
1152
+ // ),
1153
+ // hint(
1154
+ // 'For "never" constraints against related records, prefer NOT IN or EXCEPT against the disqualifying set.',
1155
+ // ),
1156
+ // hint(
1157
+ // 'Use equality predicates for exact values unless the question asks for pattern matching.',
1158
+ // ),
1159
+ // hint(
1160
+ // 'Keep numeric literals unquoted when they are purely numeric tokens in the question.',
1161
+ // ),
1162
+ // ),
1163
+ // fragment(
1164
+ // 'sql_minimality',
1165
+ // guardrail({
1166
+ // rule: 'Never hallucinate tables or columns.',
1167
+ // reason: 'Schema fidelity is required.',
1168
+ // action: 'Use only available schema entities.',
1169
+ // }),
1170
+ // guardrail({
1171
+ // rule: 'Prefer the minimal query over transformed expressions.',
1172
+ // reason:
1173
+ // 'Unnecessary transformations reduce correctness and add avoidable complexity.',
1174
+ // action:
1175
+ // 'Do not add date parsing, substring extraction, derived projections, or extra selected columns unless explicitly requested or required by schema mismatch.',
1176
+ // }),
1177
+ // ),
1178
+ ];
1054
1179
  function extractSql(output) {
1055
1180
  const match = output.match(/```sql\n?([\s\S]*?)```/);
1056
1181
  return match ? match[1].trim() : output.trim();
1057
1182
  }
1058
- var marker = Symbol("SQLValidationError");
1059
- var SQLValidationError = class _SQLValidationError extends Error {
1060
- [marker];
1061
- constructor(message2) {
1062
- super(message2);
1063
- this.name = "SQLValidationError";
1064
- this[marker] = true;
1065
- }
1066
- static isInstance(error) {
1067
- return error instanceof _SQLValidationError && error[marker] === true;
1068
- }
1069
- };
1070
- var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
1071
- constructor(message2) {
1072
- super(message2);
1073
- this.name = "UnanswerableSQLError";
1074
- }
1075
- static isInstance(error) {
1076
- return error instanceof _UnanswerableSQLError;
1077
- }
1078
- };
1079
1183
  async function toSql(options) {
1080
1184
  const { maxRetries = 3 } = options;
1081
1185
  return withRetry(
@@ -1088,20 +1192,38 @@ async function toSql(options) {
1088
1192
  context.set(
1089
1193
  persona3({
1090
1194
  name: "Freya",
1091
- role: "You are an expert SQL query generator. You translate natural language questions into precise, efficient SQL queries based on the provided database schema.",
1092
- objective: "Translate natural language questions into precise, efficient SQL queries"
1195
+ role: SQL_AGENT_ROLE,
1196
+ objective: SQL_AGENT_OBJECTIVE
1197
+ // role: `You are a data science expert that provides well-reasoned and detailed responses.`,
1198
+ // objective: `Your task is to understand the schema and generate a valid SQL query to answer the question. You first think about the reasoning process as an internal monologue and then provide the user with the answer.`,
1093
1199
  }),
1200
+ ...SQL_AGENT_POLICIES,
1094
1201
  ...options.fragments
1095
1202
  );
1096
1203
  if (errors.length) {
1204
+ const lastError = errors.at(-1);
1097
1205
  context.set(
1098
- user2(options.input),
1099
- user2(
1100
- `<validation_error>Your previous SQL query had the following error: ${errors.at(-1)?.message}. Please fix the query.</validation_error>`
1206
+ user2(dedent2`
1207
+ Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
1208
+ Given the question, the evidence and the database schema, return the SQL script that addresses the question.
1209
+
1210
+ Question: ${options.input}
1211
+ `),
1212
+ UnanswerableSQLError.isInstance(lastError) ? user2(
1213
+ `<retry_instruction>Your previous response marked the task as unanswerable. Re-evaluate using best-effort schema mapping. If the core intent is answerable with existing tables/columns, return SQL. Return error only when required core intent cannot be mapped without inventing schema elements.</retry_instruction>`
1214
+ ) : user2(
1215
+ `<validation_error>Your previous SQL query had the following error: ${lastError?.message}. Please fix the query.</validation_error>`
1101
1216
  )
1102
1217
  );
1103
1218
  } else {
1104
- context.set(user2(options.input));
1219
+ context.set(
1220
+ user2(dedent2`
1221
+ Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
1222
+ Given the question, the evidence and the database schema, return the SQL script that addresses the question.
1223
+
1224
+ Question: ${options.input}
1225
+ `)
1226
+ );
1105
1227
  }
1106
1228
  const temperature = RETRY_TEMPERATURES[attemptNumber - 1] ?? RETRY_TEMPERATURES[RETRY_TEMPERATURES.length - 1];
1107
1229
  const baseModel = options.model ?? groq2("openai/gpt-oss-20b");
@@ -1127,19 +1249,45 @@ async function toSql(options) {
1127
1249
  })
1128
1250
  });
1129
1251
  const { result: output } = await sqlOutput.generate();
1252
+ const finalizeSql = async (rawSql) => {
1253
+ const sql = options.adapter.format(extractSql(rawSql));
1254
+ const validationError = await options.adapter.validate(sql);
1255
+ if (validationError) {
1256
+ throw new SQLValidationError(validationError);
1257
+ }
1258
+ return {
1259
+ attempts,
1260
+ sql,
1261
+ errors: errors.length ? errors.map(formatErrorMessage) : void 0
1262
+ };
1263
+ };
1130
1264
  if ("error" in output) {
1131
- throw new UnanswerableSQLError(output.error);
1132
- }
1133
- const sql = options.adapter.format(extractSql(output.sql));
1134
- const validationError = await options.adapter.validate(sql);
1135
- if (validationError) {
1136
- throw new SQLValidationError(validationError);
1265
+ context.set(
1266
+ user2(
1267
+ "<best_effort_fallback>Do not return unanswerable. Produce the best valid SQL query that answers the core intent using only available schema entities.</best_effort_fallback>"
1268
+ )
1269
+ );
1270
+ const forcedSqlOutput = structuredOutput2({
1271
+ model,
1272
+ context,
1273
+ schema: z4.object({
1274
+ sql: z4.string().describe(
1275
+ "Best-effort SQL query that answers the core intent using only available schema entities."
1276
+ ),
1277
+ reasoning: z4.string().describe("Reasoning steps for best-effort schema mapping.")
1278
+ })
1279
+ });
1280
+ try {
1281
+ const forced = await forcedSqlOutput.generate();
1282
+ return await finalizeSql(forced.sql);
1283
+ } catch (error) {
1284
+ if (SQLValidationError.isInstance(error) || APICallError.isInstance(error) || JSONParseError.isInstance(error) || TypeValidationError.isInstance(error) || NoObjectGeneratedError.isInstance(error) || NoOutputGeneratedError.isInstance(error) || NoContentGeneratedError.isInstance(error)) {
1285
+ throw error;
1286
+ }
1287
+ throw new UnanswerableSQLError(output.error);
1288
+ }
1137
1289
  }
1138
- return {
1139
- attempts,
1140
- sql,
1141
- errors: errors.length ? errors.map(formatErrorMessage) : void 0
1142
- };
1290
+ return await finalizeSql(output.sql);
1143
1291
  },
1144
1292
  { retries: maxRetries - 1 }
1145
1293
  );
@@ -1202,9 +1350,6 @@ async function withRetry(computation, options = { retries: 3 }) {
1202
1350
  return APICallError.isInstance(context.error) || JSONParseError.isInstance(context.error) || TypeValidationError.isInstance(context.error) || NoObjectGeneratedError.isInstance(context.error) || NoOutputGeneratedError.isInstance(context.error) || NoContentGeneratedError.isInstance(context.error);
1203
1351
  },
1204
1352
  onFailedAttempt(context) {
1205
- console.log(
1206
- `Attempt ${context.attemptNumber} failed. There are ${context.retriesLeft} retries left.`
1207
- );
1208
1353
  errors.push(context.error);
1209
1354
  }
1210
1355
  }
@@ -1213,7 +1358,7 @@ async function withRetry(computation, options = { retries: 3 }) {
1213
1358
 
1214
1359
  // packages/text2sql/src/lib/agents/suggestions.agents.ts
1215
1360
  import { groq as groq3 } from "@ai-sdk/groq";
1216
- import dedent2 from "dedent";
1361
+ import dedent3 from "dedent";
1217
1362
  import z5 from "zod";
1218
1363
  import { agent, thirdPersonPrompt } from "@deepagents/agent";
1219
1364
  var suggestionsAgent = agent({
@@ -1229,7 +1374,7 @@ var suggestionsAgent = agent({
1229
1374
  ).min(1).max(5).describe("A set of up to two advanced question + SQL pairs.")
1230
1375
  }),
1231
1376
  prompt: (state) => {
1232
- return dedent2`
1377
+ return dedent3`
1233
1378
  ${thirdPersonPrompt()}
1234
1379
 
1235
1380
  <identity>
@@ -4195,10 +4340,10 @@ import {
4195
4340
  clarification,
4196
4341
  example,
4197
4342
  explain,
4198
- fragment as fragment2,
4343
+ fragment as fragment3,
4199
4344
  guardrail,
4200
4345
  hint as hint2,
4201
- policy,
4346
+ policy as policy2,
4202
4347
  principle,
4203
4348
  quirk,
4204
4349
  role,
@@ -4210,7 +4355,7 @@ function reasoningFramework() {
4210
4355
  role(
4211
4356
  "You are a very strong reasoner and planner. Use these critical instructions to structure your plans, thoughts, and responses."
4212
4357
  ),
4213
- fragment2(
4358
+ fragment3(
4214
4359
  "meta-cognitive-reasoning-framework",
4215
4360
  hint2(
4216
4361
  "Before taking any action (either tool calls *or* responses to the user), you must proactively, methodically, and independently plan and reason about:"
@@ -4220,19 +4365,19 @@ function reasoningFramework() {
4220
4365
  title: "Logical dependencies and constraints",
4221
4366
  description: "Analyze the intended action against the following factors. Resolve conflicts in order of importance:",
4222
4367
  policies: [
4223
- policy({
4368
+ policy2({
4224
4369
  rule: "Policy-based rules, mandatory prerequisites, and constraints."
4225
4370
  }),
4226
- policy({
4371
+ policy2({
4227
4372
  rule: "Order of operations: Ensure taking an action does not prevent a subsequent necessary action.",
4228
4373
  policies: [
4229
4374
  "The user may request actions in a random order, but you may need to reorder operations to maximize successful completion of the task."
4230
4375
  ]
4231
4376
  }),
4232
- policy({
4377
+ policy2({
4233
4378
  rule: "Other prerequisites (information and/or actions needed)."
4234
4379
  }),
4235
- policy({ rule: "Explicit user constraints or preferences." })
4380
+ policy2({ rule: "Explicit user constraints or preferences." })
4236
4381
  ]
4237
4382
  }),
4238
4383
  // 2) Risk assessment
@@ -4285,17 +4430,17 @@ function reasoningFramework() {
4285
4430
  title: "Completeness",
4286
4431
  description: "Ensure that all requirements, constraints, options, and preferences are exhaustively incorporated into your plan.",
4287
4432
  policies: [
4288
- policy({
4433
+ policy2({
4289
4434
  rule: "Resolve conflicts using the order of importance in #1."
4290
4435
  }),
4291
- policy({
4436
+ policy2({
4292
4437
  rule: "Avoid premature conclusions: There may be multiple relevant options for a given situation.",
4293
4438
  policies: [
4294
4439
  "To check for whether an option is relevant, reason about all information sources from #5.",
4295
4440
  "You may need to consult the user to even know whether something is applicable. Do not assume it is not applicable without checking."
4296
4441
  ]
4297
4442
  }),
4298
- policy({
4443
+ policy2({
4299
4444
  rule: "Review applicable sources of information from #5 to confirm which are relevant to the current state."
4300
4445
  })
4301
4446
  ]
@@ -4327,31 +4472,31 @@ function guidelines(options = {}) {
4327
4472
  // Include the meta-cognitive reasoning framework
4328
4473
  ...reasoningFramework(),
4329
4474
  // Prerequisite policies (must do X before Y)
4330
- fragment2(
4475
+ fragment3(
4331
4476
  "prerequisite_policies",
4332
- policy({
4477
+ policy2({
4333
4478
  rule: "YOU MUST inspect schema structure and available tables",
4334
4479
  before: "generating ANY SQL query",
4335
4480
  reason: "NEVER generate SQL without knowing valid tables, columns, and relationships"
4336
4481
  }),
4337
- policy({
4482
+ policy2({
4338
4483
  rule: "YOU MUST resolve ambiguous business terms with the user",
4339
4484
  before: "making ANY assumptions about terminology meaning",
4340
4485
  reason: "NEVER guess domain-specific language\u2014ask for clarification"
4341
4486
  }),
4342
- policy({
4487
+ policy2({
4343
4488
  rule: "YOU MUST validate SQL syntax",
4344
4489
  before: "executing ANY query against the database",
4345
4490
  reason: "NEVER execute unvalidated queries"
4346
4491
  }),
4347
- policy({
4492
+ policy2({
4348
4493
  rule: "YOU MUST complete ALL reasoning steps",
4349
4494
  before: "taking ANY tool call or response action",
4350
4495
  reason: "Once an action is taken, it CANNOT be undone. NO EXCEPTIONS."
4351
4496
  })
4352
4497
  ),
4353
4498
  // Few-shot: Applying reasoning principles
4354
- fragment2(
4499
+ fragment3(
4355
4500
  "reasoning-examples",
4356
4501
  example({
4357
4502
  question: "Show me sales last month",
@@ -4391,7 +4536,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4391
4536
  })
4392
4537
  ),
4393
4538
  // Schema adherence - consolidated into clear rules
4394
- fragment2(
4539
+ fragment3(
4395
4540
  "schema_adherence",
4396
4541
  hint2(
4397
4542
  "Use only tables and columns from the schema. For unspecified columns, use SELECT *. When showing related items, include IDs and requested details."
@@ -4400,7 +4545,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4400
4545
  '"Show" means list items; "count" or "total" means aggregate. Use canonical values verbatim for filtering.'
4401
4546
  )
4402
4547
  ),
4403
- fragment2(
4548
+ fragment3(
4404
4549
  "Column statistics",
4405
4550
  explain({
4406
4551
  concept: "nDistinct in column stats",
@@ -4421,7 +4566,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4421
4566
  "Use JOINs based on schema relationships. Favor PK/indexed columns; follow relationship metadata for direction and cardinality."
4422
4567
  ),
4423
4568
  // Aggregations - explain the concepts
4424
- fragment2(
4569
+ fragment3(
4425
4570
  "Aggregations",
4426
4571
  hint2(
4427
4572
  "Apply COUNT, SUM, AVG when the question implies summarization. Use window functions for ranking, running totals, or row comparisons."
@@ -4433,7 +4578,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4433
4578
  })
4434
4579
  ),
4435
4580
  // Query semantics - explain concepts and document quirks
4436
- fragment2(
4581
+ fragment3(
4437
4582
  "Query interpretation",
4438
4583
  explain({
4439
4584
  concept: "threshold language",
@@ -4461,7 +4606,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4461
4606
  prefer: "Concise, business-friendly summaries with key comparisons and helpful follow-ups."
4462
4607
  }),
4463
4608
  // Safety guardrails - consolidated
4464
- fragment2(
4609
+ fragment3(
4465
4610
  "Query safety",
4466
4611
  guardrail({
4467
4612
  rule: "Generate only valid, executable SELECT/WITH statements.",
@@ -4547,7 +4692,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4547
4692
  ],
4548
4693
  notes: "If reference is ambiguous, ask which previous result or entity the user means."
4549
4694
  }),
4550
- fragment2(
4695
+ fragment3(
4551
4696
  "Bash tool usage",
4552
4697
  workflow({
4553
4698
  task: "Query execution",