@deepagents/text2sql 0.19.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -714,6 +714,32 @@ var LastQueryExtractor = class extends BaseContextualExtractor {
714
714
  // packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
715
715
  import pLimit from "p-limit";
716
716
 
717
+ // packages/text2sql/src/lib/agents/exceptions.ts
718
+ var sqlValidationMarker = Symbol("SQLValidationError");
719
+ var unanswerableSqlMarker = Symbol("UnanswerableSQLError");
720
+ var SQLValidationError = class _SQLValidationError extends Error {
721
+ [sqlValidationMarker];
722
+ constructor(message) {
723
+ super(message);
724
+ this.name = "SQLValidationError";
725
+ this[sqlValidationMarker] = true;
726
+ }
727
+ static isInstance(error) {
728
+ return error instanceof _SQLValidationError && error[sqlValidationMarker] === true;
729
+ }
730
+ };
731
+ var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
732
+ [unanswerableSqlMarker];
733
+ constructor(message) {
734
+ super(message);
735
+ this.name = "UnanswerableSQLError";
736
+ this[unanswerableSqlMarker] = true;
737
+ }
738
+ static isInstance(error) {
739
+ return error instanceof _UnanswerableSQLError && error[unanswerableSqlMarker] === true;
740
+ }
741
+ };
742
+
717
743
  // packages/text2sql/src/lib/agents/question.agent.ts
718
744
  import { groq as groq4 } from "@ai-sdk/groq";
719
745
  import dedent4 from "dedent";
@@ -838,42 +864,231 @@ import {
838
864
  defaultSettingsMiddleware,
839
865
  wrapLanguageModel
840
866
  } from "ai";
867
+ import dedent5 from "dedent";
841
868
  import pRetry from "p-retry";
842
869
  import z5 from "zod";
843
870
  import "@deepagents/agent";
844
871
  import {
845
872
  ContextEngine as ContextEngine5,
846
873
  InMemoryContextStore as InMemoryContextStore5,
874
+ example,
875
+ fragment as fragment5,
876
+ guardrail as guardrail2,
877
+ hint,
847
878
  persona as persona5,
879
+ policy,
848
880
  structuredOutput as structuredOutput5,
849
- user as user5
881
+ user as user5,
882
+ workflow
850
883
  } from "@deepagents/context";
851
- var RETRY_TEMPERATURES = [0, 0.2, 0.3];
884
+ var RETRY_TEMPERATURES = [0, 0.4, 0.8];
885
+ var SQL_AGENT_ROLE = "Expert SQL query generator.";
886
+ var SQL_AGENT_OBJECTIVE = "Generate precise SQL grounded in provided schema.";
887
+ var SQL_AGENT_POLICIES = [
888
+ fragment5(
889
+ "schema_mapping",
890
+ policy({
891
+ rule: "Translate natural language into precise SQL grounded in available schema entities."
892
+ }),
893
+ hint("Preserve schema spelling exactly, including typos in column names.")
894
+ ),
895
+ fragment5(
896
+ "projection_minimality",
897
+ policy({
898
+ rule: "Return only columns requested by the question; do not add helper columns unless explicitly requested."
899
+ }),
900
+ policy({
901
+ rule: 'For requests of the form "X sorted/ordered by Y", project X only unless Y is explicitly requested as an output field.'
902
+ }),
903
+ policy({
904
+ rule: "Prefer selecting schema columns directly without derived expressions when direct selection answers the request."
905
+ }),
906
+ hint(
907
+ "Do not include ORDER BY, GROUP BY, or JOIN helper columns in SELECT output unless the question explicitly asks for them."
908
+ ),
909
+ policy({
910
+ rule: "Use DISTINCT only when uniqueness is explicitly requested (for example distinct/unique/different/no duplicates)."
911
+ }),
912
+ hint(
913
+ 'Do not infer DISTINCT from generic wording such as "some", plural nouns, or entity-set phrasing; for transactional/attendance-style tables, default to raw rows unless uniqueness is explicitly requested.'
914
+ )
915
+ ),
916
+ fragment5(
917
+ "date_transform_safety",
918
+ policy({
919
+ rule: "Do not assume VARCHAR/TEXT values are parseable dates. Avoid date extraction functions on text columns by default."
920
+ }),
921
+ policy({
922
+ rule: "Use date-part extraction only when both conditions hold: the question explicitly asks for transformation and schema values require transformation to produce that unit."
923
+ }),
924
+ hint(
925
+ "Do not apply SUBSTR, STRFTIME, DATE_PART, YEAR, or similar extraction functions unless the question explicitly asks for transformation and schema values require it."
926
+ ),
927
+ hint(
928
+ "If a column already represents the requested concept (for example a stored year-like value), use the column as-is."
929
+ )
930
+ ),
931
+ fragment5(
932
+ "sql_minimality",
933
+ guardrail2({
934
+ rule: "Never hallucinate tables or columns.",
935
+ reason: "Schema fidelity is required.",
936
+ action: "Use only available schema entities."
937
+ }),
938
+ guardrail2({
939
+ rule: "Avoid unnecessary transformations and derived projections.",
940
+ reason: "Extra transformations frequently change semantics and reduce correctness.",
941
+ action: "Do not add date parsing, substring extraction, or derived columns unless explicitly required by the question or schema."
942
+ })
943
+ ),
944
+ fragment5(
945
+ "preflight_checklist",
946
+ workflow({
947
+ task: "Final SQL preflight before returning output",
948
+ steps: [
949
+ "Verify selected columns match the question and remove unrequested helper projections.",
950
+ "If aggregate values are used only for ranking/filtering, keep them out of SELECT unless explicitly requested.",
951
+ "Prefer raw schema columns over derived expressions when raw columns already satisfy the request.",
952
+ "If a candidate query uses STRFTIME, SUBSTR, DATE_PART, YEAR, or similar extraction on text-like columns, remove that transformation unless explicitly required by the question.",
953
+ "Return only schema-grounded SQL using existing tables and columns."
954
+ ]
955
+ })
956
+ ),
957
+ fragment5(
958
+ "set_semantics",
959
+ policy({
960
+ rule: "For questions asking where both condition A and condition B hold over an attribute, compute the intersection of qualifying sets for that attribute."
961
+ }),
962
+ policy({
963
+ rule: "Do not force the same entity instance to satisfy both conditions unless the question explicitly requests the same person/row/entity."
964
+ }),
965
+ hint(
966
+ "Prefer INTERSECT (or logically equivalent set-based shape) over requiring the same physical row/entity to satisfy both conditions unless explicitly requested."
967
+ ),
968
+ hint(
969
+ "When two conditions describe different row groups whose shared attribute is requested, build each group separately and intersect the attribute values."
970
+ ),
971
+ hint(
972
+ "Do not collapse cross-group conditions into a single-row AND predicate when the intent is shared values across groups."
973
+ ),
974
+ policy({
975
+ rule: "If two predicates on the same field cannot both be true for one row, do not combine them with AND; use set operations across separate filtered subsets when shared values are requested."
976
+ })
977
+ ),
978
+ fragment5(
979
+ "predicate_column_alignment",
980
+ policy({
981
+ rule: "Match literal values to semantically compatible columns. Do not compare descriptive names to identifier columns."
982
+ }),
983
+ hint(
984
+ "When a filter value is a descriptive label (for example a department name), join through the lookup table and filter on its name/title column, not on *_id columns."
985
+ ),
986
+ hint(
987
+ "When relation roles are explicit in wording (for example host/home/source/destination), prefer foreign keys with matching role qualifiers over generic similarly named columns."
988
+ ),
989
+ policy({
990
+ rule: "When multiple foreign-key candidates exist, select the column whose qualifier best matches the relationship described in the question."
991
+ }),
992
+ policy({
993
+ rule: "For hosting/held semantics, prefer host_* relationship columns when available over generic *_id alternatives."
994
+ }),
995
+ hint(
996
+ 'Interpret wording like "held/hosted a competition or event" as a hosting relationship and map to host_* foreign keys when present.'
997
+ ),
998
+ policy({
999
+ rule: "Do not compare descriptive labels or names to *_id columns; join to the table containing the descriptive field and filter there."
1000
+ }),
1001
+ policy({
1002
+ rule: "Keep numeric identifiers unquoted when used as numeric equality filters unless schema indicates text identifiers."
1003
+ }),
1004
+ policy({
1005
+ rule: "When filtering by a descriptive label value and a related table exposes a corresponding *_name or title column, join to that table and filter on the descriptive column."
1006
+ })
1007
+ ),
1008
+ fragment5(
1009
+ "ordering_semantics",
1010
+ policy({
1011
+ rule: "Respect explicit sort direction terms. If direction is not specified, use ascending order unless a superlative intent (most/least/highest/lowest) implies direction."
1012
+ }),
1013
+ policy({
1014
+ rule: "When ranking categories by frequency, use COUNT for ordering but keep output focused on requested category fields unless counts are explicitly requested."
1015
+ }),
1016
+ policy({
1017
+ rule: "Do not use DESC unless descending direction is explicit or a superlative intent requires descending ranking."
1018
+ }),
1019
+ policy({
1020
+ rule: 'For "most common/frequent <attribute>" requests, return the attribute value(s) only; use counts only for ordering/filtering unless the question explicitly asks to return counts.'
1021
+ }),
1022
+ hint(
1023
+ 'Use DESC with LIMIT 1 for "most/highest/largest"; use ASC with LIMIT 1 for "least/lowest/smallest".'
1024
+ )
1025
+ ),
1026
+ fragment5(
1027
+ "negative_membership_queries",
1028
+ policy({
1029
+ rule: "For requests asking entities that did not participate/host/appear in related records, prefer NOT IN or NOT EXISTS against the related foreign-key set."
1030
+ }),
1031
+ hint(
1032
+ "Map role-bearing relationship columns carefully (for example host_* foreign keys for hosting relationships) instead of generic IDs when role wording is explicit."
1033
+ ),
1034
+ hint(
1035
+ 'For "never had/never exceeded" conditions over history tables, exclude entities via NOT IN/NOT EXISTS against the disqualifying entity-id set (often built with GROUP BY/HAVING MAX(...)).'
1036
+ )
1037
+ ),
1038
+ fragment5(
1039
+ "join_completeness",
1040
+ policy({
1041
+ rule: "Preserve entity-restricting joins implied by the question. Do not widen results by querying only a broader attribute table when a subset entity table is available."
1042
+ }),
1043
+ policy({
1044
+ rule: "If an entity term in the question maps to a table, keep that table in query scope and join to attribute tables rather than dropping the entity table."
1045
+ }),
1046
+ hint(
1047
+ "If the question targets a specific entity group, include that entity table and its join conditions even when selected columns come from a related table."
1048
+ ),
1049
+ hint(
1050
+ "When the question names an entity type and a relation table links to that entity via *_id, include the entity table in scope instead of counting only relation rows."
1051
+ ),
1052
+ hint(
1053
+ "Prefer INNER JOIN by default; use LEFT JOIN only when the question explicitly requests including unmatched rows or zero-related entities."
1054
+ )
1055
+ ),
1056
+ fragment5(
1057
+ "aggregation_exactness",
1058
+ policy({
1059
+ rule: "Preserve requested aggregation semantics exactly: use COUNT(*) by default for total rows, use COUNT(DISTINCT ...) only when uniqueness is explicitly requested, and group by stable entity keys when computing per-entity aggregates."
1060
+ }),
1061
+ policy({
1062
+ rule: "For questions asking which entity has lowest/highest average of a metric, compute AVG(metric) per entity (GROUP BY entity) and rank those aggregates."
1063
+ }),
1064
+ hint(
1065
+ 'For "how many <entities>" questions over relation records, default to COUNT(*) on qualifying rows unless explicit uniqueness language is present.'
1066
+ )
1067
+ ),
1068
+ fragment5(
1069
+ "query_shape_examples",
1070
+ example({
1071
+ question: "List categories ordered by how many records belong to each category.",
1072
+ answer: "SELECT category FROM records GROUP BY category ORDER BY COUNT(*)"
1073
+ }),
1074
+ example({
1075
+ question: "Show labels shared by rows with metric > 100 and rows with metric < 10.",
1076
+ answer: "SELECT label FROM records WHERE metric > 100 INTERSECT SELECT label FROM records WHERE metric < 10"
1077
+ }),
1078
+ example({
1079
+ question: "List locations that have not hosted any event.",
1080
+ answer: "SELECT location_name FROM locations WHERE location_id NOT IN (SELECT host_location_id FROM events)"
1081
+ }),
1082
+ example({
1083
+ question: "List the most common category across records.",
1084
+ answer: "SELECT category FROM records GROUP BY category ORDER BY COUNT(*) DESC LIMIT 1"
1085
+ })
1086
+ )
1087
+ ];
852
1088
  function extractSql(output) {
853
1089
  const match = output.match(/```sql\n?([\s\S]*?)```/);
854
1090
  return match ? match[1].trim() : output.trim();
855
1091
  }
856
- var marker = Symbol("SQLValidationError");
857
- var SQLValidationError = class _SQLValidationError extends Error {
858
- [marker];
859
- constructor(message) {
860
- super(message);
861
- this.name = "SQLValidationError";
862
- this[marker] = true;
863
- }
864
- static isInstance(error) {
865
- return error instanceof _SQLValidationError && error[marker] === true;
866
- }
867
- };
868
- var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
869
- constructor(message) {
870
- super(message);
871
- this.name = "UnanswerableSQLError";
872
- }
873
- static isInstance(error) {
874
- return error instanceof _UnanswerableSQLError;
875
- }
876
- };
877
1092
  async function toSql(options) {
878
1093
  const { maxRetries = 3 } = options;
879
1094
  return withRetry(
@@ -886,20 +1101,38 @@ async function toSql(options) {
886
1101
  context.set(
887
1102
  persona5({
888
1103
  name: "Freya",
889
- role: "You are an expert SQL query generator. You translate natural language questions into precise, efficient SQL queries based on the provided database schema.",
890
- objective: "Translate natural language questions into precise, efficient SQL queries"
1104
+ role: SQL_AGENT_ROLE,
1105
+ objective: SQL_AGENT_OBJECTIVE
1106
+ // role: `You are a data science expert that provides well-reasoned and detailed responses.`,
1107
+ // objective: `Your task is to understand the schema and generate a valid SQL query to answer the question. You first think about the reasoning process as an internal monologue and then provide the user with the answer.`,
891
1108
  }),
1109
+ ...SQL_AGENT_POLICIES,
892
1110
  ...options.fragments
893
1111
  );
894
1112
  if (errors.length) {
1113
+ const lastError = errors.at(-1);
895
1114
  context.set(
896
- user5(options.input),
897
- user5(
898
- `<validation_error>Your previous SQL query had the following error: ${errors.at(-1)?.message}. Please fix the query.</validation_error>`
1115
+ user5(dedent5`
1116
+ Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
1117
+ Given the question, the evidence and the database schema, return the SQL script that addresses the question.
1118
+
1119
+ Question: ${options.input}
1120
+ `),
1121
+ UnanswerableSQLError.isInstance(lastError) ? user5(
1122
+ `<retry_instruction>Your previous response marked the task as unanswerable. Re-evaluate using best-effort schema mapping. If the core intent is answerable with existing tables/columns, return SQL. Return error only when required core intent cannot be mapped without inventing schema elements.</retry_instruction>`
1123
+ ) : user5(
1124
+ `<validation_error>Your previous SQL query had the following error: ${lastError?.message}. Please fix the query.</validation_error>`
899
1125
  )
900
1126
  );
901
1127
  } else {
902
- context.set(user5(options.input));
1128
+ context.set(
1129
+ user5(dedent5`
1130
+ Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
1131
+ Given the question, the evidence and the database schema, return the SQL script that addresses the question.
1132
+
1133
+ Question: ${options.input}
1134
+ `)
1135
+ );
903
1136
  }
904
1137
  const temperature = RETRY_TEMPERATURES[attemptNumber - 1] ?? RETRY_TEMPERATURES[RETRY_TEMPERATURES.length - 1];
905
1138
  const baseModel = options.model ?? groq5("openai/gpt-oss-20b");
@@ -925,19 +1158,45 @@ async function toSql(options) {
925
1158
  })
926
1159
  });
927
1160
  const { result: output } = await sqlOutput.generate();
1161
+ const finalizeSql = async (rawSql) => {
1162
+ const sql = options.adapter.format(extractSql(rawSql));
1163
+ const validationError = await options.adapter.validate(sql);
1164
+ if (validationError) {
1165
+ throw new SQLValidationError(validationError);
1166
+ }
1167
+ return {
1168
+ attempts,
1169
+ sql,
1170
+ errors: errors.length ? errors.map(formatErrorMessage) : void 0
1171
+ };
1172
+ };
928
1173
  if ("error" in output) {
929
- throw new UnanswerableSQLError(output.error);
930
- }
931
- const sql = options.adapter.format(extractSql(output.sql));
932
- const validationError = await options.adapter.validate(sql);
933
- if (validationError) {
934
- throw new SQLValidationError(validationError);
1174
+ context.set(
1175
+ user5(
1176
+ "<best_effort_fallback>Do not return unanswerable. Produce the best valid SQL query that answers the core intent using only available schema entities.</best_effort_fallback>"
1177
+ )
1178
+ );
1179
+ const forcedSqlOutput = structuredOutput5({
1180
+ model,
1181
+ context,
1182
+ schema: z5.object({
1183
+ sql: z5.string().describe(
1184
+ "Best-effort SQL query that answers the core intent using only available schema entities."
1185
+ ),
1186
+ reasoning: z5.string().describe("Reasoning steps for best-effort schema mapping.")
1187
+ })
1188
+ });
1189
+ try {
1190
+ const forced = await forcedSqlOutput.generate();
1191
+ return await finalizeSql(forced.sql);
1192
+ } catch (error) {
1193
+ if (SQLValidationError.isInstance(error) || APICallError.isInstance(error) || JSONParseError.isInstance(error) || TypeValidationError.isInstance(error) || NoObjectGeneratedError.isInstance(error) || NoOutputGeneratedError.isInstance(error) || NoContentGeneratedError.isInstance(error)) {
1194
+ throw error;
1195
+ }
1196
+ throw new UnanswerableSQLError(output.error);
1197
+ }
935
1198
  }
936
- return {
937
- attempts,
938
- sql,
939
- errors: errors.length ? errors.map(formatErrorMessage) : void 0
940
- };
1199
+ return await finalizeSql(output.sql);
941
1200
  },
942
1201
  { retries: maxRetries - 1 }
943
1202
  );
@@ -1000,9 +1259,6 @@ async function withRetry(computation, options = { retries: 3 }) {
1000
1259
  return APICallError.isInstance(context.error) || JSONParseError.isInstance(context.error) || TypeValidationError.isInstance(context.error) || NoObjectGeneratedError.isInstance(context.error) || NoOutputGeneratedError.isInstance(context.error) || NoContentGeneratedError.isInstance(context.error);
1001
1260
  },
1002
1261
  onFailedAttempt(context) {
1003
- console.log(
1004
- `Attempt ${context.attemptNumber} failed. There are ${context.retriesLeft} retries left.`
1005
- );
1006
1262
  errors.push(context.error);
1007
1263
  }
1008
1264
  }
@@ -1104,15 +1360,15 @@ Generate ${this.options.count} questions at ${complexity} complexity.` : void 0;
1104
1360
 
1105
1361
  // packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
1106
1362
  import { groq as groq6 } from "@ai-sdk/groq";
1107
- import dedent5 from "dedent";
1363
+ import dedent6 from "dedent";
1108
1364
  import pLimit2 from "p-limit";
1109
1365
  import z6 from "zod";
1110
1366
  import "@deepagents/agent";
1111
1367
  import {
1112
1368
  ContextEngine as ContextEngine6,
1113
1369
  InMemoryContextStore as InMemoryContextStore6,
1114
- fragment as fragment5,
1115
- guardrail as guardrail2,
1370
+ fragment as fragment6,
1371
+ guardrail as guardrail3,
1116
1372
  persona as personaFragment,
1117
1373
  structuredOutput as structuredOutput6,
1118
1374
  user as user6
@@ -1163,7 +1419,7 @@ async function paraphraseQuestion(params) {
1163
1419
  chatId: `paraphraser-${crypto.randomUUID()}`,
1164
1420
  userId: "system"
1165
1421
  });
1166
- const personaInstruction = params.persona ? dedent5`
1422
+ const personaInstruction = params.persona ? dedent6`
1167
1423
  <persona role="${params.persona.role}">
1168
1424
  ${params.persona.perspective}
1169
1425
 
@@ -1171,7 +1427,7 @@ async function paraphraseQuestion(params) {
1171
1427
  Use their vocabulary, priorities, and framing style.
1172
1428
  </persona>
1173
1429
  ` : "";
1174
- const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ? dedent5`
1430
+ const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ? dedent6`
1175
1431
  <communication_styles>
1176
1432
  Generate paraphrases using these communication styles: ${params.persona.styles.join(", ")}
1177
1433
 
@@ -1187,17 +1443,17 @@ async function paraphraseQuestion(params) {
1187
1443
  role: "You are a linguistic expert specializing in paraphrasing database questions. Your task is to generate alternative phrasings of questions that preserve the exact same semantic meaning - they must all produce the identical SQL query.",
1188
1444
  objective: "Generate paraphrased versions of questions that preserve exact semantic meaning and produce identical SQL"
1189
1445
  }),
1190
- fragment5("original_question", params.question),
1191
- fragment5(
1446
+ fragment6("original_question", params.question),
1447
+ fragment6(
1192
1448
  "reference_sql",
1193
1449
  params.sql,
1194
1450
  "This SQL shows what the question is really asking - all paraphrases must ask for exactly this"
1195
1451
  ),
1196
- ...personaInstruction ? [fragment5("persona", personaInstruction)] : [],
1197
- ...styleInstruction ? [fragment5("communication_styles", styleInstruction)] : [],
1198
- fragment5(
1452
+ ...personaInstruction ? [fragment6("persona", personaInstruction)] : [],
1453
+ ...styleInstruction ? [fragment6("communication_styles", styleInstruction)] : [],
1454
+ fragment6(
1199
1455
  "task",
1200
- dedent5`
1456
+ dedent6`
1201
1457
  Generate exactly ${params.count} paraphrased versions of the original question.
1202
1458
 
1203
1459
  Requirements:
@@ -1209,14 +1465,14 @@ async function paraphraseQuestion(params) {
1209
1465
  ${params.persona?.styles?.length ? "6. Apply the specified communication styles to create diverse phrasings" : ""}
1210
1466
  `
1211
1467
  ),
1212
- guardrail2({ rule: "NEVER change what data is being requested" }),
1213
- guardrail2({
1468
+ guardrail3({ rule: "NEVER change what data is being requested" }),
1469
+ guardrail3({
1214
1470
  rule: "NEVER add filters, aggregations, or conditions not in the original"
1215
1471
  }),
1216
- guardrail2({
1472
+ guardrail3({
1217
1473
  rule: "NEVER remove any specificity from the original question"
1218
1474
  }),
1219
- guardrail2({
1475
+ guardrail3({
1220
1476
  rule: "All paraphrases must be answerable by the exact same SQL query"
1221
1477
  }),
1222
1478
  user6(
@@ -1274,7 +1530,7 @@ var BreadthEvolver = class extends PairProducer {
1274
1530
  // packages/text2sql/src/lib/synthesis/synthesizers/depth-evolver.ts
1275
1531
  import { groq as groq7 } from "@ai-sdk/groq";
1276
1532
  import { NoObjectGeneratedError as NoObjectGeneratedError2, NoOutputGeneratedError as NoOutputGeneratedError2 } from "ai";
1277
- import dedent6 from "dedent";
1533
+ import dedent7 from "dedent";
1278
1534
  import pLimit3 from "p-limit";
1279
1535
  import pRetry2 from "p-retry";
1280
1536
  import z7 from "zod";
@@ -1282,14 +1538,14 @@ import "@deepagents/agent";
1282
1538
  import {
1283
1539
  ContextEngine as ContextEngine7,
1284
1540
  InMemoryContextStore as InMemoryContextStore7,
1285
- fragment as fragment6,
1286
- guardrail as guardrail3,
1541
+ fragment as fragment7,
1542
+ guardrail as guardrail4,
1287
1543
  persona as persona6,
1288
1544
  structuredOutput as structuredOutput7,
1289
1545
  user as user7
1290
1546
  } from "@deepagents/context";
1291
1547
  var techniqueInstructions = {
1292
- "add-aggregation": dedent6`
1548
+ "add-aggregation": dedent7`
1293
1549
  Add aggregation requirements to the question.
1294
1550
  Transform it to require GROUP BY, COUNT, SUM, AVG, MIN, MAX, or similar operations.
1295
1551
  Examples:
@@ -1297,7 +1553,7 @@ var techniqueInstructions = {
1297
1553
  - "List products" → "What is the average price per category?"
1298
1554
  - "Get employees" → "How many employees are in each department?"
1299
1555
  `,
1300
- "add-filter": dedent6`
1556
+ "add-filter": dedent7`
1301
1557
  Add filtering conditions to the question.
1302
1558
  Transform it to require WHERE clauses with specific conditions.
1303
1559
  Examples:
@@ -1305,7 +1561,7 @@ var techniqueInstructions = {
1305
1561
  - "List customers" → "List customers who have made more than 5 purchases"
1306
1562
  - "Get products" → "Get products with price above $100"
1307
1563
  `,
1308
- "add-join": dedent6`
1564
+ "add-join": dedent7`
1309
1565
  Add requirements that need data from related tables.
1310
1566
  Transform it to require JOIN operations between multiple tables.
1311
1567
  Examples:
@@ -1313,7 +1569,7 @@ var techniqueInstructions = {
1313
1569
  - "List products" → "List products with their supplier information"
1314
1570
  - "Get employees" → "Get employees with their department and manager names"
1315
1571
  `,
1316
- "add-reasoning": dedent6`
1572
+ "add-reasoning": dedent7`
1317
1573
  Add multi-step reasoning requirements.
1318
1574
  Transform it to require logical deduction, comparisons, or derived calculations.
1319
1575
  Examples:
@@ -1321,7 +1577,7 @@ var techniqueInstructions = {
1321
1577
  - "List products" → "Which products are underperforming compared to their category average?"
1322
1578
  - "Get revenue" → "Which month had the highest growth compared to the previous month?"
1323
1579
  `,
1324
- hypothetical: dedent6`
1580
+ hypothetical: dedent7`
1325
1581
  Add a hypothetical or speculative scenario.
1326
1582
  Transform it to require applying calculations or projections.
1327
1583
  Examples:
@@ -1345,21 +1601,21 @@ async function evolveQuestion(params) {
1345
1601
  role: "You are an expert at evolving simple database questions into more complex ones. Your task is to take a basic question and transform it into a more sophisticated version that requires advanced SQL techniques to answer.",
1346
1602
  objective: "Transform simple questions into complex versions requiring advanced SQL techniques"
1347
1603
  }),
1348
- fragment6("original_question", params.question),
1349
- fragment6(
1604
+ fragment7("original_question", params.question),
1605
+ fragment7(
1350
1606
  "original_sql",
1351
1607
  params.sql,
1352
1608
  "(This shows what the original question required)"
1353
1609
  ),
1354
- fragment6("database_schema", params.schema),
1355
- fragment6(
1610
+ fragment7("database_schema", params.schema),
1611
+ fragment7(
1356
1612
  "technique",
1357
1613
  { name: params.technique },
1358
1614
  params.techniqueInstruction
1359
1615
  ),
1360
- fragment6(
1616
+ fragment7(
1361
1617
  "task",
1362
- dedent6`
1618
+ dedent7`
1363
1619
  Evolve the original question using the "${params.technique}" technique.
1364
1620
 
1365
1621
  Requirements:
@@ -1371,16 +1627,16 @@ async function evolveQuestion(params) {
1371
1627
  6. The evolved question should build upon the original topic/domain
1372
1628
  `
1373
1629
  ),
1374
- guardrail3({
1630
+ guardrail4({
1375
1631
  rule: "The evolved question MUST require more complex SQL than the original"
1376
1632
  }),
1377
- guardrail3({
1633
+ guardrail4({
1378
1634
  rule: "Do not ask for data that does not exist in the schema"
1379
1635
  }),
1380
- guardrail3({
1636
+ guardrail4({
1381
1637
  rule: "Keep the question grounded in the same domain as the original"
1382
1638
  }),
1383
- guardrail3({ rule: "Make sure the question is clear and unambiguous" }),
1639
+ guardrail4({ rule: "Make sure the question is clear and unambiguous" }),
1384
1640
  user7(
1385
1641
  `Evolve this question using "${params.technique}": "${params.question}"`
1386
1642
  )
@@ -1502,15 +1758,15 @@ async function withRetry2(computation) {
1502
1758
 
1503
1759
  // packages/text2sql/src/lib/synthesis/synthesizers/persona-generator.ts
1504
1760
  import { groq as groq8 } from "@ai-sdk/groq";
1505
- import dedent7 from "dedent";
1761
+ import dedent8 from "dedent";
1506
1762
  import z8 from "zod";
1507
1763
  import "@deepagents/agent";
1508
1764
  import {
1509
1765
  ContextEngine as ContextEngine8,
1510
1766
  InMemoryContextStore as InMemoryContextStore8,
1511
1767
  XmlRenderer,
1512
- fragment as fragment7,
1513
- guardrail as guardrail4,
1768
+ fragment as fragment8,
1769
+ guardrail as guardrail5,
1514
1770
  persona as personaFragment2,
1515
1771
  structuredOutput as structuredOutput8,
1516
1772
  user as user8
@@ -1542,10 +1798,10 @@ async function generatePersonas(schemaFragments, options) {
1542
1798
  role: "You are an expert at understanding database schemas and inferring who would use them.",
1543
1799
  objective: "Generate realistic personas representing users who would query this database"
1544
1800
  }),
1545
- fragment7("database_schema", schema),
1546
- fragment7(
1801
+ fragment8("database_schema", schema),
1802
+ fragment8(
1547
1803
  "task",
1548
- dedent7`
1804
+ dedent8`
1549
1805
  Analyze the database schema and generate realistic personas representing
1550
1806
  the different types of users who would query this database.
1551
1807
 
@@ -1576,9 +1832,9 @@ async function generatePersonas(schemaFragments, options) {
1576
1832
  - Styles should match how this persona would naturally communicate
1577
1833
  `
1578
1834
  ),
1579
- fragment7(
1835
+ fragment8(
1580
1836
  "example",
1581
- dedent7`
1837
+ dedent8`
1582
1838
  For an e-commerce schema with orders, customers, products tables:
1583
1839
 
1584
1840
  {
@@ -1594,13 +1850,13 @@ async function generatePersonas(schemaFragments, options) {
1594
1850
  }
1595
1851
  `
1596
1852
  ),
1597
- guardrail4({
1853
+ guardrail5({
1598
1854
  rule: "Only generate personas relevant to the actual schema provided"
1599
1855
  }),
1600
- guardrail4({
1856
+ guardrail5({
1601
1857
  rule: "Do not invent tables or data that do not exist in the schema"
1602
1858
  }),
1603
- guardrail4({
1859
+ guardrail5({
1604
1860
  rule: "Ensure perspectives are specific to the domain, not generic"
1605
1861
  }),
1606
1862
  user8(
@@ -1621,7 +1877,7 @@ import { XmlRenderer as XmlRenderer2 } from "@deepagents/context";
1621
1877
 
1622
1878
  // packages/text2sql/src/lib/agents/teachables.agent.ts
1623
1879
  import { groq as groq9 } from "@ai-sdk/groq";
1624
- import dedent8 from "dedent";
1880
+ import dedent9 from "dedent";
1625
1881
  import z9 from "zod";
1626
1882
  import "@deepagents/agent";
1627
1883
  import {
@@ -1629,18 +1885,18 @@ import {
1629
1885
  InMemoryContextStore as InMemoryContextStore9,
1630
1886
  analogy,
1631
1887
  clarification,
1632
- example,
1888
+ example as example2,
1633
1889
  explain,
1634
- fragment as fragment8,
1635
- guardrail as guardrail5,
1636
- hint,
1890
+ fragment as fragment9,
1891
+ guardrail as guardrail6,
1892
+ hint as hint2,
1637
1893
  persona as persona7,
1638
1894
  quirk,
1639
1895
  structuredOutput as structuredOutput9,
1640
1896
  styleGuide,
1641
1897
  term,
1642
1898
  user as user9,
1643
- workflow
1899
+ workflow as workflow2
1644
1900
  } from "@deepagents/context";
1645
1901
  var outputSchema4 = z9.object({
1646
1902
  terms: z9.array(z9.object({ name: z9.string(), definition: z9.string() })).optional().describe("Domain terminology definitions"),
@@ -1705,11 +1961,11 @@ async function toTeachings(input, options) {
1705
1961
  role: 'You design "fragments" for a Text2SQL system. Fragments become structured XML instructions.',
1706
1962
  objective: "Choose only high-impact items that improve accuracy, safety, or clarity for this database"
1707
1963
  }),
1708
- fragment8("database_schema", input.schema),
1709
- ...input.context ? [fragment8("additional_context", input.context)] : [],
1710
- fragment8(
1964
+ fragment9("database_schema", input.schema),
1965
+ ...input.context ? [fragment9("additional_context", input.context)] : [],
1966
+ fragment9(
1711
1967
  "output_structure",
1712
- dedent8`
1968
+ dedent9`
1713
1969
  Output a JSON object with these optional arrays (include only relevant ones):
1714
1970
  - terms: [{ name: string, definition: string }] - Domain terminology
1715
1971
  - hints: [{ text: string }] - Helpful SQL generation hints
@@ -1723,9 +1979,9 @@ async function toTeachings(input, options) {
1723
1979
  - analogies: [{ concepts: string[], relationship: string, insight?: string, therefore?: string, pitfall?: string }]
1724
1980
  `
1725
1981
  ),
1726
- fragment8(
1982
+ fragment9(
1727
1983
  "task",
1728
- dedent8`
1984
+ dedent9`
1729
1985
  1. Analyze the schema to infer domain, relationships, and sensitive columns.
1730
1986
  2. Generate 3-10 fragments total across all categories, prioritizing:
1731
1987
  - guardrails for PII columns (email, ssn, phone, etc)
@@ -1747,10 +2003,10 @@ async function toTeachings(input, options) {
1747
2003
  const result = await teachablesOutput.generate();
1748
2004
  const fragments = [];
1749
2005
  result.terms?.forEach((t) => fragments.push(term(t.name, t.definition)));
1750
- result.hints?.forEach((h) => fragments.push(hint(h.text)));
2006
+ result.hints?.forEach((h) => fragments.push(hint2(h.text)));
1751
2007
  result.guardrails?.forEach(
1752
2008
  (g) => fragments.push(
1753
- guardrail5({ rule: g.rule, reason: g.reason, action: g.action })
2009
+ guardrail6({ rule: g.rule, reason: g.reason, action: g.action })
1754
2010
  )
1755
2011
  );
1756
2012
  result.explains?.forEach(
@@ -1764,7 +2020,7 @@ async function toTeachings(input, options) {
1764
2020
  );
1765
2021
  result.examples?.forEach(
1766
2022
  (e) => fragments.push(
1767
- example({ question: e.question, answer: e.answer, note: e.note })
2023
+ example2({ question: e.question, answer: e.answer, note: e.note })
1768
2024
  )
1769
2025
  );
1770
2026
  result.clarifications?.forEach(
@@ -1774,7 +2030,7 @@ async function toTeachings(input, options) {
1774
2030
  );
1775
2031
  result.workflows?.forEach(
1776
2032
  (w) => fragments.push(
1777
- workflow({
2033
+ workflow2({
1778
2034
  task: w.task,
1779
2035
  steps: w.steps,
1780
2036
  triggers: w.triggers,