@deepagents/text2sql 0.19.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +759 -137
- package/dist/index.js.map +4 -4
- package/dist/lib/agents/exceptions.d.ts +20 -0
- package/dist/lib/agents/exceptions.d.ts.map +1 -0
- package/dist/lib/agents/result-tools.d.ts.map +1 -1
- package/dist/lib/agents/sql.agent.d.ts +0 -17
- package/dist/lib/agents/sql.agent.d.ts.map +1 -1
- package/dist/lib/synthesis/index.js +359 -103
- package/dist/lib/synthesis/index.js.map +4 -4
- package/dist/lib/synthesis/synthesizers/depth-evolver.d.ts.map +1 -1
- package/dist/lib/synthesis/synthesizers/schema-synthesizer.d.ts.map +1 -1
- package/package.json +4 -4
|
@@ -714,6 +714,32 @@ var LastQueryExtractor = class extends BaseContextualExtractor {
|
|
|
714
714
|
// packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
|
|
715
715
|
import pLimit from "p-limit";
|
|
716
716
|
|
|
717
|
+
// packages/text2sql/src/lib/agents/exceptions.ts

// Brand symbols used by the `isInstance` checks below. They come from the
// global symbol registry (`Symbol.for`) so that every loaded copy of this
// module resolves to the same symbol; a plain `Symbol()` would be private to
// one copy and could not identify errors created by another one.
var sqlValidationMarker = Symbol.for("@deepagents/text2sql.SQLValidationError");
var unanswerableSqlMarker = Symbol.for("@deepagents/text2sql.UnanswerableSQLError");

/**
 * Error thrown when generated SQL is rejected by validation.
 *
 * `name` is set to "SQLValidationError" and every instance carries the
 * `sqlValidationMarker` brand.
 */
var SQLValidationError = class _SQLValidationError extends Error {
  [sqlValidationMarker] = true;
  /**
   * @param {string} message - Human-readable description of the failure.
   */
  constructor(message) {
    super(message);
    this.name = "SQLValidationError";
  }
  /**
   * Checks whether `error` is a SQLValidationError.
   *
   * The check relies on the brand alone rather than on `instanceof`, so it
   * still succeeds when the error was constructed by a different copy of this
   * class (for example when the package is loaded twice).
   *
   * @param {unknown} error - Any value, typically something caught.
   * @returns {boolean} True when `error` is an object carrying the brand.
   */
  static isInstance(error) {
    return typeof error === "object" && error !== null && error[sqlValidationMarker] === true;
  }
};

/**
 * Error thrown when a question cannot be answered with SQL.
 *
 * `name` is set to "UnanswerableSQLError" and every instance carries the
 * `unanswerableSqlMarker` brand.
 */
var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
  [unanswerableSqlMarker] = true;
  /**
   * @param {string} message - Human-readable explanation.
   */
  constructor(message) {
    super(message);
    this.name = "UnanswerableSQLError";
  }
  /**
   * Checks whether `error` is an UnanswerableSQLError, using the brand only
   * (see SQLValidationError.isInstance for the rationale).
   *
   * @param {unknown} error - Any value, typically something caught.
   * @returns {boolean} True when `error` is an object carrying the brand.
   */
  static isInstance(error) {
    return typeof error === "object" && error !== null && error[unanswerableSqlMarker] === true;
  }
};
|
|
742
|
+
|
|
717
743
|
// packages/text2sql/src/lib/agents/question.agent.ts
|
|
718
744
|
import { groq as groq4 } from "@ai-sdk/groq";
|
|
719
745
|
import dedent4 from "dedent";
|
|
@@ -838,42 +864,231 @@ import {
|
|
|
838
864
|
defaultSettingsMiddleware,
|
|
839
865
|
wrapLanguageModel
|
|
840
866
|
} from "ai";
|
|
867
|
+
import dedent5 from "dedent";
|
|
841
868
|
import pRetry from "p-retry";
|
|
842
869
|
import z5 from "zod";
|
|
843
870
|
import "@deepagents/agent";
|
|
844
871
|
import {
|
|
845
872
|
ContextEngine as ContextEngine5,
|
|
846
873
|
InMemoryContextStore as InMemoryContextStore5,
|
|
874
|
+
example,
|
|
875
|
+
fragment as fragment5,
|
|
876
|
+
guardrail as guardrail2,
|
|
877
|
+
hint,
|
|
847
878
|
persona as persona5,
|
|
879
|
+
policy,
|
|
848
880
|
structuredOutput as structuredOutput5,
|
|
849
|
-
user as user5
|
|
881
|
+
user as user5,
|
|
882
|
+
workflow
|
|
850
883
|
} from "@deepagents/context";
|
|
851
|
-
var RETRY_TEMPERATURES = [0, 0.
|
|
884
|
+
// Sampling temperature per attempt. toSql indexes this with
// `attemptNumber - 1` and falls back to the last entry once attempts outnumber
// the list.
var RETRY_TEMPERATURES = [0, 0.4, 0.8];

// Persona role and objective that toSql passes to the SQL agent's persona.
var SQL_AGENT_ROLE = "Expert SQL query generator.";
var SQL_AGENT_OBJECTIVE = "Generate precise SQL grounded in provided schema.";

// Built-in prompt fragments for the SQL agent. toSql spreads them into the
// context right after the persona and before any caller-supplied fragments.
// Each top-level entry is a named fragment grouping policies, hints,
// guardrails, a workflow, or examples for one concern.
var SQL_AGENT_POLICIES = [
  // Ground the query in the schema exactly as it is spelled.
  fragment5(
    "schema_mapping",
    policy({
      rule: "Translate natural language into precise SQL grounded in available schema entities."
    }),
    hint("Preserve schema spelling exactly, including typos in column names.")
  ),

  // Keep the SELECT list down to what was asked for.
  fragment5(
    "projection_minimality",
    policy({
      rule: "Return only columns requested by the question; do not add helper columns unless explicitly requested."
    }),
    policy({
      rule: 'For requests of the form "X sorted/ordered by Y", project X only unless Y is explicitly requested as an output field.'
    }),
    policy({
      rule: "Prefer selecting schema columns directly without derived expressions when direct selection answers the request."
    }),
    hint(
      "Do not include ORDER BY, GROUP BY, or JOIN helper columns in SELECT output unless the question explicitly asks for them."
    ),
    policy({
      rule: "Use DISTINCT only when uniqueness is explicitly requested (for example distinct/unique/different/no duplicates)."
    }),
    hint(
      'Do not infer DISTINCT from generic wording such as "some", plural nouns, or entity-set phrasing; for transactional/attendance-style tables, default to raw rows unless uniqueness is explicitly requested.'
    )
  ),

  // Discourage date extraction on text-like columns.
  fragment5(
    "date_transform_safety",
    policy({
      rule: "Do not assume VARCHAR/TEXT values are parseable dates. Avoid date extraction functions on text columns by default."
    }),
    policy({
      rule: "Use date-part extraction only when both conditions hold: the question explicitly asks for transformation and schema values require transformation to produce that unit."
    }),
    hint(
      "Do not apply SUBSTR, STRFTIME, DATE_PART, YEAR, or similar extraction functions unless the question explicitly asks for transformation and schema values require it."
    ),
    hint(
      "If a column already represents the requested concept (for example a stored year-like value), use the column as-is."
    )
  ),

  // Guardrails: no invented schema, no gratuitous transformations.
  fragment5(
    "sql_minimality",
    guardrail2({
      rule: "Never hallucinate tables or columns.",
      reason: "Schema fidelity is required.",
      action: "Use only available schema entities."
    }),
    guardrail2({
      rule: "Avoid unnecessary transformations and derived projections.",
      reason: "Extra transformations frequently change semantics and reduce correctness.",
      action: "Do not add date parsing, substring extraction, or derived columns unless explicitly required by the question or schema."
    })
  ),

  // Checklist the model is asked to walk through before answering.
  fragment5(
    "preflight_checklist",
    workflow({
      task: "Final SQL preflight before returning output",
      steps: [
        "Verify selected columns match the question and remove unrequested helper projections.",
        "If aggregate values are used only for ranking/filtering, keep them out of SELECT unless explicitly requested.",
        "Prefer raw schema columns over derived expressions when raw columns already satisfy the request.",
        "If a candidate query uses STRFTIME, SUBSTR, DATE_PART, YEAR, or similar extraction on text-like columns, remove that transformation unless explicitly required by the question.",
        "Return only schema-grounded SQL using existing tables and columns."
      ]
    })
  ),

  // "Both A and B" questions: intersect sets instead of AND-ing on one row.
  fragment5(
    "set_semantics",
    policy({
      rule: "For questions asking where both condition A and condition B hold over an attribute, compute the intersection of qualifying sets for that attribute."
    }),
    policy({
      rule: "Do not force the same entity instance to satisfy both conditions unless the question explicitly requests the same person/row/entity."
    }),
    hint(
      "Prefer INTERSECT (or logically equivalent set-based shape) over requiring the same physical row/entity to satisfy both conditions unless explicitly requested."
    ),
    hint(
      "When two conditions describe different row groups whose shared attribute is requested, build each group separately and intersect the attribute values."
    ),
    hint(
      "Do not collapse cross-group conditions into a single-row AND predicate when the intent is shared values across groups."
    ),
    policy({
      rule: "If two predicates on the same field cannot both be true for one row, do not combine them with AND; use set operations across separate filtered subsets when shared values are requested."
    })
  ),

  // Filter literals against columns of a compatible meaning.
  fragment5(
    "predicate_column_alignment",
    policy({
      rule: "Match literal values to semantically compatible columns. Do not compare descriptive names to identifier columns."
    }),
    hint(
      "When a filter value is a descriptive label (for example a department name), join through the lookup table and filter on its name/title column, not on *_id columns."
    ),
    hint(
      "When relation roles are explicit in wording (for example host/home/source/destination), prefer foreign keys with matching role qualifiers over generic similarly named columns."
    ),
    policy({
      rule: "When multiple foreign-key candidates exist, select the column whose qualifier best matches the relationship described in the question."
    }),
    policy({
      rule: "For hosting/held semantics, prefer host_* relationship columns when available over generic *_id alternatives."
    }),
    hint(
      'Interpret wording like "held/hosted a competition or event" as a hosting relationship and map to host_* foreign keys when present.'
    ),
    policy({
      rule: "Do not compare descriptive labels or names to *_id columns; join to the table containing the descriptive field and filter there."
    }),
    policy({
      rule: "Keep numeric identifiers unquoted when used as numeric equality filters unless schema indicates text identifiers."
    }),
    policy({
      rule: "When filtering by a descriptive label value and a related table exposes a corresponding *_name or title column, join to that table and filter on the descriptive column."
    })
  ),

  // Sort direction and ranking by frequency.
  fragment5(
    "ordering_semantics",
    policy({
      rule: "Respect explicit sort direction terms. If direction is not specified, use ascending order unless a superlative intent (most/least/highest/lowest) implies direction."
    }),
    policy({
      rule: "When ranking categories by frequency, use COUNT for ordering but keep output focused on requested category fields unless counts are explicitly requested."
    }),
    policy({
      rule: "Do not use DESC unless descending direction is explicit or a superlative intent requires descending ranking."
    }),
    policy({
      rule: 'For "most common/frequent <attribute>" requests, return the attribute value(s) only; use counts only for ordering/filtering unless the question explicitly asks to return counts.'
    }),
    hint(
      'Use DESC with LIMIT 1 for "most/highest/largest"; use ASC with LIMIT 1 for "least/lowest/smallest".'
    )
  ),

  // "Did not / never" questions.
  fragment5(
    "negative_membership_queries",
    policy({
      rule: "For requests asking entities that did not participate/host/appear in related records, prefer NOT IN or NOT EXISTS against the related foreign-key set."
    }),
    hint(
      "Map role-bearing relationship columns carefully (for example host_* foreign keys for hosting relationships) instead of generic IDs when role wording is explicit."
    ),
    hint(
      'For "never had/never exceeded" conditions over history tables, exclude entities via NOT IN/NOT EXISTS against the disqualifying entity-id set (often built with GROUP BY/HAVING MAX(...)).'
    )
  ),

  // Keep the entity tables that restrict the result set.
  fragment5(
    "join_completeness",
    policy({
      rule: "Preserve entity-restricting joins implied by the question. Do not widen results by querying only a broader attribute table when a subset entity table is available."
    }),
    policy({
      rule: "If an entity term in the question maps to a table, keep that table in query scope and join to attribute tables rather than dropping the entity table."
    }),
    hint(
      "If the question targets a specific entity group, include that entity table and its join conditions even when selected columns come from a related table."
    ),
    hint(
      "When the question names an entity type and a relation table links to that entity via *_id, include the entity table in scope instead of counting only relation rows."
    ),
    hint(
      "Prefer INNER JOIN by default; use LEFT JOIN only when the question explicitly requests including unmatched rows or zero-related entities."
    )
  ),

  // COUNT / AVG semantics.
  fragment5(
    "aggregation_exactness",
    policy({
      rule: "Preserve requested aggregation semantics exactly: use COUNT(*) by default for total rows, use COUNT(DISTINCT ...) only when uniqueness is explicitly requested, and group by stable entity keys when computing per-entity aggregates."
    }),
    policy({
      rule: "For questions asking which entity has lowest/highest average of a metric, compute AVG(metric) per entity (GROUP BY entity) and rank those aggregates."
    }),
    hint(
      'For "how many <entities>" questions over relation records, default to COUNT(*) on qualifying rows unless explicit uniqueness language is present.'
    )
  ),

  // Worked question/answer pairs illustrating the shapes described above.
  fragment5(
    "query_shape_examples",
    example({
      question: "List categories ordered by how many records belong to each category.",
      answer: "SELECT category FROM records GROUP BY category ORDER BY COUNT(*)"
    }),
    example({
      question: "Show labels shared by rows with metric > 100 and rows with metric < 10.",
      answer: "SELECT label FROM records WHERE metric > 100 INTERSECT SELECT label FROM records WHERE metric < 10"
    }),
    example({
      question: "List locations that have not hosted any event.",
      answer: "SELECT location_name FROM locations WHERE location_id NOT IN (SELECT host_location_id FROM events)"
    }),
    example({
      question: "List the most common category across records.",
      answer: "SELECT category FROM records GROUP BY category ORDER BY COUNT(*) DESC LIMIT 1"
    })
  )
];
|
|
852
1088
|
/**
 * Pulls the SQL text out of a model response.
 *
 * Resolution order:
 *   1. the first fenced block tagged `sql` (tag matched case-insensitively);
 *   2. otherwise the first fenced block that has no language tag;
 *   3. otherwise the whole response.
 * The chosen text is trimmed. Preferring the tagged block means a response
 * that also contains other fenced blocks still yields the SQL one.
 *
 * @param {string} output - Raw text produced by the model.
 * @returns {string} The extracted SQL with surrounding whitespace removed.
 */
function extractSql(output) {
  const taggedFence = output.match(/```sql\n?([\s\S]*?)```/i);
  if (taggedFence) {
    return taggedFence[1].trim();
  }
  // Untagged fence: the opening backticks are followed directly by a line
  // break. Without this, the backticks would be returned as part of the SQL.
  const bareFence = output.match(/```[ \t]*\r?\n([\s\S]*?)```/);
  return (bareFence ? bareFence[1] : output).trim();
}
|
|
856
|
-
var marker = Symbol("SQLValidationError");
|
|
857
|
-
var SQLValidationError = class _SQLValidationError extends Error {
|
|
858
|
-
[marker];
|
|
859
|
-
constructor(message) {
|
|
860
|
-
super(message);
|
|
861
|
-
this.name = "SQLValidationError";
|
|
862
|
-
this[marker] = true;
|
|
863
|
-
}
|
|
864
|
-
static isInstance(error) {
|
|
865
|
-
return error instanceof _SQLValidationError && error[marker] === true;
|
|
866
|
-
}
|
|
867
|
-
};
|
|
868
|
-
var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
|
|
869
|
-
constructor(message) {
|
|
870
|
-
super(message);
|
|
871
|
-
this.name = "UnanswerableSQLError";
|
|
872
|
-
}
|
|
873
|
-
static isInstance(error) {
|
|
874
|
-
return error instanceof _UnanswerableSQLError;
|
|
875
|
-
}
|
|
876
|
-
};
|
|
877
1092
|
async function toSql(options) {
|
|
878
1093
|
const { maxRetries = 3 } = options;
|
|
879
1094
|
return withRetry(
|
|
@@ -886,20 +1101,38 @@ async function toSql(options) {
|
|
|
886
1101
|
context.set(
|
|
887
1102
|
persona5({
|
|
888
1103
|
name: "Freya",
|
|
889
|
-
role:
|
|
890
|
-
objective:
|
|
1104
|
+
role: SQL_AGENT_ROLE,
|
|
1105
|
+
objective: SQL_AGENT_OBJECTIVE
|
|
1106
|
+
// role: `You are a data science expert that provides well-reasoned and detailed responses.`,
|
|
1107
|
+
// objective: `Your task is to understand the schema and generate a valid SQL query to answer the question. You first think about the reasoning process as an internal monologue and then provide the user with the answer.`,
|
|
891
1108
|
}),
|
|
1109
|
+
...SQL_AGENT_POLICIES,
|
|
892
1110
|
...options.fragments
|
|
893
1111
|
);
|
|
894
1112
|
// The question prompt is the same on every attempt, so it is built once and
// shared by both branches below instead of repeating the template.
const questionPrompt = user5(dedent5`
  Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
  Given the question, the evidence and the database schema, return the SQL script that addresses the question.

  Question: ${options.input}
`);
if (errors.length) {
  // Retry: follow the question with feedback derived from the most recent
  // failure. An "unanswerable" outcome gets a nudge to re-evaluate; anything
  // else is reported back as a validation error to fix.
  const lastError = errors.at(-1);
  context.set(
    questionPrompt,
    UnanswerableSQLError.isInstance(lastError) ? user5(
      `<retry_instruction>Your previous response marked the task as unanswerable. Re-evaluate using best-effort schema mapping. If the core intent is answerable with existing tables/columns, return SQL. Return error only when required core intent cannot be mapped without inventing schema elements.</retry_instruction>`
    ) : user5(
      `<validation_error>Your previous SQL query had the following error: ${lastError?.message}. Please fix the query.</validation_error>`
    )
  );
} else {
  // First attempt: the question alone.
  context.set(questionPrompt);
}
|
|
904
1137
|
const temperature = RETRY_TEMPERATURES[attemptNumber - 1] ?? RETRY_TEMPERATURES[RETRY_TEMPERATURES.length - 1];
|
|
905
1138
|
const baseModel = options.model ?? groq5("openai/gpt-oss-20b");
|
|
@@ -925,19 +1158,45 @@ async function toSql(options) {
|
|
|
925
1158
|
})
|
|
926
1159
|
});
|
|
927
1160
|
const { result: output } = await sqlOutput.generate();
|
|
1161
|
+
/**
 * Converts raw model output into the value returned for this attempt.
 *
 * The SQL is extracted with extractSql, formatted with
 * `options.adapter.format`, and checked with `options.adapter.validate`.
 *
 * @param {string} rawSql - SQL text as produced by the model.
 * @returns {Promise<{attempts: *, sql: *, errors: (Array|undefined)}>} The
 *   formatted SQL together with `attempts` and, when earlier attempts failed,
 *   their errors mapped through formatErrorMessage (otherwise `undefined`).
 * @throws {SQLValidationError} When validation returns a truthy value; that
 *   value is passed to the error as its message.
 */
const finalizeSql = async (rawSql) => {
  const sql = options.adapter.format(extractSql(rawSql));

  const problem = await options.adapter.validate(sql);
  if (problem) {
    throw new SQLValidationError(problem);
  }

  // Stays undefined when no earlier attempt recorded an error.
  let priorErrors;
  if (errors.length) {
    priorErrors = errors.map(formatErrorMessage);
  }
  return { attempts, sql, errors: priorErrors };
};
|
|
928
1173
|
if ("error" in output) {
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
1174
|
+
context.set(
|
|
1175
|
+
user5(
|
|
1176
|
+
"<best_effort_fallback>Do not return unanswerable. Produce the best valid SQL query that answers the core intent using only available schema entities.</best_effort_fallback>"
|
|
1177
|
+
)
|
|
1178
|
+
);
|
|
1179
|
+
const forcedSqlOutput = structuredOutput5({
|
|
1180
|
+
model,
|
|
1181
|
+
context,
|
|
1182
|
+
schema: z5.object({
|
|
1183
|
+
sql: z5.string().describe(
|
|
1184
|
+
"Best-effort SQL query that answers the core intent using only available schema entities."
|
|
1185
|
+
),
|
|
1186
|
+
reasoning: z5.string().describe("Reasoning steps for best-effort schema mapping.")
|
|
1187
|
+
})
|
|
1188
|
+
});
|
|
1189
|
+
try {
|
|
1190
|
+
const forced = await forcedSqlOutput.generate();
|
|
1191
|
+
return await finalizeSql(forced.sql);
|
|
1192
|
+
} catch (error) {
|
|
1193
|
+
if (SQLValidationError.isInstance(error) || APICallError.isInstance(error) || JSONParseError.isInstance(error) || TypeValidationError.isInstance(error) || NoObjectGeneratedError.isInstance(error) || NoOutputGeneratedError.isInstance(error) || NoContentGeneratedError.isInstance(error)) {
|
|
1194
|
+
throw error;
|
|
1195
|
+
}
|
|
1196
|
+
throw new UnanswerableSQLError(output.error);
|
|
1197
|
+
}
|
|
935
1198
|
}
|
|
936
|
-
return
|
|
937
|
-
attempts,
|
|
938
|
-
sql,
|
|
939
|
-
errors: errors.length ? errors.map(formatErrorMessage) : void 0
|
|
940
|
-
};
|
|
1199
|
+
return await finalizeSql(output.sql);
|
|
941
1200
|
},
|
|
942
1201
|
{ retries: maxRetries - 1 }
|
|
943
1202
|
);
|
|
@@ -1000,9 +1259,6 @@ async function withRetry(computation, options = { retries: 3 }) {
|
|
|
1000
1259
|
return APICallError.isInstance(context.error) || JSONParseError.isInstance(context.error) || TypeValidationError.isInstance(context.error) || NoObjectGeneratedError.isInstance(context.error) || NoOutputGeneratedError.isInstance(context.error) || NoContentGeneratedError.isInstance(context.error);
|
|
1001
1260
|
},
|
|
1002
1261
|
onFailedAttempt(context) {
|
|
1003
|
-
console.log(
|
|
1004
|
-
`Attempt ${context.attemptNumber} failed. There are ${context.retriesLeft} retries left.`
|
|
1005
|
-
);
|
|
1006
1262
|
errors.push(context.error);
|
|
1007
1263
|
}
|
|
1008
1264
|
}
|
|
@@ -1104,15 +1360,15 @@ Generate ${this.options.count} questions at ${complexity} complexity.` : void 0;
|
|
|
1104
1360
|
|
|
1105
1361
|
// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
|
|
1106
1362
|
import { groq as groq6 } from "@ai-sdk/groq";
|
|
1107
|
-
import
|
|
1363
|
+
import dedent6 from "dedent";
|
|
1108
1364
|
import pLimit2 from "p-limit";
|
|
1109
1365
|
import z6 from "zod";
|
|
1110
1366
|
import "@deepagents/agent";
|
|
1111
1367
|
import {
|
|
1112
1368
|
ContextEngine as ContextEngine6,
|
|
1113
1369
|
InMemoryContextStore as InMemoryContextStore6,
|
|
1114
|
-
fragment as
|
|
1115
|
-
guardrail as
|
|
1370
|
+
fragment as fragment6,
|
|
1371
|
+
guardrail as guardrail3,
|
|
1116
1372
|
persona as personaFragment,
|
|
1117
1373
|
structuredOutput as structuredOutput6,
|
|
1118
1374
|
user as user6
|
|
@@ -1163,7 +1419,7 @@ async function paraphraseQuestion(params) {
|
|
|
1163
1419
|
chatId: `paraphraser-${crypto.randomUUID()}`,
|
|
1164
1420
|
userId: "system"
|
|
1165
1421
|
});
|
|
1166
|
-
const personaInstruction = params.persona ?
|
|
1422
|
+
const personaInstruction = params.persona ? dedent6`
|
|
1167
1423
|
<persona role="${params.persona.role}">
|
|
1168
1424
|
${params.persona.perspective}
|
|
1169
1425
|
|
|
@@ -1171,7 +1427,7 @@ async function paraphraseQuestion(params) {
|
|
|
1171
1427
|
Use their vocabulary, priorities, and framing style.
|
|
1172
1428
|
</persona>
|
|
1173
1429
|
` : "";
|
|
1174
|
-
const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ?
|
|
1430
|
+
const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ? dedent6`
|
|
1175
1431
|
<communication_styles>
|
|
1176
1432
|
Generate paraphrases using these communication styles: ${params.persona.styles.join(", ")}
|
|
1177
1433
|
|
|
@@ -1187,17 +1443,17 @@ async function paraphraseQuestion(params) {
|
|
|
1187
1443
|
role: "You are a linguistic expert specializing in paraphrasing database questions. Your task is to generate alternative phrasings of questions that preserve the exact same semantic meaning - they must all produce the identical SQL query.",
|
|
1188
1444
|
objective: "Generate paraphrased versions of questions that preserve exact semantic meaning and produce identical SQL"
|
|
1189
1445
|
}),
|
|
1190
|
-
|
|
1191
|
-
|
|
1446
|
+
fragment6("original_question", params.question),
|
|
1447
|
+
fragment6(
|
|
1192
1448
|
"reference_sql",
|
|
1193
1449
|
params.sql,
|
|
1194
1450
|
"This SQL shows what the question is really asking - all paraphrases must ask for exactly this"
|
|
1195
1451
|
),
|
|
1196
|
-
...personaInstruction ? [
|
|
1197
|
-
...styleInstruction ? [
|
|
1198
|
-
|
|
1452
|
+
...personaInstruction ? [fragment6("persona", personaInstruction)] : [],
|
|
1453
|
+
...styleInstruction ? [fragment6("communication_styles", styleInstruction)] : [],
|
|
1454
|
+
fragment6(
|
|
1199
1455
|
"task",
|
|
1200
|
-
|
|
1456
|
+
dedent6`
|
|
1201
1457
|
Generate exactly ${params.count} paraphrased versions of the original question.
|
|
1202
1458
|
|
|
1203
1459
|
Requirements:
|
|
@@ -1209,14 +1465,14 @@ async function paraphraseQuestion(params) {
|
|
|
1209
1465
|
${params.persona?.styles?.length ? "6. Apply the specified communication styles to create diverse phrasings" : ""}
|
|
1210
1466
|
`
|
|
1211
1467
|
),
|
|
1212
|
-
|
|
1213
|
-
|
|
1468
|
+
guardrail3({ rule: "NEVER change what data is being requested" }),
|
|
1469
|
+
guardrail3({
|
|
1214
1470
|
rule: "NEVER add filters, aggregations, or conditions not in the original"
|
|
1215
1471
|
}),
|
|
1216
|
-
|
|
1472
|
+
guardrail3({
|
|
1217
1473
|
rule: "NEVER remove any specificity from the original question"
|
|
1218
1474
|
}),
|
|
1219
|
-
|
|
1475
|
+
guardrail3({
|
|
1220
1476
|
rule: "All paraphrases must be answerable by the exact same SQL query"
|
|
1221
1477
|
}),
|
|
1222
1478
|
user6(
|
|
@@ -1274,7 +1530,7 @@ var BreadthEvolver = class extends PairProducer {
|
|
|
1274
1530
|
// packages/text2sql/src/lib/synthesis/synthesizers/depth-evolver.ts
|
|
1275
1531
|
import { groq as groq7 } from "@ai-sdk/groq";
|
|
1276
1532
|
import { NoObjectGeneratedError as NoObjectGeneratedError2, NoOutputGeneratedError as NoOutputGeneratedError2 } from "ai";
|
|
1277
|
-
import
|
|
1533
|
+
import dedent7 from "dedent";
|
|
1278
1534
|
import pLimit3 from "p-limit";
|
|
1279
1535
|
import pRetry2 from "p-retry";
|
|
1280
1536
|
import z7 from "zod";
|
|
@@ -1282,14 +1538,14 @@ import "@deepagents/agent";
|
|
|
1282
1538
|
import {
|
|
1283
1539
|
ContextEngine as ContextEngine7,
|
|
1284
1540
|
InMemoryContextStore as InMemoryContextStore7,
|
|
1285
|
-
fragment as
|
|
1286
|
-
guardrail as
|
|
1541
|
+
fragment as fragment7,
|
|
1542
|
+
guardrail as guardrail4,
|
|
1287
1543
|
persona as persona6,
|
|
1288
1544
|
structuredOutput as structuredOutput7,
|
|
1289
1545
|
user as user7
|
|
1290
1546
|
} from "@deepagents/context";
|
|
1291
1547
|
var techniqueInstructions = {
|
|
1292
|
-
"add-aggregation":
|
|
1548
|
+
"add-aggregation": dedent7`
|
|
1293
1549
|
Add aggregation requirements to the question.
|
|
1294
1550
|
Transform it to require GROUP BY, COUNT, SUM, AVG, MIN, MAX, or similar operations.
|
|
1295
1551
|
Examples:
|
|
@@ -1297,7 +1553,7 @@ var techniqueInstructions = {
|
|
|
1297
1553
|
- "List products" → "What is the average price per category?"
|
|
1298
1554
|
- "Get employees" → "How many employees are in each department?"
|
|
1299
1555
|
`,
|
|
1300
|
-
"add-filter":
|
|
1556
|
+
"add-filter": dedent7`
|
|
1301
1557
|
Add filtering conditions to the question.
|
|
1302
1558
|
Transform it to require WHERE clauses with specific conditions.
|
|
1303
1559
|
Examples:
|
|
@@ -1305,7 +1561,7 @@ var techniqueInstructions = {
|
|
|
1305
1561
|
- "List customers" → "List customers who have made more than 5 purchases"
|
|
1306
1562
|
- "Get products" → "Get products with price above $100"
|
|
1307
1563
|
`,
|
|
1308
|
-
"add-join":
|
|
1564
|
+
"add-join": dedent7`
|
|
1309
1565
|
Add requirements that need data from related tables.
|
|
1310
1566
|
Transform it to require JOIN operations between multiple tables.
|
|
1311
1567
|
Examples:
|
|
@@ -1313,7 +1569,7 @@ var techniqueInstructions = {
|
|
|
1313
1569
|
- "List products" → "List products with their supplier information"
|
|
1314
1570
|
- "Get employees" → "Get employees with their department and manager names"
|
|
1315
1571
|
`,
|
|
1316
|
-
"add-reasoning":
|
|
1572
|
+
"add-reasoning": dedent7`
|
|
1317
1573
|
Add multi-step reasoning requirements.
|
|
1318
1574
|
Transform it to require logical deduction, comparisons, or derived calculations.
|
|
1319
1575
|
Examples:
|
|
@@ -1321,7 +1577,7 @@ var techniqueInstructions = {
|
|
|
1321
1577
|
- "List products" → "Which products are underperforming compared to their category average?"
|
|
1322
1578
|
- "Get revenue" → "Which month had the highest growth compared to the previous month?"
|
|
1323
1579
|
`,
|
|
1324
|
-
hypothetical:
|
|
1580
|
+
hypothetical: dedent7`
|
|
1325
1581
|
Add a hypothetical or speculative scenario.
|
|
1326
1582
|
Transform it to require applying calculations or projections.
|
|
1327
1583
|
Examples:
|
|
@@ -1345,21 +1601,21 @@ async function evolveQuestion(params) {
|
|
|
1345
1601
|
role: "You are an expert at evolving simple database questions into more complex ones. Your task is to take a basic question and transform it into a more sophisticated version that requires advanced SQL techniques to answer.",
|
|
1346
1602
|
objective: "Transform simple questions into complex versions requiring advanced SQL techniques"
|
|
1347
1603
|
}),
|
|
1348
|
-
|
|
1349
|
-
|
|
1604
|
+
fragment7("original_question", params.question),
|
|
1605
|
+
fragment7(
|
|
1350
1606
|
"original_sql",
|
|
1351
1607
|
params.sql,
|
|
1352
1608
|
"(This shows what the original question required)"
|
|
1353
1609
|
),
|
|
1354
|
-
|
|
1355
|
-
|
|
1610
|
+
fragment7("database_schema", params.schema),
|
|
1611
|
+
fragment7(
|
|
1356
1612
|
"technique",
|
|
1357
1613
|
{ name: params.technique },
|
|
1358
1614
|
params.techniqueInstruction
|
|
1359
1615
|
),
|
|
1360
|
-
|
|
1616
|
+
fragment7(
|
|
1361
1617
|
"task",
|
|
1362
|
-
|
|
1618
|
+
dedent7`
|
|
1363
1619
|
Evolve the original question using the "${params.technique}" technique.
|
|
1364
1620
|
|
|
1365
1621
|
Requirements:
|
|
@@ -1371,16 +1627,16 @@ async function evolveQuestion(params) {
|
|
|
1371
1627
|
6. The evolved question should build upon the original topic/domain
|
|
1372
1628
|
`
|
|
1373
1629
|
),
|
|
1374
|
-
|
|
1630
|
+
guardrail4({
|
|
1375
1631
|
rule: "The evolved question MUST require more complex SQL than the original"
|
|
1376
1632
|
}),
|
|
1377
|
-
|
|
1633
|
+
guardrail4({
|
|
1378
1634
|
rule: "Do not ask for data that does not exist in the schema"
|
|
1379
1635
|
}),
|
|
1380
|
-
|
|
1636
|
+
guardrail4({
|
|
1381
1637
|
rule: "Keep the question grounded in the same domain as the original"
|
|
1382
1638
|
}),
|
|
1383
|
-
|
|
1639
|
+
guardrail4({ rule: "Make sure the question is clear and unambiguous" }),
|
|
1384
1640
|
user7(
|
|
1385
1641
|
`Evolve this question using "${params.technique}": "${params.question}"`
|
|
1386
1642
|
)
|
|
@@ -1502,15 +1758,15 @@ async function withRetry2(computation) {
|
|
|
1502
1758
|
|
|
1503
1759
|
// packages/text2sql/src/lib/synthesis/synthesizers/persona-generator.ts
|
|
1504
1760
|
import { groq as groq8 } from "@ai-sdk/groq";
|
|
1505
|
-
import
|
|
1761
|
+
import dedent8 from "dedent";
|
|
1506
1762
|
import z8 from "zod";
|
|
1507
1763
|
import "@deepagents/agent";
|
|
1508
1764
|
import {
|
|
1509
1765
|
ContextEngine as ContextEngine8,
|
|
1510
1766
|
InMemoryContextStore as InMemoryContextStore8,
|
|
1511
1767
|
XmlRenderer,
|
|
1512
|
-
fragment as
|
|
1513
|
-
guardrail as
|
|
1768
|
+
fragment as fragment8,
|
|
1769
|
+
guardrail as guardrail5,
|
|
1514
1770
|
persona as personaFragment2,
|
|
1515
1771
|
structuredOutput as structuredOutput8,
|
|
1516
1772
|
user as user8
|
|
@@ -1542,10 +1798,10 @@ async function generatePersonas(schemaFragments, options) {
|
|
|
1542
1798
|
role: "You are an expert at understanding database schemas and inferring who would use them.",
|
|
1543
1799
|
objective: "Generate realistic personas representing users who would query this database"
|
|
1544
1800
|
}),
|
|
1545
|
-
|
|
1546
|
-
|
|
1801
|
+
fragment8("database_schema", schema),
|
|
1802
|
+
fragment8(
|
|
1547
1803
|
"task",
|
|
1548
|
-
|
|
1804
|
+
dedent8`
|
|
1549
1805
|
Analyze the database schema and generate realistic personas representing
|
|
1550
1806
|
the different types of users who would query this database.
|
|
1551
1807
|
|
|
@@ -1576,9 +1832,9 @@ async function generatePersonas(schemaFragments, options) {
|
|
|
1576
1832
|
- Styles should match how this persona would naturally communicate
|
|
1577
1833
|
`
|
|
1578
1834
|
),
|
|
1579
|
-
|
|
1835
|
+
fragment8(
|
|
1580
1836
|
"example",
|
|
1581
|
-
|
|
1837
|
+
dedent8`
|
|
1582
1838
|
For an e-commerce schema with orders, customers, products tables:
|
|
1583
1839
|
|
|
1584
1840
|
{
|
|
@@ -1594,13 +1850,13 @@ async function generatePersonas(schemaFragments, options) {
|
|
|
1594
1850
|
}
|
|
1595
1851
|
`
|
|
1596
1852
|
),
|
|
1597
|
-
|
|
1853
|
+
guardrail5({
|
|
1598
1854
|
rule: "Only generate personas relevant to the actual schema provided"
|
|
1599
1855
|
}),
|
|
1600
|
-
|
|
1856
|
+
guardrail5({
|
|
1601
1857
|
rule: "Do not invent tables or data that do not exist in the schema"
|
|
1602
1858
|
}),
|
|
1603
|
-
|
|
1859
|
+
guardrail5({
|
|
1604
1860
|
rule: "Ensure perspectives are specific to the domain, not generic"
|
|
1605
1861
|
}),
|
|
1606
1862
|
user8(
|
|
@@ -1621,7 +1877,7 @@ import { XmlRenderer as XmlRenderer2 } from "@deepagents/context";
|
|
|
1621
1877
|
|
|
1622
1878
|
// packages/text2sql/src/lib/agents/teachables.agent.ts
|
|
1623
1879
|
import { groq as groq9 } from "@ai-sdk/groq";
|
|
1624
|
-
import
|
|
1880
|
+
import dedent9 from "dedent";
|
|
1625
1881
|
import z9 from "zod";
|
|
1626
1882
|
import "@deepagents/agent";
|
|
1627
1883
|
import {
|
|
@@ -1629,18 +1885,18 @@ import {
|
|
|
1629
1885
|
InMemoryContextStore as InMemoryContextStore9,
|
|
1630
1886
|
analogy,
|
|
1631
1887
|
clarification,
|
|
1632
|
-
example,
|
|
1888
|
+
example as example2,
|
|
1633
1889
|
explain,
|
|
1634
|
-
fragment as
|
|
1635
|
-
guardrail as
|
|
1636
|
-
hint,
|
|
1890
|
+
fragment as fragment9,
|
|
1891
|
+
guardrail as guardrail6,
|
|
1892
|
+
hint as hint2,
|
|
1637
1893
|
persona as persona7,
|
|
1638
1894
|
quirk,
|
|
1639
1895
|
structuredOutput as structuredOutput9,
|
|
1640
1896
|
styleGuide,
|
|
1641
1897
|
term,
|
|
1642
1898
|
user as user9,
|
|
1643
|
-
workflow
|
|
1899
|
+
workflow as workflow2
|
|
1644
1900
|
} from "@deepagents/context";
|
|
1645
1901
|
var outputSchema4 = z9.object({
|
|
1646
1902
|
terms: z9.array(z9.object({ name: z9.string(), definition: z9.string() })).optional().describe("Domain terminology definitions"),
|
|
@@ -1705,11 +1961,11 @@ async function toTeachings(input, options) {
|
|
|
1705
1961
|
role: 'You design "fragments" for a Text2SQL system. Fragments become structured XML instructions.',
|
|
1706
1962
|
objective: "Choose only high-impact items that improve accuracy, safety, or clarity for this database"
|
|
1707
1963
|
}),
|
|
1708
|
-
|
|
1709
|
-
...input.context ? [
|
|
1710
|
-
|
|
1964
|
+
fragment9("database_schema", input.schema),
|
|
1965
|
+
...input.context ? [fragment9("additional_context", input.context)] : [],
|
|
1966
|
+
fragment9(
|
|
1711
1967
|
"output_structure",
|
|
1712
|
-
|
|
1968
|
+
dedent9`
|
|
1713
1969
|
Output a JSON object with these optional arrays (include only relevant ones):
|
|
1714
1970
|
- terms: [{ name: string, definition: string }] - Domain terminology
|
|
1715
1971
|
- hints: [{ text: string }] - Helpful SQL generation hints
|
|
@@ -1723,9 +1979,9 @@ async function toTeachings(input, options) {
|
|
|
1723
1979
|
- analogies: [{ concepts: string[], relationship: string, insight?: string, therefore?: string, pitfall?: string }]
|
|
1724
1980
|
`
|
|
1725
1981
|
),
|
|
1726
|
-
|
|
1982
|
+
fragment9(
|
|
1727
1983
|
"task",
|
|
1728
|
-
|
|
1984
|
+
dedent9`
|
|
1729
1985
|
1. Analyze the schema to infer domain, relationships, and sensitive columns.
|
|
1730
1986
|
2. Generate 3-10 fragments total across all categories, prioritizing:
|
|
1731
1987
|
- guardrails for PII columns (email, ssn, phone, etc)
|
|
@@ -1747,10 +2003,10 @@ async function toTeachings(input, options) {
|
|
|
1747
2003
|
const result = await teachablesOutput.generate();
|
|
1748
2004
|
const fragments = [];
|
|
1749
2005
|
result.terms?.forEach((t) => fragments.push(term(t.name, t.definition)));
|
|
1750
|
-
result.hints?.forEach((h) => fragments.push(
|
|
2006
|
+
result.hints?.forEach((h) => fragments.push(hint2(h.text)));
|
|
1751
2007
|
result.guardrails?.forEach(
|
|
1752
2008
|
(g) => fragments.push(
|
|
1753
|
-
|
|
2009
|
+
guardrail6({ rule: g.rule, reason: g.reason, action: g.action })
|
|
1754
2010
|
)
|
|
1755
2011
|
);
|
|
1756
2012
|
result.explains?.forEach(
|
|
@@ -1764,7 +2020,7 @@ async function toTeachings(input, options) {
|
|
|
1764
2020
|
);
|
|
1765
2021
|
result.examples?.forEach(
|
|
1766
2022
|
(e) => fragments.push(
|
|
1767
|
-
|
|
2023
|
+
example2({ question: e.question, answer: e.answer, note: e.note })
|
|
1768
2024
|
)
|
|
1769
2025
|
);
|
|
1770
2026
|
result.clarifications?.forEach(
|
|
@@ -1774,7 +2030,7 @@ async function toTeachings(input, options) {
|
|
|
1774
2030
|
);
|
|
1775
2031
|
result.workflows?.forEach(
|
|
1776
2032
|
(w) => fragments.push(
|
|
1777
|
-
|
|
2033
|
+
workflow2({
|
|
1778
2034
|
task: w.task,
|
|
1779
2035
|
steps: w.steps,
|
|
1780
2036
|
triggers: w.triggers,
|