@deepagents/text2sql 0.18.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +526 -71
- package/dist/index.js.map +4 -4
- package/dist/lib/agents/exceptions.d.ts +20 -0
- package/dist/lib/agents/exceptions.d.ts.map +1 -0
- package/dist/lib/agents/result-tools.d.ts +15 -10
- package/dist/lib/agents/result-tools.d.ts.map +1 -1
- package/dist/lib/agents/sql.agent.d.ts +2 -21
- package/dist/lib/agents/sql.agent.d.ts.map +1 -1
- package/dist/lib/sql.d.ts.map +1 -1
- package/dist/lib/synthesis/index.js +227 -87
- package/dist/lib/synthesis/index.js.map +4 -4
- package/dist/lib/synthesis/synthesizers/depth-evolver.d.ts.map +1 -1
- package/dist/lib/synthesis/synthesizers/schema-synthesizer.d.ts.map +1 -1
- package/package.json +8 -8
package/dist/index.js
CHANGED
|
@@ -511,6 +511,32 @@ var fragments = [
|
|
|
511
511
|
hint("When validating user SQL, explain any errors clearly")
|
|
512
512
|
];
|
|
513
513
|
|
|
514
|
+
// packages/text2sql/src/lib/agents/exceptions.ts
|
|
515
|
+
// Module-private brand symbols. Each error class below stamps its own symbol
// on every instance, and its isInstance() check requires that stamp.
var sqlValidationMarker = Symbol("SQLValidationError");
var unanswerableSqlMarker = Symbol("UnanswerableSQLError");
|
|
517
|
+
/**
 * Error thrown when a generated SQL statement is rejected by the adapter's
 * validation step.
 */
var SQLValidationError = class _SQLValidationError extends Error {
  // Brand slot; the constructor sets it to true.
  [sqlValidationMarker];

  /**
   * @param {string} message2 - Validation failure description.
   */
  constructor(message2) {
    super(message2);
    this.name = "SQLValidationError";
    this[sqlValidationMarker] = true;
  }

  /**
   * Type guard: true only for objects that both inherit from this class and
   * carry the brand symbol.
   *
   * @param {unknown} error - Value to test.
   * @returns {boolean}
   */
  static isInstance(error) {
    if (!(error instanceof _SQLValidationError)) {
      return false;
    }
    return error[sqlValidationMarker] === true;
  }
};
|
|
528
|
+
/**
 * Error signalling that the question could not be answered with SQL
 * (the model returned an `error` field instead of a query).
 */
var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
  // Brand slot; the constructor sets it to true.
  [unanswerableSqlMarker];

  /**
   * @param {string} message2 - Explanation of why the question is unanswerable.
   */
  constructor(message2) {
    super(message2);
    this.name = "UnanswerableSQLError";
    this[unanswerableSqlMarker] = true;
  }

  /**
   * Type guard: true only for objects that both inherit from this class and
   * carry the brand symbol.
   *
   * @param {unknown} error - Value to test.
   * @returns {boolean}
   */
  static isInstance(error) {
    if (!(error instanceof _UnanswerableSQLError)) {
      return false;
    }
    return error[unanswerableSqlMarker] === true;
  }
};
|
|
539
|
+
|
|
514
540
|
// packages/text2sql/src/lib/agents/result-tools.ts
|
|
515
541
|
import { tool as tool2 } from "ai";
|
|
516
542
|
import { createBashTool } from "bash-tool";
|
|
@@ -519,7 +545,8 @@ import {
|
|
|
519
545
|
Bash,
|
|
520
546
|
MountableFs,
|
|
521
547
|
OverlayFs,
|
|
522
|
-
defineCommand
|
|
548
|
+
defineCommand,
|
|
549
|
+
parse
|
|
523
550
|
} from "just-bash";
|
|
524
551
|
import { AsyncLocalStorage } from "node:async_hooks";
|
|
525
552
|
import * as path from "node:path";
|
|
@@ -650,6 +677,300 @@ function createSqlCommand(adapter, metaStore) {
|
|
|
650
677
|
}
|
|
651
678
|
});
|
|
652
679
|
}
|
|
680
|
+
// Database CLI clients that must not be launched from the bash tool.
// Compared against the lower-cased basename of the command word.
var BLOCKED_DB_CLIENT_COMMANDS = /* @__PURE__ */ new Set(["psql", "sqlite3", "mysql", "duckdb"]);

// Command words that indicate raw SQL was typed straight into the shell.
var BLOCKED_RAW_SQL_COMMANDS = /* @__PURE__ */ new Set(["select", "with"]);

// The only subcommands accepted after the `sql` proxy command.
var ALLOWED_SQL_PROXY_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "validate"]);

// Guidance written to stderr whenever a command is rejected.
var SQL_PROXY_ENFORCEMENT_MESSAGE = [
  "Direct database querying through bash is blocked.",
  "Use SQL proxy commands in this order:",
  '1) sql validate "SELECT ..."',
  '2) sql run "SELECT ..."'
].join("\n");
|
|
694
|
+
/**
 * Make a shallow copy of an inspection context so that a nested walk can
 * register functions or extend the call stack without touching the caller's
 * collections.
 *
 * @param {{functionDefinitions: Map, callStack: Set}} context
 * @returns {{functionDefinitions: Map, callStack: Set}} Fresh Map/Set holding the same entries.
 */
function cloneInspectionContext(context) {
  const { functionDefinitions, callStack } = context;
  return {
    functionDefinitions: new Map(functionDefinitions),
    callStack: new Set(callStack)
  };
}
|
|
700
|
+
/**
 * Resolve a shell word node to its literal text when that text is fully static.
 *
 * @param {{parts: Array}|null|undefined} word - Word node, or nothing.
 * @returns {string|null} The literal text, or null when the word is missing
 *   or contains a non-static part.
 */
function asStaticWordText(word) {
  return word ? asStaticWordPartText(word.parts) : null;
}
|
|
708
|
+
/**
 * Concatenate the text of a list of word parts, provided every part is static.
 *
 * Literal, SingleQuoted and Escaped parts contribute their string `value`;
 * DoubleQuoted parts are resolved recursively through their nested `parts`.
 * Any other part type, or a part with an unexpected shape, makes the whole
 * word non-static.
 *
 * @param {Iterable<object>} parts - Word part nodes.
 * @returns {string|null} Joined text, or null if any part is not static.
 */
function asStaticWordPartText(parts) {
  const pieces = [];
  for (const part of parts) {
    switch (part.type) {
      case "Literal":
      case "SingleQuoted":
      case "Escaped":
        if (typeof part.value !== "string") {
          return null;
        }
        pieces.push(part.value);
        break;
      case "DoubleQuoted": {
        if (!Array.isArray(part.parts)) {
          return null;
        }
        const inner = asStaticWordPartText(part.parts);
        if (inner == null) {
          return null;
        }
        pieces.push(inner);
        break;
      }
      default:
        // Expansions, substitutions, etc. cannot be resolved statically.
        return null;
    }
  }
  return pieces.join("");
}
|
|
736
|
+
/**
 * Check whether a value has the shape of a parsed script node: a non-null
 * object with `type === "Script"` and an array of `statements`.
 *
 * @param {unknown} value - Candidate node.
 * @returns {boolean}
 */
function isScriptNode(value) {
  return (
    typeof value === "object" &&
    value !== null &&
    value.type === "Script" &&
    Array.isArray(value.statements)
  );
}
|
|
743
|
+
/**
 * Report whether any statement of a parsed script contains a blocked command.
 *
 * @param {{statements: Iterable<object>}} script - Parsed script node.
 * @param {{functionDefinitions: Map, callStack: Set}} context - Inspection state.
 * @returns {boolean}
 */
function scriptContainsBlockedCommand(script, context) {
  const { statements } = script;
  return statementsContainBlockedCommand(statements, context);
}
|
|
746
|
+
/**
 * Walk a list of statements in order and stop at the first one that contains
 * a blocked command. All statements share the same context, so a function
 * registered by an earlier statement is visible to later ones.
 *
 * @param {Iterable<object>} statements - Statement nodes.
 * @param {{functionDefinitions: Map, callStack: Set}} context - Inspection state.
 * @returns {boolean}
 */
function statementsContainBlockedCommand(statements, context) {
  for (const current of statements) {
    const blocked = statementContainsBlockedCommand(current, context);
    if (blocked) {
      return true;
    }
  }
  return false;
}
|
|
754
|
+
/**
 * Report whether any pipeline of a single statement contains a blocked command.
 *
 * @param {{pipelines: Iterable<object>}} statement - Statement node.
 * @param {{functionDefinitions: Map, callStack: Set}} context - Inspection state.
 * @returns {boolean}
 */
function statementContainsBlockedCommand(statement, context) {
  for (const current of statement.pipelines) {
    const blocked = pipelineContainsBlockedCommand(current, context);
    if (blocked) {
      return true;
    }
  }
  return false;
}
|
|
762
|
+
/**
 * Inspect every command of a pipeline.
 *
 * Function definitions are not inspected here; they are recorded in
 * `context.functionDefinitions` (keyed by name) so their bodies can be
 * examined if the function is invoked later. Every other command is checked
 * immediately.
 *
 * @param {{commands: Iterable<object>}} pipeline - Pipeline node.
 * @param {{functionDefinitions: Map, callStack: Set}} context - Inspection state; mutated.
 * @returns {boolean} True as soon as one command is found to be blocked.
 */
function pipelineContainsBlockedCommand(pipeline, context) {
  for (const entry of pipeline.commands) {
    const isDefinition = entry.type === "FunctionDef";
    if (isDefinition) {
      context.functionDefinitions.set(entry.name, entry);
    } else if (commandContainsBlockedCommand(entry, context)) {
      return true;
    }
  }
  return false;
}
|
|
774
|
+
/**
 * Parse a command given as raw text and inspect the resulting script.
 *
 * Text that fails to parse is treated as not blocked. The script is inspected
 * against a copy of the context, so nothing it defines reaches the caller.
 *
 * @param {string} command - Shell source text.
 * @param {{functionDefinitions: Map, callStack: Set}} context - Inspection state.
 * @returns {boolean}
 */
function stringCommandContainsBlockedCommand(command, context) {
  let parsed = null;
  try {
    parsed = parse(command);
  } catch {
    // Unparseable text: nothing to inspect.
    return false;
  }
  const isolated = cloneInspectionContext(context);
  return scriptContainsBlockedCommand(parsed, isolated);
}
|
|
783
|
+
/**
 * Report whether a word node embeds a blocked command (for example through a
 * substitution nested in its parts). A missing word is never blocked.
 *
 * @param {{parts: Iterable<object>}|null|undefined} word - Word node, or nothing.
 * @param {{functionDefinitions: Map, callStack: Set}} context - Inspection state.
 * @returns {boolean}
 */
function wordContainsBlockedCommand(word, context) {
  return word ? wordPartContainsBlockedCommand(word.parts, context) : false;
}
|
|
792
|
+
/**
 * Report whether any part in a list of word parts embeds a blocked command.
 *
 * @param {Iterable<object>} parts - Word part nodes.
 * @param {{functionDefinitions: Map, callStack: Set}} context - Inspection state.
 * @returns {boolean}
 */
function wordPartContainsBlockedCommand(parts, context) {
  for (const piece of parts) {
    const blocked = partContainsBlockedCommand(piece, context);
    if (blocked) {
      return true;
    }
  }
  return false;
}
|
|
800
|
+
/**
 * Recursively search a word-part node for embedded commands that are blocked.
 *
 * - Command/process substitutions: their `body` is inspected when it is a
 *   script node (against a copied context); otherwise the result is false.
 * - Arithmetic command substitutions carrying the command as a string: the
 *   string is parsed and inspected.
 * - Anything else: every object-valued property, and every object inside an
 *   array-valued property, is searched the same way.
 *
 * @param {object} node - Word part (or nested AST object).
 * @param {{functionDefinitions: Map, callStack: Set}} context - Inspection state.
 * @returns {boolean}
 */
function partContainsBlockedCommand(node, context) {
  const kind = node.type;
  if (kind === "CommandSubstitution" || kind === "ProcessSubstitution") {
    return isScriptNode(node.body)
      ? scriptContainsBlockedCommand(node.body, cloneInspectionContext(context))
      : false;
  }
  if (kind === "ArithCommandSubst" && typeof node.command === "string") {
    return stringCommandContainsBlockedCommand(node.command, context);
  }
  for (const value of Object.values(node)) {
    // Treat a single value like a one-element list so both shapes share a loop.
    const children = Array.isArray(value) ? value : [value];
    for (const child of children) {
      const isObject = typeof child === "object" && child !== null;
      if (isObject && partContainsBlockedCommand(child, context)) {
        return true;
      }
    }
  }
  return false;
}
|
|
833
|
+
/**
 * Inspect the body of a previously registered shell function when it is invoked.
 *
 * Returns false when no function of that name has been registered, or when
 * the function is already on the call stack (which stops the walk from
 * recursing forever on self-referential functions).
 *
 * @param {string} functionName - Name used at the call site.
 * @param {{functionDefinitions: Map, callStack: Set}} context - Inspection state; not mutated.
 * @returns {boolean} Result of inspecting the function body.
 */
function functionInvocationContainsBlockedCommand(functionName, context) {
  const definition = context.functionDefinitions.get(functionName);
  if (!definition || context.callStack.has(functionName)) {
    return false;
  }
  // Track the active call on a private copy so sibling calls are unaffected.
  const nested = cloneInspectionContext(context);
  nested.callStack.add(functionName);
  return commandContainsBlockedCommand(definition.body, nested);
}
|
|
845
|
+
/**
 * Decide whether a parsed shell command node contains a blocked command.
 *
 * Simple commands are checked directly. Compound commands (if / loops / case /
 * subshell / group) are checked by walking their nested statement lists.
 *
 * @param {object} command - Command node; dispatch is on `command.type`.
 * @param {{functionDefinitions: Map, callStack: Set}} context - Inspection state.
 * @returns {boolean} True when the command (or anything nested in it) is blocked.
 *   Unrecognised node types are reported as blocked.
 */
function commandContainsBlockedCommand(command, context) {
  // Each nested statement list gets its own copy of the context, so function
  // definitions recorded inside a nested body do not leak into the caller's.
  const inspect = (statements) => statementsContainBlockedCommand(statements, cloneInspectionContext(context));

  switch (command.type) {
    case "SimpleCommand":
      return isBlockedSimpleCommand(command, context);
    case "If":
      return command.clauses.some(
        (clause) => inspect(clause.condition) || inspect(clause.body)
      ) || (command.elseBody ? inspect(command.elseBody) : false);
    case "For":
    case "CStyleFor":
      // NOTE(review): only the loop body is inspected, not the loop header.
      return inspect(command.body);
    case "While":
    case "Until":
      return inspect(command.condition) || inspect(command.body);
    case "Case":
      // NOTE(review): only item bodies are inspected, not the case subject.
      return command.items.some((item) => inspect(item.body));
    case "Subshell":
    case "Group":
      return inspect(command.body);
    case "FunctionDef":
      // Defining a function is harmless by itself; its body is inspected
      // when (and if) the function is invoked.
      return false;
    case "ArithmeticCommand":
    case "ConditionalCommand":
      return false;
    default:
      // Previously this returned the node object itself (a compiled
      // exhaustiveness check), which callers treated as truthy. Return an
      // explicit boolean with the same effect: unknown node types are blocked.
      return true;
  }
}
|
|
901
|
+
/**
 * Decide whether a simple command must be rejected.
 *
 * Checks run in this order:
 *  1. Embedded commands inside the command name, arguments, assignment values
 *     (scalar and array), and redirection targets / here-doc content.
 *  2. The statically known command name, reduced to its lower-cased basename:
 *     - a blocked database client or raw SQL keyword is rejected;
 *     - `sql` is rejected unless its first argument is statically `run` or `validate`.
 *  3. If the name refers to a registered shell function, that function's body.
 *
 * A command whose name cannot be resolved statically is not blocked by
 * steps 2 and 3.
 *
 * @param {object} command - SimpleCommand node with `name`, `args`, `assignments`, `redirections`.
 * @param {{functionDefinitions: Map, callStack: Set}} context - Inspection state.
 * @returns {boolean}
 */
function isBlockedSimpleCommand(command, context) {
  const hasBlockedWord = (word) => wordContainsBlockedCommand(word, context);

  if (hasBlockedWord(command.name)) {
    return true;
  }
  if (command.args.some((arg) => hasBlockedWord(arg))) {
    return true;
  }

  const assignmentBlocked = command.assignments.some((assignment) => {
    if (hasBlockedWord(assignment.value)) {
      return true;
    }
    return assignment.array?.some((value) => hasBlockedWord(value)) ?? false;
  });
  if (assignmentBlocked) {
    return true;
  }

  const redirectionBlocked = command.redirections.some(({ target }) => {
    switch (target.type) {
      case "Word":
        return hasBlockedWord(target);
      case "HereDoc":
        return target.content ? hasBlockedWord(target.content) : false;
      default:
        return false;
    }
  });
  if (redirectionBlocked) {
    return true;
  }

  const commandName = asStaticWordText(command.name);
  if (!commandName) {
    return false;
  }

  // Strip any directory prefix so "/usr/bin/psql" is matched like "psql".
  const normalizedName = path.posix.basename(commandName).toLowerCase();
  if (BLOCKED_DB_CLIENT_COMMANDS.has(normalizedName) || BLOCKED_RAW_SQL_COMMANDS.has(normalizedName)) {
    return true;
  }
  if (normalizedName === "sql") {
    const subcommand = asStaticWordText(command.args[0])?.toLowerCase();
    return !(subcommand && ALLOWED_SQL_PROXY_SUBCOMMANDS.has(subcommand));
  }

  // Function lookup uses the name exactly as written.
  return Boolean(functionInvocationContainsBlockedCommand(commandName, context));
}
|
|
949
|
+
/**
 * Pre-flight check for a bash command string.
 *
 * @param {string} command - Raw command text about to be executed.
 * @returns {{stdout: string, stderr: string, exitCode: number}|null}
 *   A synthetic failure result (exit code 1, guidance on stderr) when the
 *   command is blocked; null when the command is blank, cannot be parsed, or
 *   is allowed.
 */
function getSqlProxyEnforcementResult(command) {
  const source = command.trim();
  if (source.length === 0) {
    return null;
  }

  let parsed;
  try {
    parsed = parse(source);
  } catch {
    // Unparseable input is left for the real executor to reject.
    return null;
  }

  const freshContext = {
    functionDefinitions: /* @__PURE__ */ new Map(),
    callStack: /* @__PURE__ */ new Set()
  };
  if (!scriptContainsBlockedCommand(parsed, freshContext)) {
    return null;
  }

  return {
    stdout: "",
    stderr: `${SQL_PROXY_ENFORCEMENT_MESSAGE}\n`,
    exitCode: 1
  };
}
|
|
653
974
|
async function createResultTools(options) {
|
|
654
975
|
const { adapter, skillMounts, filesystem: baseFs } = options;
|
|
655
976
|
const metaStore = new AsyncLocalStorage();
|
|
@@ -685,6 +1006,16 @@ async function createResultTools(options) {
|
|
|
685
1006
|
return { result };
|
|
686
1007
|
}
|
|
687
1008
|
});
|
|
1009
|
+
const guardedSandbox = {
|
|
1010
|
+
...sandbox,
|
|
1011
|
+
executeCommand: async (command) => {
|
|
1012
|
+
const blockedResult = getSqlProxyEnforcementResult(command);
|
|
1013
|
+
if (blockedResult) {
|
|
1014
|
+
return blockedResult;
|
|
1015
|
+
}
|
|
1016
|
+
return sandbox.executeCommand(command);
|
|
1017
|
+
}
|
|
1018
|
+
};
|
|
688
1019
|
const bash = tool2({
|
|
689
1020
|
...tools2.bash,
|
|
690
1021
|
inputSchema: z3.object({
|
|
@@ -696,6 +1027,10 @@ async function createResultTools(options) {
|
|
|
696
1027
|
if (!execute) {
|
|
697
1028
|
throw new Error("bash tool execution is not available");
|
|
698
1029
|
}
|
|
1030
|
+
const blockedResult = getSqlProxyEnforcementResult(command);
|
|
1031
|
+
if (blockedResult) {
|
|
1032
|
+
return blockedResult;
|
|
1033
|
+
}
|
|
699
1034
|
return metaStore.run({}, async () => {
|
|
700
1035
|
const result = await execute({ command }, execOptions);
|
|
701
1036
|
const meta = metaStore.getStore()?.value;
|
|
@@ -703,12 +1038,15 @@ async function createResultTools(options) {
|
|
|
703
1038
|
});
|
|
704
1039
|
},
|
|
705
1040
|
toModelOutput: ({ output }) => {
|
|
1041
|
+
if (typeof output !== "object" || output === null) {
|
|
1042
|
+
return { type: "json", value: output };
|
|
1043
|
+
}
|
|
706
1044
|
const { meta, ...rest } = output;
|
|
707
1045
|
return { type: "json", value: rest };
|
|
708
1046
|
}
|
|
709
1047
|
});
|
|
710
1048
|
return {
|
|
711
|
-
sandbox,
|
|
1049
|
+
sandbox: guardedSandbox,
|
|
712
1050
|
tools: {
|
|
713
1051
|
...tools2,
|
|
714
1052
|
bash
|
|
@@ -728,42 +1066,120 @@ import {
|
|
|
728
1066
|
defaultSettingsMiddleware,
|
|
729
1067
|
wrapLanguageModel
|
|
730
1068
|
} from "ai";
|
|
1069
|
+
import dedent2 from "dedent";
|
|
731
1070
|
import pRetry from "p-retry";
|
|
732
1071
|
import z4 from "zod";
|
|
733
1072
|
import "@deepagents/agent";
|
|
734
1073
|
import {
|
|
735
1074
|
ContextEngine as ContextEngine2,
|
|
736
1075
|
InMemoryContextStore as InMemoryContextStore2,
|
|
1076
|
+
fragment as fragment2,
|
|
737
1077
|
persona as persona3,
|
|
1078
|
+
policy,
|
|
738
1079
|
structuredOutput as structuredOutput2,
|
|
739
1080
|
user as user2
|
|
740
1081
|
} from "@deepagents/context";
|
|
741
1082
|
// Sampling temperature for each attempt (index = attempt number - 1);
// attempts beyond the list reuse the last entry.
var RETRY_TEMPERATURES = [0, 0.2, 0.3];

// Persona role and objective used for the SQL-generation agent.
var SQL_AGENT_ROLE = "Expert SQL query generator.";
var SQL_AGENT_OBJECTIVE = "Generate precise SQL grounded in provided schema.";
|
|
1085
|
+
/**
 * Built-in context fragments added to every SQL-generation attempt, ahead of
 * any caller-supplied fragments.
 *
 * Only the schema-mapping policy is active. The large body of commented-out
 * draft policies, hints and guardrails that used to live inside this literal
 * was dead code and has been removed; the resulting array value is unchanged.
 */
var SQL_AGENT_POLICIES = [
  fragment2(
    "schema_mapping",
    policy({
      rule: "Translate natural language into precise SQL grounded in available schema entities."
    })
  )
];
|
|
742
1179
|
/**
 * Pull the SQL text out of a model response.
 *
 * Resolution order:
 *  1. The first fenced block tagged `sql` (tag matched case-insensitively, so
 *     "```SQL" works as well as "```sql").
 *  2. Otherwise, the first untagged fenced block (a fence immediately followed
 *     by a line break).
 *  3. Otherwise, the whole response.
 *
 * Previously only a lower-case `sql` tag was recognised, so responses using
 * "```SQL" or a bare fence were returned with the backticks still attached.
 *
 * @param {string} output - Raw model output.
 * @returns {string} Trimmed SQL text.
 */
function extractSql(output) {
  const tagged = output.match(/```sql\n?([\s\S]*?)```/i);
  if (tagged) {
    return tagged[1].trim();
  }
  // Require a line break right after the fence so blocks tagged with another
  // language (e.g. "```python") are not mistaken for SQL.
  const untagged = output.match(/```\r?\n([\s\S]*?)```/);
  if (untagged) {
    return untagged[1].trim();
  }
  return output.trim();
}
|
|
746
|
-
var marker = Symbol("SQLValidationError");
|
|
747
|
-
var SQLValidationError = class _SQLValidationError extends Error {
|
|
748
|
-
[marker];
|
|
749
|
-
constructor(message2) {
|
|
750
|
-
super(message2);
|
|
751
|
-
this.name = "SQLValidationError";
|
|
752
|
-
this[marker] = true;
|
|
753
|
-
}
|
|
754
|
-
static isInstance(error) {
|
|
755
|
-
return error instanceof _SQLValidationError && error[marker] === true;
|
|
756
|
-
}
|
|
757
|
-
};
|
|
758
|
-
var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
|
|
759
|
-
constructor(message2) {
|
|
760
|
-
super(message2);
|
|
761
|
-
this.name = "UnanswerableSQLError";
|
|
762
|
-
}
|
|
763
|
-
static isInstance(error) {
|
|
764
|
-
return error instanceof _UnanswerableSQLError;
|
|
765
|
-
}
|
|
766
|
-
};
|
|
767
1183
|
async function toSql(options) {
|
|
768
1184
|
const { maxRetries = 3 } = options;
|
|
769
1185
|
return withRetry(
|
|
@@ -776,21 +1192,38 @@ async function toSql(options) {
|
|
|
776
1192
|
context.set(
|
|
777
1193
|
persona3({
|
|
778
1194
|
name: "Freya",
|
|
779
|
-
role:
|
|
780
|
-
objective:
|
|
1195
|
+
role: SQL_AGENT_ROLE,
|
|
1196
|
+
objective: SQL_AGENT_OBJECTIVE
|
|
1197
|
+
// role: `You are a data science expert that provides well-reasoned and detailed responses.`,
|
|
1198
|
+
// objective: `Your task is to understand the schema and generate a valid SQL query to answer the question. You first think about the reasoning process as an internal monologue and then provide the user with the answer.`,
|
|
781
1199
|
}),
|
|
782
|
-
...
|
|
783
|
-
...options.
|
|
1200
|
+
...SQL_AGENT_POLICIES,
|
|
1201
|
+
...options.fragments
|
|
784
1202
|
);
|
|
785
1203
|
if (errors.length) {
|
|
1204
|
+
const lastError = errors.at(-1);
|
|
786
1205
|
context.set(
|
|
787
|
-
user2(
|
|
788
|
-
|
|
789
|
-
|
|
1206
|
+
user2(dedent2`
|
|
1207
|
+
Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
|
|
1208
|
+
Given the question, the evidence and the database schema, return the SQL script that addresses the question.
|
|
1209
|
+
|
|
1210
|
+
Question: ${options.input}
|
|
1211
|
+
`),
|
|
1212
|
+
UnanswerableSQLError.isInstance(lastError) ? user2(
|
|
1213
|
+
`<retry_instruction>Your previous response marked the task as unanswerable. Re-evaluate using best-effort schema mapping. If the core intent is answerable with existing tables/columns, return SQL. Return error only when required core intent cannot be mapped without inventing schema elements.</retry_instruction>`
|
|
1214
|
+
) : user2(
|
|
1215
|
+
`<validation_error>Your previous SQL query had the following error: ${lastError?.message}. Please fix the query.</validation_error>`
|
|
790
1216
|
)
|
|
791
1217
|
);
|
|
792
1218
|
} else {
|
|
793
|
-
context.set(
|
|
1219
|
+
context.set(
|
|
1220
|
+
user2(dedent2`
|
|
1221
|
+
Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
|
|
1222
|
+
Given the question, the evidence and the database schema, return the SQL script that addresses the question.
|
|
1223
|
+
|
|
1224
|
+
Question: ${options.input}
|
|
1225
|
+
`)
|
|
1226
|
+
);
|
|
794
1227
|
}
|
|
795
1228
|
const temperature = RETRY_TEMPERATURES[attemptNumber - 1] ?? RETRY_TEMPERATURES[RETRY_TEMPERATURES.length - 1];
|
|
796
1229
|
const baseModel = options.model ?? groq2("openai/gpt-oss-20b");
|
|
@@ -816,19 +1249,45 @@ async function toSql(options) {
|
|
|
816
1249
|
})
|
|
817
1250
|
});
|
|
818
1251
|
const { result: output } = await sqlOutput.generate();
|
|
1252
|
+
const finalizeSql = async (rawSql) => {
|
|
1253
|
+
const sql = options.adapter.format(extractSql(rawSql));
|
|
1254
|
+
const validationError = await options.adapter.validate(sql);
|
|
1255
|
+
if (validationError) {
|
|
1256
|
+
throw new SQLValidationError(validationError);
|
|
1257
|
+
}
|
|
1258
|
+
return {
|
|
1259
|
+
attempts,
|
|
1260
|
+
sql,
|
|
1261
|
+
errors: errors.length ? errors.map(formatErrorMessage) : void 0
|
|
1262
|
+
};
|
|
1263
|
+
};
|
|
819
1264
|
if ("error" in output) {
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
1265
|
+
context.set(
|
|
1266
|
+
user2(
|
|
1267
|
+
"<best_effort_fallback>Do not return unanswerable. Produce the best valid SQL query that answers the core intent using only available schema entities.</best_effort_fallback>"
|
|
1268
|
+
)
|
|
1269
|
+
);
|
|
1270
|
+
const forcedSqlOutput = structuredOutput2({
|
|
1271
|
+
model,
|
|
1272
|
+
context,
|
|
1273
|
+
schema: z4.object({
|
|
1274
|
+
sql: z4.string().describe(
|
|
1275
|
+
"Best-effort SQL query that answers the core intent using only available schema entities."
|
|
1276
|
+
),
|
|
1277
|
+
reasoning: z4.string().describe("Reasoning steps for best-effort schema mapping.")
|
|
1278
|
+
})
|
|
1279
|
+
});
|
|
1280
|
+
try {
|
|
1281
|
+
const forced = await forcedSqlOutput.generate();
|
|
1282
|
+
return await finalizeSql(forced.sql);
|
|
1283
|
+
} catch (error) {
|
|
1284
|
+
if (SQLValidationError.isInstance(error) || APICallError.isInstance(error) || JSONParseError.isInstance(error) || TypeValidationError.isInstance(error) || NoObjectGeneratedError.isInstance(error) || NoOutputGeneratedError.isInstance(error) || NoContentGeneratedError.isInstance(error)) {
|
|
1285
|
+
throw error;
|
|
1286
|
+
}
|
|
1287
|
+
throw new UnanswerableSQLError(output.error);
|
|
1288
|
+
}
|
|
826
1289
|
}
|
|
827
|
-
return
|
|
828
|
-
attempts,
|
|
829
|
-
sql,
|
|
830
|
-
errors: errors.length ? errors.map(formatErrorMessage) : void 0
|
|
831
|
-
};
|
|
1290
|
+
return await finalizeSql(output.sql);
|
|
832
1291
|
},
|
|
833
1292
|
{ retries: maxRetries - 1 }
|
|
834
1293
|
);
|
|
@@ -891,9 +1350,6 @@ async function withRetry(computation, options = { retries: 3 }) {
|
|
|
891
1350
|
return APICallError.isInstance(context.error) || JSONParseError.isInstance(context.error) || TypeValidationError.isInstance(context.error) || NoObjectGeneratedError.isInstance(context.error) || NoOutputGeneratedError.isInstance(context.error) || NoContentGeneratedError.isInstance(context.error);
|
|
892
1351
|
},
|
|
893
1352
|
onFailedAttempt(context) {
|
|
894
|
-
console.log(
|
|
895
|
-
`Attempt ${context.attemptNumber} failed. There are ${context.retriesLeft} retries left.`
|
|
896
|
-
);
|
|
897
1353
|
errors.push(context.error);
|
|
898
1354
|
}
|
|
899
1355
|
}
|
|
@@ -902,7 +1358,7 @@ async function withRetry(computation, options = { retries: 3 }) {
|
|
|
902
1358
|
|
|
903
1359
|
// packages/text2sql/src/lib/agents/suggestions.agents.ts
|
|
904
1360
|
import { groq as groq3 } from "@ai-sdk/groq";
|
|
905
|
-
import
|
|
1361
|
+
import dedent3 from "dedent";
|
|
906
1362
|
import z5 from "zod";
|
|
907
1363
|
import { agent, thirdPersonPrompt } from "@deepagents/agent";
|
|
908
1364
|
var suggestionsAgent = agent({
|
|
@@ -918,7 +1374,7 @@ var suggestionsAgent = agent({
|
|
|
918
1374
|
).min(1).max(5).describe("A set of up to two advanced question + SQL pairs.")
|
|
919
1375
|
}),
|
|
920
1376
|
prompt: (state) => {
|
|
921
|
-
return
|
|
1377
|
+
return dedent3`
|
|
922
1378
|
${thirdPersonPrompt()}
|
|
923
1379
|
|
|
924
1380
|
<identity>
|
|
@@ -3884,10 +4340,10 @@ import {
|
|
|
3884
4340
|
clarification,
|
|
3885
4341
|
example,
|
|
3886
4342
|
explain,
|
|
3887
|
-
fragment as
|
|
4343
|
+
fragment as fragment3,
|
|
3888
4344
|
guardrail,
|
|
3889
4345
|
hint as hint2,
|
|
3890
|
-
policy,
|
|
4346
|
+
policy as policy2,
|
|
3891
4347
|
principle,
|
|
3892
4348
|
quirk,
|
|
3893
4349
|
role,
|
|
@@ -3899,7 +4355,7 @@ function reasoningFramework() {
|
|
|
3899
4355
|
role(
|
|
3900
4356
|
"You are a very strong reasoner and planner. Use these critical instructions to structure your plans, thoughts, and responses."
|
|
3901
4357
|
),
|
|
3902
|
-
|
|
4358
|
+
fragment3(
|
|
3903
4359
|
"meta-cognitive-reasoning-framework",
|
|
3904
4360
|
hint2(
|
|
3905
4361
|
"Before taking any action (either tool calls *or* responses to the user), you must proactively, methodically, and independently plan and reason about:"
|
|
@@ -3909,19 +4365,19 @@ function reasoningFramework() {
|
|
|
3909
4365
|
title: "Logical dependencies and constraints",
|
|
3910
4366
|
description: "Analyze the intended action against the following factors. Resolve conflicts in order of importance:",
|
|
3911
4367
|
policies: [
|
|
3912
|
-
|
|
4368
|
+
policy2({
|
|
3913
4369
|
rule: "Policy-based rules, mandatory prerequisites, and constraints."
|
|
3914
4370
|
}),
|
|
3915
|
-
|
|
4371
|
+
policy2({
|
|
3916
4372
|
rule: "Order of operations: Ensure taking an action does not prevent a subsequent necessary action.",
|
|
3917
4373
|
policies: [
|
|
3918
4374
|
"The user may request actions in a random order, but you may need to reorder operations to maximize successful completion of the task."
|
|
3919
4375
|
]
|
|
3920
4376
|
}),
|
|
3921
|
-
|
|
4377
|
+
policy2({
|
|
3922
4378
|
rule: "Other prerequisites (information and/or actions needed)."
|
|
3923
4379
|
}),
|
|
3924
|
-
|
|
4380
|
+
policy2({ rule: "Explicit user constraints or preferences." })
|
|
3925
4381
|
]
|
|
3926
4382
|
}),
|
|
3927
4383
|
// 2) Risk assessment
|
|
@@ -3974,17 +4430,17 @@ function reasoningFramework() {
|
|
|
3974
4430
|
title: "Completeness",
|
|
3975
4431
|
description: "Ensure that all requirements, constraints, options, and preferences are exhaustively incorporated into your plan.",
|
|
3976
4432
|
policies: [
|
|
3977
|
-
|
|
4433
|
+
policy2({
|
|
3978
4434
|
rule: "Resolve conflicts using the order of importance in #1."
|
|
3979
4435
|
}),
|
|
3980
|
-
|
|
4436
|
+
policy2({
|
|
3981
4437
|
rule: "Avoid premature conclusions: There may be multiple relevant options for a given situation.",
|
|
3982
4438
|
policies: [
|
|
3983
4439
|
"To check for whether an option is relevant, reason about all information sources from #5.",
|
|
3984
4440
|
"You may need to consult the user to even know whether something is applicable. Do not assume it is not applicable without checking."
|
|
3985
4441
|
]
|
|
3986
4442
|
}),
|
|
3987
|
-
|
|
4443
|
+
policy2({
|
|
3988
4444
|
rule: "Review applicable sources of information from #5 to confirm which are relevant to the current state."
|
|
3989
4445
|
})
|
|
3990
4446
|
]
|
|
@@ -4016,31 +4472,31 @@ function guidelines(options = {}) {
|
|
|
4016
4472
|
// Include the meta-cognitive reasoning framework
|
|
4017
4473
|
...reasoningFramework(),
|
|
4018
4474
|
// Prerequisite policies (must do X before Y)
|
|
4019
|
-
|
|
4475
|
+
fragment3(
|
|
4020
4476
|
"prerequisite_policies",
|
|
4021
|
-
|
|
4477
|
+
policy2({
|
|
4022
4478
|
rule: "YOU MUST inspect schema structure and available tables",
|
|
4023
4479
|
before: "generating ANY SQL query",
|
|
4024
4480
|
reason: "NEVER generate SQL without knowing valid tables, columns, and relationships"
|
|
4025
4481
|
}),
|
|
4026
|
-
|
|
4482
|
+
policy2({
|
|
4027
4483
|
rule: "YOU MUST resolve ambiguous business terms with the user",
|
|
4028
4484
|
before: "making ANY assumptions about terminology meaning",
|
|
4029
4485
|
reason: "NEVER guess domain-specific language\u2014ask for clarification"
|
|
4030
4486
|
}),
|
|
4031
|
-
|
|
4487
|
+
policy2({
|
|
4032
4488
|
rule: "YOU MUST validate SQL syntax",
|
|
4033
4489
|
before: "executing ANY query against the database",
|
|
4034
4490
|
reason: "NEVER execute unvalidated queries"
|
|
4035
4491
|
}),
|
|
4036
|
-
|
|
4492
|
+
policy2({
|
|
4037
4493
|
rule: "YOU MUST complete ALL reasoning steps",
|
|
4038
4494
|
before: "taking ANY tool call or response action",
|
|
4039
4495
|
reason: "Once an action is taken, it CANNOT be undone. NO EXCEPTIONS."
|
|
4040
4496
|
})
|
|
4041
4497
|
),
|
|
4042
4498
|
// Few-shot: Applying reasoning principles
|
|
4043
|
-
|
|
4499
|
+
fragment3(
|
|
4044
4500
|
"reasoning-examples",
|
|
4045
4501
|
example({
|
|
4046
4502
|
question: "Show me sales last month",
|
|
@@ -4080,7 +4536,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4080
4536
|
})
|
|
4081
4537
|
),
|
|
4082
4538
|
// Schema adherence - consolidated into clear rules
|
|
4083
|
-
|
|
4539
|
+
fragment3(
|
|
4084
4540
|
"schema_adherence",
|
|
4085
4541
|
hint2(
|
|
4086
4542
|
"Use only tables and columns from the schema. For unspecified columns, use SELECT *. When showing related items, include IDs and requested details."
|
|
@@ -4089,7 +4545,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4089
4545
|
'"Show" means list items; "count" or "total" means aggregate. Use canonical values verbatim for filtering.'
|
|
4090
4546
|
)
|
|
4091
4547
|
),
|
|
4092
|
-
|
|
4548
|
+
fragment3(
|
|
4093
4549
|
"Column statistics",
|
|
4094
4550
|
explain({
|
|
4095
4551
|
concept: "nDistinct in column stats",
|
|
@@ -4110,7 +4566,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4110
4566
|
"Use JOINs based on schema relationships. Favor PK/indexed columns; follow relationship metadata for direction and cardinality."
|
|
4111
4567
|
),
|
|
4112
4568
|
// Aggregations - explain the concepts
|
|
4113
|
-
|
|
4569
|
+
fragment3(
|
|
4114
4570
|
"Aggregations",
|
|
4115
4571
|
hint2(
|
|
4116
4572
|
"Apply COUNT, SUM, AVG when the question implies summarization. Use window functions for ranking, running totals, or row comparisons."
|
|
@@ -4122,7 +4578,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4122
4578
|
})
|
|
4123
4579
|
),
|
|
4124
4580
|
// Query semantics - explain concepts and document quirks
|
|
4125
|
-
|
|
4581
|
+
fragment3(
|
|
4126
4582
|
"Query interpretation",
|
|
4127
4583
|
explain({
|
|
4128
4584
|
concept: "threshold language",
|
|
@@ -4150,7 +4606,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4150
4606
|
prefer: "Concise, business-friendly summaries with key comparisons and helpful follow-ups."
|
|
4151
4607
|
}),
|
|
4152
4608
|
// Safety guardrails - consolidated
|
|
4153
|
-
|
|
4609
|
+
fragment3(
|
|
4154
4610
|
"Query safety",
|
|
4155
4611
|
guardrail({
|
|
4156
4612
|
rule: "Generate only valid, executable SELECT/WITH statements.",
|
|
@@ -4236,7 +4692,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4236
4692
|
],
|
|
4237
4693
|
notes: "If reference is ambiguous, ask which previous result or entity the user means."
|
|
4238
4694
|
}),
|
|
4239
|
-
|
|
4695
|
+
fragment3(
|
|
4240
4696
|
"Bash tool usage",
|
|
4241
4697
|
workflow({
|
|
4242
4698
|
task: "Query execution",
|
|
@@ -4317,8 +4773,7 @@ var Text2Sql = class {
|
|
|
4317
4773
|
const result = await toSql({
|
|
4318
4774
|
input,
|
|
4319
4775
|
adapter: this.#config.adapter,
|
|
4320
|
-
schemaFragments,
|
|
4321
|
-
instructions: [],
|
|
4776
|
+
fragments: schemaFragments,
|
|
4322
4777
|
model: this.#config.model
|
|
4323
4778
|
});
|
|
4324
4779
|
return result.sql;
|