@deepagents/text2sql 0.20.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +655 -173
- package/dist/index.js.map +3 -3
- package/dist/lib/adapters/bigquery/bigquery-fk.d.ts +25 -0
- package/dist/lib/adapters/bigquery/bigquery-fk.d.ts.map +1 -0
- package/dist/lib/adapters/bigquery/constraint.bigquery.grounding.d.ts +3 -1
- package/dist/lib/adapters/bigquery/constraint.bigquery.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/bigquery/index.d.ts.map +1 -1
- package/dist/lib/adapters/bigquery/index.js +356 -201
- package/dist/lib/adapters/bigquery/index.js.map +4 -4
- package/dist/lib/adapters/bigquery/indexes.bigquery.grounding.d.ts +6 -7
- package/dist/lib/adapters/bigquery/indexes.bigquery.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/bigquery/info.bigquery.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/bigquery/row-count.bigquery.grounding.d.ts +5 -7
- package/dist/lib/adapters/bigquery/row-count.bigquery.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/bigquery/table.bigquery.grounding.d.ts +2 -0
- package/dist/lib/adapters/bigquery/table.bigquery.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/bigquery/view.bigquery.grounding.d.ts +2 -2
- package/dist/lib/adapters/bigquery/view.bigquery.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/context.d.ts +2 -0
- package/dist/lib/adapters/groundings/context.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/index.js +2 -1
- package/dist/lib/adapters/groundings/index.js.map +2 -2
- package/dist/lib/adapters/mysql/index.js +2 -1
- package/dist/lib/adapters/mysql/index.js.map +2 -2
- package/dist/lib/adapters/postgres/index.js +2 -1
- package/dist/lib/adapters/postgres/index.js.map +2 -2
- package/dist/lib/adapters/spreadsheet/index.js +2 -1
- package/dist/lib/adapters/spreadsheet/index.js.map +2 -2
- package/dist/lib/adapters/sqlite/index.js +2 -1
- package/dist/lib/adapters/sqlite/index.js.map +2 -2
- package/dist/lib/adapters/sqlserver/index.js +2 -1
- package/dist/lib/adapters/sqlserver/index.js.map +2 -2
- package/dist/lib/agents/result-tools.d.ts.map +1 -1
- package/dist/lib/agents/sql.agent.d.ts.map +1 -1
- package/dist/lib/sql.d.ts +3 -3
- package/dist/lib/sql.d.ts.map +1 -1
- package/dist/lib/synthesis/index.js +220 -109
- package/dist/lib/synthesis/index.js.map +3 -3
- package/package.json +8 -7
package/dist/index.js
CHANGED
|
@@ -98,7 +98,8 @@ function createGroundingContext() {
|
|
|
98
98
|
tables: [],
|
|
99
99
|
views: [],
|
|
100
100
|
relationships: [],
|
|
101
|
-
info: void 0
|
|
101
|
+
info: void 0,
|
|
102
|
+
cache: /* @__PURE__ */ new Map()
|
|
102
103
|
};
|
|
103
104
|
}
|
|
104
105
|
|
|
@@ -516,8 +517,8 @@ var sqlValidationMarker = Symbol("SQLValidationError");
|
|
|
516
517
|
var unanswerableSqlMarker = Symbol("UnanswerableSQLError");
|
|
517
518
|
var SQLValidationError = class _SQLValidationError extends Error {
|
|
518
519
|
[sqlValidationMarker];
|
|
519
|
-
constructor(
|
|
520
|
-
super(
|
|
520
|
+
constructor(message) {
|
|
521
|
+
super(message);
|
|
521
522
|
this.name = "SQLValidationError";
|
|
522
523
|
this[sqlValidationMarker] = true;
|
|
523
524
|
}
|
|
@@ -527,8 +528,8 @@ var SQLValidationError = class _SQLValidationError extends Error {
|
|
|
527
528
|
};
|
|
528
529
|
var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
|
|
529
530
|
[unanswerableSqlMarker];
|
|
530
|
-
constructor(
|
|
531
|
-
super(
|
|
531
|
+
constructor(message) {
|
|
532
|
+
super(message);
|
|
532
533
|
this.name = "UnanswerableSQLError";
|
|
533
534
|
this[unanswerableSqlMarker] = true;
|
|
534
535
|
}
|
|
@@ -685,6 +686,14 @@ var BLOCKED_DB_CLIENT_COMMANDS = /* @__PURE__ */ new Set([
|
|
|
685
686
|
]);
|
|
686
687
|
var BLOCKED_RAW_SQL_COMMANDS = /* @__PURE__ */ new Set(["select", "with"]);
|
|
687
688
|
var ALLOWED_SQL_PROXY_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "validate"]);
|
|
689
|
+
var SHELL_INTERPRETER_COMMANDS = /* @__PURE__ */ new Set([
|
|
690
|
+
"bash",
|
|
691
|
+
"sh",
|
|
692
|
+
"zsh",
|
|
693
|
+
"dash",
|
|
694
|
+
"ksh"
|
|
695
|
+
]);
|
|
696
|
+
var WRAPPER_COMMANDS = /* @__PURE__ */ new Set(["env", "command", "eval"]);
|
|
688
697
|
var SQL_PROXY_ENFORCEMENT_MESSAGE = [
|
|
689
698
|
"Direct database querying through bash is blocked.",
|
|
690
699
|
"Use SQL proxy commands in this order:",
|
|
@@ -740,82 +749,94 @@ function isScriptNode(value) {
|
|
|
740
749
|
const node = value;
|
|
741
750
|
return node.type === "Script" && Array.isArray(node.statements);
|
|
742
751
|
}
|
|
743
|
-
function scriptContainsBlockedCommand(script, context) {
|
|
744
|
-
return statementsContainBlockedCommand(script.statements, context);
|
|
752
|
+
function scriptContainsBlockedCommand(script, context, mode = "blocked-only") {
|
|
753
|
+
return statementsContainBlockedCommand(script.statements, context, mode);
|
|
745
754
|
}
|
|
746
|
-
function statementsContainBlockedCommand(statements, context) {
|
|
755
|
+
function statementsContainBlockedCommand(statements, context, mode) {
|
|
747
756
|
for (const statement of statements) {
|
|
748
|
-
if (statementContainsBlockedCommand(statement, context)) {
|
|
757
|
+
if (statementContainsBlockedCommand(statement, context, mode)) {
|
|
749
758
|
return true;
|
|
750
759
|
}
|
|
751
760
|
}
|
|
752
761
|
return false;
|
|
753
762
|
}
|
|
754
|
-
function statementContainsBlockedCommand(statement, context) {
|
|
763
|
+
function statementContainsBlockedCommand(statement, context, mode) {
|
|
755
764
|
for (const pipeline of statement.pipelines) {
|
|
756
|
-
if (pipelineContainsBlockedCommand(pipeline, context)) {
|
|
765
|
+
if (pipelineContainsBlockedCommand(pipeline, context, mode)) {
|
|
757
766
|
return true;
|
|
758
767
|
}
|
|
759
768
|
}
|
|
760
769
|
return false;
|
|
761
770
|
}
|
|
762
|
-
function pipelineContainsBlockedCommand(pipeline, context) {
|
|
763
|
-
for (const command of pipeline.commands) {
|
|
771
|
+
function pipelineContainsBlockedCommand(pipeline, context, mode) {
|
|
772
|
+
for (const [index2, command] of pipeline.commands.entries()) {
|
|
764
773
|
if (command.type === "FunctionDef") {
|
|
765
774
|
context.functionDefinitions.set(command.name, command);
|
|
766
775
|
continue;
|
|
767
776
|
}
|
|
768
|
-
if (commandContainsBlockedCommand(command, context
|
|
777
|
+
if (commandContainsBlockedCommand(command, context, mode, {
|
|
778
|
+
stdinFromPipe: index2 > 0
|
|
779
|
+
})) {
|
|
769
780
|
return true;
|
|
770
781
|
}
|
|
771
782
|
}
|
|
772
783
|
return false;
|
|
773
784
|
}
|
|
774
|
-
function stringCommandContainsBlockedCommand(command, context) {
|
|
785
|
+
function stringCommandContainsBlockedCommand(command, context, mode = "blocked-only") {
|
|
775
786
|
let script;
|
|
776
787
|
try {
|
|
777
788
|
script = parse(command);
|
|
778
789
|
} catch {
|
|
779
790
|
return false;
|
|
780
791
|
}
|
|
781
|
-
return scriptContainsBlockedCommand(
|
|
792
|
+
return scriptContainsBlockedCommand(
|
|
793
|
+
script,
|
|
794
|
+
cloneInspectionContext(context),
|
|
795
|
+
mode
|
|
796
|
+
);
|
|
782
797
|
}
|
|
783
|
-
function wordContainsBlockedCommand(word, context) {
|
|
798
|
+
function wordContainsBlockedCommand(word, context, mode) {
|
|
784
799
|
if (!word) {
|
|
785
800
|
return false;
|
|
786
801
|
}
|
|
787
802
|
return wordPartContainsBlockedCommand(
|
|
788
803
|
word.parts,
|
|
789
|
-
context
|
|
804
|
+
context,
|
|
805
|
+
mode
|
|
790
806
|
);
|
|
791
807
|
}
|
|
792
|
-
function wordPartContainsBlockedCommand(parts, context) {
|
|
808
|
+
function wordPartContainsBlockedCommand(parts, context, mode) {
|
|
793
809
|
for (const part of parts) {
|
|
794
|
-
if (partContainsBlockedCommand(part, context)) {
|
|
810
|
+
if (partContainsBlockedCommand(part, context, mode)) {
|
|
795
811
|
return true;
|
|
796
812
|
}
|
|
797
813
|
}
|
|
798
814
|
return false;
|
|
799
815
|
}
|
|
800
|
-
function partContainsBlockedCommand(node, context) {
|
|
816
|
+
function partContainsBlockedCommand(node, context, mode) {
|
|
801
817
|
const type = node.type;
|
|
802
818
|
if (type === "CommandSubstitution" || type === "ProcessSubstitution") {
|
|
803
819
|
if (isScriptNode(node.body)) {
|
|
804
820
|
return scriptContainsBlockedCommand(
|
|
805
821
|
node.body,
|
|
806
|
-
cloneInspectionContext(context)
|
|
822
|
+
cloneInspectionContext(context),
|
|
823
|
+
mode
|
|
807
824
|
);
|
|
808
825
|
}
|
|
809
826
|
return false;
|
|
810
827
|
}
|
|
811
828
|
if (type === "ArithCommandSubst" && typeof node.command === "string") {
|
|
812
|
-
return stringCommandContainsBlockedCommand(node.command, context);
|
|
829
|
+
return stringCommandContainsBlockedCommand(node.command, context, mode);
|
|
813
830
|
}
|
|
814
831
|
for (const value of Object.values(node)) {
|
|
815
832
|
if (Array.isArray(value)) {
|
|
816
833
|
for (const item of value) {
|
|
817
834
|
if (typeof item === "object" && item !== null) {
|
|
818
|
-
if (partContainsBlockedCommand(
|
|
835
|
+
if (partContainsBlockedCommand(
|
|
836
|
+
item,
|
|
837
|
+
context,
|
|
838
|
+
mode
|
|
839
|
+
)) {
|
|
819
840
|
return true;
|
|
820
841
|
}
|
|
821
842
|
}
|
|
@@ -823,14 +844,18 @@ function partContainsBlockedCommand(node, context) {
|
|
|
823
844
|
continue;
|
|
824
845
|
}
|
|
825
846
|
if (typeof value === "object" && value !== null) {
|
|
826
|
-
if (partContainsBlockedCommand(
|
|
847
|
+
if (partContainsBlockedCommand(
|
|
848
|
+
value,
|
|
849
|
+
context,
|
|
850
|
+
mode
|
|
851
|
+
)) {
|
|
827
852
|
return true;
|
|
828
853
|
}
|
|
829
854
|
}
|
|
830
855
|
}
|
|
831
856
|
return false;
|
|
832
857
|
}
|
|
833
|
-
function functionInvocationContainsBlockedCommand(functionName, context) {
|
|
858
|
+
function functionInvocationContainsBlockedCommand(functionName, context, mode) {
|
|
834
859
|
const definition = context.functionDefinitions.get(functionName);
|
|
835
860
|
if (!definition) {
|
|
836
861
|
return false;
|
|
@@ -840,52 +865,306 @@ function functionInvocationContainsBlockedCommand(functionName, context) {
|
|
|
840
865
|
}
|
|
841
866
|
const invocationContext = cloneInspectionContext(context);
|
|
842
867
|
invocationContext.callStack.add(functionName);
|
|
843
|
-
return commandContainsBlockedCommand(
|
|
868
|
+
return commandContainsBlockedCommand(
|
|
869
|
+
definition.body,
|
|
870
|
+
invocationContext,
|
|
871
|
+
mode,
|
|
872
|
+
{ stdinFromPipe: false }
|
|
873
|
+
);
|
|
874
|
+
}
|
|
875
|
+
function isAsciiLetter(character) {
|
|
876
|
+
const charCode = character.charCodeAt(0);
|
|
877
|
+
return charCode >= 65 && charCode <= 90 || charCode >= 97 && charCode <= 122;
|
|
878
|
+
}
|
|
879
|
+
function isAsciiDigit(character) {
|
|
880
|
+
const charCode = character.charCodeAt(0);
|
|
881
|
+
return charCode >= 48 && charCode <= 57;
|
|
882
|
+
}
|
|
883
|
+
function isValidEnvVariableName(name) {
|
|
884
|
+
if (!name) {
|
|
885
|
+
return false;
|
|
886
|
+
}
|
|
887
|
+
const firstChar = name[0];
|
|
888
|
+
if (!(isAsciiLetter(firstChar) || firstChar === "_")) {
|
|
889
|
+
return false;
|
|
890
|
+
}
|
|
891
|
+
for (let index2 = 1; index2 < name.length; index2 += 1) {
|
|
892
|
+
const char = name[index2];
|
|
893
|
+
if (!(isAsciiLetter(char) || isAsciiDigit(char) || char === "_")) {
|
|
894
|
+
return false;
|
|
895
|
+
}
|
|
896
|
+
}
|
|
897
|
+
return true;
|
|
898
|
+
}
|
|
899
|
+
function isEnvAssignmentToken(token) {
|
|
900
|
+
const separatorIndex = token.indexOf("=");
|
|
901
|
+
if (separatorIndex <= 0) {
|
|
902
|
+
return false;
|
|
903
|
+
}
|
|
904
|
+
return isValidEnvVariableName(token.slice(0, separatorIndex));
|
|
905
|
+
}
|
|
906
|
+
function parseShortOptionCluster(option) {
|
|
907
|
+
if (!option.startsWith("-") || option.startsWith("--") || option.length <= 1) {
|
|
908
|
+
return {
|
|
909
|
+
valid: false,
|
|
910
|
+
hasCommandFlag: false,
|
|
911
|
+
hasStdinFlag: false,
|
|
912
|
+
consumesNextArg: false
|
|
913
|
+
};
|
|
914
|
+
}
|
|
915
|
+
let hasCommandFlag = false;
|
|
916
|
+
let hasStdinFlag = false;
|
|
917
|
+
let consumesNextArg = false;
|
|
918
|
+
for (let index2 = 1; index2 < option.length; index2 += 1) {
|
|
919
|
+
const char = option[index2];
|
|
920
|
+
if (!isAsciiLetter(char)) {
|
|
921
|
+
return {
|
|
922
|
+
valid: false,
|
|
923
|
+
hasCommandFlag: false,
|
|
924
|
+
hasStdinFlag: false,
|
|
925
|
+
consumesNextArg: false
|
|
926
|
+
};
|
|
927
|
+
}
|
|
928
|
+
if (char === "c") {
|
|
929
|
+
hasCommandFlag = true;
|
|
930
|
+
} else if (char === "s") {
|
|
931
|
+
hasStdinFlag = true;
|
|
932
|
+
} else if (char === "O" || char === "o") {
|
|
933
|
+
consumesNextArg = true;
|
|
934
|
+
}
|
|
935
|
+
}
|
|
936
|
+
return { valid: true, hasCommandFlag, hasStdinFlag, consumesNextArg };
|
|
937
|
+
}
|
|
938
|
+
function getShellInvocationDescriptor(args) {
|
|
939
|
+
let readsFromStdin = false;
|
|
940
|
+
const longOptionsWithValue = /* @__PURE__ */ new Set(["--rcfile", "--init-file"]);
|
|
941
|
+
for (let index2 = 0; index2 < args.length; index2 += 1) {
|
|
942
|
+
const token = asStaticWordText(args[index2]);
|
|
943
|
+
if (token == null) {
|
|
944
|
+
return { kind: "unknown", payload: null };
|
|
945
|
+
}
|
|
946
|
+
if (token === "--") {
|
|
947
|
+
if (index2 + 1 >= args.length) {
|
|
948
|
+
break;
|
|
949
|
+
}
|
|
950
|
+
return {
|
|
951
|
+
kind: "script",
|
|
952
|
+
payload: asStaticWordText(args[index2 + 1])
|
|
953
|
+
};
|
|
954
|
+
}
|
|
955
|
+
if (token === "--command") {
|
|
956
|
+
return {
|
|
957
|
+
kind: "command",
|
|
958
|
+
payload: asStaticWordText(args[index2 + 1])
|
|
959
|
+
};
|
|
960
|
+
}
|
|
961
|
+
if (token.startsWith("--command=")) {
|
|
962
|
+
return {
|
|
963
|
+
kind: "command",
|
|
964
|
+
payload: token.slice("--command=".length)
|
|
965
|
+
};
|
|
966
|
+
}
|
|
967
|
+
if (token.startsWith("--")) {
|
|
968
|
+
if (token.includes("=")) {
|
|
969
|
+
continue;
|
|
970
|
+
}
|
|
971
|
+
if (longOptionsWithValue.has(token)) {
|
|
972
|
+
if (index2 + 1 >= args.length) {
|
|
973
|
+
return { kind: "unknown", payload: null };
|
|
974
|
+
}
|
|
975
|
+
index2 += 1;
|
|
976
|
+
}
|
|
977
|
+
continue;
|
|
978
|
+
}
|
|
979
|
+
if (token.startsWith("-") && !token.startsWith("--")) {
|
|
980
|
+
const parsed = parseShortOptionCluster(token);
|
|
981
|
+
if (!parsed.valid) {
|
|
982
|
+
return { kind: "unknown", payload: null };
|
|
983
|
+
}
|
|
984
|
+
if (parsed.hasCommandFlag) {
|
|
985
|
+
return {
|
|
986
|
+
kind: "command",
|
|
987
|
+
payload: asStaticWordText(args[index2 + 1])
|
|
988
|
+
};
|
|
989
|
+
}
|
|
990
|
+
if (parsed.hasStdinFlag) {
|
|
991
|
+
readsFromStdin = true;
|
|
992
|
+
}
|
|
993
|
+
if (parsed.consumesNextArg) {
|
|
994
|
+
if (index2 + 1 >= args.length) {
|
|
995
|
+
return { kind: "unknown", payload: null };
|
|
996
|
+
}
|
|
997
|
+
index2 += 1;
|
|
998
|
+
}
|
|
999
|
+
continue;
|
|
1000
|
+
}
|
|
1001
|
+
return {
|
|
1002
|
+
kind: "script",
|
|
1003
|
+
payload: token
|
|
1004
|
+
};
|
|
1005
|
+
}
|
|
1006
|
+
if (readsFromStdin) {
|
|
1007
|
+
return { kind: "stdin", payload: null };
|
|
1008
|
+
}
|
|
1009
|
+
return { kind: "none", payload: null };
|
|
1010
|
+
}
|
|
1011
|
+
function getHereDocPayload(redirections) {
|
|
1012
|
+
const payloads = [];
|
|
1013
|
+
for (const redirection of redirections) {
|
|
1014
|
+
if (redirection.target.type !== "HereDoc") {
|
|
1015
|
+
continue;
|
|
1016
|
+
}
|
|
1017
|
+
if (!redirection.target.content) {
|
|
1018
|
+
payloads.push("");
|
|
1019
|
+
continue;
|
|
1020
|
+
}
|
|
1021
|
+
const payload = asStaticWordText(redirection.target.content);
|
|
1022
|
+
if (payload == null) {
|
|
1023
|
+
return { hasHereDoc: true, payload: null };
|
|
1024
|
+
}
|
|
1025
|
+
payloads.push(payload);
|
|
1026
|
+
}
|
|
1027
|
+
if (payloads.length === 0) {
|
|
1028
|
+
return { hasHereDoc: false, payload: null };
|
|
1029
|
+
}
|
|
1030
|
+
return { hasHereDoc: true, payload: payloads.join("\n") };
|
|
1031
|
+
}
|
|
1032
|
+
function joinStaticWords(words) {
|
|
1033
|
+
const tokens = [];
|
|
1034
|
+
for (const word of words) {
|
|
1035
|
+
const token = asStaticWordText(word);
|
|
1036
|
+
if (token == null) {
|
|
1037
|
+
return null;
|
|
1038
|
+
}
|
|
1039
|
+
tokens.push(token);
|
|
1040
|
+
}
|
|
1041
|
+
return tokens.join(" ");
|
|
1042
|
+
}
|
|
1043
|
+
function resolveEnvWrapperCommand(args) {
|
|
1044
|
+
let index2 = 0;
|
|
1045
|
+
while (index2 < args.length) {
|
|
1046
|
+
const token = asStaticWordText(args[index2]);
|
|
1047
|
+
if (token == null) {
|
|
1048
|
+
return { kind: "unknown" };
|
|
1049
|
+
}
|
|
1050
|
+
if (token === "--") {
|
|
1051
|
+
index2 += 1;
|
|
1052
|
+
break;
|
|
1053
|
+
}
|
|
1054
|
+
if (token === "-u" || token === "--unset" || token === "--chdir") {
|
|
1055
|
+
if (index2 + 1 >= args.length) {
|
|
1056
|
+
return { kind: "unknown" };
|
|
1057
|
+
}
|
|
1058
|
+
index2 += 2;
|
|
1059
|
+
continue;
|
|
1060
|
+
}
|
|
1061
|
+
if (token.startsWith("--unset=") || token.startsWith("--chdir=")) {
|
|
1062
|
+
index2 += 1;
|
|
1063
|
+
continue;
|
|
1064
|
+
}
|
|
1065
|
+
if (token.startsWith("-") && token !== "-" && !isEnvAssignmentToken(token)) {
|
|
1066
|
+
index2 += 1;
|
|
1067
|
+
continue;
|
|
1068
|
+
}
|
|
1069
|
+
if (isEnvAssignmentToken(token)) {
|
|
1070
|
+
index2 += 1;
|
|
1071
|
+
continue;
|
|
1072
|
+
}
|
|
1073
|
+
break;
|
|
1074
|
+
}
|
|
1075
|
+
if (index2 >= args.length) {
|
|
1076
|
+
return { kind: "none" };
|
|
1077
|
+
}
|
|
1078
|
+
return {
|
|
1079
|
+
kind: "resolved",
|
|
1080
|
+
name: args[index2],
|
|
1081
|
+
args: args.slice(index2 + 1)
|
|
1082
|
+
};
|
|
844
1083
|
}
|
|
845
|
-
function
|
|
1084
|
+
function resolveCommandWrapperCommand(args) {
|
|
1085
|
+
let index2 = 0;
|
|
1086
|
+
let lookupOnly = false;
|
|
1087
|
+
while (index2 < args.length) {
|
|
1088
|
+
const token = asStaticWordText(args[index2]);
|
|
1089
|
+
if (token == null) {
|
|
1090
|
+
return { kind: "unknown" };
|
|
1091
|
+
}
|
|
1092
|
+
if (token === "--") {
|
|
1093
|
+
index2 += 1;
|
|
1094
|
+
break;
|
|
1095
|
+
}
|
|
1096
|
+
if (token === "-v" || token === "-V") {
|
|
1097
|
+
lookupOnly = true;
|
|
1098
|
+
index2 += 1;
|
|
1099
|
+
continue;
|
|
1100
|
+
}
|
|
1101
|
+
if (token.startsWith("-") && token !== "-") {
|
|
1102
|
+
index2 += 1;
|
|
1103
|
+
continue;
|
|
1104
|
+
}
|
|
1105
|
+
break;
|
|
1106
|
+
}
|
|
1107
|
+
if (lookupOnly || index2 >= args.length) {
|
|
1108
|
+
return { kind: "none" };
|
|
1109
|
+
}
|
|
1110
|
+
return {
|
|
1111
|
+
kind: "resolved",
|
|
1112
|
+
name: args[index2],
|
|
1113
|
+
args: args.slice(index2 + 1)
|
|
1114
|
+
};
|
|
1115
|
+
}
|
|
1116
|
+
function commandContainsBlockedCommand(command, context, mode, options = { stdinFromPipe: false }) {
|
|
846
1117
|
switch (command.type) {
|
|
847
1118
|
case "SimpleCommand":
|
|
848
|
-
return isBlockedSimpleCommand(command, context);
|
|
1119
|
+
return isBlockedSimpleCommand(command, context, mode, options);
|
|
849
1120
|
case "If":
|
|
850
1121
|
return command.clauses.some(
|
|
851
1122
|
(clause) => statementsContainBlockedCommand(
|
|
852
1123
|
clause.condition,
|
|
853
|
-
cloneInspectionContext(context)
|
|
1124
|
+
cloneInspectionContext(context),
|
|
1125
|
+
mode
|
|
854
1126
|
) || statementsContainBlockedCommand(
|
|
855
1127
|
clause.body,
|
|
856
|
-
cloneInspectionContext(context)
|
|
1128
|
+
cloneInspectionContext(context),
|
|
1129
|
+
mode
|
|
857
1130
|
)
|
|
858
1131
|
) || (command.elseBody ? statementsContainBlockedCommand(
|
|
859
1132
|
command.elseBody,
|
|
860
|
-
cloneInspectionContext(context)
|
|
1133
|
+
cloneInspectionContext(context),
|
|
1134
|
+
mode
|
|
861
1135
|
) : false);
|
|
862
1136
|
case "For":
|
|
863
1137
|
case "CStyleFor":
|
|
864
1138
|
return statementsContainBlockedCommand(
|
|
865
1139
|
command.body,
|
|
866
|
-
cloneInspectionContext(context)
|
|
1140
|
+
cloneInspectionContext(context),
|
|
1141
|
+
mode
|
|
867
1142
|
);
|
|
868
1143
|
case "While":
|
|
869
1144
|
case "Until":
|
|
870
1145
|
return statementsContainBlockedCommand(
|
|
871
1146
|
command.condition,
|
|
872
|
-
cloneInspectionContext(context)
|
|
1147
|
+
cloneInspectionContext(context),
|
|
1148
|
+
mode
|
|
873
1149
|
) || statementsContainBlockedCommand(
|
|
874
1150
|
command.body,
|
|
875
|
-
cloneInspectionContext(context)
|
|
1151
|
+
cloneInspectionContext(context),
|
|
1152
|
+
mode
|
|
876
1153
|
);
|
|
877
1154
|
case "Case":
|
|
878
1155
|
return command.items.some(
|
|
879
1156
|
(item) => statementsContainBlockedCommand(
|
|
880
1157
|
item.body,
|
|
881
|
-
cloneInspectionContext(context)
|
|
1158
|
+
cloneInspectionContext(context),
|
|
1159
|
+
mode
|
|
882
1160
|
)
|
|
883
1161
|
);
|
|
884
1162
|
case "Subshell":
|
|
885
1163
|
case "Group":
|
|
886
1164
|
return statementsContainBlockedCommand(
|
|
887
1165
|
command.body,
|
|
888
|
-
cloneInspectionContext(context)
|
|
1166
|
+
cloneInspectionContext(context),
|
|
1167
|
+
mode
|
|
889
1168
|
);
|
|
890
1169
|
case "FunctionDef":
|
|
891
1170
|
return false;
|
|
@@ -898,16 +1177,16 @@ function commandContainsBlockedCommand(command, context) {
|
|
|
898
1177
|
}
|
|
899
1178
|
}
|
|
900
1179
|
}
|
|
901
|
-
function isBlockedSimpleCommand(command, context) {
|
|
902
|
-
if (wordContainsBlockedCommand(command.name, context)) {
|
|
1180
|
+
function isBlockedSimpleCommand(command, context, mode, options) {
|
|
1181
|
+
if (wordContainsBlockedCommand(command.name, context, mode)) {
|
|
903
1182
|
return true;
|
|
904
1183
|
}
|
|
905
|
-
if (command.args.some((arg) => wordContainsBlockedCommand(arg, context))) {
|
|
1184
|
+
if (command.args.some((arg) => wordContainsBlockedCommand(arg, context, mode))) {
|
|
906
1185
|
return true;
|
|
907
1186
|
}
|
|
908
1187
|
if (command.assignments.some(
|
|
909
|
-
(assignment) => wordContainsBlockedCommand(assignment.value, context) || (assignment.array?.some(
|
|
910
|
-
(value) => wordContainsBlockedCommand(value, context)
|
|
1188
|
+
(assignment) => wordContainsBlockedCommand(assignment.value, context, mode) || (assignment.array?.some(
|
|
1189
|
+
(value) => wordContainsBlockedCommand(value, context, mode)
|
|
911
1190
|
) ?? false)
|
|
912
1191
|
)) {
|
|
913
1192
|
return true;
|
|
@@ -916,11 +1195,16 @@ function isBlockedSimpleCommand(command, context) {
|
|
|
916
1195
|
if (redirection.target.type === "Word") {
|
|
917
1196
|
return wordContainsBlockedCommand(
|
|
918
1197
|
redirection.target,
|
|
919
|
-
context
|
|
1198
|
+
context,
|
|
1199
|
+
mode
|
|
920
1200
|
);
|
|
921
1201
|
}
|
|
922
1202
|
if (redirection.target.type === "HereDoc" && redirection.target.content) {
|
|
923
|
-
return wordContainsBlockedCommand(
|
|
1203
|
+
return wordContainsBlockedCommand(
|
|
1204
|
+
redirection.target.content,
|
|
1205
|
+
context,
|
|
1206
|
+
mode
|
|
1207
|
+
);
|
|
924
1208
|
}
|
|
925
1209
|
return false;
|
|
926
1210
|
})) {
|
|
@@ -939,9 +1223,92 @@ function isBlockedSimpleCommand(command, context) {
|
|
|
939
1223
|
}
|
|
940
1224
|
if (normalizedName === "sql") {
|
|
941
1225
|
const subcommand = asStaticWordText(command.args[0])?.toLowerCase();
|
|
942
|
-
|
|
1226
|
+
if (!subcommand) {
|
|
1227
|
+
return true;
|
|
1228
|
+
}
|
|
1229
|
+
if (mode === "block-all-sql") {
|
|
1230
|
+
return true;
|
|
1231
|
+
}
|
|
1232
|
+
return !ALLOWED_SQL_PROXY_SUBCOMMANDS.has(subcommand);
|
|
943
1233
|
}
|
|
944
|
-
|
|
1234
|
+
const inspectWrappedCommand = (resolved) => {
|
|
1235
|
+
if (resolved.kind === "none") {
|
|
1236
|
+
return false;
|
|
1237
|
+
}
|
|
1238
|
+
if (resolved.kind === "unknown" || !resolved.name || !resolved.args) {
|
|
1239
|
+
return true;
|
|
1240
|
+
}
|
|
1241
|
+
return isBlockedSimpleCommand(
|
|
1242
|
+
{
|
|
1243
|
+
name: resolved.name,
|
|
1244
|
+
args: resolved.args,
|
|
1245
|
+
assignments: [],
|
|
1246
|
+
redirections: []
|
|
1247
|
+
},
|
|
1248
|
+
context,
|
|
1249
|
+
"block-all-sql",
|
|
1250
|
+
options
|
|
1251
|
+
);
|
|
1252
|
+
};
|
|
1253
|
+
if (WRAPPER_COMMANDS.has(normalizedName)) {
|
|
1254
|
+
if (normalizedName === "env") {
|
|
1255
|
+
return inspectWrappedCommand(resolveEnvWrapperCommand(command.args));
|
|
1256
|
+
}
|
|
1257
|
+
if (normalizedName === "command") {
|
|
1258
|
+
return inspectWrappedCommand(resolveCommandWrapperCommand(command.args));
|
|
1259
|
+
}
|
|
1260
|
+
const evalScript = joinStaticWords(command.args);
|
|
1261
|
+
if (evalScript == null) {
|
|
1262
|
+
return true;
|
|
1263
|
+
}
|
|
1264
|
+
if (!evalScript.trim()) {
|
|
1265
|
+
return false;
|
|
1266
|
+
}
|
|
1267
|
+
return stringCommandContainsBlockedCommand(
|
|
1268
|
+
evalScript,
|
|
1269
|
+
context,
|
|
1270
|
+
"block-all-sql"
|
|
1271
|
+
);
|
|
1272
|
+
}
|
|
1273
|
+
if (SHELL_INTERPRETER_COMMANDS.has(normalizedName)) {
|
|
1274
|
+
const shellInvocation = getShellInvocationDescriptor(command.args);
|
|
1275
|
+
if (shellInvocation.kind === "unknown") {
|
|
1276
|
+
return true;
|
|
1277
|
+
}
|
|
1278
|
+
if (shellInvocation.kind === "command") {
|
|
1279
|
+
if (!shellInvocation.payload) {
|
|
1280
|
+
return true;
|
|
1281
|
+
}
|
|
1282
|
+
if (stringCommandContainsBlockedCommand(
|
|
1283
|
+
shellInvocation.payload,
|
|
1284
|
+
context,
|
|
1285
|
+
"block-all-sql"
|
|
1286
|
+
)) {
|
|
1287
|
+
return true;
|
|
1288
|
+
}
|
|
1289
|
+
return false;
|
|
1290
|
+
}
|
|
1291
|
+
const hereDoc = getHereDocPayload(command.redirections);
|
|
1292
|
+
if (hereDoc.hasHereDoc) {
|
|
1293
|
+
if (hereDoc.payload == null) {
|
|
1294
|
+
return true;
|
|
1295
|
+
}
|
|
1296
|
+
if (hereDoc.payload.trim().length > 0 && stringCommandContainsBlockedCommand(
|
|
1297
|
+
hereDoc.payload,
|
|
1298
|
+
context,
|
|
1299
|
+
"block-all-sql"
|
|
1300
|
+
)) {
|
|
1301
|
+
return true;
|
|
1302
|
+
}
|
|
1303
|
+
}
|
|
1304
|
+
if (shellInvocation.kind === "script") {
|
|
1305
|
+
return true;
|
|
1306
|
+
}
|
|
1307
|
+
if (options.stdinFromPipe || shellInvocation.kind === "stdin") {
|
|
1308
|
+
return !hereDoc.hasHereDoc;
|
|
1309
|
+
}
|
|
1310
|
+
}
|
|
1311
|
+
if (functionInvocationContainsBlockedCommand(commandName, context, mode)) {
|
|
945
1312
|
return true;
|
|
946
1313
|
}
|
|
947
1314
|
return false;
|
|
@@ -1073,11 +1440,15 @@ import "@deepagents/agent";
|
|
|
1073
1440
|
import {
|
|
1074
1441
|
ContextEngine as ContextEngine2,
|
|
1075
1442
|
InMemoryContextStore as InMemoryContextStore2,
|
|
1443
|
+
example,
|
|
1076
1444
|
fragment as fragment2,
|
|
1445
|
+
guardrail,
|
|
1446
|
+
hint as hint2,
|
|
1077
1447
|
persona as persona3,
|
|
1078
1448
|
policy,
|
|
1079
1449
|
structuredOutput as structuredOutput2,
|
|
1080
|
-
user as user2
|
|
1450
|
+
user as user2,
|
|
1451
|
+
workflow
|
|
1081
1452
|
} from "@deepagents/context";
|
|
1082
1453
|
var RETRY_TEMPERATURES = [0, 0.2, 0.3];
|
|
1083
1454
|
var SQL_AGENT_ROLE = "Expert SQL query generator.";
|
|
@@ -1087,94 +1458,201 @@ var SQL_AGENT_POLICIES = [
|
|
|
1087
1458
|
"schema_mapping",
|
|
1088
1459
|
policy({
|
|
1089
1460
|
rule: "Translate natural language into precise SQL grounded in available schema entities."
|
|
1461
|
+
}),
|
|
1462
|
+
hint2("Preserve schema spelling exactly, including typos in column names.")
|
|
1463
|
+
),
|
|
1464
|
+
fragment2(
|
|
1465
|
+
"projection_minimality",
|
|
1466
|
+
policy({
|
|
1467
|
+
rule: "Return only columns requested by the question; do not add helper columns unless explicitly requested."
|
|
1468
|
+
}),
|
|
1469
|
+
policy({
|
|
1470
|
+
rule: 'For requests of the form "X sorted/ordered by Y", project X only unless Y is explicitly requested as an output field.'
|
|
1471
|
+
}),
|
|
1472
|
+
policy({
|
|
1473
|
+
rule: "Prefer selecting schema columns directly without derived expressions when direct selection answers the request."
|
|
1474
|
+
}),
|
|
1475
|
+
hint2(
|
|
1476
|
+
"Do not include ORDER BY, GROUP BY, or JOIN helper columns in SELECT output unless the question explicitly asks for them."
|
|
1477
|
+
),
|
|
1478
|
+
policy({
|
|
1479
|
+
rule: "Use DISTINCT only when uniqueness is explicitly requested (for example distinct/unique/different/no duplicates)."
|
|
1480
|
+
}),
|
|
1481
|
+
hint2(
|
|
1482
|
+
'Do not infer DISTINCT from generic wording such as "some", plural nouns, or entity-set phrasing; for transactional/attendance-style tables, default to raw rows unless uniqueness is explicitly requested.'
|
|
1483
|
+
)
|
|
1484
|
+
),
|
|
1485
|
+
fragment2(
|
|
1486
|
+
"date_transform_safety",
|
|
1487
|
+
policy({
|
|
1488
|
+
rule: "Do not assume VARCHAR/TEXT values are parseable dates. Avoid date extraction functions on text columns by default."
|
|
1489
|
+
}),
|
|
1490
|
+
policy({
|
|
1491
|
+
rule: "Use date-part extraction only when both conditions hold: the question explicitly asks for transformation and schema values require transformation to produce that unit."
|
|
1492
|
+
}),
|
|
1493
|
+
hint2(
|
|
1494
|
+
"Do not apply SUBSTR, STRFTIME, DATE_PART, YEAR, or similar extraction functions unless the question explicitly asks for transformation and schema values require it."
|
|
1495
|
+
),
|
|
1496
|
+
hint2(
|
|
1497
|
+
"If a column already represents the requested concept (for example a stored year-like value), use the column as-is."
|
|
1498
|
+
)
|
|
1499
|
+
),
|
|
1500
|
+
fragment2(
|
|
1501
|
+
"sql_minimality",
|
|
1502
|
+
guardrail({
|
|
1503
|
+
rule: "Never hallucinate tables or columns.",
|
|
1504
|
+
reason: "Schema fidelity is required.",
|
|
1505
|
+
action: "Use only available schema entities."
|
|
1506
|
+
}),
|
|
1507
|
+
guardrail({
|
|
1508
|
+
rule: "Avoid unnecessary transformations and derived projections.",
|
|
1509
|
+
reason: "Extra transformations frequently change semantics and reduce correctness.",
|
|
1510
|
+
action: "Do not add date parsing, substring extraction, or derived columns unless explicitly required by the question or schema."
|
|
1511
|
+
})
|
|
1512
|
+
),
|
|
1513
|
+
fragment2(
|
|
1514
|
+
"preflight_checklist",
|
|
1515
|
+
workflow({
|
|
1516
|
+
task: "Final SQL preflight before returning output",
|
|
1517
|
+
steps: [
|
|
1518
|
+
"Verify selected columns match the question and remove unrequested helper projections.",
|
|
1519
|
+
"If aggregate values are used only for ranking/filtering, keep them out of SELECT unless explicitly requested.",
|
|
1520
|
+
"Prefer raw schema columns over derived expressions when raw columns already satisfy the request.",
|
|
1521
|
+
"If a candidate query uses STRFTIME, SUBSTR, DATE_PART, YEAR, or similar extraction on text-like columns, remove that transformation unless explicitly required by the question.",
|
|
1522
|
+
"Return only schema-grounded SQL using existing tables and columns."
|
|
1523
|
+
]
|
|
1524
|
+
})
|
|
1525
|
+
),
|
|
1526
|
+
fragment2(
|
|
1527
|
+
"set_semantics",
|
|
1528
|
+
policy({
|
|
1529
|
+
rule: "For questions asking where both condition A and condition B hold over an attribute, compute the intersection of qualifying sets for that attribute."
|
|
1530
|
+
}),
|
|
1531
|
+
policy({
|
|
1532
|
+
rule: "Do not force the same entity instance to satisfy both conditions unless the question explicitly requests the same person/row/entity."
|
|
1533
|
+
}),
|
|
1534
|
+
hint2(
|
|
1535
|
+
"Prefer INTERSECT (or logically equivalent set-based shape) over requiring the same physical row/entity to satisfy both conditions unless explicitly requested."
|
|
1536
|
+
),
|
|
1537
|
+
hint2(
|
|
1538
|
+
"When two conditions describe different row groups whose shared attribute is requested, build each group separately and intersect the attribute values."
|
|
1539
|
+
),
|
|
1540
|
+
hint2(
|
|
1541
|
+
"Do not collapse cross-group conditions into a single-row AND predicate when the intent is shared values across groups."
|
|
1542
|
+
),
|
|
1543
|
+
policy({
|
|
1544
|
+
rule: "If two predicates on the same field cannot both be true for one row, do not combine them with AND; use set operations across separate filtered subsets when shared values are requested."
|
|
1545
|
+
})
|
|
1546
|
+
),
|
|
1547
|
+
fragment2(
|
|
1548
|
+
"predicate_column_alignment",
|
|
1549
|
+
policy({
|
|
1550
|
+
rule: "Match literal values to semantically compatible columns. Do not compare descriptive names to identifier columns."
|
|
1551
|
+
}),
|
|
1552
|
+
hint2(
|
|
1553
|
+
"When a filter value is a descriptive label (for example a department name), join through the lookup table and filter on its name/title column, not on *_id columns."
|
|
1554
|
+
),
|
|
1555
|
+
hint2(
|
|
1556
|
+
"When relation roles are explicit in wording (for example host/home/source/destination), prefer foreign keys with matching role qualifiers over generic similarly named columns."
|
|
1557
|
+
),
|
|
1558
|
+
policy({
|
|
1559
|
+
rule: "When multiple foreign-key candidates exist, select the column whose qualifier best matches the relationship described in the question."
|
|
1560
|
+
}),
|
|
1561
|
+
policy({
|
|
1562
|
+
rule: "For hosting/held semantics, prefer host_* relationship columns when available over generic *_id alternatives."
|
|
1563
|
+
}),
|
|
1564
|
+
hint2(
|
|
1565
|
+
'Interpret wording like "held/hosted a competition or event" as a hosting relationship and map to host_* foreign keys when present.'
|
|
1566
|
+
),
|
|
1567
|
+
policy({
|
|
1568
|
+
rule: "Do not compare descriptive labels or names to *_id columns; join to the table containing the descriptive field and filter there."
|
|
1569
|
+
}),
|
|
1570
|
+
policy({
|
|
1571
|
+
rule: "Keep numeric identifiers unquoted when used as numeric equality filters unless schema indicates text identifiers."
|
|
1572
|
+
}),
|
|
1573
|
+
policy({
|
|
1574
|
+
rule: "When filtering by a descriptive label value and a related table exposes a corresponding *_name or title column, join to that table and filter on the descriptive column."
|
|
1575
|
+
})
|
|
1576
|
+
),
|
|
1577
|
+
fragment2(
|
|
1578
|
+
"ordering_semantics",
|
|
1579
|
+
policy({
|
|
1580
|
+
rule: "Respect explicit sort direction terms. If direction is not specified, use ascending order unless a superlative intent (most/least/highest/lowest) implies direction."
|
|
1581
|
+
}),
|
|
1582
|
+
policy({
|
|
1583
|
+
rule: "When ranking categories by frequency, use COUNT for ordering but keep output focused on requested category fields unless counts are explicitly requested."
|
|
1584
|
+
}),
|
|
1585
|
+
policy({
|
|
1586
|
+
rule: "Do not use DESC unless descending direction is explicit or a superlative intent requires descending ranking."
|
|
1587
|
+
}),
|
|
1588
|
+
policy({
|
|
1589
|
+
rule: 'For "most common/frequent <attribute>" requests, return the attribute value(s) only; use counts only for ordering/filtering unless the question explicitly asks to return counts.'
|
|
1590
|
+
}),
|
|
1591
|
+
hint2(
|
|
1592
|
+
'Use DESC with LIMIT 1 for "most/highest/largest"; use ASC with LIMIT 1 for "least/lowest/smallest".'
|
|
1593
|
+
)
|
|
1594
|
+
),
|
|
1595
|
+
fragment2(
|
|
1596
|
+
"negative_membership_queries",
|
|
1597
|
+
policy({
|
|
1598
|
+
rule: "For requests asking entities that did not participate/host/appear in related records, prefer NOT IN or NOT EXISTS against the related foreign-key set."
|
|
1599
|
+
}),
|
|
1600
|
+
hint2(
|
|
1601
|
+
"Map role-bearing relationship columns carefully (for example host_* foreign keys for hosting relationships) instead of generic IDs when role wording is explicit."
|
|
1602
|
+
),
|
|
1603
|
+
hint2(
|
|
1604
|
+
'For "never had/never exceeded" conditions over history tables, exclude entities via NOT IN/NOT EXISTS against the disqualifying entity-id set (often built with GROUP BY/HAVING MAX(...)).'
|
|
1605
|
+
)
|
|
1606
|
+
),
|
|
1607
|
+
fragment2(
|
|
1608
|
+
"join_completeness",
|
|
1609
|
+
policy({
|
|
1610
|
+
rule: "Preserve entity-restricting joins implied by the question. Do not widen results by querying only a broader attribute table when a subset entity table is available."
|
|
1611
|
+
}),
|
|
1612
|
+
policy({
|
|
1613
|
+
rule: "If an entity term in the question maps to a table, keep that table in query scope and join to attribute tables rather than dropping the entity table."
|
|
1614
|
+
}),
|
|
1615
|
+
hint2(
|
|
1616
|
+
"If the question targets a specific entity group, include that entity table and its join conditions even when selected columns come from a related table."
|
|
1617
|
+
),
|
|
1618
|
+
hint2(
|
|
1619
|
+
"When the question names an entity type and a relation table links to that entity via *_id, include the entity table in scope instead of counting only relation rows."
|
|
1620
|
+
),
|
|
1621
|
+
hint2(
|
|
1622
|
+
"Prefer INNER JOIN by default; use LEFT JOIN only when the question explicitly requests including unmatched rows or zero-related entities."
|
|
1623
|
+
)
|
|
1624
|
+
),
|
|
1625
|
+
fragment2(
|
|
1626
|
+
"aggregation_exactness",
|
|
1627
|
+
policy({
|
|
1628
|
+
rule: "Preserve requested aggregation semantics exactly: use COUNT(*) by default for total rows, use COUNT(DISTINCT ...) only when uniqueness is explicitly requested, and group by stable entity keys when computing per-entity aggregates."
|
|
1629
|
+
}),
|
|
1630
|
+
policy({
|
|
1631
|
+
rule: "For questions asking which entity has lowest/highest average of a metric, compute AVG(metric) per entity (GROUP BY entity) and rank those aggregates."
|
|
1632
|
+
}),
|
|
1633
|
+
hint2(
|
|
1634
|
+
'For "how many <entities>" questions over relation records, default to COUNT(*) on qualifying rows unless explicit uniqueness language is present.'
|
|
1635
|
+
)
|
|
1636
|
+
),
|
|
1637
|
+
fragment2(
|
|
1638
|
+
"query_shape_examples",
|
|
1639
|
+
example({
|
|
1640
|
+
question: "List categories ordered by how many records belong to each category.",
|
|
1641
|
+
answer: "SELECT category FROM records GROUP BY category ORDER BY COUNT(*)"
|
|
1642
|
+
}),
|
|
1643
|
+
example({
|
|
1644
|
+
question: "Show labels shared by rows with metric > 100 and rows with metric < 10.",
|
|
1645
|
+
answer: "SELECT label FROM records WHERE metric > 100 INTERSECT SELECT label FROM records WHERE metric < 10"
|
|
1646
|
+
}),
|
|
1647
|
+
example({
|
|
1648
|
+
question: "List locations that have not hosted any event.",
|
|
1649
|
+
answer: "SELECT location_name FROM locations WHERE location_id NOT IN (SELECT host_location_id FROM events)"
|
|
1650
|
+
}),
|
|
1651
|
+
example({
|
|
1652
|
+
question: "List the most common category across records.",
|
|
1653
|
+
answer: "SELECT category FROM records GROUP BY category ORDER BY COUNT(*) DESC LIMIT 1"
|
|
1090
1654
|
})
|
|
1091
|
-
// policy({
|
|
1092
|
-
// rule: 'Before returning an error, perform a schema-grounded self-check: identify core intent, draft best-effort SQL, then verify it uses only existing tables/columns.',
|
|
1093
|
-
// }),
|
|
1094
|
-
// policy({
|
|
1095
|
-
// rule: 'Return unanswerable only if that self-check confirms no valid SQL can express the required intent without inventing schema elements.',
|
|
1096
|
-
// }),
|
|
1097
|
-
// policy({
|
|
1098
|
-
// rule: 'Prefer a best-effort valid SQL query when entities can be reasonably inferred from table or column names.',
|
|
1099
|
-
// }),
|
|
1100
|
-
// policy({
|
|
1101
|
-
// rule: 'Use lexical normalization (singular/plural, paraphrases, role synonyms, and minor wording differences) to align question terms with schema names.',
|
|
1102
|
-
// }),
|
|
1103
|
-
// policy({
|
|
1104
|
-
// rule: 'Decompose noun phrases into core entity and qualifiers, and map the core entity first.',
|
|
1105
|
-
// }),
|
|
1106
|
-
// policy({
|
|
1107
|
-
// rule: 'Do not require every descriptive word to map to a separate schema field when the core entity match is unambiguous.',
|
|
1108
|
-
// }),
|
|
1109
|
-
// policy({
|
|
1110
|
-
// rule: 'For phrases like "X of Y", treat Y as contextual (non-blocking) when Y has no mapped schema field and the question does not ask to filter/group/select by Y explicitly.',
|
|
1111
|
-
// }),
|
|
1112
|
-
// policy({
|
|
1113
|
-
// rule: 'Treat unmatched qualifiers as blockers only when they are restrictive constraints (specific values, comparisons, or conditions that change row eligibility).',
|
|
1114
|
-
// }),
|
|
1115
|
-
// hint('Preserve schema spelling exactly, including typos in column names.'),
|
|
1116
1655
|
)
|
|
1117
|
-
// fragment(
|
|
1118
|
-
// 'unanswerable_gate',
|
|
1119
|
-
// workflow({
|
|
1120
|
-
// task: 'Unanswerable decision',
|
|
1121
|
-
// steps: [
|
|
1122
|
-
// 'Identify the core intent (metric/projection and required filters).',
|
|
1123
|
-
// 'Attempt schema-grounded mapping for the core intent before considering error.',
|
|
1124
|
-
// 'If a valid SELECT can answer the core intent without inventing schema entities, return SQL.',
|
|
1125
|
-
// 'Return unanswerable only when required information cannot be mapped to any available table or column.',
|
|
1126
|
-
// ],
|
|
1127
|
-
// }),
|
|
1128
|
-
// policy({
|
|
1129
|
-
// rule: 'Do not reject a question as unanswerable when requested information can be derived by filtering, joining, grouping, counting, set operations, or sorting on available columns.',
|
|
1130
|
-
// }),
|
|
1131
|
-
// ),
|
|
1132
|
-
// fragment(
|
|
1133
|
-
// 'query_shape_preferences',
|
|
1134
|
-
// hint(
|
|
1135
|
-
// 'Prefer explicit INNER JOINs over LEFT JOINs unless the question requires unmatched rows.',
|
|
1136
|
-
// ),
|
|
1137
|
-
// hint(
|
|
1138
|
-
// 'Prefer direct joins over dropping join constraints or using weaker alternatives.',
|
|
1139
|
-
// ),
|
|
1140
|
-
// hint('Use DISTINCT only when uniqueness is explicitly requested.'),
|
|
1141
|
-
// hint(
|
|
1142
|
-
// 'For superlatives over grouped entities (most/least/highest/lowest by group), prefer GROUP BY with ORDER BY aggregate and LIMIT 1.',
|
|
1143
|
-
// ),
|
|
1144
|
-
// hint(
|
|
1145
|
-
// 'For average/count conditions per entity, prefer GROUP BY with HAVING aggregate predicates over row-level WHERE predicates.',
|
|
1146
|
-
// ),
|
|
1147
|
-
// hint(
|
|
1148
|
-
// 'For "both" conditions across two criteria, prefer INTERSECT when selecting shared values.',
|
|
1149
|
-
// ),
|
|
1150
|
-
// hint(
|
|
1151
|
-
// 'For "A or B" retrieval across criteria, prefer UNION when combining two qualifying sets.',
|
|
1152
|
-
// ),
|
|
1153
|
-
// hint(
|
|
1154
|
-
// 'For "never" constraints against related records, prefer NOT IN or EXCEPT against the disqualifying set.',
|
|
1155
|
-
// ),
|
|
1156
|
-
// hint(
|
|
1157
|
-
// 'Use equality predicates for exact values unless the question asks for pattern matching.',
|
|
1158
|
-
// ),
|
|
1159
|
-
// hint(
|
|
1160
|
-
// 'Keep numeric literals unquoted when they are purely numeric tokens in the question.',
|
|
1161
|
-
// ),
|
|
1162
|
-
// ),
|
|
1163
|
-
// fragment(
|
|
1164
|
-
// 'sql_minimality',
|
|
1165
|
-
// guardrail({
|
|
1166
|
-
// rule: 'Never hallucinate tables or columns.',
|
|
1167
|
-
// reason: 'Schema fidelity is required.',
|
|
1168
|
-
// action: 'Use only available schema entities.',
|
|
1169
|
-
// }),
|
|
1170
|
-
// guardrail({
|
|
1171
|
-
// rule: 'Prefer the minimal query over transformed expressions.',
|
|
1172
|
-
// reason:
|
|
1173
|
-
// 'Unnecessary transformations reduce correctness and add avoidable complexity.',
|
|
1174
|
-
// action:
|
|
1175
|
-
// 'Do not add date parsing, substring extraction, derived projections, or extra selected columns unless explicitly requested or required by schema mismatch.',
|
|
1176
|
-
// }),
|
|
1177
|
-
// ),
|
|
1178
1656
|
];
|
|
1179
1657
|
function extractSql(output) {
|
|
1180
1658
|
const match = output.match(/```sql\n?([\s\S]*?)```/);
|
|
@@ -1308,11 +1786,11 @@ function isModelUnavailableError(error) {
|
|
|
1308
1786
|
if (!APICallError.isInstance(error)) {
|
|
1309
1787
|
return false;
|
|
1310
1788
|
}
|
|
1311
|
-
const
|
|
1789
|
+
const message = error.message.toLowerCase();
|
|
1312
1790
|
const responseBody = (error.responseBody ?? "").toLowerCase();
|
|
1313
|
-
const is404ModelError = error.statusCode === 404 && (
|
|
1791
|
+
const is404ModelError = error.statusCode === 404 && (message.includes("model") || responseBody.includes("model_not_found"));
|
|
1314
1792
|
const errorCode = typeof error.data === "object" && error.data !== null && "error" in error.data && typeof error.data.error === "object" && error.data.error !== null && "code" in error.data.error && typeof error.data.error.code === "string" ? error.data.error.code.toLowerCase() : void 0;
|
|
1315
|
-
return is404ModelError || errorCode === "model_not_found" || responseBody.includes('"code":"model_not_found"') ||
|
|
1793
|
+
return is404ModelError || errorCode === "model_not_found" || responseBody.includes('"code":"model_not_found"') || message.includes("model") && message.includes("does not exist or you do not have access to it");
|
|
1316
1794
|
}
|
|
1317
1795
|
async function withRetry(computation, options = { retries: 3 }) {
|
|
1318
1796
|
const errors = [];
|
|
@@ -4338,17 +4816,17 @@ var TrackedFs = class {
|
|
|
4338
4816
|
// packages/text2sql/src/lib/instructions.ts
|
|
4339
4817
|
import {
|
|
4340
4818
|
clarification,
|
|
4341
|
-
example,
|
|
4819
|
+
example as example2,
|
|
4342
4820
|
explain,
|
|
4343
4821
|
fragment as fragment3,
|
|
4344
|
-
guardrail,
|
|
4345
|
-
hint as
|
|
4822
|
+
guardrail as guardrail2,
|
|
4823
|
+
hint as hint3,
|
|
4346
4824
|
policy as policy2,
|
|
4347
4825
|
principle,
|
|
4348
4826
|
quirk,
|
|
4349
4827
|
role,
|
|
4350
4828
|
styleGuide,
|
|
4351
|
-
workflow
|
|
4829
|
+
workflow as workflow2
|
|
4352
4830
|
} from "@deepagents/context";
|
|
4353
4831
|
function reasoningFramework() {
|
|
4354
4832
|
return [
|
|
@@ -4357,7 +4835,7 @@ function reasoningFramework() {
|
|
|
4357
4835
|
),
|
|
4358
4836
|
fragment3(
|
|
4359
4837
|
"meta-cognitive-reasoning-framework",
|
|
4360
|
-
|
|
4838
|
+
hint3(
|
|
4361
4839
|
"Before taking any action (either tool calls *or* responses to the user), you must proactively, methodically, and independently plan and reason about:"
|
|
4362
4840
|
),
|
|
4363
4841
|
// 1) Logical dependencies and constraints
|
|
@@ -4498,7 +4976,7 @@ function guidelines(options = {}) {
|
|
|
4498
4976
|
// Few-shot: Applying reasoning principles
|
|
4499
4977
|
fragment3(
|
|
4500
4978
|
"reasoning-examples",
|
|
4501
|
-
|
|
4979
|
+
example2({
|
|
4502
4980
|
question: "Show me sales last month",
|
|
4503
4981
|
answer: `Applying Principle 1 (Logical dependencies):
|
|
4504
4982
|
- Need: schema to know which table has sales data
|
|
@@ -4510,7 +4988,7 @@ Applying Principle 5 (Information availability):
|
|
|
4510
4988
|
|
|
4511
4989
|
Action: Ask user for date range clarification BEFORE generating SQL.`
|
|
4512
4990
|
}),
|
|
4513
|
-
|
|
4991
|
+
example2({
|
|
4514
4992
|
question: "Why did my query return no results?",
|
|
4515
4993
|
answer: `Applying Principle 3 (Abductive reasoning):
|
|
4516
4994
|
- Hypothesis 1 (most likely): Filter too restrictive
|
|
@@ -4524,7 +5002,7 @@ Testing hypotheses:
|
|
|
4524
5002
|
|
|
4525
5003
|
Action: Start with most likely hypothesis, test incrementally. NEVER guess.`
|
|
4526
5004
|
}),
|
|
4527
|
-
|
|
5005
|
+
example2({
|
|
4528
5006
|
question: "Get me the top customers",
|
|
4529
5007
|
answer: `Applying Principle 1 (Logical dependencies):
|
|
4530
5008
|
- "Top" is ambiguous\u2014by revenue? by order count? by recency?
|
|
@@ -4538,10 +5016,10 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4538
5016
|
// Schema adherence - consolidated into clear rules
|
|
4539
5017
|
fragment3(
|
|
4540
5018
|
"schema_adherence",
|
|
4541
|
-
|
|
5019
|
+
hint3(
|
|
4542
5020
|
"Use only tables and columns from the schema. For unspecified columns, use SELECT *. When showing related items, include IDs and requested details."
|
|
4543
5021
|
),
|
|
4544
|
-
|
|
5022
|
+
hint3(
|
|
4545
5023
|
'"Show" means list items; "count" or "total" means aggregate. Use canonical values verbatim for filtering.'
|
|
4546
5024
|
)
|
|
4547
5025
|
),
|
|
@@ -4557,18 +5035,18 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4557
5035
|
explanation: "Measures how closely the physical row order matches the logical sort order of the column. Values near 1 or -1 mean the data is well-ordered; near 0 means scattered",
|
|
4558
5036
|
therefore: "High correlation means range queries (BETWEEN, >, <) on that column benefit from index scans. Low correlation means the index is less effective for ranges"
|
|
4559
5037
|
}),
|
|
4560
|
-
|
|
5038
|
+
hint3(
|
|
4561
5039
|
"When min/max stats are available, use them to validate filter values. If a user asks for values outside the known range, warn them the query may return no results."
|
|
4562
5040
|
)
|
|
4563
5041
|
),
|
|
4564
5042
|
// Joins - use relationship metadata
|
|
4565
|
-
|
|
5043
|
+
hint3(
|
|
4566
5044
|
"Use JOINs based on schema relationships. Favor PK/indexed columns; follow relationship metadata for direction and cardinality."
|
|
4567
5045
|
),
|
|
4568
5046
|
// Aggregations - explain the concepts
|
|
4569
5047
|
fragment3(
|
|
4570
5048
|
"Aggregations",
|
|
4571
|
-
|
|
5049
|
+
hint3(
|
|
4572
5050
|
"Apply COUNT, SUM, AVG when the question implies summarization. Use window functions for ranking, running totals, or row comparisons."
|
|
4573
5051
|
),
|
|
4574
5052
|
explain({
|
|
@@ -4593,7 +5071,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4593
5071
|
issue: "NULL values behave unexpectedly in comparisons and aggregations",
|
|
4594
5072
|
workaround: "Use IS NULL, IS NOT NULL, or COALESCE() to handle NULLs explicitly"
|
|
4595
5073
|
}),
|
|
4596
|
-
|
|
5074
|
+
hint3(
|
|
4597
5075
|
"Always include mentioned filters from joined tables in WHERE conditions."
|
|
4598
5076
|
)
|
|
4599
5077
|
),
|
|
@@ -4608,22 +5086,22 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4608
5086
|
// Safety guardrails - consolidated
|
|
4609
5087
|
fragment3(
|
|
4610
5088
|
"Query safety",
|
|
4611
|
-
|
|
5089
|
+
guardrail2({
|
|
4612
5090
|
rule: "Generate only valid, executable SELECT/WITH statements.",
|
|
4613
5091
|
reason: "Read-only access prevents data modification.",
|
|
4614
5092
|
action: "Never generate INSERT, UPDATE, DELETE, DROP, or DDL statements."
|
|
4615
5093
|
}),
|
|
4616
|
-
|
|
5094
|
+
guardrail2({
|
|
4617
5095
|
rule: "Avoid unbounded scans and cartesian joins.",
|
|
4618
5096
|
reason: "Protects performance and correctness.",
|
|
4619
5097
|
action: "Apply filters on indexed columns. If join keys are unclear, ask for clarification."
|
|
4620
5098
|
}),
|
|
4621
|
-
|
|
5099
|
+
guardrail2({
|
|
4622
5100
|
rule: "Preserve query semantics.",
|
|
4623
5101
|
reason: "Arbitrary modifications change results.",
|
|
4624
5102
|
action: 'Only add LIMIT for explicit "top N" requests. Add ORDER BY for deterministic results.'
|
|
4625
5103
|
}),
|
|
4626
|
-
|
|
5104
|
+
guardrail2({
|
|
4627
5105
|
rule: "Seek clarification for genuine ambiguity.",
|
|
4628
5106
|
reason: "Prevents incorrect assumptions.",
|
|
4629
5107
|
action: "Ask a focused question before guessing."
|
|
@@ -4634,10 +5112,10 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4634
5112
|
ask: "Clarify the ranking metric or definition.",
|
|
4635
5113
|
reason: "Ensures correct aggregation and ordering."
|
|
4636
5114
|
}),
|
|
4637
|
-
|
|
5115
|
+
hint3(
|
|
4638
5116
|
'Use sample cell values from schema hints to match exact casing and format in WHERE conditions (e.g., "Male" vs "male" vs "M").'
|
|
4639
5117
|
),
|
|
4640
|
-
|
|
5118
|
+
workflow2({
|
|
4641
5119
|
task: "SQL generation",
|
|
4642
5120
|
steps: [
|
|
4643
5121
|
"Schema linking: identify which tables and columns are mentioned or implied by the question.",
|
|
@@ -4649,7 +5127,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4649
5127
|
"Verify: mentally translate SQL back to natural language. Does it match the original question?"
|
|
4650
5128
|
]
|
|
4651
5129
|
}),
|
|
4652
|
-
|
|
5130
|
+
workflow2({
|
|
4653
5131
|
task: "Error recovery",
|
|
4654
5132
|
triggers: ["SQL error", "query failed", "execution error"],
|
|
4655
5133
|
steps: [
|
|
@@ -4662,7 +5140,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4662
5140
|
],
|
|
4663
5141
|
notes: "Maximum 3 retry attempts. If still failing, explain the issue to the user."
|
|
4664
5142
|
}),
|
|
4665
|
-
|
|
5143
|
+
workflow2({
|
|
4666
5144
|
task: "Complex query decomposition",
|
|
4667
5145
|
triggers: [
|
|
4668
5146
|
"multiple conditions",
|
|
@@ -4679,7 +5157,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4679
5157
|
],
|
|
4680
5158
|
notes: "Complex questions often need CTEs (WITH clauses) for clarity and reusability."
|
|
4681
5159
|
}),
|
|
4682
|
-
|
|
5160
|
+
workflow2({
|
|
4683
5161
|
task: "Multi-turn context",
|
|
4684
5162
|
triggers: ["follow-up", "and also", "what about", "same but", "instead"],
|
|
4685
5163
|
steps: [
|
|
@@ -4694,7 +5172,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4694
5172
|
}),
|
|
4695
5173
|
fragment3(
|
|
4696
5174
|
"Bash tool usage",
|
|
4697
|
-
|
|
5175
|
+
workflow2({
|
|
4698
5176
|
task: "Query execution",
|
|
4699
5177
|
steps: [
|
|
4700
5178
|
'Execute SQL through bash tool: sql run "SELECT ..."',
|
|
@@ -4703,16 +5181,16 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4703
5181
|
"For large results, slice first: cat <path> | jq '.[:10]'"
|
|
4704
5182
|
]
|
|
4705
5183
|
}),
|
|
4706
|
-
|
|
5184
|
+
hint3(
|
|
4707
5185
|
`You cannot access sql through a tool, it'll fail so the proper way to access it is through the bash tool using "sql run" and "sql validate" commands.`
|
|
4708
5186
|
),
|
|
4709
|
-
|
|
5187
|
+
hint3(
|
|
4710
5188
|
"The sql command outputs: file path, column names (comma-separated), and row count. Use column names to construct precise jq queries."
|
|
4711
5189
|
),
|
|
4712
|
-
|
|
5190
|
+
hint3(
|
|
4713
5191
|
'This is virtual bash environment and "sql" commands proxy to the database hence you cannot access sql files directly.'
|
|
4714
5192
|
),
|
|
4715
|
-
|
|
5193
|
+
hint3(
|
|
4716
5194
|
"If a query fails, the sql command returns an error message in stderr."
|
|
4717
5195
|
)
|
|
4718
5196
|
)
|
|
@@ -4727,7 +5205,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4727
5205
|
);
|
|
4728
5206
|
} else {
|
|
4729
5207
|
baseTeachings.push(
|
|
4730
|
-
|
|
5208
|
+
hint3(
|
|
4731
5209
|
'When a month, day, or time period is mentioned without a year (e.g., "in August", "on Monday"), assume ALL occurrences of that period in the data. Do not ask for year clarification.'
|
|
4732
5210
|
)
|
|
4733
5211
|
);
|
|
@@ -4749,8 +5227,9 @@ import "@deepagents/agent";
|
|
|
4749
5227
|
import {
|
|
4750
5228
|
agent as agent2,
|
|
4751
5229
|
assistant,
|
|
5230
|
+
chatMessageToUIMessage,
|
|
4752
5231
|
errorRecoveryGuardrail,
|
|
4753
|
-
|
|
5232
|
+
toMessageFragment
|
|
4754
5233
|
} from "@deepagents/context";
|
|
4755
5234
|
var Text2Sql = class {
|
|
4756
5235
|
#config;
|
|
@@ -4823,17 +5302,20 @@ var Text2Sql = class {
|
|
|
4823
5302
|
...guidelines(this.#config.teachingsOptions),
|
|
4824
5303
|
...await this.index()
|
|
4825
5304
|
);
|
|
4826
|
-
const
|
|
5305
|
+
const lastItem = messages[messages.length - 1];
|
|
5306
|
+
const lastFragment = toMessageFragment(lastItem);
|
|
5307
|
+
const lastUIMessage = chatMessageToUIMessage(lastItem);
|
|
4827
5308
|
let assistantMsgId;
|
|
4828
|
-
if (
|
|
4829
|
-
context.set(
|
|
5309
|
+
if (lastUIMessage.role === "assistant") {
|
|
5310
|
+
context.set(lastFragment);
|
|
4830
5311
|
await context.save({ branch: false });
|
|
4831
|
-
assistantMsgId =
|
|
5312
|
+
assistantMsgId = lastUIMessage.id;
|
|
4832
5313
|
} else {
|
|
4833
|
-
context.set(
|
|
5314
|
+
context.set(lastFragment);
|
|
4834
5315
|
await context.save();
|
|
4835
5316
|
assistantMsgId = generateId();
|
|
4836
5317
|
}
|
|
5318
|
+
const uiMessages = messages.map(chatMessageToUIMessage);
|
|
4837
5319
|
const { mounts: skillMounts } = context.getSkillMounts();
|
|
4838
5320
|
const { tools: tools2 } = await createResultTools({
|
|
4839
5321
|
adapter: this.#config.adapter,
|
|
@@ -4861,7 +5343,7 @@ var Text2Sql = class {
|
|
|
4861
5343
|
sendFinish: true,
|
|
4862
5344
|
sendReasoning: true,
|
|
4863
5345
|
sendSources: true,
|
|
4864
|
-
originalMessages:
|
|
5346
|
+
originalMessages: uiMessages,
|
|
4865
5347
|
generateMessageId: () => assistantMsgId,
|
|
4866
5348
|
messageMetadata: ({ part }) => {
|
|
4867
5349
|
if (part.type === "finish-step") {
|
|
@@ -4880,7 +5362,7 @@ var Text2Sql = class {
|
|
|
4880
5362
|
}
|
|
4881
5363
|
});
|
|
4882
5364
|
return createUIMessageStream({
|
|
4883
|
-
originalMessages:
|
|
5365
|
+
originalMessages: uiMessages,
|
|
4884
5366
|
generateId: () => assistantMsgId,
|
|
4885
5367
|
onStepFinish: async ({ responseMessage }) => {
|
|
4886
5368
|
context.set(assistant({ ...responseMessage, id: assistantMsgId }));
|