@deepagents/text2sql 0.20.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js
CHANGED
|
@@ -685,6 +685,14 @@ var BLOCKED_DB_CLIENT_COMMANDS = /* @__PURE__ */ new Set([
|
|
|
685
685
|
]);
|
|
686
686
|
var BLOCKED_RAW_SQL_COMMANDS = /* @__PURE__ */ new Set(["select", "with"]);
|
|
687
687
|
var ALLOWED_SQL_PROXY_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "validate"]);
|
|
688
|
+
var SHELL_INTERPRETER_COMMANDS = /* @__PURE__ */ new Set([
|
|
689
|
+
"bash",
|
|
690
|
+
"sh",
|
|
691
|
+
"zsh",
|
|
692
|
+
"dash",
|
|
693
|
+
"ksh"
|
|
694
|
+
]);
|
|
695
|
+
var WRAPPER_COMMANDS = /* @__PURE__ */ new Set(["env", "command", "eval"]);
|
|
688
696
|
var SQL_PROXY_ENFORCEMENT_MESSAGE = [
|
|
689
697
|
"Direct database querying through bash is blocked.",
|
|
690
698
|
"Use SQL proxy commands in this order:",
|
|
@@ -740,82 +748,94 @@ function isScriptNode(value) {
|
|
|
740
748
|
const node = value;
|
|
741
749
|
return node.type === "Script" && Array.isArray(node.statements);
|
|
742
750
|
}
|
|
743
|
-
function scriptContainsBlockedCommand(script, context) {
|
|
744
|
-
return statementsContainBlockedCommand(script.statements, context);
|
|
751
|
+
function scriptContainsBlockedCommand(script, context, mode = "blocked-only") {
|
|
752
|
+
return statementsContainBlockedCommand(script.statements, context, mode);
|
|
745
753
|
}
|
|
746
|
-
function statementsContainBlockedCommand(statements, context) {
|
|
754
|
+
function statementsContainBlockedCommand(statements, context, mode) {
|
|
747
755
|
for (const statement of statements) {
|
|
748
|
-
if (statementContainsBlockedCommand(statement, context)) {
|
|
756
|
+
if (statementContainsBlockedCommand(statement, context, mode)) {
|
|
749
757
|
return true;
|
|
750
758
|
}
|
|
751
759
|
}
|
|
752
760
|
return false;
|
|
753
761
|
}
|
|
754
|
-
function statementContainsBlockedCommand(statement, context) {
|
|
762
|
+
function statementContainsBlockedCommand(statement, context, mode) {
|
|
755
763
|
for (const pipeline of statement.pipelines) {
|
|
756
|
-
if (pipelineContainsBlockedCommand(pipeline, context)) {
|
|
764
|
+
if (pipelineContainsBlockedCommand(pipeline, context, mode)) {
|
|
757
765
|
return true;
|
|
758
766
|
}
|
|
759
767
|
}
|
|
760
768
|
return false;
|
|
761
769
|
}
|
|
762
|
-
function pipelineContainsBlockedCommand(pipeline, context) {
|
|
763
|
-
for (const command of pipeline.commands) {
|
|
770
|
+
function pipelineContainsBlockedCommand(pipeline, context, mode) {
|
|
771
|
+
for (const [index2, command] of pipeline.commands.entries()) {
|
|
764
772
|
if (command.type === "FunctionDef") {
|
|
765
773
|
context.functionDefinitions.set(command.name, command);
|
|
766
774
|
continue;
|
|
767
775
|
}
|
|
768
|
-
if (commandContainsBlockedCommand(command, context
|
|
776
|
+
if (commandContainsBlockedCommand(command, context, mode, {
|
|
777
|
+
stdinFromPipe: index2 > 0
|
|
778
|
+
})) {
|
|
769
779
|
return true;
|
|
770
780
|
}
|
|
771
781
|
}
|
|
772
782
|
return false;
|
|
773
783
|
}
|
|
774
|
-
function stringCommandContainsBlockedCommand(command, context) {
|
|
784
|
+
function stringCommandContainsBlockedCommand(command, context, mode = "blocked-only") {
|
|
775
785
|
let script;
|
|
776
786
|
try {
|
|
777
787
|
script = parse(command);
|
|
778
788
|
} catch {
|
|
779
789
|
return false;
|
|
780
790
|
}
|
|
781
|
-
return scriptContainsBlockedCommand(
|
|
791
|
+
return scriptContainsBlockedCommand(
|
|
792
|
+
script,
|
|
793
|
+
cloneInspectionContext(context),
|
|
794
|
+
mode
|
|
795
|
+
);
|
|
782
796
|
}
|
|
783
|
-
function wordContainsBlockedCommand(word, context) {
|
|
797
|
+
function wordContainsBlockedCommand(word, context, mode) {
|
|
784
798
|
if (!word) {
|
|
785
799
|
return false;
|
|
786
800
|
}
|
|
787
801
|
return wordPartContainsBlockedCommand(
|
|
788
802
|
word.parts,
|
|
789
|
-
context
|
|
803
|
+
context,
|
|
804
|
+
mode
|
|
790
805
|
);
|
|
791
806
|
}
|
|
792
|
-
function wordPartContainsBlockedCommand(parts, context) {
|
|
807
|
+
function wordPartContainsBlockedCommand(parts, context, mode) {
|
|
793
808
|
for (const part of parts) {
|
|
794
|
-
if (partContainsBlockedCommand(part, context)) {
|
|
809
|
+
if (partContainsBlockedCommand(part, context, mode)) {
|
|
795
810
|
return true;
|
|
796
811
|
}
|
|
797
812
|
}
|
|
798
813
|
return false;
|
|
799
814
|
}
|
|
800
|
-
function partContainsBlockedCommand(node, context) {
|
|
815
|
+
function partContainsBlockedCommand(node, context, mode) {
|
|
801
816
|
const type = node.type;
|
|
802
817
|
if (type === "CommandSubstitution" || type === "ProcessSubstitution") {
|
|
803
818
|
if (isScriptNode(node.body)) {
|
|
804
819
|
return scriptContainsBlockedCommand(
|
|
805
820
|
node.body,
|
|
806
|
-
cloneInspectionContext(context)
|
|
821
|
+
cloneInspectionContext(context),
|
|
822
|
+
mode
|
|
807
823
|
);
|
|
808
824
|
}
|
|
809
825
|
return false;
|
|
810
826
|
}
|
|
811
827
|
if (type === "ArithCommandSubst" && typeof node.command === "string") {
|
|
812
|
-
return stringCommandContainsBlockedCommand(node.command, context);
|
|
828
|
+
return stringCommandContainsBlockedCommand(node.command, context, mode);
|
|
813
829
|
}
|
|
814
830
|
for (const value of Object.values(node)) {
|
|
815
831
|
if (Array.isArray(value)) {
|
|
816
832
|
for (const item of value) {
|
|
817
833
|
if (typeof item === "object" && item !== null) {
|
|
818
|
-
if (partContainsBlockedCommand(
|
|
834
|
+
if (partContainsBlockedCommand(
|
|
835
|
+
item,
|
|
836
|
+
context,
|
|
837
|
+
mode
|
|
838
|
+
)) {
|
|
819
839
|
return true;
|
|
820
840
|
}
|
|
821
841
|
}
|
|
@@ -823,14 +843,18 @@ function partContainsBlockedCommand(node, context) {
|
|
|
823
843
|
continue;
|
|
824
844
|
}
|
|
825
845
|
if (typeof value === "object" && value !== null) {
|
|
826
|
-
if (partContainsBlockedCommand(
|
|
846
|
+
if (partContainsBlockedCommand(
|
|
847
|
+
value,
|
|
848
|
+
context,
|
|
849
|
+
mode
|
|
850
|
+
)) {
|
|
827
851
|
return true;
|
|
828
852
|
}
|
|
829
853
|
}
|
|
830
854
|
}
|
|
831
855
|
return false;
|
|
832
856
|
}
|
|
833
|
-
function functionInvocationContainsBlockedCommand(functionName, context) {
|
|
857
|
+
function functionInvocationContainsBlockedCommand(functionName, context, mode) {
|
|
834
858
|
const definition = context.functionDefinitions.get(functionName);
|
|
835
859
|
if (!definition) {
|
|
836
860
|
return false;
|
|
@@ -840,52 +864,306 @@ function functionInvocationContainsBlockedCommand(functionName, context) {
|
|
|
840
864
|
}
|
|
841
865
|
const invocationContext = cloneInspectionContext(context);
|
|
842
866
|
invocationContext.callStack.add(functionName);
|
|
843
|
-
return commandContainsBlockedCommand(
|
|
867
|
+
return commandContainsBlockedCommand(
|
|
868
|
+
definition.body,
|
|
869
|
+
invocationContext,
|
|
870
|
+
mode,
|
|
871
|
+
{ stdinFromPipe: false }
|
|
872
|
+
);
|
|
873
|
+
}
|
|
874
|
+
function isAsciiLetter(character) {
|
|
875
|
+
const charCode = character.charCodeAt(0);
|
|
876
|
+
return charCode >= 65 && charCode <= 90 || charCode >= 97 && charCode <= 122;
|
|
877
|
+
}
|
|
878
|
+
function isAsciiDigit(character) {
|
|
879
|
+
const charCode = character.charCodeAt(0);
|
|
880
|
+
return charCode >= 48 && charCode <= 57;
|
|
881
|
+
}
|
|
882
|
+
function isValidEnvVariableName(name) {
|
|
883
|
+
if (!name) {
|
|
884
|
+
return false;
|
|
885
|
+
}
|
|
886
|
+
const firstChar = name[0];
|
|
887
|
+
if (!(isAsciiLetter(firstChar) || firstChar === "_")) {
|
|
888
|
+
return false;
|
|
889
|
+
}
|
|
890
|
+
for (let index2 = 1; index2 < name.length; index2 += 1) {
|
|
891
|
+
const char = name[index2];
|
|
892
|
+
if (!(isAsciiLetter(char) || isAsciiDigit(char) || char === "_")) {
|
|
893
|
+
return false;
|
|
894
|
+
}
|
|
895
|
+
}
|
|
896
|
+
return true;
|
|
897
|
+
}
|
|
898
|
+
function isEnvAssignmentToken(token) {
|
|
899
|
+
const separatorIndex = token.indexOf("=");
|
|
900
|
+
if (separatorIndex <= 0) {
|
|
901
|
+
return false;
|
|
902
|
+
}
|
|
903
|
+
return isValidEnvVariableName(token.slice(0, separatorIndex));
|
|
904
|
+
}
|
|
905
|
+
function parseShortOptionCluster(option) {
|
|
906
|
+
if (!option.startsWith("-") || option.startsWith("--") || option.length <= 1) {
|
|
907
|
+
return {
|
|
908
|
+
valid: false,
|
|
909
|
+
hasCommandFlag: false,
|
|
910
|
+
hasStdinFlag: false,
|
|
911
|
+
consumesNextArg: false
|
|
912
|
+
};
|
|
913
|
+
}
|
|
914
|
+
let hasCommandFlag = false;
|
|
915
|
+
let hasStdinFlag = false;
|
|
916
|
+
let consumesNextArg = false;
|
|
917
|
+
for (let index2 = 1; index2 < option.length; index2 += 1) {
|
|
918
|
+
const char = option[index2];
|
|
919
|
+
if (!isAsciiLetter(char)) {
|
|
920
|
+
return {
|
|
921
|
+
valid: false,
|
|
922
|
+
hasCommandFlag: false,
|
|
923
|
+
hasStdinFlag: false,
|
|
924
|
+
consumesNextArg: false
|
|
925
|
+
};
|
|
926
|
+
}
|
|
927
|
+
if (char === "c") {
|
|
928
|
+
hasCommandFlag = true;
|
|
929
|
+
} else if (char === "s") {
|
|
930
|
+
hasStdinFlag = true;
|
|
931
|
+
} else if (char === "O" || char === "o") {
|
|
932
|
+
consumesNextArg = true;
|
|
933
|
+
}
|
|
934
|
+
}
|
|
935
|
+
return { valid: true, hasCommandFlag, hasStdinFlag, consumesNextArg };
|
|
936
|
+
}
|
|
937
|
+
function getShellInvocationDescriptor(args) {
|
|
938
|
+
let readsFromStdin = false;
|
|
939
|
+
const longOptionsWithValue = /* @__PURE__ */ new Set(["--rcfile", "--init-file"]);
|
|
940
|
+
for (let index2 = 0; index2 < args.length; index2 += 1) {
|
|
941
|
+
const token = asStaticWordText(args[index2]);
|
|
942
|
+
if (token == null) {
|
|
943
|
+
return { kind: "unknown", payload: null };
|
|
944
|
+
}
|
|
945
|
+
if (token === "--") {
|
|
946
|
+
if (index2 + 1 >= args.length) {
|
|
947
|
+
break;
|
|
948
|
+
}
|
|
949
|
+
return {
|
|
950
|
+
kind: "script",
|
|
951
|
+
payload: asStaticWordText(args[index2 + 1])
|
|
952
|
+
};
|
|
953
|
+
}
|
|
954
|
+
if (token === "--command") {
|
|
955
|
+
return {
|
|
956
|
+
kind: "command",
|
|
957
|
+
payload: asStaticWordText(args[index2 + 1])
|
|
958
|
+
};
|
|
959
|
+
}
|
|
960
|
+
if (token.startsWith("--command=")) {
|
|
961
|
+
return {
|
|
962
|
+
kind: "command",
|
|
963
|
+
payload: token.slice("--command=".length)
|
|
964
|
+
};
|
|
965
|
+
}
|
|
966
|
+
if (token.startsWith("--")) {
|
|
967
|
+
if (token.includes("=")) {
|
|
968
|
+
continue;
|
|
969
|
+
}
|
|
970
|
+
if (longOptionsWithValue.has(token)) {
|
|
971
|
+
if (index2 + 1 >= args.length) {
|
|
972
|
+
return { kind: "unknown", payload: null };
|
|
973
|
+
}
|
|
974
|
+
index2 += 1;
|
|
975
|
+
}
|
|
976
|
+
continue;
|
|
977
|
+
}
|
|
978
|
+
if (token.startsWith("-") && !token.startsWith("--")) {
|
|
979
|
+
const parsed = parseShortOptionCluster(token);
|
|
980
|
+
if (!parsed.valid) {
|
|
981
|
+
return { kind: "unknown", payload: null };
|
|
982
|
+
}
|
|
983
|
+
if (parsed.hasCommandFlag) {
|
|
984
|
+
return {
|
|
985
|
+
kind: "command",
|
|
986
|
+
payload: asStaticWordText(args[index2 + 1])
|
|
987
|
+
};
|
|
988
|
+
}
|
|
989
|
+
if (parsed.hasStdinFlag) {
|
|
990
|
+
readsFromStdin = true;
|
|
991
|
+
}
|
|
992
|
+
if (parsed.consumesNextArg) {
|
|
993
|
+
if (index2 + 1 >= args.length) {
|
|
994
|
+
return { kind: "unknown", payload: null };
|
|
995
|
+
}
|
|
996
|
+
index2 += 1;
|
|
997
|
+
}
|
|
998
|
+
continue;
|
|
999
|
+
}
|
|
1000
|
+
return {
|
|
1001
|
+
kind: "script",
|
|
1002
|
+
payload: token
|
|
1003
|
+
};
|
|
1004
|
+
}
|
|
1005
|
+
if (readsFromStdin) {
|
|
1006
|
+
return { kind: "stdin", payload: null };
|
|
1007
|
+
}
|
|
1008
|
+
return { kind: "none", payload: null };
|
|
1009
|
+
}
|
|
1010
|
+
function getHereDocPayload(redirections) {
|
|
1011
|
+
const payloads = [];
|
|
1012
|
+
for (const redirection of redirections) {
|
|
1013
|
+
if (redirection.target.type !== "HereDoc") {
|
|
1014
|
+
continue;
|
|
1015
|
+
}
|
|
1016
|
+
if (!redirection.target.content) {
|
|
1017
|
+
payloads.push("");
|
|
1018
|
+
continue;
|
|
1019
|
+
}
|
|
1020
|
+
const payload = asStaticWordText(redirection.target.content);
|
|
1021
|
+
if (payload == null) {
|
|
1022
|
+
return { hasHereDoc: true, payload: null };
|
|
1023
|
+
}
|
|
1024
|
+
payloads.push(payload);
|
|
1025
|
+
}
|
|
1026
|
+
if (payloads.length === 0) {
|
|
1027
|
+
return { hasHereDoc: false, payload: null };
|
|
1028
|
+
}
|
|
1029
|
+
return { hasHereDoc: true, payload: payloads.join("\n") };
|
|
1030
|
+
}
|
|
1031
|
+
function joinStaticWords(words) {
|
|
1032
|
+
const tokens = [];
|
|
1033
|
+
for (const word of words) {
|
|
1034
|
+
const token = asStaticWordText(word);
|
|
1035
|
+
if (token == null) {
|
|
1036
|
+
return null;
|
|
1037
|
+
}
|
|
1038
|
+
tokens.push(token);
|
|
1039
|
+
}
|
|
1040
|
+
return tokens.join(" ");
|
|
1041
|
+
}
|
|
1042
|
+
function resolveEnvWrapperCommand(args) {
|
|
1043
|
+
let index2 = 0;
|
|
1044
|
+
while (index2 < args.length) {
|
|
1045
|
+
const token = asStaticWordText(args[index2]);
|
|
1046
|
+
if (token == null) {
|
|
1047
|
+
return { kind: "unknown" };
|
|
1048
|
+
}
|
|
1049
|
+
if (token === "--") {
|
|
1050
|
+
index2 += 1;
|
|
1051
|
+
break;
|
|
1052
|
+
}
|
|
1053
|
+
if (token === "-u" || token === "--unset" || token === "--chdir") {
|
|
1054
|
+
if (index2 + 1 >= args.length) {
|
|
1055
|
+
return { kind: "unknown" };
|
|
1056
|
+
}
|
|
1057
|
+
index2 += 2;
|
|
1058
|
+
continue;
|
|
1059
|
+
}
|
|
1060
|
+
if (token.startsWith("--unset=") || token.startsWith("--chdir=")) {
|
|
1061
|
+
index2 += 1;
|
|
1062
|
+
continue;
|
|
1063
|
+
}
|
|
1064
|
+
if (token.startsWith("-") && token !== "-" && !isEnvAssignmentToken(token)) {
|
|
1065
|
+
index2 += 1;
|
|
1066
|
+
continue;
|
|
1067
|
+
}
|
|
1068
|
+
if (isEnvAssignmentToken(token)) {
|
|
1069
|
+
index2 += 1;
|
|
1070
|
+
continue;
|
|
1071
|
+
}
|
|
1072
|
+
break;
|
|
1073
|
+
}
|
|
1074
|
+
if (index2 >= args.length) {
|
|
1075
|
+
return { kind: "none" };
|
|
1076
|
+
}
|
|
1077
|
+
return {
|
|
1078
|
+
kind: "resolved",
|
|
1079
|
+
name: args[index2],
|
|
1080
|
+
args: args.slice(index2 + 1)
|
|
1081
|
+
};
|
|
844
1082
|
}
|
|
845
|
-
function
|
|
1083
|
+
function resolveCommandWrapperCommand(args) {
|
|
1084
|
+
let index2 = 0;
|
|
1085
|
+
let lookupOnly = false;
|
|
1086
|
+
while (index2 < args.length) {
|
|
1087
|
+
const token = asStaticWordText(args[index2]);
|
|
1088
|
+
if (token == null) {
|
|
1089
|
+
return { kind: "unknown" };
|
|
1090
|
+
}
|
|
1091
|
+
if (token === "--") {
|
|
1092
|
+
index2 += 1;
|
|
1093
|
+
break;
|
|
1094
|
+
}
|
|
1095
|
+
if (token === "-v" || token === "-V") {
|
|
1096
|
+
lookupOnly = true;
|
|
1097
|
+
index2 += 1;
|
|
1098
|
+
continue;
|
|
1099
|
+
}
|
|
1100
|
+
if (token.startsWith("-") && token !== "-") {
|
|
1101
|
+
index2 += 1;
|
|
1102
|
+
continue;
|
|
1103
|
+
}
|
|
1104
|
+
break;
|
|
1105
|
+
}
|
|
1106
|
+
if (lookupOnly || index2 >= args.length) {
|
|
1107
|
+
return { kind: "none" };
|
|
1108
|
+
}
|
|
1109
|
+
return {
|
|
1110
|
+
kind: "resolved",
|
|
1111
|
+
name: args[index2],
|
|
1112
|
+
args: args.slice(index2 + 1)
|
|
1113
|
+
};
|
|
1114
|
+
}
|
|
1115
|
+
function commandContainsBlockedCommand(command, context, mode, options = { stdinFromPipe: false }) {
|
|
846
1116
|
switch (command.type) {
|
|
847
1117
|
case "SimpleCommand":
|
|
848
|
-
return isBlockedSimpleCommand(command, context);
|
|
1118
|
+
return isBlockedSimpleCommand(command, context, mode, options);
|
|
849
1119
|
case "If":
|
|
850
1120
|
return command.clauses.some(
|
|
851
1121
|
(clause) => statementsContainBlockedCommand(
|
|
852
1122
|
clause.condition,
|
|
853
|
-
cloneInspectionContext(context)
|
|
1123
|
+
cloneInspectionContext(context),
|
|
1124
|
+
mode
|
|
854
1125
|
) || statementsContainBlockedCommand(
|
|
855
1126
|
clause.body,
|
|
856
|
-
cloneInspectionContext(context)
|
|
1127
|
+
cloneInspectionContext(context),
|
|
1128
|
+
mode
|
|
857
1129
|
)
|
|
858
1130
|
) || (command.elseBody ? statementsContainBlockedCommand(
|
|
859
1131
|
command.elseBody,
|
|
860
|
-
cloneInspectionContext(context)
|
|
1132
|
+
cloneInspectionContext(context),
|
|
1133
|
+
mode
|
|
861
1134
|
) : false);
|
|
862
1135
|
case "For":
|
|
863
1136
|
case "CStyleFor":
|
|
864
1137
|
return statementsContainBlockedCommand(
|
|
865
1138
|
command.body,
|
|
866
|
-
cloneInspectionContext(context)
|
|
1139
|
+
cloneInspectionContext(context),
|
|
1140
|
+
mode
|
|
867
1141
|
);
|
|
868
1142
|
case "While":
|
|
869
1143
|
case "Until":
|
|
870
1144
|
return statementsContainBlockedCommand(
|
|
871
1145
|
command.condition,
|
|
872
|
-
cloneInspectionContext(context)
|
|
1146
|
+
cloneInspectionContext(context),
|
|
1147
|
+
mode
|
|
873
1148
|
) || statementsContainBlockedCommand(
|
|
874
1149
|
command.body,
|
|
875
|
-
cloneInspectionContext(context)
|
|
1150
|
+
cloneInspectionContext(context),
|
|
1151
|
+
mode
|
|
876
1152
|
);
|
|
877
1153
|
case "Case":
|
|
878
1154
|
return command.items.some(
|
|
879
1155
|
(item) => statementsContainBlockedCommand(
|
|
880
1156
|
item.body,
|
|
881
|
-
cloneInspectionContext(context)
|
|
1157
|
+
cloneInspectionContext(context),
|
|
1158
|
+
mode
|
|
882
1159
|
)
|
|
883
1160
|
);
|
|
884
1161
|
case "Subshell":
|
|
885
1162
|
case "Group":
|
|
886
1163
|
return statementsContainBlockedCommand(
|
|
887
1164
|
command.body,
|
|
888
|
-
cloneInspectionContext(context)
|
|
1165
|
+
cloneInspectionContext(context),
|
|
1166
|
+
mode
|
|
889
1167
|
);
|
|
890
1168
|
case "FunctionDef":
|
|
891
1169
|
return false;
|
|
@@ -898,16 +1176,16 @@ function commandContainsBlockedCommand(command, context) {
|
|
|
898
1176
|
}
|
|
899
1177
|
}
|
|
900
1178
|
}
|
|
901
|
-
function isBlockedSimpleCommand(command, context) {
|
|
902
|
-
if (wordContainsBlockedCommand(command.name, context)) {
|
|
1179
|
+
function isBlockedSimpleCommand(command, context, mode, options) {
|
|
1180
|
+
if (wordContainsBlockedCommand(command.name, context, mode)) {
|
|
903
1181
|
return true;
|
|
904
1182
|
}
|
|
905
|
-
if (command.args.some((arg) => wordContainsBlockedCommand(arg, context))) {
|
|
1183
|
+
if (command.args.some((arg) => wordContainsBlockedCommand(arg, context, mode))) {
|
|
906
1184
|
return true;
|
|
907
1185
|
}
|
|
908
1186
|
if (command.assignments.some(
|
|
909
|
-
(assignment) => wordContainsBlockedCommand(assignment.value, context) || (assignment.array?.some(
|
|
910
|
-
(value) => wordContainsBlockedCommand(value, context)
|
|
1187
|
+
(assignment) => wordContainsBlockedCommand(assignment.value, context, mode) || (assignment.array?.some(
|
|
1188
|
+
(value) => wordContainsBlockedCommand(value, context, mode)
|
|
911
1189
|
) ?? false)
|
|
912
1190
|
)) {
|
|
913
1191
|
return true;
|
|
@@ -916,11 +1194,16 @@ function isBlockedSimpleCommand(command, context) {
|
|
|
916
1194
|
if (redirection.target.type === "Word") {
|
|
917
1195
|
return wordContainsBlockedCommand(
|
|
918
1196
|
redirection.target,
|
|
919
|
-
context
|
|
1197
|
+
context,
|
|
1198
|
+
mode
|
|
920
1199
|
);
|
|
921
1200
|
}
|
|
922
1201
|
if (redirection.target.type === "HereDoc" && redirection.target.content) {
|
|
923
|
-
return wordContainsBlockedCommand(
|
|
1202
|
+
return wordContainsBlockedCommand(
|
|
1203
|
+
redirection.target.content,
|
|
1204
|
+
context,
|
|
1205
|
+
mode
|
|
1206
|
+
);
|
|
924
1207
|
}
|
|
925
1208
|
return false;
|
|
926
1209
|
})) {
|
|
@@ -939,9 +1222,92 @@ function isBlockedSimpleCommand(command, context) {
|
|
|
939
1222
|
}
|
|
940
1223
|
if (normalizedName === "sql") {
|
|
941
1224
|
const subcommand = asStaticWordText(command.args[0])?.toLowerCase();
|
|
942
|
-
|
|
1225
|
+
if (!subcommand) {
|
|
1226
|
+
return true;
|
|
1227
|
+
}
|
|
1228
|
+
if (mode === "block-all-sql") {
|
|
1229
|
+
return true;
|
|
1230
|
+
}
|
|
1231
|
+
return !ALLOWED_SQL_PROXY_SUBCOMMANDS.has(subcommand);
|
|
943
1232
|
}
|
|
944
|
-
|
|
1233
|
+
const inspectWrappedCommand = (resolved) => {
|
|
1234
|
+
if (resolved.kind === "none") {
|
|
1235
|
+
return false;
|
|
1236
|
+
}
|
|
1237
|
+
if (resolved.kind === "unknown" || !resolved.name || !resolved.args) {
|
|
1238
|
+
return true;
|
|
1239
|
+
}
|
|
1240
|
+
return isBlockedSimpleCommand(
|
|
1241
|
+
{
|
|
1242
|
+
name: resolved.name,
|
|
1243
|
+
args: resolved.args,
|
|
1244
|
+
assignments: [],
|
|
1245
|
+
redirections: []
|
|
1246
|
+
},
|
|
1247
|
+
context,
|
|
1248
|
+
"block-all-sql",
|
|
1249
|
+
options
|
|
1250
|
+
);
|
|
1251
|
+
};
|
|
1252
|
+
if (WRAPPER_COMMANDS.has(normalizedName)) {
|
|
1253
|
+
if (normalizedName === "env") {
|
|
1254
|
+
return inspectWrappedCommand(resolveEnvWrapperCommand(command.args));
|
|
1255
|
+
}
|
|
1256
|
+
if (normalizedName === "command") {
|
|
1257
|
+
return inspectWrappedCommand(resolveCommandWrapperCommand(command.args));
|
|
1258
|
+
}
|
|
1259
|
+
const evalScript = joinStaticWords(command.args);
|
|
1260
|
+
if (evalScript == null) {
|
|
1261
|
+
return true;
|
|
1262
|
+
}
|
|
1263
|
+
if (!evalScript.trim()) {
|
|
1264
|
+
return false;
|
|
1265
|
+
}
|
|
1266
|
+
return stringCommandContainsBlockedCommand(
|
|
1267
|
+
evalScript,
|
|
1268
|
+
context,
|
|
1269
|
+
"block-all-sql"
|
|
1270
|
+
);
|
|
1271
|
+
}
|
|
1272
|
+
if (SHELL_INTERPRETER_COMMANDS.has(normalizedName)) {
|
|
1273
|
+
const shellInvocation = getShellInvocationDescriptor(command.args);
|
|
1274
|
+
if (shellInvocation.kind === "unknown") {
|
|
1275
|
+
return true;
|
|
1276
|
+
}
|
|
1277
|
+
if (shellInvocation.kind === "command") {
|
|
1278
|
+
if (!shellInvocation.payload) {
|
|
1279
|
+
return true;
|
|
1280
|
+
}
|
|
1281
|
+
if (stringCommandContainsBlockedCommand(
|
|
1282
|
+
shellInvocation.payload,
|
|
1283
|
+
context,
|
|
1284
|
+
"block-all-sql"
|
|
1285
|
+
)) {
|
|
1286
|
+
return true;
|
|
1287
|
+
}
|
|
1288
|
+
return false;
|
|
1289
|
+
}
|
|
1290
|
+
const hereDoc = getHereDocPayload(command.redirections);
|
|
1291
|
+
if (hereDoc.hasHereDoc) {
|
|
1292
|
+
if (hereDoc.payload == null) {
|
|
1293
|
+
return true;
|
|
1294
|
+
}
|
|
1295
|
+
if (hereDoc.payload.trim().length > 0 && stringCommandContainsBlockedCommand(
|
|
1296
|
+
hereDoc.payload,
|
|
1297
|
+
context,
|
|
1298
|
+
"block-all-sql"
|
|
1299
|
+
)) {
|
|
1300
|
+
return true;
|
|
1301
|
+
}
|
|
1302
|
+
}
|
|
1303
|
+
if (shellInvocation.kind === "script") {
|
|
1304
|
+
return true;
|
|
1305
|
+
}
|
|
1306
|
+
if (options.stdinFromPipe || shellInvocation.kind === "stdin") {
|
|
1307
|
+
return !hereDoc.hasHereDoc;
|
|
1308
|
+
}
|
|
1309
|
+
}
|
|
1310
|
+
if (functionInvocationContainsBlockedCommand(commandName, context, mode)) {
|
|
945
1311
|
return true;
|
|
946
1312
|
}
|
|
947
1313
|
return false;
|
|
@@ -1073,13 +1439,17 @@ import "@deepagents/agent";
|
|
|
1073
1439
|
import {
|
|
1074
1440
|
ContextEngine as ContextEngine2,
|
|
1075
1441
|
InMemoryContextStore as InMemoryContextStore2,
|
|
1442
|
+
example,
|
|
1076
1443
|
fragment as fragment2,
|
|
1444
|
+
guardrail,
|
|
1445
|
+
hint as hint2,
|
|
1077
1446
|
persona as persona3,
|
|
1078
1447
|
policy,
|
|
1079
1448
|
structuredOutput as structuredOutput2,
|
|
1080
|
-
user as user2
|
|
1449
|
+
user as user2,
|
|
1450
|
+
workflow
|
|
1081
1451
|
} from "@deepagents/context";
|
|
1082
|
-
var RETRY_TEMPERATURES = [0, 0.
|
|
1452
|
+
var RETRY_TEMPERATURES = [0, 0.4, 0.8];
|
|
1083
1453
|
var SQL_AGENT_ROLE = "Expert SQL query generator.";
|
|
1084
1454
|
var SQL_AGENT_OBJECTIVE = "Generate precise SQL grounded in provided schema.";
|
|
1085
1455
|
var SQL_AGENT_POLICIES = [
|
|
@@ -1087,94 +1457,201 @@ var SQL_AGENT_POLICIES = [
|
|
|
1087
1457
|
"schema_mapping",
|
|
1088
1458
|
policy({
|
|
1089
1459
|
rule: "Translate natural language into precise SQL grounded in available schema entities."
|
|
1460
|
+
}),
|
|
1461
|
+
hint2("Preserve schema spelling exactly, including typos in column names.")
|
|
1462
|
+
),
|
|
1463
|
+
fragment2(
|
|
1464
|
+
"projection_minimality",
|
|
1465
|
+
policy({
|
|
1466
|
+
rule: "Return only columns requested by the question; do not add helper columns unless explicitly requested."
|
|
1467
|
+
}),
|
|
1468
|
+
policy({
|
|
1469
|
+
rule: 'For requests of the form "X sorted/ordered by Y", project X only unless Y is explicitly requested as an output field.'
|
|
1470
|
+
}),
|
|
1471
|
+
policy({
|
|
1472
|
+
rule: "Prefer selecting schema columns directly without derived expressions when direct selection answers the request."
|
|
1473
|
+
}),
|
|
1474
|
+
hint2(
|
|
1475
|
+
"Do not include ORDER BY, GROUP BY, or JOIN helper columns in SELECT output unless the question explicitly asks for them."
|
|
1476
|
+
),
|
|
1477
|
+
policy({
|
|
1478
|
+
rule: "Use DISTINCT only when uniqueness is explicitly requested (for example distinct/unique/different/no duplicates)."
|
|
1479
|
+
}),
|
|
1480
|
+
hint2(
|
|
1481
|
+
'Do not infer DISTINCT from generic wording such as "some", plural nouns, or entity-set phrasing; for transactional/attendance-style tables, default to raw rows unless uniqueness is explicitly requested.'
|
|
1482
|
+
)
|
|
1483
|
+
),
|
|
1484
|
+
fragment2(
|
|
1485
|
+
"date_transform_safety",
|
|
1486
|
+
policy({
|
|
1487
|
+
rule: "Do not assume VARCHAR/TEXT values are parseable dates. Avoid date extraction functions on text columns by default."
|
|
1488
|
+
}),
|
|
1489
|
+
policy({
|
|
1490
|
+
rule: "Use date-part extraction only when both conditions hold: the question explicitly asks for transformation and schema values require transformation to produce that unit."
|
|
1491
|
+
}),
|
|
1492
|
+
hint2(
|
|
1493
|
+
"Do not apply SUBSTR, STRFTIME, DATE_PART, YEAR, or similar extraction functions unless the question explicitly asks for transformation and schema values require it."
|
|
1494
|
+
),
|
|
1495
|
+
hint2(
|
|
1496
|
+
"If a column already represents the requested concept (for example a stored year-like value), use the column as-is."
|
|
1497
|
+
)
|
|
1498
|
+
),
|
|
1499
|
+
fragment2(
|
|
1500
|
+
"sql_minimality",
|
|
1501
|
+
guardrail({
|
|
1502
|
+
rule: "Never hallucinate tables or columns.",
|
|
1503
|
+
reason: "Schema fidelity is required.",
|
|
1504
|
+
action: "Use only available schema entities."
|
|
1505
|
+
}),
|
|
1506
|
+
guardrail({
|
|
1507
|
+
rule: "Avoid unnecessary transformations and derived projections.",
|
|
1508
|
+
reason: "Extra transformations frequently change semantics and reduce correctness.",
|
|
1509
|
+
action: "Do not add date parsing, substring extraction, or derived columns unless explicitly required by the question or schema."
|
|
1510
|
+
})
|
|
1511
|
+
),
|
|
1512
|
+
fragment2(
|
|
1513
|
+
"preflight_checklist",
|
|
1514
|
+
workflow({
|
|
1515
|
+
task: "Final SQL preflight before returning output",
|
|
1516
|
+
steps: [
|
|
1517
|
+
"Verify selected columns match the question and remove unrequested helper projections.",
|
|
1518
|
+
"If aggregate values are used only for ranking/filtering, keep them out of SELECT unless explicitly requested.",
|
|
1519
|
+
"Prefer raw schema columns over derived expressions when raw columns already satisfy the request.",
|
|
1520
|
+
"If a candidate query uses STRFTIME, SUBSTR, DATE_PART, YEAR, or similar extraction on text-like columns, remove that transformation unless explicitly required by the question.",
|
|
1521
|
+
"Return only schema-grounded SQL using existing tables and columns."
|
|
1522
|
+
]
|
|
1523
|
+
})
|
|
1524
|
+
),
|
|
1525
|
+
fragment2(
|
|
1526
|
+
"set_semantics",
|
|
1527
|
+
policy({
|
|
1528
|
+
rule: "For questions asking where both condition A and condition B hold over an attribute, compute the intersection of qualifying sets for that attribute."
|
|
1529
|
+
}),
|
|
1530
|
+
policy({
|
|
1531
|
+
rule: "Do not force the same entity instance to satisfy both conditions unless the question explicitly requests the same person/row/entity."
|
|
1532
|
+
}),
|
|
1533
|
+
hint2(
|
|
1534
|
+
"Prefer INTERSECT (or logically equivalent set-based shape) over requiring the same physical row/entity to satisfy both conditions unless explicitly requested."
|
|
1535
|
+
),
|
|
1536
|
+
hint2(
|
|
1537
|
+
"When two conditions describe different row groups whose shared attribute is requested, build each group separately and intersect the attribute values."
|
|
1538
|
+
),
|
|
1539
|
+
hint2(
|
|
1540
|
+
"Do not collapse cross-group conditions into a single-row AND predicate when the intent is shared values across groups."
|
|
1541
|
+
),
|
|
1542
|
+
policy({
|
|
1543
|
+
rule: "If two predicates on the same field cannot both be true for one row, do not combine them with AND; use set operations across separate filtered subsets when shared values are requested."
|
|
1544
|
+
})
|
|
1545
|
+
),
|
|
1546
|
+
fragment2(
|
|
1547
|
+
"predicate_column_alignment",
|
|
1548
|
+
policy({
|
|
1549
|
+
rule: "Match literal values to semantically compatible columns. Do not compare descriptive names to identifier columns."
|
|
1550
|
+
}),
|
|
1551
|
+
hint2(
|
|
1552
|
+
"When a filter value is a descriptive label (for example a department name), join through the lookup table and filter on its name/title column, not on *_id columns."
|
|
1553
|
+
),
|
|
1554
|
+
hint2(
|
|
1555
|
+
"When relation roles are explicit in wording (for example host/home/source/destination), prefer foreign keys with matching role qualifiers over generic similarly named columns."
|
|
1556
|
+
),
|
|
1557
|
+
policy({
|
|
1558
|
+
rule: "When multiple foreign-key candidates exist, select the column whose qualifier best matches the relationship described in the question."
|
|
1559
|
+
}),
|
|
1560
|
+
policy({
|
|
1561
|
+
rule: "For hosting/held semantics, prefer host_* relationship columns when available over generic *_id alternatives."
|
|
1562
|
+
}),
|
|
1563
|
+
hint2(
|
|
1564
|
+
'Interpret wording like "held/hosted a competition or event" as a hosting relationship and map to host_* foreign keys when present.'
|
|
1565
|
+
),
|
|
1566
|
+
policy({
|
|
1567
|
+
rule: "Do not compare descriptive labels or names to *_id columns; join to the table containing the descriptive field and filter there."
|
|
1568
|
+
}),
|
|
1569
|
+
policy({
|
|
1570
|
+
rule: "Keep numeric identifiers unquoted when used as numeric equality filters unless schema indicates text identifiers."
|
|
1571
|
+
}),
|
|
1572
|
+
policy({
|
|
1573
|
+
rule: "When filtering by a descriptive label value and a related table exposes a corresponding *_name or title column, join to that table and filter on the descriptive column."
|
|
1574
|
+
})
|
|
1575
|
+
),
|
|
1576
|
+
fragment2(
|
|
1577
|
+
"ordering_semantics",
|
|
1578
|
+
policy({
|
|
1579
|
+
rule: "Respect explicit sort direction terms. If direction is not specified, use ascending order unless a superlative intent (most/least/highest/lowest) implies direction."
|
|
1580
|
+
}),
|
|
1581
|
+
policy({
|
|
1582
|
+
rule: "When ranking categories by frequency, use COUNT for ordering but keep output focused on requested category fields unless counts are explicitly requested."
|
|
1583
|
+
}),
|
|
1584
|
+
policy({
|
|
1585
|
+
rule: "Do not use DESC unless descending direction is explicit or a superlative intent requires descending ranking."
|
|
1586
|
+
}),
|
|
1587
|
+
policy({
|
|
1588
|
+
rule: 'For "most common/frequent <attribute>" requests, return the attribute value(s) only; use counts only for ordering/filtering unless the question explicitly asks to return counts.'
|
|
1589
|
+
}),
|
|
1590
|
+
hint2(
|
|
1591
|
+
'Use DESC with LIMIT 1 for "most/highest/largest"; use ASC with LIMIT 1 for "least/lowest/smallest".'
|
|
1592
|
+
)
|
|
1593
|
+
),
|
|
1594
|
+
fragment2(
|
|
1595
|
+
"negative_membership_queries",
|
|
1596
|
+
policy({
|
|
1597
|
+
rule: "For requests asking entities that did not participate/host/appear in related records, prefer NOT IN or NOT EXISTS against the related foreign-key set."
|
|
1598
|
+
}),
|
|
1599
|
+
hint2(
|
|
1600
|
+
"Map role-bearing relationship columns carefully (for example host_* foreign keys for hosting relationships) instead of generic IDs when role wording is explicit."
|
|
1601
|
+
),
|
|
1602
|
+
hint2(
|
|
1603
|
+
'For "never had/never exceeded" conditions over history tables, exclude entities via NOT IN/NOT EXISTS against the disqualifying entity-id set (often built with GROUP BY/HAVING MAX(...)).'
|
|
1604
|
+
)
|
|
1605
|
+
),
|
|
1606
|
+
fragment2(
|
|
1607
|
+
"join_completeness",
|
|
1608
|
+
policy({
|
|
1609
|
+
rule: "Preserve entity-restricting joins implied by the question. Do not widen results by querying only a broader attribute table when a subset entity table is available."
|
|
1610
|
+
}),
|
|
1611
|
+
policy({
|
|
1612
|
+
rule: "If an entity term in the question maps to a table, keep that table in query scope and join to attribute tables rather than dropping the entity table."
|
|
1613
|
+
}),
|
|
1614
|
+
hint2(
|
|
1615
|
+
"If the question targets a specific entity group, include that entity table and its join conditions even when selected columns come from a related table."
|
|
1616
|
+
),
|
|
1617
|
+
hint2(
|
|
1618
|
+
"When the question names an entity type and a relation table links to that entity via *_id, include the entity table in scope instead of counting only relation rows."
|
|
1619
|
+
),
|
|
1620
|
+
hint2(
|
|
1621
|
+
"Prefer INNER JOIN by default; use LEFT JOIN only when the question explicitly requests including unmatched rows or zero-related entities."
|
|
1622
|
+
)
|
|
1623
|
+
),
|
|
1624
|
+
fragment2(
|
|
1625
|
+
"aggregation_exactness",
|
|
1626
|
+
policy({
|
|
1627
|
+
rule: "Preserve requested aggregation semantics exactly: use COUNT(*) by default for total rows, use COUNT(DISTINCT ...) only when uniqueness is explicitly requested, and group by stable entity keys when computing per-entity aggregates."
|
|
1628
|
+
}),
|
|
1629
|
+
policy({
|
|
1630
|
+
rule: "For questions asking which entity has lowest/highest average of a metric, compute AVG(metric) per entity (GROUP BY entity) and rank those aggregates."
|
|
1631
|
+
}),
|
|
1632
|
+
hint2(
|
|
1633
|
+
'For "how many <entities>" questions over relation records, default to COUNT(*) on qualifying rows unless explicit uniqueness language is present.'
|
|
1634
|
+
)
|
|
1635
|
+
),
|
|
1636
|
+
fragment2(
|
|
1637
|
+
"query_shape_examples",
|
|
1638
|
+
example({
|
|
1639
|
+
question: "List categories ordered by how many records belong to each category.",
|
|
1640
|
+
answer: "SELECT category FROM records GROUP BY category ORDER BY COUNT(*)"
|
|
1641
|
+
}),
|
|
1642
|
+
example({
|
|
1643
|
+
question: "Show labels shared by rows with metric > 100 and rows with metric < 10.",
|
|
1644
|
+
answer: "SELECT label FROM records WHERE metric > 100 INTERSECT SELECT label FROM records WHERE metric < 10"
|
|
1645
|
+
}),
|
|
1646
|
+
example({
|
|
1647
|
+
question: "List locations that have not hosted any event.",
|
|
1648
|
+
answer: "SELECT location_name FROM locations WHERE location_id NOT IN (SELECT host_location_id FROM events)"
|
|
1649
|
+
}),
|
|
1650
|
+
example({
|
|
1651
|
+
question: "List the most common category across records.",
|
|
1652
|
+
answer: "SELECT category FROM records GROUP BY category ORDER BY COUNT(*) DESC LIMIT 1"
|
|
1090
1653
|
})
|
|
1091
|
-
// policy({
|
|
1092
|
-
// rule: 'Before returning an error, perform a schema-grounded self-check: identify core intent, draft best-effort SQL, then verify it uses only existing tables/columns.',
|
|
1093
|
-
// }),
|
|
1094
|
-
// policy({
|
|
1095
|
-
// rule: 'Return unanswerable only if that self-check confirms no valid SQL can express the required intent without inventing schema elements.',
|
|
1096
|
-
// }),
|
|
1097
|
-
// policy({
|
|
1098
|
-
// rule: 'Prefer a best-effort valid SQL query when entities can be reasonably inferred from table or column names.',
|
|
1099
|
-
// }),
|
|
1100
|
-
// policy({
|
|
1101
|
-
// rule: 'Use lexical normalization (singular/plural, paraphrases, role synonyms, and minor wording differences) to align question terms with schema names.',
|
|
1102
|
-
// }),
|
|
1103
|
-
// policy({
|
|
1104
|
-
// rule: 'Decompose noun phrases into core entity and qualifiers, and map the core entity first.',
|
|
1105
|
-
// }),
|
|
1106
|
-
// policy({
|
|
1107
|
-
// rule: 'Do not require every descriptive word to map to a separate schema field when the core entity match is unambiguous.',
|
|
1108
|
-
// }),
|
|
1109
|
-
// policy({
|
|
1110
|
-
// rule: 'For phrases like "X of Y", treat Y as contextual (non-blocking) when Y has no mapped schema field and the question does not ask to filter/group/select by Y explicitly.',
|
|
1111
|
-
// }),
|
|
1112
|
-
// policy({
|
|
1113
|
-
// rule: 'Treat unmatched qualifiers as blockers only when they are restrictive constraints (specific values, comparisons, or conditions that change row eligibility).',
|
|
1114
|
-
// }),
|
|
1115
|
-
// hint('Preserve schema spelling exactly, including typos in column names.'),
|
|
1116
1654
|
)
|
|
1117
|
-
// fragment(
|
|
1118
|
-
// 'unanswerable_gate',
|
|
1119
|
-
// workflow({
|
|
1120
|
-
// task: 'Unanswerable decision',
|
|
1121
|
-
// steps: [
|
|
1122
|
-
// 'Identify the core intent (metric/projection and required filters).',
|
|
1123
|
-
// 'Attempt schema-grounded mapping for the core intent before considering error.',
|
|
1124
|
-
// 'If a valid SELECT can answer the core intent without inventing schema entities, return SQL.',
|
|
1125
|
-
// 'Return unanswerable only when required information cannot be mapped to any available table or column.',
|
|
1126
|
-
// ],
|
|
1127
|
-
// }),
|
|
1128
|
-
// policy({
|
|
1129
|
-
// rule: 'Do not reject a question as unanswerable when requested information can be derived by filtering, joining, grouping, counting, set operations, or sorting on available columns.',
|
|
1130
|
-
// }),
|
|
1131
|
-
// ),
|
|
1132
|
-
// fragment(
|
|
1133
|
-
// 'query_shape_preferences',
|
|
1134
|
-
// hint(
|
|
1135
|
-
// 'Prefer explicit INNER JOINs over LEFT JOINs unless the question requires unmatched rows.',
|
|
1136
|
-
// ),
|
|
1137
|
-
// hint(
|
|
1138
|
-
// 'Prefer direct joins over dropping join constraints or using weaker alternatives.',
|
|
1139
|
-
// ),
|
|
1140
|
-
// hint('Use DISTINCT only when uniqueness is explicitly requested.'),
|
|
1141
|
-
// hint(
|
|
1142
|
-
// 'For superlatives over grouped entities (most/least/highest/lowest by group), prefer GROUP BY with ORDER BY aggregate and LIMIT 1.',
|
|
1143
|
-
// ),
|
|
1144
|
-
// hint(
|
|
1145
|
-
// 'For average/count conditions per entity, prefer GROUP BY with HAVING aggregate predicates over row-level WHERE predicates.',
|
|
1146
|
-
// ),
|
|
1147
|
-
// hint(
|
|
1148
|
-
// 'For "both" conditions across two criteria, prefer INTERSECT when selecting shared values.',
|
|
1149
|
-
// ),
|
|
1150
|
-
// hint(
|
|
1151
|
-
// 'For "A or B" retrieval across criteria, prefer UNION when combining two qualifying sets.',
|
|
1152
|
-
// ),
|
|
1153
|
-
// hint(
|
|
1154
|
-
// 'For "never" constraints against related records, prefer NOT IN or EXCEPT against the disqualifying set.',
|
|
1155
|
-
// ),
|
|
1156
|
-
// hint(
|
|
1157
|
-
// 'Use equality predicates for exact values unless the question asks for pattern matching.',
|
|
1158
|
-
// ),
|
|
1159
|
-
// hint(
|
|
1160
|
-
// 'Keep numeric literals unquoted when they are purely numeric tokens in the question.',
|
|
1161
|
-
// ),
|
|
1162
|
-
// ),
|
|
1163
|
-
// fragment(
|
|
1164
|
-
// 'sql_minimality',
|
|
1165
|
-
// guardrail({
|
|
1166
|
-
// rule: 'Never hallucinate tables or columns.',
|
|
1167
|
-
// reason: 'Schema fidelity is required.',
|
|
1168
|
-
// action: 'Use only available schema entities.',
|
|
1169
|
-
// }),
|
|
1170
|
-
// guardrail({
|
|
1171
|
-
// rule: 'Prefer the minimal query over transformed expressions.',
|
|
1172
|
-
// reason:
|
|
1173
|
-
// 'Unnecessary transformations reduce correctness and add avoidable complexity.',
|
|
1174
|
-
// action:
|
|
1175
|
-
// 'Do not add date parsing, substring extraction, derived projections, or extra selected columns unless explicitly requested or required by schema mismatch.',
|
|
1176
|
-
// }),
|
|
1177
|
-
// ),
|
|
1178
1655
|
];
|
|
1179
1656
|
function extractSql(output) {
|
|
1180
1657
|
const match = output.match(/```sql\n?([\s\S]*?)```/);
|
|
@@ -4338,17 +4815,17 @@ var TrackedFs = class {
|
|
|
4338
4815
|
// packages/text2sql/src/lib/instructions.ts
|
|
4339
4816
|
import {
|
|
4340
4817
|
clarification,
|
|
4341
|
-
example,
|
|
4818
|
+
example as example2,
|
|
4342
4819
|
explain,
|
|
4343
4820
|
fragment as fragment3,
|
|
4344
|
-
guardrail,
|
|
4345
|
-
hint as
|
|
4821
|
+
guardrail as guardrail2,
|
|
4822
|
+
hint as hint3,
|
|
4346
4823
|
policy as policy2,
|
|
4347
4824
|
principle,
|
|
4348
4825
|
quirk,
|
|
4349
4826
|
role,
|
|
4350
4827
|
styleGuide,
|
|
4351
|
-
workflow
|
|
4828
|
+
workflow as workflow2
|
|
4352
4829
|
} from "@deepagents/context";
|
|
4353
4830
|
function reasoningFramework() {
|
|
4354
4831
|
return [
|
|
@@ -4357,7 +4834,7 @@ function reasoningFramework() {
|
|
|
4357
4834
|
),
|
|
4358
4835
|
fragment3(
|
|
4359
4836
|
"meta-cognitive-reasoning-framework",
|
|
4360
|
-
|
|
4837
|
+
hint3(
|
|
4361
4838
|
"Before taking any action (either tool calls *or* responses to the user), you must proactively, methodically, and independently plan and reason about:"
|
|
4362
4839
|
),
|
|
4363
4840
|
// 1) Logical dependencies and constraints
|
|
@@ -4498,7 +4975,7 @@ function guidelines(options = {}) {
|
|
|
4498
4975
|
// Few-shot: Applying reasoning principles
|
|
4499
4976
|
fragment3(
|
|
4500
4977
|
"reasoning-examples",
|
|
4501
|
-
|
|
4978
|
+
example2({
|
|
4502
4979
|
question: "Show me sales last month",
|
|
4503
4980
|
answer: `Applying Principle 1 (Logical dependencies):
|
|
4504
4981
|
- Need: schema to know which table has sales data
|
|
@@ -4510,7 +4987,7 @@ Applying Principle 5 (Information availability):
|
|
|
4510
4987
|
|
|
4511
4988
|
Action: Ask user for date range clarification BEFORE generating SQL.`
|
|
4512
4989
|
}),
|
|
4513
|
-
|
|
4990
|
+
example2({
|
|
4514
4991
|
question: "Why did my query return no results?",
|
|
4515
4992
|
answer: `Applying Principle 3 (Abductive reasoning):
|
|
4516
4993
|
- Hypothesis 1 (most likely): Filter too restrictive
|
|
@@ -4524,7 +5001,7 @@ Testing hypotheses:
|
|
|
4524
5001
|
|
|
4525
5002
|
Action: Start with most likely hypothesis, test incrementally. NEVER guess.`
|
|
4526
5003
|
}),
|
|
4527
|
-
|
|
5004
|
+
example2({
|
|
4528
5005
|
question: "Get me the top customers",
|
|
4529
5006
|
answer: `Applying Principle 1 (Logical dependencies):
|
|
4530
5007
|
- "Top" is ambiguous\u2014by revenue? by order count? by recency?
|
|
@@ -4538,10 +5015,10 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4538
5015
|
// Schema adherence - consolidated into clear rules
|
|
4539
5016
|
fragment3(
|
|
4540
5017
|
"schema_adherence",
|
|
4541
|
-
|
|
5018
|
+
hint3(
|
|
4542
5019
|
"Use only tables and columns from the schema. For unspecified columns, use SELECT *. When showing related items, include IDs and requested details."
|
|
4543
5020
|
),
|
|
4544
|
-
|
|
5021
|
+
hint3(
|
|
4545
5022
|
'"Show" means list items; "count" or "total" means aggregate. Use canonical values verbatim for filtering.'
|
|
4546
5023
|
)
|
|
4547
5024
|
),
|
|
@@ -4557,18 +5034,18 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4557
5034
|
explanation: "Measures how closely the physical row order matches the logical sort order of the column. Values near 1 or -1 mean the data is well-ordered; near 0 means scattered",
|
|
4558
5035
|
therefore: "High correlation means range queries (BETWEEN, >, <) on that column benefit from index scans. Low correlation means the index is less effective for ranges"
|
|
4559
5036
|
}),
|
|
4560
|
-
|
|
5037
|
+
hint3(
|
|
4561
5038
|
"When min/max stats are available, use them to validate filter values. If a user asks for values outside the known range, warn them the query may return no results."
|
|
4562
5039
|
)
|
|
4563
5040
|
),
|
|
4564
5041
|
// Joins - use relationship metadata
|
|
4565
|
-
|
|
5042
|
+
hint3(
|
|
4566
5043
|
"Use JOINs based on schema relationships. Favor PK/indexed columns; follow relationship metadata for direction and cardinality."
|
|
4567
5044
|
),
|
|
4568
5045
|
// Aggregations - explain the concepts
|
|
4569
5046
|
fragment3(
|
|
4570
5047
|
"Aggregations",
|
|
4571
|
-
|
|
5048
|
+
hint3(
|
|
4572
5049
|
"Apply COUNT, SUM, AVG when the question implies summarization. Use window functions for ranking, running totals, or row comparisons."
|
|
4573
5050
|
),
|
|
4574
5051
|
explain({
|
|
@@ -4593,7 +5070,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4593
5070
|
issue: "NULL values behave unexpectedly in comparisons and aggregations",
|
|
4594
5071
|
workaround: "Use IS NULL, IS NOT NULL, or COALESCE() to handle NULLs explicitly"
|
|
4595
5072
|
}),
|
|
4596
|
-
|
|
5073
|
+
hint3(
|
|
4597
5074
|
"Always include mentioned filters from joined tables in WHERE conditions."
|
|
4598
5075
|
)
|
|
4599
5076
|
),
|
|
@@ -4608,22 +5085,22 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4608
5085
|
// Safety guardrails - consolidated
|
|
4609
5086
|
fragment3(
|
|
4610
5087
|
"Query safety",
|
|
4611
|
-
|
|
5088
|
+
guardrail2({
|
|
4612
5089
|
rule: "Generate only valid, executable SELECT/WITH statements.",
|
|
4613
5090
|
reason: "Read-only access prevents data modification.",
|
|
4614
5091
|
action: "Never generate INSERT, UPDATE, DELETE, DROP, or DDL statements."
|
|
4615
5092
|
}),
|
|
4616
|
-
|
|
5093
|
+
guardrail2({
|
|
4617
5094
|
rule: "Avoid unbounded scans and cartesian joins.",
|
|
4618
5095
|
reason: "Protects performance and correctness.",
|
|
4619
5096
|
action: "Apply filters on indexed columns. If join keys are unclear, ask for clarification."
|
|
4620
5097
|
}),
|
|
4621
|
-
|
|
5098
|
+
guardrail2({
|
|
4622
5099
|
rule: "Preserve query semantics.",
|
|
4623
5100
|
reason: "Arbitrary modifications change results.",
|
|
4624
5101
|
action: 'Only add LIMIT for explicit "top N" requests. Add ORDER BY for deterministic results.'
|
|
4625
5102
|
}),
|
|
4626
|
-
|
|
5103
|
+
guardrail2({
|
|
4627
5104
|
rule: "Seek clarification for genuine ambiguity.",
|
|
4628
5105
|
reason: "Prevents incorrect assumptions.",
|
|
4629
5106
|
action: "Ask a focused question before guessing."
|
|
@@ -4634,10 +5111,10 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4634
5111
|
ask: "Clarify the ranking metric or definition.",
|
|
4635
5112
|
reason: "Ensures correct aggregation and ordering."
|
|
4636
5113
|
}),
|
|
4637
|
-
|
|
5114
|
+
hint3(
|
|
4638
5115
|
'Use sample cell values from schema hints to match exact casing and format in WHERE conditions (e.g., "Male" vs "male" vs "M").'
|
|
4639
5116
|
),
|
|
4640
|
-
|
|
5117
|
+
workflow2({
|
|
4641
5118
|
task: "SQL generation",
|
|
4642
5119
|
steps: [
|
|
4643
5120
|
"Schema linking: identify which tables and columns are mentioned or implied by the question.",
|
|
@@ -4649,7 +5126,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4649
5126
|
"Verify: mentally translate SQL back to natural language. Does it match the original question?"
|
|
4650
5127
|
]
|
|
4651
5128
|
}),
|
|
4652
|
-
|
|
5129
|
+
workflow2({
|
|
4653
5130
|
task: "Error recovery",
|
|
4654
5131
|
triggers: ["SQL error", "query failed", "execution error"],
|
|
4655
5132
|
steps: [
|
|
@@ -4662,7 +5139,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4662
5139
|
],
|
|
4663
5140
|
notes: "Maximum 3 retry attempts. If still failing, explain the issue to the user."
|
|
4664
5141
|
}),
|
|
4665
|
-
|
|
5142
|
+
workflow2({
|
|
4666
5143
|
task: "Complex query decomposition",
|
|
4667
5144
|
triggers: [
|
|
4668
5145
|
"multiple conditions",
|
|
@@ -4679,7 +5156,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4679
5156
|
],
|
|
4680
5157
|
notes: "Complex questions often need CTEs (WITH clauses) for clarity and reusability."
|
|
4681
5158
|
}),
|
|
4682
|
-
|
|
5159
|
+
workflow2({
|
|
4683
5160
|
task: "Multi-turn context",
|
|
4684
5161
|
triggers: ["follow-up", "and also", "what about", "same but", "instead"],
|
|
4685
5162
|
steps: [
|
|
@@ -4694,7 +5171,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4694
5171
|
}),
|
|
4695
5172
|
fragment3(
|
|
4696
5173
|
"Bash tool usage",
|
|
4697
|
-
|
|
5174
|
+
workflow2({
|
|
4698
5175
|
task: "Query execution",
|
|
4699
5176
|
steps: [
|
|
4700
5177
|
'Execute SQL through bash tool: sql run "SELECT ..."',
|
|
@@ -4703,16 +5180,16 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4703
5180
|
"For large results, slice first: cat <path> | jq '.[:10]'"
|
|
4704
5181
|
]
|
|
4705
5182
|
}),
|
|
4706
|
-
|
|
5183
|
+
hint3(
|
|
4707
5184
|
`You cannot access sql through a tool, it'll fail so the proper way to access it is through the bash tool using "sql run" and "sql validate" commands.`
|
|
4708
5185
|
),
|
|
4709
|
-
|
|
5186
|
+
hint3(
|
|
4710
5187
|
"The sql command outputs: file path, column names (comma-separated), and row count. Use column names to construct precise jq queries."
|
|
4711
5188
|
),
|
|
4712
|
-
|
|
5189
|
+
hint3(
|
|
4713
5190
|
'This is virtual bash environment and "sql" commands proxy to the database hence you cannot access sql files directly.'
|
|
4714
5191
|
),
|
|
4715
|
-
|
|
5192
|
+
hint3(
|
|
4716
5193
|
"If a query fails, the sql command returns an error message in stderr."
|
|
4717
5194
|
)
|
|
4718
5195
|
)
|
|
@@ -4727,7 +5204,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
|
|
|
4727
5204
|
);
|
|
4728
5205
|
} else {
|
|
4729
5206
|
baseTeachings.push(
|
|
4730
|
-
|
|
5207
|
+
hint3(
|
|
4731
5208
|
'When a month, day, or time period is mentioned without a year (e.g., "in August", "on Monday"), assume ALL occurrences of that period in the data. Do not ask for year clarification.'
|
|
4732
5209
|
)
|
|
4733
5210
|
);
|